doc update, official baselines for T1A and T1B refactored

This commit is contained in:
Alejandro Moreo Fernandez 2021-11-12 14:30:02 +01:00
parent 689ac2bbb0
commit 3eb760901f
18 changed files with 846 additions and 438 deletions

View File

@ -1,76 +0,0 @@
import pickle
import numpy as np
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm
import pandas as pd
import quapy as qp
from quapy.data import LabelledCollection
from quapy.method.aggregative import *
import quapy.functional as F
from data import *
import os
import constants
from sklearn.decomposition import TruncatedSVD
# LeQua official baselines for task T1A (Binary/Vector)
# =====================================================
# Output folders: one for the dev-set predictions, one for the pickled models.
predictions_path = os.path.join('predictions', 'T1A')
os.makedirs(predictions_path, exist_ok=True)
models_path = os.path.join('models', 'T1A')
os.makedirs(models_path, exist_ok=True)

# Locations of the public T1A data.
pathT1A = './data/T1A/public'
T1A_devvectors_path = os.path.join(pathT1A, 'dev_vectors')
T1A_devprevalence_path = os.path.join(pathT1A, 'dev_prevalences.csv')
T1A_trainpath = os.path.join(pathT1A, 'training_vectors.txt')

train = LabelledCollection.load(T1A_trainpath, load_binary_vectors)
# Number of features of the training matrix *before* the SVD projection;
# it is the value handed to the dev-sample loader below.
nF = train.instances.shape[1]

# Replace the training matrix with its 300-component truncated-SVD projection.
svd = TruncatedSVD(n_components=300)
train.instances = svd.fit_transform(train.instances)

qp.environ['SAMPLE_SIZE'] = constants.T1A_SAMPLE_SIZE

print(f'number of classes: {len(train.classes_)}')
print(f'number of training documents: {len(train)}')
print(f'training prevalence: {F.strprev(train.prevalence())}')
print(f'training matrix shape: {train.instances.shape}')

true_prevalence = ResultSubmission.load(T1A_devprevalence_path)

for quantifier in [CC, ACC, PCC, PACC, EMQ, HDy]:
    # classifier = CalibratedClassifierCV(LogisticRegression())
    classifier = LogisticRegression()
    model = quantifier(classifier).fit(train)
    quantifier_name = model.__class__.__name__

    predictions = ResultSubmission(categories=['negative', 'positive'])
    for samplename, sample in tqdm(gen_load_samples_T1(T1A_devvectors_path, nF),
                                   desc=quantifier_name, total=len(true_prevalence)):
        # every dev sample goes through the SVD fitted on the training set
        sample = svd.transform(sample)
        predictions.add(samplename, model.quantify(sample))
    predictions.dump(os.path.join(predictions_path, quantifier_name + '.svd.csv'))

    # The context manager closes the file even if pickling fails
    # (the handle used to be left for the garbage collector to close).
    with open(os.path.join(models_path, quantifier_name+'.svd.pkl'), 'wb') as fout:
        pickle.dump(model, fout, protocol=pickle.HIGHEST_PROTOCOL)

    mae, mrae = evaluate_submission(true_prevalence, predictions)
    print(f'{quantifier_name} mae={mae:.3f} mrae={mrae:.3f}')
"""
validation
CC 0.1862 1.9587
ACC 0.0394 0.2669
PCC 0.1789 2.1383
PACC 0.0354 0.1587
EMQ 0.0224 0.0960
HDy 0.0467 0.2121
"""

91
LeQua2022/baselines_T1.py Normal file
View File

@ -0,0 +1,91 @@
import argparse
import pickle
from sklearn.linear_model import LogisticRegression as LR
from quapy.method.aggregative import *
import quapy.functional as F
from data import *
import os
import constants
# LeQua official baselines for tasks T1A (Binary/Vector) and T1B (Multiclass/Vector)
# ==================================================================================
def baselines(task=None):
    """Yield every baseline quantifier together with the name it is reported under.

    :param task: task name ('T1A' or 'T1B'). When None (the default), the task is read from
        the module-level `args` object, which is only defined when this file is run as a
        script; pass the task explicitly when calling this function from other code.
    :return: a generator of pairs (quantifier, name)
    """
    if task is None:
        # backward-compatible fallback: the command-line arguments parsed in the `__main__` section
        task = args.task

    yield CC(LR(n_jobs=-1)), "CC"
    yield ACC(LR(n_jobs=-1)), "ACC"
    yield PCC(LR(n_jobs=-1)), "PCC"
    yield PACC(LR(n_jobs=-1)), "PACC"
    # EMQ is reported under the name "SLD"
    yield EMQ(CalibratedClassifierCV(LR(), n_jobs=-1)), "SLD"
    # HDy is used as-is for T1A and wrapped in OneVsAll for any other task
    # (presumably because HDy is a binary-only method -- TODO confirm)
    hdy = HDy(LR(n_jobs=-1)) if task == 'T1A' else OneVsAll(HDy(LR()), n_jobs=-1)
    yield hdy, "HDy"
def main(args):
    """Run model selection for every baseline on the development samples and pickle the best models.

    :param args: parsed command-line arguments; this function reads `args.task`,
        `args.datadir`, and `args.modeldir`
    """
    models_path = qp.util.create_if_not_exist(os.path.join(args.modeldir, args.task))

    path_dev_vectors = os.path.join(args.datadir, 'dev_vectors')
    path_dev_prevs = os.path.join(args.datadir, 'dev_prevalences.csv')
    path_train = os.path.join(args.datadir, 'training_vectors.txt')

    qp.environ['SAMPLE_SIZE'] = constants.SAMPLE_SIZE[args.task]

    train = LabelledCollection.load(path_train, load_binary_vectors)
    # number of features of the training matrix; handed to the dev-sample loader below
    nF = train.instances.shape[1]

    print(f'number of classes: {len(train.classes_)}')
    print(f'number of training documents: {len(train)}')
    print(f'training prevalence: {F.strprev(train.prevalence())}')
    print(f'training matrix shape: {train.instances.shape}')

    # NOTE(review): these look like LogisticRegression hyper-parameters; confirm they are routed
    # correctly when the learner is wrapped (e.g., CalibratedClassifierCV or OneVsAll) -- TODO confirm
    param_grid = {
        'C': np.logspace(-3,3,7),
        'class_weight': ['balanced', None]
    }

    def gen_samples():
        # builds a new generator over the dev samples each time it is called;
        # ground truth is requested and sample ids are not
        return gen_load_samples_T1(path_dev_vectors, nF, ground_truth_path=path_dev_prevs, return_id=False)

    for quantifier, q_name in baselines():
        print(f'{q_name}: Model selection')
        quantifier = qp.model_selection.GridSearchQ(
            quantifier,
            param_grid,
            sample_size=None,
            protocol='gen',
            error=qp.error.mae,
            refit=False,
            verbose=True
        ).fit(train, gen_samples)

        print(f'{q_name} got MAE={quantifier.best_score_:.3f} (hyper-params: {quantifier.best_params_})')

        model_path = os.path.join(models_path, q_name+'.pkl')
        print(f'saving model in {model_path}')
        # retrieve the model before opening the file, so a failure here does not leave an empty file behind
        best_model = quantifier.best_model()
        # the context manager closes the file even if pickling fails
        with open(model_path, 'wb') as fout:
            pickle.dump(best_model, fout, protocol=pickle.HIGHEST_PROTOCOL)
if __name__ == '__main__':
    # Command-line interface: three positional arguments (task, data directory, models directory).
    parser = argparse.ArgumentParser(description='LeQua2022 Task T1A/T1B baselines')
    parser.add_argument('task', metavar='TASK', type=str, choices=['T1A', 'T1B'],
                        help='Task name (T1A, T1B)')
    parser.add_argument('datadir', metavar='DATA-PATH', type=str,
                        help='Path of the directory containing "dev_prevalences.csv", "training_vectors.txt", and '
                             'the directory "dev_vectors"')
    parser.add_argument('modeldir', metavar='MODEL-PATH', type=str,
                        help='Path where to save the models. '
                             'A subdirectory named <task> will be automatically created.')
    args = parser.parse_args()

    # The data path must exist and be a directory...
    if not os.path.exists(args.datadir):
        raise FileNotFoundError(f'path {args.datadir} does not exist')
    if not os.path.isdir(args.datadir):
        raise ValueError(f'path {args.datadir} is not a valid directory')

    # ...and must contain these entries, checked in this order.
    required_entries = (
        ("dev_prevalences.csv", "file"),
        ("training_vectors.txt", "file"),
        ("dev_vectors", "folder"),
    )
    for entry_name, entry_kind in required_entries:
        if not os.path.exists(os.path.join(args.datadir, entry_name)):
            raise FileNotFoundError(f'path {args.datadir} does not contain "{entry_name}" {entry_kind}')

    main(args)

View File

@ -1,71 +0,0 @@
import pickle
import numpy as np
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm
import pandas as pd
import quapy as qp
from quapy.data import LabelledCollection
from quapy.method.aggregative import *
import quapy.functional as F
from data import *
import os
import constants
# LeQua official baselines for task T1A (Binary/Vector)
# =====================================================
# Output folders: one for the dev-set predictions, one for the pickled models.
predictions_path = os.path.join('predictions', 'T1A')
os.makedirs(predictions_path, exist_ok=True)
models_path = os.path.join('models', 'T1A')
os.makedirs(models_path, exist_ok=True)

# Locations of the public T1A data.
pathT1A = './data/T1A/public'
T1A_devvectors_path = os.path.join(pathT1A, 'dev_vectors')
T1A_devprevalence_path = os.path.join(pathT1A, 'dev_prevalences.csv')
T1A_trainpath = os.path.join(pathT1A, 'training_vectors.txt')

train = LabelledCollection.load(T1A_trainpath, load_binary_vectors)
# number of features of the training matrix; handed to the dev-sample loader below
nF = train.instances.shape[1]

qp.environ['SAMPLE_SIZE'] = constants.T1A_SAMPLE_SIZE

print(f'number of classes: {len(train.classes_)}')
print(f'number of training documents: {len(train)}')
print(f'training prevalence: {F.strprev(train.prevalence())}')
print(f'training matrix shape: {train.instances.shape}')

true_prevalence = ResultSubmission.load(T1A_devprevalence_path)

for quantifier in [CC, ACC, PCC, PACC, EMQ, HDy]:
    # classifier = CalibratedClassifierCV(LogisticRegression(C=1))
    classifier = LogisticRegression(C=1)
    model = quantifier(classifier).fit(train)
    quantifier_name = model.__class__.__name__

    predictions = ResultSubmission(categories=['negative', 'positive'])
    for samplename, sample in tqdm(gen_load_samples_T1(T1A_devvectors_path, nF),
                                   desc=quantifier_name, total=len(true_prevalence)):
        predictions.add(samplename, model.quantify(sample))
    predictions.dump(os.path.join(predictions_path, quantifier_name + '.csv'))

    # The context manager closes the file even if pickling fails
    # (the handle used to be left for the garbage collector to close).
    with open(os.path.join(models_path, quantifier_name+'.pkl'), 'wb') as fout:
        pickle.dump(model, fout, protocol=pickle.HIGHEST_PROTOCOL)

    mae, mrae = evaluate_submission(true_prevalence, predictions)
    print(f'{quantifier_name} mae={mae:.3f} mrae={mrae:.3f}')
"""
validation
CC 0.1862 1.9587
ACC 0.0394 0.2669
PCC 0.1789 2.1383
PACC 0.0354 0.1587
EMQ 0.0224 0.0960
HDy 0.0467 0.2121
"""

View File

@ -1,81 +0,0 @@
import pickle
from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import *
import quapy.functional as F
from data import *
import os
import constants
# LeQua official baselines for task T1A (Binary/Vector)
# =====================================================
# Output folders: one for the dev-set predictions, one for the pickled models.
predictions_path = os.path.join('predictions', 'T1A')
os.makedirs(predictions_path, exist_ok=True)
models_path = os.path.join('models', 'T1A')
os.makedirs(models_path, exist_ok=True)

# Locations of the public T1A data.
pathT1A = './data/T1A/public'
T1A_devvectors_path = os.path.join(pathT1A, 'dev_vectors')
T1A_devprevalence_path = os.path.join(pathT1A, 'dev_prevalences.csv')
T1A_trainpath = os.path.join(pathT1A, 'training_vectors.txt')

train = LabelledCollection.load(T1A_trainpath, load_binary_vectors)
# number of features of the training matrix; handed to the dev-sample loader below
nF = train.instances.shape[1]

qp.environ['SAMPLE_SIZE'] = constants.T1A_SAMPLE_SIZE

print(f'number of classes: {len(train.classes_)}')
print(f'number of training documents: {len(train)}')
print(f'training prevalence: {F.strprev(train.prevalence())}')
print(f'training matrix shape: {train.instances.shape}')

true_prevalence = ResultSubmission.load(T1A_devprevalence_path)

# Hyper-parameter grid explored during model selection.
param_grid = {
    'C': np.logspace(-3,3,7),
    'class_weight': ['balanced', None]
}


def gen_samples():
    # builds a new generator over the dev samples each time it is called;
    # ground truth is requested and sample ids are not
    return gen_load_samples_T1(T1A_devvectors_path, nF, ground_truth_path=T1A_devprevalence_path, return_id=False)


for quantifier in [EMQ]:  # [CC, ACC, PCC, PACC, EMQ, HDy]:
    # EMQ gets a calibrated classifier; every other method gets a plain logistic regression
    if quantifier == EMQ:
        classifier = CalibratedClassifierCV(LogisticRegression(), n_jobs=-1)
    else:
        classifier = LogisticRegression()

    model = quantifier(classifier)
    print(f'{model.__class__.__name__}: Model selection')
    model = qp.model_selection.GridSearchQ(
        model,
        param_grid,
        sample_size=None,
        protocol='gen',
        error=qp.error.mae,
        refit=False,
        verbose=True
    ).fit(train, gen_samples)

    quantifier_name = model.best_model().__class__.__name__
    print(f'{quantifier_name} mae={model.best_score_:.3f} (params: {model.best_params_})')

    # retrieve the model before opening the file, so a failure here does not leave an empty file behind
    best_model = model.best_model()
    # the context manager closes the file even if pickling fails
    with open(os.path.join(models_path, quantifier_name+'.pkl'), 'wb') as fout:
        pickle.dump(best_model, fout, protocol=pickle.HIGHEST_PROTOCOL)
"""
validation
CC 0.1862 1.9587
ACC 0.0394 0.2669
PCC 0.1789 2.1383
PACC 0.0354 0.1587
EMQ 0.0224 0.0960
HDy 0.0467 0.2121
"""

View File

@ -1,55 +0,0 @@
import pickle
import numpy as np
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm
import pandas as pd
import quapy as qp
from quapy.data import LabelledCollection
from quapy.method.aggregative import *
import quapy.functional as F
from data import *
import os
import constants
# Locations of the public T1B data (multiclass - vector).
pathT1B = './data/T1B/public'
T1B_devvectors_path = os.path.join(pathT1B, 'dev_vectors')
T1B_devprevalence_path = os.path.join(pathT1B, 'dev_prevalences.csv')
T1B_trainpath = os.path.join(pathT1B, 'training_vectors.txt')
T1B_catmap = os.path.join(pathT1B, 'training_vectors_label_map.txt')

# Folder where the dev-set predictions are written.
predictions_path = os.path.join('predictions', 'T1B')
os.makedirs(predictions_path, exist_ok=True)

train = LabelledCollection.load(T1B_trainpath, load_binary_vectors)
# number of features of the training matrix; handed to the dev-sample loader below
nF = train.instances.shape[1]

qp.environ['SAMPLE_SIZE'] = constants.T1B_SAMPLE_SIZE

print(f'number of classes: {len(train.classes_)}')
print(f'number of training documents: {len(train)}')
print(f'training prevalence: {F.strprev(train.prevalence())}')
print(f'training matrix shape: {train.instances.shape}')

true_prevalence = ResultSubmission.load(T1B_devprevalence_path)
cat2code, categories = load_category_map(T1B_catmap)

for quantifier in [PACC]:  # [CC, ACC, PCC, PACC, EMQ]:
    classifier = CalibratedClassifierCV(LogisticRegression())
    model = quantifier(classifier)
    model = model.fit(train)
    quantifier_name = model.__class__.__name__

    # quantify every dev sample and collect the estimated prevalences
    predictions = ResultSubmission(categories=categories)
    dev_samples = tqdm(
        gen_load_samples_T1(T1B_devvectors_path, nF),
        desc=quantifier_name,
        total=len(true_prevalence),
    )
    for samplename, sample in dev_samples:
        estimated_prevalence = model.quantify(sample)
        predictions.add(samplename, estimated_prevalence)

    predictions_file = os.path.join(predictions_path, quantifier_name + '.csv')
    predictions.dump(predictions_file)

    mae, mrae = evaluate_submission(true_prevalence, predictions)
    print(f'{quantifier_name} mae={mae:.3f} mrae={mrae:.3f}')

View File

@ -9,4 +9,11 @@ T1B_SAMPLE_SIZE = 1000
T2A_SAMPLE_SIZE = 250 T2A_SAMPLE_SIZE = 250
T2B_SAMPLE_SIZE = 1000 T2B_SAMPLE_SIZE = 1000
# Maps each task name to its sample size.
# (The last key used to be a second 'T2A', which silently overwrote the T2A entry
# with the T2B value and left 'T2B' undefined.)
SAMPLE_SIZE = {
    'T1A': T1A_SAMPLE_SIZE,
    'T1B': T1B_SAMPLE_SIZE,
    'T2A': T2A_SAMPLE_SIZE,
    'T2B': T2B_SAMPLE_SIZE,
}
ERROR_TOL = 1E-3 ERROR_TOL = 1E-3

View File

@ -34,27 +34,23 @@ def load_category_map(path):
def load_binary_vectors(path, nF=None): def load_binary_vectors(path, nF=None):
return sklearn.datasets.load_svmlight_file(path, n_features=nF) X, y = sklearn.datasets.load_svmlight_file(path, n_features=nF)
y = y.astype(int)
return X, y
def __gen_load_samples_with_groudtruth(path_dir:str, return_id:bool, ground_truth_path:str, load_fn, **load_kwargs): def __gen_load_samples_with_groudtruth(path_dir:str, return_id:bool, ground_truth_path:str, load_fn, **load_kwargs):
true_prevs = ResultSubmission.load(ground_truth_path) true_prevs = ResultSubmission.load(ground_truth_path)
for id, prevalence in true_prevs.iterrows(): for id, prevalence in true_prevs.iterrows():
sample, _ = load_fn(os.path.join(path_dir, f'{id}.txt'), **load_kwargs) sample, _ = load_fn(os.path.join(path_dir, f'{id}.txt'), **load_kwargs)
if return_id: yield (id, sample, prevalence) if return_id else (sample, prevalence)
yield id, sample, prevalence
else:
yield sample, prevalence
def __gen_load_samples_without_groudtruth(path_dir:str, return_id:bool, load_fn, **load_kwargs): def __gen_load_samples_without_groudtruth(path_dir:str, return_id:bool, load_fn, **load_kwargs):
nsamples = len(glob(os.path.join(path_dir, '*.txt'))) nsamples = len(glob(os.path.join(path_dir, '*.txt')))
for id in range(nsamples): for id in range(nsamples):
sample, _ = load_fn(os.path.join(path_dir, f'{id}.txt'), **load_kwargs) sample, _ = load_fn(os.path.join(path_dir, f'{id}.txt'), **load_kwargs)
if return_id: yield (id, sample) if return_id else sample
yield id, sample
else:
yield sample
def gen_load_samples_T1(path_dir:str, nF:int, ground_truth_path:str = None, return_id=True): def gen_load_samples_T1(path_dir:str, nF:int, ground_truth_path:str = None, return_id=True):
@ -68,6 +64,17 @@ def gen_load_samples_T1(path_dir:str, nF:int, ground_truth_path:str = None, retu
yield r yield r
def genSVD_load_samples_T1(load_fn, path_dir:str, nF:int, ground_truth_path:str = None, return_id=True):
    """Generate the samples stored in a directory, reading each one with a caller-supplied loader.

    :param load_fn: function used to read each sample file; it is invoked with the keyword argument `nF`
    :param path_dir: directory containing the sample files
    :param nF: number of features, forwarded to `load_fn`
    :param ground_truth_path: path to the true prevalences; when None, samples are generated
        without ground truth
    :param return_id: forwarded to the underlying generator, which includes the sample id in
        each yielded item when True
    :return: yields whatever the selected underlying generator yields
    """
    if ground_truth_path is not None:
        # ground truth available: the items also carry the true prevalence of each sample
        samples = __gen_load_samples_with_groudtruth(path_dir, return_id, ground_truth_path, load_fn, nF=nF)
    else:
        # no ground truth: the items carry only the sample (and its id, if requested)
        samples = __gen_load_samples_without_groudtruth(path_dir, return_id, load_fn, nF=nF)
    yield from samples
def gen_load_samples_T2A(path_dir:str, ground_truth_path:str = None): def gen_load_samples_T2A(path_dir:str, ground_truth_path:str = None):
# for ... : yield # for ... : yield
pass pass

View File

@ -78,7 +78,7 @@ Features
Methods Methods
Model Selection Model Selection
Plotting Plotting
API Developer documentation<modules> API Developers documentation<modules>

View File

@ -230,8 +230,6 @@
<li><a href="quapy.method.html#quapy.method.aggregative.ThresholdOptimization.compute_table">compute_table() (quapy.method.aggregative.ThresholdOptimization method)</a> <li><a href="quapy.method.html#quapy.method.aggregative.ThresholdOptimization.compute_table">compute_table() (quapy.method.aggregative.ThresholdOptimization method)</a>
</li> </li>
<li><a href="quapy.method.html#quapy.method.aggregative.ThresholdOptimization.compute_tpr">compute_tpr() (quapy.method.aggregative.ThresholdOptimization method)</a> <li><a href="quapy.method.html#quapy.method.aggregative.ThresholdOptimization.compute_tpr">compute_tpr() (quapy.method.aggregative.ThresholdOptimization method)</a>
</li>
<li><a href="quapy.classification.html#quapy.classification.neural.CNNnet.conv_block">conv_block() (quapy.classification.neural.CNNnet method)</a>
</li> </li>
<li><a href="quapy.data.html#quapy.data.base.LabelledCollection.counts">counts() (quapy.data.base.LabelledCollection method)</a> <li><a href="quapy.data.html#quapy.data.base.LabelledCollection.counts">counts() (quapy.data.base.LabelledCollection method)</a>
</li> </li>
@ -337,7 +335,7 @@
</li> </li>
<li><a href="quapy.data.html#quapy.data.datasets.fetch_UCILabelledCollection">fetch_UCILabelledCollection() (in module quapy.data.datasets)</a> <li><a href="quapy.data.html#quapy.data.datasets.fetch_UCILabelledCollection">fetch_UCILabelledCollection() (in module quapy.data.datasets)</a>
</li> </li>
<li><a href="quapy.classification.html#quapy.classification.methods.PCALR.fit">fit() (quapy.classification.methods.PCALR method)</a> <li><a href="quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.fit">fit() (quapy.classification.methods.LowRankLogisticRegression method)</a>
<ul> <ul>
<li><a href="quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.fit">(quapy.classification.neural.NeuralClassifierTrainer method)</a> <li><a href="quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.fit">(quapy.classification.neural.NeuralClassifierTrainer method)</a>
@ -407,7 +405,7 @@
</li> </li>
<li><a href="quapy.html#quapy.functional.get_nprevpoints_approximation">get_nprevpoints_approximation() (in module quapy.functional)</a> <li><a href="quapy.html#quapy.functional.get_nprevpoints_approximation">get_nprevpoints_approximation() (in module quapy.functional)</a>
</li> </li>
<li><a href="quapy.classification.html#quapy.classification.methods.PCALR.get_params">get_params() (quapy.classification.methods.PCALR method)</a> <li><a href="quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.get_params">get_params() (quapy.classification.methods.LowRankLogisticRegression method)</a>
<ul> <ul>
<li><a href="quapy.classification.html#quapy.classification.neural.CNNnet.get_params">(quapy.classification.neural.CNNnet method)</a> <li><a href="quapy.classification.html#quapy.classification.neural.CNNnet.get_params">(quapy.classification.neural.CNNnet method)</a>
@ -469,12 +467,8 @@
</ul></li> </ul></li>
<li><a href="quapy.data.html#quapy.data.preprocessing.IndexTransformer">IndexTransformer (class in quapy.data.preprocessing)</a> <li><a href="quapy.data.html#quapy.data.preprocessing.IndexTransformer">IndexTransformer (class in quapy.data.preprocessing)</a>
</li> </li>
<li><a href="quapy.classification.html#quapy.classification.neural.LSTMnet.init_hidden">init_hidden() (quapy.classification.neural.LSTMnet method)</a> <li><a href="quapy.method.html#quapy.method.neural.QuaNetModule.init_hidden">init_hidden() (quapy.method.neural.QuaNetModule method)</a>
<ul>
<li><a href="quapy.method.html#quapy.method.neural.QuaNetModule.init_hidden">(quapy.method.neural.QuaNetModule method)</a>
</li> </li>
</ul></li>
</ul></td> </ul></td>
<td style="width: 33%; vertical-align: top;"><ul> <td style="width: 33%; vertical-align: top;"><ul>
<li><a href="quapy.method.html#quapy.method.base.isaggregative">isaggregative() (in module quapy.method.base)</a> <li><a href="quapy.method.html#quapy.method.base.isaggregative">isaggregative() (in module quapy.method.base)</a>
@ -515,14 +509,16 @@
</li> </li>
<li><a href="quapy.method.html#quapy.method.aggregative.AggregativeQuantifier.learner">learner (quapy.method.aggregative.AggregativeQuantifier property)</a> <li><a href="quapy.method.html#quapy.method.aggregative.AggregativeQuantifier.learner">learner (quapy.method.aggregative.AggregativeQuantifier property)</a>
</li> </li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="quapy.data.html#quapy.data.base.Dataset.load">load() (quapy.data.base.Dataset class method)</a> <li><a href="quapy.data.html#quapy.data.base.Dataset.load">load() (quapy.data.base.Dataset class method)</a>
<ul> <ul>
<li><a href="quapy.data.html#quapy.data.base.LabelledCollection.load">(quapy.data.base.LabelledCollection class method)</a> <li><a href="quapy.data.html#quapy.data.base.LabelledCollection.load">(quapy.data.base.LabelledCollection class method)</a>
</li> </li>
</ul></li> </ul></li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression">LowRankLogisticRegression (class in quapy.classification.methods)</a>
</li>
<li><a href="quapy.classification.html#quapy.classification.neural.LSTMnet">LSTMnet (class in quapy.classification.neural)</a> <li><a href="quapy.classification.html#quapy.classification.neural.LSTMnet">LSTMnet (class in quapy.classification.neural)</a>
</li> </li>
</ul></td> </ul></td>
@ -673,8 +669,6 @@
<li><a href="quapy.method.html#quapy.method.aggregative.PACC">PACC (class in quapy.method.aggregative)</a> <li><a href="quapy.method.html#quapy.method.aggregative.PACC">PACC (class in quapy.method.aggregative)</a>
</li> </li>
<li><a href="quapy.html#quapy.util.parallel">parallel() (in module quapy.util)</a> <li><a href="quapy.html#quapy.util.parallel">parallel() (in module quapy.util)</a>
</li>
<li><a href="quapy.classification.html#quapy.classification.methods.PCALR">PCALR (class in quapy.classification.methods)</a>
</li> </li>
<li><a href="quapy.method.html#quapy.method.aggregative.PCC">PCC (class in quapy.method.aggregative)</a> <li><a href="quapy.method.html#quapy.method.aggregative.PCC">PCC (class in quapy.method.aggregative)</a>
</li> </li>
@ -686,7 +680,7 @@
<li><a href="quapy.method.html#quapy.method.aggregative.OneVsAll.posterior_probabilities">(quapy.method.aggregative.OneVsAll method)</a> <li><a href="quapy.method.html#quapy.method.aggregative.OneVsAll.posterior_probabilities">(quapy.method.aggregative.OneVsAll method)</a>
</li> </li>
</ul></li> </ul></li>
<li><a href="quapy.classification.html#quapy.classification.methods.PCALR.predict">predict() (quapy.classification.methods.PCALR method)</a> <li><a href="quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.predict">predict() (quapy.classification.methods.LowRankLogisticRegression method)</a>
<ul> <ul>
<li><a href="quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.predict">(quapy.classification.neural.NeuralClassifierTrainer method)</a> <li><a href="quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.predict">(quapy.classification.neural.NeuralClassifierTrainer method)</a>
@ -694,7 +688,7 @@
<li><a href="quapy.classification.html#quapy.classification.svmperf.SVMperf.predict">(quapy.classification.svmperf.SVMperf method)</a> <li><a href="quapy.classification.html#quapy.classification.svmperf.SVMperf.predict">(quapy.classification.svmperf.SVMperf method)</a>
</li> </li>
</ul></li> </ul></li>
<li><a href="quapy.classification.html#quapy.classification.methods.PCALR.predict_proba">predict_proba() (quapy.classification.methods.PCALR method)</a> <li><a href="quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.predict_proba">predict_proba() (quapy.classification.methods.LowRankLogisticRegression method)</a>
<ul> <ul>
<li><a href="quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.predict_proba">(quapy.classification.neural.NeuralClassifierTrainer method)</a> <li><a href="quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.predict_proba">(quapy.classification.neural.NeuralClassifierTrainer method)</a>
@ -952,7 +946,7 @@
</li> </li>
<li><a href="quapy.html#quapy.error.se">se() (in module quapy.error)</a> <li><a href="quapy.html#quapy.error.se">se() (in module quapy.error)</a>
</li> </li>
<li><a href="quapy.classification.html#quapy.classification.methods.PCALR.set_params">set_params() (quapy.classification.methods.PCALR method)</a> <li><a href="quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.set_params">set_params() (quapy.classification.methods.LowRankLogisticRegression method)</a>
<ul> <ul>
<li><a href="quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.set_params">(quapy.classification.neural.NeuralClassifierTrainer method)</a> <li><a href="quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.set_params">(quapy.classification.neural.NeuralClassifierTrainer method)</a>
@ -1032,7 +1026,7 @@
</li> </li>
<li><a href="quapy.method.html#quapy.method.aggregative.training_helper">training_helper() (in module quapy.method.aggregative)</a> <li><a href="quapy.method.html#quapy.method.aggregative.training_helper">training_helper() (in module quapy.method.aggregative)</a>
</li> </li>
<li><a href="quapy.classification.html#quapy.classification.methods.PCALR.transform">transform() (quapy.classification.methods.PCALR method)</a> <li><a href="quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.transform">transform() (quapy.classification.methods.LowRankLogisticRegression method)</a>
<ul> <ul>
<li><a href="quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.transform">(quapy.classification.neural.NeuralClassifierTrainer method)</a> <li><a href="quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.transform">(quapy.classification.neural.NeuralClassifierTrainer method)</a>

View File

@ -138,7 +138,7 @@ See the <a class="reference internal" href="Evaluation.html"><span class="doc">E
<li class="toctree-l2"><a class="reference internal" href="Plotting.html#error-by-drift">Error by Drift</a></li> <li class="toctree-l2"><a class="reference internal" href="Plotting.html#error-by-drift">Error by Drift</a></li>
</ul> </ul>
</li> </li>
<li class="toctree-l1"><a class="reference internal" href="modules.html">API Developer documentation</a><ul> <li class="toctree-l1"><a class="reference internal" href="modules.html">API Developers documentation</a><ul>
<li class="toctree-l2"><a class="reference internal" href="quapy.html">quapy package</a></li> <li class="toctree-l2"><a class="reference internal" href="quapy.html">quapy package</a></li>
</ul> </ul>
</li> </li>

Binary file not shown.

View File

@ -59,69 +59,108 @@
<div class="section" id="module-quapy.classification.methods"> <div class="section" id="module-quapy.classification.methods">
<span id="quapy-classification-methods-module"></span><h2>quapy.classification.methods module<a class="headerlink" href="#module-quapy.classification.methods" title="Permalink to this headline"></a></h2> <span id="quapy-classification-methods-module"></span><h2>quapy.classification.methods module<a class="headerlink" href="#module-quapy.classification.methods" title="Permalink to this headline"></a></h2>
<dl class="py class"> <dl class="py class">
<dt class="sig sig-object py" id="quapy.classification.methods.PCALR"> <dt class="sig sig-object py" id="quapy.classification.methods.LowRankLogisticRegression">
<em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">quapy.classification.methods.</span></span><span class="sig-name descname"><span class="pre">PCALR</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n_components</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">100</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.PCALR" title="Permalink to this definition"></a></dt> <em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">quapy.classification.methods.</span></span><span class="sig-name descname"><span class="pre">LowRankLogisticRegression</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n_components</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">100</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sklearn.base.BaseEstimator</span></code></p> <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sklearn.base.BaseEstimator</span></code></p>
<p>An example of a classification method that also generates embedded inputs, as those required for QuaNet. <p>An example of a classification method (i.e., an object that implements <cite>fit</cite>, <cite>predict</cite>, and <cite>predict_proba</cite>)
This example simply combines a Principal Component Analysis (PCA) with Logistic Regression (LR).</p> that also generates embedded inputs (i.e., that implements <cite>transform</cite>), as those required for
<dl class="py method"> <code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.method.neural.QuaNet</span></code>. This is a mock method to allow for easily instantiating
<dt class="sig sig-object py" id="quapy.classification.methods.PCALR.fit"> <code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.method.neural.QuaNet</span></code> on array-like real-valued instances.
<span class="sig-name descname"><span class="pre">fit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.PCALR.fit" title="Permalink to this definition"></a></dt> The transformation consists of applying <code class="xref py py-class docutils literal notranslate"><span class="pre">sklearn.decomposition.TruncatedSVD</span></code>
<dd></dd></dl> while classification is performed using <code class="xref py py-class docutils literal notranslate"><span class="pre">sklearn.linear_model.LogisticRegression</span></code> on the low-rank space</p>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.methods.PCALR.get_params">
<span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.PCALR.get_params" title="Permalink to this definition"></a></dt>
<dd><p>Get parameters for this estimator.</p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters</dt> <dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>deep</strong> (<em>bool</em><em>, </em><em>default=True</em>) If True, will return the parameters for this estimator and <dd class="field-odd"><ul class="simple">
contained subobjects that are estimators.</p> <li><p><strong>n_components</strong> the number of principal components to retain</p></li>
<li><p><strong>kwargs</strong> parameters for the
<a class="reference external" href="https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html">Logistic Regression</a> classifier</p></li>
</ul>
</dd>
</dl>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.methods.LowRankLogisticRegression.fit">
<span class="sig-name descname"><span class="pre">fit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.fit" title="Permalink to this definition"></a></dt>
<dd><p>Fit the model according to the given training data. The fit consists of
fitting TruncatedSVD and Logistic Regression.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>X</strong> array-like of shape <cite>(n_samples, n_features)</cite> with the instances</p></li>
<li><p><strong>y</strong> array-like of shape <cite>(n_samples,)</cite> with the class labels</p></li>
</ul>
</dd> </dd>
<dt class="field-even">Returns</dt> <dt class="field-even">Returns</dt>
<dd class="field-even"><p><strong>params</strong> Parameter names mapped to their values.</p> <dd class="field-even"><p><cite>self</cite></p>
</dd>
<dt class="field-odd">Return type</dt>
<dd class="field-odd"><p>dict</p>
</dd> </dd>
</dl> </dl>
</dd></dl> </dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.methods.PCALR.predict"> <dt class="sig sig-object py" id="quapy.classification.methods.LowRankLogisticRegression.get_params">
<span class="sig-name descname"><span class="pre">predict</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.PCALR.predict" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.get_params" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Get hyper-parameters for this estimator</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>a dictionary with parameter names mapped to their values</p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.methods.PCALR.predict_proba"> <dt class="sig sig-object py" id="quapy.classification.methods.LowRankLogisticRegression.predict">
<span class="sig-name descname"><span class="pre">predict_proba</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.PCALR.predict_proba" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">predict</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.predict" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Predicts labels for the instances <cite>X</cite></p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>X</strong> array-like of shape <cite>(n_samples, n_features)</cite> instances to classify</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>a <cite>numpy</cite> array of length <cite>n</cite> containing the label predictions, where <cite>n</cite> is the number of
instances in <cite>X</cite></p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.methods.PCALR.set_params"> <dt class="sig sig-object py" id="quapy.classification.methods.LowRankLogisticRegression.predict_proba">
<span class="sig-name descname"><span class="pre">set_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">params</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.PCALR.set_params" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">predict_proba</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.predict_proba" title="Permalink to this definition"></a></dt>
<dd><p>Predicts posterior probabilities for the instances <cite>X</cite></p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>X</strong> array-like of shape <cite>(n_samples, n_features)</cite> instances to classify</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>array-like of shape <cite>(n_samples, n_classes)</cite> with the posterior probabilities</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.methods.LowRankLogisticRegression.set_params">
<span class="sig-name descname"><span class="pre">set_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">params</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.set_params" title="Permalink to this definition"></a></dt>
<dd><p>Set the parameters of this estimator.</p> <dd><p>Set the parameters of this estimator.</p>
<p>The method works on simple estimators as well as on nested objects
(such as <code class="xref py py-class docutils literal notranslate"><span class="pre">Pipeline</span></code>). The latter have
parameters of the form <code class="docutils literal notranslate"><span class="pre">&lt;component&gt;__&lt;parameter&gt;</span></code> so that its
possible to update each component of a nested object.</p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters</dt> <dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>**params</strong> (<em>dict</em>) Estimator parameters.</p> <dd class="field-odd"><p><strong>parameters</strong> a <cite>**kwargs</cite> dictionary with the estimator parameters for
</dd> <a class="reference external" href="https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html">Logistic Regression</a>
<dt class="field-even">Returns</dt> and optionally also <cite>n_components</cite> for TruncatedSVD</p>
<dd class="field-even"><p><strong>self</strong> Estimator instance.</p>
</dd>
<dt class="field-odd">Return type</dt>
<dd class="field-odd"><p>estimator instance</p>
</dd> </dd>
</dl> </dl>
</dd></dl> </dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.methods.PCALR.transform"> <dt class="sig sig-object py" id="quapy.classification.methods.LowRankLogisticRegression.transform">
<span class="sig-name descname"><span class="pre">transform</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.PCALR.transform" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">transform</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.transform" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Returns the low-rank approximation of X with <cite>n_components</cite> dimensions</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>X</strong> array-like of shape <cite>(n_samples, n_features)</cite> instances to embed</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>array-like of shape <cite>(n_samples, n_components)</cite> with the embedded instances</p>
</dd>
</dl>
</dd></dl>
</dd></dl> </dd></dl>
@ -132,25 +171,63 @@ possible to update each component of a nested object.</p>
<dt class="sig sig-object py" id="quapy.classification.neural.CNNnet"> <dt class="sig sig-object py" id="quapy.classification.neural.CNNnet">
<em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">quapy.classification.neural.</span></span><span class="sig-name descname"><span class="pre">CNNnet</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">vocabulary_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_classes</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">embedding_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">100</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">256</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repr_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">100</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kernel_heights</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">[3,</span> <span class="pre">5,</span> <span class="pre">7]</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">stride</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">padding</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">drop_p</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.5</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.CNNnet" title="Permalink to this 
definition"></a></dt> <em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">quapy.classification.neural.</span></span><span class="sig-name descname"><span class="pre">CNNnet</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">vocabulary_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_classes</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">embedding_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">100</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">256</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repr_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">100</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">kernel_heights</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">[3,</span> <span class="pre">5,</span> <span class="pre">7]</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">stride</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">padding</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">drop_p</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.5</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.CNNnet" 
title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#quapy.classification.neural.TextClassifierNet" title="quapy.classification.neural.TextClassifierNet"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.classification.neural.TextClassifierNet</span></code></a></p> <dd><p>Bases: <a class="reference internal" href="#quapy.classification.neural.TextClassifierNet" title="quapy.classification.neural.TextClassifierNet"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.classification.neural.TextClassifierNet</span></code></a></p>
<dl class="py method"> <p>An implementation of <a class="reference internal" href="#quapy.classification.neural.TextClassifierNet" title="quapy.classification.neural.TextClassifierNet"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.classification.neural.TextClassifierNet</span></code></a> based on
<dt class="sig sig-object py" id="quapy.classification.neural.CNNnet.conv_block"> Convolutional Neural Networks.</p>
<span class="sig-name descname"><span class="pre">conv_block</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">conv_layer</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.CNNnet.conv_block" title="Permalink to this definition"></a></dt> <dl class="field-list simple">
<dd></dd></dl> <dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>vocabulary_size</strong> the size of the vocabulary</p></li>
<li><p><strong>n_classes</strong> number of target classes</p></li>
<li><p><strong>embedding_size</strong> the dimensionality of the word embeddings space (default 100)</p></li>
<li><p><strong>hidden_size</strong> the dimensionality of the hidden space (default 256)</p></li>
<li><p><strong>repr_size</strong> the dimensionality of the document embeddings space (default 100)</p></li>
<li><p><strong>kernel_heights</strong> list of kernel lengths (default [3,5,7]), i.e., the number of
consecutive tokens that each kernel covers</p></li>
<li><p><strong>stride</strong> convolutional stride (default 1)</p></li>
<li><p><strong>padding</strong> convolutional pad (default 0)</p></li>
<li><p><strong>drop_p</strong> drop probability for dropout (default 0.5)</p></li>
</ul>
</dd>
</dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.CNNnet.document_embedding"> <dt class="sig sig-object py" id="quapy.classification.neural.CNNnet.document_embedding">
<span class="sig-name descname"><span class="pre">document_embedding</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.CNNnet.document_embedding" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">document_embedding</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.CNNnet.document_embedding" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Embeds documents (i.e., performs the forward pass up to the
next-to-last layer).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>input</strong> a batch of instances, typically generated by a torch's <cite>DataLoader</cite>
instance (see <a class="reference internal" href="#quapy.classification.neural.TorchDataset" title="quapy.classification.neural.TorchDataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.classification.neural.TorchDataset</span></code></a>)</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>a torch tensor of shape <cite>(n_samples, n_dimensions)</cite>, where
<cite>n_samples</cite> is the number of documents, and <cite>n_dimensions</cite> is the
dimensionality of the embedding</p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.CNNnet.get_params"> <dt class="sig sig-object py" id="quapy.classification.neural.CNNnet.get_params">
<span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.CNNnet.get_params" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.CNNnet.get_params" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Get hyper-parameters for this estimator</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>a dictionary with parameter names mapped to their values</p>
</dd>
</dl>
</dd></dl>
<dl class="py property"> <dl class="py property">
<dt class="sig sig-object py" id="quapy.classification.neural.CNNnet.vocabulary_size"> <dt class="sig sig-object py" id="quapy.classification.neural.CNNnet.vocabulary_size">
<em class="property"><span class="pre">property</span> </em><span class="sig-name descname"><span class="pre">vocabulary_size</span></span><a class="headerlink" href="#quapy.classification.neural.CNNnet.vocabulary_size" title="Permalink to this definition"></a></dt> <em class="property"><span class="pre">property</span> </em><span class="sig-name descname"><span class="pre">vocabulary_size</span></span><a class="headerlink" href="#quapy.classification.neural.CNNnet.vocabulary_size" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Return the size of the vocabulary</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>integer</p>
</dd>
</dl>
</dd></dl>
</dd></dl> </dd></dl>
@ -158,25 +235,60 @@ possible to update each component of a nested object.</p>
<dt class="sig sig-object py" id="quapy.classification.neural.LSTMnet"> <dt class="sig sig-object py" id="quapy.classification.neural.LSTMnet">
<em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">quapy.classification.neural.</span></span><span class="sig-name descname"><span class="pre">LSTMnet</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">vocabulary_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_classes</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">embedding_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">100</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">256</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repr_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">100</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">lstm_class_nlayers</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">drop_p</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.5</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.LSTMnet" title="Permalink to this definition"></a></dt> <em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">quapy.classification.neural.</span></span><span class="sig-name descname"><span class="pre">LSTMnet</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">vocabulary_size</span></span></em>, <em class="sig-param"><span class="n"><span 
class="pre">n_classes</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">embedding_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">100</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hidden_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">256</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repr_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">100</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">lstm_class_nlayers</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">drop_p</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.5</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.LSTMnet" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#quapy.classification.neural.TextClassifierNet" title="quapy.classification.neural.TextClassifierNet"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.classification.neural.TextClassifierNet</span></code></a></p> <dd><p>Bases: <a class="reference internal" href="#quapy.classification.neural.TextClassifierNet" title="quapy.classification.neural.TextClassifierNet"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.classification.neural.TextClassifierNet</span></code></a></p>
<p>An implementation of <a class="reference internal" href="#quapy.classification.neural.TextClassifierNet" title="quapy.classification.neural.TextClassifierNet"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.classification.neural.TextClassifierNet</span></code></a> based on
Long Short-Term Memory networks.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>vocabulary_size</strong> the size of the vocabulary</p></li>
<li><p><strong>n_classes</strong> number of target classes</p></li>
<li><p><strong>embedding_size</strong> the dimensionality of the word embeddings space (default 100)</p></li>
<li><p><strong>hidden_size</strong> the dimensionality of the hidden space (default 256)</p></li>
<li><p><strong>repr_size</strong> the dimensionality of the document embeddings space (default 100)</p></li>
<li><p><strong>lstm_class_nlayers</strong> number of LSTM layers (default 1)</p></li>
<li><p><strong>drop_p</strong> drop probability for dropout (default 0.5)</p></li>
</ul>
</dd>
</dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.LSTMnet.document_embedding"> <dt class="sig sig-object py" id="quapy.classification.neural.LSTMnet.document_embedding">
<span class="sig-name descname"><span class="pre">document_embedding</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.LSTMnet.document_embedding" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">document_embedding</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.LSTMnet.document_embedding" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Embeds documents (i.e., performs the forward pass up to the
next-to-last layer).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>x</strong> a batch of instances, typically generated by a torch's <cite>DataLoader</cite>
instance (see <a class="reference internal" href="#quapy.classification.neural.TorchDataset" title="quapy.classification.neural.TorchDataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.classification.neural.TorchDataset</span></code></a>)</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>a torch tensor of shape <cite>(n_samples, n_dimensions)</cite>, where
<cite>n_samples</cite> is the number of documents, and <cite>n_dimensions</cite> is the
dimensionality of the embedding</p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.LSTMnet.get_params"> <dt class="sig sig-object py" id="quapy.classification.neural.LSTMnet.get_params">
<span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.LSTMnet.get_params" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.LSTMnet.get_params" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Get hyper-parameters for this estimator</p>
<dl class="field-list simple">
<dl class="py method"> <dt class="field-odd">Returns</dt>
<dt class="sig sig-object py" id="quapy.classification.neural.LSTMnet.init_hidden"> <dd class="field-odd"><p>a dictionary with parameter names mapped to their values</p>
<span class="sig-name descname"><span class="pre">init_hidden</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">set_size</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.LSTMnet.init_hidden" title="Permalink to this definition"></a></dt> </dd>
<dd></dd></dl> </dl>
</dd></dl>
<dl class="py property"> <dl class="py property">
<dt class="sig sig-object py" id="quapy.classification.neural.LSTMnet.vocabulary_size"> <dt class="sig sig-object py" id="quapy.classification.neural.LSTMnet.vocabulary_size">
<em class="property"><span class="pre">property</span> </em><span class="sig-name descname"><span class="pre">vocabulary_size</span></span><a class="headerlink" href="#quapy.classification.neural.LSTMnet.vocabulary_size" title="Permalink to this definition"></a></dt> <em class="property"><span class="pre">property</span> </em><span class="sig-name descname"><span class="pre">vocabulary_size</span></span><a class="headerlink" href="#quapy.classification.neural.LSTMnet.vocabulary_size" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Return the size of the vocabulary</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>integer</p>
</dd>
</dl>
</dd></dl>
</dd></dl> </dd></dl>
@ -184,45 +296,135 @@ possible to update each component of a nested object.</p>
<dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer"> <dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer">
<em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">quapy.classification.neural.</span></span><span class="sig-name descname"><span class="pre">NeuralClassifierTrainer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">net</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="#quapy.classification.neural.TextClassifierNet" title="quapy.classification.neural.TextClassifierNet"><span class="pre">quapy.classification.neural.TextClassifierNet</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">lr</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.001</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">weight_decay</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">patience</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">epochs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">200</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">64</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size_test</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">512</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">padding_length</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span 
class="pre">300</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">device</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'cpu'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">checkpointpath</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'../checkpoint/classifier_net.dat'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer" title="Permalink to this definition"></a></dt> <em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">quapy.classification.neural.</span></span><span class="sig-name descname"><span class="pre">NeuralClassifierTrainer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">net</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="#quapy.classification.neural.TextClassifierNet" title="quapy.classification.neural.TextClassifierNet"><span class="pre">quapy.classification.neural.TextClassifierNet</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">lr</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.001</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">weight_decay</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">patience</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">epochs</span></span><span class="o"><span class="pre">=</span></span><span 
class="default_value"><span class="pre">200</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">64</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">batch_size_test</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">512</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">padding_length</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">300</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">device</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'cpu'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">checkpointpath</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'../checkpoint/classifier_net.dat'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p> <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<p>Trains a neural network for text classification.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>net</strong> &#8211; an instance of <cite>TextClassifierNet</cite> implementing the forward pass</p></li>
<li><p><strong>lr</strong> &#8211; learning rate (default 1e-3)</p></li>
<li><p><strong>weight_decay</strong> &#8211; weight decay (default 0)</p></li>
<li><p><strong>patience</strong> &#8211; number of epochs that do not show any improvement in validation
to wait before applying early stop (default 10)</p></li>
<li><p><strong>epochs</strong> &#8211; maximum number of training epochs (default 200)</p></li>
<li><p><strong>batch_size</strong> &#8211; batch size for training (default 64)</p></li>
<li><p><strong>batch_size_test</strong> &#8211; batch size for test (default 512)</p></li>
<li><p><strong>padding_length</strong> &#8211; maximum number of tokens to consider in a document (default 300)</p></li>
<li><p><strong>device</strong> &#8211; specify cpu (default) or cuda for enabling gpu</p></li>
<li><p><strong>checkpointpath</strong> &#8211; where to store the parameters of the best model found so far
according to the evaluation in the held-out validation split (default ../checkpoint/classifier_net.dat)</p></li>
</ul>
</dd>
</dl>
<dl class="py property"> <dl class="py property">
<dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer.device"> <dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer.device">
<em class="property"><span class="pre">property</span> </em><span class="sig-name descname"><span class="pre">device</span></span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer.device" title="Permalink to this definition"></a></dt> <em class="property"><span class="pre">property</span> </em><span class="sig-name descname"><span class="pre">device</span></span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer.device" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Gets the device in which the network is allocated</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>device</p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer.fit"> <dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer.fit">
<span class="sig-name descname"><span class="pre">fit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">labels</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">val_split</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.3</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer.fit" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">fit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">labels</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">val_split</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.3</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer.fit" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Fits the model according to the given training data.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>instances</strong> &#8211; list of lists of indexed tokens</p></li>
<li><p><strong>labels</strong> &#8211; array-like of shape <cite>(n_samples, n_classes)</cite> with the class labels</p></li>
<li><p><strong>val_split</strong> &#8211; proportion of training documents to be taken as the validation set (default 0.3)</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p></p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer.get_params"> <dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer.get_params">
<span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer.get_params" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer.get_params" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Get hyper-parameters for this estimator</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>a dictionary with parameter names mapped to their values</p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer.predict"> <dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer.predict">
<span class="sig-name descname"><span class="pre">predict</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer.predict" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">predict</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer.predict" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Predicts labels for the instances</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>instances</strong> &#8211; list of lists of indexed tokens</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>a <cite>numpy</cite> array of length <cite>n</cite> containing the label predictions, where <cite>n</cite> is the number of
instances in <cite>instances</cite></p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer.predict_proba"> <dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer.predict_proba">
<span class="sig-name descname"><span class="pre">predict_proba</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer.predict_proba" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">predict_proba</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer.predict_proba" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Predicts posterior probabilities for the instances</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>instances</strong> &#8211; array-like of shape <cite>(n_samples, n_features)</cite> with the instances to classify</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>array-like of shape <cite>(n_samples, n_classes)</cite> with the posterior probabilities</p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer.reset_net_params"> <dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer.reset_net_params">
<span class="sig-name descname"><span class="pre">reset_net_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_classes</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer.reset_net_params" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">reset_net_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">vocab_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_classes</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer.reset_net_params" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Reinitialize the network parameters</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>vocab_size</strong> &#8211; the size of the vocabulary</p></li>
<li><p><strong>n_classes</strong> &#8211; the number of target classes</p></li>
</ul>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer.set_params"> <dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer.set_params">
<span class="sig-name descname"><span class="pre">set_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">params</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer.set_params" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">set_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">params</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer.set_params" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Set the parameters of this trainer and the learner it is training.
In this current version, parameter names for the trainer and learner should
be disjoint.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>params</strong> &#8211; a <cite>**kwargs</cite> dictionary with the parameters</p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer.transform"> <dt class="sig sig-object py" id="quapy.classification.neural.NeuralClassifierTrainer.transform">
<span class="sig-name descname"><span class="pre">transform</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer.transform" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">transform</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.NeuralClassifierTrainer.transform" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Returns the embeddings of the instances</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>instances</strong> &#8211; list of lists of indexed tokens</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>array-like of shape <cite>(n_samples, embed_size)</cite> with the embedded instances,
where <cite>embed_size</cite> is defined by the classification network</p>
</dd>
</dl>
</dd></dl>
</dd></dl> </dd></dl>
@ -230,49 +432,95 @@ possible to update each component of a nested object.</p>
<dt class="sig sig-object py" id="quapy.classification.neural.TextClassifierNet"> <dt class="sig sig-object py" id="quapy.classification.neural.TextClassifierNet">
<em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">quapy.classification.neural.</span></span><span class="sig-name descname"><span class="pre">TextClassifierNet</span></span><a class="headerlink" href="#quapy.classification.neural.TextClassifierNet" title="Permalink to this definition"></a></dt> <em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">quapy.classification.neural.</span></span><span class="sig-name descname"><span class="pre">TextClassifierNet</span></span><a class="headerlink" href="#quapy.classification.neural.TextClassifierNet" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">torch.nn.modules.module.Module</span></code></p> <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">torch.nn.modules.module.Module</span></code></p>
<p>Abstract Text classifier (<cite>torch.nn.Module</cite>)</p>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.TextClassifierNet.dimensions"> <dt class="sig sig-object py" id="quapy.classification.neural.TextClassifierNet.dimensions">
<span class="sig-name descname"><span class="pre">dimensions</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.TextClassifierNet.dimensions" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">dimensions</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.TextClassifierNet.dimensions" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Gets the number of dimensions of the embedding space</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>integer</p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.TextClassifierNet.document_embedding"> <dt class="sig sig-object py" id="quapy.classification.neural.TextClassifierNet.document_embedding">
<em class="property"><span class="pre">abstract</span> </em><span class="sig-name descname"><span class="pre">document_embedding</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.TextClassifierNet.document_embedding" title="Permalink to this definition"></a></dt> <em class="property"><span class="pre">abstract</span> </em><span class="sig-name descname"><span class="pre">document_embedding</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.TextClassifierNet.document_embedding" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Embeds documents (i.e., performs the forward pass up to the
next-to-last layer).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>x</strong> &#8211; a batch of instances, typically generated by a torch's <cite>DataLoader</cite>
instance (see <a class="reference internal" href="#quapy.classification.neural.TorchDataset" title="quapy.classification.neural.TorchDataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.classification.neural.TorchDataset</span></code></a>)</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>a torch tensor of shape <cite>(n_samples, n_dimensions)</cite>, where
<cite>n_samples</cite> is the number of documents, and <cite>n_dimensions</cite> is the
dimensionality of the embedding</p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.TextClassifierNet.forward"> <dt class="sig sig-object py" id="quapy.classification.neural.TextClassifierNet.forward">
<span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.TextClassifierNet.forward" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">forward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.TextClassifierNet.forward" title="Permalink to this definition"></a></dt>
<dd><p>Defines the computation performed at every call.</p> <dd><p>Performs the forward pass.</p>
<p>Should be overridden by all subclasses.</p> <dl class="field-list simple">
<div class="admonition note"> <dt class="field-odd">Parameters</dt>
<p class="admonition-title">Note</p> <dd class="field-odd"><p><strong>x</strong> &#8211; a batch of instances, typically generated by a torch's <cite>DataLoader</cite>
<p>Although the recipe for forward pass needs to be defined within instance (see <a class="reference internal" href="#quapy.classification.neural.TorchDataset" title="quapy.classification.neural.TorchDataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.classification.neural.TorchDataset</span></code></a>)</p>
this function, one should call the <code class="xref py py-class docutils literal notranslate"><span class="pre">Module</span></code> instance afterwards </dd>
instead of this since the former takes care of running the <dt class="field-even">Returns</dt>
registered hooks while the latter silently ignores them.</p> <dd class="field-even"><p>a tensor of shape <cite>(n_instances, n_classes)</cite> with the decision scores
</div> for each of the instances and classes</p>
</dd>
</dl>
</dd></dl> </dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.TextClassifierNet.get_params"> <dt class="sig sig-object py" id="quapy.classification.neural.TextClassifierNet.get_params">
<em class="property"><span class="pre">abstract</span> </em><span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.TextClassifierNet.get_params" title="Permalink to this definition"></a></dt> <em class="property"><span class="pre">abstract</span> </em><span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.TextClassifierNet.get_params" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Get hyper-parameters for this estimator</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>a dictionary with parameter names mapped to their values</p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.TextClassifierNet.predict_proba"> <dt class="sig sig-object py" id="quapy.classification.neural.TextClassifierNet.predict_proba">
<span class="sig-name descname"><span class="pre">predict_proba</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.TextClassifierNet.predict_proba" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">predict_proba</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.TextClassifierNet.predict_proba" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Predicts posterior probabilities for the instances in <cite>x</cite></p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>x</strong> &#8211; a torch tensor of indexed tokens with shape <cite>(n_instances, pad_length)</cite>
where <cite>n_instances</cite> is the number of instances in the batch, and <cite>pad_length</cite>
is the length of the pad in the batch</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>array-like of shape <cite>(n_samples, n_classes)</cite> with the posterior probabilities</p>
</dd>
</dl>
</dd></dl>
<dl class="py property"> <dl class="py property">
<dt class="sig sig-object py" id="quapy.classification.neural.TextClassifierNet.vocabulary_size"> <dt class="sig sig-object py" id="quapy.classification.neural.TextClassifierNet.vocabulary_size">
<em class="property"><span class="pre">property</span> </em><span class="sig-name descname"><span class="pre">vocabulary_size</span></span><a class="headerlink" href="#quapy.classification.neural.TextClassifierNet.vocabulary_size" title="Permalink to this definition"></a></dt> <em class="property"><span class="pre">property</span> </em><span class="sig-name descname"><span class="pre">vocabulary_size</span></span><a class="headerlink" href="#quapy.classification.neural.TextClassifierNet.vocabulary_size" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Return the size of the vocabulary</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>integer</p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.TextClassifierNet.xavier_uniform"> <dt class="sig sig-object py" id="quapy.classification.neural.TextClassifierNet.xavier_uniform">
<span class="sig-name descname"><span class="pre">xavier_uniform</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.TextClassifierNet.xavier_uniform" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">xavier_uniform</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.TextClassifierNet.xavier_uniform" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Performs Xavier initialization of the network parameters</p>
</dd></dl>
</dd></dl> </dd></dl>
@ -280,10 +528,36 @@ registered hooks while the latter silently ignores them.</p>
<dt class="sig sig-object py" id="quapy.classification.neural.TorchDataset"> <dt class="sig sig-object py" id="quapy.classification.neural.TorchDataset">
<em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">quapy.classification.neural.</span></span><span class="sig-name descname"><span class="pre">TorchDataset</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">labels</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.TorchDataset" title="Permalink to this definition"></a></dt> <em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">quapy.classification.neural.</span></span><span class="sig-name descname"><span class="pre">TorchDataset</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">labels</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.TorchDataset" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">torch.utils.data.dataset.Dataset</span></code></p> <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">torch.utils.data.dataset.Dataset</span></code></p>
<p>Transforms labelled instances into a Torch <code class="xref py py-class docutils literal notranslate"><span class="pre">torch.utils.data.DataLoader</span></code> object</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>instances</strong> list of lists of indexed tokens</p></li>
<li><p><strong>labels</strong> array-like of shape <cite>(n_samples, n_classes)</cite> with the class labels</p></li>
</ul>
</dd>
</dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.neural.TorchDataset.asDataloader"> <dt class="sig sig-object py" id="quapy.classification.neural.TorchDataset.asDataloader">
<span class="sig-name descname"><span class="pre">asDataloader</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">batch_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shuffle</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">pad_length</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">device</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.TorchDataset.asDataloader" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">asDataloader</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">batch_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shuffle</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">pad_length</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">device</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.neural.TorchDataset.asDataloader" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Converts the labelled collection into a Torch DataLoader with dynamic padding for
the batch</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>batch_size</strong> batch size</p></li>
<li><p><strong>shuffle</strong> whether or not to shuffle instances</p></li>
<li><p><strong>pad_length</strong> the maximum length for the list of tokens (dynamic padding is
applied, meaning that if the longest document in the batch is shorter than
<cite>pad_length</cite>, then the batch is padded up to its length, and not to <cite>pad_length</cite>).</p></li>
<li><p><strong>device</strong> whether to allocate tensors in cpu or in cuda</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>a <code class="xref py py-class docutils literal notranslate"><span class="pre">torch.utils.data.DataLoader</span></code> object</p>
</dd>
</dl>
</dd></dl>
</dd></dl> </dd></dl>
@ -294,38 +568,79 @@ registered hooks while the latter silently ignores them.</p>
<dt class="sig sig-object py" id="quapy.classification.svmperf.SVMperf"> <dt class="sig sig-object py" id="quapy.classification.svmperf.SVMperf">
<em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">quapy.classification.svmperf.</span></span><span class="sig-name descname"><span class="pre">SVMperf</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">svmperf_base</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">C</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.01</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">loss</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'01'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.svmperf.SVMperf" title="Permalink to this definition"></a></dt> <em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">quapy.classification.svmperf.</span></span><span class="sig-name descname"><span class="pre">SVMperf</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">svmperf_base</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">C</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.01</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">loss</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span 
class="pre">'01'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.svmperf.SVMperf" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sklearn.base.BaseEstimator</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">sklearn.base.ClassifierMixin</span></code></p> <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">sklearn.base.BaseEstimator</span></code>, <code class="xref py py-class docutils literal notranslate"><span class="pre">sklearn.base.ClassifierMixin</span></code></p>
<p>A wrapper for the <a class="reference external" href="https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html">SVM-perf package</a> by Thorsten Joachims.
When using losses for quantification, the source code has to be patched. See
the <a class="reference external" href="https://hlt-isti.github.io/QuaPy/build/html/Installation.html#svm-perf-with-quantification-oriented-losses">installation documentation</a>
for further details.</p>
<p>References:</p>
<blockquote>
<div><ul class="simple">
<li><p><a class="reference external" href="https://dl.acm.org/doi/abs/10.1145/2700406?casa_token=8D2fHsGCVn0AAAAA:ZfThYOvrzWxMGfZYlQW_y8Cagg-o_l6X_PcF09mdETQ4Tu7jK98mxFbGSXp9ZSO14JkUIYuDGFG0">Esuli et al. 2015</a></p></li>
<li><p><a class="reference external" href="https://www.sciencedirect.com/science/article/abs/pii/S003132031400291X">Barranquero et al. 2015</a></p></li>
</ul>
</div></blockquote>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>svmperf_base</strong> path to directory containing the binary files <cite>svm_perf_learn</cite> and <cite>svm_perf_classify</cite></p></li>
<li><p><strong>C</strong> trade-off between training error and margin (default 0.01)</p></li>
<li><p><strong>verbose</strong> set to True to print svm-perf std outputs</p></li>
<li><p><strong>loss</strong> the loss to optimize for. Available losses are “01”, “f1”, “kld”, “nkld”, “q”, “qacc”, “qf1”, “qgm”, “mae”, “mrae”.</p></li>
</ul>
</dd>
</dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.svmperf.SVMperf.decision_function"> <dt class="sig sig-object py" id="quapy.classification.svmperf.SVMperf.decision_function">
<span class="sig-name descname"><span class="pre">decision_function</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.svmperf.SVMperf.decision_function" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">decision_function</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.svmperf.SVMperf.decision_function" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Evaluate the decision function for the samples in <cite>X</cite>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>X</strong> array-like of shape <cite>(n_samples, n_features)</cite> containing the instances to classify</p></li>
<li><p><strong>y</strong> unused</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>array-like of shape <cite>(n_samples,)</cite> containing the decision scores of the instances</p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.svmperf.SVMperf.fit"> <dt class="sig sig-object py" id="quapy.classification.svmperf.SVMperf.fit">
<span class="sig-name descname"><span class="pre">fit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.svmperf.SVMperf.fit" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">fit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.svmperf.SVMperf.fit" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Trains the SVM for the multivariate performance loss</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>X</strong> training instances</p></li>
<li><p><strong>y</strong> a binary vector of labels</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><cite>self</cite></p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.svmperf.SVMperf.predict"> <dt class="sig sig-object py" id="quapy.classification.svmperf.SVMperf.predict">
<span class="sig-name descname"><span class="pre">predict</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.svmperf.SVMperf.predict" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">predict</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.svmperf.SVMperf.predict" title="Permalink to this definition"></a></dt>
<dd></dd></dl> <dd><p>Predicts labels for the instances <cite>X</cite></p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>X</strong> array-like of shape <cite>(n_samples, n_features)</cite> instances to classify</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>a <cite>numpy</cite> array of length <cite>n</cite> containing the label predictions, where <cite>n</cite> is the number of instances in <cite>X</cite></p>
</dd>
</dl>
</dd></dl>
<dl class="py method"> <dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.svmperf.SVMperf.set_params"> <dt class="sig sig-object py" id="quapy.classification.svmperf.SVMperf.set_params">
<span class="sig-name descname"><span class="pre">set_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">parameters</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.svmperf.SVMperf.set_params" title="Permalink to this definition"></a></dt> <span class="sig-name descname"><span class="pre">set_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">parameters</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.svmperf.SVMperf.set_params" title="Permalink to this definition"></a></dt>
<dd><p>Set the parameters of this estimator.</p> <dd><p>Set the hyper-parameters for svm-perf. Currently, only the <cite>C</cite> parameter is supported</p>
<p>The method works on simple estimators as well as on nested objects
(such as <code class="xref py py-class docutils literal notranslate"><span class="pre">Pipeline</span></code>). The latter have
parameters of the form <code class="docutils literal notranslate"><span class="pre">&lt;component&gt;__&lt;parameter&gt;</span></code> so that its
possible to update each component of a nested object.</p>
<dl class="field-list simple"> <dl class="field-list simple">
<dt class="field-odd">Parameters</dt> <dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>**params</strong> (<em>dict</em>) Estimator parameters.</p> <dd class="field-odd"><p><strong>parameters</strong> a <cite>**kwargs</cite> dictionary <cite>{C: &lt;float&gt;}</cite></p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><strong>self</strong> Estimator instance.</p>
</dd>
<dt class="field-odd">Return type</dt>
<dd class="field-odd"><p>estimator instance</p>
</dd> </dd>
</dl> </dl>
</dd></dl> </dd></dl>

File diff suppressed because one or more lines are too long

View File

@ -3,10 +3,18 @@ from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import LogisticRegression from sklearn.linear_model import LogisticRegression
class PCALR(BaseEstimator): class LowRankLogisticRegression(BaseEstimator):
""" """
An example of a classification method that also generates embedded inputs, as those required for QuaNet. An example of a classification method (i.e., an object that implements `fit`, `predict`, and `predict_proba`)
This example simply combines a Principal Component Analysis (PCA) with Logistic Regression (LR). that also generates embedded inputs (i.e., that implements `transform`), as those required for
:class:`quapy.method.neural.QuaNet`. This is a mock method to allow for easily instantiating
:class:`quapy.method.neural.QuaNet` on array-like real-valued instances.
The transformation consists of applying :class:`sklearn.decomposition.TruncatedSVD`
while classification is performed using :class:`sklearn.linear_model.LogisticRegression` on the low-rank space.
:param n_components: the number of principal components to retain
:param kwargs: parameters for the
`Logistic Regression <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html>`__ classifier
""" """
def __init__(self, n_components=100, **kwargs): def __init__(self, n_components=100, **kwargs):
@ -14,35 +22,76 @@ class PCALR(BaseEstimator):
self.learner = LogisticRegression(**kwargs) self.learner = LogisticRegression(**kwargs)
def get_params(self): def get_params(self):
"""
Get hyper-parameters for this estimator.
:return: a dictionary with parameter names mapped to their values
"""
params = {'n_components': self.n_components} params = {'n_components': self.n_components}
params.update(self.learner.get_params()) params.update(self.learner.get_params())
return params return params
def set_params(self, **params): def set_params(self, **params):
if 'n_components' in params: """
self.n_components = params['n_components'] Set the parameters of this estimator.
del params['n_components']
self.learner.set_params(**params) :param params: a `**kwargs` dictionary with the estimator parameters for
`Logistic Regression <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html>`__
and, optionally, also `n_components` for `TruncatedSVD`
"""
params_ = dict(params)
if 'n_components' in params_:
self.n_components = params_['n_components']
del params_['n_components']
self.learner.set_params(**params_)
def fit(self, X, y): def fit(self, X, y):
self.learner.fit(X, y) """
Fit the model according to the given training data. The fit consists of
fitting `TruncatedSVD` and then `LogisticRegression` on the low-rank representation.
:param X: array-like of shape `(n_samples, n_features)` with the instances
:param y: array-like of shape `(n_samples,)` with the class labels
:return: `self`
"""
nF = X.shape[1] nF = X.shape[1]
self.pca = None self.pca = None
if nF > self.n_components: if nF > self.n_components:
self.pca = TruncatedSVD(self.n_components).fit(X, y) self.pca = TruncatedSVD(self.n_components).fit(X)
X = self.transform(X)
self.learner.fit(X, y)
self.classes_ = self.learner.classes_ self.classes_ = self.learner.classes_
return self return self
def predict(self, X): def predict(self, X):
# X = self.transform(X) """
Predicts labels for the instances `X` embedded into the low-rank space.
:param X: array-like of shape `(n_samples, n_features)` instances to classify
:return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of
instances in `X`
"""
X = self.transform(X)
return self.learner.predict(X) return self.learner.predict(X)
def predict_proba(self, X): def predict_proba(self, X):
# X = self.transform(X) """
Predicts posterior probabilities for the instances `X` embedded into the low-rank space.
:param X: array-like of shape `(n_samples, n_features)` instances to classify
:return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities
"""
X = self.transform(X)
return self.learner.predict_proba(X) return self.learner.predict_proba(X)
def transform(self, X): def transform(self, X):
"""
Returns the low-rank approximation of `X` with `n_components` dimensions, or `X` unaltered if
`n_components` >= `X.shape[1]`.
:param X: array-like of shape `(n_samples, n_features)` instances to embed
:return: array-like of shape `(n_samples, n_components)` with the embedded instances
"""
if self.pca is None: if self.pca is None:
return X return X
return self.pca.transform(X) return self.pca.transform(X)

View File

@ -16,6 +16,22 @@ from quapy.util import EarlyStop
class NeuralClassifierTrainer: class NeuralClassifierTrainer:
"""
Trains a neural network for text classification.
:param net: an instance of `TextClassifierNet` implementing the forward pass
:param lr: learning rate (default 1e-3)
:param weight_decay: weight decay (default 0)
:param patience: number of epochs that do not show any improvement in validation
to wait before applying early stop (default 10)
:param epochs: maximum number of training epochs (default 200)
:param batch_size: batch size for training (default 64)
:param batch_size_test: batch size for test (default 512)
:param padding_length: maximum number of tokens to consider in a document (default 300)
:param device: specify 'cpu' (default) or 'cuda' for enabling gpu
:param checkpointpath: where to store the parameters of the best model found so far
according to the evaluation in the held-out validation split (default '../checkpoint/classifier_net.dat')
"""
def __init__(self, def __init__(self,
net: 'TextClassifierNet', net: 'TextClassifierNet',
@ -45,23 +61,36 @@ class NeuralClassifierTrainer:
'device': torch.device(device) 'device': torch.device(device)
} }
self.learner_hyperparams = self.net.get_params() self.learner_hyperparams = self.net.get_params()
self.checkpointpath = checkpointpath self.checkpointpath = checkpointpath
self.classes_ = np.asarray([0, 1]) self.classes_ = np.asarray([0, 1])
print(f'[NeuralNetwork running on {device}]') print(f'[NeuralNetwork running on {device}]')
os.makedirs(Path(checkpointpath).parent, exist_ok=True) os.makedirs(Path(checkpointpath).parent, exist_ok=True)
def reset_net_params(self, vocab_size, n_classes): def reset_net_params(self, vocab_size, n_classes):
"""Reinitialize the network parameters
:param vocab_size: the size of the vocabulary
:param n_classes: the number of target classes
"""
self.net = self.net.__class__(vocab_size, n_classes, **self.learner_hyperparams) self.net = self.net.__class__(vocab_size, n_classes, **self.learner_hyperparams)
self.net = self.net.to(self.trainer_hyperparams['device']) self.net = self.net.to(self.trainer_hyperparams['device'])
self.net.xavier_uniform() self.net.xavier_uniform()
def get_params(self): def get_params(self):
"""Get hyper-parameters for this estimator
:return: a dictionary with parameter names mapped to their values
"""
return {**self.net.get_params(), **self.trainer_hyperparams} return {**self.net.get_params(), **self.trainer_hyperparams}
def set_params(self, **params): def set_params(self, **params):
"""Set the parameters of this trainer and the learner it is training.
In this current version, parameter names for the trainer and learner should
be disjoint.
:param params: a `**kwargs` dictionary with the parameters
"""
trainer_hyperparams = self.trainer_hyperparams trainer_hyperparams = self.trainer_hyperparams
learner_hyperparams = self.net.get_params() learner_hyperparams = self.net.get_params()
for key, val in params.items(): for key, val in params.items():
@ -81,6 +110,10 @@ class NeuralClassifierTrainer:
@property @property
def device(self): def device(self):
""" Gets the device in which the network is allocated
:return: device
"""
return next(self.net.parameters()).device return next(self.net.parameters()).device
def _train_epoch(self, data, status, pbar, epoch): def _train_epoch(self, data, status, pbar, epoch):
@ -132,6 +165,14 @@ class NeuralClassifierTrainer:
f'macroF1={100 * self.status["va"]["f1"]:.2f}%') f'macroF1={100 * self.status["va"]["f1"]:.2f}%')
def fit(self, instances, labels, val_split=0.3): def fit(self, instances, labels, val_split=0.3):
"""
Fits the model according to the given training data.
:param instances: list of lists of indexed tokens
:param labels: array-like of shape `(n_samples, n_classes)` with the class labels
:param val_split: proportion of training documents to be taken as the validation set (default 0.3)
:return: `self`
"""
train, val = LabelledCollection(instances, labels).split_stratified(1-val_split) train, val = LabelledCollection(instances, labels).split_stratified(1-val_split)
opt = self.trainer_hyperparams opt = self.trainer_hyperparams
checkpoint = self.checkpointpath checkpoint = self.checkpointpath
@ -169,9 +210,22 @@ class NeuralClassifierTrainer:
return self return self
def predict(self, instances): def predict(self, instances):
"""
Predicts labels for the instances
:param instances: list of lists of indexed tokens
:return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of
documents in `instances`
"""
return np.argmax(self.predict_proba(instances), axis=-1) return np.argmax(self.predict_proba(instances), axis=-1)
def predict_proba(self, instances): def predict_proba(self, instances):
"""
Predicts posterior probabilities for the instances
:param instances: list of lists of indexed tokens
:return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities
"""
self.net.eval() self.net.eval()
opt = self.trainer_hyperparams opt = self.trainer_hyperparams
with torch.no_grad(): with torch.no_grad():
@ -182,6 +236,13 @@ class NeuralClassifierTrainer:
return np.concatenate(positive_probs) return np.concatenate(positive_probs)
def transform(self, instances): def transform(self, instances):
"""
Returns the embeddings of the instances
:param instances: list of lists of indexed tokens
:return: array-like of shape `(n_samples, embed_size)` with the embedded instances,
where `embed_size` is defined by the classification network
"""
self.net.eval() self.net.eval()
embeddings = [] embeddings = []
opt = self.trainer_hyperparams opt = self.trainer_hyperparams
@ -193,6 +254,12 @@ class NeuralClassifierTrainer:
class TorchDataset(torch.utils.data.Dataset): class TorchDataset(torch.utils.data.Dataset):
"""
Transforms labelled instances into a Torch's :class:`torch.utils.data.DataLoader` object
:param instances: list of lists of indexed tokens
:param labels: array-like of shape `(n_samples, n_classes)` with the class labels
"""
def __init__(self, instances, labels=None): def __init__(self, instances, labels=None):
self.instances = instances self.instances = instances
@ -205,6 +272,18 @@ class TorchDataset(torch.utils.data.Dataset):
return {'doc': self.instances[index], 'label': self.labels[index] if self.labels is not None else None} return {'doc': self.instances[index], 'label': self.labels[index] if self.labels is not None else None}
def asDataloader(self, batch_size, shuffle, pad_length, device): def asDataloader(self, batch_size, shuffle, pad_length, device):
"""
Converts the labelled collection into a Torch DataLoader with dynamic padding for
the batch
:param batch_size: batch size
:param shuffle: whether or not to shuffle instances
:param pad_length: the maximum length for the list of tokens (dynamic padding is
applied, meaning that if the longest document in the batch is shorter than
`pad_length`, then the batch is padded up to its length, and not to `pad_length`).
:param device: whether to allocate tensors in cpu or in cuda
:return: a :class:`torch.utils.data.DataLoader` object
"""
def collate(batch): def collate(batch):
data = [torch.LongTensor(item['doc'][:pad_length]) for item in batch] data = [torch.LongTensor(item['doc'][:pad_length]) for item in batch]
data = pad_sequence(data, batch_first=True, padding_value=qp.environ['PAD_INDEX']).to(device) data = pad_sequence(data, batch_first=True, padding_value=qp.environ['PAD_INDEX']).to(device)
@ -220,37 +299,97 @@ class TorchDataset(torch.utils.data.Dataset):
class TextClassifierNet(torch.nn.Module, metaclass=ABCMeta): class TextClassifierNet(torch.nn.Module, metaclass=ABCMeta):
"""
Abstract Text classifier (`torch.nn.Module`)
"""
@abstractmethod @abstractmethod
def document_embedding(self, x): ... def document_embedding(self, x):
"""Embeds documents (i.e., performs the forward pass up to the
next-to-last layer).
:param x: a batch of instances, typically generated by a torch's `DataLoader`
instance (see :class:`quapy.classification.neural.TorchDataset`)
:return: a torch tensor of shape `(n_samples, n_dimensions)`, where
`n_samples` is the number of documents, and `n_dimensions` is the
dimensionality of the embedding
"""
...
def forward(self, x): def forward(self, x):
"""Performs the forward pass.
:param x: a batch of instances, typically generated by a torch's `DataLoader`
instance (see :class:`quapy.classification.neural.TorchDataset`)
:return: a tensor of shape `(n_instances, n_classes)` with the decision scores
for each of the instances and classes
"""
doc_embedded = self.document_embedding(x) doc_embedded = self.document_embedding(x)
return self.output(doc_embedded) return self.output(doc_embedded)
def dimensions(self): def dimensions(self):
"""Gets the number of dimensions of the embedding space
:return: integer
"""
return self.dim return self.dim
def predict_proba(self, x): def predict_proba(self, x):
"""
Predicts posterior probabilities for the instances in `x`
:param x: a torch tensor of indexed tokens with shape `(n_instances, pad_length)`
where `n_instances` is the number of instances in the batch, and `pad_length`
is the length of the pad in the batch
:return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities
"""
logits = self(x) logits = self(x)
return torch.softmax(logits, dim=1).detach().cpu().numpy() return torch.softmax(logits, dim=1).detach().cpu().numpy()
def xavier_uniform(self): def xavier_uniform(self):
"""
Performs Xavier initialization of the network parameters
"""
for p in self.parameters(): for p in self.parameters():
if p.dim() > 1 and p.requires_grad: if p.dim() > 1 and p.requires_grad:
torch.nn.init.xavier_uniform_(p) torch.nn.init.xavier_uniform_(p)
@abstractmethod @abstractmethod
def get_params(self): ... def get_params(self):
"""
Get hyper-parameters for this estimator
:return: a dictionary with parameter names mapped to their values
"""
...
@property @property
def vocabulary_size(self): ... def vocabulary_size(self):
"""
Return the size of the vocabulary
:return: integer
"""
...
class LSTMnet(TextClassifierNet): class LSTMnet(TextClassifierNet):
"""
An implementation of :class:`quapy.classification.neural.TextClassifierNet` based on
Long Short Term Memory networks.
:param vocabulary_size: the size of the vocabulary
:param n_classes: number of target classes
:param embedding_size: the dimensionality of the word embeddings space (default 100)
:param hidden_size: the dimensionality of the hidden space (default 256)
:param repr_size: the dimensionality of the document embeddings space (default 100)
:param lstm_class_nlayers: number of LSTM layers (default 1)
:param drop_p: drop probability for dropout (default 0.5)
"""
def __init__(self, vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100, lstm_class_nlayers=1, def __init__(self, vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100, lstm_class_nlayers=1,
drop_p=0.5): drop_p=0.5):
super().__init__() super().__init__()
self.vocabulary_size_ = vocabulary_size self.vocabulary_size_ = vocabulary_size
self.n_classes = n_classes self.n_classes = n_classes
@ -270,7 +409,7 @@ class LSTMnet(TextClassifierNet):
self.doc_embedder = torch.nn.Linear(hidden_size, self.dim) self.doc_embedder = torch.nn.Linear(hidden_size, self.dim)
self.output = torch.nn.Linear(self.dim, n_classes) self.output = torch.nn.Linear(self.dim, n_classes)
def init_hidden(self, set_size): def __init_hidden(self, set_size):
opt = self.hyperparams opt = self.hyperparams
var_hidden = torch.zeros(opt['lstm_class_nlayers'], set_size, opt['hidden_size']) var_hidden = torch.zeros(opt['lstm_class_nlayers'], set_size, opt['hidden_size'])
var_cell = torch.zeros(opt['lstm_class_nlayers'], set_size, opt['hidden_size']) var_cell = torch.zeros(opt['lstm_class_nlayers'], set_size, opt['hidden_size'])
@ -279,21 +418,55 @@ class LSTMnet(TextClassifierNet):
return var_hidden, var_cell return var_hidden, var_cell
def document_embedding(self, x): def document_embedding(self, x):
"""Embeds documents (i.e., performs the forward pass up to the
next-to-last layer).
:param x: a batch of instances, typically generated by a torch `DataLoader`
instance (see :class:`quapy.classification.neural.TorchDataset`)
:return: a torch tensor of shape `(n_samples, n_dimensions)`, where
`n_samples` is the number of documents, and `n_dimensions` is the
dimensionality of the embedding
"""
embedded = self.word_embedding(x) embedded = self.word_embedding(x)
rnn_output, rnn_hidden = self.lstm(embedded, self.init_hidden(x.size()[0])) rnn_output, rnn_hidden = self.lstm(embedded, self.__init_hidden(x.size()[0]))
abstracted = self.dropout(F.relu(rnn_hidden[0][-1])) abstracted = self.dropout(F.relu(rnn_hidden[0][-1]))
abstracted = self.doc_embedder(abstracted) abstracted = self.doc_embedder(abstracted)
return abstracted return abstracted
def get_params(self): def get_params(self):
"""
Get hyper-parameters for this estimator
:return: a dictionary with parameter names mapped to their values
"""
return self.hyperparams return self.hyperparams
@property @property
def vocabulary_size(self): def vocabulary_size(self):
"""
Return the size of the vocabulary
:return: integer
"""
return self.vocabulary_size_ return self.vocabulary_size_
class CNNnet(TextClassifierNet): class CNNnet(TextClassifierNet):
"""
An implementation of :class:`quapy.classification.neural.TextClassifierNet` based on
Convolutional Neural Networks.
:param vocabulary_size: the size of the vocabulary
:param n_classes: number of target classes
:param embedding_size: the dimensionality of the word embeddings space (default 100)
:param hidden_size: the dimensionality of the hidden space (default 256)
:param repr_size: the dimensionality of the document embeddings space (default 100)
:param kernel_heights: list of kernel lengths (default [3,5,7]), i.e., the number of
consecutive tokens that each kernel covers
:param stride: convolutional stride (default 1)
:param padding: convolutional pad (default 0)
:param drop_p: drop probability for dropout (default 0.5)
"""
def __init__(self, vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100, def __init__(self, vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100,
kernel_heights=[3, 5, 7], stride=1, padding=0, drop_p=0.5): kernel_heights=[3, 5, 7], stride=1, padding=0, drop_p=0.5):
@ -320,19 +493,28 @@ class CNNnet(TextClassifierNet):
self.doc_embedder = torch.nn.Linear(len(kernel_heights) * hidden_size, self.dim) self.doc_embedder = torch.nn.Linear(len(kernel_heights) * hidden_size, self.dim)
self.output = nn.Linear(self.dim, n_classes) self.output = nn.Linear(self.dim, n_classes)
def conv_block(self, input, conv_layer): def __conv_block(self, input, conv_layer):
conv_out = conv_layer(input) # conv_out.size() = (batch_size, out_channels, dim, 1) conv_out = conv_layer(input) # conv_out.size() = (batch_size, out_channels, dim, 1)
activation = F.relu(conv_out.squeeze(3)) # activation.size() = (batch_size, out_channels, dim1) activation = F.relu(conv_out.squeeze(3)) # activation.size() = (batch_size, out_channels, dim1)
max_out = F.max_pool1d(activation, activation.size()[2]).squeeze(2) # maxpool_out.size() = (batch_size, out_channels) max_out = F.max_pool1d(activation, activation.size()[2]).squeeze(2) # maxpool_out.size() = (batch_size, out_channels)
return max_out return max_out
def document_embedding(self, input): def document_embedding(self, input):
"""Embeds documents (i.e., performs the forward pass up to the
next-to-last layer).
:param input: a batch of instances, typically generated by a torch `DataLoader`
instance (see :class:`quapy.classification.neural.TorchDataset`)
:return: a torch tensor of shape `(n_samples, n_dimensions)`, where
`n_samples` is the number of documents, and `n_dimensions` is the
dimensionality of the embedding
"""
input = self.word_embedding(input) input = self.word_embedding(input)
input = input.unsqueeze(1) # input.size() = (batch_size, 1, num_seq, embedding_length) input = input.unsqueeze(1) # input.size() = (batch_size, 1, num_seq, embedding_length)
max_out1 = self.conv_block(input, self.conv1) max_out1 = self.__conv_block(input, self.conv1)
max_out2 = self.conv_block(input, self.conv2) max_out2 = self.__conv_block(input, self.conv2)
max_out3 = self.conv_block(input, self.conv3) max_out3 = self.__conv_block(input, self.conv3)
all_out = torch.cat((max_out1, max_out2, max_out3), 1) # all_out.size() = (batch_size, num_kernels*out_channels) all_out = torch.cat((max_out1, max_out2, max_out3), 1) # all_out.size() = (batch_size, num_kernels*out_channels)
abstracted = self.dropout(F.relu(all_out)) # (batch_size, num_kernels*out_channels) abstracted = self.dropout(F.relu(all_out)) # (batch_size, num_kernels*out_channels)
@ -340,10 +522,20 @@ class CNNnet(TextClassifierNet):
return abstracted return abstracted
def get_params(self): def get_params(self):
"""
Get hyper-parameters for this estimator
:return: a dictionary with parameter names mapped to their values
"""
return self.hyperparams return self.hyperparams
@property @property
def vocabulary_size(self): def vocabulary_size(self):
"""
Return the size of the vocabulary
:return: integer
"""
return self.vocabulary_size_ return self.vocabulary_size_

View File

@ -1,17 +1,29 @@
import random import random
import subprocess import subprocess
import tempfile
from os import remove, makedirs from os import remove, makedirs
from os.path import join, exists from os.path import join, exists
from subprocess import PIPE, STDOUT from subprocess import PIPE, STDOUT
import shutil
import numpy as np import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.datasets import dump_svmlight_file from sklearn.datasets import dump_svmlight_file
class SVMperf(BaseEstimator, ClassifierMixin): class SVMperf(BaseEstimator, ClassifierMixin):
"""A wrapper for the `SVM-perf package <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`__ by Thorsten Joachims.
When using losses for quantification, the source code has to be patched. See
the `installation documentation <https://hlt-isti.github.io/QuaPy/build/html/Installation.html#svm-perf-with-quantification-oriented-losses>`__
for further details.
References:
* `Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406?casa_token=8D2fHsGCVn0AAAAA:ZfThYOvrzWxMGfZYlQW_y8Cagg-o_l6X_PcF09mdETQ4Tu7jK98mxFbGSXp9ZSO14JkUIYuDGFG0>`__
* `Barranquero et al. 2015 <https://www.sciencedirect.com/science/article/abs/pii/S003132031400291X>`__
:param svmperf_base: path to directory containing the binary files `svm_perf_learn` and `svm_perf_classify`
:param C: trade-off between training error and margin (default 0.01)
:param verbose: set to True to print svm-perf std outputs
:param loss: the loss to optimize for. Available losses are "01", "f1", "kld", "nkld", "q", "qacc", "qf1", "qgm", "mae", "mrae".
"""
# losses with their respective codes in svm_perf implementation # losses with their respective codes in svm_perf implementation
valid_losses = {'01':0, 'f1':1, 'kld':12, 'nkld':13, 'q':22, 'qacc':23, 'qf1':24, 'qgm':25, 'mae':26, 'mrae':27} valid_losses = {'01':0, 'f1':1, 'kld':12, 'nkld':13, 'q':22, 'qacc':23, 'qf1':24, 'qgm':25, 'mae':26, 'mrae':27}
@ -24,10 +36,22 @@ class SVMperf(BaseEstimator, ClassifierMixin):
self.loss = loss self.loss = loss
def set_params(self, **parameters): def set_params(self, **parameters):
"""
Set the hyper-parameters for svm-perf. Currently, only the `C` parameter is supported
:param parameters: a `**kwargs` dictionary `{'C': <float>}`
"""
assert list(parameters.keys()) == ['C'], 'currently, only the C parameter is supported' assert list(parameters.keys()) == ['C'], 'currently, only the C parameter is supported'
self.C = parameters['C'] self.C = parameters['C']
def fit(self, X, y): def fit(self, X, y):
"""
Trains the SVM for the multivariate performance loss
:param X: training instances
:param y: a binary vector of labels
:return: `self`
"""
assert self.loss in SVMperf.valid_losses, \ assert self.loss in SVMperf.valid_losses, \
f'unsupported loss {self.loss}, valid ones are {list(SVMperf.valid_losses.keys())}' f'unsupported loss {self.loss}, valid ones are {list(SVMperf.valid_losses.keys())}'
@ -68,11 +92,24 @@ class SVMperf(BaseEstimator, ClassifierMixin):
return self return self
def predict(self, X): def predict(self, X):
"""
Predicts labels for the instances `X`
:param X: array-like of shape `(n_samples, n_features)` containing the instances to classify
:return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of
instances in `X`
"""
confidence_scores = self.decision_function(X) confidence_scores = self.decision_function(X)
predictions = (confidence_scores > 0) * 1 predictions = (confidence_scores > 0) * 1
return predictions return predictions
def decision_function(self, X, y=None): def decision_function(self, X, y=None):
"""
Evaluate the decision function for the samples in `X`.
:param X: array-like of shape `(n_samples, n_features)` containing the instances to classify
:param y: unused
:return: array-like of shape `(n_samples,)` containing the decision scores of the instances
"""
assert hasattr(self, 'tmpdir'), 'predict called before fit' assert hasattr(self, 'tmpdir'), 'predict called before fit'
assert self.tmpdir is not None, 'model directory corrupted' assert self.tmpdir is not None, 'model directory corrupted'
assert exists(self.model), 'model not found' assert exists(self.model), 'model not found'

View File

@ -91,8 +91,8 @@ class GridSearchQ(BaseQuantifier):
if self.protocol=='npp' and (self.eval_budget is None or self.eval_budget <= 0): if self.protocol=='npp' and (self.eval_budget is None or self.eval_budget <= 0):
raise ValueError(f'when protocol="npp" the parameter eval_budget should be ' raise ValueError(f'when protocol="npp" the parameter eval_budget should be '
f'indicated (and should be >0).') f'indicated (and should be >0).')
if self.n_prevpoints != 1: if self.n_repetitions != 1:
print('[warning] n_prevpoints has been set and will be ignored for the selected protocol') print('[warning] n_repetitions has been set and will be ignored for the selected protocol')
def _sout(self, msg): def _sout(self, msg):
if self.verbose: if self.verbose:
@ -165,7 +165,6 @@ class GridSearchQ(BaseQuantifier):
params_values = list(self.param_grid.values()) params_values = list(self.param_grid.values())
model = self.model model = self.model
n_jobs = self.n_jobs
if self.timeout > 0: if self.timeout > 0:
def handler(signum, frame): def handler(signum, frame):
@ -174,7 +173,6 @@ class GridSearchQ(BaseQuantifier):
signal.signal(signal.SIGALRM, handler) signal.signal(signal.SIGALRM, handler)
self._sout(f'starting optimization with n_jobs={n_jobs}')
self.param_scores_ = {} self.param_scores_ = {}
self.best_score_ = None self.best_score_ = None
some_timeouts = False some_timeouts = False

View File

@ -83,6 +83,7 @@ def download_file_if_not_exists(url, archive_path):
def create_if_not_exist(path): def create_if_not_exist(path):
os.makedirs(path, exist_ok=True) os.makedirs(path, exist_ok=True)
return path
def get_quapy_home(): def get_quapy_home():