diff --git a/docs/build/html/_modules/index.html b/docs/build/html/_modules/index.html
new file mode 100644
index 0000000..f4894ba
--- /dev/null
+++ b/docs/build/html/_modules/index.html
@@ -0,0 +1,123 @@
+Overview: module code — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation
\ No newline at end of file
diff --git a/docs/build/html/_modules/quapy/classification/calibration.html b/docs/build/html/_modules/quapy/classification/calibration.html
new file mode 100644
index 0000000..d2d88ca
--- /dev/null
+++ b/docs/build/html/_modules/quapy/classification/calibration.html
@@ -0,0 +1,351 @@
+quapy.classification.calibration — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation

Source code for quapy.classification.calibration

+from copy import deepcopy
+
+from abstention.calibration import NoBiasVectorScaling, TempScaling, VectorScaling
+from sklearn.base import BaseEstimator, clone
+from sklearn.model_selection import cross_val_predict, train_test_split
+import numpy as np
+
+
+# Wrappers of the calibration methods defined by Alexandari et al. in the paper <http://proceedings.mlr.press/v119/alexandari20a.html>
+# requires "pip install abstention"
+# see https://github.com/kundajelab/abstention
+
+
+
+[docs] +class RecalibratedProbabilisticClassifier: + """ + Abstract class for the (re)calibration methods from `abstention.calibration`, as defined in + `Alexandari, A., Kundaje, A., & Shrikumar, A. (2020, November). Maximum likelihood with bias-corrected calibration + is hard-to-beat at label shift adaptation. In International Conference on Machine Learning (pp. 222-232). PMLR. + <http://proceedings.mlr.press/v119/alexandari20a.html>`_. + """ + pass
+ + + +
+[docs] +class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabilisticClassifier): + """ + Applies a (re)calibration method from `abstention.calibration`, as defined in + `Alexandari et al. paper <http://proceedings.mlr.press/v119/alexandari20a.html>`_. + + :param classifier: a scikit-learn probabilistic classifier + :param calibrator: the calibration object (an instance of abstention.calibration.CalibratorFactory) + :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities (in which + case the classifier is retrained on the whole training set afterwards), or a float p in (0,1) to indicate + that the posteriors are obtained in a stratified validation split containing a proportion p of the + training instances (in which case the classifier is trained on the remaining instances only). + Default value is 5. + :param n_jobs: indicate the number of parallel workers (only when val_split is an integer); default=None + :param verbose: whether or not to display information in the standard output + """ + + def __init__(self, classifier, calibrator, val_split=5, n_jobs=None, verbose=False): + self.classifier = classifier + self.calibrator = calibrator + self.val_split = val_split + self.n_jobs = n_jobs + self.verbose = verbose
+[docs] + def fit(self, X, y): + """ + Fits the calibration for the probabilistic classifier. + + :param X: array-like of shape `(n_samples, n_features)` with the data instances + :param y: array-like of shape `(n_samples,)` with the class labels + :return: self + """ + k = self.val_split + if isinstance(k, int): + if k < 2: + raise ValueError('wrong value for val_split: the number of folds must be at least 2') + return self.fit_cv(X, y) + elif isinstance(k, float): + if not (0 < k < 1): + raise ValueError('wrong value for val_split: the proportion of validation documents must be in (0,1)') + return self.fit_tr_val(X, y) + else: + raise ValueError(f'wrong type for val_split: int or float expected, found {type(k)}')
+ + +
+[docs] + def fit_cv(self, X, y): + """ + Fits the calibration in a cross-validation manner, i.e., it generates posterior probabilities for all + training instances via cross-validation, and then retrains the classifier on all training instances. + The posterior probabilities thus generated are used for calibrating the outputs of the classifier. + + :param X: array-like of shape `(n_samples, n_features)` with the data instances + :param y: array-like of shape `(n_samples,)` with the class labels + :return: self + """ + posteriors = cross_val_predict( + self.classifier, X, y, cv=self.val_split, n_jobs=self.n_jobs, verbose=self.verbose, method='predict_proba' + ) + self.classifier.fit(X, y) + nclasses = len(np.unique(y)) + self.calibration_function = self.calibrator(posteriors, np.eye(nclasses)[y], posterior_supplied=True) + return self
+ + +
+[docs] + def fit_tr_val(self, X, y): + """ + Fits the calibration in a train/val-split manner, i.e., it partitions the training instances into a + training and a validation set, and then uses the training samples to learn a classifier which is then used + to generate posterior probabilities for the held-out validation data. These posteriors are used to calibrate + the classifier. The classifier is not retrained on the whole dataset. + + :param X: array-like of shape `(n_samples, n_features)` with the data instances + :param y: array-like of shape `(n_samples,)` with the class labels + :return: self + """ + Xtr, Xva, ytr, yva = train_test_split(X, y, test_size=self.val_split, stratify=y) + self.classifier.fit(Xtr, ytr) + posteriors = self.classifier.predict_proba(Xva) + nclasses = len(np.unique(yva)) + self.calibration_function = self.calibrator(posteriors, np.eye(nclasses)[yva], posterior_supplied=True) + return self
+ + +
+[docs] + def predict(self, X): + """ + Predicts class labels for the data instances in `X` + + :param X: array-like of shape `(n_samples, n_features)` with the data instances + :return: array-like of shape `(n_samples,)` with the class label predictions + """ + return self.classifier.predict(X)
+ + +
+[docs] + def predict_proba(self, X): + """ + Generates posterior probabilities for the data instances in `X` + + :param X: array-like of shape `(n_samples, n_features)` with the data instances + :return: array-like of shape `(n_samples, n_classes)` with posterior probabilities + """ + posteriors = self.classifier.predict_proba(X) + return self.calibration_function(posteriors)
+ + + @property + def classes_(self): + """ + Returns the classes on which the classifier has been trained + + :return: array-like of shape `(n_classes,)` + """ + return self.classifier.classes_
+ + + +
+[docs] +class NBVSCalibration(RecalibratedProbabilisticClassifierBase): + """ + Applies the No-Bias Vector Scaling (NBVS) calibration method from `abstention.calibration`, as defined in + `Alexandari et al. paper <http://proceedings.mlr.press/v119/alexandari20a.html>`_. + + :param classifier: a scikit-learn probabilistic classifier + :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities (in which + case the classifier is retrained on the whole training set afterwards), or a float p in (0,1) to indicate + that the posteriors are obtained in a stratified validation split containing a proportion p of the + training instances (in which case the classifier is trained on the remaining instances only). + Default value is 5. + :param n_jobs: indicate the number of parallel workers (only when val_split is an integer) + :param verbose: whether or not to display information in the standard output + """ + + def __init__(self, classifier, val_split=5, n_jobs=None, verbose=False): + self.classifier = classifier + self.calibrator = NoBiasVectorScaling(verbose=verbose) + self.val_split = val_split + self.n_jobs = n_jobs + self.verbose = verbose
+ + + +
+[docs] +class BCTSCalibration(RecalibratedProbabilisticClassifierBase): + """ + Applies the Bias-Corrected Temperature Scaling (BCTS) calibration method from `abstention.calibration`, as defined in + `Alexandari et al. paper <http://proceedings.mlr.press/v119/alexandari20a.html>`_. + + :param classifier: a scikit-learn probabilistic classifier + :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities (in which + case the classifier is retrained on the whole training set afterwards), or a float p in (0,1) to indicate + that the posteriors are obtained in a stratified validation split containing a proportion p of the + training instances (in which case the classifier is trained on the remaining instances only). + Default value is 5. + :param n_jobs: indicate the number of parallel workers (only when val_split is an integer) + :param verbose: whether or not to display information in the standard output + """ + + def __init__(self, classifier, val_split=5, n_jobs=None, verbose=False): + self.classifier = classifier + self.calibrator = TempScaling(verbose=verbose, bias_positions='all') + self.val_split = val_split + self.n_jobs = n_jobs + self.verbose = verbose
+ + + +
+[docs] +class TSCalibration(RecalibratedProbabilisticClassifierBase): + """ + Applies the Temperature Scaling (TS) calibration method from `abstention.calibration`, as defined in + `Alexandari et al. paper <http://proceedings.mlr.press/v119/alexandari20a.html>`_. + + :param classifier: a scikit-learn probabilistic classifier + :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities (in which + case the classifier is retrained on the whole training set afterwards), or a float p in (0,1) to indicate + that the posteriors are obtained in a stratified validation split containing a proportion p of the + training instances (in which case the classifier is trained on the remaining instances only). + Default value is 5. + :param n_jobs: indicate the number of parallel workers (only when val_split is an integer) + :param verbose: whether or not to display information in the standard output + """ + + def __init__(self, classifier, val_split=5, n_jobs=None, verbose=False): + self.classifier = classifier + self.calibrator = TempScaling(verbose=verbose) + self.val_split = val_split + self.n_jobs = n_jobs + self.verbose = verbose
+ + + +
+[docs] +class VSCalibration(RecalibratedProbabilisticClassifierBase): + """ + Applies the Vector Scaling (VS) calibration method from `abstention.calibration`, as defined in + `Alexandari et al. paper <http://proceedings.mlr.press/v119/alexandari20a.html>`_. + + :param classifier: a scikit-learn probabilistic classifier + :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities (in which + case the classifier is retrained on the whole training set afterwards), or a float p in (0,1) to indicate + that the posteriors are obtained in a stratified validation split containing a proportion p of the + training instances (in which case the classifier is trained on the remaining instances only). + Default value is 5. + :param n_jobs: indicate the number of parallel workers (only when val_split is an integer) + :param verbose: whether or not to display information in the standard output + """ + + def __init__(self, classifier, val_split=5, n_jobs=None, verbose=False): + self.classifier = classifier + self.calibrator = VectorScaling(verbose=verbose) + self.val_split = val_split + self.n_jobs = n_jobs + self.verbose = verbose
+ + +
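All four wrappers share the interface of RecalibratedProbabilisticClassifierBase; what follows is a minimal usage sketch (assuming scikit-learn and the `abstention` package are installed; the synthetic dataset and hyperparameter values are purely illustrative):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

# toy binary dataset; any probabilistic scikit-learn classifier can be wrapped
X, y = make_classification(n_samples=1000, n_features=20, random_state=0)

# recalibrate via 5-fold cross-validation (val_split=5, the default)
calibrated = BCTSCalibration(LogisticRegression(max_iter=1000), val_split=5)
calibrated.fit(X, y)
posteriors = calibrated.predict_proba(X)  # calibrated posterior probabilities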
\ No newline at end of file
diff --git a/docs/build/html/_modules/quapy/classification/methods.html b/docs/build/html/_modules/quapy/classification/methods.html
new file mode 100644
index 0000000..883f802
--- /dev/null
+++ b/docs/build/html/_modules/quapy/classification/methods.html
@@ -0,0 +1,220 @@
+quapy.classification.methods — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation

Source code for quapy.classification.methods

+from sklearn.base import BaseEstimator
+from sklearn.decomposition import TruncatedSVD
+from sklearn.linear_model import LogisticRegression
+
+
+
+[docs] +class LowRankLogisticRegression(BaseEstimator): + """ + An example of a classification method (i.e., an object that implements `fit`, `predict`, and `predict_proba`) + that also generates embedded inputs (i.e., that implements `transform`), as those required for + :class:`quapy.method.neural.QuaNet`. This is a mock method to allow for easily instantiating + :class:`quapy.method.neural.QuaNet` on array-like real-valued instances. + The transformation consists of applying :class:`sklearn.decomposition.TruncatedSVD` + while classification is performed using :class:`sklearn.linear_model.LogisticRegression` on the low-rank space. + + :param n_components: the number of principal components to retain + :param kwargs: parameters for the + `Logistic Regression <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html>`__ classifier + """ + + def __init__(self, n_components=100, **kwargs): + self.n_components = n_components + self.classifier = LogisticRegression(**kwargs) + +
+[docs] + def get_params(self): + """ + Get hyper-parameters for this estimator. + + :return: a dictionary with parameter names mapped to their values + """ + params = {'n_components': self.n_components} + params.update(self.classifier.get_params()) + return params
+ + +
+[docs] + def set_params(self, **params): + """ + Set the parameters of this estimator. + + :param params: a `**kwargs` dictionary with the estimator parameters for + `Logistic Regression <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html>`__ + and optionally also `n_components` for `TruncatedSVD` + """ + params_ = dict(params) + if 'n_components' in params_: + self.n_components = params_['n_components'] + del params_['n_components'] + self.classifier.set_params(**params_)
+ + +
+[docs] + def fit(self, X, y): + """ + Fit the model according to the given training data. The fit consists of + fitting `TruncatedSVD` and then `LogisticRegression` on the low-rank representation. + + :param X: array-like of shape `(n_samples, n_features)` with the instances + :param y: array-like of shape `(n_samples,)` with the class labels + :return: `self` + """ + nF = X.shape[1] + self.pca = None + if nF > self.n_components: + self.pca = TruncatedSVD(self.n_components).fit(X) + X = self.transform(X) + self.classifier.fit(X, y) + self.classes_ = self.classifier.classes_ + return self
+ + +
+[docs] + def predict(self, X): + """ + Predicts labels for the instances `X` embedded into the low-rank space. + + :param X: array-like of shape `(n_samples, n_features)` instances to classify + :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of + instances in `X` + """ + X = self.transform(X) + return self.classifier.predict(X)
+ + +
+[docs] + def predict_proba(self, X): + """ + Predicts posterior probabilities for the instances `X` embedded into the low-rank space. + + :param X: array-like of shape `(n_samples, n_features)` instances to classify + :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities + """ + X = self.transform(X) + return self.classifier.predict_proba(X)
+ + +
+[docs] + def transform(self, X): + """ + Returns the low-rank approximation of `X` with `n_components` dimensions, or `X` unaltered if + `n_components` >= `X.shape[1]`. + + :param X: array-like of shape `(n_samples, n_features)` instances to embed + :return: array-like of shape `(n_samples, n_components)` with the embedded instances + """ + if self.pca is None: + return X + return self.pca.transform(X)
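A minimal usage sketch of LowRankLogisticRegression (the synthetic data and all sizes are illustrative):

import numpy as np

X = np.random.rand(1000, 500)                 # 500 raw features
y = np.random.randint(0, 2, size=1000)

cls = LowRankLogisticRegression(n_components=100)
cls.fit(X, y)                                 # fits TruncatedSVD, then LogisticRegression
posteriors = cls.predict_proba(X)             # shape (1000, 2)
embeddings = cls.transform(X)                 # shape (1000, 100), as required by QuaNet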
\ No newline at end of file
diff --git a/docs/build/html/_modules/quapy/classification/neural.html b/docs/build/html/_modules/quapy/classification/neural.html
new file mode 100644
index 0000000..fd5d9b1
--- /dev/null
+++ b/docs/build/html/_modules/quapy/classification/neural.html
@@ -0,0 +1,715 @@
+quapy.classification.neural — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation

Source code for quapy.classification.neural

+import os
+from abc import ABCMeta, abstractmethod
+from pathlib import Path
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from sklearn.metrics import accuracy_score, f1_score
+from torch.nn.utils.rnn import pad_sequence
+from tqdm import tqdm
+
+import quapy as qp
+from quapy.data import LabelledCollection
+from quapy.util import EarlyStop
+
+
+
+[docs] +class NeuralClassifierTrainer: + """ + Trains a neural network for text classification. + + :param net: an instance of `TextClassifierNet` implementing the forward pass + :param lr: learning rate (default 1e-3) + :param weight_decay: weight decay (default 0) + :param patience: number of consecutive epochs without improvement on the validation set to wait + before applying early stopping (default 10) + :param epochs: maximum number of training epochs (default 200) + :param batch_size: batch size for training (default 64) + :param batch_size_test: batch size for test (default 512) + :param padding_length: maximum number of tokens to consider in a document (default 300) + :param device: specify 'cpu' or 'cuda' (default) for enabling gpu + :param checkpointpath: where to store the parameters of the best model found so far + according to the evaluation in the held-out validation split (default '../checkpoint/classifier_net.dat') + """ + + def __init__(self, + net: 'TextClassifierNet', + lr=1e-3, + weight_decay=0, + patience=10, + epochs=200, + batch_size=64, + batch_size_test=512, + padding_length=300, + device='cuda', + checkpointpath='../checkpoint/classifier_net.dat'): + + super().__init__() + + assert isinstance(net, TextClassifierNet), f'net is not an instance of {TextClassifierNet.__name__}' + self.net = net.to(device) + self.vocab_size = self.net.vocabulary_size + self.trainer_hyperparams={ + 'lr': lr, + 'weight_decay': weight_decay, + 'patience': patience, + 'epochs': epochs, + 'batch_size': batch_size, + 'batch_size_test': batch_size_test, + 'padding_length': padding_length, + 'device': torch.device(device) + } + self.learner_hyperparams = self.net.get_params() + self.checkpointpath = checkpointpath + + print(f'[NeuralNetwork running on {device}]') + os.makedirs(Path(checkpointpath).parent, exist_ok=True)
+[docs] + def reset_net_params(self, vocab_size, n_classes): + """Reinitialize the network parameters + + :param vocab_size: the size of the vocabulary + :param n_classes: the number of target classes + """ + self.net = self.net.__class__(vocab_size, n_classes, **self.learner_hyperparams) + self.net = self.net.to(self.trainer_hyperparams['device']) + self.net.xavier_uniform()
+ + +
+[docs] + def get_params(self): + """Get hyper-parameters for this estimator + + :return: a dictionary with parameter names mapped to their values + """ + return {**self.net.get_params(), **self.trainer_hyperparams}
+ + +
+[docs] + def set_params(self, **params): + """Set the parameters of this trainer and the learner it is training. + In the current version, parameter names for the trainer and learner should + be disjoint. + + :param params: a `**kwargs` dictionary with the parameters + """ + trainer_hyperparams = self.trainer_hyperparams + learner_hyperparams = self.net.get_params() + for key, val in params.items(): + if key in trainer_hyperparams and key in learner_hyperparams: + raise ValueError(f'the use of parameter {key} is ambiguous since it can refer to ' + f'a parameter of the trainer or of the learner {self.net.__class__.__name__}') + elif key not in trainer_hyperparams and key not in learner_hyperparams: + raise ValueError(f'parameter {key} is not valid') + + if key in trainer_hyperparams: + trainer_hyperparams[key] = val + else: + learner_hyperparams[key] = val + + self.trainer_hyperparams = trainer_hyperparams + self.learner_hyperparams = learner_hyperparams
+ + + @property + def device(self): + """ Gets the device in which the network is allocated + + :return: device + """ + return next(self.net.parameters()).device + + def _train_epoch(self, data, status, pbar, epoch): + self.net.train() + criterion = torch.nn.CrossEntropyLoss() + losses, predictions, true_labels = [], [], [] + for xi, yi in data: + self.optim.zero_grad() + logits = self.net.forward(xi) + loss = criterion(logits, yi) + loss.backward() + self.optim.step() + losses.append(loss.item()) + preds = torch.softmax(logits, dim=-1).detach().cpu().numpy().argmax(axis=-1) + + status["loss"] = np.mean(losses) + predictions.extend(preds.tolist()) + true_labels.extend(yi.detach().cpu().numpy().tolist()) + status["acc"] = accuracy_score(true_labels, predictions) + status["f1"] = f1_score(true_labels, predictions, average='macro') + self.__update_progress_bar(pbar, epoch) + + def _test_epoch(self, data, status, pbar, epoch): + self.net.eval() + criterion = torch.nn.CrossEntropyLoss() + losses, predictions, true_labels = [], [], [] + with torch.no_grad(): + for xi, yi in data: + logits = self.net.forward(xi) + loss = criterion(logits, yi) + losses.append(loss.item()) + preds = torch.softmax(logits, dim=-1).detach().cpu().numpy().argmax(axis=-1) + predictions.extend(preds.tolist()) + true_labels.extend(yi.detach().cpu().numpy().tolist()) + + status["loss"] = np.mean(losses) + status["acc"] = accuracy_score(true_labels, predictions) + status["f1"] = f1_score(true_labels, predictions, average='macro') + self.__update_progress_bar(pbar, epoch) + + def __update_progress_bar(self, pbar, epoch): + pbar.set_description(f'[{self.net.__class__.__name__}] training epoch={epoch} ' + f'tr-loss={self.status["tr"]["loss"]:.5f} ' + f'tr-acc={100 * self.status["tr"]["acc"]:.2f}% ' + f'tr-macroF1={100 * self.status["tr"]["f1"]:.2f}% ' + f'patience={self.early_stop.patience}/{self.early_stop.PATIENCE_LIMIT} ' + f'val-loss={self.status["va"]["loss"]:.5f} ' + f'val-acc={100 * self.status["va"]["acc"]:.2f}% ' + f'macroF1={100 * self.status["va"]["f1"]:.2f}%') + +
+[docs] + def fit(self, instances, labels, val_split=0.3): + """ + Fits the model according to the given training data. + + :param instances: list of lists of indexed tokens + :param labels: array-like of shape `(n_samples,)` with the class labels + :param val_split: proportion of training documents to be taken as the validation set (default 0.3) + :return: `self` + """ + train, val = LabelledCollection(instances, labels).split_stratified(1-val_split) + self.classes_ = train.classes_ + opt = self.trainer_hyperparams + checkpoint = self.checkpointpath + self.reset_net_params(self.vocab_size, train.n_classes) + + train_generator = TorchDataset(train.instances, train.labels).asDataloader( + opt['batch_size'], shuffle=True, pad_length=opt['padding_length'], device=opt['device']) + valid_generator = TorchDataset(val.instances, val.labels).asDataloader( + opt['batch_size_test'], shuffle=False, pad_length=opt['padding_length'], device=opt['device']) + + self.status = {'tr': {'loss': -1, 'acc': -1, 'f1': -1}, + 'va': {'loss': -1, 'acc': -1, 'f1': -1}} + + self.optim = torch.optim.Adam(self.net.parameters(), lr=opt['lr'], weight_decay=opt['weight_decay']) + self.early_stop = EarlyStop(opt['patience'], lower_is_better=False) + + with tqdm(range(1, opt['epochs'] + 1)) as pbar: + for epoch in pbar: + self._train_epoch(train_generator, self.status['tr'], pbar, epoch) + self._test_epoch(valid_generator, self.status['va'], pbar, epoch) + + self.early_stop(self.status['va']['f1'], epoch) + if self.early_stop.IMPROVED: + torch.save(self.net.state_dict(), checkpoint) + elif self.early_stop.STOP: + print(f'training ended by patience exhausted; loading best model parameters in {checkpoint} ' + f'for epoch {self.early_stop.best_epoch}') + self.net.load_state_dict(torch.load(checkpoint)) + break + + print('performing one training pass over the validation set...') + self._train_epoch(valid_generator, self.status['tr'], pbar, epoch=0) + print('[done]') + + return self
+ + +
+[docs] + def predict(self, instances): + """ + Predicts labels for the instances + + :param instances: list of lists of indexed tokens + :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of + instances in `instances` + """ + return np.argmax(self.predict_proba(instances), axis=-1)
+ + +
+[docs] + def predict_proba(self, instances): + """ + Predicts posterior probabilities for the instances + + :param instances: list of lists of indexed tokens + :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities + """ + self.net.eval() + opt = self.trainer_hyperparams + with torch.no_grad(): + posteriors = [] + for xi in TorchDataset(instances).asDataloader( + opt['batch_size_test'], shuffle=False, pad_length=opt['padding_length'], device=opt['device']): + posteriors.append(self.net.predict_proba(xi)) + return np.concatenate(posteriors)
+ + +
+[docs] + def transform(self, instances): + """ + Returns the embeddings of the instances + + :param instances: list of lists of indexed tokens + :return: array-like of shape `(n_samples, embed_size)` with the embedded instances, + where `embed_size` is defined by the classification network + """ + self.net.eval() + embeddings = [] + opt = self.trainer_hyperparams + with torch.no_grad(): + for xi in TorchDataset(instances).asDataloader( + opt['batch_size_test'], shuffle=False, pad_length=opt['padding_length'], device=opt['device']): + embeddings.append(self.net.document_embedding(xi).detach().cpu().numpy()) + return np.concatenate(embeddings)
+
+ + + +
+[docs] +class TorchDataset(torch.utils.data.Dataset): + """ + Wraps labelled instances as a Torch :class:`torch.utils.data.Dataset`; see :meth:`asDataloader` for + converting it into a :class:`torch.utils.data.DataLoader` with dynamic padding + + :param instances: list of lists of indexed tokens + :param labels: array-like of shape `(n_samples,)` with the class labels + """ + + def __init__(self, instances, labels=None): + self.instances = instances + self.labels = labels + + def __len__(self): + return len(self.instances) + + def __getitem__(self, index): + return {'doc': self.instances[index], 'label': self.labels[index] if self.labels is not None else None}
+[docs] + def asDataloader(self, batch_size, shuffle, pad_length, device): + """ + Converts the labelled collection into a Torch DataLoader with dynamic padding for + the batch + + :param batch_size: batch size + :param shuffle: whether or not to shuffle instances + :param pad_length: the maximum length for the list of tokens (dynamic padding is + applied, meaning that if the longest document in the batch is shorter than + `pad_length`, then the batch is padded up to its length, and not to `pad_length`) + :param device: the device ('cpu' or 'cuda') in which to allocate the tensors + :return: a :class:`torch.utils.data.DataLoader` object + """ + def collate(batch): + data = [torch.LongTensor(item['doc'][:pad_length]) for item in batch] + data = pad_sequence(data, batch_first=True, padding_value=qp.environ['PAD_INDEX']).to(device) + targets = [item['label'] for item in batch] + if targets[0] is None: + return data + else: + targets = torch.as_tensor(targets, dtype=torch.long).to(device) + return [data, targets] + + torchDataset = TorchDataset(self.instances, self.labels) + return torch.utils.data.DataLoader(torchDataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate)
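The dynamic padding performed by `collate` can be observed in the following sketch (the token indices are toy values; the padding value is taken from `qp.environ['PAD_INDEX']`, for which QuaPy provides a default):

docs = [[2, 5, 7], [4, 1], [9, 8, 3, 6, 2]]   # three tokenized documents
labels = [0, 1, 0]

loader = TorchDataset(docs, labels).asDataloader(
    batch_size=2, shuffle=False, pad_length=300, device='cpu')
for xi, yi in loader:
    print(xi.shape)   # padded to the longest document in the batch, not to pad_length=300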
+
+ + + +
+[docs] +class TextClassifierNet(torch.nn.Module, metaclass=ABCMeta): + """ + Abstract Text classifier (`torch.nn.Module`) + """ + +
+[docs] + @abstractmethod + def document_embedding(self, x): + """Embeds documents (i.e., performs the forward pass up to the + next-to-last layer). + + :param x: a batch of instances, typically generated by a torch's `DataLoader` + instance (see :class:`quapy.classification.neural.TorchDataset`) + :return: a torch tensor of shape `(n_samples, n_dimensions)`, where + `n_samples` is the number of documents, and `n_dimensions` is the + dimensionality of the embedding + """ + ...
+ + +
+[docs] + def forward(self, x): + """Performs the forward pass. + + :param x: a batch of instances, typically generated by a torch's `DataLoader` + instance (see :class:`quapy.classification.neural.TorchDataset`) + :return: a tensor of shape `(n_instances, n_classes)` with the decision scores + for each of the instances and classes + """ + doc_embedded = self.document_embedding(x) + return self.output(doc_embedded)
+ + +
+[docs] + def dimensions(self): + """Gets the number of dimensions of the embedding space + + :return: integer + """ + return self.dim
+ + +
+[docs] + def predict_proba(self, x): + """ + Predicts posterior probabilities for the instances in `x` + + :param x: a torch tensor of indexed tokens with shape `(n_instances, pad_length)` + where `n_instances` is the number of instances in the batch, and `pad_length` + is the length to which the instances in the batch have been padded + :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities + """ + logits = self(x) + return torch.softmax(logits, dim=1).detach().cpu().numpy()
+ + +
+[docs] + def xavier_uniform(self): + """ + Performs Xavier initialization of the network parameters + """ + for p in self.parameters(): + if p.dim() > 1 and p.requires_grad: + torch.nn.init.xavier_uniform_(p)
+ + +
+[docs] + @abstractmethod + def get_params(self): + """ + Get hyper-parameters for this estimator + + :return: a dictionary with parameter names mapped to their values + """ + ...
+ + + @property + def vocabulary_size(self): + """ + Return the size of the vocabulary + + :return: integer + """ + ...
+ + + +
+[docs] +class LSTMnet(TextClassifierNet): + """ + An implementation of :class:`quapy.classification.neural.TextClassifierNet` based on + Long Short Term Memory networks. + + :param vocabulary_size: the size of the vocabulary + :param n_classes: number of target classes + :param embedding_size: the dimensionality of the word embeddings space (default 100) + :param hidden_size: the dimensionality of the hidden space (default 256) + :param repr_size: the dimensionality of the document embeddings space (default 100) + :param lstm_class_nlayers: number of LSTM layers (default 1) + :param drop_p: drop probability for dropout (default 0.5) + """ + + def __init__(self, vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100, lstm_class_nlayers=1, + drop_p=0.5): + + super().__init__() + self.vocabulary_size_ = vocabulary_size + self.n_classes = n_classes + self.hyperparams={ + 'embedding_size': embedding_size, + 'hidden_size': hidden_size, + 'repr_size': repr_size, + 'lstm_class_nlayers': lstm_class_nlayers, + 'drop_p': drop_p + } + + self.word_embedding = torch.nn.Embedding(vocabulary_size, embedding_size) + self.lstm = torch.nn.LSTM(embedding_size, hidden_size, lstm_class_nlayers, dropout=drop_p, batch_first=True) + self.dropout = torch.nn.Dropout(drop_p) + + self.dim = repr_size + self.doc_embedder = torch.nn.Linear(hidden_size, self.dim) + self.output = torch.nn.Linear(self.dim, n_classes) + + def __init_hidden(self, set_size): + opt = self.hyperparams + var_hidden = torch.zeros(opt['lstm_class_nlayers'], set_size, opt['hidden_size']) + var_cell = torch.zeros(opt['lstm_class_nlayers'], set_size, opt['hidden_size']) + if next(self.lstm.parameters()).is_cuda: + var_hidden, var_cell = var_hidden.cuda(), var_cell.cuda() + return var_hidden, var_cell + +
+[docs] + def document_embedding(self, x): + """Embeds documents (i.e., performs the forward pass up to the + next-to-last layer). + + :param x: a batch of instances, typically generated by a torch's `DataLoader` + instance (see :class:`quapy.classification.neural.TorchDataset`) + :return: a torch tensor of shape `(n_samples, n_dimensions)`, where + `n_samples` is the number of documents, and `n_dimensions` is the + dimensionality of the embedding + """ + embedded = self.word_embedding(x) + rnn_output, rnn_hidden = self.lstm(embedded, self.__init_hidden(x.size()[0])) + abstracted = self.dropout(F.relu(rnn_hidden[0][-1])) + abstracted = self.doc_embedder(abstracted) + return abstracted
+ + +
+[docs] + def get_params(self): + """ + Get hyper-parameters for this estimator + + :return: a dictionary with parameter names mapped to their values + """ + return self.hyperparams
+ + + @property + def vocabulary_size(self): + """ + Return the size of the vocabulary + + :return: integer + """ + return self.vocabulary_size_
+ + + +
+[docs] +class CNNnet(TextClassifierNet): + """ + An implementation of :class:`quapy.classification.neural.TextClassifierNet` based on + Convolutional Neural Networks. + + :param vocabulary_size: the size of the vocabulary + :param n_classes: number of target classes + :param embedding_size: the dimensionality of the word embeddings space (default 100) + :param hidden_size: the dimensionality of the hidden space (default 256) + :param repr_size: the dimensionality of the document embeddings space (default 100) + :param kernel_heights: list of kernel lengths (default [3,5,7]), i.e., the number of + consecutive tokens that each kernel covers + :param stride: convolutional stride (default 1) + :param padding: convolutional padding (default 0) + :param drop_p: drop probability for dropout (default 0.5) + """ + + def __init__(self, vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100, + kernel_heights=[3, 5, 7], stride=1, padding=0, drop_p=0.5): + super(CNNnet, self).__init__() + + self.vocabulary_size_ = vocabulary_size + self.n_classes = n_classes + self.hyperparams={ + 'embedding_size': embedding_size, + 'hidden_size': hidden_size, + 'repr_size': repr_size, + 'kernel_heights': kernel_heights, + 'stride': stride, + 'padding': padding, + 'drop_p': drop_p + } + self.word_embedding = torch.nn.Embedding(vocabulary_size, embedding_size) + in_channels = 1 + self.conv1 = nn.Conv2d(in_channels, hidden_size, (kernel_heights[0], embedding_size), stride, padding) + self.conv2 = nn.Conv2d(in_channels, hidden_size, (kernel_heights[1], embedding_size), stride, padding) + self.conv3 = nn.Conv2d(in_channels, hidden_size, (kernel_heights[2], embedding_size), stride, padding) + self.dropout = nn.Dropout(drop_p) + + self.dim = repr_size + self.doc_embedder = torch.nn.Linear(len(kernel_heights) * hidden_size, self.dim) + self.output = nn.Linear(self.dim, n_classes) + + def __conv_block(self, input, conv_layer): + conv_out = conv_layer(input) # conv_out.size() = (batch_size, out_channels, dim, 1) + activation = F.relu(conv_out.squeeze(3)) # activation.size() = (batch_size, out_channels, dim1) + max_out = F.max_pool1d(activation, activation.size()[2]).squeeze(2) # maxpool_out.size() = (batch_size, out_channels) + return max_out
+[docs] + def document_embedding(self, input): + """Embeds documents (i.e., performs the forward pass up to the + next-to-last layer). + + :param input: a batch of instances, typically generated by a torch's `DataLoader` + instance (see :class:`quapy.classification.neural.TorchDataset`) + :return: a torch tensor of shape `(n_samples, n_dimensions)`, where + `n_samples` is the number of documents, and `n_dimensions` is the + dimensionality of the embedding + """ + input = self.word_embedding(input) + input = input.unsqueeze(1) # input.size() = (batch_size, 1, num_seq, embedding_length) + + max_out1 = self.__conv_block(input, self.conv1) + max_out2 = self.__conv_block(input, self.conv2) + max_out3 = self.__conv_block(input, self.conv3) + + all_out = torch.cat((max_out1, max_out2, max_out3), 1) # all_out.size() = (batch_size, num_kernels*out_channels) + abstracted = self.dropout(F.relu(all_out)) # (batch_size, num_kernels*out_channels) + abstracted = self.doc_embedder(abstracted) + return abstracted
+ + +
+[docs] + def get_params(self): + """ + Get hyper-parameters for this estimator + + :return: a dictionary with parameter names mapped to their values + """ + return self.hyperparams
+ + + @property + def vocabulary_size(self): + """ + Return the size of the vocabulary + + :return: integer + """ + return self.vocabulary_size_
+ + + + + + +
\ No newline at end of file
diff --git a/docs/build/html/_modules/quapy/classification/svmperf.html b/docs/build/html/_modules/quapy/classification/svmperf.html
new file mode 100644
index 0000000..959ad48
--- /dev/null
+++ b/docs/build/html/_modules/quapy/classification/svmperf.html
@@ -0,0 +1,268 @@
+quapy.classification.svmperf — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation

Source code for quapy.classification.svmperf

+import random
+import shutil
+import subprocess
+import tempfile
+from os import remove, makedirs
+from os.path import join, exists
+from subprocess import PIPE, STDOUT
+import numpy as np
+from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.datasets import dump_svmlight_file
+
+
+
+[docs] +class SVMperf(BaseEstimator, ClassifierMixin): + """A wrapper for the `SVM-perf package <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`__ by Thorsten Joachims. + When using losses for quantification, the source code has to be patched. See + the `installation documentation <https://hlt-isti.github.io/QuaPy/build/html/Installation.html#svm-perf-with-quantification-oriented-losses>`__ + for further details. + + References: + + * `Esuli et al.2015 <https://dl.acm.org/doi/abs/10.1145/2700406?casa_token=8D2fHsGCVn0AAAAA:ZfThYOvrzWxMGfZYlQW_y8Cagg-o_l6X_PcF09mdETQ4Tu7jK98mxFbGSXp9ZSO14JkUIYuDGFG0>`__ + * `Barranquero et al.2015 <https://www.sciencedirect.com/science/article/abs/pii/S003132031400291X>`__ + + :param svmperf_base: path to directory containing the binary files `svm_perf_learn` and `svm_perf_classify` + :param C: trade-off between training error and margin (default 0.01) + :param verbose: set to True to print svm-perf std outputs + :param loss: the loss to optimize for. Available losses are "01", "f1", "kld", "nkld", "q", "qacc", "qf1", "qgm", "mae", "mrae". + :param host_folder: directory where to store the trained model; set to None (default) for using a tmp directory + (temporary directories are automatically deleted) + """ + + # losses with their respective codes in svm_perf implementation + valid_losses = {'01':0, 'f1':1, 'kld':12, 'nkld':13, 'q':22, 'qacc':23, 'qf1':24, 'qgm':25, 'mae':26, 'mrae':27} + + def __init__(self, svmperf_base, C=0.01, verbose=False, loss='01', host_folder=None): + assert exists(svmperf_base), f'path {svmperf_base} does not seem to point to a valid path' + self.svmperf_base = svmperf_base + self.C = C + self.verbose = verbose + self.loss = loss + self.host_folder = host_folder + + # def set_params(self, **parameters): + # """ + # Set the hyper-parameters for svm-perf. Currently, only the `C` and `loss` parameters are supported + # + # :param parameters: a `**kwargs` dictionary `{'C': <float>}` + # """ + # assert sorted(list(parameters.keys())) == ['C', 'loss'], \ + # 'currently, only the C and loss parameters are supported' + # self.C = parameters.get('C', self.C) + # self.loss = parameters.get('loss', self.loss) + # + # def get_params(self, deep=True): + # return {'C': self.C, 'loss': self.loss}
+[docs] + def fit(self, X, y): + """ + Trains the SVM for the multivariate performance loss + + :param X: training instances + :param y: a binary vector of labels + :return: `self` + """ + assert self.loss in SVMperf.valid_losses, \ + f'unsupported loss {self.loss}, valid ones are {list(SVMperf.valid_losses.keys())}' + + self.svmperf_learn = join(self.svmperf_base, 'svm_perf_learn') + self.svmperf_classify = join(self.svmperf_base, 'svm_perf_classify') + self.loss_cmd = '-w 3 -l ' + str(self.valid_losses[self.loss]) + self.c_cmd = '-c ' + str(self.C) + + self.classes_ = sorted(np.unique(y)) + self.n_classes_ = len(self.classes_) + + local_random = random.Random() + # this allows running parallel instances of predict + random_code = 'svmperfprocess'+'-'.join(str(local_random.randint(0, 1000000)) for _ in range(5)) + if self.host_folder is None: + # tmp dirs are removed after the fit terminates when using multiprocessing... + self.tmpdir = tempfile.TemporaryDirectory(suffix=random_code).name + else: + self.tmpdir = join(self.host_folder, '.' + random_code) + makedirs(self.tmpdir, exist_ok=True) + + self.model = join(self.tmpdir, 'model-'+random_code) + traindat = join(self.tmpdir, f'train-{random_code}.dat') + + dump_svmlight_file(X, y, traindat, zero_based=False) + + cmd = ' '.join([self.svmperf_learn, self.c_cmd, self.loss_cmd, traindat, self.model]) + if self.verbose: + print('[Running]', cmd) + p = subprocess.run(cmd.split(), stdout=PIPE, stderr=STDOUT) + if not exists(self.model): + print(p.stdout.decode('utf-8')) + remove(traindat) + + if self.verbose: + print(p.stdout.decode('utf-8')) + + return self
+ + +
+[docs] + def predict(self, X): + """ + Predicts labels for the instances `X` + + :param X: array-like of shape `(n_samples, n_features)` instances to classify + :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of + instances in `X` + """ + confidence_scores = self.decision_function(X) + predictions = (confidence_scores > 0) * 1 + return predictions
+ + +
+[docs] + def decision_function(self, X, y=None): + """ + Evaluate the decision function for the samples in `X`. + + :param X: array-like of shape `(n_samples, n_features)` containing the instances to classify + :param y: unused + :return: array-like of shape `(n_samples,)` containing the decision scores of the instances + """ + assert hasattr(self, 'tmpdir'), 'predict called before fit' + assert self.tmpdir is not None, 'model directory corrupted' + assert exists(self.model), 'model not found' + if y is None: + y = np.zeros(X.shape[0]) + + # in order to allow for parallel runs of predict, a random code is assigned + local_random = random.Random() + random_code = '-'.join(str(local_random.randint(0, 1000000)) for _ in range(5)) + predictions_path = join(self.tmpdir, 'predictions' + random_code + '.dat') + testdat = join(self.tmpdir, 'test' + random_code + '.dat') + dump_svmlight_file(X, y, testdat, zero_based=False) + + cmd = ' '.join([self.svmperf_classify, testdat, self.model, predictions_path]) + if self.verbose: + print('[Running]', cmd) + p = subprocess.run(cmd.split(), stdout=PIPE, stderr=STDOUT) + + if self.verbose: + print(p.stdout.decode('utf-8')) + + scores = np.loadtxt(predictions_path) + remove(testdat) + remove(predictions_path) + + return scores
+ + + def __del__(self): + if hasattr(self, 'tmpdir'): + shutil.rmtree(self.tmpdir, ignore_errors=True)
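A usage sketch of the wrapper (the `svmperf_base` path is an assumption and must point to a compiled, patched SVM-perf distribution):

import numpy as np

X = np.random.rand(100, 10)
y = np.random.randint(0, 2, size=100)

svm = SVMperf(svmperf_base='./svm_perf_quantification', C=0.01, loss='kld')
svm.fit(X, y)
scores = svm.decision_function(X)   # margin scores
labels = svm.predict(X)             # scores binarized at 0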
\ No newline at end of file
diff --git a/docs/build/html/_modules/quapy/data/_ifcb.html b/docs/build/html/_modules/quapy/data/_ifcb.html
new file mode 100644
index 0000000..942a5e6
--- /dev/null
+++ b/docs/build/html/_modules/quapy/data/_ifcb.html
@@ -0,0 +1,165 @@
+quapy.data._ifcb — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation

Source code for quapy.data._ifcb

+import os
+import pandas as pd
+from quapy.protocol import AbstractProtocol
+
+
+[docs] +class IFCBTrainSamplesFromDir(AbstractProtocol): + + def __init__(self, path_dir:str, classes: list): + self.path_dir = path_dir + self.classes = classes + self.samples = [] + for filename in os.listdir(path_dir): + if filename.endswith('.csv'): + self.samples.append(filename) + + def __call__(self): + for sample in self.samples: + s = pd.read_csv(os.path.join(self.path_dir,sample)) + # the first column holds the class label; the remaining columns are the features + X = s.iloc[:, 1:].to_numpy() + y = s.iloc[:, 0].to_numpy() + yield X, y + 
+[docs] + def total(self): + """ + Returns the total number of samples that the protocol generates. + + :return: The number of training samples to generate. + """ + return len(self.samples)
+
+ + + +
+[docs] +class IFCBTestSamples(AbstractProtocol): + + def __init__(self, path_dir:str, test_prevalences_path: str): + self.path_dir = path_dir + self.test_prevalences = pd.read_csv(os.path.join(path_dir, test_prevalences_path)) + + def __call__(self): + for _, test_sample in self.test_prevalences.iterrows(): + # load the sample from disk + X = pd.read_csv(os.path.join(self.path_dir,test_sample['sample']+'.csv')).to_numpy() + prevalences = test_sample.iloc[1:].to_numpy().astype(float) + yield X, prevalences + 
+[docs] + def total(self): + """ + Returns the total number of samples that the protocol generates. + + :return: The number of test samples to generate. + """ + return len(self.test_prevalences.index)
+
\ No newline at end of file
diff --git a/docs/build/html/_modules/quapy/data/_lequa2022.html b/docs/build/html/_modules/quapy/data/_lequa2022.html
new file mode 100644
index 0000000..f2a8fab
--- /dev/null
+++ b/docs/build/html/_modules/quapy/data/_lequa2022.html
@@ -0,0 +1,307 @@
+quapy.data._lequa2022 — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation

Source code for quapy.data._lequa2022

+from typing import Tuple, Union
+import pandas as pd
+import numpy as np
+import os
+
+from quapy.protocol import AbstractProtocol
+
+DEV_SAMPLES = 1000
+TEST_SAMPLES = 5000
+
+ERROR_TOL = 1E-3
+
+
+
+[docs] +def load_category_map(path): + cat2code = {} + with open(path, 'rt') as fin: + for line in fin: + category, code = line.split() + cat2code[category] = int(code) + code2cat = [cat for cat, code in sorted(cat2code.items(), key=lambda x: x[1])] + return cat2code, code2cat
+ + + +
+[docs] +def load_raw_documents(path): + df = pd.read_csv(path) + documents = list(df["text"].values) + labels = None + if "label" in df.columns: + labels = df["label"].values.astype(int) + return documents, labels
+ + + +
+[docs] +def load_vector_documents(path): + D = pd.read_csv(path).to_numpy(dtype=float) + labelled = D.shape[1] == 301 + if labelled: + X, y = D[:, 1:], D[:, 0].astype(int).flatten() + else: + X, y = D, None + return X, y
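A sketch of these loaders (the file paths are assumptions about the LeQua 2022 data layout):

cat2code, code2cat = load_category_map('T2A/public/category_map.txt')  # e.g., {'sports': 0, ...}
docs, labels = load_raw_documents('T2A/public/training_data.txt')      # raw texts; labels may be None
X, y = load_vector_documents('T1A/public/training_data.txt')           # 300-dim vectors; y is None if unlabelled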
+ + + +
+[docs] +class SamplesFromDir(AbstractProtocol): + + def __init__(self, path_dir:str, ground_truth_path:str, load_fn): + self.path_dir = path_dir + self.load_fn = load_fn + self.true_prevs = ResultSubmission.load(ground_truth_path) + + def __call__(self): + for id, prevalence in self.true_prevs.iterrows(): + sample, _ = self.load_fn(os.path.join(self.path_dir, f'{id}.txt')) + yield sample, prevalence
+ + + +
+[docs] +class ResultSubmission: + + def __init__(self): + self.df = None + + def __init_df(self, categories: int): + if not isinstance(categories, int) or categories < 2: + raise TypeError('wrong format for categories: an int (>=2) was expected') + df = pd.DataFrame(columns=list(range(categories))) + df.index.set_names('id', inplace=True) + self.df = df + + @property + def n_categories(self): + return len(self.df.columns.values) + +
+[docs] + def add(self, sample_id: int, prevalence_values: np.ndarray): + if not isinstance(sample_id, int): + raise TypeError(f'error: expected int for sample_id, found {type(sample_id)}') + if not isinstance(prevalence_values, np.ndarray): + raise TypeError(f'error: expected np.ndarray for prevalence_values, found {type(prevalence_values)}') + if self.df is None: + self.__init_df(categories=len(prevalence_values)) + if sample_id in self.df.index.values: + raise ValueError(f'error: prevalence values for "{sample_id}" already added') + if prevalence_values.ndim != 1 or prevalence_values.size != self.n_categories: + raise ValueError(f'error: wrong shape found for prevalence vector {prevalence_values}') + if (prevalence_values < 0).any() or (prevalence_values > 1).any(): + raise ValueError(f'error: prevalence values out of range [0,1] for "{sample_id}"') + if np.abs(prevalence_values.sum() - 1) > ERROR_TOL: + raise ValueError(f'error: prevalence values do not sum up to one for "{sample_id}"' + f' (error tolerance {ERROR_TOL})') + + self.df.loc[sample_id] = prevalence_values
+ + + def __len__(self): + return len(self.df) + +
+[docs] + @classmethod + def load(cls, path: str) -> 'ResultSubmission': + df = ResultSubmission.check_file_format(path) + r = ResultSubmission() + r.df = df + return r
+ + +
+[docs] + def dump(self, path: str): + ResultSubmission.check_dataframe_format(self.df) + self.df.to_csv(path)
+ + +
+[docs] + def prevalence(self, sample_id: int): + sel = self.df.loc[sample_id] + if sel.empty: + return None + else: + return sel.values.flatten()
+ + +
+[docs] + def iterrows(self): + for index, row in self.df.iterrows(): + prevalence = row.values.flatten() + yield index, prevalence
+ + +
+[docs] + @classmethod + def check_file_format(cls, path) -> Union[pd.DataFrame, Tuple[pd.DataFrame, str]]: + try: + df = pd.read_csv(path, index_col=0) + except Exception as e: + print(f'the file {path} does not seem to be a valid csv file. ') + print(e) + raise + return ResultSubmission.check_dataframe_format(df, path=path)
+ + +
+[docs] + @classmethod + def check_dataframe_format(cls, df, path=None) -> Union[pd.DataFrame, Tuple[pd.DataFrame, str]]: + hint_path = '' # if given, show the data path in the error message + if path is not None: + hint_path = f' in {path}' + + if df.index.name != 'id' or len(df.columns) < 2: + raise ValueError(f'wrong header{hint_path}, ' + f'the format of the header should be "id,0,...,n-1", ' + f'where n is the number of categories') + if [int(ci) for ci in df.columns.values] != list(range(len(df.columns))): + raise ValueError(f'wrong header{hint_path}, category ids should be 0,1,2,...,n-1, ' + f'where n is the number of categories') + if df.empty: + raise ValueError(f'error{hint_path}: results file is empty') + elif len(df) != DEV_SAMPLES and len(df) != TEST_SAMPLES: + raise ValueError(f'wrong number of prevalence values found{hint_path}; ' + f'expected {DEV_SAMPLES} for development sets and ' + f'{TEST_SAMPLES} for test sets; found {len(df)}') + + ids = set(df.index.values) + expected_ids = set(range(len(df))) + if ids != expected_ids: + missing = expected_ids - ids + if missing: + raise ValueError(f'there are {len(missing)} missing ids{hint_path}: {sorted(missing)}') + unexpected = ids - expected_ids + if unexpected: + raise ValueError(f'there are {len(unexpected)} unexpected ids{hint_path}: {sorted(unexpected)}') + + for category_id in df.columns: + if (df[category_id] < 0).any() or (df[category_id] > 1).any(): + raise ValueError(f'error{hint_path} column "{category_id}" contains values out of range [0,1]') + + prevs = df.values + round_errors = np.abs(prevs.sum(axis=-1) - 1.) > ERROR_TOL + if round_errors.any(): + raise ValueError(f'warning: prevalence values in rows with id {np.where(round_errors)[0].tolist()} ' + f'do not sum up to 1 (error tolerance {ERROR_TOL}), ' + f'probably due to some rounding errors.') + + return df
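A sketch of building, dumping, and re-loading a submission (the prevalence values are illustrative; a development submission must contain exactly DEV_SAMPLES rows):

import numpy as np

submission = ResultSubmission()
for sample_id in range(DEV_SAMPLES):
    submission.add(sample_id, np.asarray([0.75, 0.25]))  # must sum to 1 within ERROR_TOL
submission.dump('predictions.csv')

reloaded = ResultSubmission.load('predictions.csv')      # validates the file format
print(reloaded.prevalence(0))                            # [0.75 0.25]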
+
\ No newline at end of file
diff --git a/docs/build/html/_modules/quapy/data/base.html b/docs/build/html/_modules/quapy/data/base.html
new file mode 100644
index 0000000..e3a2e89
--- /dev/null
+++ b/docs/build/html/_modules/quapy/data/base.html
@@ -0,0 +1,728 @@
+quapy.data.base — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation

Source code for quapy.data.base

+import itertools
+from functools import cached_property
+from typing import Iterable
+
+import numpy as np
+from scipy.sparse import issparse
+from scipy.sparse import vstack
+from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold
+from numpy.random import RandomState
+from quapy.functional import strprev
+from quapy.util import temp_seed
+
+
+
+[docs] +class LabelledCollection: + """ + A LabelledCollection is a set of objects, each with a label attached to it. + This class implements several sampling routines and other utilities. + + :param instances: array-like (np.ndarray, list, or csr_matrix are supported) + :param labels: array-like with the same length as instances + :param classes: optional, list of classes from which labels are taken. If not specified, the classes are inferred + from the labels. The classes must be indicated in cases in which some of the labels might have no examples + (i.e., a prevalence of 0) + """ + + def __init__(self, instances, labels, classes=None): + if issparse(instances): + self.instances = instances + elif isinstance(instances, list) and len(instances) > 0 and isinstance(instances[0], str): + # lists of strings occupy too much memory when stored as ndarrays of strings (keeping them as + # python objects adds some overhead, but far less) + self.instances = np.asarray(instances, dtype=object) + else: + self.instances = np.asarray(instances) + self.labels = np.asarray(labels) + n_docs = len(self) + if classes is None: + self.classes_ = np.unique(self.labels) + self.classes_.sort() + else: + self.classes_ = np.unique(np.asarray(classes)) + self.classes_.sort() + if len(set(self.labels).difference(set(classes))) > 0: + raise ValueError(f'labels ({set(self.labels)}) contain values not included in classes_ ({set(classes)})') + self.index = {class_: np.arange(n_docs)[self.labels == class_] for class_ in self.classes_}
+[docs] + @classmethod + def load(cls, path: str, loader_func: callable, classes=None, **loader_kwargs): + """ + Loads a labelled set of data and converts it into a :class:`LabelledCollection` instance. The function in charge + of reading the instances must be specified. This function can be a custom one, or any of the reading functions + defined in :mod:`quapy.data.reader` module. + + :param path: string, the path to the file containing the labelled instances + :param loader_func: a custom function that implements the data loader and returns a tuple with instances and + labels + :param classes: array-like, the classes according to which the instances are labelled + :param loader_kwargs: any argument that the `loader_func` function needs in order to read the instances, i.e., + these arguments are used to call `loader_func(path, **loader_kwargs)` + :return: a :class:`LabelledCollection` object + """ + return LabelledCollection(*loader_func(path, **loader_kwargs), classes)
+ + + def __len__(self): + """ + Returns the length of this collection (number of labelled instances) + + :return: integer + """ + return self.instances.shape[0] + +
+[docs] + def prevalence(self): + """ + Returns the prevalence, or relative frequency, of the classes in the codeframe. + + :return: a np.ndarray of shape `(n_classes)` with the relative frequencies of each class, in the same order + as listed by `self.classes_` + """ + return self.counts() / len(self)
+ + +
+[docs] + def counts(self): + """ + Returns the number of instances for each of the classes in the codeframe. + + :return: a np.ndarray of shape `(n_classes)` with the number of instances of each class, in the same order + as listed by `self.classes_` + """ + return np.asarray([len(self.index[class_]) for class_ in self.classes_])
+ + + @property + def n_classes(self): + """ + The number of classes + + :return: integer + """ + return len(self.classes_) + + @property + def binary(self): + """ + Returns True if the number of classes is 2 + + :return: boolean + """ + return self.n_classes == 2 + +
+[docs] + def sampling_index(self, size, *prevs, shuffle=True, random_state=None): + """ + Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the + prevalence values are not specified, then returns the index of a uniform sampling. + For each class, the sampling is drawn with replacement if the requested prevalence is larger than + the actual prevalence of the class, or without replacement otherwise. + + :param size: integer, the requested size + :param prevs: the prevalence for each class; the prevalence value for the last class can be left empty since + it is constrained. E.g., for binary collections, only the prevalence `p` for the first class (as listed in + `self.classes_`) can be specified, while the other class takes prevalence value `1-p` + :param shuffle: if set to True (default), shuffles the index before returning it + :param random_state: seed for reproducing sampling + :return: a np.ndarray of shape `(size)` with the indexes + """ + if len(prevs) == 0: # no prevalence was indicated; returns an index for uniform sampling + return self.uniform_sampling_index(size, random_state=random_state) + if len(prevs) == self.n_classes - 1: + prevs = prevs + (1 - sum(prevs),) + assert len(prevs) == self.n_classes, 'unexpected number of prevalences' + assert sum(prevs) == 1, f'prevalences ({prevs}) do not sum up to 1 (sum={sum(prevs)})' + + # Decide how many instances should be taken for each class in order to satisfy the requested prevalence + # accurately, and the number of instances in the sample (exactly). If int(size * prevs[i]) (which is + # <= size * prevs[i]) examples are drawn from class i, there could be a remainder number of instances to take + # to satisfy the size constraint. The remainder is distributed along the classes with probability = prevs. + # (This aims at avoiding the remainder being placed in a class for which the requested prevalence is 0.) + n_requests = {class_: round(size * prevs[i]) for i, class_ in enumerate(self.classes_)} + remainder = size - sum(n_requests.values()) + with temp_seed(random_state): + # due to rounding, the remainder can be 0, >0, or <0 + if remainder > 0: + # when the remainder is >0 we randomly add 1 to the requests for each class; + # more prevalent classes are more likely to be taken in order to minimize the impact in the final prevalence + for rand_class in np.random.choice(self.classes_, size=remainder, p=prevs): + n_requests[rand_class] += 1 + elif remainder < 0: + # when the remainder is <0 we randomly remove 1 from the requests, unless the request is 0 for a chosen + # class; we repeat until remainder==0 + while remainder!=0: + rand_class = np.random.choice(self.classes_, p=prevs) + if n_requests[rand_class] > 0: + n_requests[rand_class] -= 1 + remainder += 1 + + indexes_sample = [] + for class_, n_requested in n_requests.items(): + n_candidates = len(self.index[class_]) + index_sample = self.index[class_][ + np.random.choice(n_candidates, size=n_requested, replace=(n_requested > n_candidates)) + ] if n_requested > 0 else [] + + indexes_sample.append(index_sample) + + indexes_sample = np.concatenate(indexes_sample).astype(int) + + if shuffle: + indexes_sample = np.random.permutation(indexes_sample) + + return indexes_sample
+ + +
+[docs] + def uniform_sampling_index(self, size, random_state=None): + """ + Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn + with replacement if the requested size is greater than the number of instances, or without replacement + otherwise. + + :param size: integer, the size of the uniform sample + :param random_state: if specified, guarantees reproducibility of the split. + :return: a np.ndarray of shape `(size)` with the indexes + """ + if random_state is not None: + ng = RandomState(seed=random_state) + else: + ng = np.random + return ng.choice(len(self), size, replace=size > len(self))
+ + +
+[docs]
+    def sampling(self, size, *prevs, shuffle=True, random_state=None):
+        """
+        Returns a random sample (an instance of :class:`LabelledCollection`) of desired size and desired prevalence
+        values. For each class, the sampling is drawn with replacement if the requested prevalence is larger than
+        the actual prevalence of the class, or without replacement otherwise.
+
+        :param size: integer, the requested size
+        :param prevs: the prevalence for each class; the prevalence value for the last class can be left empty since
+            it is constrained. E.g., for binary collections, only the prevalence `p` for the first class (as listed in
+            `self.classes_`) can be specified, while the other class takes prevalence value `1-p`
+        :param shuffle: if set to True (default), shuffles the index before returning it
+        :param random_state: seed for reproducing sampling
+        :return: an instance of :class:`LabelledCollection` with length == `size` and prevalence close to `prevs` (or
+            prevalence == `prevs` if the exact prevalence values can be met as proportions of instances)
+        """
+        prev_index = self.sampling_index(size, *prevs, shuffle=shuffle, random_state=random_state)
+        return self.sampling_from_index(prev_index)
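+    # Usage sketch: sampling() composes sampling_index() and sampling_from_index() in one call
+    # (`lc` is a hypothetical binary LabelledCollection):
+    #
+    # >>> sample = lc.sampling(500, 0.25, random_state=42)
+    # >>> sample.prevalence()   # approximately [0.25, 0.75]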
+ + +
+[docs] + def uniform_sampling(self, size, random_state=None): + """ + Returns a uniform sample (an instance of :class:`LabelledCollection`) of desired size. The sampling is drawn + with replacement if the requested size is greater than the number of instances, or without replacement + otherwise. + + :param size: integer, the requested size + :param random_state: if specified, guarantees reproducibility of the split. + :return: an instance of :class:`LabelledCollection` with length == `size` + """ + unif_index = self.uniform_sampling_index(size, random_state=random_state) + return self.sampling_from_index(unif_index)
+ + +
+[docs] + def sampling_from_index(self, index): + """ + Returns an instance of :class:`LabelledCollection` whose elements are sampled from this collection using the + index. + + :param index: np.ndarray + :return: an instance of :class:`LabelledCollection` + """ + documents = self.instances[index] + labels = self.labels[index] + return LabelledCollection(documents, labels, classes=self.classes_)
+ + +
+[docs]
+    def split_stratified(self, train_prop=0.6, random_state=None):
+        """
+        Returns two instances of :class:`LabelledCollection` split with stratification from this collection, at desired
+        proportion.
+
+        :param train_prop: the proportion of elements to include in the left-most returned collection (typically used
+            as the training collection). The rest of the elements are included in the right-most returned collection
+            (typically used as a test collection).
+        :param random_state: if specified, guarantees reproducibility of the split.
+        :return: two instances of :class:`LabelledCollection`, the first one with `train_prop` elements, and the
+            second one with `1-train_prop` elements
+        """
+        tr_docs, te_docs, tr_labels, te_labels = train_test_split(
+            self.instances, self.labels, train_size=train_prop, stratify=self.labels, random_state=random_state
+        )
+        training = LabelledCollection(tr_docs, tr_labels, classes=self.classes_)
+        test = LabelledCollection(te_docs, te_labels, classes=self.classes_)
+        return training, test
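+    # Usage sketch: a stratified 70/30 split preserves the class prevalence in both returned
+    # collections (`lc` is a hypothetical LabelledCollection):
+    #
+    # >>> train, test = lc.split_stratified(train_prop=0.7, random_state=0)
+    # >>> train.prevalence()   # close to lc.prevalence(), and likewise for test.prevalence()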
+ + +
+[docs]
+    def split_random(self, train_prop=0.6, random_state=None):
+        """
+        Returns two instances of :class:`LabelledCollection` split randomly from this collection, at desired
+        proportion.
+
+        :param train_prop: the proportion of elements to include in the left-most returned collection (typically used
+            as the training collection), expressed either as a float in (0,1) or as an integer indicating the absolute
+            number of elements. The rest of the elements are included in the right-most returned collection
+            (typically used as a test collection).
+        :param random_state: if specified, guarantees reproducibility of the split.
+        :return: two instances of :class:`LabelledCollection`, the first one with `train_prop` elements, and the
+            second one with `1-train_prop` elements
+        """
+        indexes = np.random.RandomState(seed=random_state).permutation(len(self))
+        if isinstance(train_prop, int):
+            assert train_prop < len(self), \
+                'argument train_prop cannot be greater than the number of elements in the collection'
+            splitpoint = train_prop
+        elif isinstance(train_prop, float):
+            assert 0 < train_prop < 1, \
+                'argument train_prop out of range (0,1)'
+            splitpoint = int(np.round(len(self)*train_prop))
+        left, right = indexes[:splitpoint], indexes[splitpoint:]
+        training = self.sampling_from_index(left)
+        test = self.sampling_from_index(right)
+        return training, test
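+    # Usage sketch: train_prop can also be an absolute integer; unlike split_stratified, the
+    # resulting prevalences may drift from those of the original collection:
+    #
+    # >>> train, test = lc.split_random(train_prop=1000, random_state=0)
+    # >>> len(train), len(test)   # (1000, len(lc) - 1000)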
+ + + def __add__(self, other): + """ + Returns a new :class:`LabelledCollection` as the union of this collection with another collection. + Both labelled collections must have the same classes. + + :param other: another :class:`LabelledCollection` + :return: a :class:`LabelledCollection` representing the union of both collections + """ + if not all(np.sort(self.classes_)==np.sort(other.classes_)): + raise NotImplementedError(f'unsupported operation for collections on different classes; ' + f'expected {self.classes_}, found {other.classes_}') + return LabelledCollection.join(self, other) + +
+[docs]
+    @classmethod
+    def join(cls, *args: Iterable['LabelledCollection']):
+        """
+        Returns a new :class:`LabelledCollection` as the union of the collections given in input.
+
+        :param args: instances of :class:`LabelledCollection`
+        :return: a :class:`LabelledCollection` representing the union of all the given collections
+        """
+
+        args = [lc for lc in args if lc is not None]
+        assert len(args) > 0, 'empty list is not allowed for join'
+
+        assert all([isinstance(lc, LabelledCollection) for lc in args]), \
+            'only instances of LabelledCollection allowed'
+
+        first_instances = args[0].instances
+        first_type = type(first_instances)
+        assert all([type(lc.instances)==first_type for lc in args[1:]]), \
+            'not all the collections are of instances of the same type'
+
+        if issparse(first_instances) or isinstance(first_instances, np.ndarray):
+            first_ndim = first_instances.ndim
+            assert all([lc.instances.ndim == first_ndim for lc in args[1:]]), \
+                'not all the ndarrays are of the same dimension'
+            if first_ndim > 1:
+                first_shape = first_instances.shape[1:]
+                assert all([lc.instances.shape[1:] == first_shape for lc in args[1:]]), \
+                    'not all the ndarrays are of the same shape'
+            if issparse(first_instances):
+                instances = vstack([lc.instances for lc in args])
+            else:
+                instances = np.concatenate([lc.instances for lc in args])
+        elif isinstance(first_instances, list):
+            # chain.from_iterable flattens the per-collection lists into one list of instances
+            instances = list(itertools.chain.from_iterable(lc.instances for lc in args))
+        else:
+            raise NotImplementedError('unsupported operation for collection types')
+        labels = np.concatenate([lc.labels for lc in args])
+        classes = np.unique(labels)  # np.unique already returns the sorted distinct labels
+        return LabelledCollection(instances, labels, classes=classes)
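+    # Usage sketch: join concatenates instances and labels, re-deriving the class set from the
+    # union of labels (`lc_a` and `lc_b` are hypothetical collections over instances of the same type):
+    #
+    # >>> merged = LabelledCollection.join(lc_a, lc_b)
+    # >>> len(merged) == len(lc_a) + len(lc_b)   # True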
+ + + @property + def Xy(self): + """ + Gets the instances and labels. This is useful when working with `sklearn` estimators, e.g.: + + >>> svm = LinearSVC().fit(*my_collection.Xy) + + :return: a tuple `(instances, labels)` from this collection + """ + return self.instances, self.labels + + @property + def Xp(self): + """ + Gets the instances and the true prevalence. This is useful when implementing evaluation protocols from + a :class:`LabelledCollection` object. + + :return: a tuple `(instances, prevalence)` from this collection + """ + return self.instances, self.prevalence() + + @property + def X(self): + """ + An alias to self.instances + + :return: self.instances + """ + return self.instances + + @property + def y(self): + """ + An alias to self.labels + + :return: self.labels + """ + return self.labels + + @property + def p(self): + """ + An alias to self.prevalence() + + :return: self.prevalence() + """ + return self.prevalence() + + +
+[docs]
+    def stats(self, show=True):
+        """
+        Returns (and optionally prints) a dictionary with some stats of this collection. E.g.,:
+
+        >>> data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)
+        >>> data.training.stats()
+        >>> #instances=3821, type=<class 'scipy.sparse.csr.csr_matrix'>, #features=4403, #classes=[0 1], prevs=[0.081, 0.919]
+
+        :param show: if set to True (default), prints the stats in standard output
+        :return: a dictionary containing some stats of this collection. The keys are `instances` (the number of
+            instances), `type` (the type representing the instances), `features` (the number of features, if the
+            instances are in array-like format), `classes` (the classes of the collection), and `prevs` (the
+            prevalence values for each class)
+        """
+        ninstances = len(self)
+        instance_type = type(self.instances[0])
+        if instance_type == list:
+            nfeats = len(self.instances[0])
+        elif instance_type == np.ndarray or issparse(self.instances):
+            nfeats = self.instances.shape[1]
+        else:
+            nfeats = '?'
+        stats_ = {'instances': ninstances,
+                  'type': instance_type,
+                  'features': nfeats,
+                  'classes': self.classes_,
+                  'prevs': strprev(self.prevalence())}
+        if show:
+            print(f'#instances={stats_["instances"]}, type={stats_["type"]}, #features={stats_["features"]}, '
+                  f'#classes={stats_["classes"]}, prevs={stats_["prevs"]}')
+        return stats_
+ + +
+[docs]
+    def kFCV(self, nfolds=5, nrepeats=1, random_state=None):
+        """
+        Generator of stratified folds to be used in k-fold cross validation.
+
+        :param nfolds: integer (default 5), the number of folds to generate
+        :param nrepeats: integer (default 1), the number of rounds of k-fold cross validation to run
+        :param random_state: integer (default None), guarantees that the folds generated are reproducible
+        :return: yields `nfolds * nrepeats` pairs of (training, test) folds for k-fold cross validation
+        """
+        kf = RepeatedStratifiedKFold(n_splits=nfolds, n_repeats=nrepeats, random_state=random_state)
+        for train_index, test_index in kf.split(*self.Xy):
+            train = self.sampling_from_index(train_index)
+            test = self.sampling_from_index(test_index)
+            yield train, test
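+    # Usage sketch: two rounds of 5-fold cross validation, i.e., the 5FCVx2 protocol referenced
+    # by the UCI loaders in quapy.data.datasets (`lc` is a hypothetical LabelledCollection):
+    #
+    # >>> for train, test in lc.kFCV(nfolds=5, nrepeats=2, random_state=0):
+    # >>>     pass   # fit a quantifier on train, evaluate it on test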
+
+ + + +
+[docs] +class Dataset: + """ + Abstraction of training and test :class:`LabelledCollection` objects. + + :param training: a :class:`LabelledCollection` instance + :param test: a :class:`LabelledCollection` instance + :param vocabulary: if indicated, is a dictionary of the terms used in this textual dataset + :param name: a string representing the name of the dataset + """ + + def __init__(self, training: LabelledCollection, test: LabelledCollection, vocabulary: dict = None, name=''): + assert set(training.classes_) == set(test.classes_), 'incompatible labels in training and test collections' + self.training = training + self.test = test + self.vocabulary = vocabulary + self.name = name + +
+[docs]
+    @classmethod
+    def SplitStratified(cls, collection: LabelledCollection, train_size=0.6):
+        """
+        Generates a :class:`Dataset` from a stratified split of a :class:`LabelledCollection` instance.
+        See :meth:`LabelledCollection.split_stratified`
+
+        :param collection: :class:`LabelledCollection`
+        :param train_size: the proportion of training documents (the rest makes up the test split)
+        :return: an instance of :class:`Dataset`
+        """
+        return Dataset(*collection.split_stratified(train_prop=train_size))
+ + + @property + def classes_(self): + """ + The classes according to which the training collection is labelled + + :return: The classes according to which the training collection is labelled + """ + return self.training.classes_ + + @property + def n_classes(self): + """ + The number of classes according to which the training collection is labelled + + :return: integer + """ + return self.training.n_classes + + @property + def binary(self): + """ + Returns True if the training collection is labelled according to two classes + + :return: boolean + """ + return self.training.binary + +
+[docs]
+    @classmethod
+    def load(cls, train_path, test_path, loader_func: callable, classes=None, **loader_kwargs):
+        """
+        Loads a training and a test labelled set of data and converts them into a :class:`Dataset` instance.
+        The function in charge of reading the instances must be specified. This function can be a custom one, or any of
+        the reading functions defined in the :mod:`quapy.data.reader` module.
+
+        :param train_path: string, the path to the file containing the training instances
+        :param test_path: string, the path to the file containing the test instances
+        :param loader_func: a custom function that implements the data loader and returns a tuple with instances and
+            labels
+        :param classes: array-like, the classes according to which the instances are labelled
+        :param loader_kwargs: any argument that the `loader_func` function needs in order to read the instances.
+            See :meth:`LabelledCollection.load` for further details.
+        :return: a :class:`Dataset` object
+        """
+
+        training = LabelledCollection.load(train_path, loader_func, classes, **loader_kwargs)
+        test = LabelledCollection.load(test_path, loader_func, classes, **loader_kwargs)
+        return Dataset(training, test)
+
+
+    @property
+    def vocabulary_size(self):
+        """
+        If the dataset is textual, and the vocabulary was indicated, returns the size of the vocabulary
+
+        :return: integer
+        """
+        return len(self.vocabulary)
+
+    @property
+    def train_test(self):
+        """
+        Alias to `self.training` and `self.test`
+
+        :return: the training and test collections
+        """
+        return self.training, self.test
+
+
+[docs]
+    def stats(self, show=True):
+        """
+        Returns (and optionally prints) a dictionary with some stats of this dataset. E.g.,:
+
+        >>> data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)
+        >>> data.stats()
+        >>> Dataset=kindle #tr-instances=3821, #te-instances=21591, type=<class 'scipy.sparse.csr.csr_matrix'>, #features=4403, #classes=[0 1], tr-prevs=[0.081, 0.919], te-prevs=[0.063, 0.937]
+
+        :param show: if set to True (default), prints the stats in standard output
+        :return: a dictionary containing some stats of this collection for the training and test collections. The keys
+            are `train` and `test`, and point to dedicated dictionaries of stats, for each collection, with keys
+            `instances` (the number of instances), `type` (the type representing the instances),
+            `features` (the number of features, if the instances are in array-like format), `classes` (the classes of
+            the collection), and `prevs` (the prevalence values for each class)
+        """
+        tr_stats = self.training.stats(show=False)
+        te_stats = self.test.stats(show=False)
+        if show:
+            print(f'Dataset={self.name} #tr-instances={tr_stats["instances"]}, #te-instances={te_stats["instances"]}, '
+                  f'type={tr_stats["type"]}, #features={tr_stats["features"]}, #classes={tr_stats["classes"]}, '
+                  f'tr-prevs={tr_stats["prevs"]}, te-prevs={te_stats["prevs"]}')
+        return {'train': tr_stats, 'test': te_stats}
+ + +
+[docs]
+    @classmethod
+    def kFCV(cls, data: LabelledCollection, nfolds=5, nrepeats=1, random_state=0):
+        """
+        Generator of stratified folds to be used in k-fold cross validation. This function is only a wrapper around
+        :meth:`LabelledCollection.kFCV` that returns :class:`Dataset` instances made of training and test folds.
+
+        :param data: the :class:`LabelledCollection` to be split into folds
+        :param nfolds: integer (default 5), the number of folds to generate
+        :param nrepeats: integer (default 1), the number of rounds of k-fold cross validation to run
+        :param random_state: integer (default 0), guarantees that the folds generated are reproducible
+        :return: yields `nfolds * nrepeats` folds for k-fold cross validation as instances of :class:`Dataset`
+        """
+        for i, (train, test) in enumerate(data.kFCV(nfolds=nfolds, nrepeats=nrepeats, random_state=random_state)):
+            yield Dataset(train, test, name=f'fold {(i % nfolds) + 1}/{nfolds} (round={(i // nfolds) + 1})')
+ + + +
+[docs] + def reduce(self, n_train=100, n_test=100): + """ + Reduce the number of instances in place for quick experiments. Preserves the prevalence of each set. + + :param n_train: number of training documents to keep (default 100) + :param n_test: number of test documents to keep (default 100) + :return: self + """ + self.training = self.training.sampling(n_train, *self.training.prevalence()) + self.test = self.test.sampling(n_test, *self.test.prevalence()) + return self
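+    # Usage sketch: handy for smoke-testing an experimental pipeline before running it at full
+    # scale (`dataset` is a hypothetical Dataset instance); note that reduce() modifies self:
+    #
+    # >>> dataset.reduce(n_train=100, n_test=100)
+    # >>> len(dataset.training), len(dataset.test)   # (100, 100)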
+
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/data/datasets.html b/docs/build/html/_modules/quapy/data/datasets.html new file mode 100644 index 0000000..02c1ac7 --- /dev/null +++ b/docs/build/html/_modules/quapy/data/datasets.html @@ -0,0 +1,945 @@ + + + + + + quapy.data.datasets — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.data.datasets

+
+[docs]
+def warn(*args, **kwargs):
+    # no-op replacement for warnings.warn (monkey-patched below) that silences third-party warnings
+    pass
+ +import warnings +warnings.warn = warn +import os +import zipfile +from os.path import join +import pandas as pd +from ucimlrepo import fetch_ucirepo +from quapy.data.base import Dataset, LabelledCollection +from quapy.data.preprocessing import text2tfidf, reduce_columns +from quapy.data.reader import * +from quapy.util import download_file_if_not_exists, download_file, get_quapy_home, pickled_resource + + +REVIEWS_SENTIMENT_DATASETS = ['hp', 'kindle', 'imdb'] +TWITTER_SENTIMENT_DATASETS_TEST = ['gasp', 'hcr', 'omd', 'sanders', + 'semeval13', 'semeval14', 'semeval15', 'semeval16', + 'sst', 'wa', 'wb'] +TWITTER_SENTIMENT_DATASETS_TRAIN = ['gasp', 'hcr', 'omd', 'sanders', + 'semeval', 'semeval16', + 'sst', 'wa', 'wb'] +UCI_BINARY_DATASETS = ['acute.a', 'acute.b', + 'balance.1', 'balance.2', 'balance.3', + 'breast-cancer', + 'cmc.1', 'cmc.2', 'cmc.3', + 'ctg.1', 'ctg.2', 'ctg.3', + #'diabetes', # <-- I haven't found this one... + 'german', + 'haberman', + 'ionosphere', + 'iris.1', 'iris.2', 'iris.3', + 'mammographic', + 'pageblocks.5', + #'phoneme', # <-- I haven't found this one... + 'semeion', + 'sonar', + 'spambase', + 'spectf', + 'tictactoe', + 'transfusion', + 'wdbc', + 'wine.1', 'wine.2', 'wine.3', + 'wine-q-red', 'wine-q-white', + 'yeast'] + +UCI_MULTICLASS_DATASETS = ['dry-bean', + 'wine-quality', + 'academic-success', + 'digits', + 'letter'] + +LEQUA2022_TASKS = ['T1A', 'T1B', 'T2A', 'T2B'] + +_TXA_SAMPLE_SIZE = 250 +_TXB_SAMPLE_SIZE = 1000 + +LEQUA2022_SAMPLE_SIZE = { + 'TXA': _TXA_SAMPLE_SIZE, + 'TXB': _TXB_SAMPLE_SIZE, + 'T1A': _TXA_SAMPLE_SIZE, + 'T1B': _TXB_SAMPLE_SIZE, + 'T2A': _TXA_SAMPLE_SIZE, + 'T2B': _TXB_SAMPLE_SIZE, + 'binary': _TXA_SAMPLE_SIZE, + 'multiclass': _TXB_SAMPLE_SIZE +} + + +
+[docs]
+def fetch_reviews(dataset_name, tfidf=False, min_df=None, data_home=None, pickle=False) -> Dataset:
+    """
+    Loads a Reviews dataset as a Dataset instance, as used in
+    `Esuli, A., Moreo, A., and Sebastiani, F. "A recurrent neural network for sentiment quantification."
+    Proceedings of the 27th ACM International Conference on Information and Knowledge Management. 2018. <https://dl.acm.org/doi/abs/10.1145/3269206.3269287>`_.
+    The list of valid dataset names can be accessed in `quapy.data.datasets.REVIEWS_SENTIMENT_DATASETS`
+
+    :param dataset_name: the name of the dataset: valid ones are 'hp', 'kindle', 'imdb'
+    :param tfidf: set to True to transform the raw documents into tfidf weighted matrices
+    :param min_df: minimum number of documents that should contain a term in order for the term to be
+        kept (ignored if tfidf==False)
+    :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default
+        ~/quapy_data/ directory)
+    :param pickle: set to True to pickle the Dataset object the first time it is generated, in order to allow for
+        faster subsequent invocations
+    :return: a :class:`quapy.data.base.Dataset` instance
+    """
+    assert dataset_name in REVIEWS_SENTIMENT_DATASETS, \
+        f'Name {dataset_name} does not match any known dataset for sentiment reviews. ' \
+        f'Valid ones are {REVIEWS_SENTIMENT_DATASETS}'
+    if data_home is None:
+        data_home = get_quapy_home()
+
+    URL_TRAIN = f'https://zenodo.org/record/4117827/files/{dataset_name}_train.txt'
+    URL_TEST = f'https://zenodo.org/record/4117827/files/{dataset_name}_test.txt'
+    os.makedirs(join(data_home, 'reviews'), exist_ok=True)
+    train_path = join(data_home, 'reviews', dataset_name, 'train.txt')
+    test_path = join(data_home, 'reviews', dataset_name, 'test.txt')
+    download_file_if_not_exists(URL_TRAIN, train_path)
+    download_file_if_not_exists(URL_TEST, test_path)
+
+    pickle_path = None
+    if pickle:
+        pickle_path = join(data_home, 'reviews', 'pickle', f'{dataset_name}.pkl')
+    data = pickled_resource(pickle_path, Dataset.load, train_path, test_path, from_text)
+
+    if tfidf:
+        text2tfidf(data, inplace=True)
+        if min_df is not None:
+            reduce_columns(data, min_df=min_df, inplace=True)
+
+    data.name = dataset_name
+
+    return data
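+# Usage sketch: loads the 'kindle' reviews as tfidf vectors, pickling the processed Dataset so
+# that subsequent calls are fast:
+#
+# >>> import quapy as qp
+# >>> data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5, pickle=True)
+# >>> data.stats()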
+ + + +
+[docs]
+def fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_home=None, pickle=False) -> Dataset:
+    """
+    Loads a Twitter dataset as a :class:`quapy.data.base.Dataset` instance, as used in:
+    `Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis.
+    Social Network Analysis and Mining 6(19), 1-22 (2016) <https://link.springer.com/content/pdf/10.1007/s13278-016-0327-z.pdf>`_
+    Note that the datasets 'semeval13', 'semeval14', 'semeval15' share the same training set.
+    The list of valid dataset names corresponding to training sets can be accessed in
+    `quapy.data.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN`, while the test sets can be accessed in
+    `quapy.data.datasets.TWITTER_SENTIMENT_DATASETS_TEST`
+
+    :param dataset_name: the name of the dataset: valid ones are 'gasp', 'hcr', 'omd', 'sanders', 'semeval13',
+        'semeval14', 'semeval15', 'semeval16', 'sst', 'wa', 'wb'
+    :param for_model_selection: if True, then returns the train split as the training set and the devel split
+        as the test set; if False, then returns the train+devel split as the training set and the test set as the
+        test set
+    :param min_df: minimum number of documents that should contain a term in order for the term to be kept
+    :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default
+        ~/quapy_data/ directory)
+    :param pickle: set to True to pickle the Dataset object the first time it is generated, in order to allow for
+        faster subsequent invocations
+    :return: a :class:`quapy.data.base.Dataset` instance
+    """
+    assert dataset_name in TWITTER_SENTIMENT_DATASETS_TRAIN + TWITTER_SENTIMENT_DATASETS_TEST, \
+        f'Name {dataset_name} does not match any known dataset for sentiment twitter. ' \
+        f'Valid ones are {TWITTER_SENTIMENT_DATASETS_TRAIN} for model selection and ' \
+        f'{TWITTER_SENTIMENT_DATASETS_TEST} for test (datasets "semeval13", "semeval14", "semeval15" share ' \
+        f'a common training set "semeval")'
+    if data_home is None:
+        data_home = get_quapy_home()
+
+    URL = 'https://zenodo.org/record/4255764/files/tweet_sentiment_quantification_snam.zip'
+    unzipped_path = join(data_home, 'tweet_sentiment_quantification_snam')
+    if not os.path.exists(unzipped_path):
+        downloaded_path = join(data_home, 'tweet_sentiment_quantification_snam.zip')
+        download_file(URL, downloaded_path)
+        with zipfile.ZipFile(downloaded_path) as file:
+            file.extractall(data_home)
+        os.remove(downloaded_path)
+
+    if dataset_name in {'semeval13', 'semeval14', 'semeval15'}:
+        trainset_name = 'semeval'
+        testset_name = 'semeval' if for_model_selection else dataset_name
+        print(f"the training and development sets for datasets 'semeval13', 'semeval14', 'semeval15' are common "
+              f"(called 'semeval'); returning training-set='{trainset_name}' and test-set='{testset_name}'")
+    else:
+        if dataset_name == 'semeval' and not for_model_selection:
+            raise ValueError('dataset "semeval" can only be used for model selection. '
+                             'Use "semeval13", "semeval14", or "semeval15" for model evaluation.')
+        trainset_name = testset_name = dataset_name
+
+    if for_model_selection:
+        train = join(unzipped_path, 'train', f'{trainset_name}.train.feature.txt')
+        test = join(unzipped_path, 'test', f'{testset_name}.dev.feature.txt')
+    else:
+        train = join(unzipped_path, 'train', f'{trainset_name}.train+dev.feature.txt')
+        if dataset_name == 'semeval16':  # there is a different test name in the case of semeval16 only
+            test = join(unzipped_path, 'test', f'{testset_name}.dev-test.feature.txt')
+        else:
+            test = join(unzipped_path, 'test', f'{testset_name}.test.feature.txt')
+
+    pickle_path = None
+    if pickle:
+        mode = "train-dev" if for_model_selection else "train+dev-test"
+        pickle_path = join(unzipped_path, 'pickle', f'{testset_name}.{mode}.pkl')
+    data = pickled_resource(pickle_path, Dataset.load, train, test, from_sparse)
+
+    if min_df is not None:
+        reduce_columns(data, min_df=min_df, inplace=True)
+
+    data.name = dataset_name
+
+    return data
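+# Usage sketch: use the train/dev split for model selection, then reload with
+# for_model_selection=False to train on train+dev and evaluate on the test set:
+#
+# >>> import quapy as qp
+# >>> devel = qp.datasets.fetch_twitter('hcr', for_model_selection=True, min_df=5)
+# >>> final = qp.datasets.fetch_twitter('hcr', for_model_selection=False, min_df=5)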
+ + + +
+[docs]
+def fetch_UCIBinaryDataset(dataset_name, data_home=None, test_split=0.3, verbose=False) -> Dataset:
+    """
+    Loads a UCI dataset as an instance of :class:`quapy.data.base.Dataset`, as used in
+    `Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).
+    Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.
+    Information Fusion, 34, 87-100. <https://www.sciencedirect.com/science/article/pii/S1566253516300628>`_
+    and
+    `Pérez-Gállego, P., Castano, A., Quevedo, J. R., & del Coz, J. J. (2019).
+    Dynamic ensemble selection for quantification tasks.
+    Information Fusion, 45, 1-15. <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.
+    The datasets do not come with a predefined train-test split (see :meth:`fetch_UCIBinaryLabelledCollection` for
+    further information on how to use these collections), and so a train-test split is generated at the desired
+    proportion.
+    The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_BINARY_DATASETS`
+
+    :param dataset_name: a dataset name
+    :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default
+        ~/quapy_data/ directory)
+    :param test_split: proportion of documents to be included in the test set. The rest makes up the training set
+    :param verbose: set to True (default is False) to get information (from the UCI ML repository) about the datasets
+    :return: a :class:`quapy.data.base.Dataset` instance
+    """
+    data = fetch_UCIBinaryLabelledCollection(dataset_name, data_home, verbose)
+    return Dataset(*data.split_stratified(1 - test_split, random_state=0))
+ + + +
+[docs]
+def fetch_UCIBinaryLabelledCollection(dataset_name, data_home=None, verbose=False) -> LabelledCollection:
+    """
+    Loads a UCI collection as an instance of :class:`quapy.data.base.LabelledCollection`, as used in
+    `Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).
+    Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.
+    Information Fusion, 34, 87-100. <https://www.sciencedirect.com/science/article/pii/S1566253516300628>`_
+    and
+    `Pérez-Gállego, P., Castano, A., Quevedo, J. R., & del Coz, J. J. (2019).
+    Dynamic ensemble selection for quantification tasks.
+    Information Fusion, 45, 1-15. <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.
+    The datasets do not come with a predefined train-test split, and so Pérez-Gállego et al. adopted a 5FCVx2 evaluation
+    protocol, meaning that each collection was used to generate two rounds (hence the x2) of 5-fold cross validation.
+    This can be reproduced by using :meth:`quapy.data.base.Dataset.kFCV`, e.g.:
+
+    >>> import quapy as qp
+    >>> collection = qp.datasets.fetch_UCIBinaryLabelledCollection("yeast")
+    >>> for data in qp.data.Dataset.kFCV(collection, nfolds=5, nrepeats=2):
+    >>>     ...
+
+    The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_BINARY_DATASETS`
+
+    :param dataset_name: a dataset name
+    :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default
+        ~/quapy_data/ directory)
+    :param verbose: set to True (default is False) to get information (from the UCI ML repository) about the datasets
+    :return: a :class:`quapy.data.base.LabelledCollection` instance
+    """
+
+    assert dataset_name in UCI_BINARY_DATASETS, \
+        f'Name {dataset_name} does not match any known dataset from the UCI Machine Learning datasets repository. ' \
+        f'Valid ones are {UCI_BINARY_DATASETS}'
+    if data_home is None:
+        data_home = get_quapy_home()
+
+    dataset_fullname = {
+        'acute.a': 'Acute Inflammations (urinary bladder)',
+        'acute.b': 'Acute Inflammations (renal pelvis)',
+        'balance.1': 'Balance Scale Weight & Distance Database (left)',
+        'balance.2': 'Balance Scale Weight & Distance Database (balanced)',
+        'balance.3': 'Balance Scale Weight & Distance Database (right)',
+        'breast-cancer': 'Breast Cancer Wisconsin (Original)',
+        'cmc.1': 'Contraceptive Method Choice (no use)',
+        'cmc.2': 'Contraceptive Method Choice (long term)',
+        'cmc.3': 'Contraceptive Method Choice (short term)',
+        'ctg.1': 'Cardiotocography Data Set (normal)',
+        'ctg.2': 'Cardiotocography Data Set (suspect)',
+        'ctg.3': 'Cardiotocography Data Set (pathologic)',
+        'german': 'Statlog German Credit Data',
+        'haberman': "Haberman's Survival Data",
+        'ionosphere': 'Johns Hopkins University Ionosphere DB',
+        'iris.1': 'Iris Plants Database (setosa)',
+        'iris.2': 'Iris Plants Database (versicolour)',
+        'iris.3': 'Iris Plants Database (virginica)',
+        'mammographic': 'Mammographic Mass',
+        'pageblocks.5': 'Page Blocks Classification (5)',
+        'semeion': 'Semeion Handwritten Digit (8)',
+        'sonar': 'Sonar, Mines vs. Rocks',
+        'spambase': 'Spambase Data Set',
+        'spectf': 'SPECTF Heart Data',
+        'tictactoe': 'Tic-Tac-Toe Endgame Database',
+        'transfusion': 'Blood Transfusion Service Center Data Set',
+        'wdbc': 'Wisconsin Diagnostic Breast Cancer',
+        'wine.1': 'Wine Recognition Data (1)',
+        'wine.2': 'Wine Recognition Data (2)',
+        'wine.3': 'Wine Recognition Data (3)',
+        'wine-q-red': 'Wine Quality Red (6-10)',
+        'wine-q-white': 'Wine Quality White (6-10)',
+        'yeast': 'Yeast',
+    }
+
+    # the identifier is an alias for the dataset group; it is part of the url data-folder and is the name we use
+    # to download the raw dataset
+    identifier_map = {
+        'acute.a': 'acute',
+        'acute.b': 'acute',
+        'balance.1': 'balance-scale',
+        'balance.2': 'balance-scale',
+        'balance.3': 'balance-scale',
+        'breast-cancer': 'breast-cancer-wisconsin',
+        'cmc.1': 'cmc',
+        'cmc.2': 'cmc',
+        'cmc.3': 'cmc',
+        'ctg.1': '00193',
+        'ctg.2': '00193',
+        'ctg.3': '00193',
+        'german': 'statlog/german',
+        'haberman': 'haberman',
+        'ionosphere': 'ionosphere',
+        'iris.1': 'iris',
+        'iris.2': 'iris',
+        'iris.3': 'iris',
+        'mammographic': 'mammographic-masses',
+        'pageblocks.5': 'page-blocks',
+        'semeion': 'semeion',
+        'sonar': 'undocumented/connectionist-bench/sonar',
+        'spambase': 'spambase',
+        'spectf': 'spect',
+        'tictactoe': 'tic-tac-toe',
+        'transfusion': 'blood-transfusion',
+        'wdbc': 'breast-cancer-wisconsin',
+        'wine-q-red': 'wine-quality',
+        'wine-q-white': 'wine-quality',
+        'wine.1': 'wine',
+        'wine.2': 'wine',
+        'wine.3': 'wine',
+        'yeast': 'yeast',
+    }
+
+    # the filename is the name of the file within the data_folder indexed by the identifier
+    file_name = {
+        'acute': 'diagnosis.data',
+        '00193': 'CTG.xls',
+        'statlog/german': 'german.data-numeric',
+        'mammographic-masses': 'mammographic_masses.data',
+        'page-blocks': 'page-blocks.data.Z',
+        'undocumented/connectionist-bench/sonar': 'sonar.all-data',
+        'spect': ['SPECTF.train', 'SPECTF.test'],
+        'blood-transfusion': 'transfusion.data',
+        'wine-quality': ['winequality-red.csv', 'winequality-white.csv'],
+        'breast-cancer-wisconsin': 'breast-cancer-wisconsin.data' if dataset_name=='breast-cancer' else 'wdbc.data'
+    }
+
+    # the filename containing the dataset description (if any)
+    desc_name = {
+        'acute': 'diagnosis.names',
+        '00193': None,
+        'statlog/german': 'german.doc',
+        'mammographic-masses': 'mammographic_masses.names',
+        'undocumented/connectionist-bench/sonar': 'sonar.names',
+        'spect': 'SPECTF.names',
+        'blood-transfusion': 'transfusion.names',
+        'wine-quality': 'winequality.names',
+        'breast-cancer-wisconsin': 'breast-cancer-wisconsin.names' if dataset_name == 'breast-cancer' else 'wdbc.names'
+    }
+
+    identifier = identifier_map[dataset_name]
+    filename = file_name.get(identifier, f'{identifier}.data')
+    descfile = desc_name.get(identifier, f'{identifier}.names')
+    fullname = dataset_fullname[dataset_name]
+
+    URL = f'http://archive.ics.uci.edu/ml/machine-learning-databases/{identifier}'
+    data_dir = join(data_home, 'uci_datasets', identifier)
+    if isinstance(filename, str):  # filename could be a list of files, in which case it will be processed later
+        data_path = join(data_dir, filename)
+        download_file_if_not_exists(f'{URL}/{filename}', data_path)
+
+    if descfile:
+        try:
+            download_file_if_not_exists(f'{URL}/{descfile}', f'{data_dir}/{descfile}')
+            if verbose:
+                print(open(f'{data_dir}/{descfile}', 'rt').read())
+        except Exception:
+            print('could not read the description file')
+    elif verbose:
+        print('no file description available')
+
+    if verbose:
+        print(f'Loading {dataset_name} ({fullname})')
+
+    if identifier == 'acute':
+        df = pd.read_csv(data_path, header=None, encoding='utf-16', sep='\t')
+
+        df[0] = df[0].apply(lambda x: float(x.replace(',', '.'))).astype(float, copy=False)
+        for col in range(1, 6):  # map the yes/no columns to binary values
+            _df_replace(df, col)
+        X = df.loc[:, 0:5].values
+        if dataset_name == 'acute.a':
+            y = binarize(df[6], pos_class='yes')
+        elif dataset_name == 'acute.b':
+            y = binarize(df[7], pos_class='yes')
+
+    if identifier == 'balance-scale':
+        df = pd.read_csv(data_path, header=None, sep=',')
+        if dataset_name == 'balance.1':
+            y = binarize(df[0], pos_class='L')
+        elif dataset_name == 'balance.2':
+            y = binarize(df[0], pos_class='B')
+        elif dataset_name == 'balance.3':
+            y = binarize(df[0], pos_class='R')
+        X = df.loc[:, 1:].astype(float).values
+
+    if identifier == 'breast-cancer-wisconsin' and dataset_name == 'breast-cancer':
+        df = pd.read_csv(data_path, header=None, sep=',')
+        Xy = df.loc[:, 1:10]
+        Xy[Xy == '?'] = np.nan
+        Xy = Xy.dropna(axis=0)
+        X = Xy.loc[:, 1:9]
+        X = X.astype(float).values
+        y = binarize(Xy[10], pos_class=2)
+
+    if identifier == 'breast-cancer-wisconsin' and dataset_name == 'wdbc':
+        df = pd.read_csv(data_path, header=None, sep=',')
+        X = df.loc[:, 2:32].astype(float).values
+        y = df[1].values
+        y = binarize(y, pos_class='M')
+
+    if identifier == 'cmc':
+        df = pd.read_csv(data_path, header=None, sep=',')
+        X = df.loc[:, 0:8].astype(float).values
+        y = df[9].astype(int).values
+        if dataset_name == 'cmc.1':
+            y = binarize(y, pos_class=1)
+        elif dataset_name == 'cmc.2':
+            y = binarize(y, pos_class=2)
+        elif dataset_name == 'cmc.3':
+            y = binarize(y, pos_class=3)
+
+    if identifier == '00193':
+        df = pd.read_excel(data_path, sheet_name='Data', skipfooter=3)
+        df = df[list(range(1, 24))]  # select columns 1 to 23 (column 23, 'NSP', is the target label)
+        # replaces the header with the first row
+        new_header = df.iloc[0]  # grab the first row for the header
+        df = df[1:]  # take the data less the header row
+        df.columns = new_header  # set the header row as the df header
+        X = df.iloc[:, 0:22].astype(float).values
+        y = df['NSP'].astype(int).values
+        if dataset_name == 'ctg.1':
+            y = binarize(y, pos_class=1)  # 1==Normal
+        elif dataset_name == 'ctg.2':
+            y = binarize(y, pos_class=2)  # 2==Suspect
+        elif dataset_name == 'ctg.3':
+            y = binarize(y, pos_class=3)  # 3==Pathologic
+
+    if identifier == 'statlog/german':
+        df = pd.read_csv(data_path, header=None, delim_whitespace=True)
+        X = df.iloc[:, 0:24].astype(float).values
+        y = df[24].astype(int).values
+        y = binarize(y, pos_class=1)
+
+    if identifier == 'haberman':
+        df = pd.read_csv(data_path, header=None)
+        X = df.iloc[:, 0:3].astype(float).values
+        y = df[3].astype(int).values
+        y = binarize(y, pos_class=2)
+
+    if identifier == 'ionosphere':
+        df = pd.read_csv(data_path, header=None)
+        X = df.iloc[:, 0:34].astype(float).values
+        y = df[34].values
+        y = binarize(y, pos_class='b')
+
+    if identifier == 'iris':
+        df = pd.read_csv(data_path, header=None)
+        X = df.iloc[:, 0:4].astype(float).values
+        y = df[4].values
+        if dataset_name == 'iris.1':
+            y = binarize(y, pos_class='Iris-setosa')  # 1==Setosa
+        elif dataset_name == 'iris.2':
+            y = binarize(y, pos_class='Iris-versicolor')  # 2==Versicolor
+        elif dataset_name == 'iris.3':
+            y = binarize(y, pos_class='Iris-virginica')  # 3==Virginica
+
+    if identifier == 'mammographic-masses':
+        df = pd.read_csv(data_path, header=None, sep=',')
+        df[df == '?'] = np.nan
+        Xy = df.dropna(axis=0)
+        X = Xy.iloc[:, 0:5]
+        X = X.astype(float).values
+        y = binarize(Xy.iloc[:, 5], pos_class=1)
+
+    if identifier == 'page-blocks':
+        data_path_ = data_path.replace('.Z', '')
+        if not os.path.exists(data_path_):
+            raise FileNotFoundError(f'Warning: file {data_path_} does not exist. If this is the first time you '
+                                    f'attempt to load this dataset, then you have to manually unzip the {data_path} '
+                                    f'and name the extracted file {data_path_} (unfortunately, neither zipfile nor '
+                                    f'gzip can handle unix compressed files automatically -- there is a repo in GitHub '
+                                    f'https://github.com/umeat/unlzw where the problem seems to be solved anyway).')
+        df = pd.read_csv(data_path_, header=None, delim_whitespace=True)
+        X = df.iloc[:, 0:10].astype(float).values
+        y = df[10].values
+        y = binarize(y, pos_class=5)  # 5==block "graphic"
+
+    if identifier == 'semeion':
+        df = pd.read_csv(data_path, header=None, delim_whitespace=True)
+        X = df.iloc[:, 0:256].astype(float).values
+        y = df[263].values  # 263 stands for digit 8 (labels are one-hot vectors from col 256-266)
+        y = binarize(y, pos_class=1)
+
+    if identifier == 'undocumented/connectionist-bench/sonar':
+        df = pd.read_csv(data_path, header=None, sep=',')
+        X = df.iloc[:, 0:60].astype(float).values
+        y = df[60].values
+        y = binarize(y, pos_class='R')
+
+    if identifier == 'spambase':
+        df = pd.read_csv(data_path, header=None, sep=',')
+        X = df.iloc[:, 0:57].astype(float).values
+        y = df[57].values
+        y = binarize(y, pos_class=1)
+
+    if identifier == 'spect':
+        dfs = []
+        for file in filename:
+            data_path = join(data_dir, file)
+            download_file_if_not_exists(f'{URL}/{file}', data_path)
+            dfs.append(pd.read_csv(data_path, header=None, sep=','))
+        df = pd.concat(dfs)
+        X = df.iloc[:, 1:45].astype(float).values
+        y = df[0].values
+        y = binarize(y, pos_class=0)
+
+    if identifier == 'tic-tac-toe':
+        df = pd.read_csv(data_path, header=None, sep=',')
+        X = df.iloc[:, 0:9].replace('o', 0).replace('b', 1).replace('x', 2).values
+        y = df[9].values
+        y = binarize(y, pos_class='negative')
+
+    if identifier == 'blood-transfusion':
+        df = pd.read_csv(data_path, sep=',')
+        X = df.iloc[:, 0:4].astype(float).values
+        y = df.iloc[:, 4].values
+        y = binarize(y, pos_class=1)
+
+    if identifier == 'wine':
+        df = pd.read_csv(data_path, header=None, sep=',')
+        X = df.iloc[:, 1:14].astype(float).values
+        y = df[0].values
+        if dataset_name == 'wine.1':
+            y = binarize(y, pos_class=1)
+        elif dataset_name == 'wine.2':
+            y = binarize(y, pos_class=2)
+        elif dataset_name == 'wine.3':
+            y = binarize(y, pos_class=3)
+
+    if identifier == 'wine-quality':
+        filename = filename[0] if dataset_name == 'wine-q-red' else filename[1]
+        data_path = join(data_dir, filename)
+        download_file_if_not_exists(f'{URL}/{filename}', data_path)
+        df = pd.read_csv(data_path, sep=';')
+        X = df.iloc[:, 0:11].astype(float).values
+        y = df.iloc[:, 11].values > 5
+
+    if identifier == 'yeast':
+        df = pd.read_csv(data_path, header=None, delim_whitespace=True)
+        X = df.iloc[:, 1:9].astype(float).values
+        y = df.iloc[:, 9].values
+        y = binarize(y, pos_class='NUC')
+
+    data = LabelledCollection(X, y)
+    if verbose:
+        data.stats()
+    return data
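+# Usage sketch: the returned LabelledCollection can be split on demand, e.g., mimicking
+# fetch_UCIBinaryDataset with a 30% stratified test split:
+#
+# >>> import quapy as qp
+# >>> collection = qp.datasets.fetch_UCIBinaryLabelledCollection('yeast')
+# >>> train, test = collection.split_stratified(train_prop=0.7, random_state=0)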
+ + + +
+[docs]
+def fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, verbose=False) -> Dataset:
+    """
+    Loads a UCI multiclass dataset as an instance of :class:`quapy.data.base.Dataset`.
+
+    The list of available datasets is taken from https://archive.ics.uci.edu/, following these criteria:
+    - It has more than 1000 instances
+    - It is suited for classification
+    - It has more than two classes
+    - It is available for Python import (requires ucimlrepo package)
+
+    >>> import quapy as qp
+    >>> dataset = qp.datasets.fetch_UCIMulticlassDataset("dry-bean")
+    >>> train, test = dataset.train_test
+    >>> ...
+
+    The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_MULTICLASS_DATASETS`
+
+    The datasets are downloaded only once and pickled into disk, saving time for consecutive calls.
+
+    :param dataset_name: a dataset name
+    :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default
+        ~/quapy_data/ directory)
+    :param test_split: proportion of documents to be included in the test set. The rest makes up the training set
+    :param verbose: set to True (default is False) to get information (stats) about the dataset
+    :return: a :class:`quapy.data.base.Dataset` instance
+    """
+    data = fetch_UCIMulticlassLabelledCollection(dataset_name, data_home, verbose)
+    return Dataset(*data.split_stratified(1 - test_split, random_state=0))
+ + + +
+[docs]
+def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, verbose=False) -> LabelledCollection:
+    """
+    Loads a UCI multiclass collection as an instance of :class:`quapy.data.base.LabelledCollection`.
+
+    The list of available datasets is taken from https://archive.ics.uci.edu/, following these criteria:
+    - It has more than 1000 instances
+    - It is suited for classification
+    - It has more than two classes
+    - It is available for Python import (requires ucimlrepo package)
+
+    >>> import quapy as qp
+    >>> collection = qp.datasets.fetch_UCIMulticlassLabelledCollection("dry-bean")
+    >>> X, y = collection.Xy
+    >>> ...
+
+    The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_MULTICLASS_DATASETS`
+
+    The datasets are downloaded only once and pickled into disk, saving time for consecutive calls.
+
+    :param dataset_name: a dataset name
+    :param data_home: specify the quapy home directory where the dataset will be dumped (leave empty to use the default
+        ~/quapy_data/ directory)
+    :param verbose: set to True (default is False) to get information (stats) about the dataset
+    :return: a :class:`quapy.data.base.LabelledCollection` instance
+    """
+    assert dataset_name in UCI_MULTICLASS_DATASETS, \
+        f'Name {dataset_name} does not match any known dataset from the ' \
+        f'UCI Machine Learning datasets repository (multiclass). ' \
+        f'Valid ones are {UCI_MULTICLASS_DATASETS}'
+
+    if data_home is None:
+        data_home = get_quapy_home()
+
+    identifiers = {
+        "dry-bean": 602,
+        "wine-quality": 186,
+        "academic-success": 697,
+        "digits": 80,
+        "letter": 59
+    }
+
+    full_names = {
+        "dry-bean": "Dry Bean Dataset",
+        "wine-quality": "Wine Quality",
+        "academic-success": "Predict students' dropout and academic success",
+        "digits": "Optical Recognition of Handwritten Digits",
+        "letter": "Letter Recognition"
+    }
+
+    identifier = identifiers[dataset_name]
+    fullname = full_names[dataset_name]
+
+    if verbose:
+        print(f'Loading UCI Multiclass {dataset_name} ({fullname})')
+
+    file = join(data_home, 'uci_multiclass', dataset_name + '.pkl')
+
+    def download(id):
+        data = fetch_ucirepo(id=id)
+        X, y = data['data']['features'].to_numpy(), data['data']['targets'].to_numpy().squeeze()
+        classes = np.sort(np.unique(y))
+        # map the original class names onto consecutive integer codes 0..n-1
+        y = np.searchsorted(classes, y)
+        return LabelledCollection(X, y)
+
+    data = pickled_resource(file, download, identifier)
+
+    if verbose:
+        data.stats()
+
+    return data
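+# Usage sketch: labels come re-coded as consecutive integers 0..n-1 (np.searchsorted over the
+# sorted original class names; see download() above):
+#
+# >>> import quapy as qp
+# >>> collection = qp.datasets.fetch_UCIMulticlassLabelledCollection('dry-bean')
+# >>> collection.stats()   # reports the re-coded classes and their prevalence values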
+ + + +def _df_replace(df, col, repl={'yes': 1, 'no':0}, astype=float): + df[col] = df[col].apply(lambda x:repl[x]).astype(astype, copy=False) + + +
+[docs]
+def fetch_lequa2022(task, data_home=None):
+    """
+    Loads the official datasets provided for the `LeQua <https://lequa2022.github.io/index>`_ competition.
+    In brief, there are 4 tasks (T1A, T1B, T2A, T2B) having to do with text quantification
+    problems. Tasks T1A and T1B provide documents in vector form, while T2A and T2B provide raw documents instead.
+    Tasks T1A and T2A are binary sentiment quantification problems, while T1B and T2B are multiclass quantification
+    problems consisting of estimating the class prevalence values of 28 different merchandise products.
+    We refer the reader to `Esuli, A., Moreo, A., Sebastiani, F., & Sperduti, G. (2022).
+    A Detailed Overview of LeQua@ CLEF 2022: Learning to Quantify.
+    <https://ceur-ws.org/Vol-3180/paper-146.pdf>`_ for a detailed description
+    of the tasks and datasets.
+
+    The datasets are downloaded only once, and stored for fast reuse.
+
+    See `lequa2022_experiments.py` provided in the example folder, which can serve as a guide on how to use these
+    datasets.
+
+    :param task: a string representing the task name; valid ones are T1A, T1B, T2A, and T2B
+    :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default
+        ~/quapy_data/ directory)
+    :return: a tuple `(train, val_gen, test_gen)` where `train` is an instance of
+        :class:`quapy.data.base.LabelledCollection`, `val_gen` and `test_gen` are instances of
+        :class:`quapy.data._lequa2022.SamplesFromDir`, a subclass of :class:`quapy.protocol.AbstractProtocol`,
+        that return a series of samples stored in a directory which are labelled by prevalence.
+    """
+
+    from quapy.data._lequa2022 import load_raw_documents, load_vector_documents, SamplesFromDir
+
+    assert task in LEQUA2022_TASKS, \
+        f'Unknown task {task}. Valid ones are {LEQUA2022_TASKS}'
+    if data_home is None:
+        data_home = get_quapy_home()
+
+    URL_TRAINDEV = f'https://zenodo.org/record/6546188/files/{task}.train_dev.zip'
+    URL_TEST = f'https://zenodo.org/record/6546188/files/{task}.test.zip'
+    URL_TEST_PREV = f'https://zenodo.org/record/6546188/files/{task}.test_prevalences.zip'
+
+    lequa_dir = join(data_home, 'lequa2022')
+    os.makedirs(lequa_dir, exist_ok=True)
+
+    def download_unzip_and_remove(unzipped_path, url):
+        tmp_path = join(lequa_dir, task + '_tmp.zip')
+        download_file_if_not_exists(url, tmp_path)
+        with zipfile.ZipFile(tmp_path) as file:
+            file.extractall(unzipped_path)
+        os.remove(tmp_path)
+
+    if not os.path.exists(join(lequa_dir, task)):
+        download_unzip_and_remove(lequa_dir, URL_TRAINDEV)
+        download_unzip_and_remove(lequa_dir, URL_TEST)
+        download_unzip_and_remove(lequa_dir, URL_TEST_PREV)
+
+    if task in ['T1A', 'T1B']:
+        load_fn = load_vector_documents
+    elif task in ['T2A', 'T2B']:
+        load_fn = load_raw_documents
+
+    tr_path = join(lequa_dir, task, 'public', 'training_data.txt')
+    train = LabelledCollection.load(tr_path, loader_func=load_fn)
+
+    val_samples_path = join(lequa_dir, task, 'public', 'dev_samples')
+    val_true_prev_path = join(lequa_dir, task, 'public', 'dev_prevalences.txt')
+    val_gen = SamplesFromDir(val_samples_path, val_true_prev_path, load_fn=load_fn)
+
+    test_samples_path = join(lequa_dir, task, 'public', 'test_samples')
+    test_true_prev_path = join(lequa_dir, task, 'public', 'test_prevalences.txt')
+    test_gen = SamplesFromDir(test_samples_path, test_true_prev_path, load_fn=load_fn)
+
+    return train, val_gen, test_gen
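+# Usage sketch (the iteration pattern assumes that SamplesFromDir, like other quapy protocols,
+# yields (sample, prevalence) pairs, as the docstring above suggests):
+#
+# >>> import quapy as qp
+# >>> train, val_gen, test_gen = qp.datasets.fetch_lequa2022('T1A')
+# >>> for sample, prev in val_gen():
+# >>>     pass   # e.g., compare prev against a quantifier's estimate on sample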
+ + + +
+[docs]
+def fetch_IFCB(single_sample_train=True, for_model_selection=False, data_home=None):
+    """
+    Loads the IFCB dataset for quantification from `Zenodo <https://zenodo.org/records/10036244>`_ (for more
+    information on this dataset, please follow the zenodo link).
+    This dataset is based on the data available publicly at
+    `WHOI-Plankton repo <https://github.com/hsosik/WHOI-Plankton>`_.
+    The scripts for the processing are available at `P. González's repo <https://github.com/pglez82/IFCB_Zenodo>`_.
+    In essence, this is the IFCB dataset with precomputed features for testing quantification algorithms.
+
+    The datasets are downloaded only once, and stored for fast reuse.
+
+    :param single_sample_train: a boolean. If True, it will return the train dataset as a
+        :class:`quapy.data.base.LabelledCollection` (all examples together).
+        If False, a generator of training samples will be returned. Each example in the training set has an individual label.
+    :param for_model_selection: if True, then returns a split of 30% of the training set (86 out of 286 samples) to be used for model selection;
+        if False, then returns the full training set as training set and the test set as the test set
+    :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default
+        ~/quapy_data/ directory)
+    :return: a tuple `(train, test_gen)` where `train` is an instance of
+        :class:`quapy.data.base.LabelledCollection` if `single_sample_train` is True, or of
+        :class:`quapy.data._ifcb.IFCBTrainSamplesFromDir` (i.e., a sampling protocol that returns a series of samples
+        labelled example by example) otherwise; `test_gen` will be a :class:`quapy.data._ifcb.IFCBTestSamples`,
+        i.e., a sampling protocol that returns a series of samples labelled by prevalence.
+    """
+
+    from quapy.data._ifcb import IFCBTrainSamplesFromDir, IFCBTestSamples, get_sample_list, generate_modelselection_split
+
+    if data_home is None:
+        data_home = get_quapy_home()
+
+    URL_TRAIN = 'https://zenodo.org/records/10036244/files/IFCB.train.zip'
+    URL_TEST = 'https://zenodo.org/records/10036244/files/IFCB.test.zip'
+    URL_TEST_PREV = 'https://zenodo.org/records/10036244/files/IFCB.test_prevalences.zip'
+
+    ifcb_dir = join(data_home, 'ifcb')
+    os.makedirs(ifcb_dir, exist_ok=True)
+
+    def download_unzip_and_remove(unzipped_path, url):
+        tmp_path = join(ifcb_dir, 'ifcb_tmp.zip')
+        download_file_if_not_exists(url, tmp_path)
+        with zipfile.ZipFile(tmp_path) as file:
+            file.extractall(unzipped_path)
+        os.remove(tmp_path)
+
+    if not os.path.exists(os.path.join(ifcb_dir, 'train')):
+        download_unzip_and_remove(ifcb_dir, URL_TRAIN)
+    if not os.path.exists(os.path.join(ifcb_dir, 'test')):
+        download_unzip_and_remove(ifcb_dir, URL_TEST)
+    if not os.path.exists(os.path.join(ifcb_dir, 'test_prevalences.csv')):
+        download_unzip_and_remove(ifcb_dir, URL_TEST_PREV)
+
+    # Load test prevalences and classes
+    test_true_prev_path = join(ifcb_dir, 'test_prevalences.csv')
+    test_true_prev = pd.read_csv(test_true_prev_path)
+    classes = test_true_prev.columns[1:]
+
+    # Load train and test samples
+    train_samples_path = join(ifcb_dir, 'train')
+    test_samples_path = join(ifcb_dir, 'test')
+
+    if for_model_selection:
+        # In this case, return 70% of training data as the training set and 30% as the test set
+        samples = get_sample_list(train_samples_path)
+        train, test = generate_modelselection_split(samples, split=0.3)
+        train_gen = IFCBTrainSamplesFromDir(path_dir=train_samples_path, classes=classes, samples=train)
+
+        # Test prevalence is computed from class labels
+        test_gen = IFCBTestSamples(path_dir=train_samples_path, test_prevalences=None, samples=test, classes=classes)
+    else:
+        # In this case, we use all training samples as the training set and the test samples as the test set
+        train_gen = IFCBTrainSamplesFromDir(path_dir=train_samples_path, classes=classes)
+        test_gen = IFCBTestSamples(path_dir=test_samples_path, test_prevalences=test_true_prev)
+
+    # If the user requests it, join all the train samples into one LabelledCollection
+    if single_sample_train:
+        train = LabelledCollection.join(*[lc for lc in train_gen()])
+        return train, test_gen
+    else:
+        return train_gen, test_gen
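+# Usage sketch (the iteration pattern for test_gen is assumed, by analogy with the other
+# prevalence-labelled protocols in this module):
+#
+# >>> import quapy as qp
+# >>> train, test_gen = qp.datasets.fetch_IFCB(single_sample_train=True)
+# >>> for sample, prev in test_gen():
+# >>>     pass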
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/data/preprocessing.html b/docs/build/html/_modules/quapy/data/preprocessing.html new file mode 100644 index 0000000..a50aa64 --- /dev/null +++ b/docs/build/html/_modules/quapy/data/preprocessing.html @@ -0,0 +1,373 @@ + + + + + + quapy.data.preprocessing — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.data.preprocessing

+import numpy as np
+from scipy.sparse import spmatrix
+from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
+from sklearn.preprocessing import StandardScaler
+from tqdm import tqdm
+
+import quapy as qp
+from quapy.data.base import Dataset
+from quapy.util import map_parallel
+from .base import LabelledCollection
+
+
+
+[docs]
+def text2tfidf(dataset: Dataset, min_df=3, sublinear_tf=True, inplace=False, **kwargs):
+    """
+    Transforms a :class:`quapy.data.base.Dataset` of textual instances into a :class:`quapy.data.base.Dataset` of
+    tfidf weighted sparse vectors
+
+    :param dataset: a :class:`quapy.data.base.Dataset` where the instances of training and test collections are
+        lists of str
+    :param min_df: minimum number of occurrences for a word to be considered as part of the vocabulary (default 3)
+    :param sublinear_tf: whether or not to apply the log scaling to the tf counters (default True)
+    :param inplace: whether or not to apply the transformation inplace (True), or to a new copy (False, default)
+    :param kwargs: the rest of parameters of the transformation (as for sklearn's
+        `TfidfVectorizer <https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html>`_)
+    :return: a new :class:`quapy.data.base.Dataset` in `csr_matrix` format (if inplace=False) or a reference to the
+        current Dataset (if inplace=True) where the instances are stored in a `csr_matrix` of real-valued tfidf scores
+    """
+    __check_type(dataset.training.instances, np.ndarray, str)
+    __check_type(dataset.test.instances, np.ndarray, str)
+
+    vectorizer = TfidfVectorizer(min_df=min_df, sublinear_tf=sublinear_tf, **kwargs)
+    training_documents = vectorizer.fit_transform(dataset.training.instances)
+    test_documents = vectorizer.transform(dataset.test.instances)
+
+    if inplace:
+        dataset.training = LabelledCollection(training_documents, dataset.training.labels, dataset.classes_)
+        dataset.test = LabelledCollection(test_documents, dataset.test.labels, dataset.classes_)
+        dataset.vocabulary = vectorizer.vocabulary_
+        return dataset
+    else:
+        training = LabelledCollection(training_documents, dataset.training.labels.copy(), dataset.classes_)
+        test = LabelledCollection(test_documents, dataset.test.labels.copy(), dataset.classes_)
+        return Dataset(training, test, vectorizer.vocabulary_)
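+# Usage sketch: converts a textual Dataset (a hypothetical `dataset` of raw documents) into
+# tfidf form, in place:
+#
+# >>> dataset = text2tfidf(dataset, min_df=5, inplace=True)
+# >>> dataset.training.instances   # now a csr_matrix of tfidf scores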
+ + + +
+[docs] +def reduce_columns(dataset: Dataset, min_df=5, inplace=False): + """ + Reduces the dimensionality of the instances, represented as a `csr_matrix` (or any subtype of + `scipy.sparse.spmatrix`), of training and test documents by removing the columns of words which are not present + in at least `min_df` instances in the training set + + :param dataset: a :class:`quapy.data.base.Dataset` in which instances are represented in sparse format (any + subtype of scipy.sparse.spmatrix) + :param min_df: integer, minimum number of instances below which the columns are removed + :param inplace: whether or not to apply the transformation inplace (True), or to a new copy (False, default) + :return: a new :class:`quapy.data.base.Dataset` (if inplace=False) or a reference to the current + :class:`quapy.data.base.Dataset` (inplace=True) where the dimensions corresponding to infrequent terms + in the training set have been removed + """ + __check_type(dataset.training.instances, spmatrix) + __check_type(dataset.test.instances, spmatrix) + assert dataset.training.instances.shape[1] == dataset.test.instances.shape[1], 'unaligned vector spaces' + + def filter_by_occurrences(X, W): + column_prevalence = np.asarray((X > 0).sum(axis=0)).flatten() + take_columns = column_prevalence >= min_df + X = X[:, take_columns] + W = W[:, take_columns] + return X, W + + Xtr, Xte = filter_by_occurrences(dataset.training.instances, dataset.test.instances) + if inplace: + dataset.training.instances = Xtr + dataset.test.instances = Xte + return dataset + else: + training = LabelledCollection(Xtr, dataset.training.labels.copy(), dataset.classes_) + test = LabelledCollection(Xte, dataset.test.labels.copy(), dataset.classes_) + return Dataset(training, test)
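+# Usage sketch: removes the columns (terms) that occur in fewer than 10 training documents,
+# keeping the training and test vector spaces aligned:
+#
+# >>> dataset = reduce_columns(dataset, min_df=10, inplace=True)
+# >>> dataset.training.instances.shape[1] == dataset.test.instances.shape[1]   # True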
+ + + +
+[docs]
+def standardize(dataset: Dataset, inplace=False):
+ """
+ Standardizes the real-valued columns of a :class:`quapy.data.base.Dataset`.
+ Standardization, aka z-scoring, of a variable `X` comes down to subtracting the average and normalizing by the
+ standard deviation.
+
+ :param dataset: a :class:`quapy.data.base.Dataset` object
+ :param inplace: set to True if the transformation is to be applied inplace, or to False (default) if a new
+ :class:`quapy.data.base.Dataset` is to be returned
+ :return: an instance of :class:`quapy.data.base.Dataset`
+ """
+ s = StandardScaler(copy=not inplace)
+ training = s.fit_transform(dataset.training.instances)
+ test = s.transform(dataset.test.instances)
+ if inplace:
+ return dataset
+ else:
+ # wrap the transformed arrays in LabelledCollections so that a valid Dataset is returned
+ training = LabelledCollection(training, dataset.training.labels.copy(), dataset.classes_)
+ test = LabelledCollection(test, dataset.test.labels.copy(), dataset.classes_)
+ return Dataset(training, test, dataset.vocabulary, dataset.name)
+ + + +
+[docs]
+def index(dataset: Dataset, min_df=5, inplace=False, **kwargs):
+ """
+ Indexes the tokens of a textual :class:`quapy.data.base.Dataset` of string documents.
+ To index a document means to replace each different token by a unique numerical index.
+ Rare words (i.e., words occurring less than `min_df` times) are replaced by a special token `UNK`
+
+ :param dataset: a :class:`quapy.data.base.Dataset` object where the instances of training and test documents
+ are lists of str
+ :param min_df: minimum number of occurrences below which the term is replaced by a `UNK` index
+ :param inplace: whether or not to apply the transformation inplace (True), or to a new copy (False, default)
+ :param kwargs: the rest of parameters of the transformation (as for sklearn's
+ `CountVectorizer <https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html>`_)
+ :return: a new :class:`quapy.data.base.Dataset` (if inplace=False) or a reference to the current
+ :class:`quapy.data.base.Dataset` (inplace=True) consisting of lists of integer values representing indices.
+ """
+ __check_type(dataset.training.instances, np.ndarray, str)
+ __check_type(dataset.test.instances, np.ndarray, str)
+
+ indexer = IndexTransformer(min_df=min_df, **kwargs)
+ training_index = indexer.fit_transform(dataset.training.instances)
+ test_index = indexer.transform(dataset.test.instances)
+
+ training_index = np.asarray(training_index, dtype=object)
+ test_index = np.asarray(test_index, dtype=object)
+
+ if inplace:
+ dataset.training = LabelledCollection(training_index, dataset.training.labels, dataset.classes_)
+ dataset.test = LabelledCollection(test_index, dataset.test.labels, dataset.classes_)
+ dataset.vocabulary = indexer.vocabulary_
+ return dataset
+ else:
+ training = LabelledCollection(training_index, dataset.training.labels.copy(), dataset.classes_)
+ test = LabelledCollection(test_index, dataset.test.labels.copy(), dataset.classes_)
+ return Dataset(training, test, indexer.vocabulary_)
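+# Usage sketch (illustrative): after indexing, every document is a list of integer
+# token ids, with rare tokens mapped to the special UNK index:
+#
+#   >>> dataset = index(qp.datasets.fetch_reviews('kindle'), min_df=5)
+#   >>> dataset.training.instances[0]                    # e.g., [12, 7, 3401, 0, ...]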
+
+
+
+def __check_type(container, container_type=None, element_type=None):
+ if container_type:
+ assert isinstance(container, container_type), \
+ f'unexpected type of container (expected {container_type}, found {type(container)})'
+ if element_type:
+ assert isinstance(container[0], element_type), \
+ f'unexpected type of element (expected {element_type}, found {type(container[0])})'
+
+
+[docs]
+class IndexTransformer:
+ """
+ This class implements a sklearn's-style transformer that indexes text as numerical ids for the tokens it
+ contains, as tokenized by sklearn's
+ `CountVectorizer <https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html>`_
+
+ :param kwargs: keyword arguments from
+ `CountVectorizer <https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html>`_
+ """
+
+ def __init__(self, **kwargs):
+ self.vect = CountVectorizer(**kwargs)
+ self.unk = -1 # a valid index is assigned after fit
+ self.pad = -2 # a valid index is assigned after fit
+
+[docs] + def fit(self, X): + """ + Fits the transformer, i.e., decides on the vocabulary, given a list of strings. + + :param X: a list of strings + :return: self + """ + self.vect.fit(X) + self.analyzer = self.vect.build_analyzer() + self.vocabulary_ = self.vect.vocabulary_ + self.unk = self.add_word(qp.environ['UNK_TOKEN'], qp.environ['UNK_INDEX']) + self.pad = self.add_word(qp.environ['PAD_TOKEN'], qp.environ['PAD_INDEX']) + return self
+ + +
+[docs]
+ def transform(self, X, n_jobs=None):
+ """
+ Transforms the strings in `X` into lists of numerical ids
+
+ :param X: a list of strings
+ :param n_jobs: the number of parallel workers to carry out this task
+ :return: a `np.ndarray` of numerical ids
+ """
+ # given the number of tasks and the number of jobs, generates the slices for the parallel processes
+ assert self.unk != -1, 'transform called before fit'
+ n_jobs = qp._get_njobs(n_jobs)
+ return map_parallel(func=self._index, args=X, n_jobs=n_jobs)
+ + + + def _index(self, documents): + vocab = self.vocabulary_.copy() + return [[vocab.get(word, self.unk) for word in self.analyzer(doc)] for doc in tqdm(documents, 'indexing')] + +
+[docs] + def fit_transform(self, X, n_jobs=None): + """ + Fits the transform on `X` and transforms it. + + :param X: a list of strings + :param n_jobs: the number of parallel workers to carry out this task + :return: a `np.ndarray` of numerical ids + """ + return self.fit(X).transform(X, n_jobs=n_jobs)
+ + +
+[docs] + def vocabulary_size(self): + """ + Gets the length of the vocabulary according to which the document tokens have been indexed + + :return: integer + """ + return len(self.vocabulary_)
+ + +
+[docs]
+ def add_word(self, word, id=None, nogaps=True):
+ """
+ Adds a new token (regardless of whether it has been found in the text or not), with dedicated id.
+ Useful to define special tokens for codifying unknown words, or padding tokens.
+
+ :param word: string, surface form of the token
+ :param id: integer, numerical value to assign to the token (leave as None for indicating the next valid id,
+ default)
+ :param nogaps: if set to True (default) asserts that the id indicated leads to no numerical gaps with
+ precedent ids stored so far
+ :return: integer, the numerical id for the new token
+ """
+ if word in self.vocabulary_:
+ raise ValueError(f'word {word} already in dictionary')
+ if id is None:
+ # add the word with the next id
+ self.vocabulary_[word] = len(self.vocabulary_)
+ else:
+ id2word = {id_:word_ for word_, id_ in self.vocabulary_.items()}
+ if id in id2word:
+ # the id is already taken: the new word takes it over, and the displaced word is re-added with the next id
+ old_word = id2word[id]
+ self.vocabulary_[word] = id
+ del self.vocabulary_[old_word]
+ self.add_word(old_word)
+ else:
+ # the next gap-free id is exactly the current vocabulary size
+ if nogaps and id > self.vocabulary_size():
+ raise ValueError(f'word {word} added with id {id}, while the current vocabulary size '
+ f'is of {self.vocabulary_size()}, and id gaps are not allowed')
+ self.vocabulary_[word] = id
+ return self.vocabulary_[word]
+
+ + +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/data/reader.html b/docs/build/html/_modules/quapy/data/reader.html new file mode 100644 index 0000000..4c9c163 --- /dev/null +++ b/docs/build/html/_modules/quapy/data/reader.html @@ -0,0 +1,244 @@ + + + + + + quapy.data.reader — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.data.reader

+import numpy as np
+from scipy.sparse import dok_matrix
+from tqdm import tqdm
+
+
+
+[docs]
+def from_text(path, encoding='utf-8', verbose=1, class2int=True):
+ """
+ Reads a labelled collection of documents.
+ File format <0 or 1>\t<document>\n
+
+ :param path: path to the labelled collection
+ :param encoding: the text encoding used to open the file
+ :param verbose: if >0 (default) shows some progress information in standard output
+ :param class2int: if True (default), the labels are converted to int
+ :return: a list of sentences, and a list of labels
+ """
+ all_sentences, all_labels = [], []
+ if verbose>0:
+ file = tqdm(open(path, 'rt', encoding=encoding).readlines(), f'loading {path}')
+ else:
+ file = open(path, 'rt', encoding=encoding).readlines()
+ for line in file:
+ line = line.strip()
+ if line:
+ try:
+ label, sentence = line.split('\t')
+ sentence = sentence.strip()
+ if class2int:
+ label = int(label)
+ if sentence:
+ all_sentences.append(sentence)
+ all_labels.append(label)
+ except ValueError:
+ print(f'format error in {line}')
+ return all_sentences, all_labels
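+# Expected file layout (sketch for a hypothetical file reviews.txt): one labelled
+# document per line, label and text separated by a tab:
+#
+#   1<TAB>great little e-reader, the battery lasts for weeks
+#   0<TAB>arrived with a cracked screen
+#
+#   >>> sentences, labels = from_text('reviews.txt')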
+ + + +
+[docs]
+def from_sparse(path):
+ """
+ Reads a labelled collection of real-valued instances expressed in sparse format.
+ File format <-1 or 0 or 1>[\s col(int):val(float)]\n
+
+ :param path: path to the labelled collection
+ :return: a `csr_matrix` containing the instances (rows), and a ndarray containing the labels (the labels read
+ from the file are shifted by +1 so that they are non-negative)
+ """
+
+ def split_col_val(col_val):
+ col, val = col_val.split(':')
+ col, val = int(col) - 1, float(val)
+ return col, val
+
+ all_documents, all_labels = [], []
+ max_col = 0
+ for line in tqdm(open(path, 'rt').readlines(), f'loading {path}'):
+ parts = line.strip().split()
+ if parts:
+ all_labels.append(int(parts[0]))
+ cols, vals = zip(*[split_col_val(col_val) for col_val in parts[1:]])
+ cols, vals = np.asarray(cols), np.asarray(vals)
+ max_col = max(max_col, cols.max())
+ all_documents.append((cols, vals))
+ n_docs = len(all_labels)
+ X = dok_matrix((n_docs, max_col + 1), dtype=float)
+ for i, (cols, vals) in tqdm(enumerate(all_documents), total=len(all_documents),
+ desc=f'filling matrix of shape {X.shape}'):
+ X[i, cols] = vals
+ X = X.tocsr()
+ y = np.asarray(all_labels) + 1
+ return X, y
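+# Expected file layout (sketch for a hypothetical file train.dat): label followed by
+# space-separated col:val pairs, with 1-based column indices:
+#
+#   +1 4:0.32 98:1.41
+#   -1 2:0.11 4:0.85
+#
+#   >>> X, y = from_sparse('train.dat')   # X is a csr_matrix; labels come out shifted by +1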
+ + + +
+[docs]
+def from_csv(path, encoding='utf-8'):
+ """
+ Reads a csv file in which columns are separated by ','.
+ File format <label>,<feat1>,<feat2>,...,<featn>\n
+
+ :param path: path to the csv file
+ :param encoding: the text encoding used to open the file
+ :return: a ndarray (float) with the covariates, and a np.ndarray with the labels
+ """
+
+ X, y = [], []
+ for instance in tqdm(open(path, 'rt', encoding=encoding).readlines(), desc=f'reading {path}'):
+ yi, *xi = instance.strip().split(',')
+ X.append(list(map(float,xi)))
+ y.append(yi)
+ X = np.asarray(X)
+ y = np.asarray(y)
+ return X, y
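+# Expected file layout (sketch for a hypothetical file data.csv):
+#
+#   spam,0.12,1.00,0.33
+#   ham,0.40,0.25,0.00
+#
+#   >>> X, y = from_csv('data.csv')
+#   >>> y, classnames = reindex_labels(y)   # maps string labels to indices 0..n-1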
+ + + +
+[docs] +def reindex_labels(y): + """ + Re-indexes a list of labels as a list of indexes, and returns the classnames corresponding to the indexes. + E.g.: + + >>> reindex_labels(['B', 'B', 'A', 'C']) + >>> (array([1, 1, 0, 2]), array(['A', 'B', 'C'], dtype='<U1')) + + :param y: the list or array of original labels + :return: a ndarray (int) of class indexes, and a ndarray of classnames corresponding to the indexes. + """ + y = np.asarray(y) + classnames = np.asarray(sorted(np.unique(y))) + label2index = {label: index for index, label in enumerate(classnames)} + indexed = np.empty(y.shape, dtype=int) + for label in classnames: + indexed[y==label] = label2index[label] + return indexed, classnames
+ + + +
+[docs]
+def binarize(y, pos_class):
+ """
+ Binarizes a categorical array-like collection of labels towards the positive class `pos_class`. E.g.,:
+
+ >>> binarize([1, 2, 3, 1, 1, 0], pos_class=2)
+ >>> array([0, 1, 0, 0, 0, 0])
+
+ :param y: array-like of labels
+ :param pos_class: integer, the positive class
+ :return: a binary np.ndarray, in which value 1 corresponds to positions in which `y` had `pos_class` labels, and
+ 0 otherwise
+ """
+ y = np.asarray(y)
+ ybin = np.zeros(y.shape, dtype=int)
+ ybin[y == pos_class] = 1
+ return ybin
+ + +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/error.html b/docs/build/html/_modules/quapy/error.html new file mode 100644 index 0000000..5c1ce33 --- /dev/null +++ b/docs/build/html/_modules/quapy/error.html @@ -0,0 +1,486 @@ + + + + + + quapy.error — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.error

+"""Implementation of error measures used for quantification"""
+
+import numpy as np
+from sklearn.metrics import f1_score
+import quapy as qp
+
+
+
+[docs] +def from_name(err_name): + """Gets an error function from its name. E.g., `from_name("mae")` + will return function :meth:`quapy.error.mae` + + :param err_name: string, the error name + :return: a callable implementing the requested error + """ + assert err_name in ERROR_NAMES, f'unknown error {err_name}' + callable_error = globals()[err_name] + return callable_error
+ + + +
+[docs] +def f1e(y_true, y_pred): + """F1 error: simply computes the error in terms of macro :math:`F_1`, i.e., + :math:`1-F_1^M`, where :math:`F_1` is the harmonic mean of precision and recall, + defined as :math:`\\frac{2tp}{2tp+fp+fn}`, with `tp`, `fp`, and `fn` standing + for true positives, false positives, and false negatives, respectively. + `Macro` averaging means the :math:`F_1` is computed for each category independently, + and then averaged. + + :param y_true: array-like of true labels + :param y_pred: array-like of predicted labels + :return: :math:`1-F_1^M` + """ + return 1. - f1_score(y_true, y_pred, average='macro')
+ + + +
+[docs] +def acce(y_true, y_pred): + """Computes the error in terms of 1-accuracy. The accuracy is computed as + :math:`\\frac{tp+tn}{tp+fp+fn+tn}`, with `tp`, `fp`, `fn`, and `tn` standing + for true positives, false positives, false negatives, and true negatives, + respectively + + :param y_true: array-like of true labels + :param y_pred: array-like of predicted labels + :return: 1-accuracy + """ + return 1. - (y_true == y_pred).mean()
+ + + +
+[docs] +def mae(prevs, prevs_hat): + """Computes the mean absolute error (see :meth:`quapy.error.ae`) across the sample pairs. + + :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values + :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted + prevalence values + :return: mean absolute error + """ + return ae(prevs, prevs_hat).mean()
+ + + +
+[docs] +def ae(prevs, prevs_hat): + """Computes the absolute error between the two prevalence vectors. + Absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as + :math:`AE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}|\\hat{p}(y)-p(y)|`, + where :math:`\\mathcal{Y}` are the classes of interest. + + :param prevs: array-like of shape `(n_classes,)` with the true prevalence values + :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values + :return: absolute error + """ + assert prevs.shape == prevs_hat.shape, f'wrong shape {prevs.shape} vs. {prevs_hat.shape}' + return abs(prevs_hat - prevs).mean(axis=-1)
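+# Worked example: for p=[0.5, 0.3, 0.2] and p_hat=[0.4, 0.3, 0.3] the absolute
+# differences are [0.1, 0.0, 0.1], whose mean is 0.2/3:
+#
+#   >>> ae(np.asarray([0.5, 0.3, 0.2]), np.asarray([0.4, 0.3, 0.3]))   # ~0.0667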
+ + + +
+[docs] +def nae(prevs, prevs_hat): + """Computes the normalized absolute error between the two prevalence vectors. + Normalized absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as + :math:`NAE(p,\\hat{p})=\\frac{AE(p,\\hat{p})}{z_{AE}}`, + where :math:`z_{AE}=\\frac{2(1-\\min_{y\\in \\mathcal{Y}} p(y))}{|\\mathcal{Y}|}`, and :math:`\\mathcal{Y}` + are the classes of interest. + + :param prevs: array-like of shape `(n_classes,)` with the true prevalence values + :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values + :return: normalized absolute error + """ + assert prevs.shape == prevs_hat.shape, f'wrong shape {prevs.shape} vs. {prevs_hat.shape}' + return abs(prevs_hat - prevs).sum(axis=-1)/(2*(1-prevs.min(axis=-1)))
+ + + +
+[docs] +def mnae(prevs, prevs_hat): + """Computes the mean normalized absolute error (see :meth:`quapy.error.nae`) across the sample pairs. + + :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values + :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted + prevalence values + :return: mean normalized absolute error + """ + return nae(prevs, prevs_hat).mean()
+ + + +
+[docs] +def mse(prevs, prevs_hat): + """Computes the mean squared error (see :meth:`quapy.error.se`) across the sample pairs. + + :param prevs: array-like of shape `(n_samples, n_classes,)` with the + true prevalence values + :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the + predicted prevalence values + :return: mean squared error + """ + return se(prevs, prevs_hat).mean()
+ + + +
+[docs]
+def se(prevs, prevs_hat):
+ """Computes the squared error between the two prevalence vectors.
+ Squared error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as
+ :math:`SE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}(\\hat{p}(y)-p(y))^2`,
+ where
+ :math:`\\mathcal{Y}` are the classes of interest.
+
+ :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
+ :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
+ :return: squared error
+ """
+ return ((prevs_hat - prevs) ** 2).mean(axis=-1)
+ + + +
+[docs]
+def mkld(prevs, prevs_hat, eps=None):
+ """Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the
+ sample pairs. The distributions are smoothed using the `eps` factor
+ (see :meth:`quapy.error.smooth`).
+
+ :param prevs: array-like of shape `(n_samples, n_classes,)` with the true
+ prevalence values
+ :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
+ prevalence values
+ :param eps: smoothing factor. KLD is not defined in cases in which the distributions contain
+ zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.
+ If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`
+ (which has thus to be set beforehand).
+ :return: mean Kullback-Leibler divergence
+ """
+ return kld(prevs, prevs_hat, eps).mean()
+ + + +
+[docs] +def kld(prevs, prevs_hat, eps=None): + """Computes the Kullback-Leibler divergence between the two prevalence distributions. + Kullback-Leibler divergence between two prevalence distributions :math:`p` and :math:`\\hat{p}` + is computed as + :math:`KLD(p,\\hat{p})=D_{KL}(p||\\hat{p})= + \\sum_{y\\in \\mathcal{Y}} p(y)\\log\\frac{p(y)}{\\hat{p}(y)}`, + where :math:`\\mathcal{Y}` are the classes of interest. + The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`). + + :param prevs: array-like of shape `(n_classes,)` with the true prevalence values + :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values + :param eps: smoothing factor. KLD is not defined in cases in which the distributions contain + zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. + If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE` + (which has thus to be set beforehand). + :return: Kullback-Leibler divergence between the two distributions + """ + eps = __check_eps(eps) + smooth_prevs = prevs + eps + smooth_prevs_hat = prevs_hat + eps + return (smooth_prevs*np.log(smooth_prevs/smooth_prevs_hat)).sum(axis=-1)
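+# Worked example (sketch): with a sample size T=100, the customary smoothing factor
+# is eps = 1/(2*100) = 0.005:
+#
+#   >>> kld(np.asarray([0.5, 0.5]), np.asarray([0.8, 0.2]), eps=0.005)   # ~0.22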
+ + + +
+[docs]
+def mnkld(prevs, prevs_hat, eps=None):
+ """Computes the mean Normalized Kullback-Leibler divergence (see :meth:`quapy.error.nkld`)
+ across the sample pairs. The distributions are smoothed using the `eps` factor
+ (see :meth:`quapy.error.smooth`).
+
+ :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
+ :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
+ prevalence values
+ :param eps: smoothing factor. NKLD is not defined in cases in which the distributions contain
+ zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.
+ If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`
+ (which has thus to be set beforehand).
+ :return: mean Normalized Kullback-Leibler divergence
+ """
+ return nkld(prevs, prevs_hat, eps).mean()
+ + + +
+[docs]
+def nkld(prevs, prevs_hat, eps=None):
+ """Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.
+ Normalized Kullback-Leibler divergence between two prevalence distributions :math:`p` and
+ :math:`\\hat{p}` is computed as
+ :math:`NKLD(p,\\hat{p}) = 2\\frac{e^{KLD(p,\\hat{p})}}{e^{KLD(p,\\hat{p})}+1}-1`,
+ where
+ :math:`KLD(p,\\hat{p})` is the Kullback-Leibler divergence (see :meth:`quapy.error.kld`).
+ The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
+
+ :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
+ :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
+ :param eps: smoothing factor. NKLD is not defined in cases in which the distributions
+ contain zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample
+ size. If `eps=None`, the sample size will be taken from the environment variable
+ `SAMPLE_SIZE` (which has thus to be set beforehand).
+ :return: Normalized Kullback-Leibler divergence between the two distributions
+ """
+ ekld = np.exp(kld(prevs, prevs_hat, eps))
+ return 2. * ekld / (1 + ekld) - 1.
+ + + +
+[docs] +def mrae(prevs, prevs_hat, eps=None): + """Computes the mean relative absolute error (see :meth:`quapy.error.rae`) across + the sample pairs. The distributions are smoothed using the `eps` factor (see + :meth:`quapy.error.smooth`). + + :param prevs: array-like of shape `(n_samples, n_classes,)` with the true + prevalence values + :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted + prevalence values + :param eps: smoothing factor. `mrae` is not defined in cases in which the true + distribution contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, + with :math:`T` the sample size. If `eps=None`, the sample size will be taken from + the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand). + :return: mean relative absolute error + """ + return rae(prevs, prevs_hat, eps).mean()
+ + + +
+[docs]
+def rae(prevs, prevs_hat, eps=None):
+ """Computes the relative absolute error between the two prevalence vectors.
+ Relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}`
+ is computed as
+ :math:`RAE(p,\\hat{p})=
+ \\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}\\frac{|\\hat{p}(y)-p(y)|}{p(y)}`,
+ where :math:`\\mathcal{Y}` are the classes of interest.
+ The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
+
+ :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
+ :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
+ :param eps: smoothing factor. `rae` is not defined in cases in which the true distribution
+ contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the
+ sample size. If `eps=None`, the sample size will be taken from the environment variable
+ `SAMPLE_SIZE` (which has thus to be set beforehand).
+ :return: relative absolute error
+ """
+ eps = __check_eps(eps)
+ prevs = smooth(prevs, eps)
+ prevs_hat = smooth(prevs_hat, eps)
+ return (abs(prevs - prevs_hat) / prevs).mean(axis=-1)
+ + + +
+[docs]
+def nrae(prevs, prevs_hat, eps=None):
+ """Computes the normalized relative absolute error between the two prevalence vectors.
+ Normalized relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}`
+ is computed as
+ :math:`NRAE(p,\\hat{p})= \\frac{RAE(p,\\hat{p})}{z_{RAE}}`,
+ where
+ :math:`z_{RAE} = \\frac{|\\mathcal{Y}|-1+\\frac{1-\\min_{y\\in \\mathcal{Y}} p(y)}{\\min_{y\\in \\mathcal{Y}} p(y)}}{|\\mathcal{Y}|}`
+ and :math:`\\mathcal{Y}` are the classes of interest.
+ The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
+
+ :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
+ :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
+ :param eps: smoothing factor. `nrae` is not defined in cases in which the true distribution
+ contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the
+ sample size. If `eps=None`, the sample size will be taken from the environment variable
+ `SAMPLE_SIZE` (which has thus to be set beforehand).
+ :return: normalized relative absolute error
+ """
+ eps = __check_eps(eps)
+ prevs = smooth(prevs, eps)
+ prevs_hat = smooth(prevs_hat, eps)
+ min_p = prevs.min(axis=-1)
+ return (abs(prevs - prevs_hat) / prevs).sum(axis=-1)/(prevs.shape[-1]-1+(1-min_p)/min_p)
+ + + +
+[docs] +def mnrae(prevs, prevs_hat, eps=None): + """Computes the mean normalized relative absolute error (see :meth:`quapy.error.nrae`) across + the sample pairs. The distributions are smoothed using the `eps` factor (see + :meth:`quapy.error.smooth`). + + :param prevs: array-like of shape `(n_samples, n_classes,)` with the true + prevalence values + :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted + prevalence values + :param eps: smoothing factor. `mnrae` is not defined in cases in which the true + distribution contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, + with :math:`T` the sample size. If `eps=None`, the sample size will be taken from + the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand). + :return: mean normalized relative absolute error + """ + return nrae(prevs, prevs_hat, eps).mean()
+ + + +
+[docs] +def smooth(prevs, eps): + """ Smooths a prevalence distribution with :math:`\\epsilon` (`eps`) as: + :math:`\\underline{p}(y)=\\frac{\\epsilon+p(y)}{\\epsilon|\\mathcal{Y}|+ + \\displaystyle\\sum_{y\\in \\mathcal{Y}}p(y)}` + + :param prevs: array-like of shape `(n_classes,)` with the true prevalence values + :param eps: smoothing factor + :return: array-like of shape `(n_classes,)` with the smoothed distribution + """ + n_classes = prevs.shape[-1] + return (prevs + eps) / (eps * n_classes + 1)
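+# Worked example: with eps=0.005 and two classes the denominator is 0.005*2 + 1 = 1.01,
+# so a degenerate distribution is pulled off the boundary:
+#
+#   >>> smooth(np.asarray([0., 1.]), eps=0.005)   # ~[0.00495, 0.99505]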
+ + + +def __check_eps(eps=None): + if eps is None: + sample_size = qp.environ['SAMPLE_SIZE'] + if sample_size is None: + raise ValueError('eps was not defined, and qp.environ["SAMPLE_SIZE"] was not set') + eps = 1. / (2. * sample_size) + return eps + + +CLASSIFICATION_ERROR = {f1e, acce} +QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld} +QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld} +QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, nrae, mkld, mnkld, mrae} +CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR} +QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR} +QUANTIFICATION_ERROR_SINGLE_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR_SINGLE} +QUANTIFICATION_ERROR_SMOOTH_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR_SMOOTH} +ERROR_NAMES = \ + CLASSIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_SINGLE_NAMES + +f1_error = f1e +acc_error = acce +mean_absolute_error = mae +absolute_error = ae +mean_relative_absolute_error = mrae +relative_absolute_error = rae +normalized_absolute_error = nae +normalized_relative_absolute_error = nrae +mean_normalized_absolute_error = mnae +mean_normalized_relative_absolute_error = mnrae +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/evaluation.html b/docs/build/html/_modules/quapy/evaluation.html new file mode 100644 index 0000000..2208550 --- /dev/null +++ b/docs/build/html/_modules/quapy/evaluation.html @@ -0,0 +1,302 @@ + + + + + + quapy.evaluation — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.evaluation

+from typing import Union, Callable, Iterable
+import numpy as np
+from tqdm import tqdm
+import quapy as qp
+from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol, IterateProtocol
+from quapy.method.base import BaseQuantifier
+import pandas as pd
+
+
+
+[docs]
+def prediction(
+ model: BaseQuantifier,
+ protocol: AbstractProtocol,
+ aggr_speedup: Union[str, bool] = 'auto',
+ verbose=False):
+ """
+ Uses a quantification model to generate predictions for the samples generated via a specific protocol.
+ This function is central to all evaluation processes, and is endowed with an optimization to speed-up the
+ prediction of protocols that generate samples from a large collection. The optimization applies to aggregative
+ quantifiers only, and to OnLabelledCollectionProtocol protocols, and comes down to generating the classification
+ predictions once and for all, and then generating samples over the classification predictions (instead of over
+ the raw instances), so that the classifier prediction is never called again. This behaviour is obtained by
+ setting `aggr_speedup` to 'auto' or True, and is only carried out if the overall process is convenient in terms
+ of computations (e.g., if the number of classification predictions needed for the original collection exceeds the
+ number of classification predictions needed for all samples, then the optimization is not undertaken).
+
+ :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`
+ :param protocol: :class:`quapy.protocol.AbstractProtocol`; if this object is also instance of
+ :class:`quapy.protocol.OnLabelledCollectionProtocol`, then the aggregation speed-up can be run. This is the protocol
+ in charge of generating the samples for which the model has to issue class prevalence predictions.
+ :param aggr_speedup: whether or not to apply the speed-up. Set to "force" for applying it even if the number of
+ instances in the original collection on which the protocol acts is larger than the number of instances
+ in the samples to be generated. Set to True or "auto" (default) for letting QuaPy decide whether it is
+ convenient or not. Set to False to deactivate.
+ :param verbose: boolean, show or not information in stdout + :return: a tuple `(true_prevs, estim_prevs)` in which each element in the tuple is an array of shape + `(n_samples, n_classes)` containing the true, or predicted, prevalence values for each sample + """ + assert aggr_speedup in [False, True, 'auto', 'force'], 'invalid value for aggr_speedup' + + sout = lambda x: print(x) if verbose else None + + apply_optimization = False + + if aggr_speedup in [True, 'auto', 'force']: + # checks whether the prediction can be made more efficiently; this check consists in verifying if the model is + # of type aggregative, if the protocol is based on LabelledCollection, and if the total number of documents to + # classify using the protocol would exceed the number of test documents in the original collection + from quapy.method.aggregative import AggregativeQuantifier + if isinstance(model, AggregativeQuantifier) and isinstance(protocol, OnLabelledCollectionProtocol): + if aggr_speedup == 'force': + apply_optimization = True + sout(f'forcing aggregative speedup') + elif hasattr(protocol, 'sample_size'): + nD = len(protocol.get_labelled_collection()) + samplesD = protocol.total() * protocol.sample_size + if nD < samplesD: + apply_optimization = True + sout(f'speeding up the prediction for the aggregative quantifier, ' + f'total classifications {nD} instead of {samplesD}') + + if apply_optimization: + pre_classified = model.classify(protocol.get_labelled_collection().instances) + protocol_with_predictions = protocol.on_preclassified_instances(pre_classified) + return __prediction_helper(model.aggregate, protocol_with_predictions, verbose) + else: + return __prediction_helper(model.quantify, protocol, verbose)
+ + + +def __prediction_helper(quantification_fn, protocol: AbstractProtocol, verbose=False): + true_prevs, estim_prevs = [], [] + for sample_instances, sample_prev in tqdm(protocol(), total=protocol.total(), desc='predicting') if verbose else protocol(): + estim_prevs.append(quantification_fn(sample_instances)) + true_prevs.append(sample_prev) + + true_prevs = np.asarray(true_prevs) + estim_prevs = np.asarray(estim_prevs) + + return true_prevs, estim_prevs + + +
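+# Usage sketch (illustrative; names such as fetch_reviews, PACC and APP are assumed
+# from other QuaPy modules):
+#
+#   >>> import quapy as qp
+#   >>> from quapy.method.aggregative import PACC
+#   >>> from quapy.protocol import APP
+#   >>> from sklearn.linear_model import LogisticRegression
+#   >>> data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)
+#   >>> model = PACC(LogisticRegression()).fit(data.training)
+#   >>> protocol = APP(data.test, sample_size=100, repeats=10)
+#   >>> true_prevs, estim_prevs = prediction(model, protocol, aggr_speedup='auto')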
+[docs]
+def evaluation_report(model: BaseQuantifier,
+ protocol: AbstractProtocol,
+ error_metrics: Iterable[Union[str,Callable]] = 'mae',
+ aggr_speedup: Union[str, bool] = 'auto',
+ verbose=False):
+ """
+ Generates a report (a pandas' DataFrame) containing information on the evaluation of the model according
+ to a specific protocol and in terms of one or more evaluation metrics (errors).
+
+ :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`
+ :param protocol: :class:`quapy.protocol.AbstractProtocol`; if this object is also instance of
+ :class:`quapy.protocol.OnLabelledCollectionProtocol`, then the aggregation speed-up can be run. This is the protocol
+ in charge of generating the samples in which the model is evaluated.
+ :param error_metrics: a string, or list of strings, representing the name(s) of an error function in `qp.error`
+ (e.g., 'mae', the default value), or a callable function, or a list of callable functions, implementing
+ the error function itself.
+ :param aggr_speedup: whether or not to apply the speed-up. Set to "force" for applying it even if the number of
+ instances in the original collection on which the protocol acts is larger than the number of instances
+ in the samples to be generated. Set to True or "auto" (default) for letting QuaPy decide whether it is
+ convenient or not. Set to False to deactivate.
+ :param verbose: boolean, show or not information in stdout
+ :return: a pandas' DataFrame containing the columns 'true-prev' (the true prevalence of each sample),
+ 'estim-prev' (the prevalence estimated by the model for each sample), and as many columns as error metrics
+ have been indicated, each displaying the score in terms of that metric for every sample.
+ """
+
+ true_prevs, estim_prevs = prediction(model, protocol, aggr_speedup=aggr_speedup, verbose=verbose)
+ return _prevalence_report(true_prevs, estim_prevs, error_metrics)
+ + + +def _prevalence_report(true_prevs, estim_prevs, error_metrics: Iterable[Union[str, Callable]] = 'mae'): + + if isinstance(error_metrics, str): + error_metrics = [error_metrics] + + error_funcs = [qp.error.from_name(e) if isinstance(e, str) else e for e in error_metrics] + assert all(hasattr(e, '__call__') for e in error_funcs), 'invalid error functions' + error_names = [e.__name__ for e in error_funcs] + + row_entries = [] + for true_prev, estim_prev in zip(true_prevs, estim_prevs): + series = {'true-prev': true_prev, 'estim-prev': estim_prev} + for error_name, error_metric in zip(error_names, error_funcs): + score = error_metric(true_prev, estim_prev) + series[error_name] = score + row_entries.append(series) + + df = pd.DataFrame.from_records(row_entries) + return df + + +
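+# Usage sketch (continuing the example above): the report is a DataFrame with one row
+# per generated sample; averaging the error columns summarizes performance:
+#
+#   >>> report = evaluation_report(model, protocol, error_metrics=['mae', 'mrae'])
+#   >>> report[['mae', 'mrae']].mean()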
+[docs] +def evaluate( + model: BaseQuantifier, + protocol: AbstractProtocol, + error_metric: Union[str, Callable], + aggr_speedup: Union[str, bool] = 'auto', + verbose=False): + """ + Evaluates a quantification model according to a specific sample generation protocol and in terms of one + evaluation metric (error). + + :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier` + :param protocol: :class:`quapy.protocol.AbstractProtocol`; if this object is also instance of + :class:`quapy.protocol.OnLabelledCollectionProtocol`, then the aggregation speed-up can be run. This is the + protocol in charge of generating the samples in which the model is evaluated. + :param error_metric: a string representing the name(s) of an error function in `qp.error` + (e.g., 'mae'), or a callable function implementing the error function itself. + :param aggr_speedup: whether or not to apply the speed-up. Set to "force" for applying it even if the number of + instances in the original collection on which the protocol acts is larger than the number of instances + in the samples to be generated. Set to True or "auto" (default) for letting QuaPy decide whether it is + convenient or not. Set to False to deactivate. + :param verbose: boolean, show or not information in stdout + :return: if the error metric is not averaged (e.g., 'ae', 'rae'), returns an array of shape `(n_samples,)` with + the error scores for each sample; if the error metric is averaged (e.g., 'mae', 'mrae') then returns + a single float + """ + + if isinstance(error_metric, str): + error_metric = qp.error.from_name(error_metric) + true_prevs, estim_prevs = prediction(model, protocol, aggr_speedup=aggr_speedup, verbose=verbose) + return error_metric(true_prevs, estim_prevs)
+ + + +
+[docs] +def evaluate_on_samples( + model: BaseQuantifier, + samples: Iterable[qp.data.LabelledCollection], + error_metric: Union[str, Callable], + verbose=False): + """ + Evaluates a quantification model on a given set of samples and in terms of one evaluation metric (error). + + :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier` + :param samples: a list of samples on which the quantifier is to be evaluated + :param error_metric: a string representing the name(s) of an error function in `qp.error` + (e.g., 'mae'), or a callable function implementing the error function itself. + :param verbose: boolean, show or not information in stdout + :return: if the error metric is not averaged (e.g., 'ae', 'rae'), returns an array of shape `(n_samples,)` with + the error scores for each sample; if the error metric is averaged (e.g., 'mae', 'mrae') then returns + a single float + """ + + return evaluate(model, IterateProtocol(samples), error_metric, aggr_speedup=False, verbose=verbose)
+ + + + + + +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/functional.html b/docs/build/html/_modules/quapy/functional.html new file mode 100644 index 0000000..1d41ac8 --- /dev/null +++ b/docs/build/html/_modules/quapy/functional.html @@ -0,0 +1,518 @@ + + + + + + quapy.functional — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.functional

+import itertools
+from collections import defaultdict
+from typing import Union, Callable
+
+import scipy
+import numpy as np
+
+
+
+[docs] +def prevalence_linspace(n_prevalences=21, repeats=1, smooth_limits_epsilon=0.01): + """ + Produces an array of uniformly separated values of prevalence. + By default, produces an array of 21 prevalence values, with + step 0.05 and with the limits smoothed, i.e.: + [0.01, 0.05, 0.10, 0.15, ..., 0.90, 0.95, 0.99] + + :param n_prevalences: the number of prevalence values to sample from the [0,1] interval (default 21) + :param repeats: number of times each prevalence is to be repeated (defaults to 1) + :param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1 + :return: an array of uniformly separated prevalence values + """ + p = np.linspace(0., 1., num=n_prevalences, endpoint=True) + p[0] += smooth_limits_epsilon + p[-1] -= smooth_limits_epsilon + if p[0] > p[1]: + raise ValueError(f'the smoothing in the limits is greater than the prevalence step') + if repeats > 1: + p = np.repeat(p, repeats) + return p
+ + + +
+[docs]
+def prevalence_from_labels(labels, classes):
+ """
+ Computes the prevalence values from a vector of labels.
+
+ :param labels: array-like of shape `(n_instances)` with the label for each instance
+ :param classes: the class labels. This is needed in order to correctly compute the prevalence vector even when
+ some classes have no examples.
+ :return: an ndarray of shape `(len(classes))` with the class prevalence values
+ """
+ if labels.ndim != 1:
+ raise ValueError(f'param labels does not seem to be a ndarray of label predictions')
+ unique, counts = np.unique(labels, return_counts=True)
+ by_class = defaultdict(lambda:0, dict(zip(unique, counts)))
+ prevalences = np.asarray([by_class[class_] for class_ in classes], dtype=float)
+ prevalences /= prevalences.sum()
+ return prevalences
+ + + +
+[docs]
+def prevalence_from_probabilities(posteriors, binarize: bool = False):
+ """
+ Returns a vector of prevalence values from a matrix of posterior probabilities.
+
+ :param posteriors: array-like of shape `(n_instances, n_classes,)` with posterior probabilities for each class
+ :param binarize: set to True (default is False) for computing the prevalence values on crisp decisions (i.e.,
+ converting the vectors of posterior probabilities into class indices, by taking the argmax).
+ :return: array of shape `(n_classes,)` containing the prevalence values
+ """
+ if posteriors.ndim != 2:
+ raise ValueError(f'param posteriors does not seem to be a ndarray of posterior probabilities')
+ if binarize:
+ predictions = np.argmax(posteriors, axis=-1)
+ return prevalence_from_labels(predictions, np.arange(posteriors.shape[1]))
+ else:
+ prevalences = posteriors.mean(axis=0)
+ prevalences /= prevalences.sum()
+ return prevalences
+ + + +
+[docs] +def as_binary_prevalence(positive_prevalence: Union[float, np.ndarray], clip_if_necessary=False): + """ + Helper that, given a float representing the prevalence for the positive class, returns a np.ndarray of two + values representing a binary distribution. + + :param positive_prevalence: prevalence for the positive class + :param clip_if_necessary: if True, clips the value in [0,1] in order to guarantee the resulting distribution + is valid. If False, it then checks that the value is in the valid range, and raises an error if not. + :return: np.ndarray of shape `(2,)` + """ + if clip_if_necessary: + positive_prevalence = np.clip(positive_prevalence, 0, 1) + else: + assert 0 <= positive_prevalence <= 1, 'the value provided is not a valid prevalence for the positive class' + return np.asarray([1-positive_prevalence, positive_prevalence]).T
+ + + + +
+[docs]
+def HellingerDistance(P, Q) -> float:
+ """
+ Computes the Hellinger Distance (HD) between (discretized) distributions `P` and `Q`.
+ The HD for two discrete distributions of `k` bins is defined as:
+
+ .. math::
+ HD(P,Q) = \\frac{ 1 }{ \\sqrt{ 2 } } \\sqrt{ \\sum_{i=1}^k ( \\sqrt{p_i} - \\sqrt{q_i} )^2 }
+
+ Note that the implementation below omits the constant factor :math:`1/\\sqrt{2}`, which is
+ irrelevant when the distance is used as a loss to be minimized.
+
+ :param P: real-valued array-like of shape `(k,)` representing a discrete distribution
+ :param Q: real-valued array-like of shape `(k,)` representing a discrete distribution
+ :return: float
+ """
+ return np.sqrt(np.sum((np.sqrt(P) - np.sqrt(Q))**2))
+ + + +
+[docs]
+def TopsoeDistance(P, Q, epsilon=1e-20):
+ """
+ Topsoe distance between two (discretized) distributions `P` and `Q`.
+ The Topsoe distance for two discrete distributions of `k` bins is defined as:
+
+ .. math::
+ Topsoe(P,Q) = \\sum_{i=1}^k \\left( p_i \\log\\left(\\frac{ 2 p_i + \\epsilon }{ p_i+q_i+\\epsilon }\\right) +
+ q_i \\log\\left(\\frac{ 2 q_i + \\epsilon }{ p_i+q_i+\\epsilon }\\right) \\right)
+
+ :param P: real-valued array-like of shape `(k,)` representing a discrete distribution
+ :param Q: real-valued array-like of shape `(k,)` representing a discrete distribution
+ :param epsilon: small smoothing constant added to numerators and denominators to avoid undefined
+ operations on zero-valued bins (default 1e-20)
+ :return: float
+ """
+ return np.sum(P*np.log((2*P+epsilon)/(P+Q+epsilon)) + Q*np.log((2*Q+epsilon)/(P+Q+epsilon)))
+ + + +
+[docs]
+def uniform_prevalence_sampling(n_classes, size=1):
+ """
+ Implements the `Kraemer algorithm <http://www.cs.cmu.edu/~nasmith/papers/smith+tromble.tr04.pdf>`_
+ for sampling uniformly at random from the unit simplex. This implementation is adapted from this
+ `post <https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex>`_.
+
+ :param n_classes: integer, number of classes (dimensionality of the simplex)
+ :param size: number of samples to return
+ :return: `np.ndarray` of shape `(size, n_classes,)` if `size>1`, or of shape `(n_classes,)` otherwise
+ """
+ if n_classes == 2:
+ u = np.random.rand(size)
+ u = np.vstack([1-u, u]).T
+ else:
+ u = np.random.rand(size, n_classes-1)
+ u.sort(axis=-1)
+ _0s = np.zeros(shape=(size, 1))
+ _1s = np.ones(shape=(size, 1))
+ a = np.hstack([_0s, u])
+ b = np.hstack([u, _1s])
+ u = b-a
+ if size == 1:
+ u = u.flatten()
+ return u
+ + + +uniform_simplex_sampling = uniform_prevalence_sampling + + +
+[docs] +def strprev(prevalences, prec=3): + """ + Returns a string representation for a prevalence vector. E.g., + + >>> strprev([1/3, 2/3], prec=2) + >>> '[0.33, 0.67]' + + :param prevalences: a vector of prevalence values + :param prec: float precision + :return: string + """ + return '['+ ', '.join([f'{p:.{prec}f}' for p in prevalences]) + ']'
+ + + +
+[docs] +def adjusted_quantification(prevalence_estim, tpr, fpr, clip=True): + """ + Implements the adjustment of ACC and PACC for the binary case. The adjustment for a prevalence estimate of the + positive class `p` comes down to computing: + + .. math:: + ACC(p) = \\frac{ p - fpr }{ tpr - fpr } + + :param prevalence_estim: float, the estimated value for the positive class + :param tpr: float, the true positive rate of the classifier + :param fpr: float, the false positive rate of the classifier + :param clip: set to True (default) to clip values that might exceed the range [0,1] + :return: float, the adjusted count + """ + + den = tpr - fpr + if den == 0: + den += 1e-8 + adjusted = (prevalence_estim - fpr) / den + if clip: + adjusted = np.clip(adjusted, 0., 1.) + return adjusted
+ + + +
+[docs] +def normalize_prevalence(prevalences): + """ + Normalize a vector or matrix of prevalence values. The normalization consists of applying a L1 normalization in + cases in which the prevalence values are not all-zeros, and to convert the prevalence values into `1/n_classes` in + cases in which all values are zero. + + :param prevalences: array-like of shape `(n_classes,)` or of shape `(n_samples, n_classes,)` with prevalence values + :return: a normalized vector or matrix of prevalence values + """ + prevalences = np.asarray(prevalences) + n_classes = prevalences.shape[-1] + accum = prevalences.sum(axis=-1, keepdims=True) + prevalences = np.true_divide(prevalences, accum, where=accum>0) + allzeros = accum.flatten()==0 + if any(allzeros): + if prevalences.ndim == 1: + prevalences = np.full(shape=n_classes, fill_value=1./n_classes) + else: + prevalences[accum.flatten()==0] = np.full(shape=n_classes, fill_value=1./n_classes) + return prevalences
+ + + +def __num_prevalence_combinations_depr(n_prevpoints:int, n_classes:int, n_repeats:int=1): + """ + Computes the number of prevalence combinations in the n_classes-dimensional simplex if `nprevpoints` equally distant + prevalence values are generated and `n_repeats` repetitions are requested. + + :param n_classes: integer, number of classes + :param n_prevpoints: integer, number of prevalence points. + :param n_repeats: integer, number of repetitions for each prevalence combination + :return: The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the + number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0] + """ + __cache={} + def __f(nc,np): + if (nc,np) in __cache: # cached result + return __cache[(nc,np)] + if nc==1: # stop condition + return 1 + else: # recursive call + x = sum([__f(nc-1, np-i) for i in range(np)]) + __cache[(nc,np)] = x + return x + return __f(n_classes, n_prevpoints) * n_repeats + + +
+[docs] +def num_prevalence_combinations(n_prevpoints:int, n_classes:int, n_repeats:int=1): + """ + Computes the number of valid prevalence combinations in the n_classes-dimensional simplex if `n_prevpoints` equally + distant prevalence values are generated and `n_repeats` repetitions are requested. + The computation comes down to calculating: + + .. math:: + \\binom{N+C-1}{C-1} \\times r + + where `N` is `n_prevpoints-1`, i.e., the number of probability mass blocks to allocate, `C` is the number of + classes, and `r` is `n_repeats`. This solution comes from the + `Stars and Bars <https://brilliant.org/wiki/integer-equations-star-and-bars/>`_ problem. + + :param n_classes: integer, number of classes + :param n_prevpoints: integer, number of prevalence points. + :param n_repeats: integer, number of repetitions for each prevalence combination + :return: The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the + number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0] + """ + N = n_prevpoints-1 + C = n_classes + r = n_repeats + return int(scipy.special.binom(N + C - 1, C - 1) * r)
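+# Worked instances of the stars-and-bars formula: with n_prevpoints=5 there are N=4
+# mass blocks, so C=2 classes give binom(5,1)=5 and C=3 classes give binom(6,2)=15:
+#
+#   >>> num_prevalence_combinations(n_prevpoints=5, n_classes=2)
+#   5
+#   >>> num_prevalence_combinations(n_prevpoints=5, n_classes=3)
+#   15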
+ + + +
+[docs]
+def get_nprevpoints_approximation(combinations_budget:int, n_classes:int, n_repeats:int=1):
+ """
+ Searches for the largest number of (equidistant) prevalence points to define for each of the `n_classes` classes so
+ that the number of valid prevalence values generated as combinations of prevalence points (points in a
+ `n_classes`-dimensional simplex) do not exceed combinations_budget.
+
+ :param combinations_budget: integer, maximum number of combinations allowed
+ :param n_classes: integer, number of classes
+ :param n_repeats: integer, number of repetitions for each prevalence combination
+ :return: the largest number of prevalence points for which the number of valid prevalence combinations
+ does not exceed `combinations_budget`
+ """
+ assert n_classes > 0 and n_repeats > 0 and combinations_budget > 0, 'parameters must be positive integers'
+ n_prevpoints = 1
+ while True:
+ combinations = num_prevalence_combinations(n_prevpoints, n_classes, n_repeats)
+ if combinations > combinations_budget:
+ return n_prevpoints-1
+ else:
+ n_prevpoints += 1
+ + + +
+[docs]
+def check_prevalence_vector(p, raise_exception=False, toleranze=1e-08):
+ """
+ Checks that p is a valid prevalence vector, i.e., that it contains values in [0,1] and that the values sum up to 1.
+
+ :param p: the prevalence vector to check
+ :param raise_exception: if True, raises a ValueError for invalid vectors instead of returning False (default False)
+ :param toleranze: numerical tolerance used when checking that the values sum up to 1 (default 1e-08)
+ :return: True if `p` is valid, False otherwise
+ """
+ p = np.asarray(p)
+ if not all(p>=0):
+ if raise_exception:
+ raise ValueError('the prevalence vector contains negative numbers')
+ return False
+ if not all(p<=1):
+ if raise_exception:
+ raise ValueError('the prevalence vector contains values >1')
+ return False
+ if not np.isclose(p.sum(), 1, atol=toleranze):
+ if raise_exception:
+ raise ValueError('the prevalence vector does not sum up to 1')
+ return False
+ return True
+ + + +
+[docs]
+def get_divergence(divergence: Union[str, Callable]):
+ """
+ Resolves a divergence function from its name, or returns the callable itself.
+
+ :param divergence: either a string ('HD' for the Hellinger distance, 'topsoe' for the Topsoe distance)
+ or a callable implementing the divergence
+ :return: a callable implementing the requested divergence
+ """
+ if isinstance(divergence, str):
+ if divergence=='HD':
+ return HellingerDistance
+ elif divergence=='topsoe':
+ return TopsoeDistance
+ else:
+ raise ValueError(f'unknown divergence {divergence}')
+ elif callable(divergence):
+ return divergence
+ else:
+ raise ValueError(f'argument "divergence" not understood; use a str or a callable function')
+ + + +
+[docs]
+def argmin_prevalence(loss, n_classes, method='optim_minimize'):
+ """
+ Searches for the prevalence vector that minimizes the `loss` function.
+
+ :param loss: callable, the function to minimize
+ :param n_classes: integer, the dimensionality of the prevalence vector
+ :param method: the search strategy; one of 'optim_minimize' (default), 'linear_search', or
+ 'ternary_search' (not yet implemented)
+ :return: the best prevalence vector found
+ """
+ if method == 'optim_minimize':
+ return optim_minimize(loss, n_classes)
+ elif method == 'linear_search':
+ return linear_search(loss, n_classes)
+ elif method == 'ternary_search':
+ raise NotImplementedError()
+ else:
+ raise NotImplementedError()
+ + + +
+[docs]
+def optim_minimize(loss, n_classes):
+ """
+ Searches for the optimal prevalence values, i.e., an `n_classes`-dimensional vector of the (`n_classes`-1)-simplex
+ that yields the smallest loss. This optimization is carried out by means of a constrained search using scipy's
+ SLSQP routine.
+
+ :param loss: (callable) the function to minimize
+ :param n_classes: (int) the number of classes, i.e., the dimensionality of the prevalence vector
+ :return: (ndarray) the best prevalence vector found
+ """
+ from scipy import optimize
+
+ # the initial point is set as the uniform distribution
+ uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
+
+ # solutions are bounded to those contained in the unit-simplex
+ bounds = tuple((0, 1) for _ in range(n_classes)) # values in [0,1]
+ constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)}) # values summing up to 1
+ r = optimize.minimize(loss, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
+ return r.x
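+# Toy example (sketch): minimizing the squared distance to a known target prevalence
+# recovers that target, since it lies inside the simplex:
+#
+#   >>> target = np.asarray([0.2, 0.5, 0.3])
+#   >>> optim_minimize(lambda prev: ((prev - target)**2).sum(), n_classes=3)   # ~target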
+ + + + + +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/method/_kdey.html b/docs/build/html/_modules/quapy/method/_kdey.html new file mode 100644 index 0000000..198113f --- /dev/null +++ b/docs/build/html/_modules/quapy/method/_kdey.html @@ -0,0 +1,503 @@ + + + + + + quapy.method._kdey — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.method._kdey

+from typing import Union
+import numpy as np
+from sklearn.base import BaseEstimator
+from sklearn.neighbors import KernelDensity
+
+import quapy as qp
+from quapy.data import LabelledCollection
+from quapy.method.aggregative import AggregativeSoftQuantifier
+import quapy.functional as F
+
+from sklearn.metrics.pairwise import rbf_kernel
+
+
+
+[docs]
+class KDEBase:
+ """
+ Common ancestor for KDE-based methods. Implements some common routines.
+ """
+
+ BANDWIDTH_METHOD = ['scott', 'silverman']
+
+ @classmethod
+ def _check_bandwidth(cls, bandwidth):
+ """
+ Checks that the bandwidth parameter is correct
+
+ :param bandwidth: either a string (see BANDWIDTH_METHOD) or a float
+ :return: nothing, but raises an exception for invalid values
+ """
+ assert bandwidth in KDEBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
+ f'invalid bandwidth, valid ones are {KDEBase.BANDWIDTH_METHOD} or float values'
+ if isinstance(bandwidth, float):
+ assert 0 < bandwidth < 1, "the bandwidth for KDEy should be in (0,1), since this method models the unit simplex"
+
+[docs] + def get_kde_function(self, X, bandwidth): + """ + Wraps the KDE function from scikit-learn. + + :param X: data for which the density function is to be estimated + :param bandwidth: the bandwidth of the kernel + :return: a scikit-learn's KernelDensity object + """ + return KernelDensity(bandwidth=bandwidth).fit(X)
+ + +
+[docs]
+ def pdf(self, kde, X):
+ """
+ Wraps the density evaluation of scikit-learn's KDE. Scikit-learn returns log-scores (s), so this
+ function returns :math:`e^{s}`
+
+ :param kde: a previously fit KDE function
+ :param X: the data for which the density is to be estimated
+ :return: np.ndarray with the densities
+ """
+ return np.exp(kde.score_samples(X))
+ + +
+[docs] + def get_mixture_components(self, X, y, n_classes, bandwidth): + """ + Returns an array containing the mixture components, i.e., the KDE functions for each class. + + :param X: the data containing the covariates + :param y: the class labels + :param n_classes: integer, the number of classes + :param bandwidth: float, the bandwidth of the kernel + :return: a list of KernelDensity objects, each fitted with the corresponding class-specific covariates + """ + return [self.get_kde_function(X[y == cat], bandwidth) for cat in range(n_classes)]
+
+ + + + +
+[docs]
+class KDEyML(AggregativeSoftQuantifier, KDEBase):
+ """
+ Kernel Density Estimation model for quantification (KDEy) relying on the Kullback-Leibler divergence (KLD) as
+ the divergence measure to be minimized. This method was first proposed in the paper
+ `Kernel Density Estimation for Multiclass Quantification <https://arxiv.org/abs/2401.00490>`_, in which
+ the authors show that minimizing the distribution matching criterion for KLD is akin to performing
+ maximum likelihood (ML).
+
+ The distribution matching optimization problem comes down to solving:
+
+ :math:`\\hat{\\alpha} = \\arg\\min_{\\alpha\\in\\Delta^{n-1}} \\mathcal{D}(\\boldsymbol{p}_{\\alpha}||q_{\\widetilde{U}})`
+
+ where :math:`p_{\\alpha}` is the mixture of class-specific KDEs with mixture parameter (hence class prevalence)
+ :math:`\\alpha` defined by
+
+ :math:`\\boldsymbol{p}_{\\alpha}(\\widetilde{x}) = \\sum_{i=1}^n \\alpha_i p_{\\widetilde{L}_i}(\\widetilde{x})`
+
+ where :math:`p_X(\\boldsymbol{x}) = \\frac{1}{|X|} \\sum_{x_i\\in X} K\\left(\\frac{x-x_i}{h}\\right)` is the
+ KDE function that uses the datapoints in X as the kernel centers.
+
+ In KDEy-ML, the divergence is taken to be the Kullback-Leibler Divergence. This is equivalent to solving:
+ :math:`\\hat{\\alpha} = \\arg\\min_{\\alpha\\in\\Delta^{n-1}} -
+ \\mathbb{E}_{q_{\\widetilde{U}}} \\left[ \\log \\boldsymbol{p}_{\\alpha}(\\widetilde{x}) \\right]`
+
+ which corresponds to the maximum likelihood estimate.
+
+ :param classifier: a scikit-learn probabilistic classifier.
+ :param val_split: specifies the data used for generating classifier predictions. This specification
+ can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
+ be extracted from the training set; or as an integer (default 10), indicating that the predictions
+ are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
+ for `k`); or as a collection defining the specific set of data to use for validation.
+ Alternatively, this set can be specified at fit time by indicating the exact set of data
+ on which the predictions are to be generated.
+ :param bandwidth: float, the bandwidth of the Kernel
+ :param n_jobs: number of parallel workers
+ :param random_state: a seed to be set before fitting any base quantifier (default None)
+ """
+
+ def __init__(self, classifier: BaseEstimator, val_split=10, bandwidth=0.1, n_jobs=None, random_state=None):
+ self._check_bandwidth(bandwidth)
+ self.classifier = classifier
+ self.val_split = val_split
+ self.bandwidth = bandwidth
+ self.n_jobs = n_jobs
+ self.random_state=random_state
+
+[docs] + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + self.mix_densities = self.get_mixture_components(*classif_predictions.Xy, data.n_classes, self.bandwidth) + return self
+ + +
+[docs]
+    def aggregate(self, posteriors: np.ndarray):
+        """
+        Searches for the mixture model parameter (the sought prevalence values) that maximizes the likelihood
+        of the data (i.e., that minimizes the negative log-likelihood)
+
+        :param posteriors: instances in the sample converted into posterior probabilities
+        :return: a vector of class prevalence estimates
+        """
+        # seed the RNG used by the optimization routine, so that results are reproducible
+        if self.random_state is not None:
+            np.random.seed(self.random_state)
+        epsilon = 1e-10
+        n_classes = len(self.mix_densities)
+        test_densities = [self.pdf(kde_i, posteriors) for kde_i in self.mix_densities]
+
+        def neg_loglikelihood(prev):
+            test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip(prev, test_densities))
+            test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
+            return -np.sum(test_loglikelihood)
+
+        return F.optim_minimize(neg_loglikelihood, n_classes)
+
+ + + +
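A hypothetical usage sketch (assumptions: a training LabelledCollection named `train` and test covariates `X_test` are available, and the import path mirrors this module, here assumed to be `quapy.method._kdey`):

from sklearn.linear_model import LogisticRegression
from quapy.method._kdey import KDEyML

kdey = KDEyML(LogisticRegression(), val_split=10, bandwidth=0.1, random_state=0)
kdey.fit(train)                      # trains the classifier and fits one KDE per class
prev_estim = kdey.quantify(X_test)   # a vector of class prevalence estimates summing to 1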
+[docs]
+class KDEyHD(AggregativeSoftQuantifier, KDEBase):
+    """
+    Kernel Density Estimation model for quantification (KDEy) relying on the squared Hellinger Distance (HD) as
+    the divergence measure to be minimized. This method was first proposed in the paper
+    `Kernel Density Estimation for Multiclass Quantification <https://arxiv.org/abs/2401.00490>`_, in which
+    the authors proposed a Monte Carlo approach for minimizing the divergence.
+
+    The distribution matching optimization problem comes down to solving:
+
+    :math:`\\hat{\\alpha} = \\arg\\min_{\\alpha\\in\\Delta^{n-1}} \\mathcal{D}(\\boldsymbol{p}_{\\alpha}||q_{\\widetilde{U}})`
+
+    where :math:`p_{\\alpha}` is the mixture of class-specific KDEs with mixture parameter (hence class prevalence)
+    :math:`\\alpha` defined by
+
+    :math:`\\boldsymbol{p}_{\\alpha}(\\widetilde{x}) = \\sum_{i=1}^n \\alpha_i p_{\\widetilde{L}_i}(\\widetilde{x})`
+
+    where :math:`p_X(\\boldsymbol{x}) = \\frac{1}{|X|} \\sum_{x_i\\in X} K\\left(\\frac{x-x_i}{h}\\right)` is the
+    KDE function that uses the datapoints in X as the kernel centers.
+
+    In KDEy-HD, the divergence is taken to be the squared Hellinger Distance, an f-divergence with corresponding
+    f-generator function given by:
+
+    :math:`f(u)=(\\sqrt{u}-1)^2`
+
+    The authors proposed a Monte Carlo solution that relies on importance sampling:
+
+    :math:`\\hat{D}_f(p||q)= \\frac{1}{t} \\sum_{i=1}^t f\\left(\\frac{p(x_i)}{q(x_i)}\\right) \\frac{q(x_i)}{r(x_i)}`
+
+    where the datapoints (trials) :math:`x_1,\\ldots,x_t\\sim_{\\mathrm{iid}} r` with :math:`r` the
+    uniform distribution.
+
+    :param classifier: a sklearn's Estimator that generates a probabilistic classifier.
+    :param val_split: specifies the data used for generating classifier predictions. This specification
+        can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
+        be extracted from the training set; or as an integer (default 10), indicating that the predictions
+        are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
+        for `k`); or as a collection defining the specific set of data to use for validation.
+        Alternatively, this set can be specified at fit time by indicating the exact set of data
+        on which the predictions are to be generated.
+    :param divergence: a string identifying the divergence to minimize (currently, only 'HD', the squared
+        Hellinger Distance, is supported)
+    :param bandwidth: float, the bandwidth of the Kernel
+    :param n_jobs: number of parallel workers
+    :param random_state: a seed to be set before fitting any base quantifier (default None)
+    :param montecarlo_trials: number of Monte Carlo trials (default 10000)
+    """
+
+    def __init__(self, classifier: BaseEstimator, val_split=10, divergence: str='HD',
+                 bandwidth=0.1, n_jobs=None, random_state=None, montecarlo_trials=10000):
+
+        self._check_bandwidth(bandwidth)
+        self.classifier = classifier
+        self.val_split = val_split
+        self.divergence = divergence
+        self.bandwidth = bandwidth
+        self.n_jobs = n_jobs
+        self.random_state = random_state
+        self.montecarlo_trials = montecarlo_trials
+
+[docs] + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + self.mix_densities = self.get_mixture_components(*classif_predictions.Xy, data.n_classes, self.bandwidth) + + N = self.montecarlo_trials + rs = self.random_state + n = data.n_classes + self.reference_samples = np.vstack([kde_i.sample(N//n, random_state=rs) for kde_i in self.mix_densities]) + self.reference_classwise_densities = np.asarray([self.pdf(kde_j, self.reference_samples) for kde_j in self.mix_densities]) + self.reference_density = np.mean(self.reference_classwise_densities, axis=0) # equiv. to (uniform @ self.reference_classwise_densities) + + return self
+ + +
+[docs] + def aggregate(self, posteriors: np.ndarray): + # we retain all n*N examples (sampled from a mixture with uniform parameter), and then + # apply importance sampling (IS). In this version we compute D(p_alpha||q) with IS + n_classes = len(self.mix_densities) + + test_kde = self.get_kde_function(posteriors, self.bandwidth) + test_densities = self.pdf(test_kde, self.reference_samples) + + def f_squared_hellinger(u): + return (np.sqrt(u)-1)**2 + + # todo: this will fail when self.divergence is a callable, and is not the right place to do it anyway + if self.divergence.lower() == 'hd': + f = f_squared_hellinger + else: + raise ValueError('only squared HD is currently implemented') + + epsilon = 1e-10 + qs = test_densities + epsilon + rs = self.reference_density + epsilon + iw = qs/rs #importance weights + p_class = self.reference_classwise_densities + epsilon + fracs = p_class/qs + + def divergence(prev): + # ps / qs = (prev @ p_class) / qs = prev @ (p_class / qs) = prev @ fracs + ps_div_qs = prev @ fracs + return np.mean( f(ps_div_qs) * iw ) + + return F.optim_minimize(divergence, n_classes)
+
+ + + +
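A self-contained numeric sketch (synthetic data, not QuaPy code) of the importance-sampling estimator above: the squared Hellinger distance between two Gaussians p and q is approximated with trials drawn from a uniform reference r.

import numpy as np
from scipy.stats import norm

rng = np.random.RandomState(0)
t = 10000
trials = rng.uniform(-5, 10, size=t)        # x_1, ..., x_t ~ r
r = 1 / 15                                  # density of the uniform reference on [-5, 10]
p = norm(0, 1).pdf(trials)
q = norm(1, 1).pdf(trials)

f = lambda u: (np.sqrt(u) - 1) ** 2         # f-generator of the squared Hellinger distance
hd2_estimate = np.mean(f(p / q) * (q / r))  # approximates HD^2(p||q), here ~= 2*(1-exp(-1/8))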
+[docs]
+class KDEyCS(AggregativeSoftQuantifier):
+    """
+    Kernel Density Estimation model for quantification (KDEy) relying on the Cauchy-Schwarz divergence (CS) as
+    the divergence measure to be minimized. This method was first proposed in the paper
+    `Kernel Density Estimation for Multiclass Quantification <https://arxiv.org/abs/2401.00490>`_, in which
+    the authors derived a closed-form solution for minimizing the divergence.
+
+    The distribution matching optimization problem comes down to solving:
+
+    :math:`\\hat{\\alpha} = \\arg\\min_{\\alpha\\in\\Delta^{n-1}} \\mathcal{D}(\\boldsymbol{p}_{\\alpha}||q_{\\widetilde{U}})`
+
+    where :math:`p_{\\alpha}` is the mixture of class-specific KDEs with mixture parameter (hence class prevalence)
+    :math:`\\alpha` defined by
+
+    :math:`\\boldsymbol{p}_{\\alpha}(\\widetilde{x}) = \\sum_{i=1}^n \\alpha_i p_{\\widetilde{L}_i}(\\widetilde{x})`
+
+    where :math:`p_X(\\boldsymbol{x}) = \\frac{1}{|X|} \\sum_{x_i\\in X} K\\left(\\frac{x-x_i}{h}\\right)` is the
+    KDE function that uses the datapoints in X as the kernel centers.
+
+    In KDEy-CS, the divergence is taken to be the Cauchy-Schwarz divergence given by:
+
+    :math:`\\mathcal{D}_{\\mathrm{CS}}(p||q)=-\\log\\left(\\frac{\\int p(x)q(x)dx}{\\sqrt{\\int p(x)^2dx \\int q(x)^2dx}}\\right)`
+
+    The authors showed that this distribution matching admits a closed-form solution.
+
+    :param classifier: a sklearn's Estimator that generates a probabilistic classifier.
+    :param val_split: specifies the data used for generating classifier predictions. This specification
+        can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
+        be extracted from the training set; or as an integer (default 10), indicating that the predictions
+        are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
+        for `k`); or as a collection defining the specific set of data to use for validation.
+        Alternatively, this set can be specified at fit time by indicating the exact set of data
+        on which the predictions are to be generated.
+    :param bandwidth: float, the bandwidth of the Kernel
+    :param n_jobs: number of parallel workers
+    """
+
+    def __init__(self, classifier: BaseEstimator, val_split=10, bandwidth=0.1, n_jobs=None):
+        KDEBase._check_bandwidth(bandwidth)
+        self.classifier = classifier
+        self.val_split = val_split
+        self.bandwidth = bandwidth
+        self.n_jobs = n_jobs
+
+[docs] + def gram_matrix_mix_sum(self, X, Y=None): + # this adapts the output of the rbf_kernel function (pairwise evaluations of Gaussian kernels k(x,y)) + # to contain pairwise evaluations of N(x|mu,Sigma1+Sigma2) with mu=y and Sigma1 and Sigma2 are + # two "scalar matrices" (h^2)*I each, so Sigma1+Sigma2 has scalar 2(h^2) (h is the bandwidth) + h = self.bandwidth + variance = 2 * (h**2) + nD = X.shape[1] + gamma = 1/(2*variance) + norm_factor = 1/np.sqrt(((2*np.pi)**nD) * (variance**(nD))) + gram = norm_factor * rbf_kernel(X, Y, gamma=gamma) + return gram.sum()
+ + +
+[docs] + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + + P, y = classif_predictions.Xy + n = data.n_classes + + assert all(sorted(np.unique(y)) == np.arange(n)), \ + 'label name gaps not allowed in current implementation' + + # counts_inv keeps track of the relative weight of each datapoint within its class + # (i.e., the weight in its KDE model) + counts_inv = 1 / (data.counts()) + + # tr_tr_sums corresponds to symbol \overline{B} in the paper + tr_tr_sums = np.zeros(shape=(n,n), dtype=float) + for i in range(n): + for j in range(n): + if i > j: + tr_tr_sums[i,j] = tr_tr_sums[j,i] + else: + block = self.gram_matrix_mix_sum(P[y == i], P[y == j] if i!=j else None) + tr_tr_sums[i, j] = block + + # keep track of these data structures for the test phase + self.Ptr = P + self.ytr = y + self.tr_tr_sums = tr_tr_sums + self.counts_inv = counts_inv + + return self
+ + + +
+[docs]
+    def aggregate(self, posteriors: np.ndarray):
+        Ptr = self.Ptr
+        Pte = posteriors
+        y = self.ytr
+        tr_tr_sums = self.tr_tr_sums
+
+        M, nD = Pte.shape
+        Minv = (1/M)  # M is called t in the paper
+        n = Ptr.shape[1]
+
+        # becomes a constant that does not affect the optimization, no need to compute it
+        # partC = 0.5*np.log(self.gram_matrix_mix_sum(Pte) * Kinv * Kinv)
+
+        # tr_te_sums corresponds to \overline{a}*(1/Li)*(1/M) in the paper (note the constants
+        # are already aggregated to tr_te_sums, so these multiplications are not carried out
+        # at each iteration of the optimization phase)
+        tr_te_sums = np.zeros(shape=n, dtype=float)
+        for i in range(n):
+            tr_te_sums[i] = self.gram_matrix_mix_sum(Ptr[y==i], Pte)
+
+        def divergence(alpha):
+            # called \overline{r} in the paper
+            alpha_ratio = alpha * self.counts_inv
+
+            # recall that tr_te_sums already accounts for the constant terms (1/Li)*(1/M)
+            partA = -np.log((alpha_ratio @ tr_te_sums) * Minv)
+            partB = 0.5 * np.log(alpha_ratio @ tr_tr_sums @ alpha_ratio)
+            return partA + partB  #+ partC
+
+        return F.optim_minimize(divergence, n)
+
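A numeric sanity sketch (not QuaPy code) for the closed form behind `gram_matrix_mix_sum`: the product of two Gaussian kernels integrates to a Gaussian with summed covariances, i.e. the integral of N(x|x_i, h^2 I) N(x|x_j, h^2 I) over x equals N(x_i | x_j, 2 h^2 I), which is what the rescaled rbf_kernel computes.

import numpy as np
from scipy.stats import multivariate_normal
from sklearn.metrics.pairwise import rbf_kernel

h, nD = 0.1, 2
xi = np.zeros((1, nD))
xj = np.full((1, nD), 0.05)

variance = 2 * (h ** 2)                     # the summed covariance scalar 2*h^2
gamma = 1 / (2 * variance)
norm_factor = 1 / np.sqrt(((2 * np.pi) ** nD) * (variance ** nD))
closed_form = norm_factor * rbf_kernel(xi, xj, gamma=gamma)[0, 0]

reference = multivariate_normal(mean=xj.ravel(), cov=variance * np.eye(nD)).pdf(xi.ravel())
assert np.isclose(closed_form, reference)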
+ + +
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/method/_neural.html b/docs/build/html/_modules/quapy/method/_neural.html new file mode 100644 index 0000000..a4f6a27 --- /dev/null +++ b/docs/build/html/_modules/quapy/method/_neural.html @@ -0,0 +1,549 @@ + + + + + + quapy.method._neural — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +

Source code for quapy.method._neural

+import os
+from pathlib import Path
+import random
+
+import torch
+from torch.nn import MSELoss
+from torch.nn.functional import relu
+
+from quapy.protocol import UPP
+from quapy.method.aggregative import *
+from quapy.util import EarlyStop
+from tqdm import tqdm
+
+
+
+[docs] +class QuaNetTrainer(BaseQuantifier): + """ + Implementation of `QuaNet <https://dl.acm.org/doi/abs/10.1145/3269206.3269287>`_, a neural network for + quantification. This implementation uses `PyTorch <https://pytorch.org/>`_ and can take advantage of GPU + for speeding-up the training phase. + + Example: + + >>> import quapy as qp + >>> from quapy.method.meta import QuaNet + >>> from quapy.classification.neural import NeuralClassifierTrainer, CNNnet + >>> + >>> # use samples of 100 elements + >>> qp.environ['SAMPLE_SIZE'] = 100 + >>> + >>> # load the kindle dataset as text, and convert words to numerical indexes + >>> dataset = qp.datasets.fetch_reviews('kindle', pickle=True) + >>> qp.train.preprocessing.index(dataset, min_df=5, inplace=True) + >>> + >>> # the text classifier is a CNN trained by NeuralClassifierTrainer + >>> cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes) + >>> classifier = NeuralClassifierTrainer(cnn, device='cuda') + >>> + >>> # train QuaNet (QuaNet is an alias to QuaNetTrainer) + >>> model = QuaNet(classifier, qp.environ['SAMPLE_SIZE'], device='cuda') + >>> model.fit(dataset.training) + >>> estim_prevalence = model.quantify(dataset.test.instances) + + :param classifier: an object implementing `fit` (i.e., that can be trained on labelled data), + `predict_proba` (i.e., that can generate posterior probabilities of unlabelled examples) and + `transform` (i.e., that can generate embedded representations of the unlabelled instances). + :param sample_size: integer, the sample size; default is None, meaning that the sample size should be + taken from qp.environ["SAMPLE_SIZE"] + :param n_epochs: integer, maximum number of training epochs + :param tr_iter_per_poch: integer, number of training iterations before considering an epoch complete + :param va_iter_per_poch: integer, number of validation iterations to perform after each epoch + :param lr: float, the learning rate + :param lstm_hidden_size: integer, hidden dimensionality of the LSTM cells + :param lstm_nlayers: integer, number of LSTM layers + :param ff_layers: list of integers, dimensions of the densely-connected FF layers on top of the + quantification embedding + :param bidirectional: boolean, indicates whether the LSTM is bidirectional or not + :param qdrop_p: float, dropout probability + :param patience: integer, number of epochs showing no improvement in the validation set before stopping the + training phase (early stopping) + :param checkpointdir: string, a path where to store models' checkpoints + :param checkpointname: string (optional), the name of the model's checkpoint + :param device: string, indicate "cpu" or "cuda" + """ + + def __init__(self, + classifier, + sample_size=None, + n_epochs=100, + tr_iter_per_poch=500, + va_iter_per_poch=100, + lr=1e-3, + lstm_hidden_size=64, + lstm_nlayers=1, + ff_layers=[1024, 512], + bidirectional=True, + qdrop_p=0.5, + patience=10, + checkpointdir='../checkpoint', + checkpointname=None, + device='cuda'): + + assert hasattr(classifier, 'transform'), \ + f'the classifier {classifier.__class__.__name__} does not seem to be able to produce document embeddings ' \ + f'since it does not implement the method "transform"' + assert hasattr(classifier, 'predict_proba'), \ + f'the classifier {classifier.__class__.__name__} does not seem to be able to produce posterior probabilities ' \ + f'since it does not implement the method "predict_proba"' + self.classifier = classifier + self.sample_size = qp._get_sample_size(sample_size) + self.n_epochs = n_epochs + 
self.tr_iter = tr_iter_per_poch
+        self.va_iter = va_iter_per_poch
+        self.lr = lr
+        self.quanet_params = {
+            'lstm_hidden_size': lstm_hidden_size,
+            'lstm_nlayers': lstm_nlayers,
+            'ff_layers': ff_layers,
+            'bidirectional': bidirectional,
+            'qdrop_p': qdrop_p
+        }
+
+        self.patience = patience
+        if checkpointname is None:
+            local_random = random.Random()
+            random_code = '-'.join(str(local_random.randint(0, 1000000)) for _ in range(5))
+            checkpointname = 'QuaNet-' + random_code
+        self.checkpointdir = checkpointdir
+        self.checkpoint = os.path.join(checkpointdir, checkpointname)
+        self.device = torch.device(device)
+
+        self.__check_params_collision(self.quanet_params, self.classifier.get_params())
+        self._classes_ = None
+
+[docs]
+    def fit(self, data: LabelledCollection, fit_classifier=True):
+        """
+        Trains QuaNet.
+
+        :param data: the training data on which to train QuaNet. If `fit_classifier=True`, the data will be split in
+            40/40/20 for training the classifier, training QuaNet, and validating QuaNet, respectively. If
+            `fit_classifier=False`, the data will be split in 66/34 for training QuaNet and validating it, respectively.
+        :param fit_classifier: if True, trains the classifier on a split containing 40% of the data
+        :return: self
+        """
+        self._classes_ = data.classes_
+        os.makedirs(self.checkpointdir, exist_ok=True)
+
+        if fit_classifier:
+            classifier_data, unused_data = data.split_stratified(0.4)
+            train_data, valid_data = unused_data.split_stratified(0.66)  # 0.66 split of 60% makes 40% and 20%
+            self.classifier.fit(*classifier_data.Xy)
+        else:
+            classifier_data = None
+            train_data, valid_data = data.split_stratified(0.66)
+
+        # keep track of the class prevalence of the training set
+        self.tr_prev = data.prevalence()
+
+        # compute the posterior probabilities of the instances
+        valid_posteriors = self.classifier.predict_proba(valid_data.instances)
+        train_posteriors = self.classifier.predict_proba(train_data.instances)
+
+        # turn instances' original representations into embeddings
+        valid_data_embed = LabelledCollection(self.classifier.transform(valid_data.instances), valid_data.labels, self._classes_)
+        train_data_embed = LabelledCollection(self.classifier.transform(train_data.instances), train_data.labels, self._classes_)
+
+        self.quantifiers = {
+            'cc': CC(self.classifier).fit(None, fit_classifier=False),
+            'acc': ACC(self.classifier).fit(None, fit_classifier=False, val_split=valid_data),
+            'pcc': PCC(self.classifier).fit(None, fit_classifier=False),
+            'pacc': PACC(self.classifier).fit(None, fit_classifier=False, val_split=valid_data),
+        }
+        if classifier_data is not None:
+            self.quantifiers['emq'] = EMQ(self.classifier).fit(classifier_data, fit_classifier=False)
+
+        self.status = {
+            'tr-loss': -1,
+            'va-loss': -1,
+            'tr-mae': -1,
+            'va-mae': -1,
+        }
+
+        nQ = len(self.quantifiers)
+        nC = data.n_classes
+        self.quanet = QuaNetModule(
+            doc_embedding_size=train_data_embed.instances.shape[1],
+            n_classes=data.n_classes,
+            stats_size=nQ*nC,
+            order_by=0 if data.binary else None,
+            **self.quanet_params
+        ).to(self.device)
+        print(self.quanet)
+
+        self.optim = torch.optim.Adam(self.quanet.parameters(), lr=self.lr)
+        early_stop = EarlyStop(self.patience, lower_is_better=True)
+
+        checkpoint = self.checkpoint
+
+        for epoch_i in range(1, self.n_epochs + 1):
+            self._epoch(train_data_embed, train_posteriors, self.tr_iter, epoch_i, early_stop, train=True)
+            self._epoch(valid_data_embed, valid_posteriors, self.va_iter, epoch_i, early_stop, train=False)
+
+            early_stop(self.status['va-loss'], epoch_i)
+            if early_stop.IMPROVED:
+                torch.save(self.quanet.state_dict(), checkpoint)
+            elif early_stop.STOP:
+                print(f'training ended by patience exhausted; loading best model parameters in {checkpoint} '
+                      f'for epoch {early_stop.best_epoch}')
+                self.quanet.load_state_dict(torch.load(checkpoint))
+                break
+
+        return self
+ + + def _get_aggregative_estims(self, posteriors): + label_predictions = np.argmax(posteriors, axis=-1) + prevs_estim = [] + for quantifier in self.quantifiers.values(): + predictions = posteriors if isinstance(quantifier, AggregativeSoftQuantifier) else label_predictions + prevs_estim.extend(quantifier.aggregate(predictions)) + + # there is no real need for adding static estims like the TPR or FPR from training since those are constant + + return prevs_estim + +
+[docs] + def quantify(self, instances): + posteriors = self.classifier.predict_proba(instances) + embeddings = self.classifier.transform(instances) + quant_estims = self._get_aggregative_estims(posteriors) + self.quanet.eval() + with torch.no_grad(): + prevalence = self.quanet.forward(embeddings, posteriors, quant_estims) + if self.device == torch.device('cuda'): + prevalence = prevalence.cpu() + prevalence = prevalence.numpy().flatten() + return prevalence
+ + + def _epoch(self, data: LabelledCollection, posteriors, iterations, epoch, early_stop, train): + mse_loss = MSELoss() + + self.quanet.train(mode=train) + losses = [] + mae_errors = [] + sampler = UPP( + data, + sample_size=self.sample_size, + repeats=iterations, + random_state=None if train else 0 # different samples during train, same samples during validation + ) + pbar = tqdm(sampler.samples_parameters(), total=sampler.total()) + for it, index in enumerate(pbar): + sample_data = data.sampling_from_index(index) + sample_posteriors = posteriors[index] + quant_estims = self._get_aggregative_estims(sample_posteriors) + ptrue = torch.as_tensor([sample_data.prevalence()], dtype=torch.float, device=self.device) + if train: + self.optim.zero_grad() + phat = self.quanet.forward(sample_data.instances, sample_posteriors, quant_estims) + loss = mse_loss(phat, ptrue) + mae = mae_loss(phat, ptrue) + loss.backward() + self.optim.step() + else: + with torch.no_grad(): + phat = self.quanet.forward(sample_data.instances, sample_posteriors, quant_estims) + loss = mse_loss(phat, ptrue) + mae = mae_loss(phat, ptrue) + + losses.append(loss.item()) + mae_errors.append(mae.item()) + + mse = np.mean(losses) + mae = np.mean(mae_errors) + if train: + self.status['tr-loss'] = mse + self.status['tr-mae'] = mae + else: + self.status['va-loss'] = mse + self.status['va-mae'] = mae + + if train: + pbar.set_description(f'[QuaNet] ' + f'epoch={epoch} [it={it}/{iterations}]\t' + f'tr-mseloss={self.status["tr-loss"]:.5f} tr-maeloss={self.status["tr-mae"]:.5f}\t' + f'val-mseloss={self.status["va-loss"]:.5f} val-maeloss={self.status["va-mae"]:.5f} ' + f'patience={early_stop.patience}/{early_stop.PATIENCE_LIMIT}') + +
+[docs] + def get_params(self, deep=True): + classifier_params = self.classifier.get_params() + classifier_params = {'classifier__'+k:v for k,v in classifier_params.items()} + return {**classifier_params, **self.quanet_params}
+ + +
+[docs] + def set_params(self, **parameters): + learner_params = {} + for key, val in parameters.items(): + if key in self.quanet_params: + self.quanet_params[key] = val + elif key.startswith('classifier__'): + learner_params[key.replace('classifier__', '')] = val + else: + raise ValueError('unknown parameter ', key) + self.classifier.set_params(**learner_params)
+ + + def __check_params_colision(self, quanet_params, learner_params): + quanet_keys = set(quanet_params.keys()) + learner_keys = set(learner_params.keys()) + intersection = quanet_keys.intersection(learner_keys) + if len(intersection) > 0: + raise ValueError(f'the use of parameters {intersection} is ambiguous sine those can refer to ' + f'the parameters of QuaNet or the learner {self.classifier.__class__.__name__}') + +
+[docs] + def clean_checkpoint(self): + """ + Removes the checkpoint + """ + os.remove(self.checkpoint)
+ + +
+[docs] + def clean_checkpoint_dir(self): + """ + Removes anything contained in the checkpoint directory + """ + import shutil + shutil.rmtree(self.checkpointdir, ignore_errors=True)
+ + + @property + def classes_(self): + return self._classes_
+ + + +
+[docs] +def mae_loss(output, target): + """ + Torch-like wrapper for the Mean Absolute Error + + :param output: predictions + :param target: ground truth values + :return: mean absolute error loss + """ + return torch.mean(torch.abs(output - target))
+ + + +
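As a quick illustrative check (assuming the `mae_loss` defined above is in scope), this wrapper agrees with PyTorch's built-in `l1_loss` under the default mean reduction:

import torch
from torch.nn.functional import l1_loss

output = torch.tensor([[0.2, 0.8]])
target = torch.tensor([[0.5, 0.5]])
assert torch.isclose(mae_loss(output, target), l1_loss(output, target))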
+[docs] +class QuaNetModule(torch.nn.Module): + """ + Implements the `QuaNet <https://dl.acm.org/doi/abs/10.1145/3269206.3269287>`_ forward pass. + See :class:`QuaNetTrainer` for training QuaNet. + + :param doc_embedding_size: integer, the dimensionality of the document embeddings + :param n_classes: integer, number of classes + :param stats_size: integer, number of statistics estimated by simple quantification methods + :param lstm_hidden_size: integer, hidden dimensionality of the LSTM cell + :param lstm_nlayers: integer, number of LSTM layers + :param ff_layers: list of integers, dimensions of the densely-connected FF layers on top of the + quantification embedding + :param bidirectional: boolean, whether or not to use bidirectional LSTM + :param qdrop_p: float, dropout probability + :param order_by: integer, class for which the document embeddings are to be sorted + """ + + def __init__(self, + doc_embedding_size, + n_classes, + stats_size, + lstm_hidden_size=64, + lstm_nlayers=1, + ff_layers=[1024, 512], + bidirectional=True, + qdrop_p=0.5, + order_by=0): + + super().__init__() + + self.n_classes = n_classes + self.order_by = order_by + self.hidden_size = lstm_hidden_size + self.nlayers = lstm_nlayers + self.bidirectional = bidirectional + self.ndirections = 2 if self.bidirectional else 1 + self.qdrop_p = qdrop_p + self.lstm = torch.nn.LSTM(doc_embedding_size + n_classes, # +n_classes stands for the posterior probs. (concatenated) + lstm_hidden_size, lstm_nlayers, bidirectional=bidirectional, + dropout=qdrop_p, batch_first=True) + self.dropout = torch.nn.Dropout(self.qdrop_p) + + lstm_output_size = self.hidden_size * self.ndirections + ff_input_size = lstm_output_size + stats_size + prev_size = ff_input_size + self.ff_layers = torch.nn.ModuleList() + for lin_size in ff_layers: + self.ff_layers.append(torch.nn.Linear(prev_size, lin_size)) + prev_size = lin_size + self.output = torch.nn.Linear(prev_size, n_classes) + + @property + def device(self): + return torch.device('cuda') if next(self.parameters()).is_cuda else torch.device('cpu') + + def _init_hidden(self): + directions = 2 if self.bidirectional else 1 + var_hidden = torch.zeros(self.nlayers * directions, 1, self.hidden_size) + var_cell = torch.zeros(self.nlayers * directions, 1, self.hidden_size) + if next(self.lstm.parameters()).is_cuda: + var_hidden, var_cell = var_hidden.cuda(), var_cell.cuda() + return var_hidden, var_cell + +
+[docs]
+    def forward(self, doc_embeddings, doc_posteriors, statistics):
+        device = self.device
+        doc_embeddings = torch.as_tensor(doc_embeddings, dtype=torch.float, device=device)
+        doc_posteriors = torch.as_tensor(doc_posteriors, dtype=torch.float, device=device)
+        statistics = torch.as_tensor(statistics, dtype=torch.float, device=device)
+
+        if self.order_by is not None:
+            order = torch.argsort(doc_posteriors[:, self.order_by])
+            doc_embeddings = doc_embeddings[order]
+            doc_posteriors = doc_posteriors[order]
+
+        embedded_posteriors = torch.cat((doc_embeddings, doc_posteriors), dim=-1)
+
+        # the entire set represents only one instance in quapy contexts, and so the batch_size=1
+        # the shape should be (1, number-of-instances, embedding-size + n_classes)
+        embedded_posteriors = embedded_posteriors.unsqueeze(0)
+
+        self.lstm.flatten_parameters()
+        _, (rnn_hidden, _) = self.lstm(embedded_posteriors, self._init_hidden())
+        rnn_hidden = rnn_hidden.view(self.nlayers, self.ndirections, 1, self.hidden_size)
+        quant_embedding = rnn_hidden[0].view(-1)
+        quant_embedding = torch.cat((quant_embedding, statistics))
+
+        abstracted = quant_embedding.unsqueeze(0)
+        for linear in self.ff_layers:
+            abstracted = self.dropout(relu(linear(abstracted)))
+
+        logits = self.output(abstracted).view(1, -1)
+        prevalence = torch.softmax(logits, -1)
+
+        return prevalence
+
+ + + + + + +
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/method/_threshold_optim.html b/docs/build/html/_modules/quapy/method/_threshold_optim.html new file mode 100644 index 0000000..0aa215b --- /dev/null +++ b/docs/build/html/_modules/quapy/method/_threshold_optim.html @@ -0,0 +1,417 @@ + + + + + + quapy.method._threshold_optim — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +

Source code for quapy.method._threshold_optim

+from abc import abstractmethod
+
+import numpy as np
+from sklearn.base import BaseEstimator
+import quapy as qp
+import quapy.functional as F
+from quapy.data import LabelledCollection
+from quapy.method.aggregative import BinaryAggregativeQuantifier
+
+
+
+[docs]
+class ThresholdOptimization(BinaryAggregativeQuantifier):
+    """
+    Abstract class of Threshold Optimization variants for :class:`ACC` as proposed by
+    `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and
+    `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_.
+    The goal is to bring improved stability to the denominator of the adjustment.
+    The different variants are based on different heuristics for choosing a decision threshold
+    that would allow for more true positives and many more false positives, on the grounds this
+    would deliver larger denominators.
+
+    :param classifier: a sklearn's Estimator that generates a classifier
+    :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the
+        misclassification rates are to be estimated.
+        This parameter can be indicated as a real value (between 0 and 1), representing a proportion of
+        validation data, or as an integer, indicating that the misclassification rates should be estimated via
+        `k`-fold cross validation (this integer stands for the number of folds `k`, defaults to 5), or as a
+        :class:`quapy.data.base.LabelledCollection` (the split itself).
+    :param n_jobs: number of parallel workers
+    """
+
+    def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None):
+        self.classifier = classifier
+        self.val_split = val_split
+        self.n_jobs = qp._get_njobs(n_jobs)
+
+[docs] + @abstractmethod + def condition(self, tpr, fpr) -> float: + """ + Implements the criterion according to which the threshold should be selected. + This function should return the (float) score to be minimized. + + :param tpr: float, true positive rate + :param fpr: float, false positive rate + :return: float, a score for the given `tpr` and `fpr` + """ + ...
+ + +
+[docs] + def discard(self, tpr, fpr) -> bool: + """ + Indicates whether a combination of tpr and fpr should be discarded + + :param tpr: float, true positive rate + :param fpr: float, false positive rate + :return: true if the combination is to be discarded, false otherwise + """ + return (tpr - fpr) == 0
+
+    def _eval_candidate_thresholds(self, decision_scores, y):
+        """
+        Seeks for the best `tpr` and `fpr` according to the score obtained at different
+        decision thresholds. The scoring function is implemented in the method `condition`.
+
+        :param decision_scores: array-like with the classification scores
+        :param y: true labels for the validation set (or for the training set via `k`-fold cross validation)
+        :return: an array of `(tpr, fpr, threshold)` candidates, sorted by the score of `condition` (best first)
+        """
+        candidate_thresholds = np.unique(decision_scores)
+
+        candidates = []
+        scores = []
+        for candidate_threshold in candidate_thresholds:
+            y_ = self.classes_[1 * (decision_scores >= candidate_threshold)]
+            TP, FP, FN, TN = self._compute_table(y, y_)
+            tpr = self._compute_tpr(TP, FN)
+            fpr = self._compute_fpr(FP, TN)
+            if not self.discard(tpr, fpr):
+                candidate_score = self.condition(tpr, fpr)
+                candidates.append([tpr, fpr, candidate_threshold])
+                scores.append(candidate_score)
+
+        if len(candidates) == 0:
+            # if no candidate gives rise to a valid combination of tpr and fpr, this method defaults to the standard
+            # classify & count; this is akin to assign tpr=1, fpr=0, threshold=0
+            tpr, fpr, threshold = 1, 0, 0
+            candidates.append([tpr, fpr, threshold])
+            scores.append(0)
+
+        candidates = np.asarray(candidates)
+        candidates = candidates[np.argsort(scores)]  # sort candidates by candidate_score
+
+        return candidates
+
+[docs] + def aggregate_with_threshold(self, classif_predictions, tprs, fprs, thresholds): + # This function performs the adjusted count for given tpr, fpr, and threshold. + # Note that, due to broadcasting, tprs, fprs, and thresholds could be arrays of length > 1 + prevs_estims = np.mean(classif_predictions[:, None] >= thresholds, axis=0) + prevs_estims = (prevs_estims - fprs) / (tprs - fprs) + prevs_estims = F.as_binary_prevalence(prevs_estims, clip_if_necessary=True) + return prevs_estims.squeeze()
+
+    def _compute_table(self, y, y_):
+        TP = np.logical_and(y == y_, y == self.pos_label).sum()
+        FP = np.logical_and(y != y_, y == self.neg_label).sum()
+        FN = np.logical_and(y != y_, y == self.pos_label).sum()
+        TN = np.logical_and(y == y_, y == self.neg_label).sum()
+        return TP, FP, FN, TN
+
+    def _compute_tpr(self, TP, FN):
+        if TP + FN == 0:
+            return 1
+        return TP / (TP + FN)
+
+    def _compute_fpr(self, FP, TN):
+        if FP + TN == 0:
+            return 0
+        return FP / (FP + TN)
+
+[docs] + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + decision_scores, y = classif_predictions.Xy + # the standard behavior is to keep the best threshold only + self.tpr, self.fpr, self.threshold = self._eval_candidate_thresholds(decision_scores, y)[0] + return self
+ + +
+[docs] + def aggregate(self, classif_predictions: np.ndarray): + # the standard behavior is to compute the adjusted count using the best threshold found + return self.aggregate_with_threshold(classif_predictions, self.tpr, self.fpr, self.threshold)
+
+ + + +
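A numeric sketch (synthetic values, not QuaPy code) of the adjustment performed by `aggregate_with_threshold`: the observed positive rate at the chosen threshold is mapped through (rate - fpr) / (tpr - fpr).

import numpy as np

scores = np.array([-1.2, -0.3, 0.1, 0.4, 0.9])   # classifier decision scores
threshold, tpr, fpr = 0.0, 0.8, 0.2              # a candidate (threshold, tpr, fpr) triplet
positive_rate = np.mean(scores >= threshold)     # 3/5 = 0.6
adjusted = (positive_rate - fpr) / (tpr - fpr)   # (0.6 - 0.2) / 0.6 ~= 0.667
prevalence = np.clip([1 - adjusted, adjusted], 0, 1)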
+[docs]
+class T50(ThresholdOptimization):
+    """
+    Threshold Optimization variant for :class:`ACC` as proposed by
+    `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and
+    `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that looks
+    for the threshold that makes `tpr` closest to 0.5.
+    The goal is to bring improved stability to the denominator of the adjustment.
+
+    :param classifier: a sklearn's Estimator that generates a classifier
+    :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the
+        misclassification rates are to be estimated.
+        This parameter can be indicated as a real value (between 0 and 1), representing a proportion of
+        validation data, or as an integer, indicating that the misclassification rates should be estimated via
+        `k`-fold cross validation (this integer stands for the number of folds `k`, defaults to 5), or as a
+        :class:`quapy.data.base.LabelledCollection` (the split itself).
+    """
+
+    def __init__(self, classifier: BaseEstimator, val_split=5):
+        super().__init__(classifier, val_split)
+
+[docs] + def condition(self, tpr, fpr) -> float: + return abs(tpr - 0.5)
+
+ + + +
+[docs]
+class MAX(ThresholdOptimization):
+    """
+    Threshold Optimization variant for :class:`ACC` as proposed by
+    `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and
+    `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that looks
+    for the threshold that maximizes `tpr-fpr`.
+    The goal is to bring improved stability to the denominator of the adjustment.
+
+    :param classifier: a sklearn's Estimator that generates a classifier
+    :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the
+        misclassification rates are to be estimated.
+        This parameter can be indicated as a real value (between 0 and 1), representing a proportion of
+        validation data, or as an integer, indicating that the misclassification rates should be estimated via
+        `k`-fold cross validation (this integer stands for the number of folds `k`, defaults to 5), or as a
+        :class:`quapy.data.base.LabelledCollection` (the split itself).
+    """
+
+    def __init__(self, classifier: BaseEstimator, val_split=5):
+        super().__init__(classifier, val_split)
+
+[docs] + def condition(self, tpr, fpr) -> float: + # MAX strives to maximize (tpr - fpr), which is equivalent to minimize (fpr - tpr) + return (fpr - tpr)
+
+ + + +
+[docs]
+class X(ThresholdOptimization):
+    """
+    Threshold Optimization variant for :class:`ACC` as proposed by
+    `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and
+    `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that looks
+    for the threshold that yields `tpr=1-fpr`.
+    The goal is to bring improved stability to the denominator of the adjustment.
+
+    :param classifier: a sklearn's Estimator that generates a classifier
+    :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the
+        misclassification rates are to be estimated.
+        This parameter can be indicated as a real value (between 0 and 1), representing a proportion of
+        validation data, or as an integer, indicating that the misclassification rates should be estimated via
+        `k`-fold cross validation (this integer stands for the number of folds `k`, defaults to 5), or as a
+        :class:`quapy.data.base.LabelledCollection` (the split itself).
+    """
+
+    def __init__(self, classifier: BaseEstimator, val_split=5):
+        super().__init__(classifier, val_split)
+
+[docs] + def condition(self, tpr, fpr) -> float: + return abs(1 - (tpr + fpr))
+
+ + + +
+[docs]
+class MS(ThresholdOptimization):
+    """
+    Median Sweep. Threshold Optimization variant for :class:`ACC` as proposed by
+    `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and
+    `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that generates
+    class prevalence estimates for all decision thresholds and returns the median of them all.
+    The goal is to bring improved stability to the denominator of the adjustment.
+
+    :param classifier: a sklearn's Estimator that generates a classifier
+    :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the
+        misclassification rates are to be estimated.
+        This parameter can be indicated as a real value (between 0 and 1), representing a proportion of
+        validation data, or as an integer, indicating that the misclassification rates should be estimated via
+        `k`-fold cross validation (this integer stands for the number of folds `k`, defaults to 5), or as a
+        :class:`quapy.data.base.LabelledCollection` (the split itself).
+    """
+    def __init__(self, classifier: BaseEstimator, val_split=5):
+        super().__init__(classifier, val_split)
+
+[docs] + def condition(self, tpr, fpr) -> float: + return 1
+ + +
+[docs] + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + decision_scores, y = classif_predictions.Xy + # keeps all candidates + tprs_fprs_thresholds = self._eval_candidate_thresholds(decision_scores, y) + self.tprs = tprs_fprs_thresholds[:, 0] + self.fprs = tprs_fprs_thresholds[:, 1] + self.thresholds = tprs_fprs_thresholds[:, 2] + return self
+ + +
+[docs] + def aggregate(self, classif_predictions: np.ndarray): + prevalences = self.aggregate_with_threshold(classif_predictions, self.tprs, self.fprs, self.thresholds) + if prevalences.ndim==2: + prevalences = np.median(prevalences, axis=0) + return prevalences
+
+ + + +
+[docs]
+class MS2(MS):
+    """
+    Median Sweep 2. Threshold Optimization variant for :class:`ACC` as proposed by
+    `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and
+    `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that generates
+    class prevalence estimates for all decision thresholds and returns the median of those cases in
+    which `tpr-fpr>0.25`.
+    The goal is to bring improved stability to the denominator of the adjustment.
+
+    :param classifier: a sklearn's Estimator that generates a classifier
+    :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the
+        misclassification rates are to be estimated.
+        This parameter can be indicated as a real value (between 0 and 1), representing a proportion of
+        validation data, or as an integer, indicating that the misclassification rates should be estimated via
+        `k`-fold cross validation (this integer stands for the number of folds `k`, defaults to 5), or as a
+        :class:`quapy.data.base.LabelledCollection` (the split itself).
+    """
+    def __init__(self, classifier: BaseEstimator, val_split=5):
+        super().__init__(classifier, val_split)
+
+[docs] + def discard(self, tpr, fpr) -> bool: + return (tpr-fpr) <= 0.25
+
+ +
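A hypothetical usage sketch (assumptions: a binary LabelledCollection named `train` and test covariates `X_test` exist; the import path mirrors this module) showing that all the Forman variants share the same interface:

from sklearn.linear_model import LogisticRegression
from quapy.method._threshold_optim import T50, MAX, X, MS, MS2

for method in (T50, MAX, X, MS, MS2):
    quantifier = method(LogisticRegression(), val_split=5)
    quantifier.fit(train)                             # trains the classifier and selects the threshold(s)
    print(method.__name__, quantifier.quantify(X_test))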
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/method/aggregative.html b/docs/build/html/_modules/quapy/method/aggregative.html new file mode 100644 index 0000000..f34498e --- /dev/null +++ b/docs/build/html/_modules/quapy/method/aggregative.html @@ -0,0 +1,1613 @@ + + + + + + quapy.method.aggregative — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +

Source code for quapy.method.aggregative

+from abc import ABC, abstractmethod
+from copy import deepcopy
+from typing import Callable, Union
+import numpy as np
+from abstention.calibration import NoBiasVectorScaling, TempScaling, VectorScaling
+from scipy import optimize
+from sklearn.base import BaseEstimator
+from sklearn.calibration import CalibratedClassifierCV
+from sklearn.metrics import confusion_matrix
+from sklearn.model_selection import cross_val_predict
+
+import quapy as qp
+import quapy.functional as F
+from quapy.functional import get_divergence
+from quapy.classification.calibration import NBVSCalibration, BCTSCalibration, TSCalibration, VSCalibration
+from quapy.classification.svmperf import SVMperf
+from quapy.data import LabelledCollection
+from quapy.method.base import BaseQuantifier, BinaryQuantifier, OneVsAllGeneric
+
+
+# Abstract classes
+# ------------------------------------
+
+
+[docs]
+class AggregativeQuantifier(BaseQuantifier, ABC):
+    """
+    Abstract class for quantification methods that base their estimations on the aggregation of classification
+    results. Aggregative quantifiers implement a pipeline that consists of generating classification predictions
+    and aggregating them. For this reason, the training phase is implemented by :meth:`classifier_fit_predict`
+    followed by :meth:`aggregation_fit`, while the testing phase is implemented by :meth:`classify` followed by
+    :meth:`aggregate`. Subclasses of this abstract class must provide implementations for these methods.
+    Aggregative quantifiers also maintain a :attr:`classifier` attribute.
+
+    The method :meth:`fit` comes with a default implementation based on :meth:`classifier_fit_predict`
+    and :meth:`aggregation_fit`.
+
+    The method :meth:`quantify` comes with a default implementation based on :meth:`classify`
+    and :meth:`aggregate`.
+    """
+
+    val_split_ = None
+
+    @property
+    def val_split(self):
+        return self.val_split_
+
+    @val_split.setter
+    def val_split(self, val_split):
+        if isinstance(val_split, LabelledCollection):
+            print('warning: setting val_split with a LabelledCollection will be inefficient in '
+                  'model selection. Rather pass the LabelledCollection at fit time')
+        self.val_split_ = val_split
+
+    def _check_init_parameters(self):
+        """
+        Implements any check to be performed on the parameters of the init method before undertaking
+        the training of the quantifier. This is done so as to allow for a quick execution stop when the
+        parameters are not valid.
+
+        :return: Nothing. May raise an exception.
+        """
+        pass
+
+    def _check_non_empty_classes(self, data: LabelledCollection):
+        """
+        Asserts all classes have positive instances.
+
+        :param data: LabelledCollection
+        :return: Nothing. May raise an exception.
+        """
+        sample_prevs = data.prevalence()
+        empty_classes = np.argwhere(sample_prevs == 0).flatten()
+        if len(empty_classes) > 0:
+            empty_class_names = data.classes_[empty_classes]
+            raise ValueError(f'classes {empty_class_names} have no training examples')
+
+[docs]
+    def fit(self, data: LabelledCollection, fit_classifier=True, val_split=None):
+        """
+        Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function.
+
+        :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data
+        :param fit_classifier: whether to train the learner (default is True). Set to False if the
+            learner has been trained outside the quantifier.
+        :param val_split: specifies the data used for generating classifier predictions (see
+            :meth:`classifier_fit_predict`); default is None, in which case the value of `self.val_split` is used
+        :return: self
+        """
+        self._check_init_parameters()
+        classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on=val_split)
+        self.aggregation_fit(classif_predictions, data)
+        return self
+ + +
+[docs]
+    def classifier_fit_predict(self, data: LabelledCollection, fit_classifier=True, predict_on=None):
+        """
+        Trains the classifier if requested (`fit_classifier=True`) and generates the necessary predictions to
+        train the aggregation function.
+
+        :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data
+        :param fit_classifier: whether to train the learner (default is True). Set to False if the
+            learner has been trained outside the quantifier.
+        :param predict_on: specifies the set on which predictions need to be issued. This parameter can
+            be specified as None (default) to indicate no prediction is needed; a float in (0, 1) to
+            indicate the proportion of instances to be used for predictions (the remainder is used for
+            training); an integer >1 to indicate that the predictions must be generated via k-fold
+            cross-validation, using this integer as k; or the data sample itself on which to generate
+            the predictions.
+        :return: the predictions, as a :class:`quapy.data.base.LabelledCollection` pairing them with the
+            true labels, or None if no predictions were requested
+        """
+        assert isinstance(fit_classifier, bool), 'unexpected type for "fit_classifier", must be boolean'
+
+        self._check_classifier(adapt_if_necessary=(self._classifier_method() == 'predict_proba'))
+
+        if fit_classifier:
+            self._check_non_empty_classes(data)
+
+        if predict_on is None:
+            predict_on = self.val_split
+
+        if predict_on is None:
+            if fit_classifier:
+                self.classifier.fit(*data.Xy)
+            predictions = None
+        elif isinstance(predict_on, float):
+            if fit_classifier:
+                if not (0. < predict_on < 1.):
+                    raise ValueError(f'proportion {predict_on=} out of range, must be in (0,1)')
+                train, val = data.split_stratified(train_prop=(1 - predict_on))
+                self.classifier.fit(*train.Xy)
+                predictions = LabelledCollection(self.classify(val.X), val.y, classes=data.classes_)
+            else:
+                raise ValueError(f'wrong type for predict_on: since fit_classifier=False, '
+                                 f'the set on which predictions have to be issued must be '
+                                 f'explicitly indicated')
+
+        elif isinstance(predict_on, LabelledCollection):
+            if fit_classifier:
+                self.classifier.fit(*data.Xy)
+            predictions = LabelledCollection(self.classify(predict_on.X), predict_on.y, classes=predict_on.classes_)
+
+        elif isinstance(predict_on, int):
+            if fit_classifier:
+                if predict_on <= 1:
+                    raise ValueError(f'invalid value {predict_on} in fit. '
+                                     f'Specify an integer >1 for kFCV estimation.')
+                else:
+                    n_jobs = self.n_jobs if hasattr(self, 'n_jobs') else qp._get_njobs(None)
+                    predictions = cross_val_predict(
+                        self.classifier, *data.Xy, cv=predict_on, n_jobs=n_jobs, method=self._classifier_method())
+                    predictions = LabelledCollection(predictions, data.y, classes=data.classes_)
+                    self.classifier.fit(*data.Xy)
+            else:
+                raise ValueError(f'wrong type for predict_on: since fit_classifier=False, '
+                                 f'the set on which predictions have to be issued must be '
+                                 f'explicitly indicated')
+
+        else:
+            raise ValueError(
+                f'error: param "predict_on" ({type(predict_on)}) not understood; '
+                f'use either a float indicating the split proportion, an integer >1 '
+                f'indicating the number of folds, or a LabelledCollection indicating '
+                f'the validation partition')
+
+        return predictions
+ + +
+[docs] + @abstractmethod + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + """ + Trains the aggregation function. + + :param classif_predictions: a LabelledCollection containing the label predictions issued + by the classifier + :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data + """ + ...
+ + + @property + def classifier(self): + """ + Gives access to the classifier + + :return: the classifier (typically an sklearn's Estimator) + """ + return self.classifier_ + + @classifier.setter + def classifier(self, classifier): + """ + Setter for the classifier + + :param classifier: the classifier + """ + self.classifier_ = classifier + +
+[docs] + def classify(self, instances): + """ + Provides the label predictions for the given instances. The predictions should respect the format expected by + :meth:`aggregate`, e.g., posterior probabilities for probabilistic quantifiers, or crisp predictions for + non-probabilistic quantifiers. The default one is "decision_function". + + :param instances: array-like of shape `(n_instances, n_features,)` + :return: np.ndarray of shape `(n_instances,)` with label predictions + """ + return getattr(self.classifier, self._classifier_method())(instances)
+ + + def _classifier_method(self): + """ + Name of the method that must be used for issuing label predictions. The default one is "decision_function". + + :return: string + """ + return 'decision_function' + + def _check_classifier(self, adapt_if_necessary=False): + """ + Guarantees that the underlying classifier implements the method required for issuing predictions, i.e., + the method indicated by the :meth:`_classifier_method` + + :param adapt_if_necessary: if True, the method will try to comply with the required specifications + """ + assert hasattr(self.classifier, self._classifier_method()), \ + f"the method does not implement the required {self._classifier_method()} method" + +
+[docs] + def quantify(self, instances): + """ + Generate class prevalence estimates for the sample's instances by aggregating the label predictions generated + by the classifier. + + :param instances: array-like + :return: `np.ndarray` of shape `(n_classes)` with class prevalence estimates. + """ + classif_predictions = self.classify(instances) + return self.aggregate(classif_predictions)
+ + +
+[docs] + @abstractmethod + def aggregate(self, classif_predictions: np.ndarray): + """ + Implements the aggregation of label predictions. + + :param classif_predictions: `np.ndarray` of label predictions + :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates. + """ + ...
+ + + @property + def classes_(self): + """ + Class labels, in the same order in which class prevalence values are to be computed. + This default implementation actually returns the class labels of the learner. + + :return: array-like + """ + return self.classifier.classes_
+ + + +
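A sketch of the two-phase pipeline this class defines (assumptions: `q` is any fitted aggregative quantifier and `X_test` holds test covariates): `quantify` is simply `classify` followed by `aggregate`.

import numpy as np

classif_predictions = q.classify(X_test)            # phase 1: label (or posterior) predictions
prevalence = q.aggregate(classif_predictions)       # phase 2: aggregation into prevalence estimates
assert np.allclose(prevalence, q.quantify(X_test))  # the default quantify() chains both phases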
+[docs]
+class AggregativeCrispQuantifier(AggregativeQuantifier, ABC):
+    """
+    Abstract class for quantification methods that base their estimations on the aggregation of crisp decisions
+    as returned by a hard classifier. Aggregative crisp quantifiers thus extend Aggregative
+    Quantifiers by implementing specifications about crisp predictions.
+    """
+
+    def _classifier_method(self):
+        """
+        Name of the method that must be used for issuing label predictions. For crisp quantifiers, the method
+        is 'predict', that returns an array of shape `(n_instances,)` of label predictions.
+
+        :return: the string "predict", i.e., the standard method name for scikit-learn hard predictions
+        """
+        return 'predict'
+ + + +
+[docs] +class AggregativeSoftQuantifier(AggregativeQuantifier, ABC): + """ + Abstract class for quantification methods that base their estimations on the aggregation of posterior + probabilities as returned by a probabilistic classifier. + Aggregative soft quantifiers thus extend Aggregative Quantifiers by implementing specifications + about soft predictions. + """ + + def _classifier_method(self): + """ + Name of the method that must be used for issuing label predictions. For probabilistic quantifiers, the method + is 'predict_proba', that returns an array of shape `(n_instances, n_dimensions,)` with posterior + probabilities. + + :return: the string "predict_proba", i.e., the standard method name for scikit-learn soft predictions + """ + return 'predict_proba' + + def _check_classifier(self, adapt_if_necessary=False): + """ + Guarantees that the underlying classifier implements the method indicated by the :meth:`_classifier_method`. + In case it does not, the classifier is calibrated (by means of the Platt's calibration method implemented by + scikit-learn in CalibratedClassifierCV, with cv=5). This calibration is only allowed if `adapt_if_necessary` + is set to True. If otherwise (i.e., the classifier is not probabilistic, and `adapt_if_necessary` is set + to False), an exception will be raised. + + :param adapt_if_necessary: a hard classifier is turned into a soft classifier if `adapt_if_necessary==True` + """ + if not hasattr(self.classifier, self._classifier_method()): + if adapt_if_necessary: + print(f'warning: The learner {self.classifier.__class__.__name__} does not seem to be ' + f'probabilistic. The learner will be calibrated (using CalibratedClassifierCV).') + self.classifier = CalibratedClassifierCV(self.classifier, cv=5) + else: + raise AssertionError(f'error: The learner {self.classifier.__class__.__name__} does not ' + f'seem to be probabilistic. The learner cannot be calibrated since ' + f'fit_classifier is set to False')
+ + + +
+[docs] +class BinaryAggregativeQuantifier(AggregativeQuantifier, BinaryQuantifier): + + @property + def pos_label(self): + return self.classifier.classes_[1] + + @property + def neg_label(self): + return self.classifier.classes_[0] + +
+[docs] + def fit(self, data: LabelledCollection, fit_classifier=True, val_split=None): + self._check_binary(data, self.__class__.__name__) + return super().fit(data, fit_classifier, val_split)
+
+ + + +# Methods +# ------------------------------------ +
+[docs] +class CC(AggregativeCrispQuantifier): + """ + The most basic Quantification method. One that simply classifies all instances and counts how many have been + attributed to each of the classes in order to compute class prevalence estimates. + + :param classifier: a sklearn's Estimator that generates a classifier + """ + + def __init__(self, classifier: BaseEstimator): + self.classifier = classifier + +
+[docs] + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + """ + Nothing to do here! + + :param classif_predictions: this is actually None + """ + pass
+ + +
+[docs] + def aggregate(self, classif_predictions: np.ndarray): + """ + Computes class prevalence estimates by counting the prevalence of each of the predicted labels. + + :param classif_predictions: array-like with label predictions + :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates. + """ + return F.prevalence_from_labels(classif_predictions, self.classes_)
+
+ + + +
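A minimal usage sketch for CC (assumptions: a LabelledCollection named `train` and test covariates `X_test` are available):

from sklearn.linear_model import LogisticRegression

cc = CC(LogisticRegression())
cc.fit(train)                      # only the classifier is trained; aggregation_fit is a no-op
prev_estim = cc.quantify(X_test)   # the normalized histogram of predicted labels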
+[docs] +class ACC(AggregativeCrispQuantifier): + """ + `Adjusted Classify & Count <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_, + the "adjusted" variant of :class:`CC`, that corrects the predictions of CC + according to the `misclassification rates`. + + :param classifier: a sklearn's Estimator that generates a classifier + :param val_split: specifies the data used for generating classifier predictions. This specification + can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to + be extracted from the training set; or as an integer (default 5), indicating that the predictions + are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value + for `k`); or as a collection defining the specific set of data to use for validation. + Alternatively, this set can be specified at fit time by indicating the exact set of data + on which the predictions are to be generated. + :param n_jobs: number of parallel workers + :param solver: indicates the method to be used for obtaining the final estimates. The choice + 'exact' comes down to solving the system of linear equations :math:`Ax=B` where `A` is a + matrix containing the class-conditional probabilities of the predictions (e.g., the tpr and fpr in + binary) and `B` is the vector of prevalence values estimated via CC, as :math:`x=A^{-1}B`. This solution + might not exist for degenerated classifiers, in which case the method defaults to classify and count + (i.e., does not attempt any adjustment). + Another option is to search for the prevalence vector that minimizes the L2 norm of :math:`|Ax-B|`. The latter + is achieved by indicating solver='minimize'. This one generally works better, and is the default parameter. + More details about this can be consulted in `Bunse, M. "On Multi-Class Extensions of Adjusted Classify and + Count", on proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications + (LQ 2022), ECML/PKDD 2022, Grenoble (France) <https://lq-2022.github.io/proceedings/CompleteVolume.pdf>`_. + """ + + def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None, solver='minimize'): + self.classifier = classifier + self.val_split = val_split + self.n_jobs = qp._get_njobs(n_jobs) + self.solver = solver + + def _check_init_parameters(self): + assert self.solver in ['exact', 'minimize'], "unknown solver; valid ones are 'exact', 'minimize'" + +
+[docs] + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + """ + Estimates the misclassification rates. + + :param classif_predictions: classifier predictions with true labels + """ + pred_labels, true_labels = classif_predictions.Xy + self.cc = CC(self.classifier) + self.Pte_cond_estim_ = self.getPteCondEstim(self.classifier.classes_, true_labels, pred_labels)
+ + +
+[docs] + @classmethod + def getPteCondEstim(cls, classes, y, y_): + # estimate the matrix with entry (i,j) being the estimate of P(hat_yi|yj), that is, the probability that a + # document that belongs to yj ends up being classified as belonging to yi + conf = confusion_matrix(y, y_, labels=classes).T + conf = conf.astype(float) + class_counts = conf.sum(axis=0) + for i, _ in enumerate(classes): + if class_counts[i] == 0: + conf[i, i] = 1 + else: + conf[:, i] /= class_counts[i] + return conf
+ + +
+[docs] + def aggregate(self, classif_predictions): + prevs_estim = self.cc.aggregate(classif_predictions) + return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim, solver=self.solver)
+ + +
+[docs] + @classmethod + def solve_adjustment(cls, PteCondEstim, prevs_estim, solver='exact'): + """ + Solves the linear system :math:`Ax = B` with :math:`A` = `PteCondEstim` and :math:`B` = `prevs_estim` + + :param PteCondEstim: a `np.ndarray` of shape `(n_classes,n_classes,)` with entry `(i,j)` being the estimate + of :math:`P(y_i|y_j)`, that is, the probability that an instance that belongs to :math:`y_j` ends up being + classified as belonging to :math:`y_i` + :param prevs_estim: a `np.ndarray` of shape `(n_classes,)` with the class prevalence estimates + :param solver: indicates the method to use for solving the system of linear equations. Valid options are + 'exact' (tries to solve the system exactly --may fail if the misclassification matrix has rank < n_classes) or + 'minimize' (minimizes the L2 norm of :math:`|Ax-B|` --a solution always exists). + :return: an adjusted `np.ndarray` of shape `(n_classes,)` with the corrected class prevalence estimates + """ + + A = PteCondEstim + B = prevs_estim + + if solver == 'exact': + # attempts an exact solution of the linear system (may fail) + + try: + adjusted_prevs = np.linalg.solve(A, B) + adjusted_prevs = np.clip(adjusted_prevs, 0, 1) + adjusted_prevs /= adjusted_prevs.sum() + except np.linalg.LinAlgError: + adjusted_prevs = prevs_estim # no way to adjust them! + + return adjusted_prevs + + elif solver == 'minimize': + # poses the problem as an optimization one, and tries to minimize the norm of the differences + + def loss(prev): + return np.linalg.norm(A @ prev - B) + + return F.optim_minimize(loss, n_classes=A.shape[0])
+
+ + + +
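+# An illustrative sketch of the adjustment in the binary case (numbers made up
+# for illustration): with tpr=0.8 and fpr=0.2, a CC estimate of 0.5 is corrected
+# as p = (p_cc - fpr) / (tpr - fpr) = (0.5 - 0.2) / 0.6 = 0.5.
+# The same correction, expressed via the multiclass linear system Ax = B:
+#
+# >>> import numpy as np
+# >>> A = np.array([[0.8, 0.2],
+# ...               [0.2, 0.8]])   # entry (i,j) = P(predicted=i | true=j)
+# >>> B = np.array([0.5, 0.5])     # prevalence estimated via CC
+# >>> np.linalg.solve(A, B)        # -> array([0.5, 0.5]), the adjusted prevalence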
+[docs] +class PCC(AggregativeSoftQuantifier): + """ + `Probabilistic Classify & Count <https://ieeexplore.ieee.org/abstract/document/5694031>`_, + the probabilistic variant of CC that relies on the posterior probabilities returned by a probabilistic classifier. + + :param classifier: a sklearn's Estimator that generates a classifier + """ + + def __init__(self, classifier: BaseEstimator): + self.classifier = classifier + +
+[docs] + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + """ + Nothing to do here! + + :param classif_predictions: this is actually None + """ + pass
+ + +
+[docs] + def aggregate(self, classif_posteriors): + return F.prevalence_from_probabilities(classif_posteriors, binarize=False)
+
+ + + +
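+# PCC reduces to column-wise averaging of the posterior probabilities; a
+# self-contained numpy sketch with made-up posteriors:
+#
+# >>> import numpy as np
+# >>> posteriors = np.array([[0.9, 0.1], [0.6, 0.4], [0.3, 0.7]])
+# >>> posteriors.mean(axis=0)   # -> array([0.6, 0.4]), the PCC estimate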
+[docs] +class PACC(AggregativeSoftQuantifier): + """ + `Probabilistic Adjusted Classify & Count <https://ieeexplore.ieee.org/abstract/document/5694031>`_, + the probabilistic variant of ACC that relies on the posterior probabilities returned by a probabilistic classifier. + + :param classifier: a sklearn's Estimator that generates a classifier + :param val_split: specifies the data used for generating classifier predictions. This specification + can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to + be extracted from the training set; or as an integer (default 5), indicating that the predictions + are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value + for `k`). Alternatively, this set can be specified at fit time by indicating the exact set of data + on which the predictions are to be generated. + :param n_jobs: number of parallel workers + :param solver: indicates the method to be used for obtaining the final estimates. The choice + 'exact' comes down to solving the system of linear equations :math:`Ax=B` where `A` is a + matrix containing the class-conditional probabilities of the predictions (e.g., the tpr and fpr in + binary) and `B` is the vector of prevalence values estimated via PCC, as :math:`x=A^{-1}B`. This solution + might not exist for degenerate classifiers, in which case the method defaults to classify and count + (i.e., does not attempt any adjustment). + Another option is to search for the prevalence vector that minimizes the L2 norm of :math:`|Ax-B|`. The latter + is achieved by indicating solver='minimize'. This option generally works better and is the default. + More details can be consulted in `Bunse, M. "On Multi-Class Extensions of Adjusted Classify and + Count", on proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications + (LQ 2022), ECML/PKDD 2022, Grenoble (France) <https://lq-2022.github.io/proceedings/CompleteVolume.pdf>`_. + + """ + + def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None, solver='minimize'): + self.classifier = classifier + self.val_split = val_split + self.n_jobs = qp._get_njobs(n_jobs) + self.solver = solver + + def _check_init_parameters(self): + assert self.solver in ['exact', 'minimize'], "unknown solver; valid ones are 'exact', 'minimize'" +
+[docs] + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + """ + Estimates the misclassification rates + + :param classif_predictions: classifier soft predictions with true labels + """ + posteriors, true_labels = classif_predictions.Xy + self.pcc = PCC(self.classifier) + self.Pte_cond_estim_ = self.getPteCondEstim(self.classifier.classes_, true_labels, posteriors)
+ + +
+[docs] + def aggregate(self, classif_posteriors): + prevs_estim = self.pcc.aggregate(classif_posteriors) + return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim, solver=self.solver)
+ + +
+[docs] + @classmethod + def getPteCondEstim(cls, classes, y, y_): + # estimate the matrix with entry (i,j) being the estimate of P(hat_yi|yj), that is, the probability that a + # document that belongs to yj ends up being classified as belonging to yi + n_classes = len(classes) + confusion = np.eye(n_classes) + for i, class_ in enumerate(classes): + idx = y == class_ + if idx.any(): + confusion[i] = y_[idx].mean(axis=0) + + return confusion.T
+
+ + + +
+[docs] +class EMQ(AggregativeSoftQuantifier): + """ + `Expectation Maximization for Quantification <https://ieeexplore.ieee.org/abstract/document/6789744>`_ (EMQ), + aka `Saerens-Latinne-Decaestecker` (SLD) algorithm. + EMQ consists of using the well-known `Expectation Maximization algorithm` to iteratively update the posterior + probabilities generated by a probabilistic classifier and the class prevalence estimates obtained via + maximum-likelihood estimation, in a mutually recursive way, until convergence. + + This implementation also gives access to the heuristics proposed in the `Alexandari et al. paper + <http://proceedings.mlr.press/v119/alexandari20a.html>`_. These heuristics consist of using, as the training + prevalence, an estimate of it obtained via k-fold cross validation (instead of the true training prevalence), + and of recalibrating the posterior probabilities of the classifier. + + :param classifier: a sklearn's Estimator that generates a classifier + :param val_split: specifies the data used for generating classifier predictions. This specification + can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to + be extracted from the training set; or as an integer, indicating that the predictions + are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value + for `k`, default 5); or as a collection defining the specific set of data to use for validation. + Alternatively, this set can be specified at fit time by indicating the exact set of data + on which the predictions are to be generated. This hyperparameter is only meant to be used when the + heuristics are to be applied, i.e., if a recalibration is required. The default value is None (meaning + the recalibration is not required). In case this hyperparameter is set to a value other than None, but + the recalibration is not required (recalib=None), a warning message will be raised. + :param exact_train_prev: set to True (default) for using the true training prevalence as the initial observation; + set to False for computing the training prevalence as an estimate of it, i.e., as the expected + value of the posterior probabilities of the training instances. + :param recalib: a string indicating the method of recalibration. + Available choices include "nbvs" (No-Bias Vector Scaling), "bcts" (Bias-Corrected Temperature Scaling), + "ts" (Temperature Scaling), and "vs" (Vector Scaling). Default is None (no recalibration). + :param n_jobs: number of parallel workers. Only used for recalibrating the classifier if `val_split` is set to + an integer `k` --the number of folds. + """ + + MAX_ITER = 1000 + EPSILON = 1e-4 + + def __init__(self, classifier: BaseEstimator, val_split=None, exact_train_prev=True, recalib=None, n_jobs=None): + self.classifier = classifier + self.val_split = val_split + self.exact_train_prev = exact_train_prev + self.recalib = recalib + self.n_jobs = n_jobs +
+[docs] + @classmethod + def EMQ_BCTS(cls, classifier: BaseEstimator, n_jobs=None): + """ + Constructs an instance of EMQ using the best configuration found in the `Alexandari et al. paper + <http://proceedings.mlr.press/v119/alexandari20a.html>`_, i.e., one that relies on Bias-Corrected Temperature + Scaling (BCTS) as a recalibration function, and that uses an estimate of the training prevalence instead of + the true training prevalence. + + :param classifier: a sklearn's Estimator that generates a classifier + :param n_jobs: number of parallel workers. + :return: An instance of EMQ with BCTS + """ + return EMQ(classifier, val_split=5, exact_train_prev=False, recalib='bcts', n_jobs=n_jobs)
+ + + def _check_init_parameters(self): + if self.val_split is not None: + if self.exact_train_prev and self.recalib is None: + raise RuntimeWarning(f'The parameter {self.val_split=} was specified for EMQ, while the parameters ' + f'{self.exact_train_prev=} and {self.recalib=}. This has no effect and causes an unnecessary ' + f'overhead.') + else: + if self.recalib is not None: + print(f'[warning] The parameter {self.recalib=} requires val_split to be different from None. ' + f'This parameter will be set to 5. To avoid this warning, set this value to a float value ' + f'indicating the proportion of training data to be used as validation, or to an integer ' + f'indicating the number of folds for kFCV.') + self.val_split = 5 +
+[docs] + def classify(self, instances): + """ + Provides the posterior probabilities for the given instances. If the classifier was required + to be recalibrated, then these posteriors are recalibrated accordingly. + + :param instances: array-like of shape `(n_instances, n_dimensions,)` + :return: np.ndarray of shape `(n_instances, n_classes,)` with posterior probabilities + """ + posteriors = self.classifier.predict_proba(instances) + if hasattr(self, 'calibration_function') and self.calibration_function is not None: + posteriors = self.calibration_function(posteriors) + return posteriors
+ + +
+[docs] + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + if self.recalib is not None: + P, y = classif_predictions.Xy + if self.recalib == 'nbvs': + calibrator = NoBiasVectorScaling() + elif self.recalib == 'bcts': + calibrator = TempScaling(bias_positions='all') + elif self.recalib == 'ts': + calibrator = TempScaling() + elif self.recalib == 'vs': + calibrator = VectorScaling() + else: + raise ValueError('invalid param argument for recalibration method; available ones are ' + '"nbvs", "bcts", "ts", and "vs".') + + self.calibration_function = calibrator(P, np.eye(data.n_classes)[y], posterior_supplied=True) + + if self.exact_train_prev: + self.train_prevalence = data.prevalence() + else: + train_posteriors = classif_predictions.X + if self.recalib is not None: + train_posteriors = self.calibration_function(train_posteriors) + self.train_prevalence = F.prevalence_from_probabilities(train_posteriors)
+ + +
+[docs] + def aggregate(self, classif_posteriors, epsilon=EPSILON): + priors, posteriors = self.EM(self.train_prevalence, classif_posteriors, epsilon) + return priors
+ + +
+[docs] + def predict_proba(self, instances, epsilon=EPSILON): + """ + Returns the posterior probabilities updated by the EM algorithm. + + :param instances: np.ndarray of shape `(n_instances, n_dimensions)` + :param epsilon: error tolerance + :return: np.ndarray of shape `(n_instances, n_classes)` + """ + classif_posteriors = self.classify(instances) + priors, posteriors = self.EM(self.train_prevalence, classif_posteriors, epsilon) + return posteriors
+ + +
+[docs] + @classmethod + def EM(cls, tr_prev, posterior_probabilities, epsilon=EPSILON): + """ + Computes the `Expectation Maximization` routine. + + :param tr_prev: array-like, the training prevalence + :param posterior_probabilities: `np.ndarray` of shape `(n_instances, n_classes,)` with the + posterior probabilities + :param epsilon: float, the threshold difference between two consecutive iterations + below which the loop is stopped + :return: a tuple with the estimated prevalence values (shape `(n_classes,)`) and + the corrected posterior probabilities (shape `(n_instances, n_classes,)`) + """ + Px = posterior_probabilities + Ptr = np.copy(tr_prev) + qs = np.copy(Ptr) # qs (the running estimate) is initialized as the training prevalence + + s, converged = 0, False + qs_prev_ = None + while not converged and s < EMQ.MAX_ITER: + # E-step: ps is Ps(y|xi) + ps_unnormalized = (qs / Ptr) * Px + ps = ps_unnormalized / ps_unnormalized.sum(axis=1, keepdims=True) + + # M-step: + qs = ps.mean(axis=0) + + if qs_prev_ is not None and qp.error.mae(qs, qs_prev_) < epsilon and s > 10: + converged = True + + qs_prev_ = qs + s += 1 + + if not converged: + print('[warning] the method has reached the maximum number of iterations; it might not have converged') + + return qs, ps
+
+ + + +
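+# The EM routine can also be invoked standalone on precomputed posteriors; an
+# illustrative sketch (the posteriors here are random and serve no real purpose):
+#
+# >>> import numpy as np
+# >>> rng = np.random.default_rng(0)
+# >>> posteriors = rng.dirichlet(alpha=[1., 1.], size=1000)   # fake P(y|x)
+# >>> train_prev = np.array([0.5, 0.5])
+# >>> prev_estim, corrected_posteriors = EMQ.EM(train_prev, posteriors)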
+[docs] +class HDy(AggregativeSoftQuantifier, BinaryAggregativeQuantifier): + """ + `Hellinger Distance y <https://www.sciencedirect.com/science/article/pii/S0020025512004069>`_ (HDy). + HDy is a probabilistic method for training binary quantifiers, that models quantification as the problem of + minimizing the divergence (in terms of the Hellinger Distance) between two distributions of posterior + probabilities returned by the classifier. One of the distributions is generated from the unlabelled examples and + the other is generated from a validation set. This latter distribution is defined as a mixture of the + class-conditional distributions of the posterior probabilities returned for the positive and negative validation + examples, respectively. The parameters of the mixture thus represent the estimates of the class prevalence values. + + :param classifier: a sklearn's Estimator that generates a binary classifier + :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out + validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself), or an integer indicating the number of folds (default 5). + """ + + def __init__(self, classifier: BaseEstimator, val_split=5): + self.classifier = classifier + self.val_split = val_split +
+[docs] + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + """ + Trains the aggregation function of HDy. + + :param classif_predictions: a :class:`quapy.data.base.LabelledCollection` containing, as instances, the + posterior probabilities issued by the classifier and, as labels, the true labels + :param data: the training set + :return: self + """ + P, y = classif_predictions.Xy + Px = P[:, self.pos_label] # takes only the P(y=+1|x) + self.Pxy1 = Px[y == self.pos_label] + self.Pxy0 = Px[y == self.neg_label] + + # pre-compute the histogram for positive and negative examples + self.bins = np.linspace(10, 110, 11, dtype=int) # [10, 20, 30, ..., 100, 110] + + def hist(P, bins): + h = np.histogram(P, bins=bins, range=(0, 1), density=True)[0] + return h / h.sum() + + self.Pxy1_density = {bins: hist(self.Pxy1, bins) for bins in self.bins} + self.Pxy0_density = {bins: hist(self.Pxy0, bins) for bins in self.bins} + + return self
+ + +
+[docs] + def aggregate(self, classif_posteriors): + # "In this work, the number of bins b used in HDx and HDy was chosen from 10 to 110 in steps of 10, + # and the final estimated a priori probability was taken as the median of these 11 estimates." + # (González-Castro, et al., 2013). + + Px = classif_posteriors[:, self.pos_label] # takes only the P(y=+1|x) + + prev_estimations = [] + for bins in self.bins: + Pxy0_density = self.Pxy0_density[bins] + Pxy1_density = self.Pxy1_density[bins] + + Px_test, _ = np.histogram(Px, bins=bins, range=(0, 1), density=True) + + # the authors proposed to search for the prevalence yielding the best matching as a linear search + # at small steps (modern implementations resort to an optimization procedure, + # see class DistributionMatching) + prev_selected, min_dist = None, None + for prev in F.prevalence_linspace(n_prevalences=101, repeats=1, smooth_limits_epsilon=0.0): + Px_train = prev * Pxy1_density + (1 - prev) * Pxy0_density + hdy = F.HellingerDistance(Px_train, Px_test) + if prev_selected is None or hdy < min_dist: + prev_selected, min_dist = prev, hdy + prev_estimations.append(prev_selected) + + class1_prev = np.median(prev_estimations) + return F.as_binary_prevalence(class1_prev)
+
+ + + +
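+# The divergence that HDy minimizes; a self-contained numpy sketch of one common
+# formulation of the Hellinger distance between binned distributions (quapy's
+# own implementation lives in quapy.functional.HellingerDistance):
+#
+# >>> import numpy as np
+# >>> def hellinger(P, Q):
+# ...     return np.sqrt(np.sum((np.sqrt(P) - np.sqrt(Q)) ** 2))
+# >>> hellinger(np.array([0.9, 0.1]), np.array([0.9, 0.1]))   # -> 0.0 (identical)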
+[docs] +class DyS(AggregativeSoftQuantifier, BinaryAggregativeQuantifier): + """ + `DyS framework <https://ojs.aaai.org/index.php/AAAI/article/view/4376>`_ (DyS). + DyS is a generalization of the HDy method that uses a ternary search to find the prevalence that + minimizes the distance between distributions. + Details of the ternary search are taken from <https://dl.acm.org/doi/pdf/10.1145/3219819.3220059> + + :param classifier: a sklearn's Estimator that generates a binary classifier + :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out + validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself), or an integer indicating the number of folds (default 5). + :param n_bins: an int with the number of bins to use to compute the histograms. + :param divergence: a str indicating the name of divergence (currently supported ones are "HD" or "topsoe"), or a + callable function that computes the divergence between two distributions (two equally sized arrays). + :param tol: a float with the tolerance for the ternary search algorithm. + :param n_jobs: number of parallel workers. + """ + + def __init__(self, classifier: BaseEstimator, val_split=5, n_bins=8, divergence: Union[str, Callable]='HD', tol=1e-05, n_jobs=None): + self.classifier = classifier + self.val_split = val_split + self.tol = tol + self.divergence = divergence + self.n_bins = n_bins + self.n_jobs = n_jobs + + def _ternary_search(self, f, left, right, tol): + """ + Find the minimum of the unimodal function f() within [left, right] + """ + while abs(right - left) >= tol: + left_third = left + (right - left) / 3 + right_third = right - (right - left) / 3 + + if f(left_third) > f(right_third): + left = left_third + else: + right = right_third + + # Left and right are the current bounds; the minimum lies between them + return (left + right) / 2 +
+[docs] + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + Px, y = classif_predictions.Xy + Px = Px[:, self.pos_label] # takes only the P(y=+1|x) + self.Pxy1 = Px[y == self.pos_label] + self.Pxy0 = Px[y == self.neg_label] + self.Pxy1_density = np.histogram(self.Pxy1, bins=self.n_bins, range=(0, 1), density=True)[0] + self.Pxy0_density = np.histogram(self.Pxy0, bins=self.n_bins, range=(0, 1), density=True)[0] + return self
+ + +
+[docs] + def aggregate(self, classif_posteriors): + Px = classif_posteriors[:, self.pos_label] # takes only the P(y=+1|x) + + Px_test = np.histogram(Px, bins=self.n_bins, range=(0, 1), density=True)[0] + divergence = get_divergence(self.divergence) + + def distribution_distance(prev): + Px_train = prev * self.Pxy1_density + (1 - prev) * self.Pxy0_density + return divergence(Px_train, Px_test) + + class1_prev = self._ternary_search(f=distribution_distance, left=0, right=1, tol=self.tol) + return F.as_binary_prevalence(class1_prev)
+
+ + + +
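+# How the ternary search converges; a standalone sketch on a simple unimodal
+# function (illustrative only, mirroring the logic of _ternary_search above):
+#
+# >>> f = lambda p: (p - 0.3) ** 2        # unimodal, minimum at p=0.3
+# >>> left, right = 0.0, 1.0
+# >>> while right - left >= 1e-5:
+# ...     l3 = left + (right - left) / 3
+# ...     r3 = right - (right - left) / 3
+# ...     if f(l3) > f(r3): left = l3
+# ...     else: right = r3
+# >>> round((left + right) / 2, 3)        # -> 0.3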
+[docs] +class SMM(AggregativeSoftQuantifier, BinaryAggregativeQuantifier): + """ + `SMM method <https://ieeexplore.ieee.org/document/9260028>`_ (SMM). + SMM is a simplification of matching distribution methods where the representation of the examples + is created using the mean instead of a histogram (conceptually equivalent to PACC). + + :param classifier: a sklearn's Estimator that generates a binary classifier. + :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out + validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself), or an integer indicating the number of folds (default 5). + """ + + def __init__(self, classifier: BaseEstimator, val_split=5): + self.classifier = classifier + self.val_split = val_split +
+[docs] + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + Px, y = classif_predictions.Xy + Px = Px[:, self.pos_label] # takes only the P(y=+1|x) + self.Pxy1 = Px[y == self.pos_label] + self.Pxy0 = Px[y == self.neg_label] + self.Pxy1_mean = np.mean(self.Pxy1) # equiv. TPR + self.Pxy0_mean = np.mean(self.Pxy0) # equiv. FPR + return self
+ + +
+[docs] + def aggregate(self, classif_posteriors): + Px = classif_posteriors[:, self.pos_label] # takes only the P(y=+1|x) + Px_mean = np.mean(Px) + + class1_prev = (Px_mean - self.Pxy0_mean)/(self.Pxy1_mean - self.Pxy0_mean) + return F.as_binary_prevalence(class1_prev, clip_if_necessary=True)
+
+ + + +
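+# SMM's closed form at work (illustrative numbers): with mean validation
+# posteriors Pxy1_mean=0.8 (positives, akin to tpr) and Pxy0_mean=0.2 (negatives,
+# akin to fpr), a test mean of 0.5 yields a prevalence of 0.5:
+#
+# >>> Px_mean, Pxy0_mean, Pxy1_mean = 0.5, 0.2, 0.8
+# >>> (Px_mean - Pxy0_mean) / (Pxy1_mean - Pxy0_mean)   # -> 0.5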
+[docs] +class DMy(AggregativeSoftQuantifier): + """ + Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of posterior + probabilities. This implementation takes the number of bins, the divergence, and the possibility to work on CDF + as hyperparameters. + + :param classifier: a `sklearn`'s Estimator that generates a probabilistic classifier + :param val_split: indicates the proportion of data to be used as a stratified held-out validation set to model the + validation distribution. + This parameter can be indicated as a real value (between 0 and 1), representing a proportion of + validation data, or as an integer, indicating that the validation distribution should be estimated via + `k`-fold cross validation (this integer stands for the number of folds `k`, default 5), or as a + :class:`quapy.data.base.LabelledCollection` (the split itself). + :param nbins: number of bins used to discretize the distributions (default 8) + :param divergence: a string representing a divergence measure (currently, "HD" and "topsoe" are implemented) + or a callable function taking two ndarrays of the same dimension as input (default "HD", meaning Hellinger + Distance) + :param cdf: whether to use CDF instead of PDF (default False) + :param search: the optimization routine used to search for the mixture parameter (default 'optim_minimize') + :param n_jobs: number of parallel workers (default None) + """ + + def __init__(self, classifier, val_split=5, nbins=8, divergence: Union[str, Callable]='HD', + cdf=False, search='optim_minimize', n_jobs=None): + self.classifier = classifier + self.val_split = val_split + self.nbins = nbins + self.divergence = divergence + self.cdf = cdf + self.search = search + self.n_jobs = n_jobs + + def _get_distributions(self, posteriors): + histograms = [] + post_dims = posteriors.shape[1] + if post_dims == 2: + # in binary quantification we can use only one class, since the other one is its complement + post_dims = 1 + for dim in range(post_dims): + hist = np.histogram(posteriors[:, dim], bins=self.nbins, range=(0, 1))[0] + histograms.append(hist) + + counts = np.vstack(histograms) + distributions = counts/counts.sum(axis=1)[:,np.newaxis] + if self.cdf: + distributions = np.cumsum(distributions, axis=1) + return distributions +
+[docs] + def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): + """ + Generates the validation distributions out of the training data. + The validation distributions have shape `(n, ch, nbins)`, with `n` the number of classes, `ch` the number of + channels, and `nbins` the number of bins. In particular, let `V` be the validation distributions; then `di=V[i]` + are the distributions obtained from training data labelled with class `i`; while `dij = di[j]` is the discrete + distribution of posterior probabilities `P(Y=j|X=x)` for training data labelled with class `i`, and `dij[k]` + is the fraction of instances with a value in the `k`-th bin. + + :param classif_predictions: a :class:`quapy.data.base.LabelledCollection` containing, as instances, the + posterior probabilities issued by the classifier and, as labels, the true labels + :param data: the training set + """ + posteriors, true_labels = classif_predictions.Xy + n_classes = len(self.classifier.classes_) + + self.validation_distribution = qp.util.parallel( + func=self._get_distributions, + args=[posteriors[true_labels==cat] for cat in range(n_classes)], + n_jobs=self.n_jobs, + backend='threading' + )
+ + +
+[docs] + def aggregate(self, posteriors: np.ndarray): + """ + Searches for the mixture model parameter (the sought prevalence values) that yields a validation distribution + (the mixture) that best matches the test distribution, in terms of the divergence measure of choice. + In the multiclass case, with `n` the number of classes, the test and mixture distributions contain + `n` channels (proper distributions of binned posterior probabilities), on which the divergence is computed + independently. The matching is computed as an average of the divergence across all channels. + + :param posteriors: posterior probabilities of the instances in the sample + :return: a vector of class prevalence estimates + """ + test_distribution = self._get_distributions(posteriors) + divergence = get_divergence(self.divergence) + n_classes, n_channels, nbins = self.validation_distribution.shape + def loss(prev): + prev = np.expand_dims(prev, axis=0) + mixture_distribution = (prev @ self.validation_distribution.reshape(n_classes,-1)).reshape(n_channels, -1) + divs = [divergence(test_distribution[ch], mixture_distribution[ch]) for ch in range(n_channels)] + return np.mean(divs) + + return F.argmin_prevalence(loss, n_classes, method=self.search)
+
+ + + + +
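+# A minimal usage sketch for DMy (illustrative only: `train` is assumed to be a
+# LabelledCollection and `X_test` an array of test instances):
+#
+# >>> from sklearn.linear_model import LogisticRegression
+# >>> dm = DMy(LogisticRegression(), val_split=5, nbins=8, divergence='HD')
+# >>> dm.fit(train)
+# >>> estim_prev = dm.quantify(X_test)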
+[docs] +def newELM(svmperf_base=None, loss='01', C=1): + """ + Explicit Loss Minimization (ELM) quantifiers. + Quantifiers based on ELM represent a family of methods based on structured output learning; + these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss + measure. This implementation relies on + `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output + learning algorithm, which has to be installed and patched for the purpose (see this + `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_). + This function is equivalent to: + + >>> CC(SVMperf(svmperf_base, loss, C)) + + :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default) + this path will be obtained from qp.environ['SVMPERF_HOME'] + :param loss: the loss to optimize (see :attr:`quapy.classification.svmperf.SVMperf.valid_losses`) + :param C: trade-off between training error and margin (default 1) + :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the + underlying classifier + """ + if svmperf_base is None: + svmperf_base = qp.environ['SVMPERF_HOME'] + assert svmperf_base is not None, \ + 'param svmperf_base was not specified, and the variable SVMPERF_HOME has not been set in the environment' + return CC(SVMperf(svmperf_base, loss=loss, C=C))
+ + + +
+[docs] +def newSVMQ(svmperf_base=None, C=1): + """ + SVM(Q) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the `Q` loss combining a + classification-oriented loss and a quantification-oriented loss, as proposed by + `Barranquero et al. 2015 <https://www.sciencedirect.com/science/article/pii/S003132031400291X>`_. + Equivalent to: + + >>> CC(SVMperf(svmperf_base, loss='q', C=C)) + + Quantifiers based on ELM represent a family of methods based on structured output learning; + these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss + measure. This implementation relies on + `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output + learning algorithm, which has to be installed and patched for the purpose (see this + `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_). + This function is a wrapper around CC(SVMperf(svmperf_base, loss, C)) + + :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default) + this path will be obtained from qp.environ['SVMPERF_HOME'] + :param C: trade-off between training error and margin (default 1) + :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the + underlying classifier + """ + return newELM(svmperf_base, loss='q', C=C)
+ + +def newSVMKLD(svmperf_base=None, C=1): + """ + SVM(KLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence + as proposed by `Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_. + Equivalent to: + + >>> CC(SVMperf(svmperf_base, loss='kld', C=C)) + + Quantifiers based on ELM represent a family of methods based on structured output learning; + these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss + measure. This implementation relies on + `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output + learning algorithm, which has to be installed and patched for the purpose (see this + `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_). + This function is a wrapper around CC(SVMperf(svmperf_base, loss, C)) + + :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default) + this path will be obtained from qp.environ['SVMPERF_HOME'] + :param C: trade-off between training error and margin (default 1) + :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the + underlying classifier + """ + return newELM(svmperf_base, loss='kld', C=C) + + +
+[docs] +def newSVMNKLD(svmperf_base=None, C=1): + """ + SVM(NKLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence + normalized via the logistic function, as proposed by + `Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_. + Equivalent to: + + >>> CC(SVMperf(svmperf_base, loss='nkld', C=C)) + + Quantifiers based on ELM represent a family of methods based on structured output learning; + these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss + measure. This implementation relies on + `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output + learning algorithm, which has to be installed and patched for the purpose (see this + `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_). + This function is a wrapper around CC(SVMperf(svmperf_base, loss, C)) + + :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default) + this path will be obtained from qp.environ['SVMPERF_HOME'] + :param C: trade-off between training error and margin (default 1) + :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the + underlying classifier + """ + return newELM(svmperf_base, loss='nkld', C=C)
+ + +
+[docs] +def newSVMAE(svmperf_base=None, C=1): + """ + SVM(AE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Absolute Error as first used by + `Moreo and Sebastiani, 2021 <https://arxiv.org/abs/2011.02552>`_. + Equivalent to: + + >>> CC(SVMperf(svmperf_base, loss='mae', C=C)) + + Quantifiers based on ELM represent a family of methods based on structured output learning; + these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss + measure. This implementation relies on + `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output + learning algorithm, which has to be installed and patched for the purpose (see this + `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_). + This function is a wrapper around CC(SVMperf(svmperf_base, loss, C)) + + :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default) + this path will be obtained from qp.environ['SVMPERF_HOME'] + :param C: trade-off between training error and margin (default 1) + :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the + underlying classifier + """ + return newELM(svmperf_base, loss='mae', C=C)
+ + +
+[docs] +def newSVMRAE(svmperf_base=None, C=1): + """ + SVM(RAE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Relative Absolute Error as first + used by `Moreo and Sebastiani, 2021 <https://arxiv.org/abs/2011.02552>`_. + Equivalent to: + + >>> CC(SVMperf(svmperf_base, loss='mrae', C=C)) + + Quantifiers based on ELM represent a family of methods based on structured output learning; + these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss + measure. This implementation relies on + `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output + learning algorithm, which has to be installed and patched for the purpose (see this + `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_). + This function is a wrapper around CC(SVMperf(svmperf_base, loss, C)) + + :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default) + this path will be obtained from qp.environ['SVMPERF_HOME'] + :param C: trade-off between training error and margin (default 1) + :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the + underlying classifier + """ + return newELM(svmperf_base, loss='mrae', C=C)
+ + + +
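+# A usage sketch for the ELM wrappers (assumes SVMperf has been installed and
+# patched as described above, and that the path below -- hypothetical -- points
+# to its binaries; `train` is assumed to be a LabelledCollection):
+#
+# >>> import quapy as qp
+# >>> qp.environ['SVMPERF_HOME'] = '../svm_perf_quantification'
+# >>> model = newSVMQ(C=1)   # equivalent to CC(SVMperf(svmperf_base, loss='q', C=1))
+# >>> model.fit(train)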
+[docs] +class OneVsAllAggregative(OneVsAllGeneric, AggregativeQuantifier): + """ + Allows any binary quantifier to perform quantification on single-label datasets. + The method maintains one binary quantifier for each class, and then l1-normalizes the outputs so that the + class prevalences sum up to 1. + This variant was used, along with the :class:`EMQ` quantifier, in + `Gao and Sebastiani, 2016 <https://link.springer.com/content/pdf/10.1007/s13278-016-0327-z.pdf>`_. + + :param binary_quantifier: a quantifier (binary) that will be employed to work on a multiclass model in a + one-vs-all manner + :param n_jobs: number of parallel workers + :param parallel_backend: the parallel backend for joblib (default "multiprocessing"); this is helpful for some + quantifiers (e.g., ELM-based ones) that cannot be run with multiprocessing, since the temp dir they create + during fit is removed and no longer available at predict time. + """ + + def __init__(self, binary_quantifier, n_jobs=None, parallel_backend='multiprocessing'): + assert isinstance(binary_quantifier, BaseQuantifier), \ + f'{binary_quantifier} does not seem to be a Quantifier' + assert isinstance(binary_quantifier, AggregativeQuantifier), \ + f'{binary_quantifier} does not seem to be of type Aggregative' + self.binary_quantifier = binary_quantifier + self.n_jobs = qp._get_njobs(n_jobs) + self.parallel_backend = parallel_backend +
+[docs] + def classify(self, instances): + """ + If the base quantifier is not probabilistic, returns a matrix of shape `(n,m,)` with `n` the number of + instances and `m` the number of classes. The entry `(i,j)` is a binary value indicating whether instance + `i` belongs to class `j`. The binary classifications are independent of each other, meaning that an instance + can end up being attributed to 0, 1, or more classes. + If the base quantifier is probabilistic, returns a matrix of shape `(n,m,2)` with `n` the number of instances + and `m` the number of classes. The entry `(i,j,1)` (resp. `(i,j,0)`) is a value in [0,1] indicating the + posterior probability that instance `i` belongs (resp. does not belong) to class `j`. The posterior + probabilities are independent of each other, meaning that, in general, they do not sum up to one. + + :param instances: array-like + :return: `np.ndarray` + """ + + classif_predictions = self._parallel(self._delayed_binary_classification, instances) + if isinstance(self.binary_quantifier, AggregativeSoftQuantifier): + return np.swapaxes(classif_predictions, 0, 1) + else: + return classif_predictions.T
+ + +
+[docs] + def aggregate(self, classif_predictions): + prevalences = self._parallel(self._delayed_binary_aggregate, classif_predictions) + return F.normalize_prevalence(prevalences)
+ + + def _delayed_binary_classification(self, c, X): + return self.dict_binary_quantifiers[c].classify(X) + + def _delayed_binary_aggregate(self, c, classif_predictions): + # the estimation for the positive class prevalence + return self.dict_binary_quantifiers[c].aggregate(classif_predictions[:, c])[1]
+ + + +
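+# A sketch of one-vs-all quantification (illustrative only: `train` is assumed
+# to be a multiclass LabelledCollection; newOneVsAll, defined in
+# quapy.method.base, dispatches aggregative binary quantifiers to
+# OneVsAllAggregative):
+#
+# >>> from sklearn.linear_model import LogisticRegression
+# >>> ova = qp.method.base.newOneVsAll(HDy(LogisticRegression()))
+# >>> ova.fit(train)
+# >>> estim_prev = ova.quantify(X_test)   # l1-normalized, sums up to 1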
+[docs] +class AggregativeMedianEstimator(BinaryQuantifier): + """ + This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the + estimates returned by differently (hyper)parameterized base quantifiers. + The median of unit-vectors is only guaranteed to be a unit-vector for n=2 dimensions, + i.e., in cases of binary quantification. + + :param base_quantifier: the base, binary quantifier + :param random_state: a seed to be set before fitting any base quantifier (default None) + :param param_grid: the grid of parameters over which the median will be computed + :param n_jobs: number of parallel workers + """ + def __init__(self, base_quantifier: AggregativeQuantifier, param_grid: dict, random_state=None, n_jobs=None): + self.base_quantifier = base_quantifier + self.param_grid = param_grid + self.random_state = random_state + self.n_jobs = qp._get_njobs(n_jobs) +
+[docs] + def get_params(self, deep=True): + return self.base_quantifier.get_params(deep)
+ + +
+[docs] + def set_params(self, **params): + self.base_quantifier.set_params(**params)
+ + + def _delayed_fit(self, args): + with qp.util.temp_seed(self.random_state): + params, training = args + model = deepcopy(self.base_quantifier) + model.set_params(**params) + model.fit(training) + return model + + def _delayed_fit_classifier(self, args): + with qp.util.temp_seed(self.random_state): + cls_params, training, kwargs = args + model = deepcopy(self.base_quantifier) + model.set_params(**cls_params) + predictions = model.classifier_fit_predict(training, **kwargs) + return (model, predictions) + + def _delayed_fit_aggregation(self, args): + with qp.util.temp_seed(self.random_state): + ((model, predictions), q_params), training = args + model = deepcopy(model) + model.set_params(**q_params) + model.aggregation_fit(predictions, training) + return model + +
+[docs] + def fit(self, training: LabelledCollection, **kwargs): + import itertools + + self._check_binary(training, self.__class__.__name__) + + if isinstance(self.base_quantifier, AggregativeQuantifier): + cls_configs, q_configs = qp.model_selection.group_params(self.param_grid) + + if len(cls_configs) > 1: + models_preds = qp.util.parallel( + self._delayed_fit_classifier, + ((params, training, kwargs) for params in cls_configs), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + asarray=False, + backend='threading' + ) + else: + print('only 1') + model = self.base_quantifier + model.set_params(**cls_configs[0]) + predictions = model.classifier_fit_predict(training, **kwargs) + models_preds = [(model, predictions)] + + self.models = qp.util.parallel( + self._delayed_fit_aggregation, + ((setup, training) for setup in itertools.product(models_preds, q_configs)), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + backend='threading' + ) + else: + configs = qp.model_selection.expand_grid(self.param_grid) + self.models = qp.util.parallel( + self._delayed_fit, + ((params, training) for params in configs), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + backend='threading' + ) + return self
+ + + def _delayed_predict(self, args): + model, instances = args + return model.quantify(instances) + +
+[docs] + def quantify(self, instances): + prev_preds = qp.util.parallel( + self._delayed_predict, + ((model, instances) for model in self.models), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + backend='threading' + ) + return np.median(prev_preds, axis=0)
+
+ + + +#--------------------------------------------------------------- +# imports +#--------------------------------------------------------------- + +from . import _threshold_optim + +T50 = _threshold_optim.T50 +MAX = _threshold_optim.MAX +X = _threshold_optim.X +MS = _threshold_optim.MS +MS2 = _threshold_optim.MS2 + + +from . import _kdey + +KDEyML = _kdey.KDEyML +KDEyHD = _kdey.KDEyHD +KDEyCS = _kdey.KDEyCS + +#--------------------------------------------------------------- +# aliases +#--------------------------------------------------------------- + +ClassifyAndCount = CC +AdjustedClassifyAndCount = ACC +ProbabilisticClassifyAndCount = PCC +ProbabilisticAdjustedClassifyAndCount = PACC +ExpectationMaximizationQuantifier = EMQ +DistributionMatchingY = DMy +SLD = EMQ +HellingerDistanceY = HDy +MedianSweep = MS +MedianSweep2 = MS2 +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/method/base.html b/docs/build/html/_modules/quapy/method/base.html new file mode 100644 index 0000000..cdc483c --- /dev/null +++ b/docs/build/html/_modules/quapy/method/base.html @@ -0,0 +1,238 @@ + + + + + + quapy.method.base — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.method.base

+from abc import ABCMeta, abstractmethod
+from copy import deepcopy
+
+from joblib import Parallel, delayed
+from sklearn.base import BaseEstimator
+
+import quapy as qp
+from quapy.data import LabelledCollection
+import numpy as np
+
+
+# Base Quantifier abstract class
+# ------------------------------------
+
+[docs] +class BaseQuantifier(BaseEstimator): + """ + Abstract Quantifier. A quantifier is defined as an object of a class that implements the method :meth:`fit` on + :class:`quapy.data.base.LabelledCollection`, the method :meth:`quantify`, and the methods :meth:`set_params` and + :meth:`get_params` for model selection (see :class:`quapy.model_selection.GridSearchQ`) + """ +
+[docs] + @abstractmethod + def fit(self, data: LabelledCollection): + """ + Trains a quantifier. + + :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data + :return: self + """ + ...
+ + +
+[docs] + @abstractmethod + def quantify(self, instances): + """ + Generate class prevalence estimates for the sample's instances + + :param instances: array-like + :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates. + """ + ...
+
+ + + +
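+# A minimal custom quantifier sketching the BaseQuantifier contract (illustrative
+# only: it ignores the test sample and always returns the training prevalence,
+# i.e., the maximum-likelihood-prevalence baseline):
+#
+# >>> class TrainPrevQuantifier(BaseQuantifier):
+# ...     def fit(self, data: LabelledCollection):
+# ...         self.estimated_prevalence = data.prevalence()
+# ...         return self
+# ...     def quantify(self, instances):
+# ...         return self.estimated_prevalence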
+[docs] +class BinaryQuantifier(BaseQuantifier): + """ + Abstract class of binary quantifiers, i.e., quantifiers estimating class prevalence values for only two classes + (typically, to be interpreted as one class and its complement). + """ + + def _check_binary(self, data: LabelledCollection, quantifier_name): + assert data.binary, f'{quantifier_name} works only on problems of binary classification. ' \ + f'Use the class OneVsAll to enable {quantifier_name} to work on single-label data.'
+ + + +
+[docs] +class OneVsAll: + pass
+ + + +
+[docs] +def newOneVsAll(binary_quantifier, n_jobs=None): + assert isinstance(binary_quantifier, BaseQuantifier), \ + f'{binary_quantifier} does not seem to be a Quantifier' + if isinstance(binary_quantifier, qp.method.aggregative.AggregativeQuantifier): + return qp.method.aggregative.OneVsAllAggregative(binary_quantifier, n_jobs) + else: + return OneVsAllGeneric(binary_quantifier, n_jobs)
+ + + +
+[docs] +class OneVsAllGeneric(OneVsAll, BaseQuantifier): + """ + Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary + quantifier for each class, and then l1-normalizes the outputs so that the class prevalence values sum up to 1. + """ + + def __init__(self, binary_quantifier, n_jobs=None): + assert isinstance(binary_quantifier, BaseQuantifier), \ + f'{binary_quantifier} does not seem to be a Quantifier' + if isinstance(binary_quantifier, qp.method.aggregative.AggregativeQuantifier): + print('[warning] the quantifier seems to be an instance of qp.method.aggregative.AggregativeQuantifier; ' + f'you might prefer instantiating {qp.method.aggregative.OneVsAllAggregative.__name__}') + self.binary_quantifier = binary_quantifier + self.n_jobs = qp._get_njobs(n_jobs) +
+[docs] + def fit(self, data: LabelledCollection, fit_classifier=True): + assert not data.binary, f'{self.__class__.__name__} expects non-binary data' + assert fit_classifier == True, 'fit_classifier must be True' + + self.dict_binary_quantifiers = {c: deepcopy(self.binary_quantifier) for c in data.classes_} + self._parallel(self._delayed_binary_fit, data) + return self
+ + + def _parallel(self, func, *args, **kwargs): + return np.asarray( + Parallel(n_jobs=self.n_jobs, backend='threading')( + delayed(func)(c, *args, **kwargs) for c in self.classes_ + ) + ) + +
+[docs] + def quantify(self, instances): + prevalences = self._parallel(self._delayed_binary_predict, instances) + return qp.functional.normalize_prevalence(prevalences)
+ + + @property + def classes_(self): + return sorted(self.dict_binary_quantifiers.keys()) + + def _delayed_binary_predict(self, c, X): + return self.dict_binary_quantifiers[c].quantify(X)[1] + + def _delayed_binary_fit(self, c, data): + bindata = LabelledCollection(data.instances, data.labels == c, classes=[False, True]) + self.dict_binary_quantifiers[c].fit(bindata)
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/method/meta.html b/docs/build/html/_modules/quapy/method/meta.html new file mode 100644 index 0000000..b24dcc3 --- /dev/null +++ b/docs/build/html/_modules/quapy/method/meta.html @@ -0,0 +1,861 @@ + + + + + + quapy.method.meta — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.method.meta

+import itertools
+from copy import deepcopy
+from typing import Union
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import f1_score, make_scorer, accuracy_score
+from sklearn.model_selection import GridSearchCV, cross_val_predict
+from tqdm import tqdm
+
+import quapy as qp
+from quapy import functional as F
+from quapy.data import LabelledCollection
+from quapy.model_selection import GridSearchQ
+from quapy.method.base import BaseQuantifier, BinaryQuantifier
+from quapy.method.aggregative import CC, ACC, PACC, HDy, EMQ, AggregativeQuantifier
+
+try:
+    from . import _neural
+except ModuleNotFoundError:
+    _neural = None
+
+
+if _neural:
+    QuaNet = _neural.QuaNetTrainer
+else:
+    QuaNet = "QuaNet is not available due to missing torch package"
+
+
+
+[docs] +class MedianEstimator2(BinaryQuantifier): + """ + This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the + estimates returned by differently (hyper)parameterized base quantifiers. + The median of unit-vectors is only guaranteed to be a unit-vector for n=2 dimensions, + i.e., in cases of binary quantification. + + :param base_quantifier: the base, binary quantifier + :param random_state: a seed to be set before fitting any base quantifier (default None) + :param param_grid: the grid of parameters over which the median will be computed + :param n_jobs: number of parallel workers + """ + def __init__(self, base_quantifier: BinaryQuantifier, param_grid: dict, random_state=None, n_jobs=None): + self.base_quantifier = base_quantifier + self.param_grid = param_grid + self.random_state = random_state + self.n_jobs = qp._get_njobs(n_jobs) +
+[docs] + def get_params(self, deep=True): + return self.base_quantifier.get_params(deep)
+ + +
+[docs] + def set_params(self, **params): + self.base_quantifier.set_params(**params)
+ + + def _delayed_fit(self, args): + with qp.util.temp_seed(self.random_state): + params, training = args + model = deepcopy(self.base_quantifier) + model.set_params(**params) + model.fit(training) + return model + +
+[docs] + def fit(self, training: LabelledCollection): + self._check_binary(training, self.__class__.__name__) + + configs = qp.model_selection.expand_grid(self.param_grid) + self.models = qp.util.parallel( + self._delayed_fit, + ((params, training) for params in configs), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs + ) + return self
+ + + def _delayed_predict(self, args): + model, instances = args + return model.quantify(instances) + +
+[docs] + def quantify(self, instances): + prev_preds = qp.util.parallel( + self._delayed_predict, + ((model, instances) for model in self.models), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs + ) + prev_preds = np.asarray(prev_preds) + return np.median(prev_preds, axis=0)
+
+ + + +
+[docs] +class MedianEstimator(BinaryQuantifier): + """ + This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the + estimates returned by differently (hyper)parameterized base quantifiers. + The median of unit-vectors is only guaranteed to be a unit-vector for n=2 dimensions, + i.e., in cases of binary quantification. + + :param base_quantifier: the base, binary quantifier + :param random_state: a seed to be set before fitting any base quantifier (default None) + :param param_grid: the grid of parameters over which the median will be computed + :param n_jobs: number of parallel workers + """ + def __init__(self, base_quantifier: BinaryQuantifier, param_grid: dict, random_state=None, n_jobs=None): + self.base_quantifier = base_quantifier + self.param_grid = param_grid + self.random_state = random_state + self.n_jobs = qp._get_njobs(n_jobs) +
+[docs] + def get_params(self, deep=True): + return self.base_quantifier.get_params(deep)
+ + +
+[docs] + def set_params(self, **params): + self.base_quantifier.set_params(**params)
+ + + def _delayed_fit(self, args): + with qp.util.temp_seed(self.random_state): + params, training = args + model = deepcopy(self.base_quantifier) + model.set_params(**params) + model.fit(training) + return model + + def _delayed_fit_classifier(self, args): + with qp.util.temp_seed(self.random_state): + cls_params, training = args + model = deepcopy(self.base_quantifier) + model.set_params(**cls_params) + predictions = model.classifier_fit_predict(training, predict_on=model.val_split) + return (model, predictions) + + def _delayed_fit_aggregation(self, args): + with qp.util.temp_seed(self.random_state): + ((model, predictions), q_params), training = args + model = deepcopy(model) + model.set_params(**q_params) + model.aggregation_fit(predictions, training) + return model + + +
+[docs] + def fit(self, training: LabelledCollection): + self._check_binary(training, self.__class__.__name__) + + if isinstance(self.base_quantifier, AggregativeQuantifier): + cls_configs, q_configs = qp.model_selection.group_params(self.param_grid) + + if len(cls_configs) > 1: + models_preds = qp.util.parallel( + self._delayed_fit_classifier, + ((params, training) for params in cls_configs), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + asarray=False + ) + else: + model = self.base_quantifier + model.set_params(**cls_configs[0]) + predictions = model.classifier_fit_predict(training, predict_on=model.val_split) + models_preds = [(model, predictions)] + + self.models = qp.util.parallel( + self._delayed_fit_aggregation, + ((setup, training) for setup in itertools.product(models_preds, q_configs)), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + asarray=False + ) + else: + configs = qp.model_selection.expand_grid(self.param_grid) + self.models = qp.util.parallel( + self._delayed_fit, + ((params, training) for params in configs), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + asarray=False + ) + return self
+ + + def _delayed_predict(self, args): + model, instances = args + return model.quantify(instances) + +
+[docs] + def quantify(self, instances): + prev_preds = qp.util.parallel( + self._delayed_predict, + ((model, instances) for model in self.models), + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs, + asarray=False + ) + prev_preds = np.asarray(prev_preds) + return np.median(prev_preds, axis=0)
+
+ + + +
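+# A usage sketch for MedianEstimator (illustrative only: binary quantification;
+# `train` is assumed to be a binary LabelledCollection, and 'classifier__C'
+# follows quapy's convention for routing hyperparameters to the underlying
+# classifier):
+#
+# >>> from sklearn.linear_model import LogisticRegression
+# >>> med = MedianEstimator(PACC(LogisticRegression()), param_grid={'classifier__C': [0.1, 1, 10]})
+# >>> med.fit(train)
+# >>> estim_prev = med.quantify(X_test)   # median of the per-configuration estimates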
+[docs] +class Ensemble(BaseQuantifier): + """ + Implementation of the Ensemble methods for quantification described by + `Pérez-Gállego et al., 2017 <https://www.sciencedirect.com/science/article/pii/S1566253516300628>`_ + and + `Pérez-Gállego et al., 2019 <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_. + The policies implemented include: + + - Average (`policy='ave'`): computes class prevalence estimates as the average of the estimates + returned by the base quantifiers. + - Training Prevalence (`policy='ptr'`): applies a dynamic selection to the ensemble’s members by retaining only + those members such that the class prevalence values in the samples they use as training set are closest to + preliminary class prevalence estimates computed as the average of the estimates of all the members. The final + estimate is recomputed by considering only the selected members. + - Distribution Similarity (`policy='ds'`): performs a dynamic selection of base members by retaining + the members trained on samples whose distribution of posterior probabilities is closest, in terms of the + Hellinger Distance, to the distribution of posterior probabilities in the test sample. + - Accuracy (`policy='<valid error name>'`): performs a static selection of the ensemble members by + retaining those that minimize a quantification error measure, which is passed as an argument. + + Example: + + >>> model = Ensemble(quantifier=ACC(LogisticRegression()), size=30, policy='ave', n_jobs=-1) + + :param quantifier: base quantification member of the ensemble + :param size: number of members + :param red_size: number of members to retain after selection (depending on the policy) + :param min_pos: minimum number of positive instances to consider a sample as valid + :param policy: the selection policy; available policies include: `ave` (default), `ptr`, `ds`, and accuracy + (which is instantiated via a valid error name, e.g., `mae`) + :param max_sample_size: maximum number of instances to consider in the samples (set to None + to indicate no limit, default) + :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out + validation split, or a :class:`quapy.data.base.LabelledCollection` (the split itself). + :param n_jobs: number of parallel workers (default None) + :param verbose: set to True (default is False) to get some information in standard output + """ + + VALID_POLICIES = {'ave', 'ptr', 'ds'} | qp.error.QUANTIFICATION_ERROR_NAMES + + def __init__(self, + quantifier: BaseQuantifier, + size=50, + red_size=25, + min_pos=5, + policy='ave', + max_sample_size=None, + val_split:Union[qp.data.LabelledCollection, float]=None, + n_jobs=None, + verbose=False): + assert policy in Ensemble.VALID_POLICIES, \ + f'unknown policy={policy}; valid are {Ensemble.VALID_POLICIES}' + assert max_sample_size is None or max_sample_size > 0, \ + 'wrong value for max_sample_size; set it to a positive number or None' + self.base_quantifier = quantifier + self.size = size + self.min_pos = min_pos + self.red_size = red_size + self.policy = policy + self.val_split = val_split + self.n_jobs = qp._get_njobs(n_jobs) + self.post_proba_fn = None + self.verbose = verbose + self.max_sample_size = max_sample_size + + def _sout(self, msg): + if self.verbose: + print('[Ensemble]' + msg) +
+[docs]
+    def fit(self, data: qp.data.LabelledCollection, val_split: Union[qp.data.LabelledCollection, float] = None):
+        """
+        Trains the members of the ensemble on different samples of the training data.
+
+        :param data: the training data
+        :param val_split: a float in (0,1) indicating the proportion of data to be used as a stratified held-out
+            validation split, or a :class:`quapy.data.base.LabelledCollection` (the split itself); if None (default),
+            the value set at construction time is used
+        :return: self
+        """
+        if self.policy == 'ds' and not data.binary:
+            raise ValueError('ds policy is only defined for binary quantification, but this dataset is not binary')
+
+        if val_split is None:
+            val_split = self.val_split
+
+        # randomly chooses the prevalence values for each member of the ensemble (preventing classes with less than
+        # min_pos positive examples)
+        sample_size = len(data) if self.max_sample_size is None else min(self.max_sample_size, len(data))
+        prevs = [_draw_simplex(ndim=data.n_classes, min_val=self.min_pos / sample_size) for _ in range(self.size)]
+
+        posteriors = None
+        if self.policy == 'ds':
+            # precompute the training posterior probabilities
+            posteriors, self.post_proba_fn = self._ds_policy_get_posteriors(data)
+
+        is_static_policy = (self.policy in qp.error.QUANTIFICATION_ERROR_NAMES)
+
+        args = (
+            (self.base_quantifier, data, val_split, prev, posteriors, is_static_policy, self.verbose, sample_size)
+            for prev in prevs
+        )
+        self.ensemble = qp.util.parallel(
+            _delayed_new_instance,
+            tqdm(args, desc='fitting ensemble', total=self.size) if self.verbose else args,
+            asarray=False,
+            n_jobs=self.n_jobs)
+
+        # static selection policy (the name of a quantification-oriented error function to minimize)
+        if self.policy in qp.error.QUANTIFICATION_ERROR_NAMES:
+            self._accuracy_policy(error_name=self.policy)
+
+        self._sout('Fit [Done]')
+        return self
+ + +
+[docs]
+    def quantify(self, instances):
+        """
+        Estimates the class prevalence values for the given instances as the (normalized) average of the estimates
+        produced by the (selected) members of the ensemble.
+
+        :param instances: array-like with the test instances
+        :return: ndarray of class prevalence estimates
+        """
+        predictions = np.asarray(
+            qp.util.parallel(_delayed_quantify, ((Qi, instances) for Qi in self.ensemble), n_jobs=self.n_jobs)
+        )
+
+        if self.policy == 'ptr':
+            predictions = self._ptr_policy(predictions)
+        elif self.policy == 'ds':
+            predictions = self._ds_policy(predictions, instances)
+
+        predictions = np.mean(predictions, axis=0)
+        return F.normalize_prevalence(predictions)
+ + +
+[docs]
+    def set_params(self, **parameters):
+        """
+        This function should not be used within :class:`quapy.model_selection.GridSearchQ` (it is provided here for
+        compatibility with the abstract class).
+        Instead, use `Ensemble(GridSearchQ(q),...)`, with `q` a Quantifier (recommended), or
+        `Ensemble(Q(GridSearchCV(l)))` with `Q` a quantifier class that has a classifier `l` optimized for
+        classification (not recommended).
+
+        :param parameters: dictionary
+        :return: raises an Exception
+        """
+        raise NotImplementedError(f'{self.__class__.__name__} should not be used within GridSearchQ; '
+                                  f'instead, use Ensemble(GridSearchQ(q),...), with q a Quantifier (recommended), '
+                                  f'or Ensemble(Q(GridSearchCV(l))) with Q a quantifier class that has a classifier '
+                                  f'l optimized for classification (not recommended).')
+ + +
+[docs]
+    def get_params(self, deep=True):
+        """
+        This function should not be used within :class:`quapy.model_selection.GridSearchQ` (it is provided here for
+        compatibility with the abstract class).
+        Instead, use `Ensemble(GridSearchQ(q),...)`, with `q` a Quantifier (recommended), or
+        `Ensemble(Q(GridSearchCV(l)))` with `Q` a quantifier class that has a classifier `l` optimized for
+        classification (not recommended).
+
+        :param deep: for compatibility with scikit-learn
+        :return: raises an Exception
+        """
+
+        raise NotImplementedError()
+
+
+    def _accuracy_policy(self, error_name):
+        """
+        Selects the `red_size` best-performing quantifiers in a static way (i.e., dropping all non-selected members).
+        For each model in the ensemble, the performance is measured in terms of `error_name` on the quantification of
+        the samples used for training the rest of the models in the ensemble.
+        """
+        from quapy.evaluation import evaluate_on_samples
+        error = qp.error.from_name(error_name)
+        tests = [m[3] for m in self.ensemble]
+        scores = []
+        for i, model in enumerate(self.ensemble):
+            scores.append(evaluate_on_samples(model[0], tests[:i] + tests[i + 1:], error))
+        order = np.argsort(scores)
+
+        self.ensemble = _select_k(self.ensemble, order, k=self.red_size)
+
+    def _ptr_policy(self, predictions):
+        """
+        Selects the predictions made by models that have been trained on samples with a prevalence that is most similar
+        to a first approximation of the test prevalence as made by all models in the ensemble.
+        """
+        test_prev_estim = predictions.mean(axis=0)
+        tr_prevs = [m[1] for m in self.ensemble]
+        ptr_differences = [qp.error.mse(ptr_i, test_prev_estim) for ptr_i in tr_prevs]
+        order = np.argsort(ptr_differences)
+        return _select_k(predictions, order, k=self.red_size)
+
+    def _ds_policy_get_posteriors(self, data: LabelledCollection):
+        """
+        The original article leaves some aspects of this method unspecified. The paper says that the distribution of
+        posterior probabilities from training and test examples is compared by means of the Hellinger Distance, but
+        it does not specify how these posterior probabilities are to be generated. In the article, a Logistic
+        Regressor (LR) is used as the classification device, and it could also be used for this purpose. In general,
+        however, a quantifier is not necessarily an instance of aggregative probabilistic quantifiers, and so it
+        cannot be taken for granted that the quantifier builds on top of a probabilistic classifier. Additionally, it
+        would not be correct to generate the posterior probabilities for the very same training instances that were
+        used to train the classifier that generates them.
+
+        This function thus generates the posterior probabilities for all training documents in a cross-validation way,
+        using LR with hyperparameters that have previously been optimized via grid search in 5FCV.
+
+        :param data: a LabelledCollection
+        :return: (P,f,) where P is an ndarray containing the posterior probabilities of the training data, generated via
+            cross-validation and using an optimized LR, and the function to be used in order to generate posterior
+            probabilities for test instances.
+ """ + + X, y = data.Xy + lr_base = LogisticRegression(class_weight='balanced', max_iter=1000) + + param_grid = {'C': np.logspace(-4, 4, 9)} + optim = GridSearchCV(lr_base, param_grid=param_grid, cv=5, n_jobs=self.n_jobs, refit=True).fit(X, y) + + posteriors = cross_val_predict(optim.best_estimator_, X, y, cv=5, n_jobs=self.n_jobs, method='predict_proba') + posteriors_generator = optim.best_estimator_.predict_proba + + return posteriors, posteriors_generator + + def _ds_policy(self, predictions, test): + test_posteriors = self.post_proba_fn(test) + test_distribution = get_probability_distribution(test_posteriors) + tr_distributions = [m[2] for m in self.ensemble] + dist = [F.HellingerDistance(tr_dist_i, test_distribution) for tr_dist_i in tr_distributions] + order = np.argsort(dist) + return _select_k(predictions, order, k=self.red_size) + + @property + def aggregative(self): + """ + Indicates that the quantifier is not aggregative. + + :return: False + """ + return False + + @property + def probabilistic(self): + """ + Indicates that the quantifier is not probabilistic. + + :return: False + """ + return False
+ + + +
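+# Usage sketch (illustrative, not part of the module): fitting an Ensemble with the 'ds' policy on
+# a binary dataset; the dataset fetch shown is only one possible way of obtaining a LabelledCollection.
+#
+#     >>> import quapy as qp
+#     >>> from quapy.method.aggregative import ACC
+#     >>> from sklearn.linear_model import LogisticRegression
+#     >>> train, test = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5).train_test
+#     >>> ensemble = Ensemble(quantifier=ACC(LogisticRegression()), size=30, policy='ds', n_jobs=-1)
+#     >>> ensemble.fit(train)
+#     >>> estim_prevalence = ensemble.quantify(test.instances)
+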
+[docs] +def get_probability_distribution(posterior_probabilities, bins=8): + """ + Gets a histogram out of the posterior probabilities (only for the binary case). + + :param posterior_probabilities: array-like of shape `(n_instances, 2,)` + :param bins: integer + :return: `np.ndarray` with the relative frequencies for each bin (for the positive class only) + """ + assert posterior_probabilities.shape[1] == 2, 'the posterior probabilities do not seem to be for a binary problem' + posterior_probabilities = posterior_probabilities[:, 1] # take the positive posteriors only + distribution, _ = np.histogram(posterior_probabilities, bins=bins, range=(0, 1), density=True) + return distribution
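+
+# Small illustration (made-up numbers, not part of the library): with density=True, np.histogram
+# returns a density whose integral over [0,1] is 1, i.e., distribution.sum() * binwidth == 1.
+#
+#     >>> posteriors = np.asarray([[0.9, 0.1], [0.2, 0.8], [0.5, 0.5], [0.1, 0.9]])
+#     >>> dist = get_probability_distribution(posteriors, bins=4)
+#     >>> dist.sum() * (1 / 4)
+#     1.0
+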
+
+
+def _select_k(elements, order, k):
+    return [elements[idx] for idx in order[:k]]
+
+
+def _delayed_new_instance(args):
+    base_quantifier, data, val_split, prev, posteriors, keep_samples, verbose, sample_size = args
+    if verbose:
+        print(f'\tfit-start for prev {F.strprev(prev)}, sample_size={sample_size}')
+    model = deepcopy(base_quantifier)
+
+    if val_split is not None:
+        if isinstance(val_split, float):
+            assert 0 < val_split < 1, 'val_split should be in (0,1)'
+            data, val_split = data.split_stratified(train_prop=1 - val_split)
+
+    sample_index = data.sampling_index(sample_size, *prev)
+    sample = data.sampling_from_index(sample_index)
+
+    if val_split is not None:
+        model.fit(sample, val_split=val_split)
+    else:
+        model.fit(sample)
+
+    tr_prevalence = sample.prevalence()
+    tr_distribution = get_probability_distribution(posteriors[sample_index]) if (posteriors is not None) else None
+
+    if verbose:
+        print(f'\t--fit-ended for prev {F.strprev(prev)}')
+
+    return (model, tr_prevalence, tr_distribution, sample if keep_samples else None)
+
+
+def _delayed_quantify(args):
+    quantifier, instances = args
+    return quantifier[0].quantify(instances)
+
+
+def _draw_simplex(ndim, min_val, max_trials=100):
+    """
+    Returns a uniform sampling from the ndim-dimensional simplex, but guarantees that all dimensions
+    are >= min_val (for min_val>0, this makes the sampling not truly uniform)
+
+    :param ndim: number of dimensions of the simplex
+    :param min_val: minimum class prevalence allowed. If less than 1/ndim a ValueError will be thrown since
+        there is no possible solution.
+    :param max_trials: maximum number of rejection-sampling trials before giving up (default 100)
+    :return: a sample from the ndim-dimensional simplex that is uniform in S(ndim)-R, where S(ndim) is the simplex
+        and R is the simplex subset containing dimensions lower than min_val
+    """
+    if min_val >= 1 / ndim:
+        raise ValueError(f'no sample can be drawn from the {ndim}-dimensional simplex so that '
+                         f'all its values are >={min_val} (try with a larger value for min_pos)')
+    trials = 0
+    while True:
+        u = F.uniform_simplex_sampling(ndim)
+        if all(u >= min_val):
+            return u
+        trials += 1
+        if trials >= max_trials:
+            raise ValueError(f'it looks like finding a random simplex with all its dimensions being '
+                             f'>= {min_val} is unlikely (it failed after {max_trials} trials)')
+
+
+def _instantiate_ensemble(classifier, base_quantifier_class, param_grid, optim, param_model_sel, **kwargs):
+    if optim is None:
+        base_quantifier = base_quantifier_class(classifier)
+    elif optim in qp.error.CLASSIFICATION_ERROR:
+        if optim == qp.error.f1e:
+            scoring = make_scorer(f1_score)
+        elif optim == qp.error.acce:
+            scoring = make_scorer(accuracy_score)
+        classifier = GridSearchCV(classifier, param_grid, scoring=scoring)
+        base_quantifier = base_quantifier_class(classifier)
+    else:
+        base_quantifier = GridSearchQ(base_quantifier_class(classifier),
+                                      param_grid=param_grid,
+                                      **param_model_sel,
+                                      error=optim)
+
+    return Ensemble(base_quantifier, **kwargs)
+
+
+def _check_error(error):
+    if error is None:
+        return None
+    if error in qp.error.QUANTIFICATION_ERROR or error in qp.error.CLASSIFICATION_ERROR:
+        return error
+    elif isinstance(error, str):
+        return qp.error.from_name(error)
+    else:
+        raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n'
+                         f'the name of an error function in {qp.error.ERROR_NAMES}')
+
+
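+# Quick sanity check for _draw_simplex (illustrative, not part of the library): the rejection
+# sampling draws from the uniform simplex until every component reaches min_val, so the result is
+# a valid prevalence vector.
+#
+#     >>> prev = _draw_simplex(ndim=3, min_val=0.05)
+#     >>> bool(abs(prev.sum() - 1) < 1e-8), bool((prev >= 0.05).all())
+#     (True, True)
+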
+[docs]
+def ensembleFactory(classifier, base_quantifier_class, param_grid=None, optim=None, param_model_sel: dict = None,
+                    **kwargs):
+    """
+    Ensemble factory. Provides a unified interface for instantiating ensembles that can be optimized (via model
+    selection for quantification) for a given evaluation metric using :class:`quapy.model_selection.GridSearchQ`.
+    If the evaluation metric is classification-oriented
+    (instead of quantification-oriented), then the optimization will be carried out via sklearn's
+    `GridSearchCV <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html>`_.
+
+    The following example instantiates an :class:`Ensemble` based on :class:`quapy.method.aggregative.PACC`
+    in which the base members are optimized for :meth:`quapy.error.mae` via
+    :class:`quapy.model_selection.GridSearchQ`. The ensemble follows the policy `Accuracy` based
+    on :meth:`quapy.error.mae` (the same measure being optimized),
+    meaning that a static selection of members of the ensemble is made based on their performance
+    in terms of this error.
+
+    >>> param_grid = {
+    >>>     'C': np.logspace(-3,3,7),
+    >>>     'class_weight': ['balanced', None]
+    >>> }
+    >>> param_mod_sel = {
+    >>>     'sample_size': 500,
+    >>>     'protocol': 'app'
+    >>> }
+    >>> common = {
+    >>>     'max_sample_size': 1000,
+    >>>     'n_jobs': -1,
+    >>>     'param_grid': param_grid,
+    >>>     'param_mod_sel': param_mod_sel,
+    >>> }
+    >>>
+    >>> ensembleFactory(LogisticRegression(), PACC, optim='mae', policy='mae', **common)
+
+    :param classifier: sklearn's Estimator that generates a classifier
+    :param base_quantifier_class: a class of quantifiers
+    :param param_grid: a dictionary with the grid of parameters to optimize for
+    :param optim: a valid quantification or classification error, or a string name of it
+    :param param_model_sel: a dictionary containing any keyword argument to pass to
+        :class:`quapy.model_selection.GridSearchQ`
+    :param kwargs: kwargs for the class :class:`Ensemble`
+    :return: an instance of :class:`Ensemble`
+    """
+    if optim is not None:
+        if param_grid is None:
+            raise ValueError('param_grid is None but optim was requested.')
+        if param_model_sel is None:
+            raise ValueError('param_model_sel is None but optim was requested.')
+    error = _check_error(optim)
+    return _instantiate_ensemble(classifier, base_quantifier_class, param_grid, error, param_model_sel, **kwargs)
+ + + +
+[docs]
+def ECC(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs):
+    """
+    Implements an ensemble of :class:`quapy.method.aggregative.CC` quantifiers, as used by
+    `Pérez-Gállego et al., 2019 <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.
+
+    Equivalent to:
+
+    >>> ensembleFactory(classifier, CC, param_grid, optim, param_mod_sel, **kwargs)
+
+    See :meth:`ensembleFactory` for further details.
+
+    :param classifier: sklearn's Estimator that generates a classifier
+    :param param_grid: a dictionary with the grid of parameters to optimize for
+    :param optim: a valid quantification or classification error, or a string name of it
+    :param param_mod_sel: a dictionary containing any keyword argument to pass to
+        :class:`quapy.model_selection.GridSearchQ`
+    :param kwargs: kwargs for the class :class:`Ensemble`
+    :return: an instance of :class:`Ensemble`
+    """
+
+    return ensembleFactory(classifier, CC, param_grid, optim, param_mod_sel, **kwargs)
+ + + +
+[docs]
+def EACC(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs):
+    """
+    Implements an ensemble of :class:`quapy.method.aggregative.ACC` quantifiers, as used by
+    `Pérez-Gállego et al., 2019 <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.
+
+    Equivalent to:
+
+    >>> ensembleFactory(classifier, ACC, param_grid, optim, param_mod_sel, **kwargs)
+
+    See :meth:`ensembleFactory` for further details.
+
+    :param classifier: sklearn's Estimator that generates a classifier
+    :param param_grid: a dictionary with the grid of parameters to optimize for
+    :param optim: a valid quantification or classification error, or a string name of it
+    :param param_mod_sel: a dictionary containing any keyword argument to pass to
+        :class:`quapy.model_selection.GridSearchQ`
+    :param kwargs: kwargs for the class :class:`Ensemble`
+    :return: an instance of :class:`Ensemble`
+    """
+
+    return ensembleFactory(classifier, ACC, param_grid, optim, param_mod_sel, **kwargs)
+ + + +
+[docs]
+def EPACC(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs):
+    """
+    Implements an ensemble of :class:`quapy.method.aggregative.PACC` quantifiers.
+
+    Equivalent to:
+
+    >>> ensembleFactory(classifier, PACC, param_grid, optim, param_mod_sel, **kwargs)
+
+    See :meth:`ensembleFactory` for further details.
+
+    :param classifier: sklearn's Estimator that generates a classifier
+    :param param_grid: a dictionary with the grid of parameters to optimize for
+    :param optim: a valid quantification or classification error, or a string name of it
+    :param param_mod_sel: a dictionary containing any keyword argument to pass to
+        :class:`quapy.model_selection.GridSearchQ`
+    :param kwargs: kwargs for the class :class:`Ensemble`
+    :return: an instance of :class:`Ensemble`
+    """
+
+    return ensembleFactory(classifier, PACC, param_grid, optim, param_mod_sel, **kwargs)
+ + + +
+[docs]
+def EHDy(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs):
+    """
+    Implements an ensemble of :class:`quapy.method.aggregative.HDy` quantifiers, as used by
+    `Pérez-Gállego et al., 2019 <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.
+
+    Equivalent to:
+
+    >>> ensembleFactory(classifier, HDy, param_grid, optim, param_mod_sel, **kwargs)
+
+    See :meth:`ensembleFactory` for further details.
+
+    :param classifier: sklearn's Estimator that generates a classifier
+    :param param_grid: a dictionary with the grid of parameters to optimize for
+    :param optim: a valid quantification or classification error, or a string name of it
+    :param param_mod_sel: a dictionary containing any keyword argument to pass to
+        :class:`quapy.model_selection.GridSearchQ`
+    :param kwargs: kwargs for the class :class:`Ensemble`
+    :return: an instance of :class:`Ensemble`
+    """
+
+    return ensembleFactory(classifier, HDy, param_grid, optim, param_mod_sel, **kwargs)
+ + + +
+[docs]
+def EEMQ(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs):
+    """
+    Implements an ensemble of :class:`quapy.method.aggregative.EMQ` quantifiers.
+
+    Equivalent to:
+
+    >>> ensembleFactory(classifier, EMQ, param_grid, optim, param_mod_sel, **kwargs)
+
+    See :meth:`ensembleFactory` for further details.
+
+    :param classifier: sklearn's Estimator that generates a classifier
+    :param param_grid: a dictionary with the grid of parameters to optimize for
+    :param optim: a valid quantification or classification error, or a string name of it
+    :param param_mod_sel: a dictionary containing any keyword argument to pass to
+        :class:`quapy.model_selection.GridSearchQ`
+    :param kwargs: kwargs for the class :class:`Ensemble`
+    :return: an instance of :class:`Ensemble`
+    """
+
+    return ensembleFactory(classifier, EMQ, param_grid, optim, param_mod_sel, **kwargs)
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/method/non_aggregative.html b/docs/build/html/_modules/quapy/method/non_aggregative.html new file mode 100644 index 0000000..3363c35 --- /dev/null +++ b/docs/build/html/_modules/quapy/method/non_aggregative.html @@ -0,0 +1,286 @@ + + + + + + quapy.method.non_aggregative — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.method.non_aggregative

+from typing import Union, Callable
+import numpy as np
+
+from quapy.functional import get_divergence
+from quapy.data import LabelledCollection
+from quapy.method.base import BaseQuantifier, BinaryQuantifier
+import quapy.functional as F
+
+
+
+[docs]
+class MaximumLikelihoodPrevalenceEstimation(BaseQuantifier):
+    """
+    The `Maximum Likelihood Prevalence Estimation` (MLPE) method is a lazy method that assumes there is no prior
+    probability shift between training and test instances (put another way, that the i.i.d. assumption holds).
+    The class prevalence estimate it returns for any test sample is always the class prevalence observed during
+    training, irrespective of the test sample itself. This method is considered to be a lower-bound quantifier that
+    any quantification method should beat.
+    """
+
+    def __init__(self):
+        self._classes_ = None
+
+[docs] + def fit(self, data: LabelledCollection): + """ + Computes the training prevalence and stores it. + + :param data: the training sample + :return: self + """ + self.estimated_prevalence = data.prevalence() + return self
+ + +
+[docs]
+    def quantify(self, instances):
+        """
+        Ignores the input instances and returns, as the class prevalence estimates, the training prevalence.
+
+        :param instances: array-like (ignored)
+        :return: the class prevalence seen during training
+        """
+        return self.estimated_prevalence
+
+ + + +
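+# Usage sketch (illustrative, randomly generated data): MLPE memorizes the training prevalence and
+# replays it for any input.
+#
+#     >>> data = LabelledCollection(np.random.rand(100, 2), np.random.randint(0, 2, 100))
+#     >>> mlpe = MaximumLikelihoodPrevalenceEstimation().fit(data)
+#     >>> np.all(mlpe.quantify(instances=None) == data.prevalence())  # the input is ignored
+#     True
+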
+[docs]
+class DMx(BaseQuantifier):
+    """
+    Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of covariates.
+    This implementation takes the number of bins, the divergence, and whether to work on cumulative (CDF) rather than
+    probability (PDF) distributions as hyperparameters.
+
+    :param nbins: number of bins used to discretize the distributions (default 8)
+    :param divergence: a string representing a divergence measure (currently, "HD" and "topsoe" are implemented)
+        or a callable function taking two ndarrays of the same dimension as input (default "HD", meaning Hellinger
+        Distance)
+    :param cdf: whether to use CDF instead of PDF (default False)
+    :param search: strategy used to search for the prevalence vector that minimizes the divergence
+        (default 'optim_minimize')
+    :param n_jobs: number of parallel workers (default None)
+    """
+
+    def __init__(self, nbins=8, divergence: Union[str, Callable]='HD', cdf=False, search='optim_minimize', n_jobs=None):
+        self.nbins = nbins
+        self.divergence = divergence
+        self.cdf = cdf
+        self.search = search
+        self.n_jobs = n_jobs
+
+[docs]
+    @classmethod
+    def HDx(cls, n_jobs=None):
+        """
+        `Hellinger Distance x <https://www.sciencedirect.com/science/article/pii/S0020025512004069>`_ (HDx).
+        HDx is a method for training binary quantifiers that models quantification as the problem of
+        minimizing the average divergence (in terms of the Hellinger Distance) across the feature-specific normalized
+        histograms of two representations, one for the unlabelled examples, and another generated from the training
+        examples as a mixture model of the class-specific representations. The parameters of the mixture thus represent
+        the estimates of the class prevalence values.
+
+        The method computes the matching for each nbins in [10, 20, ..., 110] and reports the median of the
+        resulting prevalence estimates. The best prevalence is searched via linear search, from 0 to 1 stepping by 0.01.
+
+        :param n_jobs: number of parallel workers
+        :return: an instance of this class set up to mimic the behaviour of the HDx as originally proposed by
+            González-Castro, Alaiz-Rodríguez, Alegre (2013)
+        """
+        from quapy.method.meta import MedianEstimator
+
+        dmx = DMx(divergence='HD', cdf=False, search='linear_search')
+        nbins = {'nbins': np.linspace(10, 110, 11, dtype=int)}
+        hdx = MedianEstimator(base_quantifier=dmx, param_grid=nbins, n_jobs=n_jobs)
+        return hdx
+ + + def __get_distributions(self, X): + + histograms = [] + for feat_idx in range(self.nfeats): + feature = X[:, feat_idx] + feat_range = self.feat_ranges[feat_idx] + hist = np.histogram(feature, bins=self.nbins, range=feat_range)[0] + norm_hist = hist / hist.sum() + histograms.append(norm_hist) + distributions = np.vstack(histograms) + + if self.cdf: + distributions = np.cumsum(distributions, axis=1) + + return distributions + +
+[docs] + def fit(self, data: LabelledCollection): + """ + Generates the validation distributions out of the training data (covariates). + The validation distributions have shape `(n, nfeats, nbins)`, with `n` the number of classes, `nfeats` + the number of features, and `nbins` the number of bins. + In particular, let `V` be the validation distributions; then `di=V[i]` are the distributions obtained from + training data labelled with class `i`; while `dij = di[j]` is the discrete distribution for feature j in + training data labelled with class `i`, and `dij[k]` is the fraction of instances with a value in the `k`-th bin. + + :param data: the training set + """ + X, y = data.Xy + + self.nfeats = X.shape[1] + self.feat_ranges = _get_features_range(X) + + self.validation_distribution = np.asarray( + [self.__get_distributions(X[y==cat]) for cat in range(data.n_classes)] + ) + + return self
+ + +
+[docs] + def quantify(self, instances): + """ + Searches for the mixture model parameter (the sought prevalence values) that yields a validation distribution + (the mixture) that best matches the test distribution, in terms of the divergence measure of choice. + The matching is computed as the average dissimilarity (in terms of the dissimilarity measure of choice) + between all feature-specific discrete distributions. + + :param instances: instances in the sample + :return: a vector of class prevalence estimates + """ + + assert instances.shape[1] == self.nfeats, f'wrong shape; expected {self.nfeats}, found {instances.shape[1]}' + + test_distribution = self.__get_distributions(instances) + divergence = get_divergence(self.divergence) + n_classes, n_feats, nbins = self.validation_distribution.shape + def loss(prev): + prev = np.expand_dims(prev, axis=0) + mixture_distribution = (prev @ self.validation_distribution.reshape(n_classes,-1)).reshape(n_feats, -1) + divs = [divergence(test_distribution[feat], mixture_distribution[feat]) for feat in range(n_feats)] + return np.mean(divs) + + return F.argmin_prevalence(loss, n_classes, method=self.search)
+
+ + + + +def _get_features_range(X): + feat_ranges = [] + ncols = X.shape[1] + for col_idx in range(ncols): + feature = X[:,col_idx] + feat_ranges.append((np.min(feature), np.max(feature))) + return feat_ranges + + +#--------------------------------------------------------------- +# aliases +#--------------------------------------------------------------- + +DistributionMatchingX = DMx +
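+
+# Usage sketch (synthetic data, for illustration only):
+#
+#     >>> data = LabelledCollection(np.random.rand(500, 10), np.random.randint(0, 3, 500))
+#     >>> train, test = data.split_stratified(train_prop=0.6)
+#     >>> dmx = DMx(nbins=8, divergence='HD').fit(train)
+#     >>> prev_estim = dmx.quantify(test.X)  # ndarray of 3 prevalence values summing (approximately) to 1
+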
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/model_selection.html b/docs/build/html/_modules/quapy/model_selection.html new file mode 100644 index 0000000..84fd962 --- /dev/null +++ b/docs/build/html/_modules/quapy/model_selection.html @@ -0,0 +1,554 @@ + + + + + + quapy.model_selection — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.model_selection

+import itertools
+import signal
+from copy import deepcopy
+from enum import Enum
+from typing import Union, Callable
+from functools import wraps
+
+import numpy as np
+from sklearn import clone
+
+import quapy as qp
+from quapy import evaluation
+from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol
+from quapy.data.base import LabelledCollection
+from quapy.method.aggregative import BaseQuantifier, AggregativeQuantifier
+from quapy.util import timeout
+from time import time
+
+
+
+[docs] +class Status(Enum): + SUCCESS = 1 + TIMEOUT = 2 + INVALID = 3 + ERROR = 4
+ + + +
+[docs] +class ConfigStatus: + def __init__(self, params, status, msg=''): + self.params = params + self.status = status + self.msg = msg + + def __str__(self): + return f':params:{self.params} :status:{self.status} ' + self.msg + + def __repr__(self): + return str(self) + +
+[docs] + def success(self): + return self.status == Status.SUCCESS
+ + +
+[docs] + def failed(self): + return self.status != Status.SUCCESS
+
+ + + +
+[docs]
+class GridSearchQ(BaseQuantifier):
+    """Grid Search optimization targeting a quantification-oriented metric.
+
+    Optimizes the hyperparameters of a quantification method, based on an evaluation method and on an evaluation
+    protocol for quantification.
+
+    :param model: the quantifier to optimize
+    :type model: BaseQuantifier
+    :param param_grid: a dictionary with keys the parameter names and values the list of values to explore
+    :param protocol: a sample generation protocol, an instance of :class:`quapy.protocol.AbstractProtocol`
+    :param error: an error function (callable) or a string indicating the name of an error function (valid ones
+        are those in :class:`quapy.error.QUANTIFICATION_ERROR`)
+    :param refit: whether to refit the model on the whole labelled collection (training+validation) with
+        the best chosen hyperparameter combination. Ignored if protocol='gen'
+    :param timeout: establishes a timer (in seconds) for each of the hyperparameter configurations being tested.
+        Whenever a run takes longer than this timer, that configuration will be ignored. If all configurations end up
+        being ignored, a TimeoutError exception is raised. If -1 (default) then no time bound is set.
+    :param n_jobs: number of parallel workers (default None)
+    :param raise_errors: boolean; if True, an exception is raised whenever a parameter combination yields an error.
+        If False (default), the combination is marked with an error status and the process goes on. However, if no
+        configuration yields a valid model, then a ValueError exception will be raised.
+    :param verbose: set to True to get information through the stdout
+    """
+
+    def __init__(self,
+                 model: BaseQuantifier,
+                 param_grid: dict,
+                 protocol: AbstractProtocol,
+                 error: Union[Callable, str] = qp.error.mae,
+                 refit=True,
+                 timeout=-1,
+                 n_jobs=None,
+                 raise_errors=False,
+                 verbose=False):
+
+        self.model = model
+        self.param_grid = param_grid
+        self.protocol = protocol
+        self.refit = refit
+        self.timeout = timeout
+        self.n_jobs = qp._get_njobs(n_jobs)
+        self.raise_errors = raise_errors
+        self.verbose = verbose
+        self.__check_error(error)
+        assert isinstance(protocol, AbstractProtocol), 'unknown protocol'
+
+    def _sout(self, msg):
+        if self.verbose:
+            print(f'[{self.__class__.__name__}:{self.model.__class__.__name__}]: {msg}')
+
+    def __check_error(self, error):
+        if error in qp.error.QUANTIFICATION_ERROR:
+            self.error = error
+        elif isinstance(error, str):
+            self.error = qp.error.from_name(error)
+        elif hasattr(error, '__call__'):
+            self.error = error
+        else:
+            raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n'
+                             f'the name of an error function in {qp.error.QUANTIFICATION_ERROR_NAMES}')
+
+    def _prepare_classifier(self, cls_params):
+        model = deepcopy(self.model)
+
+        def job(cls_params):
+            model.set_params(**cls_params)
+            predictions = model.classifier_fit_predict(self._training)
+            return predictions
+
+        predictions, status, took = self._error_handler(job, cls_params)
+        self._sout(f'[classifier fit] hyperparams={cls_params} [took {took:.3f}s]')
+        return model, predictions, status, took
+
+    def _prepare_aggregation(self, args):
+        model, predictions, cls_took, cls_params, q_params = args
+        model = deepcopy(model)
+        params = {**cls_params, **q_params}
+
+        def job(q_params):
+            model.set_params(**q_params)
+            model.aggregation_fit(predictions, self._training)
+            score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error)
+            return score
+
+        score, status, aggr_took = self._error_handler(job, q_params)
+
self._print_status(params, score, status, aggr_took) + return model, params, score, status, (cls_took+aggr_took) + + def _prepare_nonaggr_model(self, params): + model = deepcopy(self.model) + + def job(params): + model.set_params(**params) + model.fit(self._training) + score = evaluation.evaluate(model, protocol=self.protocol, error_metric=self.error) + return score + + score, status, took = self._error_handler(job, params) + self._print_status(params, score, status, took) + return model, params, score, status, took + + def _break_down_fit(self): + """ + Decides whether to break down the fit phase in two (classifier-fit followed by aggregation-fit). + In order to do so, some conditions should be met: a) the quantifier is of type aggregative, + b) the set of hyperparameters can be split into two disjoint non-empty groups. + + :return: True if the conditions are met, False otherwise + """ + if not isinstance(self.model, AggregativeQuantifier): + return False + cls_configs, q_configs = group_params(self.param_grid) + if (len(cls_configs) == 1) or (len(q_configs)==1): + return False + return True + + def _compute_scores_aggregative(self, training): + # break down the set of hyperparameters into two: classifier-specific, quantifier-specific + cls_configs, q_configs = group_params(self.param_grid) + + # train all classifiers and get the predictions + self._training = training + cls_outs = qp.util.parallel( + self._prepare_classifier, + cls_configs, + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs + ) + + # filter out classifier configurations that yielded any error + success_outs = [] + for (model, predictions, status, took), cls_config in zip(cls_outs, cls_configs): + if status.success(): + success_outs.append((model, predictions, took, cls_config)) + else: + self.error_collector.append(status) + + if len(success_outs) == 0: + raise ValueError('No valid configuration found for the classifier!') + + # explore the quantifier-specific hyperparameters for each valid training configuration + aggr_configs = [(*out, q_config) for out, q_config in itertools.product(success_outs, q_configs)] + aggr_outs = qp.util.parallel( + self._prepare_aggregation, + aggr_configs, + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs + ) + + return aggr_outs + + def _compute_scores_nonaggregative(self, training): + configs = expand_grid(self.param_grid) + self._training = training + scores = qp.util.parallel( + self._prepare_nonaggr_model, + configs, + seed=qp.environ.get('_R_SEED', None), + n_jobs=self.n_jobs + ) + return scores + + def _print_status(self, params, score, status, took): + if status.success(): + self._sout(f'hyperparams=[{params}]\t got {self.error.__name__} = {score:.5f} [took {took:.3f}s]') + else: + self._sout(f'error={status}') + +
+[docs]
+    def fit(self, training: LabelledCollection):
+        """ Learning routine. Fits the quantifier with every combination of hyperparameters and selects the one
+        minimizing the error metric.
+
+        :param training: the training set on which to optimize the hyperparameters
+        :return: self
+        """
+
+        if self.refit and not isinstance(self.protocol, OnLabelledCollectionProtocol):
+            raise RuntimeWarning(
+                f'"refit" was requested, but the protocol does not implement '
+                f'the {OnLabelledCollectionProtocol.__name__} interface'
+            )
+
+        tinit = time()
+
+        self.error_collector = []
+
+        self._sout(f'starting model selection with n_jobs={self.n_jobs}')
+        if self._break_down_fit():
+            results = self._compute_scores_aggregative(training)
+        else:
+            results = self._compute_scores_nonaggregative(training)
+
+        self.param_scores_ = {}
+        self.best_score_ = None
+        for model, params, score, status, took in results:
+            if status.success():
+                if self.best_score_ is None or score < self.best_score_:
+                    self.best_score_ = score
+                    self.best_params_ = params
+                    self.best_model_ = model
+                self.param_scores_[str(params)] = score
+            else:
+                self.param_scores_[str(params)] = status.status
+                self.error_collector.append(status)
+
+        tend = time() - tinit
+
+        if self.best_score_ is None:
+            raise ValueError('no combination of hyperparameters seemed to work')
+
+        self._sout(f'optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) '
+                   f'[took {tend:.4f}s]')
+
+        n_errors = len(self.error_collector)
+        if n_errors > 0:
+            self._sout(f'warning: {n_errors} errors found')
+            for err in self.error_collector:
+                self._sout(f'\t{str(err)}')
+
+        if self.refit:
+            if isinstance(self.protocol, OnLabelledCollectionProtocol):
+                tinit = time()
+                self._sout('refitting on the whole development set')
+                self.best_model_.fit(training + self.protocol.get_labelled_collection())
+                tend = time() - tinit
+                self.refit_time_ = tend
+            else:
+                # already checked
+                raise RuntimeWarning('the model cannot be refit on the whole dataset')
+
+        return self
+ + +
+[docs]
+    def quantify(self, instances):
+        """Estimate class prevalence values using the best model found after calling the :meth:`fit` method.
+
+        :param instances: sample containing the instances
+        :return: a ndarray of shape `(n_classes)` with class prevalence estimates according to the best model found
+            by the model selection process.
+        """
+        assert hasattr(self, 'best_model_'), 'quantify called before fit'
+        return self.best_model().quantify(instances)
+ + +
+[docs] + def set_params(self, **parameters): + """Sets the hyper-parameters to explore. + + :param parameters: a dictionary with keys the parameter names and values the list of values to explore + """ + self.param_grid = parameters
+ + +
+[docs] + def get_params(self, deep=True): + """Returns the dictionary of hyper-parameters to explore (`param_grid`) + + :param deep: Unused + :return: the dictionary `param_grid` + """ + return self.param_grid
+ + +
+[docs] + def best_model(self): + """ + Returns the best model found after calling the :meth:`fit` method, i.e., the one trained on the combination + of hyper-parameters that minimized the error function. + + :return: a trained quantifier + """ + if hasattr(self, 'best_model_'): + return self.best_model_ + raise ValueError('best_model called before fit')
+
+
+    def _error_handler(self, func, params):
+        """
+        Wraps the execution of one job, complementing its return value with two additional values: the completion
+        status, and the time of execution.
+
+        :param func: the function to be called
+        :param params: parameters of the function
+        :return: `tuple(out, status, time)` where `out` is the function output,
+            `status` is an enum value from `Status`, and `time` is the time it
+            took to complete the call
+        """
+
+        output = None
+
+        def _handle(status, exception):
+            if self.raise_errors:
+                raise exception
+            else:
+                return ConfigStatus(params, status)
+
+        try:
+            with timeout(self.timeout):
+                tinit = time()
+                output = func(params)
+                status = ConfigStatus(params, Status.SUCCESS)
+
+        except TimeoutError as e:
+            status = _handle(Status.TIMEOUT, e)
+
+        except ValueError as e:
+            status = _handle(Status.INVALID, e)
+
+        except Exception as e:
+            status = _handle(Status.ERROR, e)
+
+        took = time() - tinit
+        return output, status, took
+ + + +
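+# Usage sketch (the dataset name and the grid are illustrative): model selection for PACC under the
+# artificial prevalence protocol (APP), targeting mean absolute error (MAE).
+#
+#     >>> import quapy as qp
+#     >>> from quapy.protocol import APP
+#     >>> from quapy.method.aggregative import PACC
+#     >>> from sklearn.linear_model import LogisticRegression
+#     >>> qp.environ['SAMPLE_SIZE'] = 100
+#     >>> training, validation = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).training.split_stratified()
+#     >>> model = GridSearchQ(
+#     ...     model=PACC(LogisticRegression()),
+#     ...     param_grid={'classifier__C': np.logspace(-3, 3, 7)},
+#     ...     protocol=APP(validation),
+#     ...     error='mae',
+#     ...     refit=False,
+#     ...     verbose=True
+#     ... ).fit(training)
+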
+[docs]
+def cross_val_predict(quantifier: BaseQuantifier, data: LabelledCollection, nfolds=3, random_state=0):
+    """
+    Akin to `scikit-learn's cross_val_predict <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_predict.html>`_
+    but for quantification.
+
+    :param quantifier: a quantifier issuing class prevalence values
+    :param data: a labelled collection
+    :param nfolds: number of folds for k-fold cross-validation
+    :param random_state: random seed for reproducibility
+    :return: a vector of class prevalence values
+    """
+
+    total_prev = np.zeros(shape=data.n_classes)
+
+    for train, test in data.kFCV(nfolds=nfolds, random_state=random_state):
+        quantifier.fit(train)
+        fold_prev = quantifier.quantify(test.X)
+        rel_size = 1. * len(test) / len(data)
+        total_prev += fold_prev * rel_size
+
+    return total_prev
+ + + +
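+# Usage sketch (assumes `data` is a LabelledCollection and reuses PACC from the example above):
+#
+#     >>> prev = cross_val_predict(PACC(LogisticRegression()), data, nfolds=5)
+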
+[docs]
+def expand_grid(param_grid: dict):
+    """
+    Expands a param_grid dictionary as a list of configurations.
+    Example:
+
+    >>> combinations = expand_grid({'A': [1, 10, 100], 'B': [True, False]})
+    >>> print(combinations)
+    [{'A': 1, 'B': True}, {'A': 1, 'B': False}, {'A': 10, 'B': True}, {'A': 10, 'B': False}, {'A': 100, 'B': True}, {'A': 100, 'B': False}]
+
+    :param param_grid: dictionary with keys representing hyper-parameter names, and values representing the range
+        to explore for that hyper-parameter
+    :return: a list of configurations, i.e., combinations of hyper-parameter assignments in the grid.
+    """
+    params_keys = list(param_grid.keys())
+    params_values = list(param_grid.values())
+    configs = [{k: combs[i] for i, k in enumerate(params_keys)} for combs in itertools.product(*params_values)]
+    return configs
+ + + +
+[docs]
+def group_params(param_grid: dict):
+    """
+    Partitions a param_grid dictionary as two lists of configurations, one for the classifier-specific
+    hyper-parameters, and another for the quantifier-specific hyper-parameters.
+
+    :param param_grid: dictionary with keys representing hyper-parameter names, and values representing the range
+        to explore for that hyper-parameter
+    :return: two expanded grids of configurations, one for the classifier, another for the quantifier
+    """
+    classifier_params, quantifier_params = {}, {}
+    for key, values in param_grid.items():
+        if key.startswith('classifier__') or key == 'val_split':
+            classifier_params[key] = values
+        else:
+            quantifier_params[key] = values
+
+    classifier_configs = expand_grid(classifier_params)
+    quantifier_configs = expand_grid(quantifier_params)
+
+    return classifier_configs, quantifier_configs
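+
+# Illustration of the partition performed by group_params: keys prefixed with 'classifier__' (and
+# 'val_split') go to the classifier grid; everything else goes to the quantifier grid.
+#
+#     >>> cls_grid, q_grid = group_params({'classifier__C': [1, 10], 'nbins': [4, 8]})
+#     >>> cls_grid
+#     [{'classifier__C': 1}, {'classifier__C': 10}]
+#     >>> q_grid
+#     [{'nbins': 4}, {'nbins': 8}]
+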
+ + +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/plot.html b/docs/build/html/_modules/quapy/plot.html new file mode 100644 index 0000000..79179a1 --- /dev/null +++ b/docs/build/html/_modules/quapy/plot.html @@ -0,0 +1,687 @@ + + + + + + quapy.plot — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.plot

+from collections import defaultdict
+import matplotlib.pyplot as plt
+from matplotlib.cm import get_cmap
+import numpy as np
+from matplotlib import cm
+from scipy.stats import ttest_ind_from_stats
+from matplotlib.ticker import ScalarFormatter
+import math
+
+import quapy as qp
+
+plt.rcParams['figure.figsize'] = [10, 6]
+plt.rcParams['figure.dpi'] = 200
+plt.rcParams['font.size'] = 18
+
+
+
+[docs]
+def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=None, show_std=True, legend=True,
+                    train_prev=None, savepath=None, method_order=None):
+    """
+    The diagonal plot displays the predicted prevalence values (along the y-axis) as a function of the true prevalence
+    values (along the x-axis). The optimal quantifier is described by the diagonal (0,0)-(1,1) of the plot (hence the
+    name). It is convenient for binary quantification problems, though it can be used for multiclass problems by
+    indicating which class is to be taken as the positive class. (For multiclass quantification problems, other plots
+    like the :meth:`error_by_drift` might be preferable though).
+
+    :param method_names: array-like with the method names for each experiment
+    :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
+        each experiment
+    :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)
+        for each experiment
+    :param pos_class: index of the positive class
+    :param title: the title to be displayed in the plot
+    :param show_std: whether or not to show standard deviations (represented by color bands). This might be inconvenient
+        for cases in which many methods are compared, or when the standard deviations are high -- default True)
+    :param legend: whether or not to display the legend (default True)
+    :param train_prev: if indicated (default is None), the training prevalence (for the positive class) is highlighted
+        in the plot. This is convenient when all the experiments have been conducted in the same dataset.
+    :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
+    :param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,
+        listed in the legend and associated with matplotlib colors).
+    """
+    fig, ax = plt.subplots()
+    ax.set_aspect('equal')
+    ax.grid()
+    ax.plot([0, 1], [0, 1], '--k', label='ideal', zorder=1)
+
+    method_names, true_prevs, estim_prevs = _merge(method_names, true_prevs, estim_prevs)
+
+    order = list(zip(method_names, true_prevs, estim_prevs))
+    if method_order is not None:
+        table = {method_name: [true_prev, estim_prev] for method_name, true_prev, estim_prev in order}
+        order = [(method_name, *table[method_name]) for method_name in method_order]
+
+    NUM_COLORS = len(method_names)
+    if NUM_COLORS > 10:
+        cm = plt.get_cmap('tab20')
+        ax.set_prop_cycle(color=[cm(1. * i / NUM_COLORS) for i in range(NUM_COLORS)])
+    for method, true_prev, estim_prev in order:
+        true_prev = true_prev[:, pos_class]
+        estim_prev = estim_prev[:, pos_class]
+
+        x_ticks = np.unique(true_prev)
+        x_ticks.sort()
+        y_ave = np.asarray([estim_prev[true_prev == x].mean() for x in x_ticks])
+        y_std = np.asarray([estim_prev[true_prev == x].std() for x in x_ticks])
+
+        ax.errorbar(x_ticks, y_ave, fmt='-', marker='o', label=method, markersize=3, zorder=2)
+        if show_std:
+            ax.fill_between(x_ticks, y_ave - y_std, y_ave + y_std, alpha=0.25)
+
+    if train_prev is not None:
+        train_prev = train_prev[pos_class]
+        ax.scatter(train_prev, train_prev, c='c', label='tr-prev', linewidth=2, edgecolor='k', s=100, zorder=3)
+
+    ax.set(xlabel='true prevalence', ylabel='estimated prevalence', title=title)
+    ax.set_ylim(0, 1)
+    ax.set_xlim(0, 1)
+
+    if legend:
+        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
+        # box = ax.get_position()
+        # ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
+        # ax.legend(loc='lower center',
+        #           bbox_to_anchor=(1, -0.5),
+        #           ncol=(len(method_names)+1)//2)
+
+    _save_or_show(savepath)
+ + + +
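+# Usage sketch (placeholder variables): true_prevs/estim_prevs as produced by an evaluation routine
+# such as qp.evaluation.prediction, one entry per method.
+#
+#     >>> binary_diagonal(method_names=['CC', 'ACC'],
+#     ...                 true_prevs=[true_prevs_cc, true_prevs_acc],    # placeholders
+#     ...                 estim_prevs=[estim_prevs_cc, estim_prevs_acc], # placeholders
+#     ...                 train_prev=train.prevalence(),
+#     ...                 savepath='./plots/diagonal.png')
+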
+[docs] +def binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title=None, savepath=None): + """ + Box-plots displaying the global bias (i.e., signed error computed as the estimated value minus the true value) + for each quantification method with respect to a given positive class. + + :param method_names: array-like with the method names for each experiment + :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for + each experiment + :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components) + for each experiment + :param pos_class: index of the positive class + :param title: the title to be displayed in the plot + :param savepath: path where to save the plot. If not indicated (as default), the plot is shown. + """ + + method_names, true_prevs, estim_prevs = _merge(method_names, true_prevs, estim_prevs) + + fig, ax = plt.subplots() + ax.grid() + + data, labels = [], [] + for method, true_prev, estim_prev in zip(method_names, true_prevs, estim_prevs): + true_prev = true_prev[:,pos_class] + estim_prev = estim_prev[:,pos_class] + data.append(estim_prev-true_prev) + labels.append(method) + + ax.boxplot(data, labels=labels, patch_artist=False, showmeans=True) + plt.xticks(rotation=45) + ax.set(ylabel='error bias', title=title) + + _save_or_show(savepath)
+ + + +
+[docs]
+def binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=None, nbins=5, colormap=cm.tab10,
+                     vertical_xticks=False, legend=True, savepath=None):
+    """
+    Box-plots displaying the local bias (i.e., signed error computed as the estimated value minus the true value)
+    for different bins of (true) prevalence of the positive class, for each quantification method.
+
+    :param method_names: array-like with the method names for each experiment
+    :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
+        each experiment
+    :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)
+        for each experiment
+    :param pos_class: index of the positive class
+    :param title: the title to be displayed in the plot
+    :param nbins: number of bins
+    :param colormap: the matplotlib colormap to use (default cm.tab10)
+    :param vertical_xticks: whether or not to print the bin labels vertically (default is False)
+    :param legend: whether or not to display the legend (default is True)
+    :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
+    """
+    from pylab import boxplot, plot, setp
+
+    fig, ax = plt.subplots()
+    ax.grid()
+
+    method_names, true_prevs, estim_prevs = _merge(method_names, true_prevs, estim_prevs)
+
+    bins = np.linspace(0, 1, nbins + 1)
+    binwidth = 1 / nbins
+    data = {}
+    for method, true_prev, estim_prev in zip(method_names, true_prevs, estim_prevs):
+        true_prev = true_prev[:, pos_class]
+        estim_prev = estim_prev[:, pos_class]
+
+        data[method] = []
+        inds = np.digitize(true_prev, bins, right=True)
+        for ind in range(len(bins)):
+            selected = inds == ind
+            data[method].append(estim_prev[selected] - true_prev[selected])
+
+    nmethods = len(method_names)
+    boxwidth = binwidth / (nmethods + 4)
+    for i, bin in enumerate(bins[:-1]):
+        boxdata = [data[method][i] for method in method_names]
+        positions = [bin + (i * boxwidth) + 2 * boxwidth for i, _ in enumerate(method_names)]
+        box = boxplot(boxdata, showmeans=False, positions=positions, widths=boxwidth, sym='+', patch_artist=True)
+        for boxid in range(len(method_names)):
+            c = colormap.colors[boxid % len(colormap.colors)]
+            setp(box['fliers'][boxid], color=c, marker='+', markersize=3., markeredgecolor=c)
+            setp(box['boxes'][boxid], color=c)
+            setp(box['medians'][boxid], color='k')
+
+    major_xticks_positions, minor_xticks_positions = [], []
+    major_xticks_labels, minor_xticks_labels = [], []
+    for i, b in enumerate(bins[:-1]):
+        major_xticks_positions.append(b)
+        minor_xticks_positions.append(b + binwidth / 2)
+        major_xticks_labels.append('')
+        minor_xticks_labels.append(f'[{bins[i]:.2f}-{bins[i + 1]:.2f})')
+    ax.set_xticks(major_xticks_positions)
+    ax.set_xticks(minor_xticks_positions, minor=True)
+    ax.set_xticklabels(major_xticks_labels)
+    ax.set_xticklabels(minor_xticks_labels, minor=True, rotation='vertical' if vertical_xticks else 'horizontal')
+
+    if vertical_xticks:
+        # Pad margins so that markers don't get clipped by the axes
+        plt.margins(0.2)
+        # Tweak spacing to prevent clipping of tick-labels
+        plt.subplots_adjust(bottom=0.15)
+
+    if legend:
+        # adds the legend to the list hs, initialized with the "ideal" quantifier (one that has 0 bias across all
+        # bins, i.e., a line from (0,0) to (1,0)). The other elements are simply labelled dot-plots that are to be
+        # removed (setting set_visible to False for all but the first element) after the legend has been placed
+        hs = [ax.plot([0, 1], [0, 0], '-k', zorder=2)[0]]
+        for colorid in range(len(method_names)):
+            color = colormap.colors[colorid % len(colormap.colors)]
+            h, = plot([0, 0], '-s', markerfacecolor=color, color='k', mec=color, linewidth=1.)
+            hs.append(h)
+        box = ax.get_position()
+        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
+        ax.legend(hs, ['ideal'] + method_names, loc='center left', bbox_to_anchor=(1, 0.5))
+        [h.set_visible(False) for h in hs[1:]]
+
+    # x-axis and y-axis labels and limits
+    ax.set(xlabel='prevalence', ylabel='error bias', title=title)
+    ax.set_xlim(0, 1)
+
+    _save_or_show(savepath)
+ + + +
+[docs]
+def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
+                   n_bins=20, error_name='ae', show_std=False,
+                   show_density=True,
+                   show_legend=True,
+                   logscale=False,
+                   title='Quantification error as a function of distribution shift',
+                   vlines=None,
+                   method_order=None,
+                   savepath=None):
+    """
+    Plots the error (along the y-axis, as measured in terms of `error_name`) as a function of the train-test shift
+    (along the x-axis, as measured in terms of :meth:`quapy.error.ae`). This plot is useful especially for multiclass
+    problems, in which "diagonal plots" may be cumbersome, and in order to gain understanding about how methods
+    fare in different regions of the prior probability shift spectrum (e.g., in the low-shift regime vs. in the
+    high-shift regime).
+
+    :param method_names: array-like with the method names for each experiment
+    :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
+        each experiment
+    :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)
+        for each experiment
+    :param tr_prevs: training prevalence of each experiment
+    :param n_bins: number of bins in which the x-axis is to be divided (default is 20)
+    :param error_name: a string representing the name of an error function (as defined in `quapy.error`, default is "ae")
+    :param show_std: whether or not to show standard deviations as color bands (default is False)
+    :param show_density: whether or not to display the distribution of experiments for each bin (default is True)
+    :param show_legend: whether or not to display the legend of the chart (default is True)
+    :param logscale: whether or not to log-scale the y-error measure (default is False)
+    :param title: title of the plot (default is "Quantification error as a function of distribution shift")
+    :param vlines: array-like list of values (default is None). If indicated, highlights some regions of the space
+        using vertical dotted lines.
+    :param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,
+        listed in the legend and associated with matplotlib colors).
+    :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
+ """ + + fig, ax = plt.subplots() + ax.grid() + + x_error = qp.error.ae + y_error = getattr(qp.error, error_name) + + # get all data as a dictionary {'m':{'x':ndarray, 'y':ndarray}} where 'm' is a method name (in the same + # order as in method_order (if specified), and where 'x' are the train-test shifts (computed as according to + # x_error function) and 'y' is the estim-test shift (computed as according to y_error) + data = _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order) + + if method_order is None: + method_order = method_names + + _set_colors(ax, n_methods=len(method_order)) + + bins = np.linspace(0, 1, n_bins+1) + binwidth = 1 / n_bins + min_x, max_x, min_y, max_y = None, None, None, None + npoints = np.zeros(len(bins), dtype=float) + for method in method_order: + tr_test_drifts = data[method]['x'] + method_drifts = data[method]['y'] + if logscale: + ax.set_yscale("log") + ax.yaxis.set_major_formatter(ScalarFormatter()) + ax.yaxis.get_major_formatter().set_scientific(False) + ax.minorticks_off() + + inds = np.digitize(tr_test_drifts, bins, right=True) + + xs, ys, ystds = [], [], [] + for p,ind in enumerate(range(len(bins))): + selected = inds==ind + if selected.sum() > 0: + xs.append(ind*binwidth-binwidth/2) + ys.append(np.mean(method_drifts[selected])) + ystds.append(np.std(method_drifts[selected])) + npoints[p] += len(method_drifts[selected]) + + xs = np.asarray(xs) + ys = np.asarray(ys) + ystds = np.asarray(ystds) + + min_x_method, max_x_method, min_y_method, max_y_method = xs.min(), xs.max(), ys.min(), ys.max() + min_x = min_x_method if min_x is None or min_x_method < min_x else min_x + max_x = max_x_method if max_x is None or max_x_method > max_x else max_x + max_y = max_y_method if max_y is None or max_y_method > max_y else max_y + min_y = min_y_method if min_y is None or min_y_method < min_y else min_y + max_y = max_y_method if max_y is None or max_y_method > max_y else max_y + + ax.errorbar(xs, ys, fmt='-', marker='o', color='w', markersize=8, linewidth=4, zorder=1) + ax.errorbar(xs, ys, fmt='-', marker='o', label=method, markersize=6, linewidth=2, zorder=2) + + if show_std: + ax.fill_between(xs, ys-ystds, ys+ystds, alpha=0.25) + + if show_density: + ax2 = ax.twinx() + densities = npoints/np.sum(npoints) + ax2.bar([ind * binwidth-binwidth/2 for ind in range(len(bins))], + densities, alpha=0.15, color='g', width=binwidth, label='density') + ax2.set_ylim(0,max(densities)) + ax2.spines['right'].set_color('g') + ax2.tick_params(axis='y', colors='g') + + ax.set(xlabel=f'Distribution shift between training set and test sample', + ylabel=f'{error_name.upper()} (true distribution, predicted distribution)', + title=title) + box = ax.get_position() + ax.set_position([box.x0, box.y0, box.width * 0.8, box.height]) + if vlines: + for vline in vlines: + ax.axvline(vline, 0, 1, linestyle='--', color='k') + + ax.set_xlim(min_x, max_x) + if logscale: + #nice scale for the logaritmic axis + ax.set_ylim(0,10 ** math.ceil(math.log10(max_y))) + + + if show_legend: + fig.legend(loc='lower center', + bbox_to_anchor=(1, 0.5), + ncol=(len(method_names)+1)//2) + + _save_or_show(savepath)
+ + + +
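+# Usage sketch (placeholder variables: method_names, true_prevs, estim_prevs, tr_prevs as produced
+# by your evaluation runs, one entry per method):
+#
+#     >>> error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
+#     ...                error_name='ae', n_bins=10, logscale=True,
+#     ...                savepath='./plots/err_drift.png')
+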
+[docs]
+def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
+                                 n_bins=20, binning='isomerous',
+                                 x_error='ae', y_error='ae', ttest_alpha=0.005, tail_density_threshold=0.005,
+                                 method_order=None,
+                                 savepath=None):
+    """
+    Displays (only) the top performing methods for different regions of the train-test shift in form of a broken
+    bar chart, in which each method has bars only for those regions in which either one of the following conditions
+    hold: (i) it is the best method (in average) for the bin, or (ii) it is not statistically significantly different
+    (in average) as according to a two-sided t-test on independent samples at confidence `ttest_alpha`.
+    The binning can be made "isometric" (same size), or "isomerous" (same number of experiments -- default). A second
+    plot is displayed on top, that displays the distribution of experiments for each bin (when binning="isometric") or
+    the percentiles points of the distribution (when binning="isomerous").
+
+    :param method_names: array-like with the method names for each experiment
+    :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for
+        each experiment
+    :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)
+        for each experiment
+    :param tr_prevs: training prevalence of each experiment
+    :param n_bins: number of bins in which the x-axis is to be divided (default is 20)
+    :param binning: type of binning, either "isomerous" (default) or "isometric"
+    :param x_error: a string representing the name of an error function (as defined in `quapy.error`) to be used for
+        measuring the amount of train-test shift (default is "ae")
+    :param y_error: a string representing the name of an error function (as defined in `quapy.error`) to be used for
+        measuring the amount of error in the prevalence estimations (default is "ae")
+    :param ttest_alpha: significance threshold for the p-value (two-sided t-test on independent samples) above which
+        the two means involved are considered not statistically significantly different. Default is
+        0.005, meaning that a `p-value > 0.005` indicates the two methods involved are to be considered similar
+    :param tail_density_threshold: sets a threshold on the density of experiments (over the total number of experiments)
+        below which a bin in the tail (i.e., the right-most ones) will be discarded. This is in order to avoid some
+        bins to be shown for train-test outliers.
+    :param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,
+        listed in the legend and associated with matplotlib colors).
+    :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.
+    :return:
+    """
+    assert binning in ['isomerous', 'isometric'], 'unknown binning type; valid types are "isomerous" and "isometric"'
+
+    x_error = getattr(qp.error, x_error)
+    y_error = getattr(qp.error, y_error)
+
+    # get all data as a dictionary {'m':{'x':ndarray, 'y':ndarray}} where 'm' is a method name (in the same
+    # order as in method_order, if specified), and where 'x' are the train-test shifts (computed according to
+    # the x_error function) and 'y' are the estim-test shifts (computed according to y_error)
+    data = _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order)
+
+    if method_order is None:
+        method_order = method_names
+
+    if binning == 'isomerous':
+        # take bins containing the same amount of examples
+        tr_test_drifts = np.concatenate([data[m]['x'] for m in method_order])
+        bins = np.quantile(tr_test_drifts, q=np.linspace(0, 1, n_bins+1)).flatten()
+    else:
+        # take equidistant bins
+        bins = np.linspace(0, 1, n_bins+1)
+        bins[0] = -0.001
+        bins[-1] += 0.001
+
+    # we use this to keep track of how many datapoints contribute to each bin
+    inds_histogram_global = np.zeros(n_bins, dtype=float)
+    n_methods = len(method_order)
+    buckets = np.zeros(shape=(n_methods, n_bins, 3))
+    for i, method in enumerate(method_order):
+        tr_test_drifts = data[method]['x']
+        method_drifts = data[method]['y']
+
+        inds = np.digitize(tr_test_drifts, bins, right=False)
+        inds_histogram_global += np.histogram(tr_test_drifts, density=False, bins=bins)[0]
+
+        for j in range(len(bins)):
+            selected = inds == j
+            if selected.sum() > 0:
+                buckets[i, j-1, 0] = np.mean(method_drifts[selected])
+                buckets[i, j-1, 1] = np.std(method_drifts[selected])
+                buckets[i, j-1, 2] = selected.sum()
+
+    # cancel the last buckets with low density
+    histogram = inds_histogram_global / inds_histogram_global.sum()
+    for tail in reversed(range(len(histogram))):
+        if histogram[tail] < tail_density_threshold:
+            buckets[:, tail, 2] = 0
+        else:
+            break
+
+    salient_methods = set()
+    best_methods = []
+    for bucket in range(buckets.shape[1]):
+        nc = buckets[:, bucket, 2].sum()
+        if nc == 0:
+            best_methods.append([])
+            continue
+
+        order = np.argsort(buckets[:, bucket, 0])
+        rank1 = order[0]
+        best_bucket_methods = [method_order[rank1]]
+        best_mean, best_std, best_nc = buckets[rank1, bucket, :]
+        for method_index in order[1:]:
+            method_mean, method_std, method_nc = buckets[method_index, bucket, :]
+            _, pval = ttest_ind_from_stats(best_mean, best_std, best_nc, method_mean, method_std, method_nc)
+            if pval > ttest_alpha:
+                best_bucket_methods.append(method_order[method_index])
+        best_methods.append(best_bucket_methods)
+        salient_methods.update(best_bucket_methods)
+        print(best_bucket_methods)
+
+    if binning == 'isomerous':
+        fig, axes = plt.subplots(2, 1, gridspec_kw={'height_ratios': [0.2, 1]}, figsize=(20, len(salient_methods)))
+    else:
+        fig, axes = plt.subplots(2, 1, gridspec_kw={'height_ratios': [1, 1]}, figsize=(20, len(salient_methods)))
+
+    ax = axes[1]
+    high_from = 0
+    yticks, yticks_method_names = [], []
+    color = get_cmap('Accent').colors
+    vlines = []
+    bar_high = 1
+    for method in [m for m in method_order if m in salient_methods]:
+        broken_paths = []
+        path_start, path_end = None, None
+        for i, best_bucket_methods in enumerate(best_methods):
+            if method in best_bucket_methods:
+                if path_start is None:
+                    path_start = bins[i]
+                    path_end = bins[i+1]-path_start
+                else:
+                    path_end += bins[i+1]-bins[i]
+            else:
+                if path_start is not None:
+                    broken_paths.append(tuple((path_start, path_end)))
+                    path_start, path_end = None, None
+        if path_start is not None:
+            broken_paths.append(tuple((path_start, path_end)))
+
+        ax.broken_barh(broken_paths, (high_from, bar_high), facecolors=color[len(yticks_method_names)])
+        yticks.append(high_from+bar_high/2)
+        high_from += bar_high
+        yticks_method_names.append(method)
+        for path_start, path_end in broken_paths:
+            vlines.extend([path_start, path_start+path_end])
+
+    vlines = np.unique(vlines)
+    vlines = sorted(vlines)
+    for v in vlines[1:-1]:
+        ax.axvline(x=v, color='k', linestyle='--')
+
+    ax.set_ylim(0, high_from)
+    ax.set_xlim(vlines[0], vlines[-1])
+    ax.set_xlabel('Distribution shift between training set and sample')
+
+    ax.set_yticks(yticks)
+    ax.set_yticklabels(yticks_method_names)
+
+    # upper plot (explaining the distribution)
+    ax = axes[0]
+    if binning == 'isometric':
+        # show the density of each region
+        bins[0] = 0
+        y_pos = [b+(bins[i+1]-b)/2 for i, b in enumerate(bins[:-1]) if histogram[i] > 0]
+        bar_width = [bins[i+1]-bins[i] for i in range(len(bins[:-1])) if histogram[i] > 0]
+        ax.bar(y_pos, [n for n in histogram if n > 0], bar_width, align='center', alpha=0.5, color='silver')
+        ax.set_ylabel('shift\ndistribution', rotation=0, ha='right', va='center')
+        ax.set_xlim(vlines[0], vlines[-1])
+        ax.get_xaxis().set_visible(False)
+        plt.subplots_adjust(wspace=0, hspace=0.1)
+    else:
+        # show the percentiles of the distribution
+        cumsum = np.cumsum(histogram)
+        for i in range(len(bins[:-1])):
+            start, width = bins[i], bins[i+1]-bins[i]
+            ax.broken_barh([tuple((start, width))], (0, 1), facecolors='whitesmoke' if i % 2 == 0 else 'silver')
+            if i < len(bins)-2:
+                ax.text(bins[i+1], 0.5, '$P_{'+f'{int(np.round(cumsum[i]*100))}'+'}$', ha='center')
+        ax.set_ylim(0, 1)
+        ax.set_xlim(vlines[0], vlines[-1])
+        ax.get_yaxis().set_visible(False)
+        ax.get_xaxis().set_visible(False)
+        plt.subplots_adjust(wspace=0, hspace=0)
+
+    _save_or_show(savepath)
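+
+
+if __name__ == '__main__':
+    # Illustrative usage sketch (an editor's addition, not part of the original module).
+    # The inputs shared by the two plots above are typically collected with
+    # qp.evaluation.prediction, once per method; the drift plot defined earlier in this
+    # module is assumed to be exposed as qp.plot.error_by_drift. The dataset and the
+    # quantifiers below are arbitrary choices made for the sake of the example.
+    from sklearn.linear_model import LogisticRegression
+    from quapy.method.aggregative import CC, PACC
+    from quapy.protocol import APP
+
+    qp.environ['SAMPLE_SIZE'] = 100
+    train, test = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=5).train_test
+
+    method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], []
+    for name, quantifier in [('CC', CC(LogisticRegression())), ('PACC', PACC(LogisticRegression()))]:
+        quantifier.fit(train)
+        true_prev, estim_prev = qp.evaluation.prediction(quantifier, APP(test, repeats=10, random_state=0))
+        method_names.append(name)
+        true_prevs.append(true_prev)
+        estim_prevs.append(estim_prev)
+        tr_prevs.append(train.prevalence())
+
+    # error as a function of the train-test drift, and the broken-bar supremacy summary
+    qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, error_name='ae', n_bins=10)
+    brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
+                                 n_bins=10, binning='isomerous', x_error='ae', y_error='ae')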
+ + + +def _merge(method_names, true_prevs, estim_prevs): + ndims = true_prevs[0].shape[1] + data = defaultdict(lambda: {'true': np.empty(shape=(0, ndims)), 'estim': np.empty(shape=(0, ndims))}) + method_order=[] + for method, true_prev, estim_prev in zip(method_names, true_prevs, estim_prevs): + data[method]['true'] = np.concatenate([data[method]['true'], true_prev]) + data[method]['estim'] = np.concatenate([data[method]['estim'], estim_prev]) + if method not in method_order: + method_order.append(method) + true_prevs_ = [data[m]['true'] for m in method_order] + estim_prevs_ = [data[m]['estim'] for m in method_order] + return method_order, true_prevs_, estim_prevs_ + + +def _set_colors(ax, n_methods): + NUM_COLORS = n_methods + cm = plt.get_cmap('tab20') + ax.set_prop_cycle(color=[cm(1. * i / NUM_COLORS) for i in range(NUM_COLORS)]) + + +def _save_or_show(savepath): + # if savepath is specified, then saves the plot in that path; otherwise the plot is shown + if savepath is not None: + qp.util.create_parent_dir(savepath) + # plt.tight_layout() + plt.savefig(savepath, bbox_inches='tight') + else: + plt.show() + + +def _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order): + data = defaultdict(lambda: {'x': np.empty(shape=(0)), 'y': np.empty(shape=(0))}) + + if method_order is None: + method_order = [] + + for method, test_prevs_i, estim_prevs_i, tr_prev_i in zip(method_names, true_prevs, estim_prevs, tr_prevs): + tr_prev_i = np.repeat(tr_prev_i.reshape(1, -1), repeats=test_prevs_i.shape[0], axis=0) + + tr_test_drifts = x_error(test_prevs_i, tr_prev_i) + data[method]['x'] = np.concatenate([data[method]['x'], tr_test_drifts]) + + method_drifts = y_error(test_prevs_i, estim_prevs_i) + data[method]['y'] = np.concatenate([data[method]['y'], method_drifts]) + + if method not in method_order: + method_order.append(method) + + return data +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/protocol.html b/docs/build/html/_modules/quapy/protocol.html new file mode 100644 index 0000000..f0330c2 --- /dev/null +++ b/docs/build/html/_modules/quapy/protocol.html @@ -0,0 +1,692 @@ + + + + + + quapy.protocol — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.protocol

+from copy import deepcopy
+import quapy as qp
+import numpy as np
+import itertools
+from contextlib import ExitStack
+from abc import ABCMeta, abstractmethod
+from quapy.data import LabelledCollection
+import quapy.functional as F
+from os.path import exists
+from glob import glob
+
+
+
+[docs]
+class AbstractProtocol(metaclass=ABCMeta):
+    """
+    Abstract parent class for sample generation protocols.
+    """
+
+    @abstractmethod
+    def __call__(self):
+        """
+        Implements the protocol. Yields one sample at a time along with its prevalence.
+
+        :return: yields a tuple `(sample, prev)` at a time, where `sample` is a set of instances
+            and `prev` is an `np.ndarray` with the class prevalence values
+        """
+        ...
+
+[docs] + def total(self): + """ + Indicates the total number of samples that the protocol generates. + + :return: The number of samples to generate if known, or `None` otherwise. + """ + return None
+
+ + + +
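+
+if __name__ == '__main__':
+    # Illustrative sketch (an editor's addition, not part of the original module): the
+    # minimal contract a concrete protocol must honour is to yield (sample, prevalence)
+    # pairs; `total` is optional and may return None when the count is unknown.
+    class TwoFixedSamples(AbstractProtocol):
+        def __call__(self):
+            yield np.asarray([[0.], [1.]]), np.asarray([0.5, 0.5])
+            yield np.asarray([[1.], [1.]]), np.asarray([0.0, 1.0])
+
+        def total(self):
+            return 2
+
+    for sample, prev in TwoFixedSamples()():
+        print(sample.shape, prev)
+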
+[docs]
+class IterateProtocol(AbstractProtocol):
+    """
+    A simple protocol that iterates over a list of previously generated samples.
+
+    :param samples: a list of :class:`quapy.data.base.LabelledCollection`
+    """
+    def __init__(self, samples: [LabelledCollection]):
+        self.samples = samples
+
+    def __call__(self):
+        """
+        Yields one sample from the initial list at a time.
+
+        :return: yields a tuple `(sample, prev)` at a time, where `sample` is a set of instances
+            and `prev` is an `np.ndarray` with the class prevalence values
+        """
+        for sample in self.samples:
+            yield sample.Xp
+
+[docs] + def total(self): + """ + Returns the number of samples in this protocol + + :return: int + """ + return len(self.samples)
+
+ + + +
+[docs]
+class AbstractStochasticSeededProtocol(AbstractProtocol):
+    """
+    An `AbstractStochasticSeededProtocol` is a protocol that generates, via any random procedure (e.g.,
+    via random sampling), sequences of :class:`quapy.data.base.LabelledCollection` samples.
+    The protocol abstraction requires the object to be instantiated with a seed, so that the sequence
+    can be fully replicated. In order to make this functionality possible, the classes extending this
+    abstraction need only implement two functions: :meth:`samples_parameters`, which generates all the
+    parameters needed for extracting the samples, and :meth:`sample`, which, given some parameters as
+    input, deterministically generates a sample.
+
+    :param random_state: the seed that allows replicating any sequence of samples. Default is 0, meaning
+        that the sequence will be consistent every time the protocol is called.
+    """
+
+    _random_state = -1  # means "not set"
+
+    def __init__(self, random_state=0):
+        self.random_state = random_state
+
+    @property
+    def random_state(self):
+        return self._random_state
+
+    @random_state.setter
+    def random_state(self, random_state):
+        self._random_state = random_state
+
+[docs] + @abstractmethod + def samples_parameters(self): + """ + This function has to return all the necessary parameters to replicate the samples + + :return: a list of parameters, each of which serves to deterministically generate a sample + """ + ...
+ + +
+[docs] + @abstractmethod + def sample(self, params): + """ + Extract one sample determined by the given parameters + + :param params: all the necessary parameters to generate a sample + :return: one sample (the same sample has to be generated for the same parameters) + """ + ...
+
+
+    def __call__(self):
+        """
+        Yields one sample at a time. The type of object returned depends on the `collator` function. The
+        default behaviour returns tuples of the form `(sample, prevalence)`.
+
+        :return: a tuple `(sample, prevalence)` if return_type='sample_prev', or an instance of
+            :class:`qp.data.LabelledCollection` if return_type='labelled_collection'
+        """
+        with ExitStack() as stack:
+            if self.random_state == -1:
+                raise ValueError('The random seed has never been initialized. '
+                                 'Set it to None if you do not wish to impose replicability.')
+            if self.random_state is not None:
+                stack.enter_context(qp.util.temp_seed(self.random_state))
+            for params in self.samples_parameters():
+                yield self.collator(self.sample(params))
+
+[docs] + def collator(self, sample, *args): + """ + The collator prepares the sample to accommodate the desired output format before returning the output. + This collator simply returns the sample as it is. Classes inheriting from this abstract class can + implement their custom collators. + + :param sample: the sample to be returned + :param args: additional arguments + :return: the sample adhering to a desired output format (in this case, the sample is returned as it is) + """ + return sample
+
+ + + +
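+
+if __name__ == '__main__':
+    # Illustrative sketch (an editor's addition, not part of the original module): a
+    # replicable random protocol only needs `samples_parameters` and `sample`; the seed
+    # handling and the iteration logic are inherited. Note that, under the default
+    # collator, the generated LabelledCollection objects are yielded as they are.
+    class RandomSubsets(AbstractStochasticSeededProtocol):
+        def __init__(self, data: LabelledCollection, sample_size=50, repeats=10, random_state=0):
+            super().__init__(random_state)  # mandatory: enables replicability
+            self.data = data
+            self.sample_size = sample_size
+            self.repeats = repeats
+
+        def samples_parameters(self):
+            # one array of random indexes per sample (drawn under the seeded context)
+            return [np.random.choice(len(self.data), self.sample_size, replace=False)
+                    for _ in range(self.repeats)]
+
+        def sample(self, index):
+            return self.data.sampling_from_index(index)
+
+        def total(self):
+            return self.repeats
+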
+[docs] +class OnLabelledCollectionProtocol: + """ + Protocols that generate samples from a :class:`qp.data.LabelledCollection` object. + """ + + RETURN_TYPES = ['sample_prev', 'labelled_collection', 'index'] + +
+[docs] + def get_labelled_collection(self): + """ + Returns the labelled collection on which this protocol acts. + + :return: an object of type :class:`qp.data.LabelledCollection` + """ + return self.data
+ + +
+[docs]
+    def on_preclassified_instances(self, pre_classifications, in_place=False):
+        """
+        Returns a copy of this protocol that acts on a modified version of the original
+        :class:`qp.data.LabelledCollection`, in which the original instances have been replaced
+        with the outputs of a classifier for each instance. (This is convenient for speeding up
+        the evaluation procedures for many samples, by pre-classifying the instances in advance.)
+
+        :param pre_classifications: the predictions issued by a classifier, typically an array-like
+            with shape `(n_instances,)` when the classifier is a hard one, or with shape
+            `(n_instances, n_classes)` when the classifier is a probabilistic one.
+        :param in_place: whether to apply the modification in place (True) or to a new copy (False, default).
+        :return: a copy of this protocol
+        """
+        assert len(pre_classifications) == len(self.data), \
+            f'error: the pre-classified data has different shape ' \
+            f'(expected {len(self.data)}, found {len(pre_classifications)})'
+        if in_place:
+            self.data.instances = pre_classifications
+            return self
+        else:
+            new = deepcopy(self)
+            return new.on_preclassified_instances(pre_classifications, in_place=True)
+ + +
+[docs] + @classmethod + def get_collator(cls, return_type='sample_prev'): + """ + Returns a collator function, i.e., a function that prepares the yielded data + + :param return_type: either 'sample_prev' (default) if the collator is requested to yield tuples of + `(sample, prevalence)`, or 'labelled_collection' when it is requested to yield instances of + :class:`qp.data.LabelledCollection` + :return: the collator function (a callable function that takes as input an instance of + :class:`qp.data.LabelledCollection`) + """ + assert return_type in cls.RETURN_TYPES, \ + f'unknown return type passed as argument; valid ones are {cls.RETURN_TYPES}' + if return_type=='sample_prev': + return lambda lc:lc.Xp + elif return_type=='labelled_collection': + return lambda lc:lc
+
+ + + +
+[docs]
+class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
+    """
+    Implementation of the artificial prevalence protocol (APP).
+    The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g.,
+    [0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of
+    prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ...,
+    [1, 0, 0] prevalence values of size `sample_size` will be yielded). The number of samples for each valid
+    combination of prevalence values is indicated by `repeats`.
+
+    :param data: a `LabelledCollection` from which the samples will be drawn
+    :param sample_size: integer, number of instances in each sample; if None (default) then it is taken from
+        qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.
+    :param n_prevalences: the number of equidistant prevalence points to extract from the [0,1] interval for
+        the grid (default is 21)
+    :param repeats: number of copies for each valid prevalence vector (default is 10)
+    :param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1
+    :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of
+        samples will be the same every time the protocol is called)
+    :param sanity_check: int, raises an exception warning the user when the number of examples to be generated
+        exceeds this number; set to None to skip this check
+    :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each
+        iteration, or to "labelled_collection" to get instead instances of LabelledCollection
+    """
+
+    def __init__(self, data: LabelledCollection, sample_size=None, n_prevalences=21, repeats=10,
+                 smooth_limits_epsilon=0, random_state=0, sanity_check=10000, return_type='sample_prev'):
+        super(APP, self).__init__(random_state)
+        self.data = data
+        self.sample_size = qp._get_sample_size(sample_size)
+        self.n_prevalences = n_prevalences
+        self.repeats = repeats
+        self.smooth_limits_epsilon = smooth_limits_epsilon
+        if not ((isinstance(sanity_check, int) and sanity_check > 0) or sanity_check is None):
+            raise ValueError('param "sanity_check" must either be None or a positive integer')
+        if isinstance(sanity_check, int):
+            n = F.num_prevalence_combinations(n_prevpoints=n_prevalences, n_classes=data.n_classes, n_repeats=repeats)
+            if n > sanity_check:
+                raise RuntimeError(
+                    f"Abort: the number of samples that will be generated by {self.__class__.__name__} ({n}) "
+                    f"exceeds the maximum number of allowed samples ({sanity_check = }). Set 'sanity_check' to "
+                    f"None, or to a higher number, to bypass this check.")
+
+        self.collator = OnLabelledCollectionProtocol.get_collator(return_type)
+
+[docs]
+    def prevalence_grid(self):
+        """
+        Generates vectors of prevalence values from an exhaustive grid of prevalence values. The
+        number of prevalence values explored for each dimension depends on `n_prevalences`, so that, if, for
+        example, `n_prevalences=11`, then the prevalence values of the grid are taken from
+        [0, 0.1, 0.2, ..., 0.9, 1]. Only valid prevalence distributions are returned, i.e., vectors of
+        prevalence values that sum up to 1. For each valid vector of prevalence values, `repeats` copies are
+        returned. The vectors of prevalence values are implicit, meaning that the last dimension (which is
+        constrained to be 1 minus the sum of the rest) is not returned (note that, in this case, the returned
+        vectors do not sum up to 1). Note that this method is deterministic, i.e., there is no random sampling
+        anywhere.
+
+        :return: a `np.ndarray` of shape `(n, dimensions-1)`, where `n` is the number of valid combinations
+            found in the grid multiplied by `repeats`
+        """
+        dimensions = self.data.n_classes
+        s = F.prevalence_linspace(self.n_prevalences, repeats=1, smooth_limits_epsilon=self.smooth_limits_epsilon)
+        eps = (s[1]-s[0])/2  # handle floating-point rounding
+        s = [s] * (dimensions - 1)
+        prevs = [p for p in itertools.product(*s, repeat=1) if (sum(p) < (1.+eps))]
+        prevs = np.asarray(prevs).reshape(len(prevs), -1)
+        if self.repeats > 1:
+            prevs = np.repeat(prevs, self.repeats, axis=0)
+        return prevs
+ + +
+[docs]
+    def samples_parameters(self):
+        """
+        Returns all the necessary parameters to replicate the samples according to the APP protocol.
+
+        :return: a list of indexes that realize the APP sampling
+        """
+        indexes = []
+        for prevs in self.prevalence_grid():
+            index = self.data.sampling_index(self.sample_size, *prevs)
+            indexes.append(index)
+        return indexes
+ + +
+[docs] + def sample(self, index): + """ + Realizes the sample given the index of the instances. + + :param index: indexes of the instances to select + :return: an instance of :class:`qp.data.LabelledCollection` + """ + return self.data.sampling_from_index(index)
+ + +
+[docs] + def total(self): + """ + Returns the number of samples that will be generated + + :return: int + """ + return F.num_prevalence_combinations(self.n_prevalences, self.data.n_classes, self.repeats)
+
+ + + +
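+
+if __name__ == '__main__':
+    # Illustrative sketch (an editor's addition, not part of the original module):
+    # enumerating the APP grid over the test set of a QuaPy dataset; `sample_size`
+    # is taken from qp.environ["SAMPLE_SIZE"] when not passed explicitly.
+    qp.environ['SAMPLE_SIZE'] = 100
+    _, test = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=5).train_test
+    prot = APP(test, n_prevalences=11, repeats=1, random_state=0)
+    print(f'APP will generate {prot.total()} samples')
+    for sample, prev in prot():
+        print(F.strprev(prev))
+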
+[docs] +class NPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): + """ + A generator of samples that implements the natural prevalence protocol (NPP). The NPP consists of drawing + samples uniformly at random, therefore approximately preserving the natural prevalence of the collection. + + :param data: a `LabelledCollection` from which the samples will be drawn + :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from + qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised. + :param repeats: the number of samples to generate. Default is 100. + :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples + will be the same every time the protocol is called) + :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or + to "labelled_collection" to get instead instances of LabelledCollection + """ + + def __init__(self, data:LabelledCollection, sample_size=None, repeats=100, random_state=0, + return_type='sample_prev'): + super(NPP, self).__init__(random_state) + self.data = data + self.sample_size = qp._get_sample_size(sample_size) + self.repeats = repeats + self.random_state = random_state + self.collator = OnLabelledCollectionProtocol.get_collator(return_type) + +
+[docs]
+    def samples_parameters(self):
+        """
+        Returns all the necessary parameters to replicate the samples according to the NPP protocol.
+
+        :return: a list of indexes that realize the NPP sampling
+        """
+        indexes = []
+        for _ in range(self.repeats):
+            index = self.data.uniform_sampling_index(self.sample_size)
+            indexes.append(index)
+        return indexes
+ + +
+[docs] + def sample(self, index): + """ + Realizes the sample given the index of the instances. + + :param index: indexes of the instances to select + :return: an instance of :class:`qp.data.LabelledCollection` + """ + return self.data.sampling_from_index(index)
+ + +
+[docs]
+    def total(self):
+        """
+        Returns the number of samples that will be generated (equal to `repeats`)
+
+        :return: int
+        """
+        return self.repeats
+
+ + + +
+[docs]
+class UPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
+    """
+    A variant of :class:`APP` that, instead of using a grid of equidistant prevalence values,
+    relies on the Kraemer algorithm for sampling the unit (k-1)-simplex uniformly at random, with
+    k the number of classes. This protocol covers the entire range of prevalence values in a
+    statistical sense, i.e., unlike APP, there is no guarantee that the range is covered precisely
+    and equally for all classes; however, it is preferred in cases in which the number of possible
+    combinations of the grid values of APP makes the latter intractable.
+
+    :param data: a `LabelledCollection` from which the samples will be drawn
+    :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from
+        qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.
+    :param repeats: the number of samples to generate. Default is 100.
+    :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples
+        will be the same every time the protocol is called)
+    :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or
+        to "labelled_collection" to get instead instances of LabelledCollection
+    """
+
+    def __init__(self, data: LabelledCollection, sample_size=None, repeats=100, random_state=0,
+                 return_type='sample_prev'):
+        super(UPP, self).__init__(random_state)
+        self.data = data
+        self.sample_size = qp._get_sample_size(sample_size)
+        self.repeats = repeats
+        self.random_state = random_state
+        self.collator = OnLabelledCollectionProtocol.get_collator(return_type)
+
+[docs]
+    def samples_parameters(self):
+        """
+        Returns all the necessary parameters to replicate the samples according to the UPP protocol.
+
+        :return: a list of indexes that realize the UPP sampling
+        """
+        indexes = []
+        for prevs in F.uniform_simplex_sampling(n_classes=self.data.n_classes, size=self.repeats):
+            index = self.data.sampling_index(self.sample_size, *prevs)
+            indexes.append(index)
+        return indexes
+ + +
+[docs] + def sample(self, index): + """ + Realizes the sample given the index of the instances. + + :param index: indexes of the instances to select + :return: an instance of :class:`qp.data.LabelledCollection` + """ + return self.data.sampling_from_index(index)
+ + +
+[docs]
+    def total(self):
+        """
+        Returns the number of samples that will be generated (equal to `repeats`)
+
+        :return: int
+        """
+        return self.repeats
+
+ + + +
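+
+if __name__ == '__main__':
+    # Illustrative sketch (an editor's addition, not part of the original module): NPP
+    # approximately preserves the natural prevalence of the collection, while UPP spreads
+    # the sampled prevalence values uniformly over the simplex.
+    qp.environ['SAMPLE_SIZE'] = 100
+    _, test = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=5).train_test
+    for prot in [NPP(test, repeats=5, random_state=0), UPP(test, repeats=5, random_state=0)]:
+        prevs = np.asarray([prev for _, prev in prot()])
+        print(type(prot).__name__, 'mean prevalence:', prevs.mean(axis=0))
+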
+[docs]
+class DomainMixer(AbstractStochasticSeededProtocol):
+    """
+    Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence.
+
+    :param domainA: one domain, an object of :class:`qp.data.LabelledCollection`
+    :param domainB: another domain, an object of :class:`qp.data.LabelledCollection`
+    :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from
+        qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.
+    :param repeats: int, number of samples to draw for every mixture rate
+    :param prevalence: the prevalence to preserve along the mixtures. If specified, it should be an array
+        containing one prevalence value (positive float) for each class and summing up to one. If not specified,
+        the prevalence will be taken from domain A (default).
+    :param mixture_points: an integer indicating the number of points to take from a linear scale (e.g., 21 will
+        generate the mixture points [1, 0.95, 0.9, ..., 0]), or the array of mixture values itself.
+    :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples
+        will be the same every time the protocol is called)
+    """
+
+    def __init__(
+            self,
+            domainA: LabelledCollection,
+            domainB: LabelledCollection,
+            sample_size,
+            repeats=1,
+            prevalence=None,
+            mixture_points=11,
+            random_state=0,
+            return_type='sample_prev'):
+        super(DomainMixer, self).__init__(random_state)
+        self.A = domainA
+        self.B = domainB
+        self.sample_size = qp._get_sample_size(sample_size)
+        self.repeats = repeats
+        if prevalence is None:
+            self.prevalence = domainA.prevalence()
+        else:
+            self.prevalence = np.asarray(prevalence)
+            assert len(self.prevalence) == domainA.n_classes, \
+                f'wrong shape for the vector prevalence (expected {domainA.n_classes})'
+            assert F.check_prevalence_vector(self.prevalence), \
+                f'the prevalence vector is not valid (either it contains values outside [0,1] or does not sum up to 1)'
+        if isinstance(mixture_points, int):
+            self.mixture_points = np.linspace(0, 1, mixture_points)[::-1]
+        else:
+            self.mixture_points = np.asarray(mixture_points)
+            assert all(np.logical_and(self.mixture_points >= 0, self.mixture_points <= 1)), \
+                'mixture_points datatype not understood (expected int or a sequence of real values in [0,1])'
+        self.random_state = random_state
+        self.collator = OnLabelledCollectionProtocol.get_collator(return_type)
+
+[docs]
+    def samples_parameters(self):
+        """
+        Returns all the necessary parameters to replicate the samples according to this protocol.
+
+        :return: a list of zipped indexes (from A and B) that realize the sampling
+        """
+        indexesA, indexesB = [], []
+        for propA in self.mixture_points:
+            for _ in range(self.repeats):
+                nA = int(np.round(self.sample_size * propA))
+                nB = self.sample_size-nA
+                sampleAidx = self.A.sampling_index(nA, *self.prevalence)
+                sampleBidx = self.B.sampling_index(nB, *self.prevalence)
+                indexesA.append(sampleAidx)
+                indexesB.append(sampleBidx)
+        return list(zip(indexesA, indexesB))
+ + +
+[docs] + def sample(self, indexes): + """ + Realizes the sample given a pair of indexes of the instances from A and B. + + :param indexes: indexes of the instances to select from A and B + :return: an instance of :class:`qp.data.LabelledCollection` + """ + indexesA, indexesB = indexes + sampleA = self.A.sampling_from_index(indexesA) + sampleB = self.B.sampling_from_index(indexesB) + return sampleA+sampleB
+ + +
+[docs]
+    def total(self):
+        """
+        Returns the number of samples that will be generated (equal to `repeats * mixture_points`)
+
+        :return: int
+        """
+        return self.repeats * len(self.mixture_points)
+
+ + + +# aliases + +ArtificialPrevalenceProtocol = APP +NaturalPrevalenceProtocol = NPP +UniformPrevalenceProtocol = UPP +
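+
+if __name__ == '__main__':
+    # Illustrative sketch (an editor's addition, not part of the original module): mixing
+    # two synthetic domains that share the same feature space and label set; each yielded
+    # sample blends the domains at one mixture rate (1.0, 0.95, ..., 0.0) while keeping the
+    # class prevalence of domain A (since `prevalence` is left unspecified).
+    y = np.random.randint(0, 2, 1000)
+    domainA = LabelledCollection(np.random.normal(loc=0, size=(1000, 2)), y)
+    domainB = LabelledCollection(np.random.normal(loc=1, size=(1000, 2)), y)
+    mixer = DomainMixer(domainA, domainB, sample_size=100, mixture_points=11, random_state=0)
+    for sample, prev in mixer():
+        print(F.strprev(prev))
+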
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/tests/test_base.html b/docs/build/html/_modules/quapy/tests/test_base.html new file mode 100644 index 0000000..baf8cfa --- /dev/null +++ b/docs/build/html/_modules/quapy/tests/test_base.html @@ -0,0 +1,110 @@ + + + + + + quapy.tests.test_base — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.tests.test_base

+import pytest
+
+
+[docs] +def test_import(): + import quapy as qp + assert qp.__version__ is not None
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/tests/test_datasets.html b/docs/build/html/_modules/quapy/tests/test_datasets.html new file mode 100644 index 0000000..785e535 --- /dev/null +++ b/docs/build/html/_modules/quapy/tests/test_datasets.html @@ -0,0 +1,178 @@ + + + + + + quapy.tests.test_datasets — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.tests.test_datasets

+import pytest
+
+from quapy.data.datasets import REVIEWS_SENTIMENT_DATASETS, TWITTER_SENTIMENT_DATASETS_TEST, \
+    TWITTER_SENTIMENT_DATASETS_TRAIN, UCI_BINARY_DATASETS, LEQUA2022_TASKS, UCI_MULTICLASS_DATASETS,\
+    fetch_reviews, fetch_twitter, fetch_UCIBinaryDataset, fetch_lequa2022, fetch_UCIMulticlassLabelledCollection
+
+
+
+[docs] +@pytest.mark.parametrize('dataset_name', REVIEWS_SENTIMENT_DATASETS) +def test_fetch_reviews(dataset_name): + dataset = fetch_reviews(dataset_name) + print(f'Dataset {dataset_name}') + print('Training set stats') + dataset.training.stats() + print('Test set stats') + dataset.test.stats()
+ + + +
+[docs]
+@pytest.mark.parametrize('dataset_name', TWITTER_SENTIMENT_DATASETS_TEST + TWITTER_SENTIMENT_DATASETS_TRAIN)
+def test_fetch_twitter(dataset_name):
+    try:
+        dataset = fetch_twitter(dataset_name)
+    except ValueError as ve:
+        if dataset_name == 'semeval' and ve.args[0].startswith(
+                'dataset "semeval" can only be used for model selection.'):
+            dataset = fetch_twitter(dataset_name, for_model_selection=True)
+        else:
+            raise
+    print(f'Dataset {dataset_name}')
+    print('Training set stats')
+    dataset.training.stats()
+    print('Test set stats')
+    dataset.test.stats()
+ + + +
+[docs]
+@pytest.mark.parametrize('dataset_name', UCI_BINARY_DATASETS)
+def test_fetch_UCIDataset(dataset_name):
+    try:
+        dataset = fetch_UCIBinaryDataset(dataset_name)
+    except FileNotFoundError as fnfe:
+        if dataset_name == 'pageblocks.5' and fnfe.args[0].find(
+                'If this is the first time you attempt to load this dataset') > 0:
+            print('The pageblocks.5 dataset requires some hand processing to be usable; skipping this test.')
+            return
+        raise
+    print(f'Dataset {dataset_name}')
+    print('Training set stats')
+    dataset.training.stats()
+    print('Test set stats')
+    dataset.test.stats()
+ + + +
+[docs]
+@pytest.mark.parametrize('dataset_name', UCI_MULTICLASS_DATASETS)
+def test_fetch_UCIMultiDataset(dataset_name):
+    dataset = fetch_UCIMulticlassLabelledCollection(dataset_name)
+    print(f'Dataset {dataset_name}')
+    print('Dataset stats')
+    dataset.stats()
+ + + +
+[docs] +@pytest.mark.parametrize('dataset_name', LEQUA2022_TASKS) +def test_fetch_lequa2022(dataset_name): + train, gen_val, gen_test = fetch_lequa2022(dataset_name) + print(train.stats()) + print('Val:', gen_val.total()) + print('Test:', gen_test.total())
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/tests/test_evaluation.html b/docs/build/html/_modules/quapy/tests/test_evaluation.html new file mode 100644 index 0000000..d5603a4 --- /dev/null +++ b/docs/build/html/_modules/quapy/tests/test_evaluation.html @@ -0,0 +1,195 @@ + + + + + + quapy.tests.test_evaluation — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.tests.test_evaluation

+import unittest
+
+import numpy as np
+
+import quapy as qp
+from sklearn.linear_model import LogisticRegression
+from time import time
+
+from quapy.error import QUANTIFICATION_ERROR_SINGLE, QUANTIFICATION_ERROR, QUANTIFICATION_ERROR_NAMES, \
+    QUANTIFICATION_ERROR_SINGLE_NAMES
+from quapy.method.aggregative import EMQ, PCC
+from quapy.method.base import BaseQuantifier
+
+
+
+[docs] +class EvalTestCase(unittest.TestCase): +
+[docs] + def test_eval_speedup(self): + + data = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=10, pickle=True) + train, test = data.training, data.test + + protocol = qp.protocol.APP(test, sample_size=1000, n_prevalences=11, repeats=1, random_state=1) + + class SlowLR(LogisticRegression): + def predict_proba(self, X): + import time + time.sleep(1) + return super().predict_proba(X) + + emq = EMQ(SlowLR()).fit(train) + + tinit = time() + score = qp.evaluation.evaluate(emq, protocol, error_metric='mae', verbose=True, aggr_speedup='force') + tend_optim = time()-tinit + print(f'evaluation (with optimization) took {tend_optim}s [MAE={score:.4f}]') + + class NonAggregativeEMQ(BaseQuantifier): + + def __init__(self, cls): + self.emq = EMQ(cls) + + def quantify(self, instances): + return self.emq.quantify(instances) + + def fit(self, data): + self.emq.fit(data) + return self + + emq = NonAggregativeEMQ(SlowLR()).fit(train) + + tinit = time() + score = qp.evaluation.evaluate(emq, protocol, error_metric='mae', verbose=True) + tend_no_optim = time() - tinit + print(f'evaluation (w/o optimization) took {tend_no_optim}s [MAE={score:.4f}]') + + self.assertEqual(tend_no_optim>(tend_optim/2), True)
+ + +
+[docs] + def test_evaluation_output(self): + + data = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=10, pickle=True) + train, test = data.training, data.test + + qp.environ['SAMPLE_SIZE']=100 + + protocol = qp.protocol.APP(test, random_state=0) + + q = PCC(LogisticRegression()).fit(train) + + single_errors = list(QUANTIFICATION_ERROR_SINGLE_NAMES) + averaged_errors = ['m'+e for e in single_errors] + single_errors = single_errors + [qp.error.from_name(e) for e in single_errors] + averaged_errors = averaged_errors + [qp.error.from_name(e) for e in averaged_errors] + for error_metric, averaged_error_metric in zip(single_errors, averaged_errors): + score = qp.evaluation.evaluate(q, protocol, error_metric=averaged_error_metric) + self.assertTrue(isinstance(score, float)) + + scores = qp.evaluation.evaluate(q, protocol, error_metric=error_metric) + self.assertTrue(isinstance(scores, np.ndarray)) + + self.assertEqual(scores.mean(), score)
+
+ + + + +if __name__ == '__main__': + unittest.main() +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/tests/test_hierarchy.html b/docs/build/html/_modules/quapy/tests/test_hierarchy.html new file mode 100644 index 0000000..793091b --- /dev/null +++ b/docs/build/html/_modules/quapy/tests/test_hierarchy.html @@ -0,0 +1,143 @@ + + + + + + quapy.tests.test_hierarchy — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.tests.test_hierarchy

+import unittest
+from sklearn.linear_model import LogisticRegression
+from quapy.method.aggregative import *
+
+
+
+[docs] +class HierarchyTestCase(unittest.TestCase): + +
+[docs] + def test_aggregative(self): + lr = LogisticRegression() + for m in [CC(lr), PCC(lr), ACC(lr), PACC(lr)]: + self.assertEqual(isinstance(m, AggregativeQuantifier), True)
+ + +
+[docs] + def test_binary(self): + lr = LogisticRegression() + for m in [HDy(lr)]: + self.assertEqual(isinstance(m, BinaryQuantifier), True)
+ + +
+[docs] + def test_probabilistic(self): + lr = LogisticRegression() + for m in [CC(lr), ACC(lr)]: + self.assertEqual(isinstance(m, AggregativeCrispQuantifier), True) + self.assertEqual(isinstance(m, AggregativeSoftQuantifier), False) + for m in [PCC(lr), PACC(lr)]: + self.assertEqual(isinstance(m, AggregativeCrispQuantifier), False) + self.assertEqual(isinstance(m, AggregativeSoftQuantifier), True)
+
+ + + +if __name__ == '__main__': + unittest.main() +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/tests/test_labelcollection.html b/docs/build/html/_modules/quapy/tests/test_labelcollection.html new file mode 100644 index 0000000..682aeba --- /dev/null +++ b/docs/build/html/_modules/quapy/tests/test_labelcollection.html @@ -0,0 +1,176 @@ + + + + + + quapy.tests.test_labelcollection — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
+
+
+
+
+
+
+
+
+ +

Source code for quapy.tests.test_labelcollection

+import unittest
+import numpy as np
+from scipy.sparse import csr_matrix
+
+import quapy as qp
+
+
+
+[docs] +class LabelCollectionTestCase(unittest.TestCase): +
+[docs] + def test_split(self): + x = np.arange(100) + y = np.random.randint(0,5,100) + data = qp.data.LabelledCollection(x,y) + tr, te = data.split_random(0.7) + check_prev = tr.prevalence()*0.7 + te.prevalence()*0.3 + + self.assertEqual(len(tr), 70) + self.assertEqual(len(te), 30) + self.assertEqual(np.allclose(check_prev, data.prevalence()), True) + self.assertEqual(len(tr+te), len(data))
+ + +
+[docs] + def test_join(self): + x = np.arange(50) + y = np.random.randint(2, 5, 50) + data1 = qp.data.LabelledCollection(x, y) + + x = np.arange(200) + y = np.random.randint(0, 3, 200) + data2 = qp.data.LabelledCollection(x, y) + + x = np.arange(100) + y = np.random.randint(0, 6, 100) + data3 = qp.data.LabelledCollection(x, y) + + combined = qp.data.LabelledCollection.join(data1, data2, data3) + self.assertEqual(len(combined), len(data1)+len(data2)+len(data3)) + self.assertEqual(all(combined.classes_ == np.arange(6)), True) + + x = np.random.rand(10, 3) + y = np.random.randint(0, 1, 10) + data4 = qp.data.LabelledCollection(x, y) + with self.assertRaises(Exception): + combined = qp.data.LabelledCollection.join(data1, data2, data3, data4) + + x = np.random.rand(20, 3) + y = np.random.randint(0, 1, 20) + data5 = qp.data.LabelledCollection(x, y) + combined = qp.data.LabelledCollection.join(data4, data5) + self.assertEqual(len(combined), len(data4)+len(data5)) + + x = np.random.rand(10, 4) + y = np.random.randint(0, 1, 10) + data6 = qp.data.LabelledCollection(x, y) + with self.assertRaises(Exception): + combined = qp.data.LabelledCollection.join(data4, data5, data6) + + data4.instances = csr_matrix(data4.instances) + with self.assertRaises(Exception): + combined = qp.data.LabelledCollection.join(data4, data5) + data5.instances = csr_matrix(data5.instances) + combined = qp.data.LabelledCollection.join(data4, data5) + self.assertEqual(len(combined), len(data4) + len(data5))
+
+ + + +if __name__ == '__main__': + unittest.main() +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/tests/test_methods.html b/docs/build/html/_modules/quapy/tests/test_methods.html new file mode 100644 index 0000000..e2b28a9 --- /dev/null +++ b/docs/build/html/_modules/quapy/tests/test_methods.html @@ -0,0 +1,357 @@ + + + + + + quapy.tests.test_methods — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.tests.test_methods

+import numpy as np
+import pytest
+from sklearn.linear_model import LogisticRegression
+from sklearn.svm import LinearSVC
+
+from quapy import method  # import from the installed package rather than a fragile relative 'import method.aggregative'
+import quapy as qp
+from quapy.model_selection import GridSearchQ
+from quapy.method.base import BinaryQuantifier
+from quapy.data import Dataset, LabelledCollection
+from quapy.method import AGGREGATIVE_METHODS, NON_AGGREGATIVE_METHODS
+from quapy.method.meta import Ensemble
+from quapy.protocol import APP
+from quapy.method.aggregative import DMy
+from quapy.method.meta import MedianEstimator
+
+# datasets = [pytest.param(qp.datasets.fetch_twitter('hcr', pickle=True), id='hcr'),
+#             pytest.param(qp.datasets.fetch_UCIDataset('ionosphere'), id='ionosphere')]
+
+tinydatasets = [pytest.param(qp.datasets.fetch_twitter('hcr', pickle=True).reduce(), id='tiny_hcr'),
+                pytest.param(qp.datasets.fetch_UCIBinaryDataset('ionosphere').reduce(), id='tiny_ionosphere')]
+
+learners = [LogisticRegression, LinearSVC]
+
+
+
+[docs] +@pytest.mark.parametrize('dataset', tinydatasets) +@pytest.mark.parametrize('aggregative_method', AGGREGATIVE_METHODS) +@pytest.mark.parametrize('learner', learners) +def test_aggregative_methods(dataset: Dataset, aggregative_method, learner): + model = aggregative_method(learner()) + + if isinstance(model, BinaryQuantifier) and not dataset.binary: + print(f'skipping the test of binary model {type(model)} on non-binary dataset {dataset}') + return + + model.fit(dataset.training) + + estim_prevalences = model.quantify(dataset.test.instances) + + true_prevalences = dataset.test.prevalence() + error = qp.error.mae(true_prevalences, estim_prevalences) + + assert type(error) == np.float64
+ + + +
+[docs] +@pytest.mark.parametrize('dataset', tinydatasets) +@pytest.mark.parametrize('non_aggregative_method', NON_AGGREGATIVE_METHODS) +def test_non_aggregative_methods(dataset: Dataset, non_aggregative_method): + model = non_aggregative_method() + + if isinstance(model, BinaryQuantifier) and not dataset.binary: + print(f'skipping the test of binary model {model} on non-binary dataset {dataset}') + return + + model.fit(dataset.training) + + estim_prevalences = model.quantify(dataset.test.instances) + + true_prevalences = dataset.test.prevalence() + error = qp.error.mae(true_prevalences, estim_prevalences) + + assert type(error) == np.float64
+ + + +
+[docs] +@pytest.mark.parametrize('base_method', [method.aggregative.ACC, method.aggregative.PACC]) +@pytest.mark.parametrize('learner', [LogisticRegression]) +@pytest.mark.parametrize('dataset', tinydatasets) +@pytest.mark.parametrize('policy', Ensemble.VALID_POLICIES) +def test_ensemble_method(base_method, learner, dataset: Dataset, policy): + + qp.environ['SAMPLE_SIZE'] = 20 + + base_quantifier=base_method(learner()) + + if not dataset.binary and policy=='ds': + print(f'skipping the test of binary policy ds on non-binary dataset {dataset}') + return + + model = Ensemble(quantifier=base_quantifier, size=3, policy=policy, n_jobs=-1) + + model.fit(dataset.training) + + estim_prevalences = model.quantify(dataset.test.instances) + + true_prevalences = dataset.test.prevalence() + error = qp.error.mae(true_prevalences, estim_prevalences) + + assert type(error) == np.float64
+ + + +
+[docs] +def test_quanet_method(): + try: + import quapy.classification.neural + except ModuleNotFoundError: + print('skipping QuaNet test due to missing torch package') + return + + qp.environ['SAMPLE_SIZE'] = 100 + + # load the kindle dataset as text, and convert words to numerical indexes + dataset = qp.datasets.fetch_reviews('kindle', pickle=True).reduce(200, 200) + qp.data.preprocessing.index(dataset, min_df=5, inplace=True) + + from quapy.classification.neural import CNNnet + cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes) + + from quapy.classification.neural import NeuralClassifierTrainer + learner = NeuralClassifierTrainer(cnn, device='cuda') + + from quapy.method.meta import QuaNet + model = QuaNet(learner, device='cuda') + + if isinstance(model, BinaryQuantifier) and not dataset.binary: + print(f'skipping the test of binary model {model} on non-binary dataset {dataset}') + return + + model.fit(dataset.training) + + estim_prevalences = model.quantify(dataset.test.instances) + + true_prevalences = dataset.test.prevalence() + error = qp.error.mae(true_prevalences, estim_prevalences) + + assert type(error) == np.float64
+ + + +
+[docs] +def test_str_label_names(): + model = qp.method.aggregative.CC(LogisticRegression()) + + dataset = qp.datasets.fetch_reviews('imdb', pickle=True) + dataset = Dataset(dataset.training.sampling(1000, *dataset.training.prevalence()), + dataset.test.sampling(1000, 0.25, 0.75)) + qp.data.preprocessing.text2tfidf(dataset, min_df=5, inplace=True) + + np.random.seed(0) + model.fit(dataset.training) + + int_estim_prevalences = model.quantify(dataset.test.instances) + true_prevalences = dataset.test.prevalence() + + error = qp.error.mae(true_prevalences, int_estim_prevalences) + assert type(error) == np.float64 + + dataset_str = Dataset(LabelledCollection(dataset.training.instances, + ['one' if label == 1 else 'zero' for label in dataset.training.labels]), + LabelledCollection(dataset.test.instances, + ['one' if label == 1 else 'zero' for label in dataset.test.labels])) + assert all(dataset_str.training.classes_ == dataset_str.test.classes_), 'wrong indexation' + np.random.seed(0) + model.fit(dataset_str.training) + + str_estim_prevalences = model.quantify(dataset_str.test.instances) + true_prevalences = dataset_str.test.prevalence() + + error = qp.error.mae(true_prevalences, str_estim_prevalences) + assert type(error) == np.float64 + + print(true_prevalences) + print(int_estim_prevalences) + print(str_estim_prevalences) + + np.testing.assert_almost_equal(int_estim_prevalences[1], + str_estim_prevalences[list(model.classes_).index('one')])
+ + +# helper +def __fit_test(quantifier, train, test): + quantifier.fit(train) + test_samples = APP(test) + true_prevs, estim_prevs = qp.evaluation.prediction(quantifier, test_samples) + return qp.error.mae(true_prevs, estim_prevs), estim_prevs + + +
+[docs]
+def test_median_meta():
+    """
+    This test compares the performance of the MedianEstimator with that obtained by directly computing the
+    median of the predictions of differently parameterized versions of the base quantifier. We use the
+    DistributionMatching (DMy) base quantifier, and the median is computed across different values of nbins.
+    """
+
+    qp.environ['SAMPLE_SIZE'] = 100
+
+    # grid of values
+    nbins_grid = list(range(2, 11))
+
+    dataset = 'kindle'
+    train, test = qp.datasets.fetch_reviews(dataset, tfidf=True, min_df=10).train_test
+    prevs = []
+    errors = []
+    for nbins in nbins_grid:
+        with qp.util.temp_seed(0):
+            q = DMy(LogisticRegression(), nbins=nbins)
+            mae, estim_prevs = __fit_test(q, train, test)
+            prevs.append(estim_prevs)
+            errors.append(mae)
+            print(f'{dataset} DistributionMatching(nbins={nbins}) got MAE {mae:.4f}')
+    prevs = np.asarray(prevs)
+    mae = np.mean(errors)
+    print(f'\tMAE={mae:.4f}')
+
+    q = DMy(LogisticRegression())
+    q = MedianEstimator(q, param_grid={'nbins': nbins_grid}, random_state=0, n_jobs=-1)
+    median_mae, prev = __fit_test(q, train, test)
+    print(f'\tMAE={median_mae:.4f}')
+
+    np.testing.assert_almost_equal(np.median(prevs, axis=0), prev)
+    assert median_mae < mae, 'the median-based quantifier provided a higher error...'
+ + + +
+[docs] +def test_median_meta_modsel(): + """ + This test checks the median-meta quantifier with model selection + """ + + qp.environ['SAMPLE_SIZE'] = 100 + + dataset = 'kindle' + train, test = qp.datasets.fetch_reviews(dataset, tfidf=True, min_df=10).train_test + train, val = train.split_stratified(random_state=0) + + nbins_grid = [2, 4, 5, 10, 15] + + q = DMy(LogisticRegression()) + q = MedianEstimator(q, param_grid={'nbins': nbins_grid}, random_state=0, n_jobs=-1) + median_mae, _ = __fit_test(q, train, test) + print(f'\tMAE={median_mae:.4f}') + + q = DMy(LogisticRegression()) + lr_params = {'classifier__C': np.logspace(-1, 1, 3)} + q = MedianEstimator(q, param_grid={'nbins': nbins_grid}, random_state=0, n_jobs=-1) + q = GridSearchQ(q, param_grid=lr_params, protocol=APP(val), n_jobs=-1) + optimized_median_ave, _ = __fit_test(q, train, test) + print(f'\tMAE={optimized_median_ave:.4f}') + + assert optimized_median_ave < median_mae, "the optimized method yielded worse performance..."
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/tests/test_modsel.html b/docs/build/html/_modules/quapy/tests/test_modsel.html new file mode 100644 index 0000000..ff1c51c --- /dev/null +++ b/docs/build/html/_modules/quapy/tests/test_modsel.html @@ -0,0 +1,225 @@ + + + + + + quapy.tests.test_modsel — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.tests.test_modsel

+import unittest
+
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+from sklearn.svm import SVC
+
+import quapy as qp
+from quapy.method.aggregative import PACC
+from quapy.model_selection import GridSearchQ
+from quapy.protocol import APP
+import time
+
+
+
+[docs] +class ModselTestCase(unittest.TestCase): + +
+[docs] + def test_modsel(self): + + q = PACC(LogisticRegression(random_state=1, max_iter=5000)) + + data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10) + training, validation = data.training.split_stratified(0.7, random_state=1) + + param_grid = {'classifier__C': np.logspace(-3,3,7)} + app = APP(validation, sample_size=100, random_state=1) + q = GridSearchQ( + q, param_grid, protocol=app, error='mae', refit=True, timeout=-1, verbose=True + ).fit(training) + print('best params', q.best_params_) + print('best score', q.best_score_) + + self.assertEqual(q.best_params_['classifier__C'], 10.0) + self.assertEqual(q.best_model().get_params()['classifier__C'], 10.0)
+ + +
+[docs] + def test_modsel_parallel(self): + + q = PACC(LogisticRegression(random_state=1, max_iter=5000)) + + data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10) + training, validation = data.training.split_stratified(0.7, random_state=1) + # test = data.test + + param_grid = {'classifier__C': np.logspace(-3,3,7)} + app = APP(validation, sample_size=100, random_state=1) + q = GridSearchQ( + q, param_grid, protocol=app, error='mae', refit=True, timeout=-1, n_jobs=-1, verbose=True + ).fit(training) + print('best params', q.best_params_) + print('best score', q.best_score_) + + self.assertEqual(q.best_params_['classifier__C'], 10.0) + self.assertEqual(q.best_model().get_params()['classifier__C'], 10.0)
+ + +
+[docs] + def test_modsel_parallel_speedup(self): + class SlowLR(LogisticRegression): + def fit(self, X, y, sample_weight=None): + time.sleep(1) + return super(SlowLR, self).fit(X, y, sample_weight) + + q = PACC(SlowLR(random_state=1, max_iter=5000)) + + data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10) + training, validation = data.training.split_stratified(0.7, random_state=1) + + param_grid = {'classifier__C': np.logspace(-3, 3, 7)} + app = APP(validation, sample_size=100, random_state=1) + + tinit = time.time() + GridSearchQ( + q, param_grid, protocol=app, error='mae', refit=False, timeout=-1, n_jobs=1, verbose=True + ).fit(training) + tend_nooptim = time.time()-tinit + + tinit = time.time() + GridSearchQ( + q, param_grid, protocol=app, error='mae', refit=False, timeout=-1, n_jobs=-1, verbose=True + ).fit(training) + tend_optim = time.time() - tinit + + print(f'parallel training took {tend_optim:.4f}s') + print(f'sequential training took {tend_nooptim:.4f}s') + + self.assertEqual(tend_optim < (0.5*tend_nooptim), True)
+ + +
+[docs] + def test_modsel_timeout(self): + + class SlowLR(LogisticRegression): + def fit(self, X, y, sample_weight=None): + import time + time.sleep(10) + super(SlowLR, self).fit(X, y, sample_weight) + + q = PACC(SlowLR()) + + data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10) + training, validation = data.training.split_stratified(0.7, random_state=1) + # test = data.test + + param_grid = {'classifier__C': np.logspace(-3,3,7)} + app = APP(validation, sample_size=100, random_state=1) + q = GridSearchQ( + q, param_grid, protocol=app, error='mae', refit=True, timeout=3, n_jobs=-1, verbose=True + ) + with self.assertRaises(TimeoutError): + q.fit(training)
+
+ + + +if __name__ == '__main__': + unittest.main() +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/tests/test_protocols.html b/docs/build/html/_modules/quapy/tests/test_protocols.html new file mode 100644 index 0000000..65e6d83 --- /dev/null +++ b/docs/build/html/_modules/quapy/tests/test_protocols.html @@ -0,0 +1,336 @@ + + + + + + quapy.tests.test_protocols — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.tests.test_protocols

+import unittest
+import numpy as np
+
+import quapy.functional
+from quapy.data import LabelledCollection
+from quapy.protocol import APP, NPP, UPP, DomainMixer, AbstractStochasticSeededProtocol
+
+
+
+[docs] +def mock_labelled_collection(prefix=''): + y = [0] * 250 + [1] * 250 + [2] * 250 + [3] * 250 + X = [prefix + str(i) + '-' + str(yi) for i, yi in enumerate(y)] + return LabelledCollection(X, y, classes=sorted(np.unique(y)))
+ + + +
+[docs] +def samples_to_str(protocol): + samples_str = "" + for instances, prev in protocol(): + samples_str += f'{instances}\t{prev}\n' + return samples_str
+ + + +
+[docs] +class TestProtocols(unittest.TestCase): + +
+[docs] + def test_app_sanity_check(self): + data = mock_labelled_collection() + n_prevpoints = 101 + repeats = 10 + with self.assertRaises(RuntimeError): + p = APP(data, sample_size=5, n_prevalences=n_prevpoints, repeats=repeats, random_state=42) + n_combinations = \ + quapy.functional.num_prevalence_combinations(n_prevpoints, n_classes=data.n_classes, n_repeats=repeats) + p = APP(data, sample_size=5, n_prevalences=n_prevpoints, random_state=42, sanity_check=n_combinations) + p = APP(data, sample_size=5, n_prevalences=n_prevpoints, random_state=42, sanity_check=None)
+ + +
+[docs] + def test_app_replicate(self): + data = mock_labelled_collection() + p = APP(data, sample_size=5, n_prevalences=11, random_state=42) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertEqual(samples1, samples2) + + p = APP(data, sample_size=5, n_prevalences=11) # <- random_state is by default set to 0 + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertEqual(samples1, samples2)
+ + +
+[docs] + def test_app_not_replicate(self): + data = mock_labelled_collection() + p = APP(data, sample_size=5, n_prevalences=11, random_state=None) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertNotEqual(samples1, samples2) + + p = APP(data, sample_size=5, n_prevalences=11, random_state=42) + samples1 = samples_to_str(p) + p = APP(data, sample_size=5, n_prevalences=11, random_state=0) + samples2 = samples_to_str(p) + + self.assertNotEqual(samples1, samples2)
+ + +
+[docs]
+    def test_app_number(self):
+        data = mock_labelled_collection()
+        p = APP(data, sample_size=100, n_prevalences=10, repeats=1)
+
+        # surprisingly enough, for some values of n_prevalences the test fails even though
+        # everything is correct. The problem is that in function APP.prevalence_grid()
+        # there is sometimes one rounding error that accumulates and
+        # surpasses 1.0 (by a very small float value, 0.0000000000002 or the like),
+        # so these tuples are mistakenly removed... I have tried with np.isclose, and
+        # other workarounds, but it eventually happens that there is some negative probability
+        # in the sampling function...
+
+        count = 0
+        for _ in p():
+            count += 1
+
+        self.assertEqual(count, p.total())
+ + +
+[docs] + def test_npp_replicate(self): + data = mock_labelled_collection() + p = NPP(data, sample_size=5, repeats=5, random_state=42) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertEqual(samples1, samples2) + + p = NPP(data, sample_size=5, repeats=5) # <- random_state is by default set to 0 + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertEqual(samples1, samples2)
+ + +
+[docs] + def test_npp_not_replicate(self): + data = mock_labelled_collection() + p = NPP(data, sample_size=5, repeats=5, random_state=None) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertNotEqual(samples1, samples2) + + p = NPP(data, sample_size=5, repeats=5, random_state=42) + samples1 = samples_to_str(p) + p = NPP(data, sample_size=5, repeats=5, random_state=0) + samples2 = samples_to_str(p) + self.assertNotEqual(samples1, samples2)
+ + +
+[docs] + def test_kraemer_replicate(self): + data = mock_labelled_collection() + p = UPP(data, sample_size=5, repeats=10, random_state=42) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertEqual(samples1, samples2) + + p = UPP(data, sample_size=5, repeats=10) # <- random_state is by default set to 0 + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertEqual(samples1, samples2)
+ + +
+[docs] + def test_kraemer_not_replicate(self): + data = mock_labelled_collection() + p = UPP(data, sample_size=5, repeats=10, random_state=None) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertNotEqual(samples1, samples2)
+ + +
+[docs] + def test_covariate_shift_replicate(self): + dataA = mock_labelled_collection('domA') + dataB = mock_labelled_collection('domB') + p = DomainMixer(dataA, dataB, sample_size=10, mixture_points=11, random_state=1) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertEqual(samples1, samples2) + + p = DomainMixer(dataA, dataB, sample_size=10, mixture_points=11) # <- random_state is by default set to 0 + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertEqual(samples1, samples2)
+ + +
+[docs] + def test_covariate_shift_not_replicate(self): + dataA = mock_labelled_collection('domA') + dataB = mock_labelled_collection('domB') + p = DomainMixer(dataA, dataB, sample_size=10, mixture_points=11, random_state=None) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertNotEqual(samples1, samples2)
+ + +
+[docs]
+    def test_no_seed_init(self):
+        class NoSeedInit(AbstractStochasticSeededProtocol):
+            def __init__(self):
+                self.data = mock_labelled_collection()
+
+            def samples_parameters(self):
+                # return a matrix containing sampling indices in the rows
+                return np.random.randint(0, len(self.data), 10*10).reshape(10, 10)
+
+            def sample(self, params):
+                index = np.unique(params)
+                return self.data.sampling_from_index(index)
+
+        p = NoSeedInit()
+
+        # this should raise a ValueError, since the class extends AbstractStochasticSeededProtocol but the
+        # random seed has never been passed to super(NoSeedInit, self).__init__(random_seed)
+        with self.assertRaises(ValueError):
+            for sample in p():
+                pass
+            print('done')
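For contrast, a correctly seeded variant of the same protocol would forward the seed to the parent constructor (a sketch, assuming the parent accepts the seed as its first argument, as the comment above suggests):

class SeededInit(AbstractStochasticSeededProtocol):
    def __init__(self, random_state=0):
        super().__init__(random_state)  # <- the call that NoSeedInit omits
        self.data = mock_labelled_collection()

    def samples_parameters(self):
        # a matrix containing sampling indices in the rows, drawn under the protocol's seed
        return np.random.randint(0, len(self.data), 10 * 10).reshape(10, 10)

    def sample(self, params):
        return self.data.sampling_from_index(np.unique(params))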
+
+ + + +if __name__ == '__main__': + unittest.main() +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/tests/test_replicability.html b/docs/build/html/_modules/quapy/tests/test_replicability.html new file mode 100644 index 0000000..4731cce --- /dev/null +++ b/docs/build/html/_modules/quapy/tests/test_replicability.html @@ -0,0 +1,225 @@ + + + + + + quapy.tests.test_replicability — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + + +
  • +
  • +
+
+
+
+
+ +

Source code for quapy.tests.test_replicability

+import unittest
+import quapy as qp
+from quapy.data import LabelledCollection
+from quapy.functional import strprev
+from sklearn.linear_model import LogisticRegression
+import numpy as np
+from quapy.method.aggregative import PACC
+import quapy.functional as F
+
+
+
+[docs] +class MyTestCase(unittest.TestCase): + +
+[docs]
+    def test_prediction_replicability(self):
+
+        dataset = qp.datasets.fetch_UCIBinaryDataset('yeast')
+
+        with qp.util.temp_seed(0):
+            lr = LogisticRegression(random_state=0, max_iter=10000)
+            pacc = PACC(lr)
+            prev = pacc.fit(dataset.training).quantify(dataset.test.X)
+            str_prev1 = strprev(prev, prec=5)
+
+        with qp.util.temp_seed(0):
+            lr = LogisticRegression(random_state=0, max_iter=10000)
+            pacc = PACC(lr)
+            prev2 = pacc.fit(dataset.training).quantify(dataset.test.X)
+            str_prev2 = strprev(prev2, prec=5)
+
+        self.assertEqual(str_prev1, str_prev2)  # identical seeds must yield identical estimates
+ + + +
+[docs]
+    def test_sampling_replicability(self):
+
+        def equal_collections(c1, c2, value=True):
+            self.assertEqual(np.all(c1.X == c2.X), value)
+            self.assertEqual(np.all(c1.y == c2.y), value)
+            if value:
+                self.assertEqual(np.all(c1.classes_ == c2.classes_), value)
+
+        X = list(map(str, range(100)))
+        y = np.random.randint(0, 2, 100)
+        data = LabelledCollection(instances=X, labels=y)
+
+        sample1 = data.sampling(50)
+        sample2 = data.sampling(50)
+        equal_collections(sample1, sample2, False)
+
+        sample1 = data.sampling(50, random_state=0)
+        sample2 = data.sampling(50, random_state=0)
+        equal_collections(sample1, sample2, True)
+
+        sample1 = data.sampling(50, *[0.7, 0.3], random_state=0)
+        sample2 = data.sampling(50, *[0.7, 0.3], random_state=0)
+        equal_collections(sample1, sample2, True)
+
+        with qp.util.temp_seed(0):
+            sample1 = data.sampling(50, *[0.7, 0.3])
+        with qp.util.temp_seed(0):
+            sample2 = data.sampling(50, *[0.7, 0.3])
+        equal_collections(sample1, sample2, True)
+
+        sample1 = data.sampling(50, *[0.7, 0.3], random_state=0)
+        sample2 = data.sampling(50, *[0.7, 0.3], random_state=0)
+        equal_collections(sample1, sample2, True)
+
+        sample1_tr, sample1_te = data.split_stratified(train_prop=0.7, random_state=0)
+        sample2_tr, sample2_te = data.split_stratified(train_prop=0.7, random_state=0)
+        equal_collections(sample1_tr, sample2_tr, True)
+        equal_collections(sample1_te, sample2_te, True)
+
+        with qp.util.temp_seed(0):
+            sample1_tr, sample1_te = data.split_stratified(train_prop=0.7)
+        with qp.util.temp_seed(0):
+            sample2_tr, sample2_te = data.split_stratified(train_prop=0.7)
+        equal_collections(sample1_tr, sample2_tr, True)
+        equal_collections(sample1_te, sample2_te, True)
+ + + +
+[docs] + def test_parallel_replicability(self): + + train, test = qp.datasets.fetch_UCIMulticlassDataset('dry-bean').train_test + + test = test.sampling(500, *[0.1, 0.0, 0.1, 0.1, 0.2, 0.5, 0.0]) + + with qp.util.temp_seed(10): + pacc = PACC(LogisticRegression(), val_split=2, n_jobs=2) + pacc.fit(train, val_split=0.5) + prev1 = F.strprev(pacc.quantify(test.instances)) + + with qp.util.temp_seed(0): + pacc = PACC(LogisticRegression(), val_split=2, n_jobs=2) + pacc.fit(train, val_split=0.5) + prev2 = F.strprev(pacc.quantify(test.instances)) + + with qp.util.temp_seed(0): + pacc = PACC(LogisticRegression(), val_split=2, n_jobs=2) + pacc.fit(train, val_split=0.5) + prev3 = F.strprev(pacc.quantify(test.instances)) + + print(prev1) + print(prev2) + print(prev3) + + self.assertNotEqual(prev1, prev2) + self.assertEqual(prev2, prev3)
+
+ + + + + +if __name__ == '__main__': + unittest.main() +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_modules/quapy/util.html b/docs/build/html/_modules/quapy/util.html new file mode 100644 index 0000000..be2ae3b --- /dev/null +++ b/docs/build/html/_modules/quapy/util.html @@ -0,0 +1,437 @@ + + + + + + quapy.util — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for quapy.util

+import contextlib
+import itertools
+import multiprocessing
+import os
+import pickle
+import urllib
+from pathlib import Path
+from contextlib import ExitStack
+import quapy as qp
+
+import numpy as np
+from joblib import Parallel, delayed
+from time import time
+import signal
+
+
+def _get_parallel_slices(n_tasks, n_jobs):
+    if n_jobs == -1:
+        n_jobs = multiprocessing.cpu_count()
+    batch = int(n_tasks / n_jobs)
+    remainder = n_tasks % n_jobs
+    return [slice(job * batch, (job + 1) * batch + (remainder if job == n_jobs - 1 else 0)) for job in range(n_jobs)]
+
+
+
+[docs]
+def map_parallel(func, args, n_jobs):
+    """
+    Applies func to n_jobs slices of args. E.g., if args is an array of 99 items and n_jobs=2, then
+    func is applied in two parallel processes to args[0:49] and to args[49:99] (the last worker
+    takes the remainder). func is a function that already works with a list of arguments.
+
+    :param func: function to be parallelized
+    :param args: array-like of arguments to be passed to the function in different parallel calls
+    :param n_jobs: the number of workers
+    """
+    args = np.asarray(args)
+    slices = _get_parallel_slices(len(args), n_jobs)
+    results = Parallel(n_jobs=n_jobs)(
+        delayed(func)(args[slice_i]) for slice_i in slices
+    )
+    return list(itertools.chain.from_iterable(results))
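A usage sketch: since each worker receives a whole slice of `args`, `func` must accept a batch (a list or array), not a single item:

def square_batch(xs):
    # works on a whole slice at once, as map_parallel requires
    return [x ** 2 for x in xs]

map_parallel(square_batch, list(range(10)), n_jobs=2)
# -> [0, 1, 4, 9, 16, 25, 36, 49, 64, 81], computed as two slices of 5 items each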
+ + + +
+[docs]
+def parallel(func, args, n_jobs, seed=None, asarray=True, backend='loky'):
+    """
+    A wrapper of joblib's parallelization:
+
+    >>> Parallel(n_jobs=n_jobs)(
+    >>>      delayed(func)(args_i) for args_i in args
+    >>> )
+
+    that, in addition, silently propagates the `quapy.environ` variable to the child processes.
+    Seeds the child processes to ensure reproducibility when n_jobs>1.
+
+    :param func: callable
+    :param args: args of func
+    :param n_jobs: the number of parallel workers
+    :param seed: the numeric seed
+    :param asarray: set to True to return a np.ndarray instead of a list
+    :param backend: indicates the backend used for handling parallel work
+    """
+    def func_dec(environ, seed, *args):
+        qp.environ = environ.copy()
+        qp.environ['N_JOBS'] = 1
+        # set a context with a temporary seed to ensure results are reproducible in parallel
+        with ExitStack() as stack:
+            if seed is not None:
+                stack.enter_context(qp.util.temp_seed(seed))
+            return func(*args)
+
+    out = Parallel(n_jobs=n_jobs, backend=backend)(
+        delayed(func_dec)(qp.environ, None if seed is None else seed+i, args_i) for i, args_i in enumerate(args)
+    )
+    if asarray:
+        out = np.asarray(out)
+    return out
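A usage sketch of the wrapper: each element of `args` becomes one call, and worker `i` runs under `temp_seed(seed + i)`, so stochastic work becomes reproducible across reruns:

def draw_sum(n):
    # any np.random-dependent computation
    return np.random.rand(n).sum()

out = parallel(draw_sum, args=[10, 20, 30], n_jobs=2, seed=0)
# rerunning with the same seed yields the same np.ndarray `out`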
+ + + +
+[docs]
+@contextlib.contextmanager
+def temp_seed(random_state):
+    """
+    Can be used in a "with" context to set a temporary seed without modifying numpy's outer random state. E.g.:
+
+    >>> with temp_seed(random_seed):
+    >>>   pass # do any computation depending on np.random functionality
+
+    :param random_state: the seed to set within the "with" context
+    """
+    if random_state is not None:
+        state = np.random.get_state()
+        # save the seed in case it is needed (for instance, for seeding child processes)
+        qp.environ['_R_SEED'] = random_state
+        np.random.seed(random_state)
+    try:
+        yield
+    finally:
+        if random_state is not None:
+            np.random.set_state(state)
+ + + +
+[docs] +def download_file(url, archive_filename): + """ + Downloads a file from a url + + :param url: the url + :param archive_filename: destination filename + """ + def progress(blocknum, bs, size): + total_sz_mb = '%.2f MB' % (size / 1e6) + current_sz_mb = '%.2f MB' % ((blocknum * bs) / 1e6) + print('\rdownloaded %s / %s' % (current_sz_mb, total_sz_mb), end='') + print("Downloading %s" % url) + urllib.request.urlretrieve(url, filename=archive_filename, reporthook=progress) + print("")
+ + + +
+[docs]
+def download_file_if_not_exists(url, archive_filename):
+    """
+    Downloads a file (using :meth:`download_file`) if it does not exist yet.
+
+    :param url: the url
+    :param archive_filename: destination filename
+    """
+    if os.path.exists(archive_filename):
+        return
+    create_if_not_exist(os.path.dirname(archive_filename))
+    download_file(url, archive_filename)
+ + + +
+[docs] +def create_if_not_exist(path): + """ + An alias to `os.makedirs(path, exist_ok=True)` that also returns the path. This is useful in cases like, e.g.: + + >>> path = create_if_not_exist(os.path.join(dir, subdir, anotherdir)) + + :param path: path to create + :return: the path itself + """ + os.makedirs(path, exist_ok=True) + return path
+ + + +
+[docs]
+def get_quapy_home():
+    """
+    Gets the home directory of QuaPy, i.e., the directory where QuaPy saves permanent data, such as downloaded
+    datasets. This directory is `~/quapy_data`
+
+    :return: a string representing the path
+    """
+    home = os.path.join(str(Path.home()), 'quapy_data')
+    os.makedirs(home, exist_ok=True)
+    return home
+ + + +
+[docs]
+def create_parent_dir(path):
+    """
+    Creates the parent directory (if any) of a given path, if it does not exist. E.g., for `./path/to/file.txt`,
+    the path `./path/to` is created.
+
+    :param path: the path
+    """
+    parentdir = Path(path).parent
+    if parentdir:
+        os.makedirs(parentdir, exist_ok=True)
+ + + +
+[docs]
+def save_text_file(path, text):
+    """
+    Saves a text file to disk, given its full path, and creates the parent directory if missing.
+
+    :param path: path where to save the file.
+    :param text: text to save.
+    """
+    create_parent_dir(path)
+    with open(path, 'wt') as fout:  # note: the file is opened at `path` (not at `text`)
+        fout.write(text)
+ + + +
+[docs] +def pickled_resource(pickle_path:str, generation_func:callable, *args): + """ + Allows for fast reuse of resources that are generated only once by calling generation_func(\\*args). The next times + this function is invoked, it loads the pickled resource. Example: + + >>> def some_array(n): # a mock resource created with one parameter (`n`) + >>> return np.random.rand(n) + >>> pickled_resource('./my_array.pkl', some_array, 10) # the resource does not exist: it is created by calling some_array(10) + >>> pickled_resource('./my_array.pkl', some_array, 10) # the resource exists; it is loaded from './my_array.pkl' + + :param pickle_path: the path where to save (first time) and load (next times) the resource + :param generation_func: the function that generates the resource, in case it does not exist in pickle_path + :param args: any arg that generation_func uses for generating the resources + :return: the resource + """ + if pickle_path is None: + return generation_func(*args) + else: + if os.path.exists(pickle_path): + return pickle.load(open(pickle_path, 'rb')) + else: + instance = generation_func(*args) + os.makedirs(str(Path(pickle_path).parent), exist_ok=True) + pickle.dump(instance, open(pickle_path, 'wb'), pickle.HIGHEST_PROTOCOL) + return instance
+ + + +def _check_sample_size(sample_size): + if sample_size is None: + assert qp.environ['SAMPLE_SIZE'] is not None, \ + 'error: sample_size set to None, and cannot be resolved from the environment' + sample_size = qp.environ['SAMPLE_SIZE'] + assert isinstance(sample_size, int) and sample_size > 0, \ + 'error: sample_size is not a positive integer' + return sample_size + + +
+[docs]
+class EarlyStop:
+    """
+    A class implementing the early-stopping condition typically used for training neural networks.
+
+    >>> earlystop = EarlyStop(patience=2, lower_is_better=True)
+    >>> earlystop(0.9, epoch=0)
+    >>> earlystop(0.7, epoch=1)
+    >>> earlystop.IMPROVED  # is True
+    >>> earlystop(1.0, epoch=2)
+    >>> earlystop.STOP  # is False (patience=1)
+    >>> earlystop(1.0, epoch=3)
+    >>> earlystop.STOP  # is True (patience=0)
+    >>> earlystop.best_epoch  # is 1
+    >>> earlystop.best_score  # is 0.7
+
+    :param patience: the number of (consecutive) times that a monitored evaluation metric (typically obtained in a
+        held-out validation split) can be found to be worse than the best one obtained so far before flagging the
+        stopping condition. An instance of this class is `callable`, and is to be used as shown above.
+    :param lower_is_better: if True (default) the metric is to be minimized.
+    :ivar best_score: keeps track of the best value seen so far
+    :ivar best_epoch: keeps track of the epoch in which the best score was set
+    :ivar STOP: flag (boolean) indicating the stopping condition
+    :ivar IMPROVED: flag (boolean) indicating whether there was an improvement in the last call
+    """
+
+    def __init__(self, patience, lower_is_better=True):
+
+        self.PATIENCE_LIMIT = patience
+        self.better = lambda a, b: a < b if lower_is_better else a > b
+        self.patience = patience
+        self.best_score = None
+        self.best_epoch = None
+        self.STOP = False
+        self.IMPROVED = False
+
+    def __call__(self, watch_score, epoch):
+        """
+        Commits the new score found in epoch `epoch`. If the score improves over the best score found so far, then
+        the patience counter gets reset. Otherwise, the patience counter is decreased and, in case it reaches 0,
+        the flag STOP becomes True.
+
+        :param watch_score: the new score
+        :param epoch: the current epoch
+        """
+        self.IMPROVED = (self.best_score is None or self.better(watch_score, self.best_score))
+        if self.IMPROVED:
+            self.best_score = watch_score
+            self.best_epoch = epoch
+            self.patience = self.PATIENCE_LIMIT
+        else:
+            self.patience -= 1
+            if self.patience <= 0:
+                self.STOP = True
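A runnable sketch of the typical loop (the validation losses are made-up values, standing in for whatever metric a real training loop would compute per epoch):

val_losses = [0.9, 0.7, 0.8, 0.75, 0.71, 0.72, 0.73, 0.74, 0.76, 0.77]
earlystop = EarlyStop(patience=5, lower_is_better=True)
for epoch, val_loss in enumerate(val_losses):
    earlystop(val_loss, epoch)   # commit this epoch's score
    if earlystop.STOP:
        break                    # patience exhausted at epoch 6
print(earlystop.best_epoch, earlystop.best_score)  # 1 0.7

In a real run, one would checkpoint the model whenever `earlystop.IMPROVED` is True and restore the weights from `earlystop.best_epoch` after stopping.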
+ + + +
+[docs]
+@contextlib.contextmanager
+def timeout(seconds):
+    """
+    Opens a context that raises a TimeoutError if its body has not finished after a given number of seconds
+
+    >>> def func(start_msg, end_msg):
+    >>>     print(start_msg)
+    >>>     sleep(2)
+    >>>     print(end_msg)
+    >>>
+    >>> with timeout(1):
+    >>>     func('begin function', 'end function')
+    >>> Out[]
+    >>> begin function
+    >>> TimeoutError
+
+    :param seconds: number of seconds, set to <=0 to ignore the timer
+    """
+    if seconds > 0:
+        def handler(signum, frame):
+            raise TimeoutError()
+
+        signal.signal(signal.SIGALRM, handler)
+        signal.alarm(seconds)
+
+    try:
+        yield
+    finally:
+        # cancel the alarm even if the body raised (including the TimeoutError itself)
+        if seconds > 0:
+            signal.alarm(0)
+ + +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_sources/api.rst.txt b/docs/build/html/_sources/api.rst.txt new file mode 100644 index 0000000..b628a93 --- /dev/null +++ b/docs/build/html/_sources/api.rst.txt @@ -0,0 +1,7 @@ +API +=== + +.. autosummary:: + :toctree: generated + + quapy \ No newline at end of file diff --git a/docs/build/html/_sources/generated/quapy.rst.txt b/docs/build/html/_sources/generated/quapy.rst.txt new file mode 100644 index 0000000..52098bb --- /dev/null +++ b/docs/build/html/_sources/generated/quapy.rst.txt @@ -0,0 +1,23 @@ +quapy +===== + +.. automodule:: quapy + + + + + + + + + + + + + + + + + + + diff --git a/docs/build/html/_sources/quapy.benchmarking.rst.txt b/docs/build/html/_sources/quapy.benchmarking.rst.txt new file mode 100644 index 0000000..cb481a5 --- /dev/null +++ b/docs/build/html/_sources/quapy.benchmarking.rst.txt @@ -0,0 +1,21 @@ +quapy.benchmarking package +========================== + +Submodules +---------- + +quapy.benchmarking.typical module +--------------------------------- + +.. automodule:: quapy.benchmarking.typical + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: quapy.benchmarking + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/build/html/_static/_sphinx_javascript_frameworks_compat.js b/docs/build/html/_static/_sphinx_javascript_frameworks_compat.js new file mode 100644 index 0000000..8141580 --- /dev/null +++ b/docs/build/html/_static/_sphinx_javascript_frameworks_compat.js @@ -0,0 +1,123 @@ +/* Compatability shim for jQuery and underscores.js. + * + * Copyright Sphinx contributors + * Released under the two clause BSD licence + */ + +/** + * small helper function to urldecode strings + * + * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL + */ +jQuery.urldecode = function(x) { + if (!x) { + return x + } + return decodeURIComponent(x.replace(/\+/g, ' ')); +}; + +/** + * small helper function to urlencode strings + */ +jQuery.urlencode = encodeURIComponent; + +/** + * This function returns the parsed url parameters of the + * current request. Multiple values per key are supported, + * it will always return arrays of strings for the value parts. + */ +jQuery.getQueryParameters = function(s) { + if (typeof s === 'undefined') + s = document.location.search; + var parts = s.substr(s.indexOf('?') + 1).split('&'); + var result = {}; + for (var i = 0; i < parts.length; i++) { + var tmp = parts[i].split('=', 2); + var key = jQuery.urldecode(tmp[0]); + var value = jQuery.urldecode(tmp[1]); + if (key in result) + result[key].push(value); + else + result[key] = [value]; + } + return result; +}; + +/** + * highlight a given string on a jquery object by wrapping it in + * span elements with the given class name. 
+ */ +jQuery.fn.highlightText = function(text, className) { + function highlight(node, addItems) { + if (node.nodeType === 3) { + var val = node.nodeValue; + var pos = val.toLowerCase().indexOf(text); + if (pos >= 0 && + !jQuery(node.parentNode).hasClass(className) && + !jQuery(node.parentNode).hasClass("nohighlight")) { + var span; + var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.className = className; + } + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + node.parentNode.insertBefore(span, node.parentNode.insertBefore( + document.createTextNode(val.substr(pos + text.length)), + node.nextSibling)); + node.nodeValue = val.substr(0, pos); + if (isInSVG) { + var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect"); + var bbox = node.parentElement.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute('class', className); + addItems.push({ + "parent": node.parentNode, + "target": rect}); + } + } + } + else if (!jQuery(node).is("button, select, textarea")) { + jQuery.each(node.childNodes, function() { + highlight(this, addItems); + }); + } + } + var addItems = []; + var result = this.each(function() { + highlight(this, addItems); + }); + for (var i = 0; i < addItems.length; ++i) { + jQuery(addItems[i].parent).before(addItems[i].target); + } + return result; +}; + +/* + * backward compatibility for jQuery.browser + * This will be supported until firefox bug is fixed. + */ +if (!jQuery.browser) { + jQuery.uaMatch = function(ua) { + ua = ua.toLowerCase(); + + var match = /(chrome)[ \/]([\w.]+)/.exec(ua) || + /(webkit)[ \/]([\w.]+)/.exec(ua) || + /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) || + /(msie) ([\w.]+)/.exec(ua) || + ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? 
rv:([\w.]+)|)/.exec(ua) || + []; + + return { + browser: match[ 1 ] || "", + version: match[ 2 ] || "0" + }; + }; + jQuery.browser = {}; + jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true; +} diff --git a/docs/build/html/_static/contents.png b/docs/build/html/_static/contents.png new file mode 100644 index 0000000..6c59aa1 Binary files /dev/null and b/docs/build/html/_static/contents.png differ diff --git a/docs/build/html/_static/css/badge_only.css b/docs/build/html/_static/css/badge_only.css new file mode 100644 index 0000000..c718cee --- /dev/null +++ b/docs/build/html/_static/css/badge_only.css @@ -0,0 +1 @@ +.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version 
.fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} \ No newline at end of file diff --git a/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff b/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff new file mode 100644 index 0000000..6cb6000 Binary files /dev/null and b/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff differ diff --git a/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff2 b/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff2 new file mode 100644 index 0000000..7059e23 Binary files /dev/null and b/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff2 differ diff --git a/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff b/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff new file mode 100644 index 0000000..f815f63 Binary files /dev/null and b/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff differ diff --git a/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff2 b/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff2 new file mode 100644 index 0000000..f2c76e5 Binary files /dev/null and b/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff2 differ diff --git a/docs/build/html/_static/css/fonts/fontawesome-webfont.eot b/docs/build/html/_static/css/fonts/fontawesome-webfont.eot new file mode 100644 index 0000000..e9f60ca Binary files /dev/null and b/docs/build/html/_static/css/fonts/fontawesome-webfont.eot differ diff --git a/docs/build/html/_static/css/fonts/fontawesome-webfont.svg b/docs/build/html/_static/css/fonts/fontawesome-webfont.svg new file mode 100644 index 0000000..855c845 --- /dev/null +++ b/docs/build/html/_static/css/fonts/fontawesome-webfont.svg @@ -0,0 +1,2671 @@ + + + + +Created by FontForge 20120731 at Mon Oct 24 17:37:40 2016 + By ,,, +Copyright Dave Gandy 2016. All rights reserved. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/build/html/_static/css/fonts/fontawesome-webfont.ttf b/docs/build/html/_static/css/fonts/fontawesome-webfont.ttf new file mode 100644 index 0000000..35acda2 Binary files /dev/null and b/docs/build/html/_static/css/fonts/fontawesome-webfont.ttf differ diff --git a/docs/build/html/_static/css/fonts/fontawesome-webfont.woff b/docs/build/html/_static/css/fonts/fontawesome-webfont.woff new file mode 100644 index 0000000..400014a Binary files /dev/null and b/docs/build/html/_static/css/fonts/fontawesome-webfont.woff differ diff --git a/docs/build/html/_static/css/fonts/fontawesome-webfont.woff2 b/docs/build/html/_static/css/fonts/fontawesome-webfont.woff2 new file mode 100644 index 0000000..4d13fc6 Binary files /dev/null and b/docs/build/html/_static/css/fonts/fontawesome-webfont.woff2 differ diff --git a/docs/build/html/_static/css/fonts/lato-bold-italic.woff b/docs/build/html/_static/css/fonts/lato-bold-italic.woff new file mode 100644 index 0000000..88ad05b Binary files /dev/null and b/docs/build/html/_static/css/fonts/lato-bold-italic.woff differ diff --git a/docs/build/html/_static/css/fonts/lato-bold-italic.woff2 b/docs/build/html/_static/css/fonts/lato-bold-italic.woff2 new file mode 100644 index 0000000..c4e3d80 Binary files /dev/null and b/docs/build/html/_static/css/fonts/lato-bold-italic.woff2 differ diff --git a/docs/build/html/_static/css/fonts/lato-bold.woff b/docs/build/html/_static/css/fonts/lato-bold.woff new file mode 100644 index 0000000..c6dff51 Binary files /dev/null and b/docs/build/html/_static/css/fonts/lato-bold.woff differ diff --git a/docs/build/html/_static/css/fonts/lato-bold.woff2 b/docs/build/html/_static/css/fonts/lato-bold.woff2 new file mode 100644 index 0000000..bb19504 Binary files /dev/null and b/docs/build/html/_static/css/fonts/lato-bold.woff2 differ diff --git a/docs/build/html/_static/css/fonts/lato-normal-italic.woff b/docs/build/html/_static/css/fonts/lato-normal-italic.woff new file mode 100644 index 0000000..76114bc Binary files /dev/null and b/docs/build/html/_static/css/fonts/lato-normal-italic.woff differ diff --git 
a/docs/build/html/_static/css/fonts/lato-normal-italic.woff2 b/docs/build/html/_static/css/fonts/lato-normal-italic.woff2 new file mode 100644 index 0000000..3404f37 Binary files /dev/null and b/docs/build/html/_static/css/fonts/lato-normal-italic.woff2 differ diff --git a/docs/build/html/_static/css/fonts/lato-normal.woff b/docs/build/html/_static/css/fonts/lato-normal.woff new file mode 100644 index 0000000..ae1307f Binary files /dev/null and b/docs/build/html/_static/css/fonts/lato-normal.woff differ diff --git a/docs/build/html/_static/css/fonts/lato-normal.woff2 b/docs/build/html/_static/css/fonts/lato-normal.woff2 new file mode 100644 index 0000000..3bf9843 Binary files /dev/null and b/docs/build/html/_static/css/fonts/lato-normal.woff2 differ diff --git a/docs/build/html/_static/css/theme.css b/docs/build/html/_static/css/theme.css new file mode 100644 index 0000000..19a446a --- /dev/null +++ b/docs/build/html/_static/css/theme.css @@ -0,0 +1,4 @@ +html{box-sizing:border-box}*,:after,:before{box-sizing:inherit}article,aside,details,figcaption,figure,footer,header,hgroup,nav,section{display:block}audio,canvas,video{display:inline-block;*display:inline;*zoom:1}[hidden],audio:not([controls]){display:none}*{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}html{font-size:100%;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%}body{margin:0}a:active,a:hover{outline:0}abbr[title]{border-bottom:1px dotted}b,strong{font-weight:700}blockquote{margin:0}dfn{font-style:italic}ins{background:#ff9;text-decoration:none}ins,mark{color:#000}mark{background:#ff0;font-style:italic;font-weight:700}.rst-content code,.rst-content tt,code,kbd,pre,samp{font-family:monospace,serif;_font-family:courier new,monospace;font-size:1em}pre{white-space:pre}q{quotes:none}q:after,q:before{content:"";content:none}small{font-size:85%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sup{top:-.5em}sub{bottom:-.25em}dl,ol,ul{margin:0;padding:0;list-style:none;list-style-image:none}li{list-style:none}dd{margin:0}img{border:0;-ms-interpolation-mode:bicubic;vertical-align:middle;max-width:100%}svg:not(:root){overflow:hidden}figure,form{margin:0}label{cursor:pointer}button,input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle}button,input{line-height:normal}button,input[type=button],input[type=reset],input[type=submit]{cursor:pointer;-webkit-appearance:button;*overflow:visible}button[disabled],input[disabled]{cursor:default}input[type=search]{-webkit-appearance:textfield;-moz-box-sizing:content-box;-webkit-box-sizing:content-box;box-sizing:content-box}textarea{resize:vertical}table{border-collapse:collapse;border-spacing:0}td{vertical-align:top}.chromeframe{margin:.2em 0;background:#ccc;color:#000;padding:.2em 0}.ir{display:block;border:0;text-indent:-999em;overflow:hidden;background-color:transparent;background-repeat:no-repeat;text-align:left;direction:ltr;*line-height:0}.ir br{display:none}.hidden{display:none!important;visibility:hidden}.visuallyhidden{border:0;clip:rect(0 0 0 0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.visuallyhidden.focusable:active,.visuallyhidden.focusable:focus{clip:auto;height:auto;margin:0;overflow:visible;position:static;width:auto}.invisible{visibility:hidden}.relative{position:relative}big,small{font-size:100%}@media 
print{body,html,section{background:none!important}*{box-shadow:none!important;text-shadow:none!important;filter:none!important;-ms-filter:none!important}a,a:visited{text-decoration:underline}.ir a:after,a[href^="#"]:after,a[href^="javascript:"]:after{content:""}blockquote,pre{page-break-inside:avoid}thead{display:table-header-group}img,tr{page-break-inside:avoid}img{max-width:100%!important}@page{margin:.5cm}.rst-content .toctree-wrapper>p.caption,h2,h3,p{orphans:3;widows:3}.rst-content .toctree-wrapper>p.caption,h2,h3{page-break-after:avoid}}.btn,.fa:before,.icon:before,.rst-content .admonition,.rst-content .admonition-title:before,.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .code-block-caption .headerlink:before,.rst-content .danger,.rst-content .eqno .headerlink:before,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .note,.rst-content .seealso,.rst-content .tip,.rst-content .warning,.rst-content code.download span:first-child:before,.rst-content dl dt .headerlink:before,.rst-content h1 .headerlink:before,.rst-content h2 .headerlink:before,.rst-content h3 .headerlink:before,.rst-content h4 .headerlink:before,.rst-content h5 .headerlink:before,.rst-content h6 .headerlink:before,.rst-content p.caption .headerlink:before,.rst-content p .headerlink:before,.rst-content table>caption .headerlink:before,.rst-content tt.download span:first-child:before,.wy-alert,.wy-dropdown .caret:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before,.wy-menu-vertical li.current>a button.toctree-expand:before,.wy-menu-vertical li.on a button.toctree-expand:before,.wy-menu-vertical li button.toctree-expand:before,input[type=color],input[type=date],input[type=datetime-local],input[type=datetime],input[type=email],input[type=month],input[type=number],input[type=password],input[type=search],input[type=tel],input[type=text],input[type=time],input[type=url],input[type=week],select,textarea{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}/*! 
+ * Font Awesome 4.7.0 by @davegandy - http://fontawesome.io - @fontawesome + * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License) + */@font-face{font-family:FontAwesome;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713);src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix&v=4.7.0) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#fontawesomeregular) format("svg");font-weight:400;font-style:normal}.fa,.icon,.rst-content .admonition-title,.rst-content .code-block-caption .headerlink,.rst-content .eqno .headerlink,.rst-content code.download span:first-child,.rst-content dl dt .headerlink,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content p.caption .headerlink,.rst-content p .headerlink,.rst-content table>caption .headerlink,.rst-content tt.download span:first-child,.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a button.toctree-expand,.wy-menu-vertical li button.toctree-expand{display:inline-block;font:normal normal normal 14px/1 FontAwesome;font-size:inherit;text-rendering:auto;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.fa-lg{font-size:1.33333em;line-height:.75em;vertical-align:-15%}.fa-2x{font-size:2em}.fa-3x{font-size:3em}.fa-4x{font-size:4em}.fa-5x{font-size:5em}.fa-fw{width:1.28571em;text-align:center}.fa-ul{padding-left:0;margin-left:2.14286em;list-style-type:none}.fa-ul>li{position:relative}.fa-li{position:absolute;left:-2.14286em;width:2.14286em;top:.14286em;text-align:center}.fa-li.fa-lg{left:-1.85714em}.fa-border{padding:.2em .25em .15em;border:.08em solid #eee;border-radius:.1em}.fa-pull-left{float:left}.fa-pull-right{float:right}.fa-pull-left.icon,.fa.fa-pull-left,.rst-content .code-block-caption .fa-pull-left.headerlink,.rst-content .eqno .fa-pull-left.headerlink,.rst-content .fa-pull-left.admonition-title,.rst-content code.download span.fa-pull-left:first-child,.rst-content dl dt .fa-pull-left.headerlink,.rst-content h1 .fa-pull-left.headerlink,.rst-content h2 .fa-pull-left.headerlink,.rst-content h3 .fa-pull-left.headerlink,.rst-content h4 .fa-pull-left.headerlink,.rst-content h5 .fa-pull-left.headerlink,.rst-content h6 .fa-pull-left.headerlink,.rst-content p .fa-pull-left.headerlink,.rst-content table>caption .fa-pull-left.headerlink,.rst-content tt.download span.fa-pull-left:first-child,.wy-menu-vertical li.current>a button.fa-pull-left.toctree-expand,.wy-menu-vertical li.on a button.fa-pull-left.toctree-expand,.wy-menu-vertical li button.fa-pull-left.toctree-expand{margin-right:.3em}.fa-pull-right.icon,.fa.fa-pull-right,.rst-content .code-block-caption .fa-pull-right.headerlink,.rst-content .eqno .fa-pull-right.headerlink,.rst-content .fa-pull-right.admonition-title,.rst-content code.download span.fa-pull-right:first-child,.rst-content dl dt .fa-pull-right.headerlink,.rst-content h1 .fa-pull-right.headerlink,.rst-content h2 .fa-pull-right.headerlink,.rst-content h3 .fa-pull-right.headerlink,.rst-content h4 .fa-pull-right.headerlink,.rst-content h5 .fa-pull-right.headerlink,.rst-content h6 
.fa-pull-right.headerlink,.rst-content p .fa-pull-right.headerlink,.rst-content table>caption .fa-pull-right.headerlink,.rst-content tt.download span.fa-pull-right:first-child,.wy-menu-vertical li.current>a button.fa-pull-right.toctree-expand,.wy-menu-vertical li.on a button.fa-pull-right.toctree-expand,.wy-menu-vertical li button.fa-pull-right.toctree-expand{margin-left:.3em}.pull-right{float:right}.pull-left{float:left}.fa.pull-left,.pull-left.icon,.rst-content .code-block-caption .pull-left.headerlink,.rst-content .eqno .pull-left.headerlink,.rst-content .pull-left.admonition-title,.rst-content code.download span.pull-left:first-child,.rst-content dl dt .pull-left.headerlink,.rst-content h1 .pull-left.headerlink,.rst-content h2 .pull-left.headerlink,.rst-content h3 .pull-left.headerlink,.rst-content h4 .pull-left.headerlink,.rst-content h5 .pull-left.headerlink,.rst-content h6 .pull-left.headerlink,.rst-content p .pull-left.headerlink,.rst-content table>caption .pull-left.headerlink,.rst-content tt.download span.pull-left:first-child,.wy-menu-vertical li.current>a button.pull-left.toctree-expand,.wy-menu-vertical li.on a button.pull-left.toctree-expand,.wy-menu-vertical li button.pull-left.toctree-expand{margin-right:.3em}.fa.pull-right,.pull-right.icon,.rst-content .code-block-caption .pull-right.headerlink,.rst-content .eqno .pull-right.headerlink,.rst-content .pull-right.admonition-title,.rst-content code.download span.pull-right:first-child,.rst-content dl dt .pull-right.headerlink,.rst-content h1 .pull-right.headerlink,.rst-content h2 .pull-right.headerlink,.rst-content h3 .pull-right.headerlink,.rst-content h4 .pull-right.headerlink,.rst-content h5 .pull-right.headerlink,.rst-content h6 .pull-right.headerlink,.rst-content p .pull-right.headerlink,.rst-content table>caption .pull-right.headerlink,.rst-content tt.download span.pull-right:first-child,.wy-menu-vertical li.current>a button.pull-right.toctree-expand,.wy-menu-vertical li.on a button.pull-right.toctree-expand,.wy-menu-vertical li button.pull-right.toctree-expand{margin-left:.3em}.fa-spin{-webkit-animation:fa-spin 2s linear infinite;animation:fa-spin 2s linear infinite}.fa-pulse{-webkit-animation:fa-spin 1s steps(8) infinite;animation:fa-spin 1s steps(8) infinite}@-webkit-keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.fa-rotate-90{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=1)";-webkit-transform:rotate(90deg);-ms-transform:rotate(90deg);transform:rotate(90deg)}.fa-rotate-180{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2)";-webkit-transform:rotate(180deg);-ms-transform:rotate(180deg);transform:rotate(180deg)}.fa-rotate-270{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=3)";-webkit-transform:rotate(270deg);-ms-transform:rotate(270deg);transform:rotate(270deg)}.fa-flip-horizontal{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1)";-webkit-transform:scaleX(-1);-ms-transform:scaleX(-1);transform:scaleX(-1)}.fa-flip-vertical{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2, mirror=1)";-webkit-transform:scaleY(-1);-ms-transform:scaleY(-1);transform:scaleY(-1)}:root .fa-flip-horizontal,:root .fa-flip-vertical,:root .fa-rotate-90,:root .fa-rotate-180,:root 
.fa-rotate-270{filter:none}.fa-stack{position:relative;display:inline-block;width:2em;height:2em;line-height:2em;vertical-align:middle}.fa-stack-1x,.fa-stack-2x{position:absolute;left:0;width:100%;text-align:center}.fa-stack-1x{line-height:inherit}.fa-stack-2x{font-size:2em}.fa-inverse{color:#fff}.fa-glass:before{content:""}.fa-music:before{content:""}.fa-search:before,.icon-search:before{content:""}.fa-envelope-o:before{content:""}.fa-heart:before{content:""}.fa-star:before{content:""}.fa-star-o:before{content:""}.fa-user:before{content:""}.fa-film:before{content:""}.fa-th-large:before{content:""}.fa-th:before{content:""}.fa-th-list:before{content:""}.fa-check:before{content:""}.fa-close:before,.fa-remove:before,.fa-times:before{content:""}.fa-search-plus:before{content:""}.fa-search-minus:before{content:""}.fa-power-off:before{content:""}.fa-signal:before{content:""}.fa-cog:before,.fa-gear:before{content:""}.fa-trash-o:before{content:""}.fa-home:before,.icon-home:before{content:""}.fa-file-o:before{content:""}.fa-clock-o:before{content:""}.fa-road:before{content:""}.fa-download:before,.rst-content code.download span:first-child:before,.rst-content tt.download span:first-child:before{content:""}.fa-arrow-circle-o-down:before{content:""}.fa-arrow-circle-o-up:before{content:""}.fa-inbox:before{content:""}.fa-play-circle-o:before{content:""}.fa-repeat:before,.fa-rotate-right:before{content:""}.fa-refresh:before{content:""}.fa-list-alt:before{content:""}.fa-lock:before{content:""}.fa-flag:before{content:""}.fa-headphones:before{content:""}.fa-volume-off:before{content:""}.fa-volume-down:before{content:""}.fa-volume-up:before{content:""}.fa-qrcode:before{content:""}.fa-barcode:before{content:""}.fa-tag:before{content:""}.fa-tags:before{content:""}.fa-book:before,.icon-book:before{content:""}.fa-bookmark:before{content:""}.fa-print:before{content:""}.fa-camera:before{content:""}.fa-font:before{content:""}.fa-bold:before{content:""}.fa-italic:before{content:""}.fa-text-height:before{content:""}.fa-text-width:before{content:""}.fa-align-left:before{content:""}.fa-align-center:before{content:""}.fa-align-right:before{content:""}.fa-align-justify:before{content:""}.fa-list:before{content:""}.fa-dedent:before,.fa-outdent:before{content:""}.fa-indent:before{content:""}.fa-video-camera:before{content:""}.fa-image:before,.fa-photo:before,.fa-picture-o:before{content:""}.fa-pencil:before{content:""}.fa-map-marker:before{content:""}.fa-adjust:before{content:""}.fa-tint:before{content:""}.fa-edit:before,.fa-pencil-square-o:before{content:""}.fa-share-square-o:before{content:""}.fa-check-square-o:before{content:""}.fa-arrows:before{content:""}.fa-step-backward:before{content:""}.fa-fast-backward:before{content:""}.fa-backward:before{content:""}.fa-play:before{content:""}.fa-pause:before{content:""}.fa-stop:before{content:""}.fa-forward:before{content:""}.fa-fast-forward:before{content:""}.fa-step-forward:before{content:""}.fa-eject:before{content:""}.fa-chevron-left:before{content:""}.fa-chevron-right:before{content:""}.fa-plus-circle:before{content:""}.fa-minus-circle:before{content:""}.fa-times-circle:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before{content:""}.fa-check-circle:before,.wy-inline-validate.wy-inline-validate-success 
.wy-input-context:before{content:""}.fa-question-circle:before{content:""}.fa-info-circle:before{content:""}.fa-crosshairs:before{content:""}.fa-times-circle-o:before{content:""}.fa-check-circle-o:before{content:""}.fa-ban:before{content:""}.fa-arrow-left:before{content:""}.fa-arrow-right:before{content:""}.fa-arrow-up:before{content:""}.fa-arrow-down:before{content:""}.fa-mail-forward:before,.fa-share:before{content:""}.fa-expand:before{content:""}.fa-compress:before{content:""}.fa-plus:before{content:""}.fa-minus:before{content:""}.fa-asterisk:before{content:""}.fa-exclamation-circle:before,.rst-content .admonition-title:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before{content:""}.fa-gift:before{content:""}.fa-leaf:before{content:""}.fa-fire:before,.icon-fire:before{content:""}.fa-eye:before{content:""}.fa-eye-slash:before{content:""}.fa-exclamation-triangle:before,.fa-warning:before{content:""}.fa-plane:before{content:""}.fa-calendar:before{content:""}.fa-random:before{content:""}.fa-comment:before{content:""}.fa-magnet:before{content:""}.fa-chevron-up:before{content:""}.fa-chevron-down:before{content:""}.fa-retweet:before{content:""}.fa-shopping-cart:before{content:""}.fa-folder:before{content:""}.fa-folder-open:before{content:""}.fa-arrows-v:before{content:""}.fa-arrows-h:before{content:""}.fa-bar-chart-o:before,.fa-bar-chart:before{content:""}.fa-twitter-square:before{content:""}.fa-facebook-square:before{content:""}.fa-camera-retro:before{content:""}.fa-key:before{content:""}.fa-cogs:before,.fa-gears:before{content:""}.fa-comments:before{content:""}.fa-thumbs-o-up:before{content:""}.fa-thumbs-o-down:before{content:""}.fa-star-half:before{content:""}.fa-heart-o:before{content:""}.fa-sign-out:before{content:""}.fa-linkedin-square:before{content:""}.fa-thumb-tack:before{content:""}.fa-external-link:before{content:""}.fa-sign-in:before{content:""}.fa-trophy:before{content:""}.fa-github-square:before{content:""}.fa-upload:before{content:""}.fa-lemon-o:before{content:""}.fa-phone:before{content:""}.fa-square-o:before{content:""}.fa-bookmark-o:before{content:""}.fa-phone-square:before{content:""}.fa-twitter:before{content:""}.fa-facebook-f:before,.fa-facebook:before{content:""}.fa-github:before,.icon-github:before{content:""}.fa-unlock:before{content:""}.fa-credit-card:before{content:""}.fa-feed:before,.fa-rss:before{content:""}.fa-hdd-o:before{content:""}.fa-bullhorn:before{content:""}.fa-bell:before{content:""}.fa-certificate:before{content:""}.fa-hand-o-right:before{content:""}.fa-hand-o-left:before{content:""}.fa-hand-o-up:before{content:""}.fa-hand-o-down:before{content:""}.fa-arrow-circle-left:before,.icon-circle-arrow-left:before{content:""}.fa-arrow-circle-right:before,.icon-circle-arrow-right:before{content:""}.fa-arrow-circle-up:before{content:""}.fa-arrow-circle-down:before{content:""}.fa-globe:before{content:""}.fa-wrench:before{content:""}.fa-tasks:before{content:""}.fa-filter:before{content:""}.fa-briefcase:before{content:""}.fa-arrows-alt:before{content:""}.fa-group:before,.fa-users:before{content:""}.fa-chain:before,.fa-link:before,.icon-link:before{content:""}.fa-cloud:before{content:""}.fa-flask:before{content:""}.fa-cut:before,.fa-scissors:before{content:""}.fa-copy:before,.fa-files-o:before{content:""}.fa-paperclip:before{content:""}.fa-floppy-o:before,.fa-save:before{content:""}.fa
-square:before{content:""}.fa-bars:before,.fa-navicon:before,.fa-reorder:before{content:""}.fa-list-ul:before{content:""}.fa-list-ol:before{content:""}.fa-strikethrough:before{content:""}.fa-underline:before{content:""}.fa-table:before{content:""}.fa-magic:before{content:""}.fa-truck:before{content:""}.fa-pinterest:before{content:""}.fa-pinterest-square:before{content:""}.fa-google-plus-square:before{content:""}.fa-google-plus:before{content:""}.fa-money:before{content:""}.fa-caret-down:before,.icon-caret-down:before,.wy-dropdown .caret:before{content:""}.fa-caret-up:before{content:""}.fa-caret-left:before{content:""}.fa-caret-right:before{content:""}.fa-columns:before{content:""}.fa-sort:before,.fa-unsorted:before{content:""}.fa-sort-desc:before,.fa-sort-down:before{content:""}.fa-sort-asc:before,.fa-sort-up:before{content:""}.fa-envelope:before{content:""}.fa-linkedin:before{content:""}.fa-rotate-left:before,.fa-undo:before{content:""}.fa-gavel:before,.fa-legal:before{content:""}.fa-dashboard:before,.fa-tachometer:before{content:""}.fa-comment-o:before{content:""}.fa-comments-o:before{content:""}.fa-bolt:before,.fa-flash:before{content:""}.fa-sitemap:before{content:""}.fa-umbrella:before{content:""}.fa-clipboard:before,.fa-paste:before{content:""}.fa-lightbulb-o:before{content:""}.fa-exchange:before{content:""}.fa-cloud-download:before{content:""}.fa-cloud-upload:before{content:""}.fa-user-md:before{content:""}.fa-stethoscope:before{content:""}.fa-suitcase:before{content:""}.fa-bell-o:before{content:""}.fa-coffee:before{content:""}.fa-cutlery:before{content:""}.fa-file-text-o:before{content:""}.fa-building-o:before{content:""}.fa-hospital-o:before{content:""}.fa-ambulance:before{content:""}.fa-medkit:before{content:""}.fa-fighter-jet:before{content:""}.fa-beer:before{content:""}.fa-h-square:before{content:""}.fa-plus-square:before{content:""}.fa-angle-double-left:before{content:""}.fa-angle-double-right:before{content:""}.fa-angle-double-up:before{content:""}.fa-angle-double-down:before{content:""}.fa-angle-left:before{content:""}.fa-angle-right:before{content:""}.fa-angle-up:before{content:""}.fa-angle-down:before{content:""}.fa-desktop:before{content:""}.fa-laptop:before{content:""}.fa-tablet:before{content:""}.fa-mobile-phone:before,.fa-mobile:before{content:""}.fa-circle-o:before{content:""}.fa-quote-left:before{content:""}.fa-quote-right:before{content:""}.fa-spinner:before{content:""}.fa-circle:before{content:""}.fa-mail-reply:before,.fa-reply:before{content:""}.fa-github-alt:before{content:""}.fa-folder-o:before{content:""}.fa-folder-open-o:before{content:""}.fa-smile-o:before{content:""}.fa-frown-o:before{content:""}.fa-meh-o:before{content:""}.fa-gamepad:before{content:""}.fa-keyboard-o:before{content:""}.fa-flag-o:before{content:""}.fa-flag-checkered:before{content:""}.fa-terminal:before{content:""}.fa-code:before{content:""}.fa-mail-reply-all:before,.fa-reply-all:before{content:""}.fa-star-half-empty:before,.fa-star-half-full:before,.fa-star-half-o:before{content:""}.fa-location-arrow:before{content:""}.fa-crop:before{content:""}.fa-code-fork:before{content:""}.fa-chain-broken:before,.fa-unlink:before{content:""}.fa-question:before{content:""}.fa-info:before{content:""}.fa-exclamation:before{content:""}.fa-superscript:before{content:""}.fa-subscript:before{content:""}.fa-eraser:before{content:""}.fa-puzzle-piece:before{content:""}.fa-microphone:before{content:""}.fa-microphone-slash:
before{content:""}.fa-shield:before{content:""}.fa-calendar-o:before{content:""}.fa-fire-extinguisher:before{content:""}.fa-rocket:before{content:""}.fa-maxcdn:before{content:""}.fa-chevron-circle-left:before{content:""}.fa-chevron-circle-right:before{content:""}.fa-chevron-circle-up:before{content:""}.fa-chevron-circle-down:before{content:""}.fa-html5:before{content:""}.fa-css3:before{content:""}.fa-anchor:before{content:""}.fa-unlock-alt:before{content:""}.fa-bullseye:before{content:""}.fa-ellipsis-h:before{content:""}.fa-ellipsis-v:before{content:""}.fa-rss-square:before{content:""}.fa-play-circle:before{content:""}.fa-ticket:before{content:""}.fa-minus-square:before{content:""}.fa-minus-square-o:before,.wy-menu-vertical li.current>a button.toctree-expand:before,.wy-menu-vertical li.on a button.toctree-expand:before{content:""}.fa-level-up:before{content:""}.fa-level-down:before{content:""}.fa-check-square:before{content:""}.fa-pencil-square:before{content:""}.fa-external-link-square:before{content:""}.fa-share-square:before{content:""}.fa-compass:before{content:""}.fa-caret-square-o-down:before,.fa-toggle-down:before{content:""}.fa-caret-square-o-up:before,.fa-toggle-up:before{content:""}.fa-caret-square-o-right:before,.fa-toggle-right:before{content:""}.fa-eur:before,.fa-euro:before{content:""}.fa-gbp:before{content:""}.fa-dollar:before,.fa-usd:before{content:""}.fa-inr:before,.fa-rupee:before{content:""}.fa-cny:before,.fa-jpy:before,.fa-rmb:before,.fa-yen:before{content:""}.fa-rouble:before,.fa-rub:before,.fa-ruble:before{content:""}.fa-krw:before,.fa-won:before{content:""}.fa-bitcoin:before,.fa-btc:before{content:""}.fa-file:before{content:""}.fa-file-text:before{content:""}.fa-sort-alpha-asc:before{content:""}.fa-sort-alpha-desc:before{content:""}.fa-sort-amount-asc:before{content:""}.fa-sort-amount-desc:before{content:""}.fa-sort-numeric-asc:before{content:""}.fa-sort-numeric-desc:before{content:""}.fa-thumbs-up:before{content:""}.fa-thumbs-down:before{content:""}.fa-youtube-square:before{content:""}.fa-youtube:before{content:""}.fa-xing:before{content:""}.fa-xing-square:before{content:""}.fa-youtube-play:before{content:""}.fa-dropbox:before{content:""}.fa-stack-overflow:before{content:""}.fa-instagram:before{content:""}.fa-flickr:before{content:""}.fa-adn:before{content:""}.fa-bitbucket:before,.icon-bitbucket:before{content:""}.fa-bitbucket-square:before{content:""}.fa-tumblr:before{content:""}.fa-tumblr-square:before{content:""}.fa-long-arrow-down:before{content:""}.fa-long-arrow-up:before{content:""}.fa-long-arrow-left:before{content:""}.fa-long-arrow-right:before{content:""}.fa-apple:before{content:""}.fa-windows:before{content:""}.fa-android:before{content:""}.fa-linux:before{content:""}.fa-dribbble:before{content:""}.fa-skype:before{content:""}.fa-foursquare:before{content:""}.fa-trello:before{content:""}.fa-female:before{content:""}.fa-male:before{content:""}.fa-gittip:before,.fa-gratipay:before{content:""}.fa-sun-o:before{content:""}.fa-moon-o:before{content:""}.fa-archive:before{content:""}.fa-bug:before{content:""}.fa-vk:before{content:""}.fa-weibo:before{content:""}.fa-renren:before{content:""}.fa-pagelines:before{content:""}.fa-stack-exchange:before{content:""}.fa-arrow-circle-o-right:before{content:""}.fa-arrow-circle-o-left:before{content:""}.fa-caret-square-o-left:before,.fa-toggle-left:before{content:""}.fa-dot-circle-o:before{content:""}.fa-wheelchair:before{content:""}.fa-
vimeo-square:before{content:""}.fa-try:before,.fa-turkish-lira:before{content:""}.fa-plus-square-o:before,.wy-menu-vertical li button.toctree-expand:before{content:""}.fa-space-shuttle:before{content:""}.fa-slack:before{content:""}.fa-envelope-square:before{content:""}.fa-wordpress:before{content:""}.fa-openid:before{content:""}.fa-bank:before,.fa-institution:before,.fa-university:before{content:""}.fa-graduation-cap:before,.fa-mortar-board:before{content:""}.fa-yahoo:before{content:""}.fa-google:before{content:""}.fa-reddit:before{content:""}.fa-reddit-square:before{content:""}.fa-stumbleupon-circle:before{content:""}.fa-stumbleupon:before{content:""}.fa-delicious:before{content:""}.fa-digg:before{content:""}.fa-pied-piper-pp:before{content:""}.fa-pied-piper-alt:before{content:""}.fa-drupal:before{content:""}.fa-joomla:before{content:""}.fa-language:before{content:""}.fa-fax:before{content:""}.fa-building:before{content:""}.fa-child:before{content:""}.fa-paw:before{content:""}.fa-spoon:before{content:""}.fa-cube:before{content:""}.fa-cubes:before{content:""}.fa-behance:before{content:""}.fa-behance-square:before{content:""}.fa-steam:before{content:""}.fa-steam-square:before{content:""}.fa-recycle:before{content:""}.fa-automobile:before,.fa-car:before{content:""}.fa-cab:before,.fa-taxi:before{content:""}.fa-tree:before{content:""}.fa-spotify:before{content:""}.fa-deviantart:before{content:""}.fa-soundcloud:before{content:""}.fa-database:before{content:""}.fa-file-pdf-o:before{content:""}.fa-file-word-o:before{content:""}.fa-file-excel-o:before{content:""}.fa-file-powerpoint-o:before{content:""}.fa-file-image-o:before,.fa-file-photo-o:before,.fa-file-picture-o:before{content:""}.fa-file-archive-o:before,.fa-file-zip-o:before{content:""}.fa-file-audio-o:before,.fa-file-sound-o:before{content:""}.fa-file-movie-o:before,.fa-file-video-o:before{content:""}.fa-file-code-o:before{content:""}.fa-vine:before{content:""}.fa-codepen:before{content:""}.fa-jsfiddle:before{content:""}.fa-life-bouy:before,.fa-life-buoy:before,.fa-life-ring:before,.fa-life-saver:before,.fa-support:before{content:""}.fa-circle-o-notch:before{content:""}.fa-ra:before,.fa-rebel:before,.fa-resistance:before{content:""}.fa-empire:before,.fa-ge:before{content:""}.fa-git-square:before{content:""}.fa-git:before{content:""}.fa-hacker-news:before,.fa-y-combinator-square:before,.fa-yc-square:before{content:""}.fa-tencent-weibo:before{content:""}.fa-qq:before{content:""}.fa-wechat:before,.fa-weixin:before{content:""}.fa-paper-plane:before,.fa-send:before{content:""}.fa-paper-plane-o:before,.fa-send-o:before{content:""}.fa-history:before{content:""}.fa-circle-thin:before{content:""}.fa-header:before{content:""}.fa-paragraph:before{content:""}.fa-sliders:before{content:""}.fa-share-alt:before{content:""}.fa-share-alt-square:before{content:""}.fa-bomb:before{content:""}.fa-futbol-o:before,.fa-soccer-ball-o:before{content:""}.fa-tty:before{content:""}.fa-binoculars:before{content:""}.fa-plug:before{content:""}.fa-slideshare:before{content:""}.fa-twitch:before{content:""}.fa-yelp:before{content:""}.fa-newspaper-o:before{content:""}.fa-wifi:before{content:""}.fa-calculator:before{content:""}.fa-paypal:before{content:""}.fa-google-wallet:before{content:""}.fa-cc-visa:before{content:""}.fa-cc-mastercard:before{content:""}.fa-cc-discover:before{content:""}.fa-cc-amex:before{content:""}.fa-cc-paypal:before{content:""}.fa-cc-stripe:before{content:""}.fa-b
ell-slash:before{content:""}.fa-bell-slash-o:before{content:""}.fa-trash:before{content:""}.fa-copyright:before{content:""}.fa-at:before{content:""}.fa-eyedropper:before{content:""}.fa-paint-brush:before{content:""}.fa-birthday-cake:before{content:""}.fa-area-chart:before{content:""}.fa-pie-chart:before{content:""}.fa-line-chart:before{content:""}.fa-lastfm:before{content:""}.fa-lastfm-square:before{content:""}.fa-toggle-off:before{content:""}.fa-toggle-on:before{content:""}.fa-bicycle:before{content:""}.fa-bus:before{content:""}.fa-ioxhost:before{content:""}.fa-angellist:before{content:""}.fa-cc:before{content:""}.fa-ils:before,.fa-shekel:before,.fa-sheqel:before{content:""}.fa-meanpath:before{content:""}.fa-buysellads:before{content:""}.fa-connectdevelop:before{content:""}.fa-dashcube:before{content:""}.fa-forumbee:before{content:""}.fa-leanpub:before{content:""}.fa-sellsy:before{content:""}.fa-shirtsinbulk:before{content:""}.fa-simplybuilt:before{content:""}.fa-skyatlas:before{content:""}.fa-cart-plus:before{content:""}.fa-cart-arrow-down:before{content:""}.fa-diamond:before{content:""}.fa-ship:before{content:""}.fa-user-secret:before{content:""}.fa-motorcycle:before{content:""}.fa-street-view:before{content:""}.fa-heartbeat:before{content:""}.fa-venus:before{content:""}.fa-mars:before{content:""}.fa-mercury:before{content:""}.fa-intersex:before,.fa-transgender:before{content:""}.fa-transgender-alt:before{content:""}.fa-venus-double:before{content:""}.fa-mars-double:before{content:""}.fa-venus-mars:before{content:""}.fa-mars-stroke:before{content:""}.fa-mars-stroke-v:before{content:""}.fa-mars-stroke-h:before{content:""}.fa-neuter:before{content:""}.fa-genderless:before{content:""}.fa-facebook-official:before{content:""}.fa-pinterest-p:before{content:""}.fa-whatsapp:before{content:""}.fa-server:before{content:""}.fa-user-plus:before{content:""}.fa-user-times:before{content:""}.fa-bed:before,.fa-hotel:before{content:""}.fa-viacoin:before{content:""}.fa-train:before{content:""}.fa-subway:before{content:""}.fa-medium:before{content:""}.fa-y-combinator:before,.fa-yc:before{content:""}.fa-optin-monster:before{content:""}.fa-opencart:before{content:""}.fa-expeditedssl:before{content:""}.fa-battery-4:before,.fa-battery-full:before,.fa-battery:before{content:""}.fa-battery-3:before,.fa-battery-three-quarters:before{content:""}.fa-battery-2:before,.fa-battery-half:before{content:""}.fa-battery-1:before,.fa-battery-quarter:before{content:""}.fa-battery-0:before,.fa-battery-empty:before{content:""}.fa-mouse-pointer:before{content:""}.fa-i-cursor:before{content:""}.fa-object-group:before{content:""}.fa-object-ungroup:before{content:""}.fa-sticky-note:before{content:""}.fa-sticky-note-o:before{content:""}.fa-cc-jcb:before{content:""}.fa-cc-diners-club:before{content:""}.fa-clone:before{content:""}.fa-balance-scale:before{content:""}.fa-hourglass-o:before{content:""}.fa-hourglass-1:before,.fa-hourglass-start:before{content:""}.fa-hourglass-2:before,.fa-hourglass-half:before{content:""}.fa-hourglass-3:before,.fa-hourglass-end:before{content:""}.fa-hourglass:before{content:""}.fa-hand-grab-o:before,.fa-hand-rock-o:before{content:""}.fa-hand-paper-o:before,.fa-hand-stop-o:before{content:""}.fa-hand-scissors-o:before{content:""}.fa-hand-lizard-o:before{content:""}.fa-hand-spock-o:before{content:""}.fa-hand-pointer-o:before{content:""}.fa-hand-peace-o:before{content:""}.fa-trademark:before{content:""}.fa-register
ed:before{content:""}.fa-creative-commons:before{content:""}.fa-gg:before{content:""}.fa-gg-circle:before{content:""}.fa-tripadvisor:before{content:""}.fa-odnoklassniki:before{content:""}.fa-odnoklassniki-square:before{content:""}.fa-get-pocket:before{content:""}.fa-wikipedia-w:before{content:""}.fa-safari:before{content:""}.fa-chrome:before{content:""}.fa-firefox:before{content:""}.fa-opera:before{content:""}.fa-internet-explorer:before{content:""}.fa-television:before,.fa-tv:before{content:""}.fa-contao:before{content:""}.fa-500px:before{content:""}.fa-amazon:before{content:""}.fa-calendar-plus-o:before{content:""}.fa-calendar-minus-o:before{content:""}.fa-calendar-times-o:before{content:""}.fa-calendar-check-o:before{content:""}.fa-industry:before{content:""}.fa-map-pin:before{content:""}.fa-map-signs:before{content:""}.fa-map-o:before{content:""}.fa-map:before{content:""}.fa-commenting:before{content:""}.fa-commenting-o:before{content:""}.fa-houzz:before{content:""}.fa-vimeo:before{content:""}.fa-black-tie:before{content:""}.fa-fonticons:before{content:""}.fa-reddit-alien:before{content:""}.fa-edge:before{content:""}.fa-credit-card-alt:before{content:""}.fa-codiepie:before{content:""}.fa-modx:before{content:""}.fa-fort-awesome:before{content:""}.fa-usb:before{content:""}.fa-product-hunt:before{content:""}.fa-mixcloud:before{content:""}.fa-scribd:before{content:""}.fa-pause-circle:before{content:""}.fa-pause-circle-o:before{content:""}.fa-stop-circle:before{content:""}.fa-stop-circle-o:before{content:""}.fa-shopping-bag:before{content:""}.fa-shopping-basket:before{content:""}.fa-hashtag:before{content:""}.fa-bluetooth:before{content:""}.fa-bluetooth-b:before{content:""}.fa-percent:before{content:""}.fa-gitlab:before,.icon-gitlab:before{content:""}.fa-wpbeginner:before{content:""}.fa-wpforms:before{content:""}.fa-envira:before{content:""}.fa-universal-access:before{content:""}.fa-wheelchair-alt:before{content:""}.fa-question-circle-o:before{content:""}.fa-blind:before{content:""}.fa-audio-description:before{content:""}.fa-volume-control-phone:before{content:""}.fa-braille:before{content:""}.fa-assistive-listening-systems:before{content:""}.fa-american-sign-language-interpreting:before,.fa-asl-interpreting:before{content:""}.fa-deaf:before,.fa-deafness:before,.fa-hard-of-hearing:before{content:""}.fa-glide:before{content:""}.fa-glide-g:before{content:""}.fa-sign-language:before,.fa-signing:before{content:""}.fa-low-vision:before{content:""}.fa-viadeo:before{content:""}.fa-viadeo-square:before{content:""}.fa-snapchat:before{content:""}.fa-snapchat-ghost:before{content:""}.fa-snapchat-square:before{content:""}.fa-pied-piper:before{content:""}.fa-first-order:before{content:""}.fa-yoast:before{content:""}.fa-themeisle:before{content:""}.fa-google-plus-circle:before,.fa-google-plus-official:before{content:""}.fa-fa:before,.fa-font-awesome:before{content:""}.fa-handshake-o:before{content:""}.fa-envelope-open:before{content:""}.fa-envelope-open-o:before{content:""}.fa-linode:before{content:""}.fa-address-book:before{content:""}.fa-address-book-o:before{content:""}.fa-address-card:before,.fa-vcard:before{content:""}.fa-address-card-o:before,.fa-vcard-o:before{content:""}.fa-user-circle:before{content:""}.fa-user-circle-o:before{content:""}.fa-user-o:before{content:""}.fa-id-badge:before{content:""}.fa-drivers-license:before,.fa-id-card:before{content:""}.fa-drivers-license-o:before,.fa-id-card-o:before{c
ontent:""}.fa-quora:before{content:""}.fa-free-code-camp:before{content:""}.fa-telegram:before{content:""}.fa-thermometer-4:before,.fa-thermometer-full:before,.fa-thermometer:before{content:""}.fa-thermometer-3:before,.fa-thermometer-three-quarters:before{content:""}.fa-thermometer-2:before,.fa-thermometer-half:before{content:""}.fa-thermometer-1:before,.fa-thermometer-quarter:before{content:""}.fa-thermometer-0:before,.fa-thermometer-empty:before{content:""}.fa-shower:before{content:""}.fa-bath:before,.fa-bathtub:before,.fa-s15:before{content:""}.fa-podcast:before{content:""}.fa-window-maximize:before{content:""}.fa-window-minimize:before{content:""}.fa-window-restore:before{content:""}.fa-times-rectangle:before,.fa-window-close:before{content:""}.fa-times-rectangle-o:before,.fa-window-close-o:before{content:""}.fa-bandcamp:before{content:""}.fa-grav:before{content:""}.fa-etsy:before{content:""}.fa-imdb:before{content:""}.fa-ravelry:before{content:""}.fa-eercast:before{content:""}.fa-microchip:before{content:""}.fa-snowflake-o:before{content:""}.fa-superpowers:before{content:""}.fa-wpexplorer:before{content:""}.fa-meetup:before{content:""}.sr-only{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0,0,0,0);border:0}.sr-only-focusable:active,.sr-only-focusable:focus{position:static;width:auto;height:auto;margin:0;overflow:visible;clip:auto}.fa,.icon,.rst-content .admonition-title,.rst-content .code-block-caption .headerlink,.rst-content .eqno .headerlink,.rst-content code.download span:first-child,.rst-content dl dt .headerlink,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content p.caption .headerlink,.rst-content p .headerlink,.rst-content table>caption .headerlink,.rst-content tt.download span:first-child,.wy-dropdown .caret,.wy-inline-validate.wy-inline-validate-danger .wy-input-context,.wy-inline-validate.wy-inline-validate-info .wy-input-context,.wy-inline-validate.wy-inline-validate-success .wy-input-context,.wy-inline-validate.wy-inline-validate-warning .wy-input-context,.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a button.toctree-expand,.wy-menu-vertical li button.toctree-expand{font-family:inherit}.fa:before,.icon:before,.rst-content .admonition-title:before,.rst-content .code-block-caption .headerlink:before,.rst-content .eqno .headerlink:before,.rst-content code.download span:first-child:before,.rst-content dl dt .headerlink:before,.rst-content h1 .headerlink:before,.rst-content h2 .headerlink:before,.rst-content h3 .headerlink:before,.rst-content h4 .headerlink:before,.rst-content h5 .headerlink:before,.rst-content h6 .headerlink:before,.rst-content p.caption .headerlink:before,.rst-content p .headerlink:before,.rst-content table>caption .headerlink:before,.rst-content tt.download span:first-child:before,.wy-dropdown .caret:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before,.wy-menu-vertical li.current>a button.toctree-expand:before,.wy-menu-vertical li.on a button.toctree-expand:before,.wy-menu-vertical li 
button.toctree-expand:before{font-family:FontAwesome;display:inline-block;font-style:normal;font-weight:400;line-height:1;text-decoration:inherit}.rst-content .code-block-caption a .headerlink,.rst-content .eqno a .headerlink,.rst-content a .admonition-title,.rst-content code.download a span:first-child,.rst-content dl dt a .headerlink,.rst-content h1 a .headerlink,.rst-content h2 a .headerlink,.rst-content h3 a .headerlink,.rst-content h4 a .headerlink,.rst-content h5 a .headerlink,.rst-content h6 a .headerlink,.rst-content p.caption a .headerlink,.rst-content p a .headerlink,.rst-content table>caption a .headerlink,.rst-content tt.download a span:first-child,.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a button.toctree-expand,.wy-menu-vertical li a button.toctree-expand,a .fa,a .icon,a .rst-content .admonition-title,a .rst-content .code-block-caption .headerlink,a .rst-content .eqno .headerlink,a .rst-content code.download span:first-child,a .rst-content dl dt .headerlink,a .rst-content h1 .headerlink,a .rst-content h2 .headerlink,a .rst-content h3 .headerlink,a .rst-content h4 .headerlink,a .rst-content h5 .headerlink,a .rst-content h6 .headerlink,a .rst-content p.caption .headerlink,a .rst-content p .headerlink,a .rst-content table>caption .headerlink,a .rst-content tt.download span:first-child,a .wy-menu-vertical li button.toctree-expand{display:inline-block;text-decoration:inherit}.btn .fa,.btn .icon,.btn .rst-content .admonition-title,.btn .rst-content .code-block-caption .headerlink,.btn .rst-content .eqno .headerlink,.btn .rst-content code.download span:first-child,.btn .rst-content dl dt .headerlink,.btn .rst-content h1 .headerlink,.btn .rst-content h2 .headerlink,.btn .rst-content h3 .headerlink,.btn .rst-content h4 .headerlink,.btn .rst-content h5 .headerlink,.btn .rst-content h6 .headerlink,.btn .rst-content p .headerlink,.btn .rst-content table>caption .headerlink,.btn .rst-content tt.download span:first-child,.btn .wy-menu-vertical li.current>a button.toctree-expand,.btn .wy-menu-vertical li.on a button.toctree-expand,.btn .wy-menu-vertical li button.toctree-expand,.nav .fa,.nav .icon,.nav .rst-content .admonition-title,.nav .rst-content .code-block-caption .headerlink,.nav .rst-content .eqno .headerlink,.nav .rst-content code.download span:first-child,.nav .rst-content dl dt .headerlink,.nav .rst-content h1 .headerlink,.nav .rst-content h2 .headerlink,.nav .rst-content h3 .headerlink,.nav .rst-content h4 .headerlink,.nav .rst-content h5 .headerlink,.nav .rst-content h6 .headerlink,.nav .rst-content p .headerlink,.nav .rst-content table>caption .headerlink,.nav .rst-content tt.download span:first-child,.nav .wy-menu-vertical li.current>a button.toctree-expand,.nav .wy-menu-vertical li.on a button.toctree-expand,.nav .wy-menu-vertical li button.toctree-expand,.rst-content .btn .admonition-title,.rst-content .code-block-caption .btn .headerlink,.rst-content .code-block-caption .nav .headerlink,.rst-content .eqno .btn .headerlink,.rst-content .eqno .nav .headerlink,.rst-content .nav .admonition-title,.rst-content code.download .btn span:first-child,.rst-content code.download .nav span:first-child,.rst-content dl dt .btn .headerlink,.rst-content dl dt .nav .headerlink,.rst-content h1 .btn .headerlink,.rst-content h1 .nav .headerlink,.rst-content h2 .btn .headerlink,.rst-content h2 .nav .headerlink,.rst-content h3 .btn .headerlink,.rst-content h3 .nav .headerlink,.rst-content h4 .btn .headerlink,.rst-content h4 .nav .headerlink,.rst-content h5 .btn 
.headerlink,.rst-content h5 .nav .headerlink,.rst-content h6 .btn .headerlink,.rst-content h6 .nav .headerlink,.rst-content p .btn .headerlink,.rst-content p .nav .headerlink,.rst-content table>caption .btn .headerlink,.rst-content table>caption .nav .headerlink,.rst-content tt.download .btn span:first-child,.rst-content tt.download .nav span:first-child,.wy-menu-vertical li .btn button.toctree-expand,.wy-menu-vertical li.current>a .btn button.toctree-expand,.wy-menu-vertical li.current>a .nav button.toctree-expand,.wy-menu-vertical li .nav button.toctree-expand,.wy-menu-vertical li.on a .btn button.toctree-expand,.wy-menu-vertical li.on a .nav button.toctree-expand{display:inline}.btn .fa-large.icon,.btn .fa.fa-large,.btn .rst-content .code-block-caption .fa-large.headerlink,.btn .rst-content .eqno .fa-large.headerlink,.btn .rst-content .fa-large.admonition-title,.btn .rst-content code.download span.fa-large:first-child,.btn .rst-content dl dt .fa-large.headerlink,.btn .rst-content h1 .fa-large.headerlink,.btn .rst-content h2 .fa-large.headerlink,.btn .rst-content h3 .fa-large.headerlink,.btn .rst-content h4 .fa-large.headerlink,.btn .rst-content h5 .fa-large.headerlink,.btn .rst-content h6 .fa-large.headerlink,.btn .rst-content p .fa-large.headerlink,.btn .rst-content table>caption .fa-large.headerlink,.btn .rst-content tt.download span.fa-large:first-child,.btn .wy-menu-vertical li button.fa-large.toctree-expand,.nav .fa-large.icon,.nav .fa.fa-large,.nav .rst-content .code-block-caption .fa-large.headerlink,.nav .rst-content .eqno .fa-large.headerlink,.nav .rst-content .fa-large.admonition-title,.nav .rst-content code.download span.fa-large:first-child,.nav .rst-content dl dt .fa-large.headerlink,.nav .rst-content h1 .fa-large.headerlink,.nav .rst-content h2 .fa-large.headerlink,.nav .rst-content h3 .fa-large.headerlink,.nav .rst-content h4 .fa-large.headerlink,.nav .rst-content h5 .fa-large.headerlink,.nav .rst-content h6 .fa-large.headerlink,.nav .rst-content p .fa-large.headerlink,.nav .rst-content table>caption .fa-large.headerlink,.nav .rst-content tt.download span.fa-large:first-child,.nav .wy-menu-vertical li button.fa-large.toctree-expand,.rst-content .btn .fa-large.admonition-title,.rst-content .code-block-caption .btn .fa-large.headerlink,.rst-content .code-block-caption .nav .fa-large.headerlink,.rst-content .eqno .btn .fa-large.headerlink,.rst-content .eqno .nav .fa-large.headerlink,.rst-content .nav .fa-large.admonition-title,.rst-content code.download .btn span.fa-large:first-child,.rst-content code.download .nav span.fa-large:first-child,.rst-content dl dt .btn .fa-large.headerlink,.rst-content dl dt .nav .fa-large.headerlink,.rst-content h1 .btn .fa-large.headerlink,.rst-content h1 .nav .fa-large.headerlink,.rst-content h2 .btn .fa-large.headerlink,.rst-content h2 .nav .fa-large.headerlink,.rst-content h3 .btn .fa-large.headerlink,.rst-content h3 .nav .fa-large.headerlink,.rst-content h4 .btn .fa-large.headerlink,.rst-content h4 .nav .fa-large.headerlink,.rst-content h5 .btn .fa-large.headerlink,.rst-content h5 .nav .fa-large.headerlink,.rst-content h6 .btn .fa-large.headerlink,.rst-content h6 .nav .fa-large.headerlink,.rst-content p .btn .fa-large.headerlink,.rst-content p .nav .fa-large.headerlink,.rst-content table>caption .btn .fa-large.headerlink,.rst-content table>caption .nav .fa-large.headerlink,.rst-content tt.download .btn span.fa-large:first-child,.rst-content tt.download .nav span.fa-large:first-child,.wy-menu-vertical li .btn 
button.fa-large.toctree-expand,.wy-menu-vertical li .nav button.fa-large.toctree-expand{line-height:.9em}.btn .fa-spin.icon,.btn .fa.fa-spin,.btn .rst-content .code-block-caption .fa-spin.headerlink,.btn .rst-content .eqno .fa-spin.headerlink,.btn .rst-content .fa-spin.admonition-title,.btn .rst-content code.download span.fa-spin:first-child,.btn .rst-content dl dt .fa-spin.headerlink,.btn .rst-content h1 .fa-spin.headerlink,.btn .rst-content h2 .fa-spin.headerlink,.btn .rst-content h3 .fa-spin.headerlink,.btn .rst-content h4 .fa-spin.headerlink,.btn .rst-content h5 .fa-spin.headerlink,.btn .rst-content h6 .fa-spin.headerlink,.btn .rst-content p .fa-spin.headerlink,.btn .rst-content table>caption .fa-spin.headerlink,.btn .rst-content tt.download span.fa-spin:first-child,.btn .wy-menu-vertical li button.fa-spin.toctree-expand,.nav .fa-spin.icon,.nav .fa.fa-spin,.nav .rst-content .code-block-caption .fa-spin.headerlink,.nav .rst-content .eqno .fa-spin.headerlink,.nav .rst-content .fa-spin.admonition-title,.nav .rst-content code.download span.fa-spin:first-child,.nav .rst-content dl dt .fa-spin.headerlink,.nav .rst-content h1 .fa-spin.headerlink,.nav .rst-content h2 .fa-spin.headerlink,.nav .rst-content h3 .fa-spin.headerlink,.nav .rst-content h4 .fa-spin.headerlink,.nav .rst-content h5 .fa-spin.headerlink,.nav .rst-content h6 .fa-spin.headerlink,.nav .rst-content p .fa-spin.headerlink,.nav .rst-content table>caption .fa-spin.headerlink,.nav .rst-content tt.download span.fa-spin:first-child,.nav .wy-menu-vertical li button.fa-spin.toctree-expand,.rst-content .btn .fa-spin.admonition-title,.rst-content .code-block-caption .btn .fa-spin.headerlink,.rst-content .code-block-caption .nav .fa-spin.headerlink,.rst-content .eqno .btn .fa-spin.headerlink,.rst-content .eqno .nav .fa-spin.headerlink,.rst-content .nav .fa-spin.admonition-title,.rst-content code.download .btn span.fa-spin:first-child,.rst-content code.download .nav span.fa-spin:first-child,.rst-content dl dt .btn .fa-spin.headerlink,.rst-content dl dt .nav .fa-spin.headerlink,.rst-content h1 .btn .fa-spin.headerlink,.rst-content h1 .nav .fa-spin.headerlink,.rst-content h2 .btn .fa-spin.headerlink,.rst-content h2 .nav .fa-spin.headerlink,.rst-content h3 .btn .fa-spin.headerlink,.rst-content h3 .nav .fa-spin.headerlink,.rst-content h4 .btn .fa-spin.headerlink,.rst-content h4 .nav .fa-spin.headerlink,.rst-content h5 .btn .fa-spin.headerlink,.rst-content h5 .nav .fa-spin.headerlink,.rst-content h6 .btn .fa-spin.headerlink,.rst-content h6 .nav .fa-spin.headerlink,.rst-content p .btn .fa-spin.headerlink,.rst-content p .nav .fa-spin.headerlink,.rst-content table>caption .btn .fa-spin.headerlink,.rst-content table>caption .nav .fa-spin.headerlink,.rst-content tt.download .btn span.fa-spin:first-child,.rst-content tt.download .nav span.fa-spin:first-child,.wy-menu-vertical li .btn button.fa-spin.toctree-expand,.wy-menu-vertical li .nav button.fa-spin.toctree-expand{display:inline-block}.btn.fa:before,.btn.icon:before,.rst-content .btn.admonition-title:before,.rst-content .code-block-caption .btn.headerlink:before,.rst-content .eqno .btn.headerlink:before,.rst-content code.download span.btn:first-child:before,.rst-content dl dt .btn.headerlink:before,.rst-content h1 .btn.headerlink:before,.rst-content h2 .btn.headerlink:before,.rst-content h3 .btn.headerlink:before,.rst-content h4 .btn.headerlink:before,.rst-content h5 .btn.headerlink:before,.rst-content h6 .btn.headerlink:before,.rst-content p .btn.headerlink:before,.rst-content table>caption 
.btn.headerlink:before,.rst-content tt.download span.btn:first-child:before,.wy-menu-vertical li button.btn.toctree-expand:before{opacity:.5;-webkit-transition:opacity .05s ease-in;-moz-transition:opacity .05s ease-in;transition:opacity .05s ease-in}.btn.fa:hover:before,.btn.icon:hover:before,.rst-content .btn.admonition-title:hover:before,.rst-content .code-block-caption .btn.headerlink:hover:before,.rst-content .eqno .btn.headerlink:hover:before,.rst-content code.download span.btn:first-child:hover:before,.rst-content dl dt .btn.headerlink:hover:before,.rst-content h1 .btn.headerlink:hover:before,.rst-content h2 .btn.headerlink:hover:before,.rst-content h3 .btn.headerlink:hover:before,.rst-content h4 .btn.headerlink:hover:before,.rst-content h5 .btn.headerlink:hover:before,.rst-content h6 .btn.headerlink:hover:before,.rst-content p .btn.headerlink:hover:before,.rst-content table>caption .btn.headerlink:hover:before,.rst-content tt.download span.btn:first-child:hover:before,.wy-menu-vertical li button.btn.toctree-expand:hover:before{opacity:1}.btn-mini .fa:before,.btn-mini .icon:before,.btn-mini .rst-content .admonition-title:before,.btn-mini .rst-content .code-block-caption .headerlink:before,.btn-mini .rst-content .eqno .headerlink:before,.btn-mini .rst-content code.download span:first-child:before,.btn-mini .rst-content dl dt .headerlink:before,.btn-mini .rst-content h1 .headerlink:before,.btn-mini .rst-content h2 .headerlink:before,.btn-mini .rst-content h3 .headerlink:before,.btn-mini .rst-content h4 .headerlink:before,.btn-mini .rst-content h5 .headerlink:before,.btn-mini .rst-content h6 .headerlink:before,.btn-mini .rst-content p .headerlink:before,.btn-mini .rst-content table>caption .headerlink:before,.btn-mini .rst-content tt.download span:first-child:before,.btn-mini .wy-menu-vertical li button.toctree-expand:before,.rst-content .btn-mini .admonition-title:before,.rst-content .code-block-caption .btn-mini .headerlink:before,.rst-content .eqno .btn-mini .headerlink:before,.rst-content code.download .btn-mini span:first-child:before,.rst-content dl dt .btn-mini .headerlink:before,.rst-content h1 .btn-mini .headerlink:before,.rst-content h2 .btn-mini .headerlink:before,.rst-content h3 .btn-mini .headerlink:before,.rst-content h4 .btn-mini .headerlink:before,.rst-content h5 .btn-mini .headerlink:before,.rst-content h6 .btn-mini .headerlink:before,.rst-content p .btn-mini .headerlink:before,.rst-content table>caption .btn-mini .headerlink:before,.rst-content tt.download .btn-mini span:first-child:before,.wy-menu-vertical li .btn-mini button.toctree-expand:before{font-size:14px;vertical-align:-15%}.rst-content .admonition,.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .danger,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .note,.rst-content .seealso,.rst-content .tip,.rst-content .warning,.wy-alert{padding:12px;line-height:24px;margin-bottom:24px;background:#e7f2fa}.rst-content .admonition-title,.wy-alert-title{font-weight:700;display:block;color:#fff;background:#6ab0de;padding:6px 12px;margin:-12px -12px 12px}.rst-content .danger,.rst-content .error,.rst-content .wy-alert-danger.admonition,.rst-content .wy-alert-danger.admonition-todo,.rst-content .wy-alert-danger.attention,.rst-content .wy-alert-danger.caution,.rst-content .wy-alert-danger.hint,.rst-content .wy-alert-danger.important,.rst-content .wy-alert-danger.note,.rst-content .wy-alert-danger.seealso,.rst-content .wy-alert-danger.tip,.rst-content 
.wy-alert-danger.warning,.wy-alert.wy-alert-danger{background:#fdf3f2}.rst-content .danger .admonition-title,.rst-content .danger .wy-alert-title,.rst-content .error .admonition-title,.rst-content .error .wy-alert-title,.rst-content .wy-alert-danger.admonition-todo .admonition-title,.rst-content .wy-alert-danger.admonition-todo .wy-alert-title,.rst-content .wy-alert-danger.admonition .admonition-title,.rst-content .wy-alert-danger.admonition .wy-alert-title,.rst-content .wy-alert-danger.attention .admonition-title,.rst-content .wy-alert-danger.attention .wy-alert-title,.rst-content .wy-alert-danger.caution .admonition-title,.rst-content .wy-alert-danger.caution .wy-alert-title,.rst-content .wy-alert-danger.hint .admonition-title,.rst-content .wy-alert-danger.hint .wy-alert-title,.rst-content .wy-alert-danger.important .admonition-title,.rst-content .wy-alert-danger.important .wy-alert-title,.rst-content .wy-alert-danger.note .admonition-title,.rst-content .wy-alert-danger.note .wy-alert-title,.rst-content .wy-alert-danger.seealso .admonition-title,.rst-content .wy-alert-danger.seealso .wy-alert-title,.rst-content .wy-alert-danger.tip .admonition-title,.rst-content .wy-alert-danger.tip .wy-alert-title,.rst-content .wy-alert-danger.warning .admonition-title,.rst-content .wy-alert-danger.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-danger .admonition-title,.wy-alert.wy-alert-danger .rst-content .admonition-title,.wy-alert.wy-alert-danger .wy-alert-title{background:#f29f97}.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .warning,.rst-content .wy-alert-warning.admonition,.rst-content .wy-alert-warning.danger,.rst-content .wy-alert-warning.error,.rst-content .wy-alert-warning.hint,.rst-content .wy-alert-warning.important,.rst-content .wy-alert-warning.note,.rst-content .wy-alert-warning.seealso,.rst-content .wy-alert-warning.tip,.wy-alert.wy-alert-warning{background:#ffedcc}.rst-content .admonition-todo .admonition-title,.rst-content .admonition-todo .wy-alert-title,.rst-content .attention .admonition-title,.rst-content .attention .wy-alert-title,.rst-content .caution .admonition-title,.rst-content .caution .wy-alert-title,.rst-content .warning .admonition-title,.rst-content .warning .wy-alert-title,.rst-content .wy-alert-warning.admonition .admonition-title,.rst-content .wy-alert-warning.admonition .wy-alert-title,.rst-content .wy-alert-warning.danger .admonition-title,.rst-content .wy-alert-warning.danger .wy-alert-title,.rst-content .wy-alert-warning.error .admonition-title,.rst-content .wy-alert-warning.error .wy-alert-title,.rst-content .wy-alert-warning.hint .admonition-title,.rst-content .wy-alert-warning.hint .wy-alert-title,.rst-content .wy-alert-warning.important .admonition-title,.rst-content .wy-alert-warning.important .wy-alert-title,.rst-content .wy-alert-warning.note .admonition-title,.rst-content .wy-alert-warning.note .wy-alert-title,.rst-content .wy-alert-warning.seealso .admonition-title,.rst-content .wy-alert-warning.seealso .wy-alert-title,.rst-content .wy-alert-warning.tip .admonition-title,.rst-content .wy-alert-warning.tip .wy-alert-title,.rst-content .wy-alert.wy-alert-warning .admonition-title,.wy-alert.wy-alert-warning .rst-content .admonition-title,.wy-alert.wy-alert-warning .wy-alert-title{background:#f0b37e}.rst-content .note,.rst-content .seealso,.rst-content .wy-alert-info.admonition,.rst-content .wy-alert-info.admonition-todo,.rst-content .wy-alert-info.attention,.rst-content .wy-alert-info.caution,.rst-content 
.wy-alert-info.danger,.rst-content .wy-alert-info.error,.rst-content .wy-alert-info.hint,.rst-content .wy-alert-info.important,.rst-content .wy-alert-info.tip,.rst-content .wy-alert-info.warning,.wy-alert.wy-alert-info{background:#e7f2fa}.rst-content .note .admonition-title,.rst-content .note .wy-alert-title,.rst-content .seealso .admonition-title,.rst-content .seealso .wy-alert-title,.rst-content .wy-alert-info.admonition-todo .admonition-title,.rst-content .wy-alert-info.admonition-todo .wy-alert-title,.rst-content .wy-alert-info.admonition .admonition-title,.rst-content .wy-alert-info.admonition .wy-alert-title,.rst-content .wy-alert-info.attention .admonition-title,.rst-content .wy-alert-info.attention .wy-alert-title,.rst-content .wy-alert-info.caution .admonition-title,.rst-content .wy-alert-info.caution .wy-alert-title,.rst-content .wy-alert-info.danger .admonition-title,.rst-content .wy-alert-info.danger .wy-alert-title,.rst-content .wy-alert-info.error .admonition-title,.rst-content .wy-alert-info.error .wy-alert-title,.rst-content .wy-alert-info.hint .admonition-title,.rst-content .wy-alert-info.hint .wy-alert-title,.rst-content .wy-alert-info.important .admonition-title,.rst-content .wy-alert-info.important .wy-alert-title,.rst-content .wy-alert-info.tip .admonition-title,.rst-content .wy-alert-info.tip .wy-alert-title,.rst-content .wy-alert-info.warning .admonition-title,.rst-content .wy-alert-info.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-info .admonition-title,.wy-alert.wy-alert-info .rst-content .admonition-title,.wy-alert.wy-alert-info .wy-alert-title{background:#6ab0de}.rst-content .hint,.rst-content .important,.rst-content .tip,.rst-content .wy-alert-success.admonition,.rst-content .wy-alert-success.admonition-todo,.rst-content .wy-alert-success.attention,.rst-content .wy-alert-success.caution,.rst-content .wy-alert-success.danger,.rst-content .wy-alert-success.error,.rst-content .wy-alert-success.note,.rst-content .wy-alert-success.seealso,.rst-content .wy-alert-success.warning,.wy-alert.wy-alert-success{background:#dbfaf4}.rst-content .hint .admonition-title,.rst-content .hint .wy-alert-title,.rst-content .important .admonition-title,.rst-content .important .wy-alert-title,.rst-content .tip .admonition-title,.rst-content .tip .wy-alert-title,.rst-content .wy-alert-success.admonition-todo .admonition-title,.rst-content .wy-alert-success.admonition-todo .wy-alert-title,.rst-content .wy-alert-success.admonition .admonition-title,.rst-content .wy-alert-success.admonition .wy-alert-title,.rst-content .wy-alert-success.attention .admonition-title,.rst-content .wy-alert-success.attention .wy-alert-title,.rst-content .wy-alert-success.caution .admonition-title,.rst-content .wy-alert-success.caution .wy-alert-title,.rst-content .wy-alert-success.danger .admonition-title,.rst-content .wy-alert-success.danger .wy-alert-title,.rst-content .wy-alert-success.error .admonition-title,.rst-content .wy-alert-success.error .wy-alert-title,.rst-content .wy-alert-success.note .admonition-title,.rst-content .wy-alert-success.note .wy-alert-title,.rst-content .wy-alert-success.seealso .admonition-title,.rst-content .wy-alert-success.seealso .wy-alert-title,.rst-content .wy-alert-success.warning .admonition-title,.rst-content .wy-alert-success.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-success .admonition-title,.wy-alert.wy-alert-success .rst-content .admonition-title,.wy-alert.wy-alert-success .wy-alert-title{background:#1abc9c}.rst-content 
.wy-alert-neutral.admonition,.rst-content .wy-alert-neutral.admonition-todo,.rst-content .wy-alert-neutral.attention,.rst-content .wy-alert-neutral.caution,.rst-content .wy-alert-neutral.danger,.rst-content .wy-alert-neutral.error,.rst-content .wy-alert-neutral.hint,.rst-content .wy-alert-neutral.important,.rst-content .wy-alert-neutral.note,.rst-content .wy-alert-neutral.seealso,.rst-content .wy-alert-neutral.tip,.rst-content .wy-alert-neutral.warning,.wy-alert.wy-alert-neutral{background:#f3f6f6}.rst-content .wy-alert-neutral.admonition-todo .admonition-title,.rst-content .wy-alert-neutral.admonition-todo .wy-alert-title,.rst-content .wy-alert-neutral.admonition .admonition-title,.rst-content .wy-alert-neutral.admonition .wy-alert-title,.rst-content .wy-alert-neutral.attention .admonition-title,.rst-content .wy-alert-neutral.attention .wy-alert-title,.rst-content .wy-alert-neutral.caution .admonition-title,.rst-content .wy-alert-neutral.caution .wy-alert-title,.rst-content .wy-alert-neutral.danger .admonition-title,.rst-content .wy-alert-neutral.danger .wy-alert-title,.rst-content .wy-alert-neutral.error .admonition-title,.rst-content .wy-alert-neutral.error .wy-alert-title,.rst-content .wy-alert-neutral.hint .admonition-title,.rst-content .wy-alert-neutral.hint .wy-alert-title,.rst-content .wy-alert-neutral.important .admonition-title,.rst-content .wy-alert-neutral.important .wy-alert-title,.rst-content .wy-alert-neutral.note .admonition-title,.rst-content .wy-alert-neutral.note .wy-alert-title,.rst-content .wy-alert-neutral.seealso .admonition-title,.rst-content .wy-alert-neutral.seealso .wy-alert-title,.rst-content .wy-alert-neutral.tip .admonition-title,.rst-content .wy-alert-neutral.tip .wy-alert-title,.rst-content .wy-alert-neutral.warning .admonition-title,.rst-content .wy-alert-neutral.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-neutral .admonition-title,.wy-alert.wy-alert-neutral .rst-content .admonition-title,.wy-alert.wy-alert-neutral .wy-alert-title{color:#404040;background:#e1e4e5}.rst-content .wy-alert-neutral.admonition-todo a,.rst-content .wy-alert-neutral.admonition a,.rst-content .wy-alert-neutral.attention a,.rst-content .wy-alert-neutral.caution a,.rst-content .wy-alert-neutral.danger a,.rst-content .wy-alert-neutral.error a,.rst-content .wy-alert-neutral.hint a,.rst-content .wy-alert-neutral.important a,.rst-content .wy-alert-neutral.note a,.rst-content .wy-alert-neutral.seealso a,.rst-content .wy-alert-neutral.tip a,.rst-content .wy-alert-neutral.warning a,.wy-alert.wy-alert-neutral a{color:#2980b9}.rst-content .admonition-todo p:last-child,.rst-content .admonition p:last-child,.rst-content .attention p:last-child,.rst-content .caution p:last-child,.rst-content .danger p:last-child,.rst-content .error p:last-child,.rst-content .hint p:last-child,.rst-content .important p:last-child,.rst-content .note p:last-child,.rst-content .seealso p:last-child,.rst-content .tip p:last-child,.rst-content .warning p:last-child,.wy-alert p:last-child{margin-bottom:0}.wy-tray-container{position:fixed;bottom:0;left:0;z-index:600}.wy-tray-container li{display:block;width:300px;background:transparent;color:#fff;text-align:center;box-shadow:0 5px 5px 0 rgba(0,0,0,.1);padding:0 24px;min-width:20%;opacity:0;height:0;line-height:56px;overflow:hidden;-webkit-transition:all .3s ease-in;-moz-transition:all .3s ease-in;transition:all .3s ease-in}.wy-tray-container li.wy-tray-item-success{background:#27ae60}.wy-tray-container 
li.wy-tray-item-info{background:#2980b9}.wy-tray-container li.wy-tray-item-warning{background:#e67e22}.wy-tray-container li.wy-tray-item-danger{background:#e74c3c}.wy-tray-container li.on{opacity:1;height:56px}@media screen and (max-width:768px){.wy-tray-container{bottom:auto;top:0;width:100%}.wy-tray-container li{width:100%}}button{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle;cursor:pointer;line-height:normal;-webkit-appearance:button;*overflow:visible}button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0}button[disabled]{cursor:default}.btn{display:inline-block;border-radius:2px;line-height:normal;white-space:nowrap;text-align:center;cursor:pointer;font-size:100%;padding:6px 12px 8px;color:#fff;border:1px solid rgba(0,0,0,.1);background-color:#27ae60;text-decoration:none;font-weight:400;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;box-shadow:inset 0 1px 2px -1px hsla(0,0%,100%,.5),inset 0 -2px 0 0 rgba(0,0,0,.1);outline-none:false;vertical-align:middle;*display:inline;zoom:1;-webkit-user-drag:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;-webkit-transition:all .1s linear;-moz-transition:all .1s linear;transition:all .1s linear}.btn-hover{background:#2e8ece;color:#fff}.btn:hover{background:#2cc36b;color:#fff}.btn:focus{background:#2cc36b;outline:0}.btn:active{box-shadow:inset 0 -1px 0 0 rgba(0,0,0,.05),inset 0 2px 0 0 rgba(0,0,0,.1);padding:8px 12px 6px}.btn:visited{color:#fff}.btn-disabled,.btn-disabled:active,.btn-disabled:focus,.btn-disabled:hover,.btn:disabled{background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);filter:alpha(opacity=40);opacity:.4;cursor:not-allowed;box-shadow:none}.btn::-moz-focus-inner{padding:0;border:0}.btn-small{font-size:80%}.btn-info{background-color:#2980b9!important}.btn-info:hover{background-color:#2e8ece!important}.btn-neutral{background-color:#f3f6f6!important;color:#404040!important}.btn-neutral:hover{background-color:#e5ebeb!important;color:#404040}.btn-neutral:visited{color:#404040!important}.btn-success{background-color:#27ae60!important}.btn-success:hover{background-color:#295!important}.btn-danger{background-color:#e74c3c!important}.btn-danger:hover{background-color:#ea6153!important}.btn-warning{background-color:#e67e22!important}.btn-warning:hover{background-color:#e98b39!important}.btn-invert{background-color:#222}.btn-invert:hover{background-color:#2f2f2f!important}.btn-link{background-color:transparent!important;color:#2980b9;box-shadow:none;border-color:transparent!important}.btn-link:active,.btn-link:hover{background-color:transparent!important;color:#409ad5!important;box-shadow:none}.btn-link:visited{color:#9b59b6}.wy-btn-group .btn,.wy-control .btn{vertical-align:middle}.wy-btn-group{margin-bottom:24px;*zoom:1}.wy-btn-group:after,.wy-btn-group:before{display:table;content:""}.wy-btn-group:after{clear:both}.wy-dropdown{position:relative;display:inline-block}.wy-dropdown-active .wy-dropdown-menu{display:block}.wy-dropdown-menu{position:absolute;left:0;display:none;float:left;top:100%;min-width:100%;background:#fcfcfc;z-index:100;border:1px solid #cfd7dd;box-shadow:0 2px 2px 0 rgba(0,0,0,.1);padding:12px}.wy-dropdown-menu>dd>a{display:block;clear:both;color:#404040;white-space:nowrap;font-size:90%;padding:0 12px;cursor:pointer}.wy-dropdown-menu>dd>a:hover{background:#2980b9;color:#fff}.wy-dropdown-menu>dd.divider{border-top:1px solid #cfd7dd;margin:6px 
0}.wy-dropdown-menu>dd.search{padding-bottom:12px}.wy-dropdown-menu>dd.search input[type=search]{width:100%}.wy-dropdown-menu>dd.call-to-action{background:#e3e3e3;text-transform:uppercase;font-weight:500;font-size:80%}.wy-dropdown-menu>dd.call-to-action:hover{background:#e3e3e3}.wy-dropdown-menu>dd.call-to-action .btn{color:#fff}.wy-dropdown.wy-dropdown-up .wy-dropdown-menu{bottom:100%;top:auto;left:auto;right:0}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu{background:#fcfcfc;margin-top:2px}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu a{padding:6px 12px}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu a:hover{background:#2980b9;color:#fff}.wy-dropdown.wy-dropdown-left .wy-dropdown-menu{right:0;left:auto;text-align:right}.wy-dropdown-arrow:before{content:" ";border-bottom:5px solid #f5f5f5;border-left:5px solid transparent;border-right:5px solid transparent;position:absolute;display:block;top:-4px;left:50%;margin-left:-3px}.wy-dropdown-arrow.wy-dropdown-arrow-left:before{left:11px}.wy-form-stacked select{display:block}.wy-form-aligned .wy-help-inline,.wy-form-aligned input,.wy-form-aligned label,.wy-form-aligned select,.wy-form-aligned textarea{display:inline-block;*display:inline;*zoom:1;vertical-align:middle}.wy-form-aligned .wy-control-group>label{display:inline-block;vertical-align:middle;width:10em;margin:6px 12px 0 0;float:left}.wy-form-aligned .wy-control{float:left}.wy-form-aligned .wy-control label{display:block}.wy-form-aligned .wy-control select{margin-top:6px}fieldset{margin:0}fieldset,legend{border:0;padding:0}legend{width:100%;white-space:normal;margin-bottom:24px;font-size:150%;*margin-left:-7px}label,legend{display:block}label{margin:0 0 .3125em;color:#333;font-size:90%}input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle}.wy-control-group{margin-bottom:24px;max-width:1200px;margin-left:auto;margin-right:auto;*zoom:1}.wy-control-group:after,.wy-control-group:before{display:table;content:""}.wy-control-group:after{clear:both}.wy-control-group.wy-control-group-required>label:after{content:" *";color:#e74c3c}.wy-control-group .wy-form-full,.wy-control-group .wy-form-halves,.wy-control-group .wy-form-thirds{padding-bottom:12px}.wy-control-group .wy-form-full input[type=color],.wy-control-group .wy-form-full input[type=date],.wy-control-group .wy-form-full input[type=datetime-local],.wy-control-group .wy-form-full input[type=datetime],.wy-control-group .wy-form-full input[type=email],.wy-control-group .wy-form-full input[type=month],.wy-control-group .wy-form-full input[type=number],.wy-control-group .wy-form-full input[type=password],.wy-control-group .wy-form-full input[type=search],.wy-control-group .wy-form-full input[type=tel],.wy-control-group .wy-form-full input[type=text],.wy-control-group .wy-form-full input[type=time],.wy-control-group .wy-form-full input[type=url],.wy-control-group .wy-form-full input[type=week],.wy-control-group .wy-form-full select,.wy-control-group .wy-form-halves input[type=color],.wy-control-group .wy-form-halves input[type=date],.wy-control-group .wy-form-halves input[type=datetime-local],.wy-control-group .wy-form-halves input[type=datetime],.wy-control-group .wy-form-halves input[type=email],.wy-control-group .wy-form-halves input[type=month],.wy-control-group .wy-form-halves input[type=number],.wy-control-group .wy-form-halves input[type=password],.wy-control-group .wy-form-halves input[type=search],.wy-control-group .wy-form-halves input[type=tel],.wy-control-group .wy-form-halves 
input[type=text],.wy-control-group .wy-form-halves input[type=time],.wy-control-group .wy-form-halves input[type=url],.wy-control-group .wy-form-halves input[type=week],.wy-control-group .wy-form-halves select,.wy-control-group .wy-form-thirds input[type=color],.wy-control-group .wy-form-thirds input[type=date],.wy-control-group .wy-form-thirds input[type=datetime-local],.wy-control-group .wy-form-thirds input[type=datetime],.wy-control-group .wy-form-thirds input[type=email],.wy-control-group .wy-form-thirds input[type=month],.wy-control-group .wy-form-thirds input[type=number],.wy-control-group .wy-form-thirds input[type=password],.wy-control-group .wy-form-thirds input[type=search],.wy-control-group .wy-form-thirds input[type=tel],.wy-control-group .wy-form-thirds input[type=text],.wy-control-group .wy-form-thirds input[type=time],.wy-control-group .wy-form-thirds input[type=url],.wy-control-group .wy-form-thirds input[type=week],.wy-control-group .wy-form-thirds select{width:100%}.wy-control-group .wy-form-full{float:left;display:block;width:100%;margin-right:0}.wy-control-group .wy-form-full:last-child{margin-right:0}.wy-control-group .wy-form-halves{float:left;display:block;margin-right:2.35765%;width:48.82117%}.wy-control-group .wy-form-halves:last-child,.wy-control-group .wy-form-halves:nth-of-type(2n){margin-right:0}.wy-control-group .wy-form-halves:nth-of-type(odd){clear:left}.wy-control-group .wy-form-thirds{float:left;display:block;margin-right:2.35765%;width:31.76157%}.wy-control-group .wy-form-thirds:last-child,.wy-control-group .wy-form-thirds:nth-of-type(3n){margin-right:0}.wy-control-group .wy-form-thirds:nth-of-type(3n+1){clear:left}.wy-control-group.wy-control-group-no-input .wy-control,.wy-control-no-input{margin:6px 0 0;font-size:90%}.wy-control-no-input{display:inline-block}.wy-control-group.fluid-input input[type=color],.wy-control-group.fluid-input input[type=date],.wy-control-group.fluid-input input[type=datetime-local],.wy-control-group.fluid-input input[type=datetime],.wy-control-group.fluid-input input[type=email],.wy-control-group.fluid-input input[type=month],.wy-control-group.fluid-input input[type=number],.wy-control-group.fluid-input input[type=password],.wy-control-group.fluid-input input[type=search],.wy-control-group.fluid-input input[type=tel],.wy-control-group.fluid-input input[type=text],.wy-control-group.fluid-input input[type=time],.wy-control-group.fluid-input input[type=url],.wy-control-group.fluid-input input[type=week]{width:100%}.wy-form-message-inline{padding-left:.3em;color:#666;font-size:90%}.wy-form-message{display:block;color:#999;font-size:70%;margin-top:.3125em;font-style:italic}.wy-form-message p{font-size:inherit;font-style:italic;margin-bottom:6px}.wy-form-message p:last-child{margin-bottom:0}input{line-height:normal}input[type=button],input[type=reset],input[type=submit]{-webkit-appearance:button;cursor:pointer;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;*overflow:visible}input[type=color],input[type=date],input[type=datetime-local],input[type=datetime],input[type=email],input[type=month],input[type=number],input[type=password],input[type=search],input[type=tel],input[type=text],input[type=time],input[type=url],input[type=week]{-webkit-appearance:none;padding:6px;display:inline-block;border:1px solid #ccc;font-size:80%;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;box-shadow:inset 0 1px 3px #ddd;border-radius:0;-webkit-transition:border .3s linear;-moz-transition:border .3s linear;transition:border 
.3s linear}input[type=datetime-local]{padding:.34375em .625em}input[disabled]{cursor:default}input[type=checkbox],input[type=radio]{padding:0;margin-right:.3125em;*height:13px;*width:13px}input[type=checkbox],input[type=radio],input[type=search]{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}input[type=search]::-webkit-search-cancel-button,input[type=search]::-webkit-search-decoration{-webkit-appearance:none}input[type=color]:focus,input[type=date]:focus,input[type=datetime-local]:focus,input[type=datetime]:focus,input[type=email]:focus,input[type=month]:focus,input[type=number]:focus,input[type=password]:focus,input[type=search]:focus,input[type=tel]:focus,input[type=text]:focus,input[type=time]:focus,input[type=url]:focus,input[type=week]:focus{outline:0;outline:thin dotted\9;border-color:#333}input.no-focus:focus{border-color:#ccc!important}input[type=checkbox]:focus,input[type=file]:focus,input[type=radio]:focus{outline:thin dotted #333;outline:1px auto #129fea}input[type=color][disabled],input[type=date][disabled],input[type=datetime-local][disabled],input[type=datetime][disabled],input[type=email][disabled],input[type=month][disabled],input[type=number][disabled],input[type=password][disabled],input[type=search][disabled],input[type=tel][disabled],input[type=text][disabled],input[type=time][disabled],input[type=url][disabled],input[type=week][disabled]{cursor:not-allowed;background-color:#fafafa}input:focus:invalid,select:focus:invalid,textarea:focus:invalid{color:#e74c3c;border:1px solid #e74c3c}input:focus:invalid:focus,select:focus:invalid:focus,textarea:focus:invalid:focus{border-color:#e74c3c}input[type=checkbox]:focus:invalid:focus,input[type=file]:focus:invalid:focus,input[type=radio]:focus:invalid:focus{outline-color:#e74c3c}input.wy-input-large{padding:12px;font-size:100%}textarea{overflow:auto;vertical-align:top;width:100%;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif}select,textarea{padding:.5em .625em;display:inline-block;border:1px solid #ccc;font-size:80%;box-shadow:inset 0 1px 3px #ddd;-webkit-transition:border .3s linear;-moz-transition:border .3s linear;transition:border .3s linear}select{border:1px solid #ccc;background-color:#fff}select[multiple]{height:auto}select:focus,textarea:focus{outline:0}input[readonly],select[disabled],select[readonly],textarea[disabled],textarea[readonly]{cursor:not-allowed;background-color:#fafafa}input[type=checkbox][disabled],input[type=radio][disabled]{cursor:not-allowed}.wy-checkbox,.wy-radio{margin:6px 0;color:#404040;display:block}.wy-checkbox input,.wy-radio input{vertical-align:baseline}.wy-form-message-inline{display:inline-block;*display:inline;*zoom:1;vertical-align:middle}.wy-input-prefix,.wy-input-suffix{white-space:nowrap;padding:6px}.wy-input-prefix .wy-input-context,.wy-input-suffix .wy-input-context{line-height:27px;padding:0 8px;display:inline-block;font-size:80%;background-color:#f3f6f6;border:1px solid #ccc;color:#999}.wy-input-suffix .wy-input-context{border-left:0}.wy-input-prefix .wy-input-context{border-right:0}.wy-switch{position:relative;display:block;height:24px;margin-top:12px;cursor:pointer}.wy-switch:before{left:0;top:0;width:36px;height:12px;background:#ccc}.wy-switch:after,.wy-switch:before{position:absolute;content:"";display:block;border-radius:4px;-webkit-transition:all .2s ease-in-out;-moz-transition:all .2s ease-in-out;transition:all .2s ease-in-out}.wy-switch:after{width:18px;height:18px;background:#999;left:-3px;top:-3px}.wy-switch 
span{position:absolute;left:48px;display:block;font-size:12px;color:#ccc;line-height:1}.wy-switch.active:before{background:#1e8449}.wy-switch.active:after{left:24px;background:#27ae60}.wy-switch.disabled{cursor:not-allowed;opacity:.8}.wy-control-group.wy-control-group-error .wy-form-message,.wy-control-group.wy-control-group-error>label{color:#e74c3c}.wy-control-group.wy-control-group-error input[type=color],.wy-control-group.wy-control-group-error input[type=date],.wy-control-group.wy-control-group-error input[type=datetime-local],.wy-control-group.wy-control-group-error input[type=datetime],.wy-control-group.wy-control-group-error input[type=email],.wy-control-group.wy-control-group-error input[type=month],.wy-control-group.wy-control-group-error input[type=number],.wy-control-group.wy-control-group-error input[type=password],.wy-control-group.wy-control-group-error input[type=search],.wy-control-group.wy-control-group-error input[type=tel],.wy-control-group.wy-control-group-error input[type=text],.wy-control-group.wy-control-group-error input[type=time],.wy-control-group.wy-control-group-error input[type=url],.wy-control-group.wy-control-group-error input[type=week],.wy-control-group.wy-control-group-error textarea{border:1px solid #e74c3c}.wy-inline-validate{white-space:nowrap}.wy-inline-validate .wy-input-context{padding:.5em .625em;display:inline-block;font-size:80%}.wy-inline-validate.wy-inline-validate-success .wy-input-context{color:#27ae60}.wy-inline-validate.wy-inline-validate-danger .wy-input-context{color:#e74c3c}.wy-inline-validate.wy-inline-validate-warning .wy-input-context{color:#e67e22}.wy-inline-validate.wy-inline-validate-info .wy-input-context{color:#2980b9}.rotate-90{-webkit-transform:rotate(90deg);-moz-transform:rotate(90deg);-ms-transform:rotate(90deg);-o-transform:rotate(90deg);transform:rotate(90deg)}.rotate-180{-webkit-transform:rotate(180deg);-moz-transform:rotate(180deg);-ms-transform:rotate(180deg);-o-transform:rotate(180deg);transform:rotate(180deg)}.rotate-270{-webkit-transform:rotate(270deg);-moz-transform:rotate(270deg);-ms-transform:rotate(270deg);-o-transform:rotate(270deg);transform:rotate(270deg)}.mirror{-webkit-transform:scaleX(-1);-moz-transform:scaleX(-1);-ms-transform:scaleX(-1);-o-transform:scaleX(-1);transform:scaleX(-1)}.mirror.rotate-90{-webkit-transform:scaleX(-1) rotate(90deg);-moz-transform:scaleX(-1) rotate(90deg);-ms-transform:scaleX(-1) rotate(90deg);-o-transform:scaleX(-1) rotate(90deg);transform:scaleX(-1) rotate(90deg)}.mirror.rotate-180{-webkit-transform:scaleX(-1) rotate(180deg);-moz-transform:scaleX(-1) rotate(180deg);-ms-transform:scaleX(-1) rotate(180deg);-o-transform:scaleX(-1) rotate(180deg);transform:scaleX(-1) rotate(180deg)}.mirror.rotate-270{-webkit-transform:scaleX(-1) rotate(270deg);-moz-transform:scaleX(-1) rotate(270deg);-ms-transform:scaleX(-1) rotate(270deg);-o-transform:scaleX(-1) rotate(270deg);transform:scaleX(-1) rotate(270deg)}@media only screen and (max-width:480px){.wy-form button[type=submit]{margin:.7em 0 0}.wy-form input[type=color],.wy-form input[type=date],.wy-form input[type=datetime-local],.wy-form input[type=datetime],.wy-form input[type=email],.wy-form input[type=month],.wy-form input[type=number],.wy-form input[type=password],.wy-form input[type=search],.wy-form input[type=tel],.wy-form input[type=text],.wy-form input[type=time],.wy-form input[type=url],.wy-form input[type=week],.wy-form label{margin-bottom:.3em;display:block}.wy-form input[type=color],.wy-form input[type=date],.wy-form 
input[type=datetime-local],.wy-form input[type=datetime],.wy-form input[type=email],.wy-form input[type=month],.wy-form input[type=number],.wy-form input[type=password],.wy-form input[type=search],.wy-form input[type=tel],.wy-form input[type=time],.wy-form input[type=url],.wy-form input[type=week]{margin-bottom:0}.wy-form-aligned .wy-control-group label{margin-bottom:.3em;text-align:left;display:block;width:100%}.wy-form-aligned .wy-control{margin:1.5em 0 0}.wy-form-message,.wy-form-message-inline,.wy-form .wy-help-inline{display:block;font-size:80%;padding:6px 0}}@media screen and (max-width:768px){.tablet-hide{display:none}}@media screen and (max-width:480px){.mobile-hide{display:none}}.float-left{float:left}.float-right{float:right}.full-width{width:100%}.rst-content table.docutils,.rst-content table.field-list,.wy-table{border-collapse:collapse;border-spacing:0;empty-cells:show;margin-bottom:24px}.rst-content table.docutils caption,.rst-content table.field-list caption,.wy-table caption{color:#000;font:italic 85%/1 arial,sans-serif;padding:1em 0;text-align:center}.rst-content table.docutils td,.rst-content table.docutils th,.rst-content table.field-list td,.rst-content table.field-list th,.wy-table td,.wy-table th{font-size:90%;margin:0;overflow:visible;padding:8px 16px}.rst-content table.docutils td:first-child,.rst-content table.docutils th:first-child,.rst-content table.field-list td:first-child,.rst-content table.field-list th:first-child,.wy-table td:first-child,.wy-table th:first-child{border-left-width:0}.rst-content table.docutils thead,.rst-content table.field-list thead,.wy-table thead{color:#000;text-align:left;vertical-align:bottom;white-space:nowrap}.rst-content table.docutils thead th,.rst-content table.field-list thead th,.wy-table thead th{font-weight:700;border-bottom:2px solid #e1e4e5}.rst-content table.docutils td,.rst-content table.field-list td,.wy-table td{background-color:transparent;vertical-align:middle}.rst-content table.docutils td p,.rst-content table.field-list td p,.wy-table td p{line-height:18px}.rst-content table.docutils td p:last-child,.rst-content table.field-list td p:last-child,.wy-table td p:last-child{margin-bottom:0}.rst-content table.docutils .wy-table-cell-min,.rst-content table.field-list .wy-table-cell-min,.wy-table .wy-table-cell-min{width:1%;padding-right:0}.rst-content table.docutils .wy-table-cell-min input[type=checkbox],.rst-content table.field-list .wy-table-cell-min input[type=checkbox],.wy-table .wy-table-cell-min input[type=checkbox]{margin:0}.wy-table-secondary{color:grey;font-size:90%}.wy-table-tertiary{color:grey;font-size:80%}.rst-content table.docutils:not(.field-list) tr:nth-child(2n-1) td,.wy-table-backed,.wy-table-odd td,.wy-table-striped tr:nth-child(2n-1) td{background-color:#f3f6f6}.rst-content table.docutils,.wy-table-bordered-all{border:1px solid #e1e4e5}.rst-content table.docutils td,.wy-table-bordered-all td{border-bottom:1px solid #e1e4e5;border-left:1px solid #e1e4e5}.rst-content table.docutils tbody>tr:last-child td,.wy-table-bordered-all tbody>tr:last-child td{border-bottom-width:0}.wy-table-bordered{border:1px solid #e1e4e5}.wy-table-bordered-rows td{border-bottom:1px solid #e1e4e5}.wy-table-bordered-rows tbody>tr:last-child td{border-bottom-width:0}.wy-table-horizontal td,.wy-table-horizontal th{border-width:0 0 1px;border-bottom:1px solid #e1e4e5}.wy-table-horizontal tbody>tr:last-child td{border-bottom-width:0}.wy-table-responsive{margin-bottom:24px;max-width:100%;overflow:auto}.wy-table-responsive 
table{margin-bottom:0!important}.wy-table-responsive table td,.wy-table-responsive table th{white-space:nowrap}a{color:#2980b9;text-decoration:none;cursor:pointer}a:hover{color:#3091d1}a:visited{color:#9b59b6}html{height:100%}body,html{overflow-x:hidden}body{font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;font-weight:400;color:#404040;min-height:100%;background:#edf0f2}.wy-text-left{text-align:left}.wy-text-center{text-align:center}.wy-text-right{text-align:right}.wy-text-large{font-size:120%}.wy-text-normal{font-size:100%}.wy-text-small,small{font-size:80%}.wy-text-strike{text-decoration:line-through}.wy-text-warning{color:#e67e22!important}a.wy-text-warning:hover{color:#eb9950!important}.wy-text-info{color:#2980b9!important}a.wy-text-info:hover{color:#409ad5!important}.wy-text-success{color:#27ae60!important}a.wy-text-success:hover{color:#36d278!important}.wy-text-danger{color:#e74c3c!important}a.wy-text-danger:hover{color:#ed7669!important}.wy-text-neutral{color:#404040!important}a.wy-text-neutral:hover{color:#595959!important}.rst-content .toctree-wrapper>p.caption,h1,h2,h3,h4,h5,h6,legend{margin-top:0;font-weight:700;font-family:Roboto Slab,ff-tisa-web-pro,Georgia,Arial,sans-serif}p{line-height:24px;font-size:16px;margin:0 0 24px}h1{font-size:175%}.rst-content .toctree-wrapper>p.caption,h2{font-size:150%}h3{font-size:125%}h4{font-size:115%}h5{font-size:110%}h6{font-size:100%}hr{display:block;height:1px;border:0;border-top:1px solid #e1e4e5;margin:24px 0;padding:0}.rst-content code,.rst-content tt,code{white-space:nowrap;max-width:100%;background:#fff;border:1px solid #e1e4e5;font-size:75%;padding:0 5px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;color:#e74c3c;overflow-x:auto}.rst-content tt.code-large,code.code-large{font-size:90%}.rst-content .section ul,.rst-content .toctree-wrapper ul,.rst-content section ul,.wy-plain-list-disc,article ul{list-style:disc;line-height:24px;margin-bottom:24px}.rst-content .section ul li,.rst-content .toctree-wrapper ul li,.rst-content section ul li,.wy-plain-list-disc li,article ul li{list-style:disc;margin-left:24px}.rst-content .section ul li p:last-child,.rst-content .section ul li ul,.rst-content .toctree-wrapper ul li p:last-child,.rst-content .toctree-wrapper ul li ul,.rst-content section ul li p:last-child,.rst-content section ul li ul,.wy-plain-list-disc li p:last-child,.wy-plain-list-disc li ul,article ul li p:last-child,article ul li ul{margin-bottom:0}.rst-content .section ul li li,.rst-content .toctree-wrapper ul li li,.rst-content section ul li li,.wy-plain-list-disc li li,article ul li li{list-style:circle}.rst-content .section ul li li li,.rst-content .toctree-wrapper ul li li li,.rst-content section ul li li li,.wy-plain-list-disc li li li,article ul li li li{list-style:square}.rst-content .section ul li ol li,.rst-content .toctree-wrapper ul li ol li,.rst-content section ul li ol li,.wy-plain-list-disc li ol li,article ul li ol li{list-style:decimal}.rst-content .section ol,.rst-content .section ol.arabic,.rst-content .toctree-wrapper ol,.rst-content .toctree-wrapper ol.arabic,.rst-content section ol,.rst-content section ol.arabic,.wy-plain-list-decimal,article ol{list-style:decimal;line-height:24px;margin-bottom:24px}.rst-content .section ol.arabic li,.rst-content .section ol li,.rst-content .toctree-wrapper ol.arabic li,.rst-content .toctree-wrapper ol li,.rst-content section ol.arabic li,.rst-content section ol li,.wy-plain-list-decimal li,article ol 
li{list-style:decimal;margin-left:24px}.rst-content .section ol.arabic li ul,.rst-content .section ol li p:last-child,.rst-content .section ol li ul,.rst-content .toctree-wrapper ol.arabic li ul,.rst-content .toctree-wrapper ol li p:last-child,.rst-content .toctree-wrapper ol li ul,.rst-content section ol.arabic li ul,.rst-content section ol li p:last-child,.rst-content section ol li ul,.wy-plain-list-decimal li p:last-child,.wy-plain-list-decimal li ul,article ol li p:last-child,article ol li ul{margin-bottom:0}.rst-content .section ol.arabic li ul li,.rst-content .section ol li ul li,.rst-content .toctree-wrapper ol.arabic li ul li,.rst-content .toctree-wrapper ol li ul li,.rst-content section ol.arabic li ul li,.rst-content section ol li ul li,.wy-plain-list-decimal li ul li,article ol li ul li{list-style:disc}.wy-breadcrumbs{*zoom:1}.wy-breadcrumbs:after,.wy-breadcrumbs:before{display:table;content:""}.wy-breadcrumbs:after{clear:both}.wy-breadcrumbs>li{display:inline-block;padding-top:5px}.wy-breadcrumbs>li.wy-breadcrumbs-aside{float:right}.rst-content .wy-breadcrumbs>li code,.rst-content .wy-breadcrumbs>li tt,.wy-breadcrumbs>li .rst-content tt,.wy-breadcrumbs>li code{all:inherit;color:inherit}.breadcrumb-item:before{content:"/";color:#bbb;font-size:13px;padding:0 6px 0 3px}.wy-breadcrumbs-extra{margin-bottom:0;color:#b3b3b3;font-size:80%;display:inline-block}@media screen and (max-width:480px){.wy-breadcrumbs-extra,.wy-breadcrumbs li.wy-breadcrumbs-aside{display:none}}@media print{.wy-breadcrumbs li.wy-breadcrumbs-aside{display:none}}html{font-size:16px}.wy-affix{position:fixed;top:1.618em}.wy-menu a:hover{text-decoration:none}.wy-menu-horiz{*zoom:1}.wy-menu-horiz:after,.wy-menu-horiz:before{display:table;content:""}.wy-menu-horiz:after{clear:both}.wy-menu-horiz li,.wy-menu-horiz ul{display:inline-block}.wy-menu-horiz li:hover{background:hsla(0,0%,100%,.1)}.wy-menu-horiz li.divide-left{border-left:1px solid #404040}.wy-menu-horiz li.divide-right{border-right:1px solid #404040}.wy-menu-horiz a{height:32px;display:inline-block;line-height:32px;padding:0 16px}.wy-menu-vertical{width:300px}.wy-menu-vertical header,.wy-menu-vertical p.caption{color:#55a5d9;height:32px;line-height:32px;padding:0 1.618em;margin:12px 0 0;display:block;font-weight:700;text-transform:uppercase;font-size:85%;white-space:nowrap}.wy-menu-vertical ul{margin-bottom:0}.wy-menu-vertical li.divide-top{border-top:1px solid #404040}.wy-menu-vertical li.divide-bottom{border-bottom:1px solid #404040}.wy-menu-vertical li.current{background:#e3e3e3}.wy-menu-vertical li.current a{color:grey;border-right:1px solid #c9c9c9;padding:.4045em 2.427em}.wy-menu-vertical li.current a:hover{background:#d6d6d6}.rst-content .wy-menu-vertical li tt,.wy-menu-vertical li .rst-content tt,.wy-menu-vertical li code{border:none;background:inherit;color:inherit;padding-left:0;padding-right:0}.wy-menu-vertical li button.toctree-expand{display:block;float:left;margin-left:-1.2em;line-height:18px;color:#4d4d4d;border:none;background:none;padding:0}.wy-menu-vertical li.current>a,.wy-menu-vertical li.on a{color:#404040;font-weight:700;position:relative;background:#fcfcfc;border:none;padding:.4045em 1.618em}.wy-menu-vertical li.current>a:hover,.wy-menu-vertical li.on a:hover{background:#fcfcfc}.wy-menu-vertical li.current>a:hover button.toctree-expand,.wy-menu-vertical li.on a:hover button.toctree-expand{color:grey}.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a 
button.toctree-expand{display:block;line-height:18px;color:#333}.wy-menu-vertical li.toctree-l1.current>a{border-bottom:1px solid #c9c9c9;border-top:1px solid #c9c9c9}.wy-menu-vertical .toctree-l1.current .toctree-l2>ul,.wy-menu-vertical .toctree-l2.current .toctree-l3>ul,.wy-menu-vertical .toctree-l3.current .toctree-l4>ul,.wy-menu-vertical .toctree-l4.current .toctree-l5>ul,.wy-menu-vertical .toctree-l5.current .toctree-l6>ul,.wy-menu-vertical .toctree-l6.current .toctree-l7>ul,.wy-menu-vertical .toctree-l7.current .toctree-l8>ul,.wy-menu-vertical .toctree-l8.current .toctree-l9>ul,.wy-menu-vertical .toctree-l9.current .toctree-l10>ul,.wy-menu-vertical .toctree-l10.current .toctree-l11>ul{display:none}.wy-menu-vertical .toctree-l1.current .current.toctree-l2>ul,.wy-menu-vertical .toctree-l2.current .current.toctree-l3>ul,.wy-menu-vertical .toctree-l3.current .current.toctree-l4>ul,.wy-menu-vertical .toctree-l4.current .current.toctree-l5>ul,.wy-menu-vertical .toctree-l5.current .current.toctree-l6>ul,.wy-menu-vertical .toctree-l6.current .current.toctree-l7>ul,.wy-menu-vertical .toctree-l7.current .current.toctree-l8>ul,.wy-menu-vertical .toctree-l8.current .current.toctree-l9>ul,.wy-menu-vertical .toctree-l9.current .current.toctree-l10>ul,.wy-menu-vertical .toctree-l10.current .current.toctree-l11>ul{display:block}.wy-menu-vertical li.toctree-l3,.wy-menu-vertical li.toctree-l4{font-size:.9em}.wy-menu-vertical li.toctree-l2 a,.wy-menu-vertical li.toctree-l3 a,.wy-menu-vertical li.toctree-l4 a,.wy-menu-vertical li.toctree-l5 a,.wy-menu-vertical li.toctree-l6 a,.wy-menu-vertical li.toctree-l7 a,.wy-menu-vertical li.toctree-l8 a,.wy-menu-vertical li.toctree-l9 a,.wy-menu-vertical li.toctree-l10 a{color:#404040}.wy-menu-vertical li.toctree-l2 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l3 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l4 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l5 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l6 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l7 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l8 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l9 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l10 a:hover button.toctree-expand{color:grey}.wy-menu-vertical li.toctree-l2.current li.toctree-l3>a,.wy-menu-vertical li.toctree-l3.current li.toctree-l4>a,.wy-menu-vertical li.toctree-l4.current li.toctree-l5>a,.wy-menu-vertical li.toctree-l5.current li.toctree-l6>a,.wy-menu-vertical li.toctree-l6.current li.toctree-l7>a,.wy-menu-vertical li.toctree-l7.current li.toctree-l8>a,.wy-menu-vertical li.toctree-l8.current li.toctree-l9>a,.wy-menu-vertical li.toctree-l9.current li.toctree-l10>a,.wy-menu-vertical li.toctree-l10.current li.toctree-l11>a{display:block}.wy-menu-vertical li.toctree-l2.current>a{padding:.4045em 2.427em}.wy-menu-vertical li.toctree-l2.current li.toctree-l3>a{padding:.4045em 1.618em .4045em 4.045em}.wy-menu-vertical li.toctree-l3.current>a{padding:.4045em 4.045em}.wy-menu-vertical li.toctree-l3.current li.toctree-l4>a{padding:.4045em 1.618em .4045em 5.663em}.wy-menu-vertical li.toctree-l4.current>a{padding:.4045em 5.663em}.wy-menu-vertical li.toctree-l4.current li.toctree-l5>a{padding:.4045em 1.618em .4045em 7.281em}.wy-menu-vertical li.toctree-l5.current>a{padding:.4045em 7.281em}.wy-menu-vertical li.toctree-l5.current li.toctree-l6>a{padding:.4045em 1.618em .4045em 8.899em}.wy-menu-vertical li.toctree-l6.current>a{padding:.4045em 
8.899em}.wy-menu-vertical li.toctree-l6.current li.toctree-l7>a{padding:.4045em 1.618em .4045em 10.517em}.wy-menu-vertical li.toctree-l7.current>a{padding:.4045em 10.517em}.wy-menu-vertical li.toctree-l7.current li.toctree-l8>a{padding:.4045em 1.618em .4045em 12.135em}.wy-menu-vertical li.toctree-l8.current>a{padding:.4045em 12.135em}.wy-menu-vertical li.toctree-l8.current li.toctree-l9>a{padding:.4045em 1.618em .4045em 13.753em}.wy-menu-vertical li.toctree-l9.current>a{padding:.4045em 13.753em}.wy-menu-vertical li.toctree-l9.current li.toctree-l10>a{padding:.4045em 1.618em .4045em 15.371em}.wy-menu-vertical li.toctree-l10.current>a{padding:.4045em 15.371em}.wy-menu-vertical li.toctree-l10.current li.toctree-l11>a{padding:.4045em 1.618em .4045em 16.989em}.wy-menu-vertical li.toctree-l2.current>a,.wy-menu-vertical li.toctree-l2.current li.toctree-l3>a{background:#c9c9c9}.wy-menu-vertical li.toctree-l2 button.toctree-expand{color:#a3a3a3}.wy-menu-vertical li.toctree-l3.current>a,.wy-menu-vertical li.toctree-l3.current li.toctree-l4>a{background:#bdbdbd}.wy-menu-vertical li.toctree-l3 button.toctree-expand{color:#969696}.wy-menu-vertical li.current ul{display:block}.wy-menu-vertical li ul{margin-bottom:0;display:none}.wy-menu-vertical li ul li a{margin-bottom:0;color:#d9d9d9;font-weight:400}.wy-menu-vertical a{line-height:18px;padding:.4045em 1.618em;display:block;position:relative;font-size:90%;color:#d9d9d9}.wy-menu-vertical a:hover{background-color:#4e4a4a;cursor:pointer}.wy-menu-vertical a:hover button.toctree-expand{color:#d9d9d9}.wy-menu-vertical a:active{background-color:#2980b9;cursor:pointer;color:#fff}.wy-menu-vertical a:active button.toctree-expand{color:#fff}.wy-side-nav-search{display:block;width:300px;padding:.809em;margin-bottom:.809em;z-index:200;background-color:#2980b9;text-align:center;color:#fcfcfc}.wy-side-nav-search input[type=text]{width:100%;border-radius:50px;padding:6px 12px;border-color:#2472a4}.wy-side-nav-search img{display:block;margin:auto auto .809em;height:45px;width:45px;background-color:#2980b9;padding:5px;border-radius:100%}.wy-side-nav-search .wy-dropdown>a,.wy-side-nav-search>a{color:#fcfcfc;font-size:100%;font-weight:700;display:inline-block;padding:4px 6px;margin-bottom:.809em;max-width:100%}.wy-side-nav-search .wy-dropdown>a:hover,.wy-side-nav-search>a:hover{background:hsla(0,0%,100%,.1)}.wy-side-nav-search .wy-dropdown>a img.logo,.wy-side-nav-search>a img.logo{display:block;margin:0 auto;height:auto;width:auto;border-radius:0;max-width:100%;background:transparent}.wy-side-nav-search .wy-dropdown>a.icon img.logo,.wy-side-nav-search>a.icon img.logo{margin-top:.85em}.wy-side-nav-search>div.version{margin-top:-.4045em;margin-bottom:.809em;font-weight:400;color:hsla(0,0%,100%,.3)}.wy-nav .wy-menu-vertical header{color:#2980b9}.wy-nav .wy-menu-vertical a{color:#b3b3b3}.wy-nav .wy-menu-vertical a:hover{background-color:#2980b9;color:#fff}[data-menu-wrap]{-webkit-transition:all .2s ease-in;-moz-transition:all .2s ease-in;transition:all .2s 
ease-in;position:absolute;opacity:1;width:100%;opacity:0}[data-menu-wrap].move-center{left:0;right:auto;opacity:1}[data-menu-wrap].move-left{right:auto;left:-100%;opacity:0}[data-menu-wrap].move-right{right:-100%;left:auto;opacity:0}.wy-body-for-nav{background:#fcfcfc}.wy-grid-for-nav{position:absolute;width:100%;height:100%}.wy-nav-side{position:fixed;top:0;bottom:0;left:0;padding-bottom:2em;width:300px;overflow-x:hidden;overflow-y:hidden;min-height:100%;color:#9b9b9b;background:#343131;z-index:200}.wy-side-scroll{width:320px;position:relative;overflow-x:hidden;overflow-y:scroll;height:100%}.wy-nav-top{display:none;background:#2980b9;color:#fff;padding:.4045em .809em;position:relative;line-height:50px;text-align:center;font-size:100%;*zoom:1}.wy-nav-top:after,.wy-nav-top:before{display:table;content:""}.wy-nav-top:after{clear:both}.wy-nav-top a{color:#fff;font-weight:700}.wy-nav-top img{margin-right:12px;height:45px;width:45px;background-color:#2980b9;padding:5px;border-radius:100%}.wy-nav-top i{font-size:30px;float:left;cursor:pointer;padding-top:inherit}.wy-nav-content-wrap{margin-left:300px;background:#fcfcfc;min-height:100%}.wy-nav-content{padding:1.618em 3.236em;height:100%;max-width:800px;margin:auto}.wy-body-mask{position:fixed;width:100%;height:100%;background:rgba(0,0,0,.2);display:none;z-index:499}.wy-body-mask.on{display:block}footer{color:grey}footer p{margin-bottom:12px}.rst-content footer span.commit tt,footer span.commit .rst-content tt,footer span.commit code{padding:0;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;font-size:1em;background:none;border:none;color:grey}.rst-footer-buttons{*zoom:1}.rst-footer-buttons:after,.rst-footer-buttons:before{width:100%;display:table;content:""}.rst-footer-buttons:after{clear:both}.rst-breadcrumbs-buttons{margin-top:12px;*zoom:1}.rst-breadcrumbs-buttons:after,.rst-breadcrumbs-buttons:before{display:table;content:""}.rst-breadcrumbs-buttons:after{clear:both}#search-results .search li{margin-bottom:24px;border-bottom:1px solid #e1e4e5;padding-bottom:24px}#search-results .search li:first-child{border-top:1px solid #e1e4e5;padding-top:24px}#search-results .search li a{font-size:120%;margin-bottom:12px;display:inline-block}#search-results .context{color:grey;font-size:90%}.genindextable li>ul{margin-left:24px}@media screen and (max-width:768px){.wy-body-for-nav{background:#fcfcfc}.wy-nav-top{display:block}.wy-nav-side{left:-300px}.wy-nav-side.shift{width:85%;left:0}.wy-menu.wy-menu-vertical,.wy-side-nav-search,.wy-side-scroll{width:auto}.wy-nav-content-wrap{margin-left:0}.wy-nav-content-wrap .wy-nav-content{padding:1.618em}.wy-nav-content-wrap.shift{position:fixed;min-width:100%;left:85%;top:0;height:100%;overflow:hidden}}@media screen and (min-width:1100px){.wy-nav-content-wrap{background:rgba(0,0,0,.05)}.wy-nav-content{margin:0;background:#fcfcfc}}@media print{.rst-versions,.wy-nav-side,footer{display:none}.wy-nav-content-wrap{margin-left:0}}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60;*zoom:1}.rst-versions .rst-current-version:after,.rst-versions .rst-current-version:before{display:table;content:""}.rst-versions 
.rst-current-version:after{clear:both}.rst-content .code-block-caption .rst-versions .rst-current-version .headerlink,.rst-content .eqno .rst-versions .rst-current-version .headerlink,.rst-content .rst-versions .rst-current-version .admonition-title,.rst-content code.download .rst-versions .rst-current-version span:first-child,.rst-content dl dt .rst-versions .rst-current-version .headerlink,.rst-content h1 .rst-versions .rst-current-version .headerlink,.rst-content h2 .rst-versions .rst-current-version .headerlink,.rst-content h3 .rst-versions .rst-current-version .headerlink,.rst-content h4 .rst-versions .rst-current-version .headerlink,.rst-content h5 .rst-versions .rst-current-version .headerlink,.rst-content h6 .rst-versions .rst-current-version .headerlink,.rst-content p .rst-versions .rst-current-version .headerlink,.rst-content table>caption .rst-versions .rst-current-version .headerlink,.rst-content tt.download .rst-versions .rst-current-version span:first-child,.rst-versions .rst-current-version .fa,.rst-versions .rst-current-version .icon,.rst-versions .rst-current-version .rst-content .admonition-title,.rst-versions .rst-current-version .rst-content .code-block-caption .headerlink,.rst-versions .rst-current-version .rst-content .eqno .headerlink,.rst-versions .rst-current-version .rst-content code.download span:first-child,.rst-versions .rst-current-version .rst-content dl dt .headerlink,.rst-versions .rst-current-version .rst-content h1 .headerlink,.rst-versions .rst-current-version .rst-content h2 .headerlink,.rst-versions .rst-current-version .rst-content h3 .headerlink,.rst-versions .rst-current-version .rst-content h4 .headerlink,.rst-versions .rst-current-version .rst-content h5 .headerlink,.rst-versions .rst-current-version .rst-content h6 .headerlink,.rst-versions .rst-current-version .rst-content p .headerlink,.rst-versions .rst-current-version .rst-content table>caption .headerlink,.rst-versions .rst-current-version .rst-content tt.download span:first-child,.rst-versions .rst-current-version .wy-menu-vertical li button.toctree-expand,.wy-menu-vertical li .rst-versions .rst-current-version button.toctree-expand{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and 
(max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}.rst-content .toctree-wrapper>p.caption,.rst-content h1,.rst-content h2,.rst-content h3,.rst-content h4,.rst-content h5,.rst-content h6{margin-bottom:24px}.rst-content img{max-width:100%;height:auto}.rst-content div.figure,.rst-content figure{margin-bottom:24px}.rst-content div.figure .caption-text,.rst-content figure .caption-text{font-style:italic}.rst-content div.figure p:last-child.caption,.rst-content figure p:last-child.caption{margin-bottom:0}.rst-content div.figure.align-center,.rst-content figure.align-center{text-align:center}.rst-content .section>a>img,.rst-content .section>img,.rst-content section>a>img,.rst-content section>img{margin-bottom:24px}.rst-content abbr[title]{text-decoration:none}.rst-content.style-external-links a.reference.external:after{font-family:FontAwesome;content:"\f08e";color:#b3b3b3;vertical-align:super;font-size:60%;margin:0 .2em}.rst-content blockquote{margin-left:24px;line-height:24px;margin-bottom:24px}.rst-content pre.literal-block{white-space:pre;margin:0;padding:12px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;display:block;overflow:auto}.rst-content div[class^=highlight],.rst-content pre.literal-block{border:1px solid #e1e4e5;overflow-x:auto;margin:1px 0 24px}.rst-content div[class^=highlight] div[class^=highlight],.rst-content pre.literal-block div[class^=highlight]{padding:0;border:none;margin:0}.rst-content div[class^=highlight] td.code{width:100%}.rst-content .linenodiv pre{border-right:1px solid #e6e9ea;margin:0;padding:12px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;user-select:none;pointer-events:none}.rst-content div[class^=highlight] pre{white-space:pre;margin:0;padding:12px;display:block;overflow:auto}.rst-content div[class^=highlight] pre .hll{display:block;margin:0 -12px;padding:0 12px}.rst-content .linenodiv pre,.rst-content div[class^=highlight] pre,.rst-content pre.literal-block{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;font-size:12px;line-height:1.4}.rst-content div.highlight .gp,.rst-content div.highlight span.linenos{user-select:none;pointer-events:none}.rst-content div.highlight span.linenos{display:inline-block;padding-left:0;padding-right:12px;margin-right:12px;border-right:1px solid #e6e9ea}.rst-content .code-block-caption{font-style:italic;font-size:85%;line-height:1;padding:1em 0;text-align:center}@media print{.rst-content .codeblock,.rst-content div[class^=highlight],.rst-content div[class^=highlight] pre{white-space:pre-wrap}}.rst-content .admonition,.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .danger,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .note,.rst-content .seealso,.rst-content .tip,.rst-content .warning{clear:both}.rst-content .admonition-todo .last,.rst-content .admonition-todo>:last-child,.rst-content .admonition .last,.rst-content .admonition>:last-child,.rst-content .attention .last,.rst-content .attention>:last-child,.rst-content .caution .last,.rst-content .caution>:last-child,.rst-content .danger .last,.rst-content .danger>:last-child,.rst-content .error .last,.rst-content .error>:last-child,.rst-content .hint .last,.rst-content .hint>:last-child,.rst-content .important .last,.rst-content .important>:last-child,.rst-content .note .last,.rst-content .note>:last-child,.rst-content .seealso 
.last,.rst-content .seealso>:last-child,.rst-content .tip .last,.rst-content .tip>:last-child,.rst-content .warning .last,.rst-content .warning>:last-child{margin-bottom:0}.rst-content .admonition-title:before{margin-right:4px}.rst-content .admonition table{border-color:rgba(0,0,0,.1)}.rst-content .admonition table td,.rst-content .admonition table th{background:transparent!important;border-color:rgba(0,0,0,.1)!important}.rst-content .section ol.loweralpha,.rst-content .section ol.loweralpha>li,.rst-content .toctree-wrapper ol.loweralpha,.rst-content .toctree-wrapper ol.loweralpha>li,.rst-content section ol.loweralpha,.rst-content section ol.loweralpha>li{list-style:lower-alpha}.rst-content .section ol.upperalpha,.rst-content .section ol.upperalpha>li,.rst-content .toctree-wrapper ol.upperalpha,.rst-content .toctree-wrapper ol.upperalpha>li,.rst-content section ol.upperalpha,.rst-content section ol.upperalpha>li{list-style:upper-alpha}.rst-content .section ol li>*,.rst-content .section ul li>*,.rst-content .toctree-wrapper ol li>*,.rst-content .toctree-wrapper ul li>*,.rst-content section ol li>*,.rst-content section ul li>*{margin-top:12px;margin-bottom:12px}.rst-content .section ol li>:first-child,.rst-content .section ul li>:first-child,.rst-content .toctree-wrapper ol li>:first-child,.rst-content .toctree-wrapper ul li>:first-child,.rst-content section ol li>:first-child,.rst-content section ul li>:first-child{margin-top:0}.rst-content .section ol li>p,.rst-content .section ol li>p:last-child,.rst-content .section ul li>p,.rst-content .section ul li>p:last-child,.rst-content .toctree-wrapper ol li>p,.rst-content .toctree-wrapper ol li>p:last-child,.rst-content .toctree-wrapper ul li>p,.rst-content .toctree-wrapper ul li>p:last-child,.rst-content section ol li>p,.rst-content section ol li>p:last-child,.rst-content section ul li>p,.rst-content section ul li>p:last-child{margin-bottom:12px}.rst-content .section ol li>p:only-child,.rst-content .section ol li>p:only-child:last-child,.rst-content .section ul li>p:only-child,.rst-content .section ul li>p:only-child:last-child,.rst-content .toctree-wrapper ol li>p:only-child,.rst-content .toctree-wrapper ol li>p:only-child:last-child,.rst-content .toctree-wrapper ul li>p:only-child,.rst-content .toctree-wrapper ul li>p:only-child:last-child,.rst-content section ol li>p:only-child,.rst-content section ol li>p:only-child:last-child,.rst-content section ul li>p:only-child,.rst-content section ul li>p:only-child:last-child{margin-bottom:0}.rst-content .section ol li>ol,.rst-content .section ol li>ul,.rst-content .section ul li>ol,.rst-content .section ul li>ul,.rst-content .toctree-wrapper ol li>ol,.rst-content .toctree-wrapper ol li>ul,.rst-content .toctree-wrapper ul li>ol,.rst-content .toctree-wrapper ul li>ul,.rst-content section ol li>ol,.rst-content section ol li>ul,.rst-content section ul li>ol,.rst-content section ul li>ul{margin-bottom:12px}.rst-content .section ol.simple li>*,.rst-content .section ol.simple li ol,.rst-content .section ol.simple li ul,.rst-content .section ul.simple li>*,.rst-content .section ul.simple li ol,.rst-content .section ul.simple li ul,.rst-content .toctree-wrapper ol.simple li>*,.rst-content .toctree-wrapper ol.simple li ol,.rst-content .toctree-wrapper ol.simple li ul,.rst-content .toctree-wrapper ul.simple li>*,.rst-content .toctree-wrapper ul.simple li ol,.rst-content .toctree-wrapper ul.simple li ul,.rst-content section ol.simple li>*,.rst-content section ol.simple li ol,.rst-content section ol.simple li 
ul,.rst-content section ul.simple li>*,.rst-content section ul.simple li ol,.rst-content section ul.simple li ul{margin-top:0;margin-bottom:0}.rst-content .line-block{margin-left:0;margin-bottom:24px;line-height:24px}.rst-content .line-block .line-block{margin-left:24px;margin-bottom:0}.rst-content .topic-title{font-weight:700;margin-bottom:12px}.rst-content .toc-backref{color:#404040}.rst-content .align-right{float:right;margin:0 0 24px 24px}.rst-content .align-left{float:left;margin:0 24px 24px 0}.rst-content .align-center{margin:auto}.rst-content .align-center:not(table){display:block}.rst-content .code-block-caption .headerlink,.rst-content .eqno .headerlink,.rst-content .toctree-wrapper>p.caption .headerlink,.rst-content dl dt .headerlink,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content p.caption .headerlink,.rst-content p .headerlink,.rst-content table>caption .headerlink{opacity:0;font-size:14px;font-family:FontAwesome;margin-left:.5em}.rst-content .code-block-caption .headerlink:focus,.rst-content .code-block-caption:hover .headerlink,.rst-content .eqno .headerlink:focus,.rst-content .eqno:hover .headerlink,.rst-content .toctree-wrapper>p.caption .headerlink:focus,.rst-content .toctree-wrapper>p.caption:hover .headerlink,.rst-content dl dt .headerlink:focus,.rst-content dl dt:hover .headerlink,.rst-content h1 .headerlink:focus,.rst-content h1:hover .headerlink,.rst-content h2 .headerlink:focus,.rst-content h2:hover .headerlink,.rst-content h3 .headerlink:focus,.rst-content h3:hover .headerlink,.rst-content h4 .headerlink:focus,.rst-content h4:hover .headerlink,.rst-content h5 .headerlink:focus,.rst-content h5:hover .headerlink,.rst-content h6 .headerlink:focus,.rst-content h6:hover .headerlink,.rst-content p.caption .headerlink:focus,.rst-content p.caption:hover .headerlink,.rst-content p .headerlink:focus,.rst-content p:hover .headerlink,.rst-content table>caption .headerlink:focus,.rst-content table>caption:hover .headerlink{opacity:1}.rst-content p a{overflow-wrap:anywhere}.rst-content .wy-table td p,.rst-content .wy-table td ul,.rst-content .wy-table th p,.rst-content .wy-table th ul,.rst-content table.docutils td p,.rst-content table.docutils td ul,.rst-content table.docutils th p,.rst-content table.docutils th ul,.rst-content table.field-list td p,.rst-content table.field-list td ul,.rst-content table.field-list th p,.rst-content table.field-list th ul{font-size:inherit}.rst-content .btn:focus{outline:2px solid}.rst-content table>caption .headerlink:after{font-size:12px}.rst-content .centered{text-align:center}.rst-content .sidebar{float:right;width:40%;display:block;margin:0 0 24px 24px;padding:24px;background:#f3f6f6;border:1px solid #e1e4e5}.rst-content .sidebar dl,.rst-content .sidebar p,.rst-content .sidebar ul{font-size:90%}.rst-content .sidebar .last,.rst-content .sidebar>:last-child{margin-bottom:0}.rst-content .sidebar .sidebar-title{display:block;font-family:Roboto Slab,ff-tisa-web-pro,Georgia,Arial,sans-serif;font-weight:700;background:#e1e4e5;padding:6px 12px;margin:-24px -24px 24px;font-size:100%}.rst-content .highlighted{background:#f1c40f;box-shadow:0 0 0 2px #f1c40f;display:inline;font-weight:700}.rst-content .citation-reference,.rst-content .footnote-reference{vertical-align:baseline;position:relative;top:-.4em;line-height:0;font-size:90%}.rst-content .citation-reference>span.fn-bracket,.rst-content 
.footnote-reference>span.fn-bracket{display:none}.rst-content .hlist{width:100%}.rst-content dl dt span.classifier:before{content:" : "}.rst-content dl dt span.classifier-delimiter{display:none!important}html.writer-html4 .rst-content table.docutils.citation,html.writer-html4 .rst-content table.docutils.footnote{background:none;border:none}html.writer-html4 .rst-content table.docutils.citation td,html.writer-html4 .rst-content table.docutils.citation tr,html.writer-html4 .rst-content table.docutils.footnote td,html.writer-html4 .rst-content table.docutils.footnote tr{border:none;background-color:transparent!important;white-space:normal}html.writer-html4 .rst-content table.docutils.citation td.label,html.writer-html4 .rst-content table.docutils.footnote td.label{padding-left:0;padding-right:0;vertical-align:top}html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.field-list,html.writer-html5 .rst-content dl.footnote{display:grid;grid-template-columns:auto minmax(80%,95%)}html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.field-list>dt,html.writer-html5 .rst-content dl.footnote>dt{display:inline-grid;grid-template-columns:max-content auto}html.writer-html5 .rst-content aside.citation,html.writer-html5 .rst-content aside.footnote,html.writer-html5 .rst-content div.citation{display:grid;grid-template-columns:auto auto minmax(.65rem,auto) minmax(40%,95%)}html.writer-html5 .rst-content aside.citation>span.label,html.writer-html5 .rst-content aside.footnote>span.label,html.writer-html5 .rst-content div.citation>span.label{grid-column-start:1;grid-column-end:2}html.writer-html5 .rst-content aside.citation>span.backrefs,html.writer-html5 .rst-content aside.footnote>span.backrefs,html.writer-html5 .rst-content div.citation>span.backrefs{grid-column-start:2;grid-column-end:3;grid-row-start:1;grid-row-end:3}html.writer-html5 .rst-content aside.citation>p,html.writer-html5 .rst-content aside.footnote>p,html.writer-html5 .rst-content div.citation>p{grid-column-start:4;grid-column-end:5}html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.field-list,html.writer-html5 .rst-content dl.footnote{margin-bottom:24px}html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.field-list>dt,html.writer-html5 .rst-content dl.footnote>dt{padding-left:1rem}html.writer-html5 .rst-content dl.citation>dd,html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.field-list>dd,html.writer-html5 .rst-content dl.field-list>dt,html.writer-html5 .rst-content dl.footnote>dd,html.writer-html5 .rst-content dl.footnote>dt{margin-bottom:0}html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.footnote{font-size:.9rem}html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.footnote>dt{margin:0 .5rem .5rem 0;line-height:1.2rem;word-break:break-all;font-weight:400}html.writer-html5 .rst-content dl.citation>dt>span.brackets:before,html.writer-html5 .rst-content dl.footnote>dt>span.brackets:before{content:"["}html.writer-html5 .rst-content dl.citation>dt>span.brackets:after,html.writer-html5 .rst-content dl.footnote>dt>span.brackets:after{content:"]"}html.writer-html5 .rst-content dl.citation>dt>span.fn-backref,html.writer-html5 .rst-content dl.footnote>dt>span.fn-backref{text-align:left;font-style:italic;margin-left:.65rem;word-break:break-word;word-spacing:-.1rem;max-width:5rem}html.writer-html5 .rst-content dl.citation>dt>span.fn-backref>a,html.writer-html5 
.rst-content dl.footnote>dt>span.fn-backref>a{word-break:keep-all}html.writer-html5 .rst-content dl.citation>dt>span.fn-backref>a:not(:first-child):before,html.writer-html5 .rst-content dl.footnote>dt>span.fn-backref>a:not(:first-child):before{content:" "}html.writer-html5 .rst-content dl.citation>dd,html.writer-html5 .rst-content dl.footnote>dd{margin:0 0 .5rem;line-height:1.2rem}html.writer-html5 .rst-content dl.citation>dd p,html.writer-html5 .rst-content dl.footnote>dd p{font-size:.9rem}html.writer-html5 .rst-content aside.citation,html.writer-html5 .rst-content aside.footnote,html.writer-html5 .rst-content div.citation{padding-left:1rem;padding-right:1rem;font-size:.9rem;line-height:1.2rem}html.writer-html5 .rst-content aside.citation p,html.writer-html5 .rst-content aside.footnote p,html.writer-html5 .rst-content div.citation p{font-size:.9rem;line-height:1.2rem;margin-bottom:12px}html.writer-html5 .rst-content aside.citation span.backrefs,html.writer-html5 .rst-content aside.footnote span.backrefs,html.writer-html5 .rst-content div.citation span.backrefs{text-align:left;font-style:italic;margin-left:.65rem;word-break:break-word;word-spacing:-.1rem;max-width:5rem}html.writer-html5 .rst-content aside.citation span.backrefs>a,html.writer-html5 .rst-content aside.footnote span.backrefs>a,html.writer-html5 .rst-content div.citation span.backrefs>a{word-break:keep-all}html.writer-html5 .rst-content aside.citation span.backrefs>a:not(:first-child):before,html.writer-html5 .rst-content aside.footnote span.backrefs>a:not(:first-child):before,html.writer-html5 .rst-content div.citation span.backrefs>a:not(:first-child):before{content:" "}html.writer-html5 .rst-content aside.citation span.label,html.writer-html5 .rst-content aside.footnote span.label,html.writer-html5 .rst-content div.citation span.label{line-height:1.2rem}html.writer-html5 .rst-content aside.citation-list,html.writer-html5 .rst-content aside.footnote-list,html.writer-html5 .rst-content div.citation-list{margin-bottom:24px}html.writer-html5 .rst-content dl.option-list kbd{font-size:.9rem}.rst-content table.docutils.footnote,html.writer-html4 .rst-content table.docutils.citation,html.writer-html5 .rst-content aside.footnote,html.writer-html5 .rst-content aside.footnote-list aside.footnote,html.writer-html5 .rst-content div.citation-list>div.citation,html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.footnote{color:grey}.rst-content table.docutils.footnote code,.rst-content table.docutils.footnote tt,html.writer-html4 .rst-content table.docutils.citation code,html.writer-html4 .rst-content table.docutils.citation tt,html.writer-html5 .rst-content aside.footnote-list aside.footnote code,html.writer-html5 .rst-content aside.footnote-list aside.footnote tt,html.writer-html5 .rst-content aside.footnote code,html.writer-html5 .rst-content aside.footnote tt,html.writer-html5 .rst-content div.citation-list>div.citation code,html.writer-html5 .rst-content div.citation-list>div.citation tt,html.writer-html5 .rst-content dl.citation code,html.writer-html5 .rst-content dl.citation tt,html.writer-html5 .rst-content dl.footnote code,html.writer-html5 .rst-content dl.footnote tt{color:#555}.rst-content .wy-table-responsive.citation,.rst-content .wy-table-responsive.footnote{margin-bottom:0}.rst-content .wy-table-responsive.citation+:not(.citation),.rst-content .wy-table-responsive.footnote+:not(.footnote){margin-top:24px}.rst-content .wy-table-responsive.citation:last-child,.rst-content 
.wy-table-responsive.footnote:last-child{margin-bottom:24px}.rst-content table.docutils th{border-color:#e1e4e5}html.writer-html5 .rst-content table.docutils th{border:1px solid #e1e4e5}html.writer-html5 .rst-content table.docutils td>p,html.writer-html5 .rst-content table.docutils th>p{line-height:1rem;margin-bottom:0;font-size:.9rem}.rst-content table.docutils td .last,.rst-content table.docutils td .last>:last-child{margin-bottom:0}.rst-content table.field-list,.rst-content table.field-list td{border:none}.rst-content table.field-list td p{line-height:inherit}.rst-content table.field-list td>strong{display:inline-block}.rst-content table.field-list .field-name{padding-right:10px;text-align:left;white-space:nowrap}.rst-content table.field-list .field-body{text-align:left}.rst-content code,.rst-content tt{color:#000;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;padding:2px 5px}.rst-content code big,.rst-content code em,.rst-content tt big,.rst-content tt em{font-size:100%!important;line-height:normal}.rst-content code.literal,.rst-content tt.literal{color:#e74c3c;white-space:normal}.rst-content code.xref,.rst-content tt.xref,a .rst-content code,a .rst-content tt{font-weight:700;color:#404040;overflow-wrap:normal}.rst-content kbd,.rst-content pre,.rst-content samp{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace}.rst-content a code,.rst-content a tt{color:#2980b9}.rst-content dl{margin-bottom:24px}.rst-content dl dt{font-weight:700;margin-bottom:12px}.rst-content dl ol,.rst-content dl p,.rst-content dl table,.rst-content dl ul{margin-bottom:12px}.rst-content dl dd{margin:0 0 12px 24px;line-height:24px}.rst-content dl dd>ol:last-child,.rst-content dl dd>p:last-child,.rst-content dl dd>table:last-child,.rst-content dl dd>ul:last-child{margin-bottom:0}html.writer-html4 .rst-content dl:not(.docutils),html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple){margin-bottom:24px}html.writer-html4 .rst-content dl:not(.docutils)>dt,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt{display:table;margin:6px 0;font-size:90%;line-height:normal;background:#e7f2fa;color:#2980b9;border-top:3px solid #6ab0de;padding:6px;position:relative}html.writer-html4 .rst-content dl:not(.docutils)>dt:before,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt:before{color:#6ab0de}html.writer-html4 .rst-content dl:not(.docutils)>dt .headerlink,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt .headerlink{color:#404040;font-size:100%!important}html.writer-html4 .rst-content dl:not(.docutils) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt{margin-bottom:6px;border:none;border-left:3px solid #ccc;background:#f0f0f0;color:#555}html.writer-html4 .rst-content dl:not(.docutils) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt .headerlink,html.writer-html5 .rst-content 
dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt .headerlink{color:#404040;font-size:100%!important}html.writer-html4 .rst-content dl:not(.docutils)>dt:first-child,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt:first-child{margin-top:0}html.writer-html4 .rst-content dl:not(.docutils) code.descclassname,html.writer-html4 .rst-content dl:not(.docutils) code.descname,html.writer-html4 .rst-content dl:not(.docutils) tt.descclassname,html.writer-html4 .rst-content dl:not(.docutils) tt.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) code.descclassname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) code.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) tt.descclassname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) tt.descname{background-color:transparent;border:none;padding:0;font-size:100%!important}html.writer-html4 .rst-content dl:not(.docutils) code.descname,html.writer-html4 .rst-content dl:not(.docutils) tt.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) code.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) tt.descname{font-weight:700}html.writer-html4 .rst-content dl:not(.docutils) .optional,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .optional{display:inline-block;padding:0 4px;color:#000;font-weight:700}html.writer-html4 .rst-content dl:not(.docutils) .property,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .property{display:inline-block;padding-right:8px;max-width:100%}html.writer-html4 .rst-content dl:not(.docutils) .k,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .k{font-style:italic}html.writer-html4 .rst-content dl:not(.docutils) .descclassname,html.writer-html4 .rst-content dl:not(.docutils) .descname,html.writer-html4 .rst-content dl:not(.docutils) .sig-name,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .descclassname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .sig-name{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;color:#000}.rst-content .viewcode-back,.rst-content .viewcode-link{display:inline-block;color:#27ae60;font-size:80%;padding-left:24px}.rst-content .viewcode-back{display:block;float:right}.rst-content p.rubric{margin-bottom:12px;font-weight:700}.rst-content 
code.download,.rst-content tt.download{background:inherit;padding:inherit;font-weight:400;font-family:inherit;font-size:inherit;color:inherit;border:inherit;white-space:inherit}.rst-content code.download span:first-child,.rst-content tt.download span:first-child{-webkit-font-smoothing:subpixel-antialiased}.rst-content code.download span:first-child:before,.rst-content tt.download span:first-child:before{margin-right:4px}.rst-content .guilabel,.rst-content .menuselection{font-size:80%;font-weight:700;border-radius:4px;padding:2.4px 6px;margin:auto 2px}.rst-content .guilabel,.rst-content .menuselection{border:1px solid #7fbbe3;background:#e7f2fa}.rst-content :not(dl.option-list)>:not(dt):not(kbd):not(.kbd)>.kbd,.rst-content :not(dl.option-list)>:not(dt):not(kbd):not(.kbd)>kbd{color:inherit;font-size:80%;background-color:#fff;border:1px solid #a6a6a6;border-radius:4px;box-shadow:0 2px grey;padding:2.4px 6px;margin:auto 0}.rst-content .versionmodified{font-style:italic}@media screen and (max-width:480px){.rst-content .sidebar{width:100%}}span[id*=MathJax-Span]{color:#404040}.math{text-align:center}@font-face{font-family:Lato;src:url(fonts/lato-normal.woff2?bd03a2cc277bbbc338d464e679fe9942) format("woff2"),url(fonts/lato-normal.woff?27bd77b9162d388cb8d4c4217c7c5e2a) format("woff");font-weight:400;font-style:normal;font-display:block}@font-face{font-family:Lato;src:url(fonts/lato-bold.woff2?cccb897485813c7c256901dbca54ecf2) format("woff2"),url(fonts/lato-bold.woff?d878b6c29b10beca227e9eef4246111b) format("woff");font-weight:700;font-style:normal;font-display:block}@font-face{font-family:Lato;src:url(fonts/lato-bold-italic.woff2?0b6bb6725576b072c5d0b02ecdd1900d) format("woff2"),url(fonts/lato-bold-italic.woff?9c7e4e9eb485b4a121c760e61bc3707c) format("woff");font-weight:700;font-style:italic;font-display:block}@font-face{font-family:Lato;src:url(fonts/lato-normal-italic.woff2?4eb103b4d12be57cb1d040ed5e162e9d) format("woff2"),url(fonts/lato-normal-italic.woff?f28f2d6482446544ef1ea1ccc6dd5892) format("woff");font-weight:400;font-style:italic;font-display:block}@font-face{font-family:Roboto Slab;font-style:normal;font-weight:400;src:url(fonts/Roboto-Slab-Regular.woff2?7abf5b8d04d26a2cafea937019bca958) format("woff2"),url(fonts/Roboto-Slab-Regular.woff?c1be9284088d487c5e3ff0a10a92e58c) format("woff");font-display:block}@font-face{font-family:Roboto Slab;font-style:normal;font-weight:700;src:url(fonts/Roboto-Slab-Bold.woff2?9984f4a9bda09be08e83f2506954adbe) format("woff2"),url(fonts/Roboto-Slab-Bold.woff?bed5564a116b05148e3b3bea6fb1162a) format("woff");font-display:block} \ No newline at end of file diff --git a/docs/build/html/_static/js/badge_only.js b/docs/build/html/_static/js/badge_only.js new file mode 100644 index 0000000..526d723 --- /dev/null +++ b/docs/build/html/_static/js/badge_only.js @@ -0,0 +1 @@ +!function(e){var t={};function r(n){if(t[n])return t[n].exports;var o=t[n]={i:n,l:!1,exports:{}};return e[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}r.m=e,r.c=t,r.d=function(e,t,n){r.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:n})},r.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.t=function(e,t){if(1&t&&(e=r(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var n=Object.create(null);if(r.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)r.d(n,o,function(t){return 
e[t]}.bind(null,o));return n},r.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(t,"a",t),t},r.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r.p="",r(r.s=4)}({4:function(e,t,r){}}); \ No newline at end of file diff --git a/docs/build/html/_static/js/html5shiv-printshiv.min.js b/docs/build/html/_static/js/html5shiv-printshiv.min.js new file mode 100644 index 0000000..2b43bd0 --- /dev/null +++ b/docs/build/html/_static/js/html5shiv-printshiv.min.js @@ -0,0 +1,4 @@ +/** +* @preserve HTML5 Shiv 3.7.3-pre | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed +*/ +!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=y.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=y.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),y.elements=c+" "+a,j(b)}function f(a){var b=x[a[v]];return b||(b={},w++,a[v]=w,x[w]=b),b}function g(a,c,d){if(c||(c=b),q)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():u.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||t.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),q)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return y.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(y,b.frag)}function j(a){a||(a=b);var d=f(a);return!y.shivCSS||p||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),q||i(a,d),a}function k(a){for(var b,c=a.getElementsByTagName("*"),e=c.length,f=RegExp("^(?:"+d().join("|")+")$","i"),g=[];e--;)b=c[e],f.test(b.nodeName)&&g.push(b.applyElement(l(b)));return g}function l(a){for(var b,c=a.attributes,d=c.length,e=a.ownerDocument.createElement(A+":"+a.nodeName);d--;)b=c[d],b.specified&&e.setAttribute(b.nodeName,b.nodeValue);return e.style.cssText=a.style.cssText,e}function m(a){for(var b,c=a.split("{"),e=c.length,f=RegExp("(^|[\\s,>+~])("+d().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"),g="$1"+A+"\\:$2";e--;)b=c[e]=c[e].split("}"),b[b.length-1]=b[b.length-1].replace(f,g),c[e]=b.join("}");return c.join("{")}function n(a){for(var b=a.length;b--;)a[b].removeNode()}function o(a){function b(){clearTimeout(g._removeSheetTimer),d&&d.removeNode(!0),d=null}var d,e,g=f(a),h=a.namespaces,i=a.parentWindow;return!B||a.printShived?a:("undefined"==typeof h[A]&&h.add(A),i.attachEvent("onbeforeprint",function(){b();for(var f,g,h,i=a.styleSheets,j=[],l=i.length,n=Array(l);l--;)n[l]=i[l];for(;h=n.pop();)if(!h.disabled&&z.test(h.media)){try{f=h.imports,g=f.length}catch(o){g=0}for(l=0;g>l;l++)n.push(f[l]);try{j.push(h.cssText)}catch(o){}}j=m(j.reverse().join("")),e=k(a),d=c(a,j)}),i.attachEvent("onafterprint",function(){n(e),clearTimeout(g._removeSheetTimer),g._removeSheetTimer=setTimeout(b,500)}),a.printShived=!0,a)}var 
p,q,r="3.7.3",s=a.html5||{},t=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,u=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,v="_html5shiv",w=0,x={};!function(){try{var a=b.createElement("a");a.innerHTML="",p="hidden"in a,q=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){p=!0,q=!0}}();var y={elements:s.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:r,shivCSS:s.shivCSS!==!1,supportsUnknownElements:q,shivMethods:s.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=y,j(b);var z=/^$|\b(?:all|print)\b/,A="html5shiv",B=!q&&function(){var c=b.documentElement;return!("undefined"==typeof b.namespaces||"undefined"==typeof b.parentWindow||"undefined"==typeof c.applyElement||"undefined"==typeof c.removeNode||"undefined"==typeof a.attachEvent)}();y.type+=" print",y.shivPrint=o,o(b),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof window?window:this,document); \ No newline at end of file diff --git a/docs/build/html/_static/js/html5shiv.min.js b/docs/build/html/_static/js/html5shiv.min.js new file mode 100644 index 0000000..cd1c674 --- /dev/null +++ b/docs/build/html/_static/js/html5shiv.min.js @@ -0,0 +1,4 @@ +/** +* @preserve HTML5 Shiv 3.7.3 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed +*/ +!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var 
a=b.createElement("a");a.innerHTML="",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document); \ No newline at end of file diff --git a/docs/build/html/_static/js/theme.js b/docs/build/html/_static/js/theme.js new file mode 100644 index 0000000..1fddb6e --- /dev/null +++ b/docs/build/html/_static/js/theme.js @@ -0,0 +1 @@ +!function(n){var e={};function t(i){if(e[i])return e[i].exports;var o=e[i]={i:i,l:!1,exports:{}};return n[i].call(o.exports,o,o.exports,t),o.l=!0,o.exports}t.m=n,t.c=e,t.d=function(n,e,i){t.o(n,e)||Object.defineProperty(n,e,{enumerable:!0,get:i})},t.r=function(n){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(n,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(n,"__esModule",{value:!0})},t.t=function(n,e){if(1&e&&(n=t(n)),8&e)return n;if(4&e&&"object"==typeof n&&n&&n.__esModule)return n;var i=Object.create(null);if(t.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:n}),2&e&&"string"!=typeof n)for(var o in n)t.d(i,o,function(e){return n[e]}.bind(null,o));return i},t.n=function(n){var e=n&&n.__esModule?function(){return n.default}:function(){return n};return t.d(e,"a",e),e},t.o=function(n,e){return Object.prototype.hasOwnProperty.call(n,e)},t.p="",t(t.s=0)}([function(n,e,t){t(1),n.exports=t(3)},function(n,e,t){(function(){var e="undefined"!=typeof window?window.jQuery:t(2);n.exports.ThemeNav={navBar:null,win:null,winScroll:!1,winResize:!1,linkScroll:!1,winPosition:0,winHeight:null,docHeight:null,isRunning:!1,enable:function(n){var t=this;void 0===n&&(n=!0),t.isRunning||(t.isRunning=!0,e((function(e){t.init(e),t.reset(),t.win.on("hashchange",t.reset),n&&t.win.on("scroll",(function(){t.linkScroll||t.winScroll||(t.winScroll=!0,requestAnimationFrame((function(){t.onScroll()})))})),t.win.on("resize",(function(){t.winResize||(t.winResize=!0,requestAnimationFrame((function(){t.onResize()})))})),t.onResize()})))},enableSticky:function(){this.enable(!0)},init:function(n){n(document);var e=this;this.navBar=n("div.wy-side-scroll:first"),this.win=n(window),n(document).on("click","[data-toggle='wy-nav-top']",(function(){n("[data-toggle='wy-nav-shift']").toggleClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift")})).on("click",".wy-menu-vertical .current ul li a",(function(){var t=n(this);n("[data-toggle='wy-nav-shift']").removeClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift"),e.toggleCurrent(t),e.hashChange()})).on("click","[data-toggle='rst-current-version']",(function(){n("[data-toggle='rst-versions']").toggleClass("shift-up")})),n("table.docutils:not(.field-list,.footnote,.citation)").wrap("
"),n("table.docutils.footnote").wrap("
"),n("table.docutils.citation").wrap("
"),n(".wy-menu-vertical ul").not(".simple").siblings("a").each((function(){var t=n(this);expand=n(''),expand.on("click",(function(n){return e.toggleCurrent(t),n.stopPropagation(),!1})),t.prepend(expand)}))},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),t=e.find('[href="'+n+'"]');if(0===t.length){var i=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(t=e.find('[href="#'+i.attr("id")+'"]')).length&&(t=e.find('[href="#"]'))}if(t.length>0){$(".wy-menu-vertical .current").removeClass("current").attr("aria-expanded","false"),t.addClass("current").attr("aria-expanded","true"),t.closest("li.toctree-l1").parent().addClass("current").attr("aria-expanded","true");for(let n=1;n<=10;n++)t.closest("li.toctree-l"+n).addClass("current").attr("aria-expanded","true");t[0].scrollIntoView()}}catch(n){console.log("Error expanding nav for anchor",n)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,t=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(t),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",(function(){this.linkScroll=!1}))},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current").attr("aria-expanded","false"),e.siblings().find("li.current").removeClass("current").attr("aria-expanded","false");var t=e.find("> ul li");t.length&&(t.removeClass("current").attr("aria-expanded","false"),e.toggleClass("current").attr("aria-expanded",(function(n,e){return"true"==e?"false":"true"})))}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:n.exports.ThemeNav,StickyNav:n.exports.ThemeNav}),function(){for(var n=0,e=["ms","moz","webkit","o"],t=0;t { + if (node.nodeType === Node.TEXT_NODE) { + const val = node.nodeValue; + const parent = node.parentNode; + const pos = val.toLowerCase().indexOf(text); + if ( + pos >= 0 && + !parent.classList.contains(className) && + !parent.classList.contains("nohighlight") + ) { + let span; + + const closestNode = parent.closest("body, svg, foreignObject"); + const isInSVG = closestNode && closestNode.matches("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.classList.add(className); + } + + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + const rest = document.createTextNode(val.substr(pos + text.length)); + parent.insertBefore( + span, + parent.insertBefore( + rest, + node.nextSibling + ) + ); + node.nodeValue = val.substr(0, pos); + /* There may be more occurrences of search term in this node. So call this + * function recursively on the remaining fragment. 
+ */ + _highlight(rest, addItems, text, className); + + if (isInSVG) { + const rect = document.createElementNS( + "http://www.w3.org/2000/svg", + "rect" + ); + const bbox = parent.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute("class", className); + addItems.push({ parent: parent, target: rect }); + } + } + } else if (node.matches && !node.matches("button, select, textarea")) { + node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); + } +}; +const _highlightText = (thisNode, text, className) => { + let addItems = []; + _highlight(thisNode, addItems, text, className); + addItems.forEach((obj) => + obj.parent.insertAdjacentElement("beforebegin", obj.target) + ); +}; + +/** + * Small JavaScript module for the documentation. + */ +const SphinxHighlight = { + + /** + * highlight the search words provided in localstorage in the text + */ + highlightSearchWords: () => { + if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight + + // get and clear terms from localstorage + const url = new URL(window.location); + const highlight = + localStorage.getItem("sphinx_highlight_terms") + || url.searchParams.get("highlight") + || ""; + localStorage.removeItem("sphinx_highlight_terms") + url.searchParams.delete("highlight"); + window.history.replaceState({}, "", url); + + // get individual terms from highlight string + const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); + if (terms.length === 0) return; // nothing to do + + // There should never be more than one element matching "div.body" + const divBody = document.querySelectorAll("div.body"); + const body = divBody.length ? divBody[0] : document.querySelector("body"); + window.setTimeout(() => { + terms.forEach((term) => _highlightText(body, term, "highlighted")); + }, 10); + + const searchBox = document.getElementById("searchbox"); + if (searchBox === null) return; + searchBox.appendChild( + document + .createRange() + .createContextualFragment( + '" + ) + ); + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords: () => { + document + .querySelectorAll("#searchbox .highlight-link") + .forEach((el) => el.remove()); + document + .querySelectorAll("span.highlighted") + .forEach((el) => el.classList.remove("highlighted")); + localStorage.removeItem("sphinx_highlight_terms") + }, + + initEscapeListener: () => { + // only install a listener if it is really needed + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; + if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { + SphinxHighlight.hideSearchWords(); + event.preventDefault(); + } + }); + }, +}; + +_ready(() => { + /* Do not call highlightSearchWords() when we are on the search page. + * It will highlight words from the *previous* search query. 
+ */ + if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords(); + SphinxHighlight.initEscapeListener(); +}); diff --git a/docs/build/html/_static/sphinxdoc.css b/docs/build/html/_static/sphinxdoc.css new file mode 100644 index 0000000..b03830b --- /dev/null +++ b/docs/build/html/_static/sphinxdoc.css @@ -0,0 +1,354 @@ +/* + * sphinxdoc.css_t + * ~~~~~~~~~~~~~~~ + * + * Sphinx stylesheet -- sphinxdoc theme. Originally created by + * Armin Ronacher for Werkzeug. + * + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +@import url("basic.css"); + +/* -- page layout ----------------------------------------------------------- */ + +body { + font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', + 'Verdana', sans-serif; + font-size: 14px; + letter-spacing: -0.01em; + line-height: 150%; + text-align: center; + background-color: #BFD1D4; + color: black; + padding: 0; + border: 1px solid #aaa; + + margin: 0px 80px 0px 80px; + min-width: 740px; +} + +div.document { + background-color: white; + text-align: left; + background-image: url(contents.png); + background-repeat: repeat-x; +} + +div.documentwrapper { + float: left; + width: 100%; +} + +div.bodywrapper { + margin: 0 calc(230px + 10px) 0 0; + border-right: 1px solid #ccc; +} + +div.body { + margin: 0; + padding: 0.5em 20px 20px 20px; +} + +div.related { + font-size: 1em; +} + +div.related ul { + background-image: url(navigation.png); + height: 2em; + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; +} + +div.related ul li { + margin: 0; + padding: 0; + height: 2em; + float: left; +} + +div.related ul li.right { + float: right; + margin-right: 5px; +} + +div.related ul li a { + margin: 0; + padding: 0 5px 0 5px; + line-height: 1.75em; + color: #EE9816; +} + +div.related ul li a:hover { + color: #3CA8E7; +} + +div.sphinxsidebarwrapper { + padding: 0; +} + +div.sphinxsidebar { + padding: 0.5em 15px 15px 0; + width: calc(230px - 20px); + float: right; + font-size: 1em; + text-align: left; +} + +div.sphinxsidebar h3, div.sphinxsidebar h4 { + margin: 1em 0 0.5em 0; + font-size: 1em; + padding: 0.1em 0 0.1em 0.5em; + color: white; + border: 1px solid #86989B; + background-color: #AFC1C4; +} + +div.sphinxsidebar h3 a { + color: white; +} + +div.sphinxsidebar ul { + padding-left: 1.5em; + margin-top: 7px; + padding: 0; + line-height: 130%; +} + +div.sphinxsidebar ul ul { + margin-left: 20px; +} + +div.footer { + background-color: #E3EFF1; + color: #86989B; + padding: 3px 8px 3px 0; + clear: both; + font-size: 0.8em; + text-align: right; +} + +div.footer a { + color: #86989B; + text-decoration: underline; +} + +/* -- body styles ----------------------------------------------------------- */ + +p { + margin: 0.8em 0 0.5em 0; +} + +a { + color: #CA7900; + text-decoration: none; +} + +a:hover { + color: #2491CF; +} + +a:visited { + color: #551A8B; +} + +div.body a { + text-decoration: underline; +} + +h1 { + margin: 0; + padding: 0.7em 0 0.3em 0; + font-size: 1.5em; + color: #11557C; +} + +h2 { + margin: 1.3em 0 0.2em 0; + font-size: 1.35em; + padding: 0; +} + +h3 { + margin: 1em 0 -0.3em 0; + font-size: 1.2em; +} + +div.body h1 a, div.body h2 a, div.body h3 a, div.body h4 a, div.body h5 a, div.body h6 a { + color: black!important; +} + +h1 a.anchor, h2 a.anchor, h3 a.anchor, h4 a.anchor, h5 a.anchor, h6 a.anchor { + display: none; + margin: 0 0 0 0.3em; + padding: 0 0.2em 0 0.2em; + color: #aaa!important; +} + +h1:hover a.anchor, h2:hover a.anchor, 
h3:hover a.anchor, h4:hover a.anchor, +h5:hover a.anchor, h6:hover a.anchor { + display: inline; +} + +h1 a.anchor:hover, h2 a.anchor:hover, h3 a.anchor:hover, h4 a.anchor:hover, +h5 a.anchor:hover, h6 a.anchor:hover { + color: #777; + background-color: #eee; +} + +a.headerlink { + color: #c60f0f!important; + font-size: 1em; + margin-left: 6px; + padding: 0 4px 0 4px; + text-decoration: none!important; +} + +a.headerlink:hover { + background-color: #ccc; + color: white!important; +} + +cite, code, code { + font-family: 'Consolas', 'Deja Vu Sans Mono', + 'Bitstream Vera Sans Mono', monospace; + font-size: 0.95em; + letter-spacing: 0.01em; +} + +code { + background-color: #f2f2f2; + border-bottom: 1px solid #ddd; + color: #333; +} + +code.descname, code.descclassname, code.xref { + border: 0; +} + +hr { + border: 1px solid #abc; + margin: 2em; +} + +a code { + border: 0; + color: #CA7900; +} + +a code:hover { + color: #2491CF; +} + +pre { + font-family: 'Consolas', 'Deja Vu Sans Mono', + 'Bitstream Vera Sans Mono', monospace; + font-size: 0.95em; + letter-spacing: 0.015em; + line-height: 120%; + padding: 0.5em; + border: 1px solid #ccc; +} + +pre a { + color: inherit; + text-decoration: underline; +} + +td.linenos pre { + padding: 0.5em 0; +} + +div.quotebar { + background-color: #f8f8f8; + max-width: 250px; + float: right; + padding: 2px 7px; + border: 1px solid #ccc; +} + +nav.contents, +aside.topic, +div.topic { + background-color: #f8f8f8; +} + +table { + border-collapse: collapse; + margin: 0 -0.5em 0 -0.5em; +} + +table td, table th { + padding: 0.2em 0.5em 0.2em 0.5em; +} + +div.admonition, div.warning { + font-size: 0.9em; + margin: 1em 0 1em 0; + border: 1px solid #86989B; + background-color: #f7f7f7; + padding: 0; +} + +div.admonition p, div.warning p { + margin: 0.5em 1em 0.5em 1em; + padding: 0; +} + +div.admonition pre, div.warning pre { + margin: 0.4em 1em 0.4em 1em; +} + +div.admonition p.admonition-title, +div.warning p.admonition-title { + margin: 0; + padding: 0.1em 0 0.1em 0.5em; + color: white; + border-bottom: 1px solid #86989B; + font-weight: bold; + background-color: #AFC1C4; +} + +div.warning { + border: 1px solid #940000; +} + +div.warning p.admonition-title { + background-color: #CF0000; + border-bottom-color: #940000; +} + +div.admonition ul, div.admonition ol, +div.warning ul, div.warning ol { + margin: 0.1em 0.5em 0.5em 3em; + padding: 0; +} + +div.versioninfo { + margin: 1em 0 0 0; + border: 1px solid #ccc; + background-color: #DDEAF0; + padding: 8px; + line-height: 1.3em; + font-size: 0.9em; +} + +.viewcode-back { + font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', + 'Verdana', sans-serif; +} + +div.viewcode-block:target { + background-color: #f4debf; + border-top: 1px solid #ac9; + border-bottom: 1px solid #ac9; +} + +div.code-block-caption { + background-color: #ddd; + color: #222; + border: 1px solid #ccc; +} \ No newline at end of file diff --git a/docs/build/html/api.html b/docs/build/html/api.html new file mode 100644 index 0000000..7c74701 --- /dev/null +++ b/docs/build/html/api.html @@ -0,0 +1,113 @@ + + + + + + + API — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation + + + + + + + + + + + + + + + + + +
+<!-- rendered page body (theme navigation, breadcrumb, and footer markup omitted) -->
+API
+quapy
+QuaPy module for quantification
\ No newline at end of file
diff --git a/docs/build/html/generated/quapy.html b/docs/build/html/generated/quapy.html
new file mode 100644
index 0000000..61ce026
--- /dev/null
+++ b/docs/build/html/generated/quapy.html
@@ -0,0 +1,106 @@
+quapy — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation
+<!-- rendered page body (theme navigation, breadcrumb, and footer markup omitted) -->
+quapy
+QuaPy module for quantification
\ No newline at end of file
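The generated quapy page reduces to the one-line module summary above. As a quick orientation, here is a minimal usage sketch that relies only on QuaPy's documented top-level API (qp.environ, the dataset fetchers, and an aggregative quantifier); the 'kindle' dataset and the hyperparameters are illustrative choices, not part of this page:

import quapy as qp
from quapy.method.aggregative import ACC
from sklearn.linear_model import LogisticRegression

# sample size used by QuaPy's sampling-based evaluation protocols
qp.environ['SAMPLE_SIZE'] = 100

# load a sentiment dataset as tfidf vectors and train an
# Adjusted Classify & Count (ACC) quantifier on the training split
dataset = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)
model = ACC(LogisticRegression())
model.fit(dataset.training)

# estimate class prevalence values on the (unlabelled) test instances
# and compare them against the true test prevalence
estim_prevalence = model.quantify(dataset.test.instances)
true_prevalence = dataset.test.prevalence()
print(qp.error.mae(true_prevalence, estim_prevalence))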
diff --git a/docs/build/html/quapy.benchmarking.html b/docs/build/html/quapy.benchmarking.html
new file mode 100644
index 0000000..ab3831f
--- /dev/null
+++ b/docs/build/html/quapy.benchmarking.html
@@ -0,0 +1,119 @@
+quapy.benchmarking package — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation
+<!-- rendered page body (theme navigation, breadcrumb, and footer markup omitted) -->
+quapy.benchmarking package
+Submodules
+quapy.benchmarking.typical module
+quapy.benchmarking.typical.wrap_cls_params(params)
+Module contents
\ No newline at end of file
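The quapy.benchmarking.typical.wrap_cls_params(params) entry above ships without a description. A plausible reading, offered purely as an assumption about this new benchmarking module and not as its actual implementation, is that it rewrites a plain classifier hyperparameter grid into the classifier__-prefixed form that QuaPy's model selection (GridSearchQ) expects:

# Hypothetical sketch: GridSearchQ addresses classifier hyperparameters
# through the 'classifier__' prefix, so an sklearn-style grid such as
# {'C': [0.1, 1, 10]} must be rewritten before it can be explored.
def wrap_cls_params(params: dict) -> dict:
    """Prefix every key of a hyperparameter grid with 'classifier__'."""
    return {f'classifier__{name}': values for name, values in params.items()}

assert wrap_cls_params({'C': [0.1, 1, 10]}) == {'classifier__C': [0.1, 1, 10]}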