diff --git a/docs/build/html/Datasets.html b/docs/build/html/Datasets.html new file mode 100644 index 0000000..6af836e --- /dev/null +++ b/docs/build/html/Datasets.html @@ -0,0 +1,765 @@ + + + + + + + + + Datasets — QuaPy 0.1.6 documentation + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Datasets

+

QuaPy makes available several datasets that have been used in +quantification literature, as well as an interface to allow +anyone to import their custom datasets.

+

A Dataset object in QuaPy is roughly a pair of LabelledCollection objects, +one playing the role of the training set, another the test set. +LabelledCollection is a data class consisting of the (iterable) +instances and labels. This class handles most of the sampling functionality in QuaPy. +Take a look at the following code:

+
import quapy as qp
+import quapy.functional as F
+
+instances = [
+    '1st positive document', '2nd positive document',
+    'the only negative document',
+    '1st neutral document', '2nd neutral document', '3rd neutral document'
+]
+labels = [2, 2, 0, 1, 1, 1]
+
+data = qp.data.LabelledCollection(instances, labels)
+print(F.strprev(data.prevalence(), prec=2))
+
+
+

Output the class prevalences (showing 2 digit precision):

+
[0.17, 0.50, 0.33]
+
+
+

One can easily produce new samples at desired class prevalences:

+
sample_size = 10
+prev = [0.4, 0.1, 0.5]
+sample = data.sampling(sample_size, *prev)
+
+print('instances:', sample.instances)
+print('labels:', sample.labels)
+print('prevalence:', F.strprev(sample.prevalence(), prec=2))
+
+
+

Which outputs:

+
instances: ['the only negative document' '2nd positive document'
+ '2nd positive document' '2nd neutral document' '1st positive document'
+ 'the only negative document' 'the only negative document'
+ 'the only negative document' '2nd positive document'
+ '1st positive document']
+labels: [0 2 2 1 2 0 0 0 2 2]
+prevalence: [0.40, 0.10, 0.50]
+
+
+

Samples can be made consistent across different runs (e.g., to test +different methods on the same exact samples) by sampling and retaining +the indexes, that can then be used to generate the sample:

+
index = data.sampling_index(sample_size, *prev)
+for method in methods:
+    sample = data.sampling_from_index(index)
+    ...
+
+
+

QuaPy also implements the artificial sampling protocol that produces (via a +Python generator) a series of LabelledCollection objects with equidistant +prevalences ranging across the entire prevalence spectrum in the simplex space, e.g.:

+
for sample in data.artificial_sampling_generator(sample_size=100, n_prevalences=5):
+    print(F.strprev(sample.prevalence(), prec=2))
+
+
+

produces one sampling for each (valid) combination of prevalences originating from +splitting the range [0,1] into n_prevalences=5 points (i.e., [0, 0.25, 0.5, 0.75, 1]), +that is:

+
[0.00, 0.00, 1.00]
+[0.00, 0.25, 0.75]
+[0.00, 0.50, 0.50]
+[0.00, 0.75, 0.25]
+[0.00, 1.00, 0.00]
+[0.25, 0.00, 0.75]
+...
+[1.00, 0.00, 0.00]
+
+
+

See the Evaluation wiki for +further details on how to use the artificial sampling protocol to properly +evaluate a quantification method.

+
+

Reviews Datasets

+

Three datasets of reviews about Kindle devices, Harry Potter’s series, and +the well-known IMDb movie reviews can be fetched using a unified interface. +For example:

+
import quapy as qp
+data = qp.datasets.fetch_reviews('kindle')
+
+
+

These datasets have been used in:

+
Esuli, A., Moreo, A., & Sebastiani, F. (2018, October). 
+A recurrent neural network for sentiment quantification. 
+In Proceedings of the 27th ACM International Conference on 
+Information and Knowledge Management (pp. 1775-1778).
+
+
+

The list of reviews ids is available in:

+
qp.datasets.REVIEWS_SENTIMENT_DATASETS
+
+
+

Some statistics of the available datasets are summarized below:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Dataset

classes

train size

test size

train prev

test prev

type

hp

2

9533

18399

[0.018, 0.982]

[0.065, 0.935]

text

kindle

2

3821

21591

[0.081, 0.919]

[0.063, 0.937]

text

imdb

2

25000

25000

[0.500, 0.500]

[0.500, 0.500]

text

+
+
+

Twitter Sentiment Datasets

+

11 Twitter datasets for sentiment analysis. +Text is not accessible, and the documents were made available +in tf-idf format. Each dataset presents two splits: a train/val +split for model selection purposes, and a train+val/test split +for model evaluation. The following code exemplifies how to load +a twitter dataset for model selection.

+
import quapy as qp
+data = qp.datasets.fetch_twitter('gasp', for_model_selection=True)
+
+
+

The datasets were used in:

+
Gao, W., & Sebastiani, F. (2015, August). 
+Tweet sentiment: From classification to quantification. 
+In 2015 IEEE/ACM International Conference on Advances in 
+Social Networks Analysis and Mining (ASONAM) (pp. 97-104). IEEE.
+
+
+

Three of the datasets (semeval13, semeval14, and semeval15) share the +same training set (semeval), meaning that the training split one would get +when requesting any of them is the same. The dataset “semeval” can only +be requested with “for_model_selection=True”. +The lists of the Twitter dataset’s ids can be consulted in:

+
# a list of 11 dataset ids that can be used for model selection or model evaluation
+qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST
+
+# 9 dataset ids in which "semeval13", "semeval14", and "semeval15" are replaced with "semeval"
+qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN  
+
+
+

Some details can be found below:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Dataset

classes

train size

test size

features

train prev

test prev

type

gasp

3

8788

3765

694582

[0.421, 0.496, 0.082]

[0.407, 0.507, 0.086]

sparse

hcr

3

1594

798

222046

[0.546, 0.211, 0.243]

[0.640, 0.167, 0.193]

sparse

omd

3

1839

787

199151

[0.463, 0.271, 0.266]

[0.437, 0.283, 0.280]

sparse

sanders

3

2155

923

229399

[0.161, 0.691, 0.148]

[0.164, 0.688, 0.148]

sparse

semeval13

3

11338

3813

1215742

[0.159, 0.470, 0.372]

[0.158, 0.430, 0.412]

sparse

semeval14

3

11338

1853

1215742

[0.159, 0.470, 0.372]

[0.109, 0.361, 0.530]

sparse

semeval15

3

11338

2390

1215742

[0.159, 0.470, 0.372]

[0.153, 0.413, 0.434]

sparse

semeval16

3

8000

2000

889504

[0.157, 0.351, 0.492]

[0.163, 0.341, 0.497]

sparse

sst

3

2971

1271

376132

[0.261, 0.452, 0.288]

[0.207, 0.481, 0.312]

sparse

wa

3

2184

936

248563

[0.305, 0.414, 0.281]

[0.282, 0.446, 0.272]

sparse

wb

3

4259

1823

404333

[0.270, 0.392, 0.337]

[0.274, 0.392, 0.335]

sparse

+
+
+

UCI Machine Learning

+

A set of 32 datasets from the UCI Machine Learning repository +used in:

+
Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).
+Using ensembles for problems with characterizable changes 
+in data distribution: A case study on quantification.
+Information Fusion, 34, 87-100.
+
+
+

The list does not exactly coincide with that used in Pérez-Gállego et al. 2017 +since we were unable to find the datasets with ids “diabetes” and “phoneme”.

+

These datasets can be loaded by calling, e.g.:

+
import quapy as qp
+data = qp.datasets.fetch_UCIDataset('yeast', verbose=True)
+
+
+

This call will return a Dataset object in which the training and +test splits are randomly drawn, in a stratified manner, from the whole +collection at 70% and 30%, respectively. The verbose=True option indicates +that the dataset description should be printed in standard output. +The original data is not split, +and some papers submit the entire collection to a kFCV validation. +In order to accommodate these practices, one could first instantiate +the entire collection, and then create a generator that will return one +training+test dataset at a time, following a kFCV protocol:

+
import quapy as qp
+collection = qp.datasets.fetch_UCILabelledCollection("yeast")
+for data in qp.data.Dataset.kFCV(collection, nfolds=5, nrepeats=2):
+    ...
+
+
+

The above code allows one to conduct a 2x5FCV evaluation on the “yeast” dataset.

+

All datasets come in numerical form (dense matrices); some statistics +are summarized below.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Dataset

classes

instances

features

prev

type

acute.a

2

120

6

[0.508, 0.492]

dense

acute.b

2

120

6

[0.583, 0.417]

dense

balance.1

2

625

4

[0.539, 0.461]

dense

balance.2

2

625

4

[0.922, 0.078]

dense

balance.3

2

625

4

[0.539, 0.461]

dense

breast-cancer

2

683

9

[0.350, 0.650]

dense

cmc.1

2

1473

9

[0.573, 0.427]

dense

cmc.2

2

1473

9

[0.774, 0.226]

dense

cmc.3

2

1473

9

[0.653, 0.347]

dense

ctg.1

2

2126

22

[0.222, 0.778]

dense

ctg.2

2

2126

22

[0.861, 0.139]

dense

ctg.3

2

2126

22

[0.917, 0.083]

dense

german

2

1000

24

[0.300, 0.700]

dense

haberman

2

306

3

[0.735, 0.265]

dense

ionosphere

2

351

34

[0.641, 0.359]

dense

iris.1

2

150

4

[0.667, 0.333]

dense

iris.2

2

150

4

[0.667, 0.333]

dense

iris.3

2

150

4

[0.667, 0.333]

dense

mammographic

2

830

5

[0.514, 0.486]

dense

pageblocks.5

2

5473

10

[0.979, 0.021]

dense

semeion

2

1593

256

[0.901, 0.099]

dense

sonar

2

208

60

[0.534, 0.466]

dense

spambase

2

4601

57

[0.606, 0.394]

dense

spectf

2

267

44

[0.794, 0.206]

dense

tictactoe

2

958

9

[0.653, 0.347]

dense

transfusion

2

748

4

[0.762, 0.238]

dense

wdbc

2

569

30

[0.627, 0.373]

dense

wine.1

2

178

13

[0.669, 0.331]

dense

wine.2

2

178

13

[0.601, 0.399]

dense

wine.3

2

178

13

[0.730, 0.270]

dense

wine-q-red

2

1599

11

[0.465, 0.535]

dense

wine-q-white

2

4898

11

[0.335, 0.665]

dense

yeast

2

1484

8

[0.711, 0.289]

dense

+
+

Issues:

+

All datasets will be downloaded automatically the first time they are requested, and +stored in the quapy_data folder for faster further reuse. +However, some datasets require special actions that at the moment are not fully +automated.

+
    +
  • Datasets with ids “ctg.1”, “ctg.2”, and “ctg.3” (Cardiotocography Data Set) load +an Excel file, which requires the user to install the xlrd Python module in order +to open it.

  • +
  • The dataset with id “pageblocks.5” (Page Blocks Classification (5)) needs to +open a “unix compressed file” (extension .Z), which is not directly doable with +standard Python packages like gzip or zip. This file would need to be uncompressed using +OS-dependent software manually. Information on how to do it will be printed the first +time the dataset is invoked.

  • +
+
+
+
+

Adding Custom Datasets

+

QuaPy provides data loaders for simple formats dealing with +text, following the format:

+
class-id \t first document's pre-processed text \n
+class-id \t second document's pre-processed text \n
+...
+
+
+

and sparse representations of the form:

+
{-1, 0, or +1} col(int):val(float) col(int):val(float) ... \n
+...
+
+
+

The code in charge of loading a LabelledCollection is:

+
@classmethod
+def load(cls, path:str, loader_func:callable):
+    return LabelledCollection(*loader_func(path))
+
+
+

indicating that any loader_func (e.g., a user-defined one) which +returns valid arguments for initializing a LabelledCollection object will allow +to load any collection. In particular, the LabelledCollection receives as +arguments the instances (as an iterable) and the labels (as an iterable) and, +additionally, the number of classes can be specified (it would otherwise be +inferred from the labels, but that requires at least one positive example for +all classes to be present in the collection).

+

The same loader_func can be passed to a Dataset, along with two +paths, in order to create a training and test pair of LabelledCollection, +e.g.:

+
import quapy as qp
+train_path = '../my_data/train.dat'
+test_path = '../my_data/test.dat'
+def my_custom_loader(path):
+    with open(path, 'rb') as fin:
+        ...
+    return instances, labels
+data = qp.data.Dataset.load(train_path, test_path, my_custom_loader)
+
+
+
+

Data Processing

+

QuaPy implements a number of preprocessing functions in the package qp.data.preprocessing, including:

+
    +
  • text2tfidf: tfidf vectorization

  • +
  • reduce_columns: reducing the number of columns based on term frequency

  • +
  • standardize: transforms the column values into z-scores (i.e., subtract the mean and normalizes by the standard deviation, so +that the column values have zero mean and unit variance).

  • +
  • index: transforms textual tokens into lists of numeric ids

  • +
+
+
+
+ + +
+
+
+
+ +
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/Evaluation.html b/docs/build/html/Evaluation.html new file mode 100644 index 0000000..af552be --- /dev/null +++ b/docs/build/html/Evaluation.html @@ -0,0 +1,327 @@ + + + + + + + + + Evaluation — QuaPy 0.1.6 documentation + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Evaluation

+

Quantification is an appealing tool in scenarios of dataset shift, +and particularly in scenarios of prior-probability shift. +That is, the interest in estimating the class prevalences arises +under the belief that those class prevalences might have changed +with respect to the ones observed during training. +In other words, one could simply return the training prevalence +as a predictor of the test prevalence if this change is assumed +to be unlikely (as is the case in general scenarios of +machine learning governed by the iid assumption). +In brief, quantification requires dedicated evaluation protocols, +which are implemented in QuaPy and explained here.

+
+

Error Measures

+

The module quapy.error implements the following error measures for quantification:

+
    +
  • mae: mean absolute error

  • +
  • mrae: mean relative absolute error

  • +
  • mse: mean squared error

  • +
  • mkld: mean Kullback-Leibler Divergence

  • +
  • mnkld: mean normalized Kullback-Leibler Divergence

  • +
+

Functions ae, rae, se, kld, and nkld are also available, +which return the individual errors (i.e., without averaging the whole).

+

Some errors of classification are also available:

+
    +
  • acce: accuracy error (1-accuracy)

  • +
  • f1e: F-1 score error (1-F1 score)

  • +
+

The error functions implement the following interface, e.g.:

+
mae(true_prevs, prevs_hat)
+
+
+

in which the first argument is a ndarray containing the true +prevalences, and the second argument is another ndarray with +the estimations produced by some method.

+

Some error functions, e.g., mrae, mkld, and mnkld, are +smoothed for numerical stability. In those cases, there is a +third argument, e.g.:

+
def mrae(true_prevs, prevs_hat, eps=None): ...
+
+
+

indicating the value for the smoothing parameter epsilon. +Traditionally, this value is set to 1/(2T) in past literature, +with T the sampling size. One could either pass this value +to the function each time, or set a QuaPy environment +variable SAMPLE_SIZE once, and omit this argument +thereafter (recommended); +e.g.:

+
qp.environ['SAMPLE_SIZE'] = 100  # once for all
+true_prev = np.asarray([0.5, 0.3, 0.2])  # let's assume 3 classes
+estim_prev = np.asarray([0.1, 0.3, 0.6])
+error = qp.error.mrae(true_prev, estim_prev)
+print(f'mrae({true_prev}, {estim_prev}) = {error:.3f}')
+
+
+

will print:

+
mrae([0.500, 0.300, 0.200], [0.100, 0.300, 0.600]) = 0.914
+
+
+

Finally, it is possible to instantiate QuaPy’s quantification +error functions from strings using, e.g.:

+
error_function = qp.error.from_name('mse')
+error = error_function(true_prev, estim_prev)
+
+
+
+
+

Evaluation Protocols

+

QuaPy implements the so-called “artificial sampling protocol”, +according to which a test set is used to generate samplings at +desired prevalences of fixed size and covering the full spectrum +of prevalences. This protocol is called “artificial” in contrast +to the “natural prevalence sampling” protocol that, +despite introducing some variability during sampling, approximately +preserves the training class prevalence.

+

In the artificial sampling protocol, the user specifies the number +of (equally distant) points to be generated from the interval [0,1].

+

For example, if n_prevpoints=11 then, for each class, the prevalences +[0., 0.1, 0.2, …, 1.] will be used. This means that, for two classes, +the number of different prevalences will be 11 (since, once the prevalence +of one class is determined, the other one is constrained). For 3 classes, +the number of valid combinations can be obtained as 11 + 10 + … + 1 = 66. +In general, the number of valid combinations that will be produced for a given +value of n_prevpoints can be consulted by invoking +quapy.functional.num_prevalence_combinations, e.g.:

+
import quapy.functional as F
+n_prevpoints = 21
+n_classes = 4
+n = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repeats=1)
+
+
+

in this example, n=1771. Note the last argument, n_repeats, that +informs of the number of examples that will be generated for any +valid combination (typical values are, e.g., 1 for a single sample, +or 10 or higher for computing standard deviations or performing statistical +significance tests).

+

One can instead work the other way around, i.e., one could set a +maximum budget of evaluations and get the number of prevalence points that +will generate a number of evaluations close, but not higher, than +the fixed budget. This can be achieved with the function +quapy.functional.get_nprevpoints_approximation, e.g.:

+
budget = 5000
+n_prevpoints = F.get_nprevpoints_approximation(budget, n_classes, n_repeats=1)
+n = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repeats=1)
+print(f'by setting n_prevpoints={n_prevpoints} the number of evaluations for {n_classes} classes will be {n}')
+
+
+

that will print:

+
by setting n_prevpoints=30 the number of evaluations for 4 classes will be 4960
+
+
+

The cost of evaluation will depend on the values of n_prevpoints, n_classes, +and n_repeats. Since it might sometimes be cumbersome to control the overall +cost of an experiment having to do with the number of combinations that +will be generated for a particular setting of these arguments (particularly +when n_classes>2), evaluation functions +typically allow the user to rather specify an evaluation budget, i.e., a maximum +number of samplings to generate. By specifying this argument, one could avoid +specifying n_prevpoints, and the value for it that would lead to a closer +number of evaluation budget, without surpassing it, will be automatically set.

+

The following script shows a full example in which a PACC model relying +on a Logistic Regressor classifier is +tested on the kindle dataset by means of the artificial prevalence +sampling protocol on samples of size 500, in terms of various +evaluation metrics.

+
import quapy as qp
+import quapy.functional as F
+from sklearn.linear_model import LogisticRegression
+
+qp.environ['SAMPLE_SIZE'] = 500
+
+dataset = qp.datasets.fetch_reviews('kindle')
+qp.data.preprocessing.text2tfidf(dataset, min_df=5, inplace=True)
+
+training = dataset.training
+test = dataset.test
+
+lr = LogisticRegression()
+pacc = qp.method.aggregative.PACC(lr)
+
+pacc.fit(training)
+
+df = qp.evaluation.artificial_sampling_report(
+    pacc,  # the quantification method
+    test,  # the test set on which the method will be evaluated
+    sample_size=qp.environ['SAMPLE_SIZE'],  #indicates the size of samples to be drawn
+    n_prevpoints=11,  # how many prevalence points will be extracted from the interval [0, 1] for each category
+    n_repetitions=1,  # number of times each prevalence will be used to generate a test sample
+    n_jobs=-1,  # indicates the number of parallel workers (-1 indicates, as in sklearn, all CPUs)
+    random_seed=42,  # setting a random seed allows to replicate the test samples across runs
+    error_metrics=['mae', 'mrae', 'mkld'],  # specify the evaluation metrics
+    verbose=True  # set to True to show some standard-line outputs
+)
+
+
+

The resulting report is a pandas’ dataframe that can be directly printed. +Here, we set some display options from pandas just to make the output clearer; +note also that the estimated prevalences are shown as strings using the +strprev function, which simply converts a prevalence into a +string representing it, with a fixed decimal precision (default 3):

+
import pandas as pd
+pd.set_option('display.expand_frame_repr', False)
+pd.set_option("precision", 3)
+df['estim-prev'] = df['estim-prev'].map(F.strprev)
+print(df)
+
+
+

The output should look like:

+
     true-prev      estim-prev    mae    mrae       mkld
+0   [0.0, 1.0]  [0.000, 1.000]  0.000   0.000  0.000e+00
+1   [0.1, 0.9]  [0.091, 0.909]  0.009   0.048  4.426e-04
+2   [0.2, 0.8]  [0.163, 0.837]  0.037   0.114  4.633e-03
+3   [0.3, 0.7]  [0.283, 0.717]  0.017   0.041  7.383e-04
+4   [0.4, 0.6]  [0.366, 0.634]  0.034   0.070  2.412e-03
+5   [0.5, 0.5]  [0.459, 0.541]  0.041   0.082  3.387e-03
+6   [0.6, 0.4]  [0.565, 0.435]  0.035   0.073  2.535e-03
+7   [0.7, 0.3]  [0.654, 0.346]  0.046   0.108  4.701e-03
+8   [0.8, 0.2]  [0.725, 0.275]  0.075   0.235  1.515e-02
+9   [0.9, 0.1]  [0.858, 0.142]  0.042   0.229  7.740e-03
+10  [1.0, 0.0]  [0.945, 0.055]  0.055  27.357  5.219e-02
+
+
+

One can get the averaged scores using standard pandas’ +functions, i.e.:

+
print(df.mean())
+
+
+

will produce the following output:

+
true-prev    0.500
+mae          0.035
+mrae         2.578
+mkld         0.009
+dtype: float64
+
+
+

Other evaluation functions include:

+
    +
  • artificial_sampling_eval: that computes the evaluation for a +given evaluation metric, returning the average instead of a dataframe.

  • +
  • artificial_sampling_prediction: that returns two np.arrays containing the +true prevalences and the estimated prevalences.

  • +
+

See the documentation for further details.

+
+
+ + +
+
+
+
+ +
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/Installation.html b/docs/build/html/Installation.html new file mode 100644 index 0000000..db80c8d --- /dev/null +++ b/docs/build/html/Installation.html @@ -0,0 +1,169 @@ + + + + + + + + + Installation — QuaPy 0.1.6 documentation + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Installation

+

QuaPy can be easily installed via pip

+
pip install quapy
+
+
+

See pip page for older versions.

+
+

Requirements

+
    +
  • scikit-learn, numpy, scipy

  • +
  • pytorch (for QuaNet)

  • +
  • svmperf patched for quantification (see below)

  • +
  • joblib

  • +
  • tqdm

  • +
  • pandas, xlrd

  • +
  • matplotlib

  • +
+
+
+

SVM-perf with quantification-oriented losses

+

In order to run experiments involving SVM(Q), SVM(KLD), SVM(NKLD), +SVM(AE), or SVM(RAE), you have to first download the +svmperf +package, apply the patch +svm-perf-quantification-ext.patch, +and compile the sources. +The script +prepare_svmperf.sh, +does all the job. Simply run:

+
./prepare_svmperf.sh
+
+
+

The resulting directory ./svm_perf_quantification contains the +patched version of svmperf with quantification-oriented losses.

+

The +svm-perf-quantification-ext.patch +is an extension of the patch made available by +Esuli et al. 2015 +that allows SVMperf to optimize for +the Q measure as proposed by +Barranquero et al. 2015 +and for the KLD and NKLD as proposed by +Esuli et al. 2015 +for quantification. +This patch extends the former by also allowing SVMperf to optimize for +AE and RAE.

+
+
+ + +
+
+
+
+ +
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/Methods.html b/docs/build/html/Methods.html new file mode 100644 index 0000000..dea3f0a --- /dev/null +++ b/docs/build/html/Methods.html @@ -0,0 +1,509 @@ + + + + + + + + + Quantification Methods — QuaPy 0.1.6 documentation + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Quantification Methods

+

Quantification methods can be categorized as belonging to +aggregative and non-aggregative groups. +Most methods included in QuaPy at the moment are of type aggregative +(though we plan to add many more methods in the near future), i.e., +are methods characterized by the fact that +quantification is performed as an aggregation function of the individual +products of classification.

+

Any quantifier in QuaPy should extend the class BaseQuantifier, +and implement some abstract methods:

+
    @abstractmethod
+    def fit(self, data: LabelledCollection): ...
+
+    @abstractmethod
+    def quantify(self, instances): ...
+
+    @abstractmethod
+    def set_params(self, **parameters): ...
+
+    @abstractmethod
+    def get_params(self, deep=True): ...
+
+
+

The meaning of those functions should be familiar to those +used to work with scikit-learn since the class structure of QuaPy +is directly inspired by scikit-learn’s Estimators. Functions +fit and quantify are used to train the model and to provide +class estimations (the reason why +scikit-learn’s structure has not been adopted as is in QuaPy responds to +the fact that scikit-learn’s predict function is expected to return +one output for each input element –e.g., a predicted label for each +instance in a sample– while in quantification the output for a sample +is one single array of class prevalences), while functions set_params +and get_params allow a +model selector +to automate the process of hyperparameter search.

+
+

Aggregative Methods

+

All quantification methods are implemented as part of the +qp.method package. In particular, aggregative methods are defined in +qp.method.aggregative, and extend AggregativeQuantifier(BaseQuantifier). +The methods that any aggregative quantifier must implement are:

+
    @abstractmethod
+    def fit(self, data: LabelledCollection, fit_learner=True): ...
+
+    @abstractmethod
+    def aggregate(self, classif_predictions:np.ndarray): ...
+
+
+

since, as mentioned before, aggregative methods base their prediction on the +individual predictions of a classifier. Indeed, a default implementation +of BaseQuantifier.quantify is already provided, which looks like:

+
    def quantify(self, instances):
+        classif_predictions = self.preclassify(instances)
+        return self.aggregate(classif_predictions)
+
+
+

Aggregative quantifiers are expected to maintain a classifier (which is +accessed through the @property learner). This classifier is +given as input to the quantifier, and can be already fit +on external data (in which case, the fit_learner argument should +be set to False), or be fit by the quantifier’s fit (default).

+

Another class of aggregative methods are the probabilistic +aggregative methods, that should inherit from the abstract class +AggregativeProbabilisticQuantifier(AggregativeQuantifier). +The particularity of probabilistic aggregative methods (w.r.t. +non-probabilistic ones), is that the default quantifier is defined +in terms of the posterior probabilities returned by a probabilistic +classifier, and not by the crisp decisions of a hard classifier; i.e.:

+
    def quantify(self, instances):
+        classif_posteriors = self.posterior_probabilities(instances)
+        return self.aggregate(classif_posteriors)
+
+
+

One advantage of aggregative methods (either probabilistic or not) +is that the evaluation according to any sampling procedure (e.g., +the artificial sampling protocol) +can be achieved very efficiently, since the entire set can be pre-classified +once, and the quantification estimations for different samples can directly +reuse these predictions, without requiring to classify each element every time. +QuaPy leverages this property to speed-up any procedure having to do with +quantification over samples, as is customarily done in model selection or +in evaluation.

+
+

The Classify & Count variants

+

QuaPy implements the four CC variants, i.e.:

+
    +
  • CC (Classify & Count), the simplest aggregative quantifier; one that +simply relies on the label predictions of a classifier to deliver class estimates.

  • +
  • ACC (Adjusted Classify & Count), the adjusted variant of CC.

  • +
  • PCC (Probabilistic Classify & Count), the probabilistic variant of CC that +relies on the soft estimations (or posterior probabilities) returned by a (probabilistic) classifier.

  • +
  • PACC (Probabilistic Adjusted Classify & Count), the adjusted variant of PCC.

  • +
+

The following code serves as a complete example using CC equipped +with a SVM as the classifier:

+
import quapy as qp
+import quapy.functional as F
+from sklearn.svm import LinearSVC
+
+dataset = qp.datasets.fetch_twitter('hcr', pickle=True)
+training = dataset.training
+test = dataset.test
+
+# instantiate a classifier learner, in this case a SVM
+svm = LinearSVC()
+
+# instantiate a Classify & Count with the SVM
+# (an alias is available in qp.method.aggregative.ClassifyAndCount)
+model = qp.method.aggregative.CC(svm)
+model.fit(training)
+estim_prevalence = model.quantify(test.instances)
+
+
+

The same code could be used to instantiate an ACC, by simply replacing +the instantiation of the model with:

+
model = qp.method.aggregative.ACC(svm)
+
+
+

Note that the adjusted variants (ACC and PACC) need to estimate +some parameters for performing the adjustment (e.g., the +true positive rate and the false positive rate in case of +binary classification) that are estimated on a validation split +of the labelled set. In this case, the init method of +ACC defines an additional parameter, val_split which, by +default, is set to 0.4 and so, the 40% of the labelled data +will be used for estimating the parameters for adjusting the +predictions. This parameter can also be set with an integer, +indicating that the parameters should be estimated by means of +k-fold cross-validation, for which the integer indicates the +number k of folds. Finally, val_split can be set to a +specific held-out validation set (i.e., an instance of LabelledCollection).

+

The specification of val_split can be +postponed to the invocation of the fit method (if val_split was also +set in the constructor, the one specified at fit time would prevail), +e.g.:

+
model = qp.method.aggregative.ACC(svm)
+# perform 5-fold cross validation for estimating ACC's parameters
+# (overrides the default val_split=0.4 in the constructor)
+model.fit(training, val_split=5)
+
+
+

The following code illustrates the case in which PCC is used:

+
model = qp.method.aggregative.PCC(svm)
+model.fit(training)
+estim_prevalence = model.quantify(test.instances)
+print('classifier:', model.learner)
+
+
+

In this case, QuaPy will print:

+
The learner LinearSVC does not seem to be probabilistic. The learner will be calibrated.
+classifier: CalibratedClassifierCV(base_estimator=LinearSVC(), cv=5)
+
+
+

The first output indicates that the learner (LinearSVC in this case) +is not a probabilistic classifier (i.e., it does not implement the +predict_proba method) and so, the classifier will be converted to +a probabilistic one through calibration. +As a result, the classifier that is printed in the second line points +to a CalibratedClassifier instance. Note that calibration can only +be applied to hard classifiers when fit_learner=True; an exception +will be raised otherwise.

+

Lastly, everything we said about ACC and PCC +applies to PACC as well.

+
+
+

Expectation Maximization (EMQ)

+

The Expectation Maximization Quantifier (EMQ), also known as +the SLD, is available at qp.method.aggregative.EMQ or via the +alias qp.method.aggregative.ExpectationMaximizationQuantifier. +The method is described in:

+

Saerens, M., Latinne, P., and Decaestecker, C. (2002). Adjusting the outputs of a classifier +to new a priori probabilities: A simple procedure. Neural Computation, 14(1):21–41.

+

EMQ works with a probabilistic classifier (if the classifier +given as input is a hard one, a calibration will be attempted). +Although this method was originally proposed for improving the +posterior probabilities of a probabilistic classifier, and not +for improving the estimation of prior probabilities, EMQ ranks +almost always among the most effective quantifiers in the +experiments we have carried out.

+

An example of use can be found below:

+
import quapy as qp
+from sklearn.linear_model import LogisticRegression
+
+dataset = qp.datasets.fetch_twitter('hcr', pickle=True)
+
+model = qp.method.aggregative.EMQ(LogisticRegression())
+model.fit(dataset.training)
+estim_prevalence = model.quantify(dataset.test.instances)
+
+
+
+
+

Hellinger Distance y (HDy)

+

The method HDy is described in:

+

Implementation of the method based on the Hellinger Distance y (HDy) proposed by +González-Castro, V., Alaiz-Rodrı́guez, R., and Alegre, E. (2013). Class distribution +estimation based on the Hellinger distance. Information Sciences, 218:146–164.

+

It is implemented in qp.method.aggregative.HDy (also accessible +through the alias qp.method.aggregative.HellingerDistanceY). +This method works with a probabilistic classifier (hard classifiers +can be used as well and will be calibrated) and requires a validation +set to estimate the parameters of the mixture model. Just like +ACC and PACC, this quantifier receives a val_split argument +in the constructor (or in the fit method, in which case the previous +value is overridden) that can either be a float indicating the proportion +of training data to be taken as the validation set (in a random +stratified split), or a validation set (i.e., an instance of +LabelledCollection) itself.

+

HDy was proposed as a binary quantifier and the implementation +provided in QuaPy accepts only binary datasets.

+

The following code shows an example of use:

+
import quapy as qp
+from sklearn.linear_model import LogisticRegression
+
+# load a binary dataset
+dataset = qp.datasets.fetch_reviews('hp', pickle=True)
+qp.data.preprocessing.text2tfidf(dataset, min_df=5, inplace=True)
+
+model = qp.method.aggregative.HDy(LogisticRegression())
+model.fit(dataset.training)
+estim_prevalence = model.quantify(dataset.test.instances)
+
+
+
+
+

Explicit Loss Minimization

+

Explicit Loss Minimization (ELM) represents a family of methods +based on structured output learning, i.e., quantifiers relying on +classifiers that have been optimized targeting a +quantification-oriented evaluation measure.

+

In QuaPy, the following methods, all relying on Joachims’ +SVMperf +implementation, are available in qp.method.aggregative:

+
    +
  • SVMQ (SVM-Q) is a quantification method optimizing the metric Q defined +in Barranquero, J., Díez, J., and del Coz, J. J. (2015). Quantification-oriented learning based +on reliable classifiers. Pattern Recognition, 48(2):591–604.

  • +
  • SVMKLD (SVM for Kullback-Leibler Divergence) proposed in Esuli, A. and Sebastiani, F. (2015). +Optimizing text quantifiers for multivariate loss functions. +ACM Transactions on Knowledge Discovery and Data, 9(4):Article 27.

  • +
  • SVMNKLD (SVM for Normalized Kullback-Leibler Divergence) proposed in Esuli, A. and Sebastiani, F. (2015). +Optimizing text quantifiers for multivariate loss functions. +ACM Transactions on Knowledge Discovery and Data, 9(4):Article 27.

  • +
  • SVMAE (SVM for Mean Absolute Error)

  • +
  • SVMRAE (SVM for Mean Relative Absolute Error)

  • +
+

the last two methods (SVMAE and SVMRAE) have been implemented in +QuaPy in order to make available ELM variants for what nowadays +are considered the most well-behaved evaluation metrics in quantification.

+

In order to make these models work, you would need to run the script +prepare_svmperf.sh (distributed along with QuaPy) that +downloads SVMperf’s source code, applies a patch that +implements the quantification oriented losses, and compiles the +sources.

+

If you want to add any custom loss, you would need to modify +the source code of SVMperf in order to implement it, and +assign a valid loss code to it. Then you must re-compile +the whole thing and instantiate the quantifier in QuaPy +as follows:

+
# you can either set the path to your custom svm_perf_quantification implementation
+# in the environment variable, or as an argument to the constructor of ELM
+qp.environ['SVMPERF_HOME'] = './path/to/svm_perf_quantification'
+
+# assign an alias to your custom loss and the id you have assigned to it
+svmperf = qp.classification.svmperf.SVMperf
+svmperf.valid_losses['mycustomloss'] = 28
+
+# instantiate the ELM method indicating the loss
+model = qp.method.aggregative.ELM(loss='mycustomloss')
+
+
+

All ELM variants are binary quantifiers since they rely on SVMperf, which +currently supports only binary classification. +ELM variants (any binary quantifier in general) can be extended +to operate in single-label scenarios trivially by adopting a +“one-vs-all” strategy (as, e.g., in +Gao, W. and Sebastiani, F. (2016). From classification to quantification in tweet sentiment +analysis. Social Network Analysis and Mining, 6(19):1–22). +In QuaPy this is possible by using the OneVsAll class:

+
import quapy as qp
+from quapy.method.aggregative import SVMQ, OneVsAll
+
+# load a single-label dataset (this one contains 3 classes)
+dataset = qp.datasets.fetch_twitter('hcr', pickle=True)
+
+# let qp know where svmperf is
+qp.environ['SVMPERF_HOME'] = '../svm_perf_quantification'
+
+model = OneVsAll(SVMQ(), n_jobs=-1)  # run them on parallel
+model.fit(dataset.training)
+estim_prevalence = model.quantify(dataset.test.instances)
+
+
+
+
+
+

Meta Models

+

By meta models we mean quantification methods that are defined on top of other +quantification methods, and that thus do not squarely belong to the aggregative nor +the non-aggregative group (indeed, meta models could use quantifiers from any of those +groups). +Meta models are implemented in the qp.method.meta module.

+
+

Ensembles

+

QuaPy implements (some of) the variants proposed in:

+
    +
  • Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017). +Using ensembles for problems with characterizable changes in data distribution: A case study on quantification. +Information Fusion, 34, 87-100.

  • +
  • Pérez-Gállego, P., Castano, A., Quevedo, J. R., & del Coz, J. J. (2019). +Dynamic ensemble selection for quantification tasks. +Information Fusion, 45, 1-15.

  • +
+

The following code shows how to instantiate an Ensemble of 30 Adjusted Classify & Count (ACC) +quantifiers operating with a Logistic Regressor (LR) as the base classifier, and using the +average as the aggregation policy (see the original article for further details). +The last parameter indicates to use all processors for parallelization.

+
import quapy as qp
+from quapy.method.aggregative import ACC
+from quapy.method.meta import Ensemble
+from sklearn.linear_model import LogisticRegression
+
+dataset = qp.datasets.fetch_UCIDataset('haberman')
+
+model = Ensemble(quantifier=ACC(LogisticRegression()), size=30, policy='ave', n_jobs=-1)
+model.fit(dataset.training)
+estim_prevalence = model.quantify(dataset.test.instances)
+
+
+

Other aggregation policies implemented in QuaPy include:

+
    +
  • ‘ptr’ for applying a dynamic selection based on the training prevalence of the ensemble’s members

  • +
  • ‘ds’ for applying a dynamic selection based on the Hellinger Distance

  • +
  • any valid quantification measure (e.g., ‘mse’) for performing a static selection based on +the performance estimated for each member of the ensemble in terms of that evaluation metric.

  • +
+

When using any of the above options, it is important to set the red_size parameter, which +informs of the number of members to retain.

+

Please, check the model selection +wiki if you want to optimize the hyperparameters of the ensemble for classification or quantification.

+
+
+

The QuaNet neural network

+

QuaPy offers an implementation of QuaNet, a deep learning model presented in:

+

Esuli, A., Moreo, A., & Sebastiani, F. (2018, October). +A recurrent neural network for sentiment quantification. +In Proceedings of the 27th ACM International Conference on +Information and Knowledge Management (pp. 1775-1778).

+

This model requires torch to be installed. +QuaNet also requires a classifier that can provide embedded representations +of the inputs. +In the original paper, QuaNet was tested using an LSTM as the base classifier. +In the following example, we show an instantiation of QuaNet that instead uses CNN as a probabilistic classifier, taking its last layer representation as the document embedding:

+
import quapy as qp
+from quapy.method.meta import QuaNet
+from quapy.classification.neural import NeuralClassifierTrainer, CNNnet
+
+# use samples of 100 elements
+qp.environ['SAMPLE_SIZE'] = 100
+
+# load the kindle dataset as text, and convert words to numerical indexes
+dataset = qp.datasets.fetch_reviews('kindle', pickle=True)
+qp.data.preprocessing.index(dataset, min_df=5, inplace=True)
+
+# the text classifier is a CNN trained by NeuralClassifierTrainer
+cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes)
+learner = NeuralClassifierTrainer(cnn, device='cuda')
+
+# train QuaNet
+model = QuaNet(learner, qp.environ['SAMPLE_SIZE'], device='cuda')
+model.fit(dataset.training)
+estim_prevalence = model.quantify(dataset.test.instances)
+
+
+
+
+
+ + +
+
+
+
+ +
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/Model-Selection.html b/docs/build/html/Model-Selection.html new file mode 100644 index 0000000..3d1c9d3 --- /dev/null +++ b/docs/build/html/Model-Selection.html @@ -0,0 +1,246 @@ + + + + + + + + + Model Selection — QuaPy 0.1.6 documentation + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Model Selection

+

As a supervised machine learning task, quantification methods +can strongly depend on a good choice of model hyper-parameters. +The process whereby those hyper-parameters are chosen is +typically known as Model Selection, and typically consists of +testing different settings and picking the one that performed +best in a held-out validation set in terms of any given +evaluation measure.

+
+

Targeting a Quantification-oriented loss

+

The task being optimized determines the evaluation protocol, +i.e., the criteria according to which the performance of +any given method for solving it is to be assessed. +As a task in its own right, quantification should impose +its own model selection strategies, i.e., strategies +aimed at finding appropriate configurations +specifically designed for the task of quantification.

+

Quantification has long been regarded as an add-on of +classification, and thus the model selection strategies +customarily adopted in classification have simply been +applied to quantification (see the next section). +It has been argued in Moreo, Alejandro, and Fabrizio Sebastiani. +“Re-Assessing the ‘Classify and Count’ Quantification Method.” +arXiv preprint arXiv:2011.02552 (2020). +that specific model selection strategies should +be adopted for quantification. That is, model selection +strategies for quantification should target +quantification-oriented losses and be tested in a variety +of scenarios exhibiting different degrees of prior +probability shift.

+

The class +qp.model_selection.GridSearchQ +implements a grid-search exploration over the space of +hyper-parameter combinations that evaluates each
+combination of hyper-parameters +by means of a given quantification-oriented +error metric (e.g., any of the error functions implemented +in qp.error) and according to the +artificial sampling protocol.

+

The following is an example of model selection for quantification:

+
import quapy as qp
+from quapy.method.aggregative import PCC
+from sklearn.linear_model import LogisticRegression
+import numpy as np
+
+# set a seed to replicate runs
+np.random.seed(0)
+qp.environ['SAMPLE_SIZE'] = 500
+
+dataset = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=5)
+
+# The model will be returned by the fit method of GridSearchQ.
+# Model selection will be performed with a fixed budget of 1000 evaluations
+# for each hyper-parameter combination. The error to optimize is the MAE for
+# quantification, as evaluated on artificially drawn samples at prevalences 
+# covering the entire spectrum on a held-out portion (40%) of the training set.
+model = qp.model_selection.GridSearchQ(
+    model=PCC(LogisticRegression()),
+    param_grid={'C': np.logspace(-4,5,10), 'class_weight': ['balanced', None]},
+    sample_size=qp.environ['SAMPLE_SIZE'],
+    eval_budget=1000,
+    error='mae',
+    refit=True,  # retrain on the whole labelled set
+    val_split=0.4,
+    verbose=True  # show information as the process goes on
+).fit(dataset.training)
+
+print(f'model selection ended: best hyper-parameters={model.best_params_}')
+model = model.best_model_
+
+# evaluation in terms of MAE
+results = qp.evaluation.artificial_sampling_eval(
+    model,
+    dataset.test,
+    sample_size=qp.environ['SAMPLE_SIZE'],
+    n_prevpoints=101,
+    n_repetitions=10,
+    error_metric='mae'
+)
+
+print(f'MAE={results:.5f}')
+
+
+

In this example, the system outputs:

+
[GridSearchQ]: starting optimization with n_jobs=1
+[GridSearchQ]: checking hyperparams={'C': 0.0001, 'class_weight': 'balanced'} got mae score 0.24987
+[GridSearchQ]: checking hyperparams={'C': 0.0001, 'class_weight': None} got mae score 0.48135
+[GridSearchQ]: checking hyperparams={'C': 0.001, 'class_weight': 'balanced'} got mae score 0.24866
+[...]
+[GridSearchQ]: checking hyperparams={'C': 100000.0, 'class_weight': None} got mae score 0.43676
+[GridSearchQ]: optimization finished: best params {'C': 0.1, 'class_weight': 'balanced'} (score=0.19982)
+[GridSearchQ]: refitting on the whole development set
+model selection ended: best hyper-parameters={'C': 0.1, 'class_weight': 'balanced'}
+1010 evaluations will be performed for each combination of hyper-parameters
+[artificial sampling protocol] generating predictions: 100%|██████████| 1010/1010 [00:00<00:00, 5005.54it/s]
+MAE=0.20342
+
+
+

The parameter val_split can alternatively be used to indicate +a validation set (i.e., an instance of LabelledCollection) instead +of a proportion. This could be useful if one wants to have control +on the specific data split to be used across different model selection +experiments.

+
+
+

Targeting a Classification-oriented loss

+

Optimizing a model for quantification could rather be +computationally costly. +In aggregative methods, one could alternatively try to optimize +the classifier’s hyper-parameters for classification. +Although this is theoretically suboptimal, many articles in +quantification literature have opted for this strategy.

+

In QuaPy, this is achieved by simply instantiating the +classifier learner as a GridSearchCV from scikit-learn. +The following code illustrates how to do that:

+
learner = GridSearchCV(
+    LogisticRegression(),
+    param_grid={'C': np.logspace(-4, 5, 10), 'class_weight': ['balanced', None]},
+    cv=5)
+model = PCC(learner).fit(dataset.training)
+print(f'model selection ended: best hyper-parameters={model.learner.best_params_}')
+
+
+

In this example, the system outputs:

+
model selection ended: best hyper-parameters={'C': 10000.0, 'class_weight': None}
+1010 evaluations will be performed for each combination of hyper-parameters
+[artificial sampling protocol] generating predictions: 100%|██████████| 1010/1010 [00:00<00:00, 5379.55it/s]
+MAE=0.41734
+
+
+

Note that the MAE is worse than the one we obtained when optimizing +for quantification and, indeed, the hyper-parameters found optimal +largely differ between the two selection modalities. The +hyper-parameters C=10000 and class_weight=None have been found +to work well for the specific training prevalence of the HP dataset, +but these hyper-parameters turned out to be suboptimal when the +class prevalences of the test set differ (as is indeed tested +in scenarios of quantification).

+

This is, however, not always the case, and one could, in practice, +find examples +in which optimizing for classification ends up resulting in a better +quantifier than when optimizing for quantification. +Nonetheless, this is theoretically unlikely to happen.

+
+
+ + +
+
+
+
+ +
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/Plotting.html b/docs/build/html/Plotting.html new file mode 100644 index 0000000..19c7070 --- /dev/null +++ b/docs/build/html/Plotting.html @@ -0,0 +1,344 @@ + + + + + + + + + Plotting — QuaPy 0.1.6 documentation + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Plotting

+

The module qp.plot implements some basic plotting functions +that can help analyse the performance of a quantification method.

+

All plotting functions receive as inputs the outcomes of +some experiments and include, for each experiment, +the following three main arguments:

+
    +
  • method_names a list containing the names of the quantification methods

  • +
  • true_prevs a list containing matrices of true prevalences

  • +
  • estim_prevs a list containing matrices of estimated prevalences +(should be of the same shape as the corresponding matrix in true_prevs)

  • +
+

Note that a method (as indicated by a name in method_names) can +appear more than once. This could occur when various datasets are +involved in the experiments. In this case, all experiments for the +method will be merged and the plot will represent the method’s +performance across various datasets.

+

This is a very simple example of a valid input for the plotting functions:

+
method_names = ['classify & count', 'EMQ', 'classify & count']
+true_prevs = [
+    np.array([[0.5, 0.5], [0.25, 0.75]]),
+    np.array([[0.0, 1.0], [0.25, 0.75], [0.0, 0.1]]),
+    np.array([[0.0, 1.0], [0.25, 0.75], [0.0, 0.1]]),
+]
+estim_prevs = [
+    np.array([[0.45, 0.55], [0.6, 0.4]]),
+    np.array([[0.0, 1.0], [0.5, 0.5], [0.2, 0.8]]),
+    np.array([[0.1, 0.9], [0.3, 0.7], [0.0, 0.1]]),
+]
+
+
+

in which the classify & count has been tested in two datasets and +the EMQ method has been tested only in one dataset. For the first +experiment, only two (binary) quantifications have been tested, +while for the second and third experiments three instances have +been tested.

+

In general, we would like to test the performance of the +quantification methods across different scenarios showcasing +the accuracy of the quantifier in predicting class prevalences +for a wide range of prior distributions. This can easily be +achieved by means of the +artificial sampling protocol +that is implemented in QuaPy.

+

The following code shows how to perform one simple experiment +in which the 4 CC-variants, all equipped with a linear SVM, are +applied to one binary dataset of reviews about Kindle devices and +tested across the entire spectrum of class priors (taking 21 splits +of the interval [0,1], i.e., using prevalence steps of 0.05, and +generating 100 random samples at each prevalence).

+
import quapy as qp
+from quapy.method.aggregative import CC, ACC, PCC, PACC
+from sklearn.svm import LinearSVC
+
+qp.environ['SAMPLE_SIZE'] = 500
+
+def gen_data():
+
+    def base_classifier():
+        return LinearSVC()
+
+    def models():
+        yield CC(base_classifier())
+        yield ACC(base_classifier())
+        yield PCC(base_classifier())
+        yield PACC(base_classifier())
+
+    data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)
+
+    method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], []
+
+    for model in models():
+        model.fit(data.training)
+        true_prev, estim_prev = qp.evaluation.artificial_sampling_prediction(
+            model, data.test, qp.environ['SAMPLE_SIZE'], n_repetitions=100, n_prevpoints=21
+        )
+
+        method_names.append(model.__class__.__name__)
+        true_prevs.append(true_prev)
+        estim_prevs.append(estim_prev)
+        tr_prevs.append(data.training.prevalence())
+
+    return method_names, true_prevs, estim_prevs, tr_prevs
+
+method_names, true_prevs, estim_prevs, tr_prevs = gen_data()
+
+
+

the plots that can be generated are explained below.

+
+

Diagonal Plot

+

The diagonal plot shows a very insightful view of the +quantifier’s performance. It plots the predicted class +prevalence (in the y-axis) against the true class prevalence +(in the x-axis). Unfortunately, it is limited to binary quantification, +although one can simply generate as many diagonal plots as +classes there are by indicating which class should be considered +the target of the plot.

+

The following call will produce the plot:

+
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, train_prev=tr_prevs[0], savepath='./plots/bin_diag.png')
+
+
+

the last argument is optional, and indicates the path where to save +the plot (the file extension will determine the format – typical extensions +are ‘.png’ or ‘.pdf’). If this path is not provided, then the plot +will be shown but not saved. +The resulting plot should look like:

+

diagonal plot on Kindle

+

Note that in this case, we are also indicating the training +prevalence, which is plotted in the diagonal as a cyan dot. +The color bands indicate the standard deviations of the predictions, +and can be hidden by setting the argument show_std=False (see +the complete list of arguments in the documentation).

+

Finally, note how most quantifiers, and especially the “unadjusted” +variants CC and PCC, are strongly biased towards the +prevalence seen during training.

+
+
+

Quantification bias

+

This plot aims at evincing the bias that any quantifier +displays with respect to the training prevalences by +means of box plots. +This plot can be generated by:

+
qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, savepath='./plots/bin_bias.png')
+
+
+

and should look like:

+

bias plot on Kindle

+

The box plots show some interesting facts:

+
    +
  • all methods are biased towards the training prevalence but especially +so CC and PCC (an unbiased quantifier would have a box centered at 0)

  • +
  • the bias is always positive, indicating that all methods tend to +overestimate the positive class prevalence

  • +
  • CC and PCC have high variability while ACC and especially PACC exhibit +lower variability.

  • +
+

Again, these plots could be generated for experiments ranging across +different datasets, and the plot will merge all data accordingly.

+

Another illustrative example can be shown that consists of +training different CC quantifiers at different +(artificially sampled) training prevalences. +For this example, we generate training samples of 5000 +documents containing 10%, 20%, …, 90% of positives from the +IMDb dataset, and generate the bias plot again. +This example can be run by rewriting the gen_data() function +like this:

+
def gen_data():
+    data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5)
+    model = CC(LinearSVC())
+
+    method_data = []
+    for training_prevalence in np.linspace(0.1, 0.9, 9):
+        training_size = 5000
+        # since the problem is binary, it suffices to specify the negative prevalence (the positive is constrained)
+        training = data.training.sampling(training_size, 1 - training_prevalence)
+        model.fit(training)
+        true_prev, estim_prev = qp.evaluation.artificial_sampling_prediction(
+            model, data.sample, qp.environ['SAMPLE_SIZE'], n_repetitions=100, n_prevpoints=21
+        )
+        # method names can contain Latex syntax
+        method_name = 'CC$_{' + f'{int(100 * training_prevalence)}' + '\%}$'
+        method_data.append((method_name, true_prev, estim_prev, training.prevalence()))
+
+    return zip(*method_data)
+
+
+

and the plot should now look like:

+

bias plot on IMDb

+

which clearly shows a negative bias for CC variants trained on +data containing more negatives (i.e., < 50%) and positive biases +in cases containing more positives (i.e., >50%). The CC trained +at 50% behaves as an unbiased estimator of the positive class +prevalence.

+

The function qp.plot.binary_bias_bins allows the user to +generate box plots broken down by bins of true test prevalence. +To this aim, an argument nbins is passed which indicates +how many isometric subintervals to take. For example +the following plot is produced for nbins=3:

+

bias plot on IMDb

+

Interestingly enough, the seemingly unbiased estimator (CC at 50%) happens to display +a positive bias (or a tendency to overestimate) in cases of low prevalence +(i.e., when the true prevalence of the positive class is below 33%), +and a negative bias (or a tendency to underestimate) in cases of high prevalence +(i.e., when the true prevalence is beyond 67%).

+

Out of curiosity, the diagonal plot for this experiment looks like:

+

diag plot on IMDb

+

showing pretty clearly the dependency of CC on the prior probabilities +of the labeled set it was trained on.

+
+
+

Error by Drift

+

The plots discussed above are useful for analyzing and comparing +the performance of different quantification methods, but are +limited to the binary case. The “error by drift” is a plot +that shows the error in predictions as a function of the +(prior probability) drift between each test sample and the +training set. Interestingly, the error and drift can both be measured +in terms of any evaluation measure for quantification (like the +ones available in qp.error) and can thus be computed +irrespectively of the number of classes.

+

The following shows how to generate the plot for the 4 CC variants, +using 10 bins for the drift +and absolute error as the measure of the error (the +drift in the x-axis is always computed in terms of absolute error since +other errors are harder to interpret):

+
qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, 
+    error_name='ae', n_bins=10, savepath='./plots/err_drift.png')
+
+
+

diag plot on IMDb

+

Note that all methods work reasonably well in cases of low prevalence +drift (i.e., any CC-variant is a good quantifier whenever the IID +assumption is approximately preserved). The higher the drift, the worse +those quantifiers tend to perform, although it is clear that PACC +yields the lowest error for the most difficult cases.

+

Remember that any plot can be generated across many datasets, and +that this would probably result in a more solid comparison. +In those cases, however, it is likely that the variances of each +method get higher, to the detriment of the visualization. +We recommend setting show_std=False in those cases +in order to hide the color bands.

+
+
+ + +
+
+
+
+ +
+
+ + + + \ No newline at end of file diff --git a/docs/build/html/_images/bin_bias.png b/docs/build/html/_images/bin_bias.png new file mode 100644 index 0000000..572dae4 Binary files /dev/null and b/docs/build/html/_images/bin_bias.png differ diff --git a/docs/build/html/_images/bin_bias_bin_cc.png b/docs/build/html/_images/bin_bias_bin_cc.png new file mode 100644 index 0000000..db34c76 Binary files /dev/null and b/docs/build/html/_images/bin_bias_bin_cc.png differ diff --git a/docs/build/html/_images/bin_bias_cc.png b/docs/build/html/_images/bin_bias_cc.png new file mode 100644 index 0000000..db91dd4 Binary files /dev/null and b/docs/build/html/_images/bin_bias_cc.png differ diff --git a/docs/build/html/_images/bin_diag.png b/docs/build/html/_images/bin_diag.png new file mode 100644 index 0000000..7ded71a Binary files /dev/null and b/docs/build/html/_images/bin_diag.png differ diff --git a/docs/build/html/_images/bin_diag_cc.png b/docs/build/html/_images/bin_diag_cc.png new file mode 100644 index 0000000..01bb43d Binary files /dev/null and b/docs/build/html/_images/bin_diag_cc.png differ diff --git a/docs/build/html/_images/err_drift.png b/docs/build/html/_images/err_drift.png new file mode 100644 index 0000000..496b66c Binary files /dev/null and b/docs/build/html/_images/err_drift.png differ diff --git a/docs/build/html/_sources/Datasets.md.txt b/docs/build/html/_sources/Datasets.md.txt new file mode 100644 index 0000000..9632742 --- /dev/null +++ b/docs/build/html/_sources/Datasets.md.txt @@ -0,0 +1,332 @@ +# Datasets + +QuaPy makes available several datasets that have been used in +quantification literature, as well as an interface to allow +anyone import their custom datasets. + +A _Dataset_ object in QuaPy is roughly a pair of _LabelledCollection_ objects, +one playing the role of the training set, another the test set. +_LabelledCollection_ is a data class consisting of the (iterable) +instances and labels. 
This class handles most of the sampling functionality in QuaPy. +Take a look at the following code: + +```python +import quapy as qp +import quapy.functional as F + +instances = [ + '1st positive document', '2nd positive document', + 'the only negative document', + '1st neutral document', '2nd neutral document', '3rd neutral document' +] +labels = [2, 2, 0, 1, 1, 1] + +data = qp.data.LabelledCollection(instances, labels) +print(F.strprev(data.prevalence(), prec=2)) +``` + +Output the class prevalences (showing 2 digit precision): +``` +[0.17, 0.50, 0.33] +``` + +One can easily produce new samples at desired class prevalences: +```python +sample_size = 10 +prev = [0.4, 0.1, 0.5] +sample = data.sampling(sample_size, *prev) + +print('instances:', sample.instances) +print('labels:', sample.labels) +print('prevalence:', F.strprev(sample.prevalence(), prec=2)) +``` + +Which outputs: +``` +instances: ['the only negative document' '2nd positive document' + '2nd positive document' '2nd neutral document' '1st positive document' + 'the only negative document' 'the only negative document' + 'the only negative document' '2nd positive document' + '1st positive document'] +labels: [0 2 2 1 2 0 0 0 2 2] +prevalence: [0.40, 0.10, 0.50] +``` + +Samples can be made consistent across different runs (e.g., to test +different methods on the same exact samples) by sampling and retaining +the indexes, that can then be used to generate the sample: + +```python +index = data.sampling_index(sample_size, *prev) +for method in methods: + sample = data.sampling_from_index(index) + ... 
+``` + +QuaPy also implements the artificial sampling protocol that produces (via a +Python's generator) a series of _LabelledCollection_ objects with equidistant +prevalences ranging across the entire prevalence spectrum in the simplex space, e.g.: + +```python +for sample in data.artificial_sampling_generator(sample_size=100, n_prevalences=5): + print(F.strprev(sample.prevalence(), prec=2)) +``` + +produces one sampling for each (valid) combination of prevalences originating from +splitting the range [0,1] into n_prevalences=5 points (i.e., [0, 0.25, 0.5, 0.75, 1]), +that is: +``` +[0.00, 0.00, 1.00] +[0.00, 0.25, 0.75] +[0.00, 0.50, 0.50] +[0.00, 0.75, 0.25] +[0.00, 1.00, 0.00] +[0.25, 0.00, 0.75] +... +[1.00, 0.00, 0.00] +``` + +See the [Evaluation wiki](https://github.com/HLT-ISTI/QuaPy/wiki/Evaluation) for +further details on how to use the artificial sampling protocol to properly +evaluate a quantification method. + + +## Reviews Datasets + +Three datasets of reviews about Kindle devices, Harry Potter's series, and +the well-known IMDb movie reviews can be fetched using a unified interface. +For example: + +```python +import quapy as qp +data = qp.datasets.fetch_reviews('kindle') +``` + +These datasets have been used in: +``` +Esuli, A., Moreo, A., & Sebastiani, F. (2018, October). +A recurrent neural network for sentiment quantification. +In Proceedings of the 27th ACM International Conference on +Information and Knowledge Management (pp. 1775-1778). 
+``` + +The list of reviews ids is available in: + +```python +qp.datasets.REVIEWS_SENTIMENT_DATASETS +``` + +Some statistics of the available datasets are summarized below: + +| Dataset | classes | train size | test size | train prev | test prev | type | +|---|:---:|:---:|:---:|:---:|:---:|---| +| hp | 2 | 9533 | 18399 | [0.018, 0.982] | [0.065, 0.935] | text | +| kindle | 2 | 3821 | 21591 | [0.081, 0.919] | [0.063, 0.937] | text | +| imdb | 2 | 25000 | 25000 | [0.500, 0.500] | [0.500, 0.500] | text | + + +## Twitter Sentiment Datasets + +11 Twitter datasets for sentiment analysis. +Text is not accessible, and the documents were made available +in tf-idf format. Each dataset presents two splits: a train/val +split for model selection purposes, and a train+val/test split +for model evaluation. The following code exemplifies how to load +a twitter dataset for model selection. + +```python +import quapy as qp +data = qp.datasets.fetch_twitter('gasp', for_model_selection=True) +``` + +The datasets were used in: + +``` +Gao, W., & Sebastiani, F. (2015, August). +Tweet sentiment: From classification to quantification. +In 2015 IEEE/ACM International Conference on Advances in +Social Networks Analysis and Mining (ASONAM) (pp. 97-104). IEEE. +``` + +Three of the datasets (semeval13, semeval14, and semeval15) share the +same training set (semeval), meaning that the training split one would get +when requesting any of them is the same. The dataset "semeval" can only +be requested with "for_model_selection=True". 
+The lists of the Twitter dataset's ids can be consulted in: + +```python +# a list of 11 dataset ids that can be used for model selection or model evaluation +qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST + +# 9 dataset ids in which "semeval13", "semeval14", and "semeval15" are replaced with "semeval" +qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN +``` + +Some details can be found below: + +| Dataset | classes | train size | test size | features | train prev | test prev | type | +|---|:---:|:---:|:---:|:---:|:---:|:---:|---| +| gasp | 3 | 8788 | 3765 | 694582 | [0.421, 0.496, 0.082] | [0.407, 0.507, 0.086] | sparse | +| hcr | 3 | 1594 | 798 | 222046 | [0.546, 0.211, 0.243] | [0.640, 0.167, 0.193] | sparse | +| omd | 3 | 1839 | 787 | 199151 | [0.463, 0.271, 0.266] | [0.437, 0.283, 0.280] | sparse | +| sanders | 3 | 2155 | 923 | 229399 | [0.161, 0.691, 0.148] | [0.164, 0.688, 0.148] | sparse | +| semeval13 | 3 | 11338 | 3813 | 1215742 | [0.159, 0.470, 0.372] | [0.158, 0.430, 0.412] | sparse | +| semeval14 | 3 | 11338 | 1853 | 1215742 | [0.159, 0.470, 0.372] | [0.109, 0.361, 0.530] | sparse | +| semeval15 | 3 | 11338 | 2390 | 1215742 | [0.159, 0.470, 0.372] | [0.153, 0.413, 0.434] | sparse | +| semeval16 | 3 | 8000 | 2000 | 889504 | [0.157, 0.351, 0.492] | [0.163, 0.341, 0.497] | sparse | +| sst | 3 | 2971 | 1271 | 376132 | [0.261, 0.452, 0.288] | [0.207, 0.481, 0.312] | sparse | +| wa | 3 | 2184 | 936 | 248563 | [0.305, 0.414, 0.281] | [0.282, 0.446, 0.272] | sparse | +| wb | 3 | 4259 | 1823 | 404333 | [0.270, 0.392, 0.337] | [0.274, 0.392, 0.335] | sparse | +## UCI Machine Learning + +A set of 32 datasets from the [UCI Machine Learning repository](https://archive.ics.uci.edu/ml/datasets.php) +used in: + +``` +Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017). +Using ensembles for problems with characterizable changes +in data distribution: A case study on quantification. +Information Fusion, 34, 87-100. 
+``` + +The list does not exactly coincide with that used in Pérez-Gállego et al. 2017 +since we were unable to find the datasets with ids "diabetes" and "phoneme". + +These datasets can be loaded by calling, e.g.: + +```python +import quapy as qp +data = qp.datasets.fetch_UCIDataset('yeast', verbose=True) +``` + +This call will return a _Dataset_ object in which the training and +test splits are randomly drawn, in a stratified manner, from the whole +collection at 70% and 30%, respectively. The _verbose=True_ option indicates +that the dataset description should be printed in standard output. +The original data is not split, +and some papers submit the entire collection to a kFCV validation. +In order to accommodate these practices, one could first instantiate +the entire collection, and then create a generator that will return one +training+test dataset at a time, following a kFCV protocol: + +```python +import quapy as qp +collection = qp.datasets.fetch_UCILabelledCollection("yeast") +for data in qp.data.Dataset.kFCV(collection, nfolds=5, nrepeats=2): + ... +``` + +The above code allows one to conduct a 2x5FCV evaluation on the "yeast" dataset. + +All datasets come in numerical form (dense matrices); some statistics +are summarized below. 
+ +| Dataset | classes | instances | features | prev | type | +|---|:---:|:---:|:---:|:---:|---| +| acute.a | 2 | 120 | 6 | [0.508, 0.492] | dense | +| acute.b | 2 | 120 | 6 | [0.583, 0.417] | dense | +| balance.1 | 2 | 625 | 4 | [0.539, 0.461] | dense | +| balance.2 | 2 | 625 | 4 | [0.922, 0.078] | dense | +| balance.3 | 2 | 625 | 4 | [0.539, 0.461] | dense | +| breast-cancer | 2 | 683 | 9 | [0.350, 0.650] | dense | +| cmc.1 | 2 | 1473 | 9 | [0.573, 0.427] | dense | +| cmc.2 | 2 | 1473 | 9 | [0.774, 0.226] | dense | +| cmc.3 | 2 | 1473 | 9 | [0.653, 0.347] | dense | +| ctg.1 | 2 | 2126 | 22 | [0.222, 0.778] | dense | +| ctg.2 | 2 | 2126 | 22 | [0.861, 0.139] | dense | +| ctg.3 | 2 | 2126 | 22 | [0.917, 0.083] | dense | +| german | 2 | 1000 | 24 | [0.300, 0.700] | dense | +| haberman | 2 | 306 | 3 | [0.735, 0.265] | dense | +| ionosphere | 2 | 351 | 34 | [0.641, 0.359] | dense | +| iris.1 | 2 | 150 | 4 | [0.667, 0.333] | dense | +| iris.2 | 2 | 150 | 4 | [0.667, 0.333] | dense | +| iris.3 | 2 | 150 | 4 | [0.667, 0.333] | dense | +| mammographic | 2 | 830 | 5 | [0.514, 0.486] | dense | +| pageblocks.5 | 2 | 5473 | 10 | [0.979, 0.021] | dense | +| semeion | 2 | 1593 | 256 | [0.901, 0.099] | dense | +| sonar | 2 | 208 | 60 | [0.534, 0.466] | dense | +| spambase | 2 | 4601 | 57 | [0.606, 0.394] | dense | +| spectf | 2 | 267 | 44 | [0.794, 0.206] | dense | +| tictactoe | 2 | 958 | 9 | [0.653, 0.347] | dense | +| transfusion | 2 | 748 | 4 | [0.762, 0.238] | dense | +| wdbc | 2 | 569 | 30 | [0.627, 0.373] | dense | +| wine.1 | 2 | 178 | 13 | [0.669, 0.331] | dense | +| wine.2 | 2 | 178 | 13 | [0.601, 0.399] | dense | +| wine.3 | 2 | 178 | 13 | [0.730, 0.270] | dense | +| wine-q-red | 2 | 1599 | 11 | [0.465, 0.535] | dense | +| wine-q-white | 2 | 4898 | 11 | [0.335, 0.665] | dense | +| yeast | 2 | 1484 | 8 | [0.711, 0.289] | dense | + +### Issues: +All datasets will be downloaded automatically the first time they are requested, and +stored in the _quapy_data_ folder for 
faster further reuse. +However, some datasets require special actions that at the moment are not fully +automated. + +* Datasets with ids "ctg.1", "ctg.2", and "ctg.3" (_Cardiotocography Data Set_) load +an Excel file, which requires the user to install the _xlrd_ Python module in order +to open it. +* The dataset with id "pageblocks.5" (_Page Blocks Classification (5)_) needs to +open a "unix compressed file" (extension .Z), which is not directly doable with +standard Python packages like gzip or zip. This file would need to be uncompressed using +OS-dependent software manually. Information on how to do it will be printed the first +time the dataset is invoked. + +## Adding Custom Datasets + +QuaPy provides data loaders for simple formats dealing with +text, following the format: + +``` +class-id \t first document's pre-processed text \n +class-id \t second document's pre-processed text \n +... +``` + +and sparse representations of the form: + +``` +{-1, 0, or +1} col(int):val(float) col(int):val(float) ... \n +... +``` + +The code in charge of loading a LabelledCollection is: + +```python +@classmethod +def load(cls, path:str, loader_func:callable): + return LabelledCollection(*loader_func(path)) +``` + +indicating that any _loader_func_ (e.g., a user-defined one) which +returns valid arguments for initializing a _LabelledCollection_ object will allow +loading any collection. In particular, the _LabelledCollection_ receives as +arguments the instances (as an iterable) and the labels (as an iterable) and, +additionally, the number of classes can be specified (it would otherwise be +inferred from the labels, but that requires at least one positive example for +all classes to be present in the collection). 
+ +The same _loader_func_ can be passed to a Dataset, along with two +paths, in order to create a training and test pair of _LabelledCollection_, +e.g.: + +```python +import quapy as qp +train_path = '../my_data/train.dat' +test_path = '../my_data/test.dat' +def my_custom_loader(path): + with open(path, 'rb') as fin: + ... + return instances, labels +data = qp.data.Dataset.load(train_path, test_path, my_custom_loader) +``` + +### Data Processing + +QuaPy implements a number of preprocessing functions in the package _qp.data.preprocessing_, including: + +* _text2tfidf_: tfidf vectorization +* _reduce_columns_: reducing the number of columns based on term frequency +* _standardize_: transforms the column values into z-scores (i.e., subtract the mean and normalizes by the standard deviation, so +that the column values have zero mean and unit variance). +* _index_: transforms textual tokens into lists of numeric ids) diff --git a/docs/build/html/_sources/Evaluation.md.txt b/docs/build/html/_sources/Evaluation.md.txt new file mode 100644 index 0000000..13c15b9 --- /dev/null +++ b/docs/build/html/_sources/Evaluation.md.txt @@ -0,0 +1,232 @@ +# Evaluation + +Quantification is an appealing tool in scenarios of dataset shift, +and particularly in scenarios of prior-probability shift. +That is, the interest in estimating the class prevalences arises +under the belief that those class prevalences might have changed +with respect to the ones observed during training. +In other words, one could simply return the training prevalence +as a predictor of the test prevalence if this change is assumed +to be unlikely (as is the case in general scenarios of +machine learning governed by the iid assumption). +In brief, quantification requires dedicated evaluation protocols, +which are implemented in QuaPy and explained here. 
+ +## Error Measures + +The module quapy.error implements the following error measures for quantification: +* _mae_: mean absolute error +* _mrae_: mean relative absolute error +* _mse_: mean squared error +* _mkld_: mean Kullback-Leibler Divergence +* _mnkld_: mean normalized Kullback-Leibler Divergence + +Functions _ae_, _rae_, _se_, _kld_, and _nkld_ are also available, +which return the individual errors (i.e., without averaging the whole). + +Some errors of classification are also available: +* _acce_: accuracy error (1-accuracy) +* _f1e_: F-1 score error (1-F1 score) + +The error functions implement the following interface, e.g.: + +```python +mae(true_prevs, prevs_hat) +``` + +in which the first argument is a ndarray containing the true +prevalences, and the second argument is another ndarray with +the estimations produced by some method. + +Some error functions, e.g., _mrae_, _mkld_, and _mnkld_, are +smoothed for numerical stability. In those cases, there is a +third argument, e.g.: + +```python +def mrae(true_prevs, prevs_hat, eps=None): ... +``` + +indicating the value for the smoothing parameter epsilon. +Traditionally, this value is set to 1/(2T) in past literature, +with T the sampling size. 
One could either pass this value +to the function each time, or set QuaPy's environment +variable _SAMPLE_SIZE_ once, and omit this argument +thereafter (recommended); +e.g.: + +```python +qp.environ['SAMPLE_SIZE'] = 100 # once for all +true_prev = np.asarray([0.5, 0.3, 0.2]) # let's assume 3 classes +estim_prev = np.asarray([0.1, 0.3, 0.6]) +error = qp.error.mrae(true_prev, estim_prev) +print(f'mrae({true_prev}, {estim_prev}) = {error:.3f}') +``` + +will print: +``` +mrae([0.500, 0.300, 0.200], [0.100, 0.300, 0.600]) = 0.914 +``` + +Finally, it is possible to instantiate QuaPy's quantification +error functions from strings using, e.g.: + +```python +error_function = qp.error.from_name('mse') +error = error_function(true_prev, estim_prev) +``` + +## Evaluation Protocols + +QuaPy implements the so-called "artificial sampling protocol", +according to which a test set is used to generate samplings at +desired prevalences of fixed size and covering the full spectrum +of prevalences. This protocol is called "artificial" in contrast +to the "natural prevalence sampling" protocol that, +despite introducing some variability during sampling, approximately +preserves the training class prevalence. + +In the artificial sampling protocol, the user specifies the number +of (equally distant) points to be generated from the interval [0,1]. + +For example, if n_prevpoints=11 then, for each class, the prevalences +[0., 0.1, 0.2, ..., 1.] will be used. This means that, for two classes, +the number of different prevalences will be 11 (since, once the prevalence +of one class is determined, the other one is constrained). For 3 classes, +the number of valid combinations can be obtained as 11 + 10 + ... + 1 = 66. 
+In general, the number of valid combinations that will be produced for a given +value of n_prevpoints can be consulted by invoking +quapy.functional.num_prevalence_combinations, e.g.: + +```python +import quapy.functional as F +n_prevpoints = 21 +n_classes = 4 +n = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repeats=1) +``` + +in this example, n=1771. Note the last argument, n_repeats, that +informs of the number of examples that will be generated for any +valid combination (typical values are, e.g., 1 for a single sample, +or 10 or higher for computing standard deviations or performing statistical +significance tests). + +One can instead work the other way around, i.e., one could set a +maximum budget of evaluations and get the number of prevalence points that +will generate a number of evaluations close to, but not higher than, +the fixed budget. This can be achieved with the function +quapy.functional.get_nprevpoints_approximation, e.g.: + +```python +budget = 5000 +n_prevpoints = F.get_nprevpoints_approximation(budget, n_classes, n_repeats=1) +n = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repeats=1) +print(f'by setting n_prevpoints={n_prevpoints} the number of evaluations for {n_classes} classes will be {n}') +``` +that will print: +``` +by setting n_prevpoints=30 the number of evaluations for 4 classes will be 4960 +``` + +The cost of evaluation will depend on the values of _n_prevpoints_, _n_classes_, +and _n_repeats_. Since it might sometimes be cumbersome to control the overall +cost of an experiment having to do with the number of combinations that +will be generated for a particular setting of these arguments (particularly +when _n_classes>2_), evaluation functions +typically allow the user to rather specify an _evaluation budget_, i.e., a maximum +number of samplings to generate. 
By specifying this argument, one could avoid +specifying _n_prevpoints_, and the value for it that would lead to a closer +number of evaluation budget, without surpassing it, will be automatically set. + +The following script shows a full example in which a PACC model relying +on a Logistic Regressor classifier is +tested on the _kindle_ dataset by means of the artificial prevalence +sampling protocol on samples of size 500, in terms of various +evaluation metrics. + +````python +import quapy as qp +import quapy.functional as F +from sklearn.linear_model import LogisticRegression + +qp.environ['SAMPLE_SIZE'] = 500 + +dataset = qp.datasets.fetch_reviews('kindle') +qp.data.preprocessing.text2tfidf(dataset, min_df=5, inplace=True) + +training = dataset.training +test = dataset.test + +lr = LogisticRegression() +pacc = qp.method.aggregative.PACC(lr) + +pacc.fit(training) + +df = qp.evaluation.artificial_sampling_report( + pacc, # the quantification method + test, # the test set on which the method will be evaluated + sample_size=qp.environ['SAMPLE_SIZE'], #indicates the size of samples to be drawn + n_prevpoints=11, # how many prevalence points will be extracted from the interval [0, 1] for each category + n_repetitions=1, # number of times each prevalence will be used to generate a test sample + n_jobs=-1, # indicates the number of parallel workers (-1 indicates, as in sklearn, all CPUs) + random_seed=42, # setting a random seed allows to replicate the test samples across runs + error_metrics=['mae', 'mrae', 'mkld'], # specify the evaluation metrics + verbose=True # set to True to show some standard-line outputs +) +```` + +The resulting report is a pandas' dataframe that can be directly printed. 
+Here, we set some display options from pandas just to make the output clearer; +note also that the estimated prevalences are shown as strings using the +strprev function, which simply converts a prevalence into a +string representing it, with a fixed decimal precision (default 3): + +```python +import pandas as pd +pd.set_option('display.expand_frame_repr', False) +pd.set_option("precision", 3) +df['estim-prev'] = df['estim-prev'].map(F.strprev) +print(df) +``` + +The output should look like: + +``` + true-prev estim-prev mae mrae mkld +0 [0.0, 1.0] [0.000, 1.000] 0.000 0.000 0.000e+00 +1 [0.1, 0.9] [0.091, 0.909] 0.009 0.048 4.426e-04 +2 [0.2, 0.8] [0.163, 0.837] 0.037 0.114 4.633e-03 +3 [0.3, 0.7] [0.283, 0.717] 0.017 0.041 7.383e-04 +4 [0.4, 0.6] [0.366, 0.634] 0.034 0.070 2.412e-03 +5 [0.5, 0.5] [0.459, 0.541] 0.041 0.082 3.387e-03 +6 [0.6, 0.4] [0.565, 0.435] 0.035 0.073 2.535e-03 +7 [0.7, 0.3] [0.654, 0.346] 0.046 0.108 4.701e-03 +8 [0.8, 0.2] [0.725, 0.275] 0.075 0.235 1.515e-02 +9 [0.9, 0.1] [0.858, 0.142] 0.042 0.229 7.740e-03 +10 [1.0, 0.0] [0.945, 0.055] 0.055 27.357 5.219e-02 +``` + +One can get the averaged scores using standard pandas +functions, i.e.: + +```python +print(df.mean()) +``` + +will produce the following output: + +``` +true-prev 0.500 +mae 0.035 +mrae 2.578 +mkld 0.009 +dtype: float64 +``` + +Other evaluation functions include: + +* _artificial_sampling_eval_: that computes the evaluation for a +given evaluation metric, returning the average instead of a dataframe. +* _artificial_sampling_prediction_: that returns two np.arrays containing the +true prevalences and the estimated prevalences. + +See the documentation for further details. 
\ No newline at end of file diff --git a/docs/build/html/_sources/Installation.rst.txt b/docs/build/html/_sources/Installation.rst.txt new file mode 100644 index 0000000..0eaabd6 --- /dev/null +++ b/docs/build/html/_sources/Installation.rst.txt @@ -0,0 +1,56 @@ +Installation +------------ + +QuaPy can be easily installed via `pip` + +:: + + pip install quapy + +See `pip page `_ for older versions. + +Requirements +************ + +* scikit-learn, numpy, scipy +* pytorch (for QuaNet) +* svmperf patched for quantification (see below) +* joblib +* tqdm +* pandas, xlrd +* matplotlib + + +SVM-perf with quantification-oriented losses +******************************************** + +In order to run experiments involving SVM(Q), SVM(KLD), SVM(NKLD), +SVM(AE), or SVM(RAE), you have to first download the +`svmperf `_ +package, apply the patch +`svm-perf-quantification-ext.patch `_, +and compile the sources. +The script +`prepare_svmperf.sh `_, +does all the job. Simply run: + +:: + + ./prepare_svmperf.sh + + +The resulting directory `./svm_perf_quantification` contains the +patched version of `svmperf` with quantification-oriented losses. + +The +`svm-perf-quantification-ext.patch `_ +is an extension of the patch made available by +`Esuli et al. 2015 `_ +that allows SVMperf to optimize for +the `Q` measure as proposed by +`Barranquero et al. 2015 `_ +and for the `KLD` and `NKLD` as proposed by +`Esuli et al. 2015 `_ +for quantification. +This patch extends the former by also allowing SVMperf to optimize for +`AE` and `RAE`. \ No newline at end of file diff --git a/docs/build/html/_sources/Methods.md.txt b/docs/build/html/_sources/Methods.md.txt new file mode 100644 index 0000000..9597fd8 --- /dev/null +++ b/docs/build/html/_sources/Methods.md.txt @@ -0,0 +1,412 @@ +# Quantification Methods + +Quantification methods can be categorized as belonging to +_aggregative_ and _non-aggregative_ groups. 
+Most methods included in QuaPy at the moment are of type _aggregative_ +(though we plan to add many more methods in the near future), i.e., +are methods characterized by the fact that +quantification is performed as an aggregation function of the individual +products of classification. + +Any quantifier in QuaPy should extend the class _BaseQuantifier_, +and implement some abstract methods: +```python + @abstractmethod + def fit(self, data: LabelledCollection): ... + + @abstractmethod + def quantify(self, instances): ... + + @abstractmethod + def set_params(self, **parameters): ... + + @abstractmethod + def get_params(self, deep=True): ... +``` +The meaning of those functions should be familiar to those +used to working with scikit-learn since the class structure of QuaPy +is directly inspired by scikit-learn's _Estimators_. Functions +_fit_ and _quantify_ are used to train the model and to provide +class estimations (the reason why +scikit-learn's structure has not been adopted _as is_ in QuaPy responds to +the fact that scikit-learn's _predict_ function is expected to return +one output for each input element --e.g., a predicted label for each +instance in a sample-- while in quantification the output for a sample +is one single array of class prevalences), while functions _set_params_ +and _get_params_ allow a +[model selector](https://github.com/HLT-ISTI/QuaPy/wiki/Model-Selection) +to automate the process of hyperparameter search. + +## Aggregative Methods + +All quantification methods are implemented as part of the +_qp.method_ package. In particular, _aggregative_ methods are defined in +_qp.method.aggregative_, and extend _AggregativeQuantifier(BaseQuantifier)_. +The methods that any _aggregative_ quantifier must implement are: + +```python + @abstractmethod + def fit(self, data: LabelledCollection, fit_learner=True): ... + + @abstractmethod + def aggregate(self, classif_predictions:np.ndarray): ... 
+``` + +since, as mentioned before, aggregative methods base their prediction on the +individual predictions of a classifier. Indeed, a default implementation +of _BaseQuantifier.quantify_ is already provided, which looks like: + +```python + def quantify(self, instances): + classif_predictions = self.preclassify(instances) + return self.aggregate(classif_predictions) +``` +Aggregative quantifiers are expected to maintain a classifier (which is +accessed through the _@property_ _learner_). This classifier is +given as input to the quantifier, and can be already fit +on external data (in which case, the _fit_learner_ argument should +be set to False), or be fit by the quantifier's fit (default). + +Another class of _aggregative_ methods are the _probabilistic_ +aggregative methods, that should inherit from the abstract class +_AggregativeProbabilisticQuantifier(AggregativeQuantifier)_. +The particularity of _probabilistic_ aggregative methods (w.r.t. +non-probabilistic ones), is that the default quantifier is defined +in terms of the posterior probabilities returned by a probabilistic +classifier, and not by the crisp decisions of a hard classifier; i.e.: + +```python + def quantify(self, instances): + classif_posteriors = self.posterior_probabilities(instances) + return self.aggregate(classif_posteriors) +``` + +One advantage of _aggregative_ methods (either probabilistic or not) +is that the evaluation according to any sampling procedure (e.g., +the [artificial sampling protocol](https://github.com/HLT-ISTI/QuaPy/wiki/Evaluation)) +can be achieved very efficiently, since the entire set can be pre-classified +once, and the quantification estimations for different samples can directly +reuse these predictions, without requiring to classify each element every time. +QuaPy leverages this property to speed-up any procedure having to do with +quantification over samples, as is customarily done in model selection or +in evaluation. 
+ +### The Classify & Count variants + +QuaPy implements the four CC variants, i.e.: + +* _CC_ (Classify & Count), the simplest aggregative quantifier; one that + simply relies on the label predictions of a classifier to deliver class estimates. +* _ACC_ (Adjusted Classify & Count), the adjusted variant of CC. +* _PCC_ (Probabilistic Classify & Count), the probabilistic variant of CC that +relies on the soft estimations (or posterior probabilities) returned by a (probabilistic) classifier. +* _PACC_ (Probabilistic Adjusted Classify & Count), the adjusted variant of PCC. + +The following code serves as a complete example using CC equipped +with a SVM as the classifier: + +```python +import quapy as qp +import quapy.functional as F +from sklearn.svm import LinearSVC + +dataset = qp.datasets.fetch_twitter('hcr', pickle=True) +training = dataset.training +test = dataset.test + +# instantiate a classifier learner, in this case a SVM +svm = LinearSVC() + +# instantiate a Classify & Count with the SVM +# (an alias is available in qp.method.aggregative.ClassifyAndCount) +model = qp.method.aggregative.CC(svm) +model.fit(training) +estim_prevalence = model.quantify(test.instances) +``` + +The same code could be used to instantiate an ACC, by simply replacing +the instantiation of the model with: +```python +model = qp.method.aggregative.ACC(svm) +``` +Note that the adjusted variants (ACC and PACC) need to estimate +some parameters for performing the adjustment (e.g., the +_true positive rate_ and the _false positive rate_ in case of +binary classification) that are estimated on a validation split +of the labelled set. In this case, the __init__ method of +ACC defines an additional parameter, _val_split_ which, by +default, is set to 0.4 and so, the 40% of the labelled data +will be used for estimating the parameters for adjusting the +predictions. 
This parameter can also be set to an integer, +indicating that the parameters should be estimated by means of +_k_-fold cross-validation, for which the integer indicates the +number _k_ of folds. Finally, _val_split_ can be set to a +specific held-out validation set (i.e., an instance of _LabelledCollection_). + +The specification of _val_split_ can be +postponed to the invocation of the fit method (if _val_split_ was also +set in the constructor, the one specified at fit time would prevail), +e.g.: + +```python +model = qp.method.aggregative.ACC(svm) +# perform 5-fold cross validation for estimating ACC's parameters +# (overrides the default val_split=0.4 in the constructor) +model.fit(training, val_split=5) +``` + +The following code illustrates the case in which PCC is used: +```python +model = qp.method.aggregative.PCC(svm) +model.fit(training) +estim_prevalence = model.quantify(test.instances) +print('classifier:', model.learner) +``` +In this case, QuaPy will print: +``` +The learner LinearSVC does not seem to be probabilistic. The learner will be calibrated. +classifier: CalibratedClassifierCV(base_estimator=LinearSVC(), cv=5) +``` +The first output indicates that the learner (_LinearSVC_ in this case) +is not a probabilistic classifier (i.e., it does not implement the +_predict_proba_ method) and so, the classifier will be converted to +a probabilistic one through [calibration](https://scikit-learn.org/stable/modules/calibration.html). +As a result, the classifier that is printed in the second line points +to a _CalibratedClassifierCV_ instance. Note that calibration can only +be applied to hard classifiers when _fit_learner=True_; an exception +will be raised otherwise. + +Lastly, everything we said about ACC and PCC +applies to PACC as well. 
+ + +### Expectation Maximization (EMQ) + +The Expectation Maximization Quantifier (EMQ), also known as +SLD, is available at _qp.method.aggregative.EMQ_ or via the +alias _qp.method.aggregative.ExpectationMaximizationQuantifier_. +The method is described in: + +_Saerens, M., Latinne, P., and Decaestecker, C. (2002). Adjusting the outputs of a classifier +to new a priori probabilities: A simple procedure. Neural Computation, 14(1):21–41._ + +EMQ works with a probabilistic classifier (if the classifier +given as input is a hard one, a calibration will be attempted). +Although this method was originally proposed for improving the +posterior probabilities of a probabilistic classifier, and not +for improving the estimation of prior probabilities, EMQ ranks +almost always among the most effective quantifiers in the +experiments we have carried out. + +An example of use can be found below: + +```python +import quapy as qp +from sklearn.linear_model import LogisticRegression + +dataset = qp.datasets.fetch_twitter('hcr', pickle=True) + +model = qp.method.aggregative.EMQ(LogisticRegression()) +model.fit(dataset.training) +estim_prevalence = model.quantify(dataset.test.instances) +``` + + +### Hellinger Distance y (HDy) + +The method HDy is described in: + +_Implementation of the method based on the Hellinger Distance y (HDy) proposed by +González-Castro, V., Alaiz-Rodríguez, R., and Alegre, E. (2013). Class distribution +estimation based on the Hellinger distance. Information Sciences, 218:146–164._ + +It is implemented in _qp.method.aggregative.HDy_ (also accessible +through the alias _qp.method.aggregative.HellingerDistanceY_). +This method works with a probabilistic classifier (hard classifiers +can be used as well and will be calibrated) and requires a validation +set to estimate the parameters of the mixture model. 
Just like +ACC and PACC, this quantifier receives a _val_split_ argument +in the constructor (or in the fit method, in which case the previous +value is overridden) that can either be a float indicating the proportion +of training data to be taken as the validation set (in a random +stratified split), or a validation set (i.e., an instance of +_LabelledCollection_) itself. + +HDy was proposed as a binary quantifier and the implementation +provided in QuaPy accepts only binary datasets. + +The following code shows an example of use: +```python +import quapy as qp +from sklearn.linear_model import LogisticRegression + +# load a binary dataset +dataset = qp.datasets.fetch_reviews('hp', pickle=True) +qp.data.preprocessing.text2tfidf(dataset, min_df=5, inplace=True) + +model = qp.method.aggregative.HDy(LogisticRegression()) +model.fit(dataset.training) +estim_prevalence = model.quantify(dataset.test.instances) +``` + +### Explicit Loss Minimization + +Explicit Loss Minimization (ELM) represents a family of methods +based on structured output learning, i.e., quantifiers relying on +classifiers that have been optimized targeting a +quantification-oriented evaluation measure. + +In QuaPy, the following methods, all relying on Joachims' +[SVMperf](https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html) +implementation, are available in _qp.method.aggregative_: + +* SVMQ (SVM-Q) is a quantification method optimizing the metric _Q_ defined +in _Barranquero, J., Díez, J., and del Coz, J. J. (2015). Quantification-oriented learning based +on reliable classifiers. Pattern Recognition, 48(2):591–604._ +* SVMKLD (SVM for Kullback-Leibler Divergence) proposed in _Esuli, A. and Sebastiani, F. (2015). + Optimizing text quantifiers for multivariate loss functions. + ACM Transactions on Knowledge Discovery and Data, 9(4):Article 27._ +* SVMNKLD (SVM for Normalized Kullback-Leibler Divergence) proposed in _Esuli, A. and Sebastiani, F. (2015). 
+ Optimizing text quantifiers for multivariate loss functions. + ACM Transactions on Knowledge Discovery and Data, 9(4):Article 27._ +* SVMAE (SVM for Mean Absolute Error) +* SVMRAE (SVM for Mean Relative Absolute Error) + +the last two methods (SVMAE and SVMRAE) have been implemented in +QuaPy in order to make available ELM variants for what nowadays +are considered the most well-behaved evaluation metrics in quantification. + +In order to make these models work, you would need to run the script +_prepare_svmperf.sh_ (distributed along with QuaPy) that +downloads _SVMperf_' source code, applies a patch that +implements the quantification oriented losses, and compiles the +sources. + +If you want to add any custom loss, you would need to modify +the source code of _SVMperf_ in order to implement it, and +assign a valid loss code to it. Then you must re-compile +the whole thing and instantiate the quantifier in QuaPy +as follows: + +```python +# you can either set the path to your custom svm_perf_quantification implementation +# in the environment variable, or as an argument to the constructor of ELM +qp.environ['SVMPERF_HOME'] = './path/to/svm_perf_quantification' + +# assign an alias to your custom loss and the id you have assigned to it +svmperf = qp.classification.svmperf.SVMperf +svmperf.valid_losses['mycustomloss'] = 28 + +# instantiate the ELM method indicating the loss +model = qp.method.aggregative.ELM(loss='mycustomloss') +``` + +All ELM are binary quantifiers since they rely on _SVMperf_, that +currently supports only binary classification. +ELM variants (any binary quantifier in general) can be extended +to operate in single-label scenarios trivially by adopting a +"one-vs-all" strategy (as, e.g., in +_Gao, W. and Sebastiani, F. (2016). From classification to quantification in tweet sentiment +analysis. Social Network Analysis and Mining, 6(19):1–22_). 
+In QuaPy this is possible by using the _OneVsAll_ class: + +```python +import quapy as qp +from quapy.method.aggregative import SVMQ, OneVsAll + +# load a single-label dataset (this one contains 3 classes) +dataset = qp.datasets.fetch_twitter('hcr', pickle=True) + +# let qp know where svmperf is +qp.environ['SVMPERF_HOME'] = '../svm_perf_quantification' + +model = OneVsAll(SVMQ(), n_jobs=-1) # run them on parallel +model.fit(dataset.training) +estim_prevalence = model.quantify(dataset.test.instances) +``` + +## Meta Models + +By _meta_ models we mean quantification methods that are defined on top of other +quantification methods, and that thus do not squarely belong to the aggregative nor +the non-aggregative group (indeed, _meta_ models could use quantifiers from any of those +groups). +_Meta_ models are implemented in the _qp.method.meta_ module. + +### Ensembles + +QuaPy implements (some of) the variants proposed in: + +* _Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017). +Using ensembles for problems with characterizable changes in data distribution: A case study on quantification. +Information Fusion, 34, 87-100._ +* _Pérez-Gállego, P., Castano, A., Quevedo, J. R., & del Coz, J. J. (2019). + Dynamic ensemble selection for quantification tasks. + Information Fusion, 45, 1-15._ + +The following code shows how to instantiate an Ensemble of 30 _Adjusted Classify & Count_ (ACC) +quantifiers operating with a _Logistic Regressor_ (LR) as the base classifier, and using the +_average_ as the aggregation policy (see the original article for further details). +The last parameter indicates to use all processors for parallelization. 
+ +```python +import quapy as qp +from quapy.method.aggregative import ACC +from quapy.method.meta import Ensemble +from sklearn.linear_model import LogisticRegression + +dataset = qp.datasets.fetch_UCIDataset('haberman') + +model = Ensemble(quantifier=ACC(LogisticRegression()), size=30, policy='ave', n_jobs=-1) +model.fit(dataset.training) +estim_prevalence = model.quantify(dataset.test.instances) +``` + +Other aggregation policies implemented in QuaPy include: +* 'ptr' for applying a dynamic selection based on the training prevalence of the ensemble's members +* 'ds' for applying a dynamic selection based on the Hellinger Distance +* _any valid quantification measure_ (e.g., 'mse') for performing a static selection based on +the performance estimated for each member of the ensemble in terms of that evaluation metric. + +When using any of the above options, it is important to set the _red_size_ parameter, which +informs of the number of members to retain. + +Please, check the [model selection](https://github.com/HLT-ISTI/QuaPy/wiki/Model-Selection) +wiki if you want to optimize the hyperparameters of ensemble for classification or quantification. + +### The QuaNet neural network + +QuaPy offers an implementation of QuaNet, a deep learning model presented in: + +_Esuli, A., Moreo, A., & Sebastiani, F. (2018, October). +A recurrent neural network for sentiment quantification. +In Proceedings of the 27th ACM International Conference on +Information and Knowledge Management (pp. 1775-1778)._ + +This model requires _torch_ to be installed. +QuaNet also requires a classifier that can provide embedded representations +of the inputs. +In the original paper, QuaNet was tested using an LSTM as the base classifier. 
+In the following example, we show an instantiation of QuaNet that instead uses CNN as a probabilistic classifier, taking its last layer representation as the document embedding: + +```python +import quapy as qp +from quapy.method.meta import QuaNet +from quapy.classification.neural import NeuralClassifierTrainer, CNNnet + +# use samples of 100 elements +qp.environ['SAMPLE_SIZE'] = 100 + +# load the kindle dataset as text, and convert words to numerical indexes +dataset = qp.datasets.fetch_reviews('kindle', pickle=True) +qp.data.preprocessing.index(dataset, min_df=5, inplace=True) + +# the text classifier is a CNN trained by NeuralClassifierTrainer +cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes) +learner = NeuralClassifierTrainer(cnn, device='cuda') + +# train QuaNet +model = QuaNet(learner, qp.environ['SAMPLE_SIZE'], device='cuda') +model.fit(dataset.training) +estim_prevalence = model.quantify(dataset.test.instances) +``` diff --git a/docs/build/html/_sources/Model-Selection.md.txt b/docs/build/html/_sources/Model-Selection.md.txt new file mode 100644 index 0000000..2ecd8c6 --- /dev/null +++ b/docs/build/html/_sources/Model-Selection.md.txt @@ -0,0 +1,159 @@ +# Model Selection + +As a supervised machine learning task, quantification methods +can strongly depend on a good choice of model hyper-parameters. +The process whereby those hyper-parameters are chosen is +typically known as _Model Selection_, and typically consists of +testing different settings and picking the one that performed +best in a held-out validation set in terms of any given +evaluation measure. + +## Targeting a Quantification-oriented loss + +The task being optimized determines the evaluation protocol, +i.e., the criteria according to which the performance of +any given method for solving is to be assessed. 
+As a task on its own right, quantification should impose +its own model selection strategies, i.e., strategies +aimed at finding appropriate configurations +specifically designed for the task of quantification. + +Quantification has long been regarded as an add-on of +classification, and thus the model selection strategies +customarily adopted in classification have simply been +applied to quantification (see the next section). +It has been argued in _Moreo, Alejandro, and Fabrizio Sebastiani. +"Re-Assessing the" Classify and Count" Quantification Method." +arXiv preprint arXiv:2011.02552 (2020)._ +that specific model selection strategies should +be adopted for quantification. That is, model selection +strategies for quantification should target +quantification-oriented losses and be tested in a variety +of scenarios exhibiting different degrees of prior +probability shift. + +The class +_qp.model_selection.GridSearchQ_ +implements a grid-search exploration over the space of +hyper-parameter combinations that evaluates each +combination of hyper-parameters +by means of a given quantification-oriented +error metric (e.g., any of the error functions implemented +in _qp.error_) and according to the +[_artificial sampling protocol_](https://github.com/HLT-ISTI/QuaPy/wiki/Evaluation). + +The following is an example of model selection for quantification: + +```python +import quapy as qp +from quapy.method.aggregative import PCC +from sklearn.linear_model import LogisticRegression +import numpy as np + +# set a seed to replicate runs +np.random.seed(0) +qp.environ['SAMPLE_SIZE'] = 500 + +dataset = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=5) + +# The model will be returned by the fit method of GridSearchQ. +# Model selection will be performed with a fixed budget of 1000 evaluations +# for each hyper-parameter combination. 
The error to optimize is the MAE for +# quantification, as evaluated on artificially drawn samples at prevalences +# covering the entire spectrum on a held-out portion (40%) of the training set. +model = qp.model_selection.GridSearchQ( + model=PCC(LogisticRegression()), + param_grid={'C': np.logspace(-4,5,10), 'class_weight': ['balanced', None]}, + sample_size=qp.environ['SAMPLE_SIZE'], + eval_budget=1000, + error='mae', + refit=True, # retrain on the whole labelled set + val_split=0.4, + verbose=True # show information as the process goes on +).fit(dataset.training) + +print(f'model selection ended: best hyper-parameters={model.best_params_}') +model = model.best_model_ + +# evaluation in terms of MAE +results = qp.evaluation.artificial_sampling_eval( + model, + dataset.test, + sample_size=qp.environ['SAMPLE_SIZE'], + n_prevpoints=101, + n_repetitions=10, + error_metric='mae' +) + +print(f'MAE={results:.5f}') +``` + +In this example, the system outputs: +``` +[GridSearchQ]: starting optimization with n_jobs=1 +[GridSearchQ]: checking hyperparams={'C': 0.0001, 'class_weight': 'balanced'} got mae score 0.24987 +[GridSearchQ]: checking hyperparams={'C': 0.0001, 'class_weight': None} got mae score 0.48135 +[GridSearchQ]: checking hyperparams={'C': 0.001, 'class_weight': 'balanced'} got mae score 0.24866 +[...] 
+[GridSearchQ]: checking hyperparams={'C': 100000.0, 'class_weight': None} got mae score 0.43676 +[GridSearchQ]: optimization finished: best params {'C': 0.1, 'class_weight': 'balanced'} (score=0.19982) +[GridSearchQ]: refitting on the whole development set +model selection ended: best hyper-parameters={'C': 0.1, 'class_weight': 'balanced'} +1010 evaluations will be performed for each combination of hyper-parameters +[artificial sampling protocol] generating predictions: 100%|██████████| 1010/1010 [00:00<00:00, 5005.54it/s] +MAE=0.20342 +``` + +The parameter _val_split_ can alternatively be used to indicate +a validation set (i.e., an instance of _LabelledCollection_) instead +of a proportion. This could be useful if one wants to have control +on the specific data split to be used across different model selection +experiments. + +## Targeting a Classification-oriented loss + +Optimizing a model for quantification could rather be +computationally costly. +In aggregative methods, one could alternatively try to optimize +the classifier's hyper-parameters for classification. +Although this is theoretically suboptimal, many articles in +quantification literature have opted for this strategy. + +In QuaPy, this is achieved by simply instantiating the +classifier learner as a GridSearchCV from scikit-learn. 
+The following code illustrates how to do that: + +```python +learner = GridSearchCV( + LogisticRegression(), + param_grid={'C': np.logspace(-4, 5, 10), 'class_weight': ['balanced', None]}, + cv=5) +model = PCC(learner).fit(dataset.training) +print(f'model selection ended: best hyper-parameters={model.learner.best_params_}') +``` + +In this example, the system outputs: +``` +model selection ended: best hyper-parameters={'C': 10000.0, 'class_weight': None} +1010 evaluations will be performed for each combination of hyper-parameters +[artificial sampling protocol] generating predictions: 100%|██████████| 1010/1010 [00:00<00:00, 5379.55it/s] +MAE=0.41734 +``` + +Note that the MAE is worse than the one we obtained when optimizing +for quantification and, indeed, the hyper-parameters found optimal +largely differ between the two selection modalities. The +hyper-parameters C=10000 and class_weight=None have been found +to work well for the specific training prevalence of the HP dataset, +but these hyper-parameters turned out to be suboptimal when the +class prevalences of the test set differ (as is indeed tested +in scenarios of quantification). + +This is, however, not always the case, and one could, in practice, +find examples +in which optimizing for classification ends up resulting in a better +quantifier than when optimizing for quantification. +Nonetheless, this is theoretically unlikely to happen. + + + diff --git a/docs/build/html/_sources/Plotting.md.txt b/docs/build/html/_sources/Plotting.md.txt new file mode 100644 index 0000000..81495cc --- /dev/null +++ b/docs/build/html/_sources/Plotting.md.txt @@ -0,0 +1,253 @@ +# Plotting + +The module _qp.plot_ implements some basic plotting functions +that can help analyse the performance of a quantification method.
+ +All plotting functions receive as inputs the outcomes of +some experiments and include, for each experiment, +the following three main arguments: + +* _method_names_ a list containing the names of the quantification methods +* _true_prevs_ a list containing matrices of true prevalences +* _estim_prevs_ a list containing matrices of estimated prevalences +(should be of the same shape as the corresponding matrix in _true_prevs_) + +Note that a method (as indicated by a name in _method_names_) can +appear more than once. This could occur when various datasets are +involved in the experiments. In this case, all experiments for the +method will be merged and the plot will represent the method's +performance across various datasets. + +This is a very simple example of a valid input for the plotting functions: +```python +method_names = ['classify & count', 'EMQ', 'classify & count'] +true_prevs = [ + np.array([[0.5, 0.5], [0.25, 0.75]]), + np.array([[0.0, 1.0], [0.25, 0.75], [0.0, 0.1]]), + np.array([[0.0, 1.0], [0.25, 0.75], [0.0, 0.1]]), +] +estim_prevs = [ + np.array([[0.45, 0.55], [0.6, 0.4]]), + np.array([[0.0, 1.0], [0.5, 0.5], [0.2, 0.8]]), + np.array([[0.1, 0.9], [0.3, 0.7], [0.0, 0.1]]), +] +``` +in which the _classify & count_ has been tested in two datasets and +the _EMQ_ method has been tested only in one dataset. For the first +experiment, only two (binary) quantifications have been tested, +while for the second and third experiments three instances have +been tested. + +In general, we would like to test the performance of the +quantification methods across different scenarios showcasing +the accuracy of the quantifier in predicting class prevalences +for a wide range of prior distributions. This can easily be +achieved by means of the +[artificial sampling protocol](https://github.com/HLT-ISTI/QuaPy/wiki/Evaluation) +that is implemented in QuaPy. 
+ +The following code shows how to perform one simple experiment +in which the 4 _CC-variants_, all equipped with a linear SVM, are +applied to one binary dataset of reviews about _Kindle_ devices and +tested across the entire spectrum of class priors (taking 21 splits +of the interval [0,1], i.e., using prevalence steps of 0.05, and +generating 100 random samples at each prevalence). + +```python +import quapy as qp +from quapy.method.aggregative import CC, ACC, PCC, PACC +from sklearn.svm import LinearSVC + +qp.environ['SAMPLE_SIZE'] = 500 + +def gen_data(): + + def base_classifier(): + return LinearSVC() + + def models(): + yield CC(base_classifier()) + yield ACC(base_classifier()) + yield PCC(base_classifier()) + yield PACC(base_classifier()) + + data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5) + + method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], [] + + for model in models(): + model.fit(data.training) + true_prev, estim_prev = qp.evaluation.artificial_sampling_prediction( + model, data.test, qp.environ['SAMPLE_SIZE'], n_repetitions=100, n_prevpoints=21 + ) + + method_names.append(model.__class__.__name__) + true_prevs.append(true_prev) + estim_prevs.append(estim_prev) + tr_prevs.append(data.training.prevalence()) + + return method_names, true_prevs, estim_prevs, tr_prevs + +method_names, true_prevs, estim_prevs, tr_prevs = gen_data() +```` +the plots that can be generated are explained below. + +## Diagonal Plot + +The _diagonal_ plot shows a very insightful view of the +quantifier's performance. It plots the predicted class +prevalence (in the y-axis) against the true class prevalence +(in the x-axis). Unfortunately, it is limited to binary quantification, +although one can simply generate as many _diagonal_ plots as +classes there are by indicating which class should be considered +the target of the plot. 
+ +The following call will produce the plot: + +```python +qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, train_prev=tr_prevs[0], savepath='./plots/bin_diag.png') +``` + +the last argument is optional, and indicates the path where to save +the plot (the file extension will determine the format -- typical extensions +are '.png' or '.pdf'). If this path is not provided, then the plot +will be shown but not saved. +The resulting plot should look like: + +![diagonal plot on Kindle](./wiki_examples/selected_plots/bin_diag.png) + +Note that in this case, we are also indicating the training +prevalence, which is plotted on the diagonal as a cyan dot. +The color bands indicate the standard deviations of the predictions, +and can be hidden by setting the argument _show_std=False_ (see +the complete list of arguments in the documentation). + +Finally, note how most quantifiers, and especially the "unadjusted" +variants CC and PCC, are strongly biased towards the +prevalence seen during training. + +## Quantification bias + +This plot aims at evincing the bias that any quantifier +displays with respect to the training prevalences by +means of [box plots](https://en.wikipedia.org/wiki/Box_plot). +This plot can be generated by: + +```python +qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, savepath='./plots/bin_bias.png') +``` + +and should look like: + +![bias plot on Kindle](./wiki_examples/selected_plots/bin_bias.png) + +The box plots show some interesting facts: +* all methods are biased towards the training prevalence but especially +so CC and PCC (an unbiased quantifier would have a box centered at 0) +* the bias is always positive, indicating that all methods tend to +overestimate the positive class prevalence +* CC and PCC have high variability while ACC and especially PACC exhibit +lower variability. + +Again, these plots could be generated for experiments ranging across +different datasets, and the plot will merge all data accordingly.
+ +Another illustrative example can be shown that consists of +training different CC quantifiers at different +(artificially sampled) training prevalences. +For this example, we generate training samples of 5000 +documents containing 10%, 20%, ..., 90% of positives from the +IMDb dataset, and generate the bias plot again. +This example can be run by rewriting the _gen_data()_ function +like this: + +```python +def gen_data(): + data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5) + model = CC(LinearSVC()) + + method_data = [] + for training_prevalence in np.linspace(0.1, 0.9, 9): + training_size = 5000 + # since the problem is binary, it suffices to specify the negative prevalence (the positive is constrained) + training = data.training.sampling(training_size, 1 - training_prevalence) + model.fit(training) + true_prev, estim_prev = qp.evaluation.artificial_sampling_prediction( + model, data.test, qp.environ['SAMPLE_SIZE'], n_repetitions=100, n_prevpoints=21 + ) + # method names can contain Latex syntax + method_name = 'CC$_{' + f'{int(100 * training_prevalence)}' + '\%}$' + method_data.append((method_name, true_prev, estim_prev, training.prevalence())) + + return zip(*method_data) +``` + +and the plot should now look like: + +![bias plot on IMDb](./wiki_examples/selected_plots/bin_bias_cc.png) + +which clearly shows a negative bias for CC variants trained on +data containing more negatives (i.e., < 50%) and positive biases +in cases containing more positives (i.e., >50%). The CC trained +at 50% behaves as an unbiased estimator of the positive class +prevalence. + +The function _qp.plot.binary_bias_bins_ allows the user to +generate box plots broken down by bins of true test prevalence. +To this aim, an argument _nbins_ is passed which indicates +how many isometric subintervals to take.
For example +the following plot is produced for _nbins=3_: + +![bias plot on IMDb](./wiki_examples/selected_plots/bin_bias_bin_cc.png) + +Interestingly enough, the seemingly unbiased estimator (CC at 50%) happens to display +a positive bias (or a tendency to overestimate) in cases of low prevalence +(i.e., when the true prevalence of the positive class is below 33%), +and a negative bias (or a tendency to underestimate) in cases of high prevalence +(i.e., when the true prevalence is beyond 67%). + +Out of curiosity, the diagonal plot for this experiment looks like: + +![diag plot on IMDb](./wiki_examples/selected_plots/bin_diag_cc.png) + +showing pretty clearly the dependency of CC on the prior probabilities +of the labeled set it was trained on. + + +## Error by Drift + +The plots discussed above are useful for analyzing and comparing +the performance of different quantification methods, but are +limited to the binary case. The "error by drift" is a plot +that shows the error in predictions as a function of the +(prior probability) drift between each test sample and the +training set. Interestingly, the error and drift can both be measured +in terms of any evaluation measure for quantification (like the +ones available in _qp.error_) and can thus be computed +irrespective of the number of classes. + +The following shows how to generate the plot for the 4 CC variants, +using 10 bins for the drift +and _absolute error_ as the measure of the error (the +drift in the x-axis is always computed in terms of _absolute error_ since +other errors are harder to interpret): + +```python +qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, + error_name='ae', n_bins=10, savepath='./plots/err_drift.png') +``` + +![diag plot on IMDb](./wiki_examples/selected_plots/err_drift.png) + +Note that all methods work reasonably well in cases of low prevalence +drift (i.e., any CC-variant is a good quantifier whenever the IID +assumption is approximately preserved).
The higher the drift, the worse +those quantifiers tend to perform, although it is clear that PACC +yields the lowest error for the most difficult cases. + +Remember that any plot can be generated _across many datasets_, and +that this would probably result in a more solid comparison. +In those cases, however, it is likely that the variances of each +method get higher, to the detriment of the visualization. +We recommend setting _show_std=False_ in those cases +in order to hide the color bands. diff --git a/docs/build/html/_sources/index.rst.txt b/docs/build/html/_sources/index.rst.txt new file mode 100644 index 0000000..18a0623 --- /dev/null +++ b/docs/build/html/_sources/index.rst.txt @@ -0,0 +1,90 @@ +.. QuaPy documentation master file, created by + sphinx-quickstart on Tue Nov 9 11:31:32 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to QuaPy's documentation! +================================= + +QuaPy is an open source framework for Quantification (a.k.a. Supervised Prevalence Estimation) +written in Python. + +Introduction +------------ + +QuaPy is rooted in the concept of the data sample, and provides implementations of the most important concepts +in quantification literature, such as the most important quantification baselines, many advanced +quantification methods, quantification-oriented model selection, many evaluation measures and protocols +used for evaluating quantification methods. +QuaPy also integrates commonly used datasets and offers visualization tools for facilitating the analysis and +interpretation of results. + +A quick example: +**************** + +The following script fetches a Twitter dataset, trains and evaluates an +`Adjusted Classify & Count` model in terms of the `Mean Absolute Error` (MAE) +between the class prevalences estimated for the test set and the true prevalences +of the test set.
+ +:: + + import quapy as qp + from sklearn.linear_model import LogisticRegression + + dataset = qp.datasets.fetch_twitter('semeval16') + + # create an "Adjusted Classify & Count" quantifier + model = qp.method.aggregative.ACC(LogisticRegression()) + model.fit(dataset.training) + + estim_prevalences = model.quantify(dataset.test.instances) + true_prevalences = dataset.test.prevalence() + + error = qp.error.mae(true_prevalences, estim_prevalences) + + print(f'Mean Absolute Error (MAE)={error:.3f}') + + +Quantification is useful in scenarios of prior probability shift. In other +words, we would not be interested in estimating the class prevalences of the test set if +we could assume the IID assumption to hold, as this prevalence would simply coincide with the +class prevalence of the training set. For this reason, any Quantification model +should be tested across samples characterized by different class prevalences. +QuaPy implements sampling procedures and evaluation protocols that automate this endeavour. +See the :doc:`Evaluation` for detailed examples. + +Features +******** + +* Implementation of most popular quantification methods (Classify-&-Count variants, Expectation-Maximization, SVM-based variants for quantification, HDy, QuaNet, and Ensembles). +* Versatile functionality for performing evaluation based on artificial sampling protocols. +* Implementation of most commonly used evaluation metrics (e.g., MAE, MRAE, MSE, NKLD, etc.). +* Popular datasets for Quantification (textual and numeric) available, including: + * 32 UCI Machine Learning datasets. + * 11 Twitter Sentiment datasets. + * 3 Reviews Sentiment datasets. +* Native support for binary and single-label scenarios of quantification. +* Model selection functionality targeting quantification-oriented losses. +* Visualization tools for analysing results. + +..
toctree:: + :maxdepth: 2 + :caption: Contents: + + Installation + Datasets + Evaluation + Methods + Model Selection + Plotting + API Developer documentation + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/build/html/_sources/modules.rst.txt b/docs/build/html/_sources/modules.rst.txt new file mode 100644 index 0000000..5d84a54 --- /dev/null +++ b/docs/build/html/_sources/modules.rst.txt @@ -0,0 +1,7 @@ +quapy +===== + +.. toctree:: + :maxdepth: 4 + + quapy diff --git a/docs/build/html/_sources/quapy.classification.rst.txt b/docs/build/html/_sources/quapy.classification.rst.txt new file mode 100644 index 0000000..b541e7e --- /dev/null +++ b/docs/build/html/_sources/quapy.classification.rst.txt @@ -0,0 +1,37 @@ +quapy.classification package +============================ + +Submodules +---------- + +quapy.classification.methods module +----------------------------------- + +.. automodule:: quapy.classification.methods + :members: + :undoc-members: + :show-inheritance: + +quapy.classification.neural module +---------------------------------- + +.. automodule:: quapy.classification.neural + :members: + :undoc-members: + :show-inheritance: + +quapy.classification.svmperf module +----------------------------------- + +.. automodule:: quapy.classification.svmperf + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: quapy.classification + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/build/html/_sources/quapy.data.rst.txt b/docs/build/html/_sources/quapy.data.rst.txt new file mode 100644 index 0000000..5012a73 --- /dev/null +++ b/docs/build/html/_sources/quapy.data.rst.txt @@ -0,0 +1,45 @@ +quapy.data package +================== + +Submodules +---------- + +quapy.data.base module +---------------------- + +.. 
automodule:: quapy.data.base + :members: + :undoc-members: + :show-inheritance: + +quapy.data.datasets module +-------------------------- + +.. automodule:: quapy.data.datasets + :members: + :undoc-members: + :show-inheritance: + +quapy.data.preprocessing module +------------------------------- + +.. automodule:: quapy.data.preprocessing + :members: + :undoc-members: + :show-inheritance: + +quapy.data.reader module +------------------------ + +.. automodule:: quapy.data.reader + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: quapy.data + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/build/html/_sources/quapy.method.rst.txt b/docs/build/html/_sources/quapy.method.rst.txt new file mode 100644 index 0000000..526e922 --- /dev/null +++ b/docs/build/html/_sources/quapy.method.rst.txt @@ -0,0 +1,53 @@ +quapy.method package +==================== + +Submodules +---------- + +quapy.method.aggregative module +------------------------------- + +.. automodule:: quapy.method.aggregative + :members: + :undoc-members: + :show-inheritance: + +quapy.method.base module +------------------------ + +.. automodule:: quapy.method.base + :members: + :undoc-members: + :show-inheritance: + +quapy.method.meta module +------------------------ + +.. automodule:: quapy.method.meta + :members: + :undoc-members: + :show-inheritance: + +quapy.method.neural module +-------------------------- + +.. automodule:: quapy.method.neural + :members: + :undoc-members: + :show-inheritance: + +quapy.method.non\_aggregative module +------------------------------------ + +.. automodule:: quapy.method.non_aggregative + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: quapy.method + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/build/html/_sources/quapy.rst.txt b/docs/build/html/_sources/quapy.rst.txt new file mode 100644 index 0000000..b6fc8c8 --- /dev/null +++ b/docs/build/html/_sources/quapy.rst.txt @@ -0,0 +1,72 @@ +quapy package +============= + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + quapy.classification + quapy.data + quapy.method + quapy.tests + +Submodules +---------- + +quapy.error module +------------------ + +.. automodule:: quapy.error + :members: + :undoc-members: + :show-inheritance: + +quapy.evaluation module +----------------------- + +.. automodule:: quapy.evaluation + :members: + :undoc-members: + :show-inheritance: + +quapy.functional module +----------------------- + +.. automodule:: quapy.functional + :members: + :undoc-members: + :show-inheritance: + +quapy.model\_selection module +----------------------------- + +.. automodule:: quapy.model_selection + :members: + :undoc-members: + :show-inheritance: + +quapy.plot module +----------------- + +.. automodule:: quapy.plot + :members: + :undoc-members: + :show-inheritance: + +quapy.util module +----------------- + +.. automodule:: quapy.util + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: quapy + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/build/html/_sources/quapy.tests.rst.txt b/docs/build/html/_sources/quapy.tests.rst.txt new file mode 100644 index 0000000..0e0d92d --- /dev/null +++ b/docs/build/html/_sources/quapy.tests.rst.txt @@ -0,0 +1,37 @@ +quapy.tests package +=================== + +Submodules +---------- + +quapy.tests.test\_base module +----------------------------- + +.. automodule:: quapy.tests.test_base + :members: + :undoc-members: + :show-inheritance: + +quapy.tests.test\_datasets module +--------------------------------- + +.. 
automodule:: quapy.tests.test_datasets + :members: + :undoc-members: + :show-inheritance: + +quapy.tests.test\_methods module +-------------------------------- + +.. automodule:: quapy.tests.test_methods + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: quapy.tests + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/build/html/_sources/readme.rst.txt b/docs/build/html/_sources/readme.rst.txt new file mode 100644 index 0000000..59bb23d --- /dev/null +++ b/docs/build/html/_sources/readme.rst.txt @@ -0,0 +1,7 @@ +Getting Started +=============== +QuaPy is an open source framework for Quantification (a.k.a. Supervised Prevalence Estimation) written in Python. + +Installation +------------ +>>> pip install quapy \ No newline at end of file diff --git a/docs/build/html/_sources/readme2.md.txt b/docs/build/html/_sources/readme2.md.txt new file mode 100644 index 0000000..c8d6969 --- /dev/null +++ b/docs/build/html/_sources/readme2.md.txt @@ -0,0 +1 @@ +.. 
include:: ../../README.md \ No newline at end of file diff --git a/docs/build/html/_static/alabaster.css b/docs/build/html/_static/alabaster.css new file mode 100644 index 0000000..0eddaeb --- /dev/null +++ b/docs/build/html/_static/alabaster.css @@ -0,0 +1,701 @@ +@import url("basic.css"); + +/* -- page layout ----------------------------------------------------------- */ + +body { + font-family: Georgia, serif; + font-size: 17px; + background-color: #fff; + color: #000; + margin: 0; + padding: 0; +} + + +div.document { + width: 940px; + margin: 30px auto 0 auto; +} + +div.documentwrapper { + float: left; + width: 100%; +} + +div.bodywrapper { + margin: 0 0 0 220px; +} + +div.sphinxsidebar { + width: 220px; + font-size: 14px; + line-height: 1.5; +} + +hr { + border: 1px solid #B1B4B6; +} + +div.body { + background-color: #fff; + color: #3E4349; + padding: 0 30px 0 30px; +} + +div.body > .section { + text-align: left; +} + +div.footer { + width: 940px; + margin: 20px auto 30px auto; + font-size: 14px; + color: #888; + text-align: right; +} + +div.footer a { + color: #888; +} + +p.caption { + font-family: inherit; + font-size: inherit; +} + + +div.relations { + display: none; +} + + +div.sphinxsidebar a { + color: #444; + text-decoration: none; + border-bottom: 1px dotted #999; +} + +div.sphinxsidebar a:hover { + border-bottom: 1px solid #999; +} + +div.sphinxsidebarwrapper { + padding: 18px 10px; +} + +div.sphinxsidebarwrapper p.logo { + padding: 0; + margin: -10px 0 0 0px; + text-align: center; +} + +div.sphinxsidebarwrapper h1.logo { + margin-top: -10px; + text-align: center; + margin-bottom: 5px; + text-align: left; +} + +div.sphinxsidebarwrapper h1.logo-name { + margin-top: 0px; +} + +div.sphinxsidebarwrapper p.blurb { + margin-top: 0; + font-style: normal; +} + +div.sphinxsidebar h3, +div.sphinxsidebar h4 { + font-family: Georgia, serif; + color: #444; + font-size: 24px; + font-weight: normal; + margin: 0 0 5px 0; + padding: 0; +} + +div.sphinxsidebar h4 { + 
font-size: 20px; +} + +div.sphinxsidebar h3 a { + color: #444; +} + +div.sphinxsidebar p.logo a, +div.sphinxsidebar h3 a, +div.sphinxsidebar p.logo a:hover, +div.sphinxsidebar h3 a:hover { + border: none; +} + +div.sphinxsidebar p { + color: #555; + margin: 10px 0; +} + +div.sphinxsidebar ul { + margin: 10px 0; + padding: 0; + color: #000; +} + +div.sphinxsidebar ul li.toctree-l1 > a { + font-size: 120%; +} + +div.sphinxsidebar ul li.toctree-l2 > a { + font-size: 110%; +} + +div.sphinxsidebar input { + border: 1px solid #CCC; + font-family: Georgia, serif; + font-size: 1em; +} + +div.sphinxsidebar hr { + border: none; + height: 1px; + color: #AAA; + background: #AAA; + + text-align: left; + margin-left: 0; + width: 50%; +} + +div.sphinxsidebar .badge { + border-bottom: none; +} + +div.sphinxsidebar .badge:hover { + border-bottom: none; +} + +/* To address an issue with donation coming after search */ +div.sphinxsidebar h3.donation { + margin-top: 10px; +} + +/* -- body styles ----------------------------------------------------------- */ + +a { + color: #004B6B; + text-decoration: underline; +} + +a:hover { + color: #6D4100; + text-decoration: underline; +} + +div.body h1, +div.body h2, +div.body h3, +div.body h4, +div.body h5, +div.body h6 { + font-family: Georgia, serif; + font-weight: normal; + margin: 30px 0px 10px 0px; + padding: 0; +} + +div.body h1 { margin-top: 0; padding-top: 0; font-size: 240%; } +div.body h2 { font-size: 180%; } +div.body h3 { font-size: 150%; } +div.body h4 { font-size: 130%; } +div.body h5 { font-size: 100%; } +div.body h6 { font-size: 100%; } + +a.headerlink { + color: #DDD; + padding: 0 4px; + text-decoration: none; +} + +a.headerlink:hover { + color: #444; + background: #EAEAEA; +} + +div.body p, div.body dd, div.body li { + line-height: 1.4em; +} + +div.admonition { + margin: 20px 0px; + padding: 10px 30px; + background-color: #EEE; + border: 1px solid #CCC; +} + +div.admonition tt.xref, div.admonition code.xref, div.admonition a 
tt { + background-color: #FBFBFB; + border-bottom: 1px solid #fafafa; +} + +div.admonition p.admonition-title { + font-family: Georgia, serif; + font-weight: normal; + font-size: 24px; + margin: 0 0 10px 0; + padding: 0; + line-height: 1; +} + +div.admonition p.last { + margin-bottom: 0; +} + +div.highlight { + background-color: #fff; +} + +dt:target, .highlight { + background: #FAF3E8; +} + +div.warning { + background-color: #FCC; + border: 1px solid #FAA; +} + +div.danger { + background-color: #FCC; + border: 1px solid #FAA; + -moz-box-shadow: 2px 2px 4px #D52C2C; + -webkit-box-shadow: 2px 2px 4px #D52C2C; + box-shadow: 2px 2px 4px #D52C2C; +} + +div.error { + background-color: #FCC; + border: 1px solid #FAA; + -moz-box-shadow: 2px 2px 4px #D52C2C; + -webkit-box-shadow: 2px 2px 4px #D52C2C; + box-shadow: 2px 2px 4px #D52C2C; +} + +div.caution { + background-color: #FCC; + border: 1px solid #FAA; +} + +div.attention { + background-color: #FCC; + border: 1px solid #FAA; +} + +div.important { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.note { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.tip { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.hint { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.seealso { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.topic { + background-color: #EEE; +} + +p.admonition-title { + display: inline; +} + +p.admonition-title:after { + content: ":"; +} + +pre, tt, code { + font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; + font-size: 0.9em; +} + +.hll { + background-color: #FFC; + margin: 0 -12px; + padding: 0 12px; + display: block; +} + +img.screenshot { +} + +tt.descname, tt.descclassname, code.descname, code.descclassname { + font-size: 0.95em; +} + +tt.descname, code.descname { + padding-right: 0.08em; +} + +img.screenshot { + -moz-box-shadow: 2px 2px 4px #EEE; + -webkit-box-shadow: 2px 2px 4px #EEE; + box-shadow: 
2px 2px 4px #EEE; +} + +table.docutils { + border: 1px solid #888; + -moz-box-shadow: 2px 2px 4px #EEE; + -webkit-box-shadow: 2px 2px 4px #EEE; + box-shadow: 2px 2px 4px #EEE; +} + +table.docutils td, table.docutils th { + border: 1px solid #888; + padding: 0.25em 0.7em; +} + +table.field-list, table.footnote { + border: none; + -moz-box-shadow: none; + -webkit-box-shadow: none; + box-shadow: none; +} + +table.footnote { + margin: 15px 0; + width: 100%; + border: 1px solid #EEE; + background: #FDFDFD; + font-size: 0.9em; +} + +table.footnote + table.footnote { + margin-top: -15px; + border-top: none; +} + +table.field-list th { + padding: 0 0.8em 0 0; +} + +table.field-list td { + padding: 0; +} + +table.field-list p { + margin-bottom: 0.8em; +} + +/* Cloned from + * https://github.com/sphinx-doc/sphinx/commit/ef60dbfce09286b20b7385333d63a60321784e68 + */ +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + +table.footnote td.label { + width: .1px; + padding: 0.3em 0 0.3em 0.5em; +} + +table.footnote td { + padding: 0.3em 0.5em; +} + +dl { + margin: 0; + padding: 0; +} + +dl dd { + margin-left: 30px; +} + +blockquote { + margin: 0 0 0 30px; + padding: 0; +} + +ul, ol { + /* Matches the 30px from the narrow-screen "li > ul" selector below */ + margin: 10px 0 10px 30px; + padding: 0; +} + +pre { + background: #EEE; + padding: 7px 30px; + margin: 15px 0px; + line-height: 1.3em; +} + +div.viewcode-block:target { + background: #ffd; +} + +dl pre, blockquote pre, li pre { + margin-left: 0; + padding-left: 30px; +} + +tt, code { + background-color: #ecf0f3; + color: #222; + /* padding: 1px 2px; */ +} + +tt.xref, code.xref, a tt { + background-color: #FBFBFB; + border-bottom: 1px solid #fff; +} + +a.reference { + text-decoration: none; + border-bottom: 1px dotted #004B6B; +} + +/* Don't put an underline on images */ +a.image-reference, a.image-reference:hover { + border-bottom: none; +} + +a.reference:hover { + 
border-bottom: 1px solid #6D4100; +} + +a.footnote-reference { + text-decoration: none; + font-size: 0.7em; + vertical-align: top; + border-bottom: 1px dotted #004B6B; +} + +a.footnote-reference:hover { + border-bottom: 1px solid #6D4100; +} + +a:hover tt, a:hover code { + background: #EEE; +} + + +@media screen and (max-width: 870px) { + + div.sphinxsidebar { + display: none; + } + + div.document { + width: 100%; + + } + + div.documentwrapper { + margin-left: 0; + margin-top: 0; + margin-right: 0; + margin-bottom: 0; + } + + div.bodywrapper { + margin-top: 0; + margin-right: 0; + margin-bottom: 0; + margin-left: 0; + } + + ul { + margin-left: 0; + } + + li > ul { + /* Matches the 30px from the "ul, ol" selector above */ + margin-left: 30px; + } + + .document { + width: auto; + } + + .footer { + width: auto; + } + + .bodywrapper { + margin: 0; + } + + .footer { + width: auto; + } + + .github { + display: none; + } + + + +} + + + +@media screen and (max-width: 875px) { + + body { + margin: 0; + padding: 20px 30px; + } + + div.documentwrapper { + float: none; + background: #fff; + } + + div.sphinxsidebar { + display: block; + float: none; + width: 102.5%; + margin: 50px -30px -20px -30px; + padding: 10px 20px; + background: #333; + color: #FFF; + } + + div.sphinxsidebar h3, div.sphinxsidebar h4, div.sphinxsidebar p, + div.sphinxsidebar h3 a { + color: #fff; + } + + div.sphinxsidebar a { + color: #AAA; + } + + div.sphinxsidebar p.logo { + display: none; + } + + div.document { + width: 100%; + margin: 0; + } + + div.footer { + display: none; + } + + div.bodywrapper { + margin: 0; + } + + div.body { + min-height: 0; + padding: 0; + } + + .rtd_doc_footer { + display: none; + } + + .document { + width: auto; + } + + .footer { + width: auto; + } + + .footer { + width: auto; + } + + .github { + display: none; + } +} + + +/* misc. */ + +.revsys-inline { + display: none!important; +} + +/* Make nested-list/multi-paragraph items look better in Releases changelog + * pages. 
Without this, docutils' magical list fuckery causes inconsistent + * formatting between different release sub-lists. + */ +div#changelog > div.section > ul > li > p:only-child { + margin-bottom: 0; +} + +/* Hide fugly table cell borders in ..bibliography:: directive output */ +table.docutils.citation, table.docutils.citation td, table.docutils.citation th { + border: none; + /* Below needed in some edge cases; if not applied, bottom shadows appear */ + -moz-box-shadow: none; + -webkit-box-shadow: none; + box-shadow: none; +} + + +/* relbar */ + +.related { + line-height: 30px; + width: 100%; + font-size: 0.9rem; +} + +.related.top { + border-bottom: 1px solid #EEE; + margin-bottom: 20px; +} + +.related.bottom { + border-top: 1px solid #EEE; +} + +.related ul { + padding: 0; + margin: 0; + list-style: none; +} + +.related li { + display: inline; +} + +nav#rellinks { + float: right; +} + +nav#rellinks li+li:before { + content: "|"; +} + +nav#breadcrumbs li+li:before { + content: "\00BB"; +} + +/* Hide certain items when printing */ +@media print { + div.related { + display: none; + } +} \ No newline at end of file diff --git a/docs/build/html/_static/background_b01.png b/docs/build/html/_static/background_b01.png new file mode 100644 index 0000000..353f26d Binary files /dev/null and b/docs/build/html/_static/background_b01.png differ diff --git a/docs/build/html/_static/basic.css b/docs/build/html/_static/basic.css new file mode 100644 index 0000000..9b94688 --- /dev/null +++ b/docs/build/html/_static/basic.css @@ -0,0 +1,904 @@ +/* + * basic.css + * ~~~~~~~~~ + * + * Sphinx stylesheet -- basic theme. + * + * :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. 
+ * + */ + +/* -- main layout ----------------------------------------------------------- */ + +div.clearer { + clear: both; +} + +div.section::after { + display: block; + content: ''; + clear: left; +} + +/* -- relbar ---------------------------------------------------------------- */ + +div.related { + width: 100%; + font-size: 90%; +} + +div.related h3 { + display: none; +} + +div.related ul { + margin: 0; + padding: 0 0 0 10px; + list-style: none; +} + +div.related li { + display: inline; +} + +div.related li.right { + float: right; + margin-right: 5px; +} + +/* -- sidebar --------------------------------------------------------------- */ + +div.sphinxsidebarwrapper { + padding: 10px 5px 0 10px; +} + +div.sphinxsidebar { + float: left; + width: 210px; + margin-left: -100%; + font-size: 90%; + word-wrap: break-word; + overflow-wrap : break-word; +} + +div.sphinxsidebar ul { + list-style: none; +} + +div.sphinxsidebar ul ul, +div.sphinxsidebar ul.want-points { + margin-left: 20px; + list-style: square; +} + +div.sphinxsidebar ul ul { + margin-top: 0; + margin-bottom: 0; +} + +div.sphinxsidebar form { + margin-top: 10px; +} + +div.sphinxsidebar input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox form.search { + overflow: hidden; +} + +div.sphinxsidebar #searchbox input[type="text"] { + float: left; + width: 80%; + padding: 0.25em; + box-sizing: border-box; +} + +div.sphinxsidebar #searchbox input[type="submit"] { + float: left; + width: 20%; + border-left: none; + padding: 0.25em; + box-sizing: border-box; +} + + +img { + border: 0; + max-width: 100%; +} + +/* -- search page ----------------------------------------------------------- */ + +ul.search { + margin: 10px 0 0 20px; + padding: 0; +} + +ul.search li { + padding: 5px 0 5px 20px; + background-image: url(file.png); + background-repeat: no-repeat; + background-position: 0 7px; +} + +ul.search li a { + font-weight: bold; +} + +ul.search li 
p.context { + color: #888; + margin: 2px 0 0 30px; + text-align: left; +} + +ul.keywordmatches li.goodmatch a { + font-weight: bold; +} + +/* -- index page ------------------------------------------------------------ */ + +table.contentstable { + width: 90%; + margin-left: auto; + margin-right: auto; +} + +table.contentstable p.biglink { + line-height: 150%; +} + +a.biglink { + font-size: 1.3em; +} + +span.linkdescr { + font-style: italic; + padding-top: 5px; + font-size: 90%; +} + +/* -- general index --------------------------------------------------------- */ + +table.indextable { + width: 100%; +} + +table.indextable td { + text-align: left; + vertical-align: top; +} + +table.indextable ul { + margin-top: 0; + margin-bottom: 0; + list-style-type: none; +} + +table.indextable > tbody > tr > td > ul { + padding-left: 0em; +} + +table.indextable tr.pcap { + height: 10px; +} + +table.indextable tr.cap { + margin-top: 10px; + background-color: #f2f2f2; +} + +img.toggler { + margin-right: 3px; + margin-top: 3px; + cursor: pointer; +} + +div.modindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +div.genindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +/* -- domain module index --------------------------------------------------- */ + +table.modindextable td { + padding: 2px; + border-collapse: collapse; +} + +/* -- general body styles --------------------------------------------------- */ + +div.body { + min-width: 450px; + max-width: 800px; +} + +div.body p, div.body dd, div.body li, div.body blockquote { + -moz-hyphens: auto; + -ms-hyphens: auto; + -webkit-hyphens: auto; + hyphens: auto; +} + +a.headerlink { + visibility: hidden; +} + +a.brackets:before, +span.brackets > a:before{ + content: "["; +} + +a.brackets:after, +span.brackets > a:after { + content: "]"; +} + +h1:hover > a.headerlink, +h2:hover > a.headerlink, 
+h3:hover > a.headerlink, +h4:hover > a.headerlink, +h5:hover > a.headerlink, +h6:hover > a.headerlink, +dt:hover > a.headerlink, +caption:hover > a.headerlink, +p.caption:hover > a.headerlink, +div.code-block-caption:hover > a.headerlink { + visibility: visible; +} + +div.body p.caption { + text-align: inherit; +} + +div.body td { + text-align: left; +} + +.first { + margin-top: 0 !important; +} + +p.rubric { + margin-top: 30px; + font-weight: bold; +} + +img.align-left, figure.align-left, .figure.align-left, object.align-left { + clear: left; + float: left; + margin-right: 1em; +} + +img.align-right, figure.align-right, .figure.align-right, object.align-right { + clear: right; + float: right; + margin-left: 1em; +} + +img.align-center, figure.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +img.align-default, figure.align-default, .figure.align-default { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left; +} + +.align-center { + text-align: center; +} + +.align-default { + text-align: center; +} + +.align-right { + text-align: right; +} + +/* -- sidebars -------------------------------------------------------------- */ + +div.sidebar, +aside.sidebar { + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px; + background-color: #ffe; + width: 40%; + float: right; + clear: right; + overflow-x: auto; +} + +p.sidebar-title { + font-weight: bold; +} + +div.admonition, div.topic, blockquote { + clear: left; +} + +/* -- topics ---------------------------------------------------------------- */ + +div.topic { + border: 1px solid #ccc; + padding: 7px; + margin: 10px 0 10px 0; +} + +p.topic-title { + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; +} + +/* -- admonitions ----------------------------------------------------------- */ + +div.admonition { + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; +} + 
+div.admonition dt { + font-weight: bold; +} + +p.admonition-title { + margin: 0px 10px 5px 0px; + font-weight: bold; +} + +div.body p.centered { + text-align: center; + margin-top: 25px; +} + +/* -- content of sidebars/topics/admonitions -------------------------------- */ + +div.sidebar > :last-child, +aside.sidebar > :last-child, +div.topic > :last-child, +div.admonition > :last-child { + margin-bottom: 0; +} + +div.sidebar::after, +aside.sidebar::after, +div.topic::after, +div.admonition::after, +blockquote::after { + display: block; + content: ''; + clear: both; +} + +/* -- tables ---------------------------------------------------------------- */ + +table.docutils { + margin-top: 10px; + margin-bottom: 10px; + border: 0; + border-collapse: collapse; +} + +table.align-center { + margin-left: auto; + margin-right: auto; +} + +table.align-default { + margin-left: auto; + margin-right: auto; +} + +table caption span.caption-number { + font-style: italic; +} + +table caption span.caption-text { +} + +table.docutils td, table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 0; + border-bottom: 1px solid #aaa; +} + +table.footnote td, table.footnote th { + border: 0 !important; +} + +th { + text-align: left; + padding-right: 5px; +} + +table.citation { + border-left: solid 1px gray; + margin-left: 1px; +} + +table.citation td { + border-bottom: none; +} + +th > :first-child, +td > :first-child { + margin-top: 0px; +} + +th > :last-child, +td > :last-child { + margin-bottom: 0px; +} + +/* -- figures --------------------------------------------------------------- */ + +div.figure, figure { + margin: 0.5em; + padding: 0.5em; +} + +div.figure p.caption, figcaption { + padding: 0.3em; +} + +div.figure p.caption span.caption-number, +figcaption span.caption-number { + font-style: italic; +} + +div.figure p.caption span.caption-text, +figcaption span.caption-text { +} + +/* -- field list styles 
----------------------------------------------------- */ + +table.field-list td, table.field-list th { + border: 0 !important; +} + +.field-list ul { + margin: 0; + padding-left: 1em; +} + +.field-list p { + margin: 0; +} + +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + +/* -- hlist styles ---------------------------------------------------------- */ + +table.hlist { + margin: 1em 0; +} + +table.hlist td { + vertical-align: top; +} + +/* -- object description styles --------------------------------------------- */ + +.sig { + font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; +} + +.sig-name, code.descname { + background-color: transparent; + font-weight: bold; +} + +.sig-name { + font-size: 1.1em; +} + +code.descname { + font-size: 1.2em; +} + +.sig-prename, code.descclassname { + background-color: transparent; +} + +.optional { + font-size: 1.3em; +} + +.sig-paren { + font-size: larger; +} + +.sig-param.n { + font-style: italic; +} + +/* C++ specific styling */ + +.sig-inline.c-texpr, +.sig-inline.cpp-texpr { + font-family: unset; +} + +.sig.c .k, .sig.c .kt, +.sig.cpp .k, .sig.cpp .kt { + color: #0033B3; +} + +.sig.c .m, +.sig.cpp .m { + color: #1750EB; +} + +.sig.c .s, .sig.c .sc, +.sig.cpp .s, .sig.cpp .sc { + color: #067D17; +} + + +/* -- other body styles ----------------------------------------------------- */ + +ol.arabic { + list-style: decimal; +} + +ol.loweralpha { + list-style: lower-alpha; +} + +ol.upperalpha { + list-style: upper-alpha; +} + +ol.lowerroman { + list-style: lower-roman; +} + +ol.upperroman { + list-style: upper-roman; +} + +:not(li) > ol > li:first-child > :first-child, +:not(li) > ul > li:first-child > :first-child { + margin-top: 0px; +} + +:not(li) > ol > li:last-child > :last-child, +:not(li) > ul > li:last-child > :last-child { + margin-bottom: 0px; +} + +ol.simple ol p, +ol.simple ul p, +ul.simple ol p, +ul.simple ul p { + 
margin-top: 0; +} + +ol.simple > li:not(:first-child) > p, +ul.simple > li:not(:first-child) > p { + margin-top: 0; +} + +ol.simple p, +ul.simple p { + margin-bottom: 0; +} + +dl.footnote > dt, +dl.citation > dt { + float: left; + margin-right: 0.5em; +} + +dl.footnote > dd, +dl.citation > dd { + margin-bottom: 0em; +} + +dl.footnote > dd:after, +dl.citation > dd:after { + content: ""; + clear: both; +} + +dl.field-list { + display: grid; + grid-template-columns: fit-content(30%) auto; +} + +dl.field-list > dt { + font-weight: bold; + word-break: break-word; + padding-left: 0.5em; + padding-right: 5px; +} + +dl.field-list > dt:after { + content: ":"; +} + +dl.field-list > dd { + padding-left: 0.5em; + margin-top: 0em; + margin-left: 0em; + margin-bottom: 0em; +} + +dl { + margin-bottom: 15px; +} + +dd > :first-child { + margin-top: 0px; +} + +dd ul, dd table { + margin-bottom: 10px; +} + +dd { + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; +} + +dl > dd:last-child, +dl > dd:last-child > :last-child { + margin-bottom: 0; +} + +dt:target, span.highlighted { + background-color: #fbe54e; +} + +rect.highlighted { + fill: #fbe54e; +} + +dl.glossary dt { + font-weight: bold; + font-size: 1.1em; +} + +.versionmodified { + font-style: italic; +} + +.system-message { + background-color: #fda; + padding: 5px; + border: 3px solid red; +} + +.footnote:target { + background-color: #ffa; +} + +.line-block { + display: block; + margin-top: 1em; + margin-bottom: 1em; +} + +.line-block .line-block { + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; +} + +.guilabel, .menuselection { + font-family: sans-serif; +} + +.accelerator { + text-decoration: underline; +} + +.classifier { + font-style: oblique; +} + +.classifier:before { + font-style: normal; + margin: 0.5em; + content: ":"; +} + +abbr, acronym { + border-bottom: dotted 1px; + cursor: help; +} + +/* -- code displays --------------------------------------------------------- */ + +pre { + overflow: auto; 
+ overflow-y: hidden; /* fixes display issues on Chrome browsers */ +} + +pre, div[class*="highlight-"] { + clear: both; +} + +span.pre { + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: none; +} + +div[class*="highlight-"] { + margin: 1em 0; +} + +td.linenos pre { + border: 0; + background-color: transparent; + color: #aaa; +} + +table.highlighttable { + display: block; +} + +table.highlighttable tbody { + display: block; +} + +table.highlighttable tr { + display: flex; +} + +table.highlighttable td { + margin: 0; + padding: 0; +} + +table.highlighttable td.linenos { + padding-right: 0.5em; +} + +table.highlighttable td.code { + flex: 1; + overflow: hidden; +} + +.highlight .hll { + display: block; +} + +div.highlight pre, +table.highlighttable pre { + margin: 0; +} + +div.code-block-caption + div { + margin-top: 0; +} + +div.code-block-caption { + margin-top: 1em; + padding: 2px 5px; + font-size: small; +} + +div.code-block-caption code { + background-color: transparent; +} + +table.highlighttable td.linenos, +span.linenos, +div.highlight span.gp { /* gp: Generic.Prompt */ + user-select: none; + -webkit-user-select: text; /* Safari fallback only */ + -webkit-user-select: none; /* Chrome/Safari */ + -moz-user-select: none; /* Firefox */ + -ms-user-select: none; /* IE10+ */ +} + +div.code-block-caption span.caption-number { + padding: 0.1em 0.3em; + font-style: italic; +} + +div.code-block-caption span.caption-text { +} + +div.literal-block-wrapper { + margin: 1em 0; +} + +code.xref, a code { + background-color: transparent; + font-weight: bold; +} + +h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { + background-color: transparent; +} + +.viewcode-link { + float: right; +} + +.viewcode-back { + float: right; + font-family: sans-serif; +} + +div.viewcode-block:target { + margin: -1px -10px; + padding: 0 10px; +} + +/* -- math display ---------------------------------------------------------- */ + +img.math { + vertical-align: 
middle; +} + +div.body div.math p { + text-align: center; +} + +span.eqno { + float: right; +} + +span.eqno a.headerlink { + position: absolute; + z-index: 1; +} + +div.math:hover a.headerlink { + visibility: visible; +} + +/* -- printout stylesheet --------------------------------------------------- */ + +@media print { + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link { + display: none; + } +} \ No newline at end of file diff --git a/docs/build/html/_static/bizstyle.css b/docs/build/html/_static/bizstyle.css new file mode 100644 index 0000000..def9ced --- /dev/null +++ b/docs/build/html/_static/bizstyle.css @@ -0,0 +1,506 @@ +/* + * bizstyle.css_t + * ~~~~~~~~~~~~~~ + * + * Sphinx stylesheet -- business style theme. + * + * :copyright: Copyright 2011-2014 by Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +@import url("basic.css"); + +/* -- page layout ----------------------------------------------------------- */ + +body { + font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', + 'Verdana', sans-serif; + font-size: 14px; + letter-spacing: -0.01em; + line-height: 150%; + text-align: center; + background-color: white; + background-image: url(background_b01.png); + color: black; + padding: 0; + border-right: 1px solid #336699; + border-left: 1px solid #336699; + + margin: 0px 40px 0px 40px; +} + +div.document { + background-color: white; + text-align: left; + background-repeat: repeat-x; + + -moz-box-shadow: 2px 2px 5px #000; + -webkit-box-shadow: 2px 2px 5px #000; +} + +div.documentwrapper { + float: left; + width: 100%; +} + +div.bodywrapper { + margin: 0 0 0 240px; + border-left: 1px solid #ccc; +} + +div.body { + margin: 0; + padding: 0.5em 20px 20px 20px; +} +div.bodywrapper { + margin: 0 0 0 calc(210px + 30px); +} + +div.related { + font-size: 1em; + + -moz-box-shadow: 2px 2px 5px #000; + 
-webkit-box-shadow: 2px 2px 5px #000; +} + +div.related ul { + background-color: #336699; + height: 100%; + overflow: hidden; + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; +} + +div.related ul li { + color: white; + margin: 0; + padding: 0; + height: 2em; + float: left; +} + +div.related ul li.right { + float: right; + margin-right: 5px; +} + +div.related ul li a { + margin: 0; + padding: 0 5px 0 5px; + line-height: 1.75em; + color: #fff; +} + +div.related ul li a:hover { + color: #fff; + text-decoration: underline; +} + +div.sphinxsidebarwrapper { + padding: 0; +} + +div.sphinxsidebar { + padding: 0.5em 12px 12px 12px; + width: 210px; + font-size: 1em; + text-align: left; +} + +div.sphinxsidebar h3, div.sphinxsidebar h4 { + margin: 1em 0 0.5em 0; + font-size: 1em; + padding: 0.1em 0 0.1em 0.5em; + color: white; + border: 1px solid #336699; + background-color: #336699; +} + +div.sphinxsidebar h3 a { + color: white; +} + +div.sphinxsidebar ul { + padding-left: 1.5em; + margin-top: 7px; + padding: 0; + line-height: 130%; +} + +div.sphinxsidebar ul ul { + margin-left: 20px; +} + +div.sphinxsidebar input { + border: 1px solid #336699; +} + +div.footer { + background-color: white; + color: #336699; + padding: 3px 8px 3px 0; + clear: both; + font-size: 0.8em; + text-align: right; + border-bottom: 1px solid #336699; + + -moz-box-shadow: 2px 2px 5px #000; + -webkit-box-shadow: 2px 2px 5px #000; +} + +div.footer a { + color: #336699; + text-decoration: underline; +} + +/* -- body styles ----------------------------------------------------------- */ + +p { + margin: 0.8em 0 0.5em 0; +} + +a { + color: #336699; + text-decoration: none; +} + +a:hover { + color: #336699; + text-decoration: underline; +} + +div.body a { + text-decoration: underline; +} + +h1, h2, h3 { + color: #336699; +} + +h1 { + margin: 0; + padding: 0.7em 0 0.3em 0; + font-size: 1.5em; +} + +h2 { + margin: 1.3em 0 0.2em 0; + font-size: 1.35em; + padding-bottom: .5em; + border-bottom: 1px 
solid #336699; +} + +h3 { + margin: 1em 0 -0.3em 0; + font-size: 1.2em; + padding-bottom: .3em; + border-bottom: 1px solid #CCCCCC; +} + +div.body h1 a, div.body h2 a, div.body h3 a, +div.body h4 a, div.body h5 a, div.body h6 a { + color: black!important; +} + +h1 a.anchor, h2 a.anchor, h3 a.anchor, +h4 a.anchor, h5 a.anchor, h6 a.anchor { + display: none; + margin: 0 0 0 0.3em; + padding: 0 0.2em 0 0.2em; + color: #aaa!important; +} + +h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor, +h5:hover a.anchor, h6:hover a.anchor { + display: inline; +} + +h1 a.anchor:hover, h2 a.anchor:hover, h3 a.anchor:hover, h4 a.anchor:hover, +h5 a.anchor:hover, h6 a.anchor:hover { + color: #777; + background-color: #eee; +} + +a.headerlink { + color: #c60f0f!important; + font-size: 1em; + margin-left: 6px; + padding: 0 4px 0 4px; + text-decoration: none!important; +} + +a.headerlink:hover { + background-color: #ccc; + color: white!important; +} + +cite, code, tt { + font-family: 'Consolas', 'Deja Vu Sans Mono', + 'Bitstream Vera Sans Mono', monospace; + font-size: 0.95em; + letter-spacing: 0.01em; +} + +code { + background-color: #F2F2F2; + border-bottom: 1px solid #ddd; + color: #333; +} + +code.descname, code.descclassname, code.xref { + border: 0; +} + +hr { + border: 1px solid #abc; + margin: 2em; +} + +a code { + border: 0; + color: #CA7900; +} + +a code:hover { + color: #2491CF; +} + +pre { + background-color: transparent !important; + font-family: 'Consolas', 'Deja Vu Sans Mono', + 'Bitstream Vera Sans Mono', monospace; + font-size: 0.95em; + letter-spacing: 0.015em; + line-height: 120%; + padding: 0.5em; + border-right: 5px solid #ccc; + border-left: 5px solid #ccc; +} + +pre a { + color: inherit; + text-decoration: underline; +} + +td.linenos pre { + padding: 0.5em 0; +} + +div.quotebar { + background-color: #f8f8f8; + max-width: 250px; + float: right; + padding: 2px 7px; + border: 1px solid #ccc; +} + +div.topic { + background-color: #f8f8f8; +} + 
+table { + border-collapse: collapse; + margin: 0 -0.5em 0 -0.5em; +} + +table td, table th { + padding: 0.2em 0.5em 0.2em 0.5em; +} + +div.admonition { + font-size: 0.9em; + margin: 1em 0 1em 0; + border: 3px solid #cccccc; + background-color: #f7f7f7; + padding: 0; +} + +div.admonition p { + margin: 0.5em 1em 0.5em 1em; + padding: 0; +} + +div.admonition li p { + margin-left: 0; +} + +div.admonition pre, div.warning pre { + margin: 0; +} + +div.highlight { + margin: 0.4em 1em; +} + +div.admonition p.admonition-title { + margin: 0; + padding: 0.1em 0 0.1em 0.5em; + color: white; + border-bottom: 3px solid #cccccc; + font-weight: bold; + background-color: #165e83; +} + +div.danger { border: 3px solid #f0908d; background-color: #f0cfa0; } +div.error { border: 3px solid #f0908d; background-color: #ede4cd; } +div.warning { border: 3px solid #f8b862; background-color: #f0cfa0; } +div.caution { border: 3px solid #f8b862; background-color: #ede4cd; } +div.attention { border: 3px solid #f8b862; background-color: #f3f3f3; } +div.important { border: 3px solid #f0cfa0; background-color: #ede4cd; } +div.note { border: 3px solid #f0cfa0; background-color: #f3f3f3; } +div.hint { border: 3px solid #bed2c3; background-color: #f3f3f3; } +div.tip { border: 3px solid #bed2c3; background-color: #f3f3f3; } + +div.danger p.admonition-title, div.error p.admonition-title { + background-color: #b7282e; + border-bottom: 3px solid #f0908d; +} + +div.caution p.admonition-title, +div.warning p.admonition-title, +div.attention p.admonition-title { + background-color: #f19072; + border-bottom: 3px solid #f8b862; +} + +div.note p.admonition-title, div.important p.admonition-title { + background-color: #f8b862; + border-bottom: 3px solid #f0cfa0; +} + +div.hint p.admonition-title, div.tip p.admonition-title { + background-color: #7ebea5; + border-bottom: 3px solid #bed2c3; +} + +div.admonition ul, div.admonition ol, +div.warning ul, div.warning ol { + margin: 0.1em 0.5em 0.5em 3em; + padding: 0; 
+} + +div.versioninfo { + margin: 1em 0 0 0; + border: 1px solid #ccc; + background-color: #DDEAF0; + padding: 8px; + line-height: 1.3em; + font-size: 0.9em; +} + +.viewcode-back { + font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', + 'Verdana', sans-serif; +} + +div.viewcode-block:target { + background-color: #f4debf; + border-top: 1px solid #ac9; + border-bottom: 1px solid #ac9; +} + +p.versionchanged span.versionmodified { + font-size: 0.9em; + margin-right: 0.2em; + padding: 0.1em; + background-color: #DCE6A0; +} + +dl.field-list > dt { + color: white; + background-color: #82A0BE; +} + +dl.field-list > dd { + background-color: #f7f7f7; +} + +/* -- table styles ---------------------------------------------------------- */ + +table.docutils { + margin: 1em 0; + padding: 0; + border: 1px solid white; + background-color: #f7f7f7; +} + +table.docutils td, table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 1px solid white; + border-bottom: 1px solid white; +} + +table.docutils td p { + margin-top: 0; + margin-bottom: 0.3em; +} + +table.field-list td, table.field-list th { + border: 0 !important; + word-break: break-word; +} + +table.footnote td, table.footnote th { + border: 0 !important; +} + +th { + color: white; + text-align: left; + padding-right: 5px; + background-color: #82A0BE; +} + +div.literal-block-wrapper div.code-block-caption { + background-color: #EEE; + border-style: solid; + border-color: #CCC; + border-width: 1px 5px; +} + +/* WIDE DESKTOP STYLE */ +@media only screen and (min-width: 1176px) { +body { + margin: 0 40px 0 40px; +} +} + +/* TABLET STYLE */ +@media only screen and (min-width: 768px) and (max-width: 991px) { +body { + margin: 0 40px 0 40px; +} +} + +/* MOBILE LAYOUT (PORTRAIT/320px) */ +@media only screen and (max-width: 767px) { +body { + margin: 0; +} +div.bodywrapper { + margin: 0; + width: 100%; + border: none; +} +div.sphinxsidebar { + display: none; +} +} + +/* MOBILE 
LAYOUT (LANDSCAPE/480px) */ +@media only screen and (min-width: 480px) and (max-width: 767px) { +body { + margin: 0 20px 0 20px; +} +} + +/* RETINA OVERRIDES */ +@media +only screen and (-webkit-min-device-pixel-ratio: 2), +only screen and (min-device-pixel-ratio: 2) { +} + +/* -- end ------------------------------------------------------------------- */ \ No newline at end of file diff --git a/docs/build/html/_static/bizstyle.js b/docs/build/html/_static/bizstyle.js new file mode 100644 index 0000000..b993d2a --- /dev/null +++ b/docs/build/html/_static/bizstyle.js @@ -0,0 +1,41 @@ +// +// bizstyle.js +// ~~~~~~~~~~~ +// +// Sphinx javascript -- for bizstyle theme. +// +// This theme was created by referring to 'sphinxdoc' +// +// :copyright: Copyright 2012-2014 by Sphinx team, see AUTHORS. +// :license: BSD, see LICENSE for details. +// +$(document).ready(function(){ + if (navigator.userAgent.indexOf('iPhone') > 0 || + navigator.userAgent.indexOf('Android') > 0) { + $("li.nav-item-0 a").text("Top"); + } + + $("div.related:first ul li:not(.right) a").slice(1).each(function(i, item){ + if (item.text.length > 20) { + var tmpstr = item.text + $(item).attr("title", tmpstr); + $(item).text(tmpstr.substr(0, 17) + "..."); + } + }); + $("div.related:last ul li:not(.right) a").slice(1).each(function(i, item){ + if (item.text.length > 20) { + var tmpstr = item.text + $(item).attr("title", tmpstr); + $(item).text(tmpstr.substr(0, 17) + "..."); + } + }); +}); + +$(window).resize(function(){ + if ($(window).width() <= 776) { + $("li.nav-item-0 a").text("Top"); + } + else { + $("li.nav-item-0 a").text("QuaPy 0.1.6 documentation"); + } +}); \ No newline at end of file diff --git a/docs/build/html/_static/css3-mediaqueries.js b/docs/build/html/_static/css3-mediaqueries.js new file mode 100644 index 0000000..59735f5 --- /dev/null +++ b/docs/build/html/_static/css3-mediaqueries.js @@ -0,0 +1 @@ +if(typeof Object.create!=="function"){Object.create=function(e){function 
t(){}t.prototype=e;return new t}}var ua={toString:function(){return navigator.userAgent},test:function(e){return this.toString().toLowerCase().indexOf(e.toLowerCase())>-1}};ua.version=(ua.toString().toLowerCase().match(/[\s\S]+(?:rv|it|ra|ie)[\/: ]([\d.]+)/)||[])[1];ua.webkit=ua.test("webkit");ua.gecko=ua.test("gecko")&&!ua.webkit;ua.opera=ua.test("opera");ua.ie=ua.test("msie")&&!ua.opera;ua.ie6=ua.ie&&document.compatMode&&typeof document.documentElement.style.maxHeight==="undefined";ua.ie7=ua.ie&&document.documentElement&&typeof document.documentElement.style.maxHeight!=="undefined"&&typeof XDomainRequest==="undefined";ua.ie8=ua.ie&&typeof XDomainRequest!=="undefined";var domReady=function(){var e=[];var t=function(){if(!arguments.callee.done){arguments.callee.done=true;for(var t=0;t=200&&r.status<300||r.status===304||navigator.userAgent.indexOf("Safari")>-1&&typeof r.status==="undefined"){t(r.responseText)}else{n()}document.documentElement.style.cursor="";r=null}};r.send("")};var l=function(t){t=t.replace(e.REDUNDANT_COMPONENTS,"");t=t.replace(e.REDUNDANT_WHITESPACE,"$1");t=t.replace(e.WHITESPACE_IN_PARENTHESES,"($1)");t=t.replace(e.MORE_WHITESPACE," ");t=t.replace(e.FINAL_SEMICOLONS,"}");return t};var c={stylesheet:function(t){var n={};var r=[],i=[],s=[],o=[];var u=t.cssHelperText;var a=t.getAttribute("media");if(a){var f=a.toLowerCase().split(",")}else{var f=["all"]}for(var l=0;l-1&&a.href&&a.href.length!==0&&!a.disabled){r[r.length]=a}}if(r.length>0){var c=0;var d=function(){c++;if(c===r.length){i()}};var v=function(t){var n=t.href;f(n,function(r){r=l(r).replace(e.RELATIVE_URLS,"url("+n.substring(0,n.lastIndexOf("/"))+"/$1)");t.cssHelperText=r;d()},d)};for(u=0;u0){r.setAttribute("media",t.join(","))}document.getElementsByTagName("head")[0].appendChild(r);if(r.styleSheet){r.styleSheet.cssText=e}else{r.appendChild(document.createTextNode(e))}r.addedWithCssHelper=true;if(typeof n==="undefined"||n===true){cssHelper.parsed(function(t){var n=p(r,e);for(var i in 
n){if(n.hasOwnProperty(i)){g(i,n[i])}}a("newStyleParsed",r)})}else{r.parsingDisallowed=true}return r},removeStyle:function(e){return e.parentNode.removeChild(e)},parsed:function(e){if(n){s(e)}else{if(typeof t!=="undefined"){if(typeof e==="function"){e(t)}}else{s(e);d()}}},stylesheets:function(e){cssHelper.parsed(function(t){e(m.stylesheets||y("stylesheets"))})},mediaQueryLists:function(e){cssHelper.parsed(function(t){e(m.mediaQueryLists||y("mediaQueryLists"))})},rules:function(e){cssHelper.parsed(function(t){e(m.rules||y("rules"))})},selectors:function(e){cssHelper.parsed(function(t){e(m.selectors||y("selectors"))})},declarations:function(e){cssHelper.parsed(function(t){e(m.declarations||y("declarations"))})},properties:function(e){cssHelper.parsed(function(t){e(m.properties||y("properties"))})},broadcast:a,addListener:function(e,t){if(typeof t==="function"){if(!u[e]){u[e]={listeners:[]}}u[e].listeners[u[e].listeners.length]=t}},removeListener:function(e,t){if(typeof t==="function"&&u[e]){var n=u[e].listeners;for(var r=0;r=a||s&&l0}}else if("device-height"===e.substring(r-13,r)){c=screen.height;if(t!==null){if(u==="length"){return i&&c>=a||s&&c0}}else if("width"===e.substring(r-5,r)){l=document.documentElement.clientWidth||document.body.clientWidth;if(t!==null){if(u==="length"){return i&&l>=a||s&&l0}}else if("height"===e.substring(r-6,r)){c=document.documentElement.clientHeight||document.body.clientHeight;if(t!==null){if(u==="length"){return i&&c>=a||s&&c0}}else if("device-aspect-ratio"===e.substring(r-19,r)){return u==="aspect-ratio"&&screen.width*a[1]===screen.height*a[0]}else if("color-index"===e.substring(r-11,r)){var h=Math.pow(2,screen.colorDepth);if(t!==null){if(u==="absolute"){return i&&h>=a||s&&h0}}else if("color"===e.substring(r-5,r)){var p=screen.colorDepth;if(t!==null){if(u==="absolute"){return i&&p>=a||s&&p0}}else if("resolution"===e.substring(r-10,r)){var d;if(f==="dpcm"){d=o("1cm")}else{d=o("1in")}if(t!==null){if(u==="resolution"){return 
i&&d>=a||s&&d0}}else{return false}};var a=function(e){var t=e.getValid();var n=e.getExpressions();var r=n.length;if(r>0){for(var i=0;i0){u=false;for(var f=0;f0){l[c++]=","}l[c++]=h}}if(l.length>0){r[r.length]=cssHelper.addStyle("@media "+l.join("")+"{"+e.getCssText()+"}",t,false)}};var l=function(e,t){for(var n=0;n0}}var o=[],u=[];for(var f in i){if(i.hasOwnProperty(f)){o[o.length]=f;if(i[f]){u[u.length]=f}if(f==="all"){n=true}}}if(u.length>0){r[r.length]=cssHelper.addStyle(e.getCssText(),u,false)}var c=e.getMediaQueryLists();if(n){l(c)}else{l(c,o)}};var h=function(e){for(var t=0;td||Math.abs(s-t)>d){e=n;t=s;clearTimeout(r);r=setTimeout(function(){if(!i()){p()}else{cssHelper.broadcast("cssMediaQueriesTested")}},500)}};window.onresize=function(){var e=window.onresize||function(){};return function(){e();s()}}()};var m=document.documentElement;m.style.marginLeft="-32767px";setTimeout(function(){m.style.marginLeft=""},5e3);return function(){if(!i()){cssHelper.addListener("newStyleParsed",function(e){c(e.cssHelperParsed.stylesheet)});cssHelper.addListener("cssMediaQueriesTested",function(){if(ua.ie){m.style.width="1px"}setTimeout(function(){m.style.width="";m.style.marginLeft=""},0);cssHelper.removeListener("cssMediaQueriesTested",arguments.callee)});s();p()}else{m.style.marginLeft=""}v()}}());try{document.execCommand("BackgroundImageCache",false,true)}catch(e){} diff --git a/docs/build/html/_static/css3-mediaqueries_src.js b/docs/build/html/_static/css3-mediaqueries_src.js new file mode 100644 index 0000000..7878620 --- /dev/null +++ b/docs/build/html/_static/css3-mediaqueries_src.js @@ -0,0 +1,1104 @@ +/* +css3-mediaqueries.js - CSS Helper and CSS3 Media Queries Enabler + +author: Wouter van der Graaf +version: 1.0 (20110330) +license: MIT +website: http://code.google.com/p/css3-mediaqueries-js/ + +W3C spec: http://www.w3.org/TR/css3-mediaqueries/ + +Note: use of embedded