update doc

This commit is contained in:
Alejandro Moreo Fernandez 2021-12-07 17:16:39 +01:00
parent 2bd47f0841
commit 5deb92b457
10 changed files with 720 additions and 156 deletions

View File

@ -1,4 +1,3 @@
Looks like there are some "multilingual" stuff in the master branch? See, e.g., MultilingualLabelledCollection in data/base.py
Packaging:
==========================================
@ -13,8 +12,8 @@ Unify ThresholdOptimization methods, as an extension of PACC (and not ACC), the
use a prob classifier (take into account that PACC uses pcc internally, whereas the threshold methods use cc
instead). The fit method of ACC and PACC has a block for estimating the validation estimates that should be unified
as well...
Rename APP NPP
Add NPP as an option for GridSearchQ
Refactor protocols. APP and NPP related functionalities are duplicated in functional, LabelledCollection, and evaluation
New features:
==========================================

View File

@ -400,6 +400,8 @@
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="quapy.html#quapy.evaluation.gen_prevalence_prediction">gen_prevalence_prediction() (in module quapy.evaluation)</a>
</li>
<li><a href="quapy.html#quapy.evaluation.gen_prevalence_report">gen_prevalence_report() (in module quapy.evaluation)</a>
</li>
<li><a href="quapy.method.html#quapy.method.neural.QuaNetTrainer.get_aggregative_estims">get_aggregative_estims() (quapy.method.neural.QuaNetTrainer method)</a>
</li>

Binary file not shown.

View File

@ -238,11 +238,12 @@ from the labels. The classes must be indicated in cases in which some of the lab
<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.artificial_sampling_generator">
<span class="sig-name descname"><span class="pre">artificial_sampling_generator</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">sample_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_prevalences</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">101</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.artificial_sampling_generator" title="Permalink to this definition"></a></dt>
<dd><p>A generator of samples that implements the artificial prevalence protocol (APP). The APP consists of exploring
a grid of prevalence values (e.g., [0, 0.05, 0.1, 0.15, …, 1]), and generating all valid combinations of
<dd><p>A generator of samples that implements the artificial prevalence protocol (APP).
The APP consists of exploring a grid of prevalence values containing <cite>n_prevalences</cite> points (e.g.,
[0, 0.05, 0.1, 0.15, …, 1], if <cite>n_prevalences=21</cite>), and generating all valid combinations of
prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], …,
[1, 0, 0] prevalence values of size <cite>sample_size</cite> will be yielded). The number of samples for each valid
combination of prevalence values is indicated by <cite>repeats</cite></p>
combination of prevalence values is indicated by <cite>repeats</cite>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">

View File

@ -514,82 +514,276 @@ will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has
<span id="quapy-evaluation-module"></span><h2>quapy.evaluation module<a class="headerlink" href="#module-quapy.evaluation" title="Permalink to this headline"></a></h2>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.evaluation.artificial_prevalence_prediction">
<span class="sig-prename descclassname"><span class="pre">quapy.evaluation.</span></span><span class="sig-name descname"><span class="pre">artificial_prevalence_prediction</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.method.html#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><span class="pre">quapy.method.base.BaseQuantifier</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">quapy.data.base.LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_prevpoints</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">210</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_repetitions</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eval_budget</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_seed</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">42</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.evaluation.artificial_prevalence_prediction" title="Permalink to this definition"></a></dt>
<dd><p>Performs the predictions for all samples generated according to the artificial sampling protocol.</p>
<span class="sig-prename descclassname"><span class="pre">quapy.evaluation.</span></span><span class="sig-name descname"><span class="pre">artificial_prevalence_prediction</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.method.html#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><span class="pre">quapy.method.base.BaseQuantifier</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">quapy.data.base.LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_prevpoints</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">101</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eval_budget</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_seed</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">42</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.evaluation.artificial_prevalence_prediction" title="Permalink to this definition"></a></dt>
<dd><p>Performs the predictions for all samples generated according to the Artificial Prevalence Protocol (APP).
The APP consists of exploring a grid of prevalence values containing <cite>n_prevalences</cite> points (e.g.,
[0, 0.05, 0.1, 0.15, …, 1], if <cite>n_prevalences=21</cite>), and generating all valid combinations of
prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], …,
[1, 0, 0] prevalence values of size <cite>sample_size</cite> will be considered). The number of samples for each valid
combination of prevalence values is indicated by <cite>repeats</cite>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>model</strong> the model in charge of generating the class prevalence estimations</p></li>
<li><p><strong>test</strong> the test set on which to perform arificial sampling</p></li>
<li><p><strong>sample_size</strong> the size of the samples</p></li>
<li><p><strong>n_prevpoints</strong> the number of different prevalences to sample (or set to None if eval_budget is specified)</p></li>
<li><p><strong>n_repetitions</strong> the number of repetitions for each prevalence</p></li>
<li><p><strong>eval_budget</strong> if specified, sets a ceil on the number of evaluations to perform. For example, if there are 3</p></li>
<li><p><strong>test</strong> the test set on which to perform APP</p></li>
<li><p><strong>sample_size</strong> integer, the size of the samples</p></li>
<li><p><strong>n_prevpoints</strong> integer, the number of different prevalences to sample (or set to None if eval_budget
is specified; default 101, i.e., steps of 1%)</p></li>
<li><p><strong>repeats</strong> integer, the number of repetitions for each prevalence (default 1)</p></li>
<li><p><strong>eval_budget</strong> integer, if specified, sets a ceil on the number of evaluations to perform. For example, if
there are 3 classes, <cite>repeats=1</cite>, and <cite>eval_budget=20</cite>, then <cite>n_prevpoints</cite> will be set to 5, since this
will generate 15 different prevalence vectors ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] … [1, 0, 0]) and
since setting <cite>n_prevpoints=6</cite> would produce more than 20 evaluations.</p></li>
<li><p><strong>n_jobs</strong> integer, number of jobs to be run in parallel (default 1)</p></li>
<li><p><strong>random_seed</strong> integer, allows to replicate the samplings. The seed is local to the method and does not affect
any other random process (default 42)</p></li>
<li><p><strong>verbose</strong> if True, shows a progress bar</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>a tuple containing two <cite>np.ndarrays</cite> of shape <cite>(m,n,)</cite> with <cite>m</cite> the number of samples
<cite>(n_prevpoints*repeats)</cite> and <cite>n</cite> the number of classes. The first one contains the true prevalence values
for the samples generated while the second one contains the prevalence estimations</p>
</dd>
</dl>
<p>classes, n_repetitions=1 and eval_budget=20, then n_prevpoints will be set to 5, since this will generate 15
different prevalences ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] … [1, 0, 0]) and since setting it n_prevpoints
to 6 would produce more than 20 evaluations.
:param n_jobs: number of jobs to be run in parallel
:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
any other random process.
:param verbose: if True, shows a progress bar
:return: two ndarrays of shape (m,n) with m the number of samples (n_prevpoints*n_repetitions) and n the</p>
<blockquote>
<div><p>number of classes. The first one contains the true prevalences for the samples generated while the second one
contains the the prevalence estimations</p>
</div></blockquote>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.evaluation.artificial_prevalence_protocol">
<span class="sig-prename descclassname"><span class="pre">quapy.evaluation.</span></span><span class="sig-name descname"><span class="pre">artificial_prevalence_protocol</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.method.html#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><span class="pre">quapy.method.base.BaseQuantifier</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">quapy.data.base.LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_prevpoints</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">210</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_repetitions</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eval_budget</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_seed</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">42</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">error_metric</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span> </span><span class="pre">Callable</span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">'mae'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.evaluation.artificial_prevalence_protocol" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<span class="sig-prename descclassname"><span class="pre">quapy.evaluation.</span></span><span class="sig-name descname"><span class="pre">artificial_prevalence_protocol</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.method.html#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><span class="pre">quapy.method.base.BaseQuantifier</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">quapy.data.base.LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_prevpoints</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">101</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eval_budget</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_seed</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">42</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">error_metric</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span> </span><span class="pre">Callable</span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">'mae'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.evaluation.artificial_prevalence_protocol" title="Permalink to this definition"></a></dt>
<dd><p>Generates samples according to the Artificial Prevalence Protocol (APP).
The APP consists of exploring a grid of prevalence values containing <cite>n_prevalences</cite> points (e.g.,
[0, 0.05, 0.1, 0.15, …, 1], if <cite>n_prevalences=21</cite>), and generating all valid combinations of
prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], …,
[1, 0, 0] prevalence values of size <cite>sample_size</cite> will be considered). The number of samples for each valid
combination of prevalence values is indicated by <cite>repeats</cite>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>model</strong> the model in charge of generating the class prevalence estimations</p></li>
<li><p><strong>test</strong> the test set on which to perform APP</p></li>
<li><p><strong>sample_size</strong> integer, the size of the samples</p></li>
<li><p><strong>n_prevpoints</strong> integer, the number of different prevalences to sample (or set to None if eval_budget
is specified; default 101, i.e., steps of 1%)</p></li>
<li><p><strong>repeats</strong> integer, the number of repetitions for each prevalence (default 1)</p></li>
<li><p><strong>eval_budget</strong> integer, if specified, sets a ceil on the number of evaluations to perform. For example, if
there are 3 classes, <cite>repeats=1</cite>, and <cite>eval_budget=20</cite>, then <cite>n_prevpoints</cite> will be set to 5, since this
will generate 15 different prevalence vectors ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] … [1, 0, 0]) and
since setting <cite>n_prevpoints=6</cite> would produce more than 20 evaluations.</p></li>
<li><p><strong>n_jobs</strong> integer, number of jobs to be run in parallel (default 1)</p></li>
<li><p><strong>random_seed</strong> integer, allows to replicate the samplings. The seed is local to the method and does not affect
any other random process (default 42)</p></li>
<li><p><strong>error_metric</strong> a string indicating the name of the error (as defined in <a class="reference internal" href="#module-quapy.error" title="quapy.error"><code class="xref py py-mod docutils literal notranslate"><span class="pre">quapy.error</span></code></a>) or a
callable error function</p></li>
<li><p><strong>verbose</strong> set to True (default False) for displaying some information on standard output</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>yields one sample at a time</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.evaluation.artificial_prevalence_report">
<span class="sig-prename descclassname"><span class="pre">quapy.evaluation.</span></span><span class="sig-name descname"><span class="pre">artificial_prevalence_report</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.method.html#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><span class="pre">quapy.method.base.BaseQuantifier</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">quapy.data.base.LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_prevpoints</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">210</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_repetitions</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eval_budget</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_seed</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">42</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">error_metrics</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Iterable</span><span class="p"><span class="pre">[</span></span><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span> </span><span class="pre">Callable</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">'mae'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.evaluation.artificial_prevalence_report" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<span class="sig-prename descclassname"><span class="pre">quapy.evaluation.</span></span><span class="sig-name descname"><span class="pre">artificial_prevalence_report</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.method.html#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><span class="pre">quapy.method.base.BaseQuantifier</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">quapy.data.base.LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_prevpoints</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">101</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eval_budget</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_seed</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">42</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">error_metrics</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Iterable</span><span class="p"><span class="pre">[</span></span><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span> </span><span class="pre">Callable</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">'mae'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.evaluation.artificial_prevalence_report" title="Permalink to this definition"></a></dt>
<dd><p>Generates an evaluation report for all samples generated according to the Artificial Prevalence Protocol (APP).
The APP consists of exploring a grid of prevalence values containing <cite>n_prevalences</cite> points (e.g.,
[0, 0.05, 0.1, 0.15, …, 1], if <cite>n_prevalences=21</cite>), and generating all valid combinations of
prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], …,
[1, 0, 0] prevalence values of size <cite>sample_size</cite> will be considered). The number of samples for each valid
combination of prevalence values is indicated by <cite>repeats</cite>.
Te report takes the form of a
pandas <a class="reference external" href="https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html">dataframe</a>
in which the rows correspond to different samples, and the columns inform of the true prevalence values,
the estimated prevalence values, and the score obtained by each of the evaluation measures indicated.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>model</strong> the model in charge of generating the class prevalence estimations</p></li>
<li><p><strong>test</strong> the test set on which to perform APP</p></li>
<li><p><strong>sample_size</strong> integer, the size of the samples</p></li>
<li><p><strong>n_prevpoints</strong> integer, the number of different prevalences to sample (or set to None if eval_budget
is specified; default 101, i.e., steps of 1%)</p></li>
<li><p><strong>repeats</strong> integer, the number of repetitions for each prevalence (default 1)</p></li>
<li><p><strong>eval_budget</strong> integer, if specified, sets a ceil on the number of evaluations to perform. For example, if
there are 3 classes, <cite>repeats=1</cite>, and <cite>eval_budget=20</cite>, then <cite>n_prevpoints</cite> will be set to 5, since this
will generate 15 different prevalence vectors ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] … [1, 0, 0]) and
since setting <cite>n_prevpoints=6</cite> would produce more than 20 evaluations.</p></li>
<li><p><strong>n_jobs</strong> integer, number of jobs to be run in parallel (default 1)</p></li>
<li><p><strong>random_seed</strong> integer, allows to replicate the samplings. The seed is local to the method and does not affect
any other random process (default 42)</p></li>
<li><p><strong>error_metrics</strong> a string indicating the name of the error (as defined in <a class="reference internal" href="#module-quapy.error" title="quapy.error"><code class="xref py py-mod docutils literal notranslate"><span class="pre">quapy.error</span></code></a>) or a
callable error function; optionally, a list of strings or callables can be indicated, if the results
are to be evaluated with more than one error metric. Default is “mae”</p></li>
<li><p><strong>verbose</strong> if True, shows a progress bar</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>pandas dataframe with rows corresponding to different samples, and with columns informing of the
true prevalence values, the estimated prevalence values, and the score obtained by each of the evaluation
measures indicated.</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.evaluation.evaluate">
<span class="sig-prename descclassname"><span class="pre">quapy.evaluation.</span></span><span class="sig-name descname"><span class="pre">evaluate</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.method.html#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><span class="pre">quapy.method.base.BaseQuantifier</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test_samples</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Iterable</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">quapy.data.base.LabelledCollection</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">err</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span> </span><span class="pre">Callable</span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">int</span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">-</span> <span class="pre">1</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.evaluation.evaluate" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<span class="sig-prename descclassname"><span class="pre">quapy.evaluation.</span></span><span class="sig-name descname"><span class="pre">evaluate</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.method.html#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><span class="pre">quapy.method.base.BaseQuantifier</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test_samples</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Iterable</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">quapy.data.base.LabelledCollection</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">error_metric</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span> </span><span class="pre">Callable</span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">int</span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">-</span> <span class="pre">1</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.evaluation.evaluate" title="Permalink to this definition"></a></dt>
<dd><p>Evaluates a model on a sequence of test samples in terms of a given error metric.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>model</strong> the model in charge of generating the class prevalence estimations</p></li>
<li><p><strong>test_samples</strong> an iterable yielding one sample at a time</p></li>
<li><p><strong>error_metric</strong> a string indicating the name of the error (as defined in <a class="reference internal" href="#module-quapy.error" title="quapy.error"><code class="xref py py-mod docutils literal notranslate"><span class="pre">quapy.error</span></code></a>) or a
callable error function</p></li>
<li><p><strong>n_jobs</strong> integer, number of jobs to be run in parallel (default 1)</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>the score obtained using <cite>error_metric</cite></p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.evaluation.gen_prevalence_prediction">
<span class="sig-prename descclassname"><span class="pre">quapy.evaluation.</span></span><span class="sig-name descname"><span class="pre">gen_prevalence_prediction</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.method.html#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><span class="pre">quapy.method.base.BaseQuantifier</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">gen_fn</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Callable</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eval_budget</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.evaluation.gen_prevalence_prediction" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Generates prevalence predictions for a custom protocol defined as a generator function that yields
samples at each iteration. The sequence of samples is processed exhaustively if <cite>eval_budget=None</cite>
or up to the <cite>eval_budget</cite> iterations if specified.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>model</strong> the model in charge of generating the class prevalence estimations</p></li>
<li><p><strong>gen_fn</strong> a generator function yielding one sample at each iteration</p></li>
<li><p><strong>eval_budget</strong> a maximum number of evaluations to run. Set to None (default) for exploring the
entire sequence</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>a tuple containing two <cite>np.ndarrays</cite> of shape <cite>(m,n,)</cite> with <cite>m</cite> the number of samples
generated and <cite>n</cite> the number of classes. The first one contains the true prevalence values
for the samples generated while the second one contains the prevalence estimations</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.evaluation.gen_prevalence_report">
<span class="sig-prename descclassname"><span class="pre">quapy.evaluation.</span></span><span class="sig-name descname"><span class="pre">gen_prevalence_report</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.method.html#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><span class="pre">quapy.method.base.BaseQuantifier</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">gen_fn</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Callable</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eval_budget</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">error_metrics</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Iterable</span><span class="p"><span class="pre">[</span></span><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span> </span><span class="pre">Callable</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">'mae'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.evaluation.gen_prevalence_report" title="Permalink to this definition"></a></dt>
<dd><p>GGenerates an evaluation report for a custom protocol defined as a generator function that yields
samples at each iteration. The sequence of samples is processed exhaustively if <cite>eval_budget=None</cite>
or up to the <cite>eval_budget</cite> iterations if specified.
Te report takes the form of a
pandas <a class="reference external" href="https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html">dataframe</a>
in which the rows correspond to different samples, and the columns inform of the true prevalence values,
the estimated prevalence values, and the score obtained by each of the evaluation measures indicated.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>model</strong> the model in charge of generating the class prevalence estimations</p></li>
<li><p><strong>gen_fn</strong> a generator function yielding one sample at each iteration</p></li>
<li><p><strong>eval_budget</strong> a maximum number of evaluations to run. Set to None (default) for exploring the
entire sequence</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>a tuple containing two <cite>np.ndarrays</cite> of shape <cite>(m,n,)</cite> with <cite>m</cite> the number of samples
generated. The first one contains the true prevalence values
for the samples generated while the second one contains the prevalence estimations</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.evaluation.natural_prevalence_prediction">
<span class="sig-prename descclassname"><span class="pre">quapy.evaluation.</span></span><span class="sig-name descname"><span class="pre">natural_prevalence_prediction</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.method.html#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><span class="pre">quapy.method.base.BaseQuantifier</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">quapy.data.base.LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_repetitions</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_seed</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">42</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.evaluation.natural_prevalence_prediction" title="Permalink to this definition"></a></dt>
<dd><p>Performs the predictions for all samples generated according to the artificial sampling protocol.
:param model: the model in charge of generating the class prevalence estimations
:param test: the test set on which to perform arificial sampling
:param sample_size: the size of the samples
:param n_repetitions: the number of repetitions for each prevalence
:param n_jobs: number of jobs to be run in parallel
:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
any other random process.
:param verbose: if True, shows a progress bar
:return: two ndarrays of shape (m,n) with m the number of samples (n_repetitions) and n the</p>
<blockquote>
<div><p>number of classes. The first one contains the true prevalences for the samples generated while the second one
contains the the prevalence estimations</p>
</div></blockquote>
<span class="sig-prename descclassname"><span class="pre">quapy.evaluation.</span></span><span class="sig-name descname"><span class="pre">natural_prevalence_prediction</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.method.html#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><span class="pre">quapy.method.base.BaseQuantifier</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">quapy.data.base.LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_seed</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">42</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.evaluation.natural_prevalence_prediction" title="Permalink to this definition"></a></dt>
<dd><p>Performs the predictions for all samples generated according to the Natural Prevalence Protocol (NPP).
The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural
prevalence of the collection.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>model</strong> the model in charge of generating the class prevalence estimations</p></li>
<li><p><strong>test</strong> the test set on which to perform NPP</p></li>
<li><p><strong>sample_size</strong> integer, the size of the samples</p></li>
<li><p><strong>repeats</strong> integer, the number of samples to generate</p></li>
<li><p><strong>n_jobs</strong> integer, number of jobs to be run in parallel (default 1)</p></li>
<li><p><strong>random_seed</strong> allows to replicate the samplings. The seed is local to the method and does not affect
any other random process (default 42)</p></li>
<li><p><strong>verbose</strong> if True, shows a progress bar</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>a tuple containing two <cite>np.ndarrays</cite> of shape <cite>(m,n,)</cite> with <cite>m</cite> the number of samples
<cite>(repeats)</cite> and <cite>n</cite> the number of classes. The first one contains the true prevalence values
for the samples generated while the second one contains the prevalence estimations</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.evaluation.natural_prevalence_protocol">
<span class="sig-prename descclassname"><span class="pre">quapy.evaluation.</span></span><span class="sig-name descname"><span class="pre">natural_prevalence_protocol</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.method.html#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><span class="pre">quapy.method.base.BaseQuantifier</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">quapy.data.base.LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_repetitions</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_seed</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">42</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">error_metric</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span> </span><span class="pre">Callable</span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">'mae'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.evaluation.natural_prevalence_protocol" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<span class="sig-prename descclassname"><span class="pre">quapy.evaluation.</span></span><span class="sig-name descname"><span class="pre">natural_prevalence_protocol</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.method.html#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><span class="pre">quapy.method.base.BaseQuantifier</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">quapy.data.base.LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_seed</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">42</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">error_metric</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span> </span><span class="pre">Callable</span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">'mae'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.evaluation.natural_prevalence_protocol" title="Permalink to this definition"></a></dt>
<dd><p>Generates samples according to the Natural Prevalence Protocol (NPP).
The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural
prevalence of the collection.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>model</strong> the model in charge of generating the class prevalence estimations</p></li>
<li><p><strong>test</strong> the test set on which to perform NPP</p></li>
<li><p><strong>sample_size</strong> integer, the size of the samples</p></li>
<li><p><strong>repeats</strong> integer, the number of samples to generate</p></li>
<li><p><strong>n_jobs</strong> integer, number of jobs to be run in parallel (default 1)</p></li>
<li><p><strong>random_seed</strong> allows to replicate the samplings. The seed is local to the method and does not affect
any other random process (default 42)</p></li>
<li><p><strong>error_metric</strong> a string indicating the name of the error (as defined in <a class="reference internal" href="#module-quapy.error" title="quapy.error"><code class="xref py py-mod docutils literal notranslate"><span class="pre">quapy.error</span></code></a>) or a
callable error function</p></li>
<li><p><strong>verbose</strong> if True, shows a progress bar</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>yields one sample at a time</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.evaluation.natural_prevalence_report">
<span class="sig-prename descclassname"><span class="pre">quapy.evaluation.</span></span><span class="sig-name descname"><span class="pre">natural_prevalence_report</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.method.html#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><span class="pre">quapy.method.base.BaseQuantifier</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">quapy.data.base.LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_repetitions</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_seed</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">42</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">error_metrics</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Iterable</span><span class="p"><span class="pre">[</span></span><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span> </span><span class="pre">Callable</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">'mae'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.evaluation.natural_prevalence_report" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<span class="sig-prename descclassname"><span class="pre">quapy.evaluation.</span></span><span class="sig-name descname"><span class="pre">natural_prevalence_report</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.method.html#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><span class="pre">quapy.method.base.BaseQuantifier</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">test</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">quapy.data.base.LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_seed</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">42</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">error_metrics</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">Iterable</span><span class="p"><span class="pre">[</span></span><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span> </span><span class="pre">Callable</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">'mae'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.evaluation.natural_prevalence_report" title="Permalink to this definition"></a></dt>
<dd><p>Generates an evaluation report for all samples generated according to the Natural Prevalence Protocol (NPP).
The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural
prevalence of the collection.
Te report takes the form of a
pandas <a class="reference external" href="https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html">dataframe</a>
in which the rows correspond to different samples, and the columns inform of the true prevalence values,
the estimated prevalence values, and the score obtained by each of the evaluation measures indicated.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>model</strong> the model in charge of generating the class prevalence estimations</p></li>
<li><p><strong>test</strong> the test set on which to perform NPP</p></li>
<li><p><strong>sample_size</strong> integer, the size of the samples</p></li>
<li><p><strong>repeats</strong> integer, the number of samples to generate</p></li>
<li><p><strong>n_jobs</strong> integer, number of jobs to be run in parallel (default 1)</p></li>
<li><p><strong>random_seed</strong> allows to replicate the samplings. The seed is local to the method and does not affect
any other random process (default 42)</p></li>
<li><p><strong>error_metrics</strong> a string indicating the name of the error (as defined in <a class="reference internal" href="#module-quapy.error" title="quapy.error"><code class="xref py py-mod docutils literal notranslate"><span class="pre">quapy.error</span></code></a>) or a
callable error function; optionally, a list of strings or callables can be indicated, if the results
are to be evaluated with more than one error metric. Default is “mae”</p></li>
<li><p><strong>verbose</strong> if True, shows a progress bar</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>a tuple containing two <cite>np.ndarrays</cite> of shape <cite>(m,n,)</cite> with <cite>m</cite> the number of samples
<cite>(repeats)</cite> and <cite>n</cite> the number of classes. The first one contains the true prevalence values
for the samples generated while the second one contains the prevalence estimations</p>
</dd>
</dl>
</dd></dl>
</section>
<section id="module-quapy.functional">
@ -597,12 +791,44 @@ contains the the prevalence estimations</p>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.functional.HellingerDistance">
<span class="sig-prename descclassname"><span class="pre">quapy.functional.</span></span><span class="sig-name descname"><span class="pre">HellingerDistance</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">P</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">Q</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.functional.HellingerDistance" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Computes the Hellingher Distance (HD) between (discretized) distributions <cite>P</cite> and <cite>Q</cite>.
The HD for two discrete distributions of <cite>k</cite> bins is defined as:</p>
<div class="math notranslate nohighlight">
\[HD(P,Q) = \frac{ 1 }{ \sqrt{ 2 } } \sqrt{ \sum_{i=1}^k ( \sqrt{p_i} - \sqrt{q_i} )^2 }\]</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>P</strong> real-valued array-like of shape <cite>(k,)</cite> representing a discrete distribution</p></li>
<li><p><strong>Q</strong> real-valued array-like of shape <cite>(k,)</cite> representing a discrete distribution</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>float</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.functional.adjusted_quantification">
<span class="sig-prename descclassname"><span class="pre">quapy.functional.</span></span><span class="sig-name descname"><span class="pre">adjusted_quantification</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevalence_estim</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">tpr</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">fpr</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">clip</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.functional.adjusted_quantification" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Implements the adjustment of ACC and PACC for the binary case. The adjustment for a prevalence estimate of the
positive class <cite>p</cite> comes down to computing:</p>
<div class="math notranslate nohighlight">
\[ACC(p) = \frac{ p - fpr }{ tpr - fpr }\]</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevalence_estim</strong> float, the estimated value for the positive class</p></li>
<li><p><strong>tpr</strong> float, the true positive rate of the classifier</p></li>
<li><p><strong>fpr</strong> float, the false positive rate of the classifier</p></li>
<li><p><strong>clip</strong> set to True (default) to clip values that might exceed the range [0,1]</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>float, the adjusted count</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.functional.artificial_prevalence_sampling">
@ -626,7 +852,7 @@ constrained dimension</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>an ndarray of shape <cite>(n, dimensions)</cite> if <cite>return_constrained_dim=True</cite> or of shape <cite>(n, dimensions-1)</cite>
<dd class="field-even"><p>a <cite>np.ndarray</cite> of shape <cite>(n, dimensions)</cite> if <cite>return_constrained_dim=True</cite> or of shape <cite>(n, dimensions-1)</cite>
if <cite>return_constrained_dim=False</cite>, where <cite>n</cite> is the number of valid combinations found in the grid multiplied
by <cite>repeat</cite></p>
</dd>
@ -636,30 +862,63 @@ by <cite>repeat</cite></p>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.functional.get_nprevpoints_approximation">
<span class="sig-prename descclassname"><span class="pre">quapy.functional.</span></span><span class="sig-name descname"><span class="pre">get_nprevpoints_approximation</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">combinations_budget</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_classes</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_repeats</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">int</span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">1</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.functional.get_nprevpoints_approximation" title="Permalink to this definition"></a></dt>
<dd><p>Searches for the largest number of (equidistant) prevalence points to define for each of the n_classes classes so that
the number of valid prevalences generated as combinations of prevalence points (points in a n_classes-dimensional
simplex) do not exceed combinations_budget.
:param n_classes: number of classes
:param n_repeats: number of repetitions for each prevalence combination
:param combinations_budget: maximum number of combinatios allowed
:return: the largest number of prevalence points that generate less than combinations_budget valid prevalences</p>
<dd><p>Searches for the largest number of (equidistant) prevalence points to define for each of the <cite>n_classes</cite> classes so
that the number of valid prevalence values generated as combinations of prevalence points (points in a
<cite>n_classes</cite>-dimensional simplex) do not exceed combinations_budget.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>combinations_budget</strong> integer, maximum number of combinatios allowed</p></li>
<li><p><strong>n_classes</strong> integer, number of classes</p></li>
<li><p><strong>n_repeats</strong> integer, number of repetitions for each prevalence combination</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>the largest number of prevalence points that generate less than combinations_budget valid prevalences</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.functional.normalize_prevalence">
<span class="sig-prename descclassname"><span class="pre">quapy.functional.</span></span><span class="sig-name descname"><span class="pre">normalize_prevalence</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevalences</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.functional.normalize_prevalence" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Normalize a vector or matrix of prevalence values. The normalization consists of applying a L1 normalization in
cases in which the prevalence values are not all-zeros, and to convert the prevalence values into <cite>1/n_classes</cite> in
cases in which all values are zero.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>prevalences</strong> array-like of shape <cite>(n_classes,)</cite> or of shape <cite>(n_samples, n_classes,)</cite> with prevalence values</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>a normalized vector or matrix of prevalence values</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.functional.num_prevalence_combinations">
<span class="sig-prename descclassname"><span class="pre">quapy.functional.</span></span><span class="sig-name descname"><span class="pre">num_prevalence_combinations</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n_prevpoints</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_classes</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_repeats</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">int</span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">1</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.functional.num_prevalence_combinations" title="Permalink to this definition"></a></dt>
<dd><p>Computes the number of prevalence combinations in the n_classes-dimensional simplex if nprevpoints equally distant
prevalences are generated and n_repeats repetitions are requested
:param n_classes: number of classes
:param n_prevpoints: number of prevalence points.
:param n_repeats: number of repetitions for each prevalence combination
:return: The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the
number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]</p>
<dd><p>Computes the number of valid prevalence combinations in the n_classes-dimensional simplex if <cite>n_prevpoints</cite> equally
distant prevalence values are generated and <cite>n_repeats</cite> repetitions are requested.
The computation comes down to calculating:</p>
<div class="math notranslate nohighlight">
\[\binom{N+C-1}{C-1} \times r\]</div>
<p>where <cite>N</cite> is <cite>n_prevpoints-1</cite>, i.e., the number of probability mass blocks to allocate, <cite>C</cite> is the number of
classes, and <cite>r</cite> is <cite>n_repeats</cite>. This solution comes from the
<a class="reference external" href="https://brilliant.org/wiki/integer-equations-star-and-bars/">Stars and Bars</a> problem.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>n_classes</strong> integer, number of classes</p></li>
<li><p><strong>n_prevpoints</strong> integer, number of prevalence points.</p></li>
<li><p><strong>n_repeats</strong> integer, number of repetitions for each prevalence combination</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the</p>
</dd>
</dl>
<p>number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]</p>
</dd></dl>
<dl class="py function">
@ -683,19 +942,33 @@ some classes have no examples.</p></li>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.functional.prevalence_from_probabilities">
<span class="sig-prename descclassname"><span class="pre">quapy.functional.</span></span><span class="sig-name descname"><span class="pre">prevalence_from_probabilities</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">posteriors</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">binarize</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">bool</span></span> <span class="o"><span class="pre">=</span></span> <span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.functional.prevalence_from_probabilities" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Returns a vector of prevalence values from a matrix of posterior probabilities.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>posteriors</strong> array-like of shape <cite>(n_instances, n_classes,)</cite> with posterior probabilities for each class</p></li>
<li><p><strong>binarize</strong> set to True (default is False) for computing the prevalence values on crisp decisions (i.e.,
converting the vectors of posterior probabilities into class indices, by taking the argmax).</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>array of shape <cite>(n_classes,)</cite> containing the prevalence values</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.functional.prevalence_linspace">
<span class="sig-prename descclassname"><span class="pre">quapy.functional.</span></span><span class="sig-name descname"><span class="pre">prevalence_linspace</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n_prevalences</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">21</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeat</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">smooth_limits_epsilon</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.01</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.functional.prevalence_linspace" title="Permalink to this definition"></a></dt>
<dd><p>Produces a uniformly separated values of prevalence. By default, produces an array of 21 prevalence values, with
<span class="sig-prename descclassname"><span class="pre">quapy.functional.</span></span><span class="sig-name descname"><span class="pre">prevalence_linspace</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n_prevalences</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">21</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">smooth_limits_epsilon</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0.01</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.functional.prevalence_linspace" title="Permalink to this definition"></a></dt>
<dd><p>Produces an array of uniformly separated values of prevalence.
By default, produces an array of 21 prevalence values, with
step 0.05 and with the limits smoothed, i.e.:
[0.01, 0.05, 0.10, 0.15, …, 0.90, 0.95, 0.99]</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>n_prevalences</strong> the number of prevalence values to sample from the [0,1] interval (default 21)</p></li>
<li><p><strong>repeat</strong> number of times each prevalence is to be repeated (defaults to 1)</p></li>
<li><p><strong>repeats</strong> number of times each prevalence is to be repeated (defaults to 1)</p></li>
<li><p><strong>smooth_limits_epsilon</strong> the quantity to add and subtract to the limits 0 and 1</p></li>
</ul>
</dd>
@ -708,17 +981,61 @@ step 0.05 and with the limits smoothed, i.e.:
<dl class="py function">
<dt class="sig sig-object py" id="quapy.functional.strprev">
<span class="sig-prename descclassname"><span class="pre">quapy.functional.</span></span><span class="sig-name descname"><span class="pre">strprev</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevalences</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prec</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">3</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.functional.strprev" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Returns a string representation for a prevalence vector. E.g.,</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">strprev</span><span class="p">([</span><span class="mi">1</span><span class="o">/</span><span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="o">/</span><span class="mi">3</span><span class="p">],</span> <span class="n">prec</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="s1">&#39;[0.33, 0.67]&#39;</span>
</pre></div>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevalences</strong> a vector of prevalence values</p></li>
<li><p><strong>prec</strong> float precision</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>string</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.functional.uniform_prevalence_sampling">
<span class="sig-prename descclassname"><span class="pre">quapy.functional.</span></span><span class="sig-name descname"><span class="pre">uniform_prevalence_sampling</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n_classes</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.functional.uniform_prevalence_sampling" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Implements the <a class="reference external" href="http://www.cs.cmu.edu/~nasmith/papers/smith+tromble.tr04.pdf">Kraemer algorithm</a>
for sampling uniformly at random from the unit simplex. This implementation is adapted from this
<cite>post &lt;https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex&gt;_</cite>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>n_classes</strong> integer, number of classes (dimensionality of the simplex)</p></li>
<li><p><strong>size</strong> number of samples to return</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><cite>np.ndarray</cite> of shape <cite>(size, n_classes,)</cite> if <cite>size&gt;1</cite>, or of shape <cite>(n_classes,)</cite> otherwise</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.functional.uniform_simplex_sampling">
<span class="sig-prename descclassname"><span class="pre">quapy.functional.</span></span><span class="sig-name descname"><span class="pre">uniform_simplex_sampling</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n_classes</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.functional.uniform_simplex_sampling" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Implements the <a class="reference external" href="http://www.cs.cmu.edu/~nasmith/papers/smith+tromble.tr04.pdf">Kraemer algorithm</a>
for sampling uniformly at random from the unit simplex. This implementation is adapted from this
<cite>post &lt;https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex&gt;_</cite>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>n_classes</strong> integer, number of classes (dimensionality of the simplex)</p></li>
<li><p><strong>size</strong> number of samples to return</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><cite>np.ndarray</cite> of shape <cite>(size, n_classes,)</cite> if <cite>size&gt;1</cite>, or of shape <cite>(n_classes,)</cite> otherwise</p>
</dd>
</dl>
</dd></dl>
</section>
<section id="module-quapy.model_selection">
@ -1147,7 +1464,7 @@ func is applied in two parallel processes to args[0:50] and to args[50:99]</p>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.util.pickled_resource">
<span class="sig-prename descclassname"><span class="pre">quapy.util.</span></span><span class="sig-name descname"><span class="pre">pickled_resource</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">pickle_path</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">generation_func</span></span><span class="p"><span class="pre">:</span></span> <span class="n"><span class="pre">callable</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.util.pickled_resource" title="Permalink to this definition"></a></dt>
<dd><p>Allows for fast reuse of resources that are generated only once by calling generation_func(<a href="#id1"><span class="problematic" id="id2">*</span></a>args). The next times
<dd><p>Allows for fast reuse of resources that are generated only once by calling generation_func(<a href="#id4"><span class="problematic" id="id5">*</span></a>args). The next times
this function is invoked, it loads the pickled resource. Example:</p>
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="k">def</span> <span class="nf">some_array</span><span class="p">(</span><span class="n">n</span><span class="p">):</span> <span class="c1"># a mock resource created with one parameter (`n`)</span>
<span class="gp">&gt;&gt;&gt; </span> <span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="n">n</span><span class="p">)</span>

File diff suppressed because one or more lines are too long

View File

@ -214,11 +214,12 @@ class LabelledCollection:
def artificial_sampling_generator(self, sample_size, n_prevalences=101, repeats=1):
"""
A generator of samples that implements the artificial prevalence protocol (APP). The APP consists of exploring
a grid of prevalence values (e.g., [0, 0.05, 0.1, 0.15, ..., 1]), and generating all valid combinations of
A generator of samples that implements the artificial prevalence protocol (APP).
The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g.,
[0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of
prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ...,
[1, 0, 0] prevalence values of size `sample_size` will be yielded). The number of samples for each valid
combination of prevalence values is indicated by `repeats`
combination of prevalence values is indicated by `repeats`.
:param sample_size: the number of instances in each sample
:param n_prevalences: the number of prevalence points to be taken from the [0,1] interval (including the

View File

@ -1,7 +1,7 @@
from typing import Union, Callable, Iterable
import numpy as np
from tqdm import tqdm
import inspect
import quapy as qp
from quapy.data import LabelledCollection
@ -9,44 +9,49 @@ from quapy.method.base import BaseQuantifier
from quapy.util import temp_seed
import quapy.functional as F
import pandas as pd
import inspect
def artificial_prevalence_prediction(
model: BaseQuantifier,
test: LabelledCollection,
sample_size,
n_prevpoints=210,
n_repetitions=1,
n_prevpoints=101,
repeats=1,
eval_budget: int = None,
n_jobs=1,
random_seed=42,
verbose=False):
"""
Performs the predictions for all samples generated according to the artificial sampling protocol.
Performs the predictions for all samples generated according to the Artificial Prevalence Protocol (APP).
The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g.,
[0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of
prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ...,
[1, 0, 0] prevalence values of size `sample_size` will be considered). The number of samples for each valid
combination of prevalence values is indicated by `repeats`.
:param model: the model in charge of generating the class prevalence estimations
:param test: the test set on which to perform arificial sampling
:param sample_size: the size of the samples
:param n_prevpoints: the number of different prevalences to sample (or set to None if eval_budget is specified)
:param n_repetitions: the number of repetitions for each prevalence
:param eval_budget: if specified, sets a ceil on the number of evaluations to perform. For example, if there are 3
classes, n_repetitions=1 and eval_budget=20, then n_prevpoints will be set to 5, since this will generate 15
different prevalences ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]) and since setting it n_prevpoints
to 6 would produce more than 20 evaluations.
:param n_jobs: number of jobs to be run in parallel
:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
any other random process.
:param test: the test set on which to perform APP
:param sample_size: integer, the size of the samples
:param n_prevpoints: integer, the number of different prevalences to sample (or set to None if eval_budget
is specified; default 101, i.e., steps of 1%)
:param repeats: integer, the number of repetitions for each prevalence (default 1)
:param eval_budget: integer, if specified, sets a ceil on the number of evaluations to perform. For example, if
there are 3 classes, `repeats=1`, and `eval_budget=20`, then `n_prevpoints` will be set to 5, since this
will generate 15 different prevalence vectors ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]) and
since setting `n_prevpoints=6` would produce more than 20 evaluations.
:param n_jobs: integer, number of jobs to be run in parallel (default 1)
:param random_seed: integer, allows to replicate the samplings. The seed is local to the method and does not affect
any other random process (default 42)
:param verbose: if True, shows a progress bar
:return: two ndarrays of shape (m,n) with m the number of samples (n_prevpoints*n_repetitions) and n the
number of classes. The first one contains the true prevalences for the samples generated while the second one
contains the the prevalence estimations
:return: a tuple containing two `np.ndarrays` of shape `(m,n,)` with `m` the number of samples
`(n_prevpoints*repeats)` and `n` the number of classes. The first one contains the true prevalence values
for the samples generated while the second one contains the prevalence estimations
"""
n_prevpoints, _ = qp.evaluation._check_num_evals(test.n_classes, n_prevpoints, eval_budget, n_repetitions, verbose)
n_prevpoints, _ = qp.evaluation._check_num_evals(test.n_classes, n_prevpoints, eval_budget, repeats, verbose)
with temp_seed(random_seed):
indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, n_repetitions))
indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, repeats))
return _predict_from_indexes(indexes, model, test, n_jobs, verbose)
@ -55,32 +60,48 @@ def natural_prevalence_prediction(
model: BaseQuantifier,
test: LabelledCollection,
sample_size,
n_repetitions=1,
repeats,
n_jobs=1,
random_seed=42,
verbose=False):
"""
Performs the predictions for all samples generated according to the artificial sampling protocol.
Performs the predictions for all samples generated according to the Natural Prevalence Protocol (NPP).
The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural
prevalence of the collection.
:param model: the model in charge of generating the class prevalence estimations
:param test: the test set on which to perform arificial sampling
:param sample_size: the size of the samples
:param n_repetitions: the number of repetitions for each prevalence
:param n_jobs: number of jobs to be run in parallel
:param test: the test set on which to perform NPP
:param sample_size: integer, the size of the samples
:param repeats: integer, the number of samples to generate
:param n_jobs: integer, number of jobs to be run in parallel (default 1)
:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
any other random process.
any other random process (default 42)
:param verbose: if True, shows a progress bar
:return: two ndarrays of shape (m,n) with m the number of samples (n_repetitions) and n the
number of classes. The first one contains the true prevalences for the samples generated while the second one
contains the the prevalence estimations
:return: a tuple containing two `np.ndarrays` of shape `(m,n,)` with `m` the number of samples
`(repeats)` and `n` the number of classes. The first one contains the true prevalence values
for the samples generated while the second one contains the prevalence estimations
"""
with temp_seed(random_seed):
indexes = list(test.natural_sampling_index_generator(sample_size, n_repetitions))
indexes = list(test.natural_sampling_index_generator(sample_size, repeats))
return _predict_from_indexes(indexes, model, test, n_jobs, verbose)
def gen_prevalence_prediction(model: BaseQuantifier, gen_fn: Callable, eval_budget=None):
"""
Generates prevalence predictions for a custom protocol defined as a generator function that yields
samples at each iteration. The sequence of samples is processed exhaustively if `eval_budget=None`
or up to the `eval_budget` iterations if specified.
:param model: the model in charge of generating the class prevalence estimations
:param gen_fn: a generator function yielding one sample at each iteration
:param eval_budget: a maximum number of evaluations to run. Set to None (default) for exploring the
entire sequence
:return: a tuple containing two `np.ndarrays` of shape `(m,n,)` with `m` the number of samples
generated and `n` the number of classes. The first one contains the true prevalence values
for the samples generated while the second one contains the prevalence estimations
"""
if not inspect.isgenerator(gen_fn()):
raise ValueError('param "gen_fun" is not a callable returning a generator')
@ -142,16 +163,49 @@ def artificial_prevalence_report(
model: BaseQuantifier,
test: LabelledCollection,
sample_size,
n_prevpoints=210,
n_repetitions=1,
n_prevpoints=101,
repeats=1,
eval_budget: int = None,
n_jobs=1,
random_seed=42,
error_metrics:Iterable[Union[str,Callable]]='mae',
verbose=False):
"""
Generates an evaluation report for all samples generated according to the Artificial Prevalence Protocol (APP).
The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g.,
[0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of
prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ...,
[1, 0, 0] prevalence values of size `sample_size` will be considered). The number of samples for each valid
combination of prevalence values is indicated by `repeats`.
Te report takes the form of a
pandas' `dataframe <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_
in which the rows correspond to different samples, and the columns inform of the true prevalence values,
the estimated prevalence values, and the score obtained by each of the evaluation measures indicated.
:param model: the model in charge of generating the class prevalence estimations
:param test: the test set on which to perform APP
:param sample_size: integer, the size of the samples
:param n_prevpoints: integer, the number of different prevalences to sample (or set to None if eval_budget
is specified; default 101, i.e., steps of 1%)
:param repeats: integer, the number of repetitions for each prevalence (default 1)
:param eval_budget: integer, if specified, sets a ceil on the number of evaluations to perform. For example, if
there are 3 classes, `repeats=1`, and `eval_budget=20`, then `n_prevpoints` will be set to 5, since this
will generate 15 different prevalence vectors ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]) and
since setting `n_prevpoints=6` would produce more than 20 evaluations.
:param n_jobs: integer, number of jobs to be run in parallel (default 1)
:param random_seed: integer, allows to replicate the samplings. The seed is local to the method and does not affect
any other random process (default 42)
:param error_metrics: a string indicating the name of the error (as defined in :mod:`quapy.error`) or a
callable error function; optionally, a list of strings or callables can be indicated, if the results
are to be evaluated with more than one error metric. Default is "mae"
:param verbose: if True, shows a progress bar
:return: pandas' dataframe with rows corresponding to different samples, and with columns informing of the
true prevalence values, the estimated prevalence values, and the score obtained by each of the evaluation
measures indicated.
"""
true_prevs, estim_prevs = artificial_prevalence_prediction(
model, test, sample_size, n_prevpoints, n_repetitions, eval_budget, n_jobs, random_seed, verbose
model, test, sample_size, n_prevpoints, repeats, eval_budget, n_jobs, random_seed, verbose
)
return _prevalence_report(true_prevs, estim_prevs, error_metrics)
@ -160,18 +214,66 @@ def natural_prevalence_report(
model: BaseQuantifier,
test: LabelledCollection,
sample_size,
n_repetitions=1,
repeats=1,
n_jobs=1,
random_seed=42,
error_metrics:Iterable[Union[str,Callable]]='mae',
verbose=False):
"""
Generates an evaluation report for all samples generated according to the Natural Prevalence Protocol (NPP).
The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural
prevalence of the collection.
Te report takes the form of a
pandas' `dataframe <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_
in which the rows correspond to different samples, and the columns inform of the true prevalence values,
the estimated prevalence values, and the score obtained by each of the evaluation measures indicated.
:param model: the model in charge of generating the class prevalence estimations
:param test: the test set on which to perform NPP
:param sample_size: integer, the size of the samples
:param repeats: integer, the number of samples to generate
:param n_jobs: integer, number of jobs to be run in parallel (default 1)
:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
any other random process (default 42)
:param error_metrics: a string indicating the name of the error (as defined in :mod:`quapy.error`) or a
callable error function; optionally, a list of strings or callables can be indicated, if the results
are to be evaluated with more than one error metric. Default is "mae"
:param verbose: if True, shows a progress bar
:return: a tuple containing two `np.ndarrays` of shape `(m,n,)` with `m` the number of samples
`(repeats)` and `n` the number of classes. The first one contains the true prevalence values
for the samples generated while the second one contains the prevalence estimations
"""
true_prevs, estim_prevs = natural_prevalence_prediction(
model, test, sample_size, n_repetitions, n_jobs, random_seed, verbose
model, test, sample_size, repeats, n_jobs, random_seed, verbose
)
return _prevalence_report(true_prevs, estim_prevs, error_metrics)
def gen_prevalence_report(model: BaseQuantifier, gen_fn: Callable, eval_budget=None,
error_metrics:Iterable[Union[str,Callable]]='mae'):
"""
GGenerates an evaluation report for a custom protocol defined as a generator function that yields
samples at each iteration. The sequence of samples is processed exhaustively if `eval_budget=None`
or up to the `eval_budget` iterations if specified.
Te report takes the form of a
pandas' `dataframe <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_
in which the rows correspond to different samples, and the columns inform of the true prevalence values,
the estimated prevalence values, and the score obtained by each of the evaluation measures indicated.
:param model: the model in charge of generating the class prevalence estimations
:param gen_fn: a generator function yielding one sample at each iteration
:param eval_budget: a maximum number of evaluations to run. Set to None (default) for exploring the
entire sequence
:return: a tuple containing two `np.ndarrays` of shape `(m,n,)` with `m` the number of samples
generated. The first one contains the true prevalence values
for the samples generated while the second one contains the prevalence estimations
"""
true_prevs, estim_prevs = gen_prevalence_prediction(model, gen_fn, eval_budget)
return _prevalence_report(true_prevs, estim_prevs, error_metrics)
def _prevalence_report(
true_prevs,
estim_prevs,
@ -199,13 +301,39 @@ def artificial_prevalence_protocol(
model: BaseQuantifier,
test: LabelledCollection,
sample_size,
n_prevpoints=210,
n_repetitions=1,
n_prevpoints=101,
repeats=1,
eval_budget: int = None,
n_jobs=1,
random_seed=42,
error_metric:Union[str,Callable]='mae',
verbose=False):
"""
Generates samples according to the Artificial Prevalence Protocol (APP).
The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g.,
[0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of
prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ...,
[1, 0, 0] prevalence values of size `sample_size` will be considered). The number of samples for each valid
combination of prevalence values is indicated by `repeats`.
:param model: the model in charge of generating the class prevalence estimations
:param test: the test set on which to perform APP
:param sample_size: integer, the size of the samples
:param n_prevpoints: integer, the number of different prevalences to sample (or set to None if eval_budget
is specified; default 101, i.e., steps of 1%)
:param repeats: integer, the number of repetitions for each prevalence (default 1)
:param eval_budget: integer, if specified, sets a ceil on the number of evaluations to perform. For example, if
there are 3 classes, `repeats=1`, and `eval_budget=20`, then `n_prevpoints` will be set to 5, since this
will generate 15 different prevalence vectors ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]) and
since setting `n_prevpoints=6` would produce more than 20 evaluations.
:param n_jobs: integer, number of jobs to be run in parallel (default 1)
:param random_seed: integer, allows to replicate the samplings. The seed is local to the method and does not affect
any other random process (default 42)
:param error_metric: a string indicating the name of the error (as defined in :mod:`quapy.error`) or a
callable error function
:param verbose: set to True (default False) for displaying some information on standard output
:return: yields one sample at a time
"""
if isinstance(error_metric, str):
error_metric = qp.error.from_name(error_metric)
@ -213,7 +341,7 @@ def artificial_prevalence_protocol(
assert hasattr(error_metric, '__call__'), 'invalid error function'
true_prevs, estim_prevs = artificial_prevalence_prediction(
model, test, sample_size, n_prevpoints, n_repetitions, eval_budget, n_jobs, random_seed, verbose
model, test, sample_size, n_prevpoints, repeats, eval_budget, n_jobs, random_seed, verbose
)
return error_metric(true_prevs, estim_prevs)
@ -223,11 +351,28 @@ def natural_prevalence_protocol(
model: BaseQuantifier,
test: LabelledCollection,
sample_size,
n_repetitions=1,
repeats=1,
n_jobs=1,
random_seed=42,
error_metric:Union[str,Callable]='mae',
verbose=False):
"""
Generates samples according to the Natural Prevalence Protocol (NPP).
The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural
prevalence of the collection.
:param model: the model in charge of generating the class prevalence estimations
:param test: the test set on which to perform NPP
:param sample_size: integer, the size of the samples
:param repeats: integer, the number of samples to generate
:param n_jobs: integer, number of jobs to be run in parallel (default 1)
:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
any other random process (default 42)
:param error_metric: a string indicating the name of the error (as defined in :mod:`quapy.error`) or a
callable error function
:param verbose: if True, shows a progress bar
:return: yields one sample at a time
"""
if isinstance(error_metric, str):
error_metric = qp.error.from_name(error_metric)
@ -235,16 +380,26 @@ def natural_prevalence_protocol(
assert hasattr(error_metric, '__call__'), 'invalid error function'
true_prevs, estim_prevs = natural_prevalence_prediction(
model, test, sample_size, n_repetitions, n_jobs, random_seed, verbose
model, test, sample_size, repeats, n_jobs, random_seed, verbose
)
return error_metric(true_prevs, estim_prevs)
def evaluate(model: BaseQuantifier, test_samples:Iterable[LabelledCollection], err:Union[str, Callable], n_jobs:int=-1):
if isinstance(err, str):
err = qp.error.from_name(err)
scores = qp.util.parallel(_delayed_eval, ((model, Ti, err) for Ti in test_samples), n_jobs=n_jobs)
def evaluate(model: BaseQuantifier, test_samples:Iterable[LabelledCollection], error_metric:Union[str, Callable], n_jobs:int=-1):
"""
Evaluates a model on a sequence of test samples in terms of a given error metric.
:param model: the model in charge of generating the class prevalence estimations
:param test_samples: an iterable yielding one sample at a time
:param error_metric: a string indicating the name of the error (as defined in :mod:`quapy.error`) or a
callable error function
:param n_jobs: integer, number of jobs to be run in parallel (default 1)
:return: the score obtained using `error_metric`
"""
if isinstance(error_metric, str):
error_metric = qp.error.from_name(error_metric)
scores = qp.util.parallel(_delayed_eval, ((model, Ti, error_metric) for Ti in test_samples), n_jobs=n_jobs)
return np.mean(scores)
@ -255,27 +410,27 @@ def _delayed_eval(args):
return error(prev_true, prev_estim)
def _check_num_evals(n_classes, n_prevpoints=None, eval_budget=None, n_repetitions=1, verbose=False):
def _check_num_evals(n_classes, n_prevpoints=None, eval_budget=None, repeats=1, verbose=False):
if n_prevpoints is None and eval_budget is None:
raise ValueError('either n_prevpoints or eval_budget has to be specified')
elif n_prevpoints is None:
assert eval_budget > 0, 'eval_budget must be a positive integer'
n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, repeats)
eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats)
if verbose:
print(f'setting n_prevpoints={n_prevpoints} so that the number of '
f'evaluations ({eval_computations}) does not exceed the evaluation '
f'budget ({eval_budget})')
elif eval_budget is None:
eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats)
if verbose:
print(f'{eval_computations} evaluations will be performed for each '
f'combination of hyper-parameters')
else:
eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats)
if eval_computations > eval_budget:
n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, n_repetitions)
new_eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, n_repetitions)
n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, repeats)
new_eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats)
if verbose:
print(f'the budget of evaluations would be exceeded with '
f'n_prevpoints={n_prevpoints}. Chaning to n_prevpoints={n_prevpoints}. This will produce '

View File

@ -1,6 +1,6 @@
import itertools
from collections import defaultdict
import scipy
import numpy as np
@ -20,7 +20,7 @@ def artificial_prevalence_sampling(dimensions, n_prevalences=21, repeat=1, retur
:param repeat: number of copies for each valid prevalence vector (default is 1)
:param return_constrained_dim: set to True to return all dimensions, or to False (default) for ommitting the
constrained dimension
:return: an ndarray of shape `(n, dimensions)` if `return_constrained_dim=True` or of shape `(n, dimensions-1)`
:return: a `np.ndarray` of shape `(n, dimensions)` if `return_constrained_dim=True` or of shape `(n, dimensions-1)`
if `return_constrained_dim=False`, where `n` is the number of valid combinations found in the grid multiplied
by `repeat`
"""
@ -35,14 +35,15 @@ def artificial_prevalence_sampling(dimensions, n_prevalences=21, repeat=1, retur
return prevs
def prevalence_linspace(n_prevalences=21, repeat=1, smooth_limits_epsilon=0.01):
def prevalence_linspace(n_prevalences=21, repeats=1, smooth_limits_epsilon=0.01):
"""
Produces a uniformly separated values of prevalence. By default, produces an array of 21 prevalence values, with
Produces an array of uniformly separated values of prevalence.
By default, produces an array of 21 prevalence values, with
step 0.05 and with the limits smoothed, i.e.:
[0.01, 0.05, 0.10, 0.15, ..., 0.90, 0.95, 0.99]
:param n_prevalences: the number of prevalence values to sample from the [0,1] interval (default 21)
:param repeat: number of times each prevalence is to be repeated (defaults to 1)
:param repeats: number of times each prevalence is to be repeated (defaults to 1)
:param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1
:return: an array of uniformly separated prevalence values
"""
@ -51,8 +52,8 @@ def prevalence_linspace(n_prevalences=21, repeat=1, smooth_limits_epsilon=0.01):
p[-1] -= smooth_limits_epsilon
if p[0] > p[1]:
raise ValueError(f'the smoothing in the limits is greater than the prevalence step')
if repeat > 1:
p = np.repeat(p, repeat)
if repeats > 1:
p = np.repeat(p, repeats)
return p
@ -75,6 +76,14 @@ def prevalence_from_labels(labels, classes):
def prevalence_from_probabilities(posteriors, binarize: bool = False):
"""
Returns a vector of prevalence values from a matrix of posterior probabilities.
:param posteriors: array-like of shape `(n_instances, n_classes,)` with posterior probabilities for each class
:param binarize: set to True (default is False) for computing the prevalence values on crisp decisions (i.e.,
converting the vectors of posterior probabilities into class indices, by taking the argmax).
:return: array of shape `(n_classes,)` containing the prevalence values
"""
if posteriors.ndim != 2:
raise ValueError(f'param posteriors does not seem to be a ndarray of posteior probabilities')
if binarize:
@ -87,15 +96,34 @@ def prevalence_from_probabilities(posteriors, binarize: bool = False):
def HellingerDistance(P, Q):
"""
Computes the Hellingher Distance (HD) between (discretized) distributions `P` and `Q`.
The HD for two discrete distributions of `k` bins is defined as:
.. math::
HD(P,Q) = \\frac{ 1 }{ \\sqrt{ 2 } } \\sqrt{ \sum_{i=1}^k ( \\sqrt{p_i} - \\sqrt{q_i} )^2 }
:param P: real-valued array-like of shape `(k,)` representing a discrete distribution
:param Q: real-valued array-like of shape `(k,)` representing a discrete distribution
:return: float
"""
return np.sqrt(np.sum((np.sqrt(P) - np.sqrt(Q))**2))
def uniform_prevalence_sampling(n_classes, size=1):
"""
Implements the `Kraemer algorithm <http://www.cs.cmu.edu/~nasmith/papers/smith+tromble.tr04.pdf>`_
for sampling uniformly at random from the unit simplex. This implementation is adapted from this
`post <https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex>_`.
:param n_classes: integer, number of classes (dimensionality of the simplex)
:param size: number of samples to return
:return: `np.ndarray` of shape `(size, n_classes,)` if `size>1`, or of shape `(n_classes,)` otherwise
"""
if n_classes == 2:
u = np.random.rand(size)
u = np.vstack([1-u, u]).T
else:
# from https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex
u = np.random.rand(size, n_classes-1)
u.sort(axis=-1)
_0s = np.zeros(shape=(size, 1))
@ -106,15 +134,41 @@ def uniform_prevalence_sampling(n_classes, size=1):
if size == 1:
u = u.flatten()
return u
#return np.asarray([uniform_simplex_sampling(n_classes) for _ in range(size)])
uniform_simplex_sampling = uniform_prevalence_sampling
def strprev(prevalences, prec=3):
"""
Returns a string representation for a prevalence vector. E.g.,
>>> strprev([1/3, 2/3], prec=2)
>>> '[0.33, 0.67]'
:param prevalences: a vector of prevalence values
:param prec: float precision
:return: string
"""
return '['+ ', '.join([f'{p:.{prec}f}' for p in prevalences]) + ']'
def adjusted_quantification(prevalence_estim, tpr, fpr, clip=True):
"""
Implements the adjustment of ACC and PACC for the binary case. The adjustment for a prevalence estimate of the
positive class `p` comes down to computing:
.. math::
ACC(p) = \\frac{ p - fpr }{ tpr - fpr }
:param prevalence_estim: float, the estimated value for the positive class
:param tpr: float, the true positive rate of the classifier
:param fpr: float, the false positive rate of the classifier
:param clip: set to True (default) to clip values that might exceed the range [0,1]
:return: float, the adjusted count
"""
den = tpr - fpr
if den == 0:
den += 1e-8
@ -125,6 +179,14 @@ def adjusted_quantification(prevalence_estim, tpr, fpr, clip=True):
def normalize_prevalence(prevalences):
"""
Normalize a vector or matrix of prevalence values. The normalization consists of applying a L1 normalization in
cases in which the prevalence values are not all-zeros, and to convert the prevalence values into `1/n_classes` in
cases in which all values are zero.
:param prevalences: array-like of shape `(n_classes,)` or of shape `(n_samples, n_classes,)` with prevalence values
:return: a normalized vector or matrix of prevalence values
"""
prevalences = np.asarray(prevalences)
n_classes = prevalences.shape[-1]
accum = prevalences.sum(axis=-1, keepdims=True)
@ -138,13 +200,14 @@ def normalize_prevalence(prevalences):
return prevalences
def num_prevalence_combinations(n_prevpoints:int, n_classes:int, n_repeats:int=1):
def __num_prevalence_combinations_depr(n_prevpoints:int, n_classes:int, n_repeats:int=1):
"""
Computes the number of prevalence combinations in the n_classes-dimensional simplex if nprevpoints equally distant
prevalences are generated and n_repeats repetitions are requested
:param n_classes: number of classes
:param n_prevpoints: number of prevalence points.
:param n_repeats: number of repetitions for each prevalence combination
Computes the number of prevalence combinations in the n_classes-dimensional simplex if `nprevpoints` equally distant
prevalence values are generated and `n_repeats` repetitions are requested.
:param n_classes: integer, number of classes
:param n_prevpoints: integer, number of prevalence points.
:param n_repeats: integer, number of repetitions for each prevalence combination
:return: The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the
number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]
"""
@ -161,14 +224,40 @@ def num_prevalence_combinations(n_prevpoints:int, n_classes:int, n_repeats:int=1
return __f(n_classes, n_prevpoints) * n_repeats
def num_prevalence_combinations(n_prevpoints:int, n_classes:int, n_repeats:int=1):
"""
Computes the number of valid prevalence combinations in the n_classes-dimensional simplex if `n_prevpoints` equally
distant prevalence values are generated and `n_repeats` repetitions are requested.
The computation comes down to calculating:
.. math::
\\binom{N+C-1}{C-1} \\times r
where `N` is `n_prevpoints-1`, i.e., the number of probability mass blocks to allocate, `C` is the number of
classes, and `r` is `n_repeats`. This solution comes from the
`Stars and Bars <https://brilliant.org/wiki/integer-equations-star-and-bars/>`_ problem.
:param n_classes: integer, number of classes
:param n_prevpoints: integer, number of prevalence points.
:param n_repeats: integer, number of repetitions for each prevalence combination
:return: The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the
number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]
"""
N = n_prevpoints-1
C = n_classes
r = n_repeats
return int(scipy.special.binom(N + C - 1, C - 1) * r)
def get_nprevpoints_approximation(combinations_budget:int, n_classes:int, n_repeats:int=1):
"""
Searches for the largest number of (equidistant) prevalence points to define for each of the n_classes classes so that
the number of valid prevalences generated as combinations of prevalence points (points in a n_classes-dimensional
simplex) do not exceed combinations_budget.
:param n_classes: number of classes
:param n_repeats: number of repetitions for each prevalence combination
:param combinations_budget: maximum number of combinatios allowed
Searches for the largest number of (equidistant) prevalence points to define for each of the `n_classes` classes so
that the number of valid prevalence values generated as combinations of prevalence points (points in a
`n_classes`-dimensional simplex) do not exceed combinations_budget.
:param combinations_budget: integer, maximum number of combinatios allowed
:param n_classes: integer, number of classes
:param n_repeats: integer, number of repetitions for each prevalence combination
:return: the largest number of prevalence points that generate less than combinations_budget valid prevalences
"""
assert n_classes > 0 and n_repeats > 0 and combinations_budget > 0, 'parameters must be positive integers'

View File

@ -447,7 +447,7 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier):
Px_test, _ = np.histogram(Px, bins=bins, range=(0, 1), density=True)
prev_selected, min_dist = None, None
for prev in F.prevalence_linspace(n_prevalences=100, repeat=1, smooth_limits_epsilon=0.0):
for prev in F.prevalence_linspace(n_prevalences=100, repeats=1, smooth_limits_epsilon=0.0):
Px_train = prev * Pxy1_density + (1 - prev) * Pxy0_density
hdy = F.HellingerDistance(Px_train, Px_test)
if prev_selected is None or hdy < min_dist: