update qp.error documentation

This commit is contained in:
Alejandro Moreo Fernandez 2021-11-12 15:37:31 +01:00
parent 3eb760901f
commit b78c8268fd
4 changed files with 484 additions and 37 deletions

View File

@ -67,7 +67,7 @@ that also generates embedded inputs (i.e., that implements <cite>transform</cite
<code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.method.neural.QuaNet</span></code>. This is a mock method to allow for easily instantiating
<code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.method.neural.QuaNet</span></code> on array-like real-valued instances.
The transformation consists of applying <code class="xref py py-class docutils literal notranslate"><span class="pre">sklearn.decomposition.TruncatedSVD</span></code>
while classification is performed using <code class="xref py py-class docutils literal notranslate"><span class="pre">sklearn.linear_model.LogisticRegression</span></code> on the low-rank space</p>
while classification is performed using <code class="xref py py-class docutils literal notranslate"><span class="pre">sklearn.linear_model.LogisticRegression</span></code> on the low-rank space.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@ -81,7 +81,7 @@ while classification is performed using <code class="xref py py-class docutils l
<dt class="sig sig-object py" id="quapy.classification.methods.LowRankLogisticRegression.fit">
<span class="sig-name descname"><span class="pre">fit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.fit" title="Permalink to this definition"></a></dt>
<dd><p>Fit the model according to the given training data. The fit consists of
fitting TruncatedSVD and Logistic Regression.</p>
fitting <cite>TruncatedSVD</cite> and then <cite>LogisticRegression</cite> on the low-rank representation.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
@ -98,7 +98,7 @@ fitting TruncatedSVD and Logistic Regression.</p>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.methods.LowRankLogisticRegression.get_params">
<span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.get_params" title="Permalink to this definition"></a></dt>
<dd><p>Get hyper-parameters for this estimator</p>
<dd><p>Get hyper-parameters for this estimator.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns</dt>
<dd class="field-odd"><p>a dictionary with parameter names mapped to their values</p>
@ -109,7 +109,7 @@ fitting TruncatedSVD and Logistic Regression.</p>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.methods.LowRankLogisticRegression.predict">
<span class="sig-name descname"><span class="pre">predict</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.predict" title="Permalink to this definition"></a></dt>
<dd><p>Predicts labels for the instances <cite>X</cite></p>
<dd><p>Predicts labels for the instances <cite>X</cite> embedded into the low-rank space.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>X</strong> array-like of shape <cite>(n_samples, n_features)</cite> instances to classify</p>
@ -124,7 +124,7 @@ instances in <cite>X</cite></p>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.methods.LowRankLogisticRegression.predict_proba">
<span class="sig-name descname"><span class="pre">predict_proba</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.predict_proba" title="Permalink to this definition"></a></dt>
<dd><p>Predicts posterior probabilities for the instances <cite>X</cite></p>
<dd><p>Predicts posterior probabilities for the instances <cite>X</cite> embedded into the low-rank space.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>X</strong> array-like of shape <cite>(n_samples, n_features)</cite> instances to classify</p>
@ -143,7 +143,7 @@ instances in <cite>X</cite></p>
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>parameters</strong> a <cite>**kwargs</cite> dictionary with the estimator parameters for
<a class="reference external" href="https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html">Logistic Regression</a>
and eventually also <cite>n_components</cite> for PCA</p>
and eventually also <cite>n_components</cite> for <cite>TruncatedSVD</cite></p>
</dd>
</dl>
</dd></dl>
@ -151,7 +151,8 @@ and eventually also <cite>n_components</cite> for PCA</p>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.classification.methods.LowRankLogisticRegression.transform">
<span class="sig-name descname"><span class="pre">transform</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.methods.LowRankLogisticRegression.transform" title="Permalink to this definition"></a></dt>
<dd><p>Returns the low-rank approximation of X with <cite>n_components</cite> dimensions</p>
<dd><p>Returns the low-rank approximation of <cite>X</cite> with <cite>n_components</cite> dimensions, or <cite>X</cite> unaltered if
<cite>n_components</cite> &gt;= <cite>X.shape[1]</cite>.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>X</strong> array-like of shape <cite>(n_samples, n_features)</cite> instances to embed</p>

View File

@ -14,6 +14,8 @@
<script src="_static/jquery.js"></script>
<script src="_static/underscore.js"></script>
<script src="_static/doctools.js"></script>
<script async="async" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
<script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
<script src="_static/bizstyle.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
@ -93,103 +95,418 @@
<span id="quapy-error-module"></span><h2>quapy.error module<a class="headerlink" href="#module-quapy.error" title="Permalink to this headline"></a></h2>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.absolute_error">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.absolute_error" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.absolute_error" title="Permalink to this definition"></a></dt>
<dd><dl class="simple">
<dt>Computes the absolute error between the two prevalence vectors.</dt><dd><p>Absolute error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
<span class="math notranslate nohighlight">\(AE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}|\hat{p}(y)-p(y)|\)</span>,
where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.</p>
</dd>
</dl>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>absolute error</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.acc_error">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">acc_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.acc_error" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Computes the error in terms of 1-accuracy. The accuracy is computed as <span class="math notranslate nohighlight">\(\frac{tp+tn}{tp+fp+fn+tn}\)</span>, with
<cite>tp</cite>, <cite>fp</cite>, <cite>fn</cite>, and <cite>tn</cite> standing for true positives, false positives, false negatives, and true negatives,
respectively</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>y_true</strong> array-like of true labels</p></li>
<li><p><strong>y_pred</strong> array-like of predicted labels</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>1-accuracy</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.acce">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">acce</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.acce" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Computes the error in terms of 1-accuracy. The accuracy is computed as <span class="math notranslate nohighlight">\(\frac{tp+tn}{tp+fp+fn+tn}\)</span>, with
<cite>tp</cite>, <cite>fp</cite>, <cite>fn</cite>, and <cite>tn</cite> standing for true positives, false positives, false negatives, and true negatives,
respectively</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>y_true</strong> array-like of true labels</p></li>
<li><p><strong>y_pred</strong> array-like of predicted labels</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>1-accuracy</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.ae">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">ae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.ae" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">ae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.ae" title="Permalink to this definition"></a></dt>
<dd><dl class="simple">
<dt>Computes the absolute error between the two prevalence vectors.</dt><dd><p>Absolute error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
<span class="math notranslate nohighlight">\(AE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}|\hat{p}(y)-p(y)|\)</span>,
where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.</p>
</dd>
</dl>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>absolute error</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.f1_error">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">f1_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.f1_error" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>F1 error: simply computes the error in terms of macro <span class="math notranslate nohighlight">\(F_1\)</span>, i.e., <span class="math notranslate nohighlight">\(1-F_1^M\)</span>,
where <span class="math notranslate nohighlight">\(F_1\)</span> is the harmonic mean of precision and recall, defined as <span class="math notranslate nohighlight">\(\frac{2tp}{2tp+fp+fn}\)</span>,
with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing for true positives, false positives, and false negatives, respectively.
<cite>Macro</cite> averaging means the <span class="math notranslate nohighlight">\(F_1\)</span> is computed for each category independently, and then averaged.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>y_true</strong> array-like of true labels</p></li>
<li><p><strong>y_pred</strong> array-like of predicted labels</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><span class="math notranslate nohighlight">\(1-F_1^M\)</span></p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.f1e">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">f1e</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">y_true</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y_pred</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.f1e" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>F1 error: simply computes the error in terms of macro <span class="math notranslate nohighlight">\(F_1\)</span>, i.e., <span class="math notranslate nohighlight">\(1-F_1^M\)</span>,
where <span class="math notranslate nohighlight">\(F_1\)</span> is the harmonic mean of precision and recall, defined as <span class="math notranslate nohighlight">\(\frac{2tp}{2tp+fp+fn}\)</span>,
with <cite>tp</cite>, <cite>fp</cite>, and <cite>fn</cite> standing for true positives, false positives, and false negatives, respectively.
<cite>Macro</cite> averaging means the <span class="math notranslate nohighlight">\(F_1\)</span> is computed for each category independently, and then averaged.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>y_true</strong> array-like of true labels</p></li>
<li><p><strong>y_pred</strong> array-like of predicted labels</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p><span class="math notranslate nohighlight">\(1-F_1^M\)</span></p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.from_name">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">from_name</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">err_name</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.from_name" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Gets an error function from its name. E.g., <cite>from_name(“mae”)</cite> will return function <a class="reference internal" href="#quapy.error.mae" title="quapy.error.mae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.mae()</span></code></a></p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>err_name</strong> string, the error name</p>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>a callable implementing the requested error</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.kld">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">kld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.kld" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><dl class="simple">
<dt>Computes the Kullback-Leibler divergence between the two prevalence distributions.</dt><dd><p>Kullback-Leibler divergence between two prevalence distributions <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
<span class="math notranslate nohighlight">\(KLD(p,\hat{p})=D_{KL}(p||\hat{p})=\sum_{y\in \mathcal{Y}} p(y)\log\frac{p(y)}{\hat{p}(y)}\)</span>, where
<span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
</dd>
</dl>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
<li><p><strong>eps</strong> smoothing factor. KLD is not defined in cases in which the distributions contain zeros; <cite>eps</cite>
is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>Kullback-Leibler divergence between the two distributions</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.mae">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mae" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Computes the mean absolute error (see <a class="reference internal" href="#quapy.error.ae" title="quapy.error.ae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.ae()</span></code></a>) across the sample pairs.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>mean absolute error</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.mean_absolute_error">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mean_absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mean_absolute_error" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Computes the mean absolute error (see <a class="reference internal" href="#quapy.error.ae" title="quapy.error.ae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.ae()</span></code></a>) across the sample pairs.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>mean absolute error</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.mean_relative_absolute_error">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mean_relative_absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mean_relative_absolute_error" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Computes the mean relative absolute error (see <a class="reference internal" href="#quapy.error.rae" title="quapy.error.rae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.rae()</span></code></a>) across the sample pairs.
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
<li><p><strong>eps</strong> smoothing factor. <cite>mrae</cite> is not defined in cases in which the true distribution contains zeros; <cite>eps</cite>
is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>mean relative absolute error</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.mkld">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mkld" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Computes the mean Kullback-Leibler divergence (see <a class="reference internal" href="#quapy.error.kld" title="quapy.error.kld"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.kld()</span></code></a>) across the sample pairs.
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
<li><p><strong>eps</strong> smoothing factor. KLD is not defined in cases in which the distributions contain zeros; <cite>eps</cite>
is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>mean Kullback-Leibler distribution</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.mnkld">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mnkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mnkld" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Computes the mean Normalized Kullback-Leibler divergence (see <a class="reference internal" href="#quapy.error.nkld" title="quapy.error.nkld"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.nkld()</span></code></a>) across the sample pairs.
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
<li><p><strong>eps</strong> smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; <cite>eps</cite>
is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>mean Normalized Kullback-Leibler distribution</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.mrae">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mrae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mrae" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Computes the mean relative absolute error (see <a class="reference internal" href="#quapy.error.rae" title="quapy.error.rae"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.rae()</span></code></a>) across the sample pairs.
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
<li><p><strong>eps</strong> smoothing factor. <cite>mrae</cite> is not defined in cases in which the true distribution contains zeros; <cite>eps</cite>
is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>mean relative absolute error</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.mse">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">mse</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevs_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.mse" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><p>Computes the mean squared error (see <a class="reference internal" href="#quapy.error.se" title="quapy.error.se"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.se()</span></code></a>) across the sample pairs.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_samples, n_classes,)</cite> with the predicted prevalence values</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>mean squared error</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.nkld">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">nkld</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.nkld" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><dl class="simple">
<dt>Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.</dt><dd><p>Normalized Kullback-Leibler divergence between two prevalence distributions <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span>
is computed as <span class="math notranslate nohighlight">\(NKLD(p,\hat{p}) = 2\frac{e^{KLD(p,\hat{p})}}{e^{KLD(p,\hat{p})}+1}-1\)</span>, where
<span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
</dd>
</dl>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
<li><p><strong>eps</strong> smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; <cite>eps</cite>
is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>Normalized Kullback-Leibler divergence between the two distributions</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.rae">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">rae</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.rae" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><dl class="simple">
<dt>Computes the absolute relative error between the two prevalence vectors.</dt><dd><p>Relative absolute error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
<span class="math notranslate nohighlight">\(RAE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}\frac{|\hat{p}(y)-p(y)|}{p(y)}\)</span>,
where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
</dd>
</dl>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
<li><p><strong>eps</strong> smoothing factor. <cite>rae</cite> is not defined in cases in which the true distribution contains zeros; <cite>eps</cite>
is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>relative absolute error</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.relative_absolute_error">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">relative_absolute_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.relative_absolute_error" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><dl class="simple">
<dt>Computes the absolute relative error between the two prevalence vectors.</dt><dd><p>Relative absolute error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
<span class="math notranslate nohighlight">\(RAE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}\frac{|\hat{p}(y)-p(y)|}{p(y)}\)</span>,
where <span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.
The distributions are smoothed using the <cite>eps</cite> factor (see <a class="reference internal" href="#quapy.error.smooth" title="quapy.error.smooth"><code class="xref py py-meth docutils literal notranslate"><span class="pre">quapy.error.smooth()</span></code></a>).</p>
</dd>
</dl>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
<li><p><strong>eps</strong> smoothing factor. <cite>rae</cite> is not defined in cases in which the true distribution contains zeros; <cite>eps</cite>
is typically set to be <span class="math notranslate nohighlight">\(\frac{1}{2T}\)</span>, with <span class="math notranslate nohighlight">\(T\)</span> the sample size. If <cite>eps=None</cite>, the sample size
will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has thus to be set beforehand).</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>relative absolute error</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.se">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">se</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">p_hat</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.se" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dd><dl class="simple">
<dt>Computes the squared error between the two prevalence vectors.</dt><dd><p>Squared error between two prevalence vectors <span class="math notranslate nohighlight">\(p\)</span> and <span class="math notranslate nohighlight">\(\hat{p}\)</span> is computed as
<span class="math notranslate nohighlight">\(SE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}(\hat{p}(y)-p(y))^2\)</span>, where
<span class="math notranslate nohighlight">\(\mathcal{Y}\)</span> are the classes of interest.</p>
</dd>
</dl>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>prevs_hat</strong> array-like of shape <cite>(n_classes,)</cite> with the predicted prevalence values</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>absolute error</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.error.smooth">
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">smooth</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">p</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.smooth" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<span class="sig-prename descclassname"><span class="pre">quapy.error.</span></span><span class="sig-name descname"><span class="pre">smooth</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eps</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.error.smooth" title="Permalink to this definition"></a></dt>
<dd><p>Smooths a prevalence distribution with <span class="math notranslate nohighlight">\(\epsilon\)</span> (<cite>eps</cite>) as:
<span class="math notranslate nohighlight">\(\underline{p}(y)=\frac{\epsilon+p(y)}{\epsilon|\mathcal{Y}|+\displaystyle\sum_{y\in \mathcal{Y}}p(y)}\)</span></p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>prevs</strong> array-like of shape <cite>(n_classes,)</cite> with the true prevalence values</p></li>
<li><p><strong>eps</strong> smoothing factor</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>array-like of shape <cite>(n_classes,)</cite> with the smoothed distribution</p>
</dd>
</dl>
</dd></dl>
</div>
<div class="section" id="module-quapy.evaluation">

File diff suppressed because one or more lines are too long

View File

@ -1,8 +1,14 @@
import quapy as qp
import numpy as np
from sklearn.metrics import f1_score
def from_name(err_name):
"""Gets an error function from its name. E.g., `from_name("mae")` will return function :meth:`quapy.error.mae`
:param err_name: string, the error name
:return: a callable implementing the requested error
"""
assert err_name in ERROR_NAMES, f'unknown error {err_name}'
callable_error = globals()[err_name]
if err_name in QUANTIFICATION_ERROR_SMOOTH_NAMES:
@ -14,35 +20,105 @@ def from_name(err_name):
def f1e(y_true, y_pred):
"""F1 error: simply computes the error in terms of macro :math:`F_1`, i.e., :math:`1-F_1^M`,
where :math:`F_1` is the harmonic mean of precision and recall, defined as :math:`\\frac{2tp}{2tp+fp+fn}`,
with `tp`, `fp`, and `fn` standing for true positives, false positives, and false negatives, respectively.
`Macro` averaging means the :math:`F_1` is computed for each category independently, and then averaged.
:param y_true: array-like of true labels
:param y_pred: array-like of predicted labels
:return: :math:`1-F_1^M`
"""
return 1. - f1_score(y_true, y_pred, average='macro')
def acce(y_true, y_pred):
"""Computes the error in terms of 1-accuracy. The accuracy is computed as :math:`\\frac{tp+tn}{tp+fp+fn+tn}`, with
`tp`, `fp`, `fn`, and `tn` standing for true positives, false positives, false negatives, and true negatives,
respectively
:param y_true: array-like of true labels
:param y_pred: array-like of predicted labels
:return: 1-accuracy
"""
return 1. - (y_true == y_pred).mean()
def mae(prevs, prevs_hat):
"""Computes the mean absolute error (see :meth:`quapy.error.ae`) across the sample pairs.
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
:return: mean absolute error
"""
return ae(prevs, prevs_hat).mean()
def ae(p, p_hat):
assert p.shape == p_hat.shape, f'wrong shape {p.shape} vs. {p_hat.shape}'
return abs(p_hat-p).mean(axis=-1)
def ae(prevs, prevs_hat):
"""Computes the absolute error between the two prevalence vectors.
Absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as
:math:`AE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\in \mathcal{Y}}|\\hat{p}(y)-p(y)|`,
where :math:`\\mathcal{Y}` are the classes of interest.
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:return: absolute error
"""
assert prevs.shape == prevs_hat.shape, f'wrong shape {prevs.shape} vs. {prevs_hat.shape}'
return abs(prevs_hat - prevs).mean(axis=-1)
def mse(prevs, prevs_hat):
"""Computes the mean squared error (see :meth:`quapy.error.se`) across the sample pairs.
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
:return: mean squared error
"""
return se(prevs, prevs_hat).mean()
def se(p, p_hat):
"""Computes the squared error between the two prevalence vectors.
Squared error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as
:math:`SE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\in \mathcal{Y}}(\\hat{p}(y)-p(y))^2`, where
:math:`\\mathcal{Y}` are the classes of interest.
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:return: absolute error
"""
return ((p_hat-p)**2).mean(axis=-1)
def mkld(prevs, prevs_hat, eps=None):
"""Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the sample pairs.
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
:param eps: smoothing factor. KLD is not defined in cases in which the distributions contain zeros; `eps`
is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
:return: mean Kullback-Leibler distribution
"""
return kld(prevs, prevs_hat, eps).mean()
def kld(p, p_hat, eps=None):
"""Computes the Kullback-Leibler divergence between the two prevalence distributions.
Kullback-Leibler divergence between two prevalence distributions :math:`p` and :math:`\\hat{p}` is computed as
:math:`KLD(p,\\hat{p})=D_{KL}(p||\\hat{p})=\\sum_{y\\in \\mathcal{Y}} p(y)\\log\\frac{p(y)}{\\hat{p}(y)}`, where
:math:`\\mathcal{Y}` are the classes of interest.
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:param eps: smoothing factor. KLD is not defined in cases in which the distributions contain zeros; `eps`
is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
:return: Kullback-Leibler divergence between the two distributions
"""
eps = __check_eps(eps)
sp = p+eps
sp_hat = p_hat + eps
@ -50,28 +126,81 @@ def kld(p, p_hat, eps=None):
def mnkld(prevs, prevs_hat, eps=None):
"""Computes the mean Normalized Kullback-Leibler divergence (see :meth:`quapy.error.nkld`) across the sample pairs.
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
:param eps: smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; `eps`
is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
:return: mean Normalized Kullback-Leibler distribution
"""
return nkld(prevs, prevs_hat, eps).mean()
def nkld(p, p_hat, eps=None):
"""Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.
Normalized Kullback-Leibler divergence between two prevalence distributions :math:`p` and :math:`\\hat{p}`
is computed as :math:`NKLD(p,\\hat{p}) = 2\\frac{e^{KLD(p,\\hat{p})}}{e^{KLD(p,\\hat{p})}+1}-1`, where
:math:`\\mathcal{Y}` are the classes of interest.
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:param eps: smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; `eps`
is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
:return: Normalized Kullback-Leibler divergence between the two distributions
"""
ekld = np.exp(kld(p, p_hat, eps))
return 2. * ekld / (1 + ekld) - 1.
def mrae(p, p_hat, eps=None):
"""Computes the mean relative absolute error (see :meth:`quapy.error.rae`) across the sample pairs.
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
:param eps: smoothing factor. `mrae` is not defined in cases in which the true distribution contains zeros; `eps`
is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
:return: mean relative absolute error
"""
return rae(p, p_hat, eps).mean()
def rae(p, p_hat, eps=None):
"""Computes the absolute relative error between the two prevalence vectors.
Relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as
:math:`RAE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\in \mathcal{Y}}\\frac{|\\hat{p}(y)-p(y)|}{p(y)}`,
where :math:`\\mathcal{Y}` are the classes of interest.
The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
:param eps: smoothing factor. `rae` is not defined in cases in which the true distribution contains zeros; `eps`
is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size. If `eps=None`, the sample size
will be taken from the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
:return: relative absolute error
"""
eps = __check_eps(eps)
p = smooth(p, eps)
p_hat = smooth(p_hat, eps)
return (abs(p-p_hat)/p).mean(axis=-1)
def smooth(p, eps):
n_classes = p.shape[-1]
return (p+eps)/(eps*n_classes + 1)
def smooth(prevs, eps):
""" Smooths a prevalence distribution with :math:`\epsilon` (`eps`) as:
:math:`\\underline{p}(y)=\\frac{\\epsilon+p(y)}{\\epsilon|\\mathcal{Y}|+\\displaystyle\\sum_{y\\in \\mathcal{Y}}p(y)}`
:param prevs: array-like of shape `(n_classes,)` with the true prevalence values
:param eps: smoothing factor
:return: array-like of shape `(n_classes,)` with the smoothed distribution
"""
n_classes = prevs.shape[-1]
return (prevs + eps) / (eps * n_classes + 1)
def __check_eps(eps=None):