<!-- QuaPy/docs/build/html/_modules/quapy/method/aggregative.html — Sphinx-generated viewcode page; do not edit by hand -->
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>quapy.method.aggregative &mdash; QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../../_static/documentation_options.js?v=22607128"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search">
<a href="../../../index.html" class="icon icon-home">
QuaPy: A Python-based open-source framework for quantification
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
<li class="breadcrumb-item active">quapy.method.aggregative</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for quapy.method.aggregative</h1><div class="highlight"><pre>
<span></span><span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABC</span><span class="p">,</span> <span class="n">abstractmethod</span>
<span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">deepcopy</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Callable</span><span class="p">,</span> <span class="n">Union</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">abstention.calibration</span> <span class="kn">import</span> <span class="n">NoBiasVectorScaling</span><span class="p">,</span> <span class="n">TempScaling</span><span class="p">,</span> <span class="n">VectorScaling</span>
<span class="kn">from</span> <span class="nn">scipy</span> <span class="kn">import</span> <span class="n">optimize</span>
<span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span>
<span class="kn">from</span> <span class="nn">sklearn.calibration</span> <span class="kn">import</span> <span class="n">CalibratedClassifierCV</span>
<span class="kn">from</span> <span class="nn">sklearn.metrics</span> <span class="kn">import</span> <span class="n">confusion_matrix</span>
<span class="kn">from</span> <span class="nn">sklearn.model_selection</span> <span class="kn">import</span> <span class="n">cross_val_predict</span>
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
<span class="kn">from</span> <span class="nn">quapy.functional</span> <span class="kn">import</span> <span class="n">get_divergence</span>
<span class="kn">from</span> <span class="nn">quapy.classification.calibration</span> <span class="kn">import</span> <span class="n">NBVSCalibration</span><span class="p">,</span> <span class="n">BCTSCalibration</span><span class="p">,</span> <span class="n">TSCalibration</span><span class="p">,</span> <span class="n">VSCalibration</span>
<span class="kn">from</span> <span class="nn">quapy.classification.svmperf</span> <span class="kn">import</span> <span class="n">SVMperf</span>
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
<span class="kn">from</span> <span class="nn">quapy.method.base</span> <span class="kn">import</span> <span class="n">BaseQuantifier</span><span class="p">,</span> <span class="n">BinaryQuantifier</span><span class="p">,</span> <span class="n">OneVsAllGeneric</span>
<span class="c1"># Abstract classes</span>
<span class="c1"># ------------------------------------</span>
<div class="viewcode-block" id="AggregativeQuantifier">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeQuantifier">[docs]</a>
<span class="k">class</span> <span class="nc">AggregativeQuantifier</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">,</span> <span class="n">ABC</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Abstract class for quantification methods that base their estimations on the aggregation of classification</span>
<span class="sd"> results. Aggregative quantifiers implement a pipeline that consists of generating classification predictions</span>
<span class="sd"> and aggregating them. For this reason, the training phase is implemented by :meth:`classification_fit` followed</span>
<span class="sd"> by :meth:`aggregation_fit`, while the testing phase is implemented by :meth:`classify` followed by</span>
<span class="sd"> :meth:`aggregate`. Subclasses of this abstract class must provide implementations for these methods.</span>
<span class="sd"> Aggregative quantifiers also maintain a :attr:`classifier` attribute.</span>
<span class="sd"> The method :meth:`fit` comes with a default implementation based on :meth:`classification_fit`</span>
<span class="sd"> and :meth:`aggregation_fit`.</span>
<span class="sd"> The method :meth:`quantify` comes with a default implementation based on :meth:`classify`</span>
<span class="sd"> and :meth:`aggregate`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">val_split_</span> <span class="o">=</span> <span class="kc">None</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">val_split</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">val_split_</span>
<span class="nd">@val_split</span><span class="o">.</span><span class="n">setter</span>
<span class="k">def</span> <span class="nf">val_split</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">val_split</span><span class="p">):</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">val_split</span><span class="p">,</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;warning: setting val_split with a LabelledCollection will be inefficient in&#39;</span>
<span class="s1">&#39;model selection. Rather pass the LabelledCollection at fit time&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split_</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="k">def</span> <span class="nf">_check_init_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Implements any check to be performed in the parameters of the init method before undertaking</span>
<span class="sd"> the training of the quantifier. This is made as to allow for a quick execution stop when the</span>
<span class="sd"> parameters are not valid.</span>
<span class="sd"> :return: Nothing. May raise an exception.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">pass</span>
<span class="k">def</span> <span class="nf">_check_non_empty_classes</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Asserts all classes have positive instances.</span>
<span class="sd"> :param data: LabelledCollection</span>
<span class="sd"> :return: Nothing. May raise an exception.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">sample_prevs</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
<span class="n">empty_classes</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argwhere</span><span class="p">(</span><span class="n">sample_prevs</span><span class="o">==</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">empty_classes</span><span class="p">)</span><span class="o">&gt;</span><span class="mi">0</span><span class="p">:</span>
<span class="n">empty_class_names</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">classes_</span><span class="p">[</span><span class="n">empty_classes</span><span class="p">]</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;classes </span><span class="si">{</span><span class="n">empty_class_names</span><span class="si">}</span><span class="s1"> have no training examples&#39;</span><span class="p">)</span>
<div class="viewcode-block" id="AggregativeQuantifier.fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeQuantifier.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function.</span>
<span class="sd"> :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data</span>
<span class="sd"> :param fit_classifier: whether to train the learner (default is True). Set to False if the</span>
<span class="sd"> learner has been trained outside the quantifier.</span>
<span class="sd"> :return: self</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_check_init_parameters</span><span class="p">()</span>
<span class="n">classif_predictions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier_fit_predict</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="p">,</span> <span class="n">predict_on</span><span class="o">=</span><span class="n">val_split</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">aggregation_fit</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="AggregativeQuantifier.classifier_fit_predict">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeQuantifier.classifier_fit_predict">[docs]</a>
<span class="k">def</span> <span class="nf">classifier_fit_predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">predict_on</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Trains the classifier if requested (`fit_classifier=True`) and generate the necessary predictions to</span>
<span class="sd"> train the aggregation function.</span>
<span class="sd"> :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data</span>
<span class="sd"> :param fit_classifier: whether to train the learner (default is True). Set to False if the</span>
<span class="sd"> learner has been trained outside the quantifier.</span>
<span class="sd"> :param predict_on: specifies the set on which predictions need to be issued. This parameter can</span>
<span class="sd"> be specified as None (default) to indicate no prediction is needed; a float in (0, 1) to</span>
<span class="sd"> indicate the proportion of instances to be used for predictions (the remainder is used for</span>
<span class="sd"> training); an integer &gt;1 to indicate that the predictions must be generated via k-fold</span>
<span class="sd"> cross-validation, using this integer as k; or the data sample itself on which to generate</span>
<span class="sd"> the predictions.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">fit_classifier</span><span class="p">,</span> <span class="nb">bool</span><span class="p">),</span> <span class="s1">&#39;unexpected type for &quot;fit_classifier&quot;, must be boolean&#39;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_check_classifier</span><span class="p">(</span><span class="n">adapt_if_necessary</span><span class="o">=</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_classifier_method</span><span class="p">()</span> <span class="o">==</span> <span class="s1">&#39;predict_proba&#39;</span><span class="p">))</span>
<span class="k">if</span> <span class="n">fit_classifier</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_check_non_empty_classes</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="k">if</span> <span class="n">predict_on</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">predict_on</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">val_split</span>
<span class="k">if</span> <span class="n">predict_on</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="n">fit_classifier</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="o">*</span><span class="n">data</span><span class="o">.</span><span class="n">Xy</span><span class="p">)</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">predict_on</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
<span class="k">if</span> <span class="n">fit_classifier</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="mf">0.</span> <span class="o">&lt;</span> <span class="n">predict_on</span> <span class="o">&lt;</span> <span class="mf">1.</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;proportion </span><span class="si">{</span><span class="n">predict_on</span><span class="si">=}</span><span class="s1"> out of range, must be in (0,1)&#39;</span><span class="p">)</span>
<span class="n">train</span><span class="p">,</span> <span class="n">val</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">predict_on</span><span class="p">))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="o">*</span><span class="n">train</span><span class="o">.</span><span class="n">Xy</span><span class="p">)</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classify</span><span class="p">(</span><span class="n">val</span><span class="o">.</span><span class="n">X</span><span class="p">),</span> <span class="n">val</span><span class="o">.</span><span class="n">y</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;wrong type for predict_on: since fit_classifier=False, &#39;</span>
<span class="sa">f</span><span class="s1">&#39;the set on which predictions have to be issued must be &#39;</span>
<span class="sa">f</span><span class="s1">&#39;explicitly indicated&#39;</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">predict_on</span><span class="p">,</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="k">if</span> <span class="n">fit_classifier</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="o">*</span><span class="n">data</span><span class="o">.</span><span class="n">Xy</span><span class="p">)</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classify</span><span class="p">(</span><span class="n">predict_on</span><span class="o">.</span><span class="n">X</span><span class="p">),</span> <span class="n">predict_on</span><span class="o">.</span><span class="n">y</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">predict_on</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">predict_on</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="k">if</span> <span class="n">fit_classifier</span><span class="p">:</span>
<span class="k">if</span> <span class="n">predict_on</span> <span class="o">&lt;=</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;invalid value </span><span class="si">{</span><span class="n">predict_on</span><span class="si">}</span><span class="s1"> in fit. &#39;</span>
<span class="sa">f</span><span class="s1">&#39;Specify a integer &gt;1 for kFCV estimation.&#39;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">n_jobs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">&#39;n_jobs&#39;</span><span class="p">)</span> <span class="k">else</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="n">cross_val_predict</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">,</span> <span class="o">*</span><span class="n">data</span><span class="o">.</span><span class="n">Xy</span><span class="p">,</span> <span class="n">cv</span><span class="o">=</span><span class="n">predict_on</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_classifier_method</span><span class="p">())</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">y</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="o">*</span><span class="n">data</span><span class="o">.</span><span class="n">Xy</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;wrong type for predict_on: since fit_classifier=False, &#39;</span>
<span class="sa">f</span><span class="s1">&#39;the set on which predictions have to be issued must be &#39;</span>
<span class="sa">f</span><span class="s1">&#39;explicitly indicated&#39;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s1">&#39;error: param &quot;predict_on&quot; (</span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">predict_on</span><span class="p">)</span><span class="si">}</span><span class="s1">) not understood; &#39;</span>
<span class="sa">f</span><span class="s1">&#39;use either a float indicating the split proportion, or a &#39;</span>
<span class="sa">f</span><span class="s1">&#39;tuple (X,y) indicating the validation partition&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">predictions</span></div>
<div class="viewcode-block" id="AggregativeQuantifier.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeQuantifier.aggregation_fit">[docs]</a>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Trains the aggregation function.</span>
<span class="sd"> :param classif_predictions: a LabelledCollection containing the label predictions issued</span>
<span class="sd"> by the classifier</span>
<span class="sd"> :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="o">...</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">classifier</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Gives access to the classifier</span>
<span class="sd"> :return: the classifier (typically an sklearn&#39;s Estimator)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier_</span>
<span class="nd">@classifier</span><span class="o">.</span><span class="n">setter</span>
<span class="k">def</span> <span class="nf">classifier</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Setter for the classifier</span>
<span class="sd"> :param classifier: the classifier</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier_</span> <span class="o">=</span> <span class="n">classifier</span>
<div class="viewcode-block" id="AggregativeQuantifier.classify">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeQuantifier.classify">[docs]</a>
<span class="k">def</span> <span class="nf">classify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Provides the label predictions for the given instances. The predictions should respect the format expected by</span>
<span class="sd"> :meth:`aggregate`, e.g., posterior probabilities for probabilistic quantifiers, or crisp predictions for</span>
<span class="sd"> non-probabilistic quantifiers. The default one is &quot;decision_function&quot;.</span>
<span class="sd"> :param instances: array-like of shape `(n_instances, n_features,)`</span>
<span class="sd"> :return: np.ndarray of shape `(n_instances,)` with label predictions</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_classifier_method</span><span class="p">())(</span><span class="n">instances</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_classifier_method</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Name of the method that must be used for issuing label predictions. The default one is &quot;decision_function&quot;.</span>
<span class="sd"> :return: string</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="s1">&#39;decision_function&#39;</span>
<span class="k">def</span> <span class="nf">_check_classifier</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">adapt_if_necessary</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Guarantees that the underlying classifier implements the method required for issuing predictions, i.e.,</span>
<span class="sd"> the method indicated by the :meth:`_classifier_method`</span>
<span class="sd"> :param adapt_if_necessary: if True, the method will try to comply with the required specifications</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_classifier_method</span><span class="p">()),</span> \
<span class="sa">f</span><span class="s2">&quot;the method does not implement the required </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">_classifier_method</span><span class="p">()</span><span class="si">}</span><span class="s2"> method&quot;</span>
<div class="viewcode-block" id="AggregativeQuantifier.quantify">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeQuantifier.quantify">[docs]</a>
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Generate class prevalence estimates for the sample&#39;s instances by aggregating the label predictions generated</span>
<span class="sd"> by the classifier.</span>
<span class="sd"> :param instances: array-like</span>
<span class="sd"> :return: `np.ndarray` of shape `(n_classes)` with class prevalence estimates.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">classif_predictions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">aggregate</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">)</span></div>
<div class="viewcode-block" id="AggregativeQuantifier.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeQuantifier.aggregate">[docs]</a>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Implements the aggregation of label predictions.</span>
<span class="sd"> :param classif_predictions: `np.ndarray` of label predictions</span>
<span class="sd"> :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="o">...</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">classes_</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Class labels, in the same order in which class prevalence values are to be computed.</span>
<span class="sd"> This default implementation actually returns the class labels of the learner.</span>
<span class="sd"> :return: array-like</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">classes_</span></div>
<div class="viewcode-block" id="AggregativeCrispQuantifier">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeCrispQuantifier">[docs]</a>
<span class="k">class</span> <span class="nc">AggregativeCrispQuantifier</span><span class="p">(</span><span class="n">AggregativeQuantifier</span><span class="p">,</span> <span class="n">ABC</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Abstract class for quantification methods that base their estimations on the aggregation of crips decisions</span>
<span class="sd"> as returned by a hard classifier. Aggregative crisp quantifiers thus extend Aggregative</span>
<span class="sd"> Quantifiers by implementing specifications about crisp predictions.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">_classifier_method</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Name of the method that must be used for issuing label predictions. For crisp quantifiers, the method</span>
<span class="sd"> is &#39;predict&#39;, that returns an array of shape `(n_instances,)` of label predictions.</span>
<span class="sd"> :return: the string &quot;predict&quot;, i.e., the standard method name for scikit-learn hard predictions</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="s1">&#39;predict&#39;</span></div>
<div class="viewcode-block" id="AggregativeSoftQuantifier">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeSoftQuantifier">[docs]</a>
<span class="k">class</span> <span class="nc">AggregativeSoftQuantifier</span><span class="p">(</span><span class="n">AggregativeQuantifier</span><span class="p">,</span> <span class="n">ABC</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Abstract class for quantification methods that base their estimations on the aggregation of posterior</span>
<span class="sd"> probabilities as returned by a probabilistic classifier.</span>
<span class="sd"> Aggregative soft quantifiers thus extend Aggregative Quantifiers by implementing specifications</span>
<span class="sd"> about soft predictions.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">_classifier_method</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Name of the method that must be used for issuing label predictions. For probabilistic quantifiers, the method</span>
<span class="sd"> is &#39;predict_proba&#39;, that returns an array of shape `(n_instances, n_dimensions,)` with posterior</span>
<span class="sd"> probabilities.</span>
<span class="sd"> :return: the string &quot;predict_proba&quot;, i.e., the standard method name for scikit-learn soft predictions</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="s1">&#39;predict_proba&#39;</span>
<span class="k">def</span> <span class="nf">_check_classifier</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">adapt_if_necessary</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Guarantees that the underlying classifier implements the method indicated by the :meth:`_classifier_method`.</span>
<span class="sd"> In case it does not, the classifier is calibrated (by means of the Platt&#39;s calibration method implemented by</span>
<span class="sd"> scikit-learn in CalibratedClassifierCV, with cv=5). This calibration is only allowed if `adapt_if_necessary`</span>
<span class="sd"> is set to True. If otherwise (i.e., the classifier is not probabilistic, and `adapt_if_necessary` is set</span>
<span class="sd"> to False), an exception will be raised.</span>
<span class="sd"> :param adapt_if_necessary: a hard classifier is turned into a soft classifier if `adapt_if_necessary==True`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_classifier_method</span><span class="p">()):</span>
<span class="k">if</span> <span class="n">adapt_if_necessary</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;warning: The learner </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> does not seem to be &#39;</span>
<span class="sa">f</span><span class="s1">&#39;probabilistic. The learner will be calibrated (using CalibratedClassifierCV).&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">CalibratedClassifierCV</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">,</span> <span class="n">cv</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">AssertionError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;error: The learner </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> does not &#39;</span>
<span class="sa">f</span><span class="s1">&#39;seem to be probabilistic. The learner cannot be calibrated since &#39;</span>
<span class="sa">f</span><span class="s1">&#39;fit_classifier is set to False&#39;</span><span class="p">)</span></div>
<div class="viewcode-block" id="BinaryAggregativeQuantifier">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.BinaryAggregativeQuantifier">[docs]</a>
<span class="k">class</span> <span class="nc">BinaryAggregativeQuantifier</span><span class="p">(</span><span class="n">AggregativeQuantifier</span><span class="p">,</span> <span class="n">BinaryQuantifier</span><span class="p">):</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">pos_label</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">classes_</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">neg_label</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">classes_</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<div class="viewcode-block" id="BinaryAggregativeQuantifier.fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.BinaryAggregativeQuantifier.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_check_binary</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="p">)</span></div>
</div>
<span class="c1"># Methods</span>
<span class="c1"># ------------------------------------</span>
<div class="viewcode-block" id="CC">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.CC">[docs]</a>
<span class="k">class</span> <span class="nc">CC</span><span class="p">(</span><span class="n">AggregativeCrispQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The most basic Quantification method. One that simply classifies all instances and counts how many have been</span>
<span class="sd"> attributed to each of the classes in order to compute class prevalence estimates.</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<div class="viewcode-block" id="CC.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.CC.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Nothing to do here!</span>
<span class="sd"> :param classif_predictions: this is actually None</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">pass</span></div>
<div class="viewcode-block" id="CC.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.CC.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Computes class prevalence estimates by counting the prevalence of each of the predicted labels.</span>
<span class="sd"> :param classif_predictions: array-like with label predictions</span>
<span class="sd"> :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">prevalence_from_labels</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="ACC">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.ACC">[docs]</a>
<span class="k">class</span> <span class="nc">ACC</span><span class="p">(</span><span class="n">AggregativeCrispQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> `Adjusted Classify &amp; Count &lt;https://link.springer.com/article/10.1007/s10618-008-0097-y&gt;`_,</span>
<span class="sd"> the &quot;adjusted&quot; variant of :class:`CC`, that corrects the predictions of CC</span>
<span class="sd"> according to the `misclassification rates`.</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> :param val_split: specifies the data used for generating classifier predictions. This specification</span>
<span class="sd"> can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to</span>
<span class="sd"> be extracted from the training set; or as an integer (default 5), indicating that the predictions</span>
<span class="sd"> are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value</span>
<span class="sd"> for `k`); or as a collection defining the specific set of data to use for validation.</span>
<span class="sd"> Alternatively, this set can be specified at fit time by indicating the exact set of data</span>
<span class="sd"> on which the predictions are to be generated.</span>
<span class="sd"> :param n_jobs: number of parallel workers</span>
<span class="sd"> :param solver: indicates the method to be used for obtaining the final estimates. The choice</span>
<span class="sd"> &#39;exact&#39; comes down to solving the system of linear equations :math:`Ax=B` where `A` is a</span>
<span class="sd"> matrix containing the class-conditional probabilities of the predictions (e.g., the tpr and fpr in </span>
<span class="sd"> binary) and `B` is the vector of prevalence values estimated via CC, as :math:`x=A^{-1}B`. This solution</span>
<span class="sd"> might not exist for degenerated classifiers, in which case the method defaults to classify and count </span>
<span class="sd"> (i.e., does not attempt any adjustment).</span>
<span class="sd"> Another option is to search for the prevalence vector that minimizes the L2 norm of :math:`|Ax-B|`. The latter</span>
<span class="sd"> is achieved by indicating solver=&#39;minimize&#39;. This one generally works better, and is the default parameter.</span>
<span class="sd"> More details about this can be consulted in `Bunse, M. &quot;On Multi-Class Extensions of Adjusted Classify and</span>
<span class="sd"> Count&quot;, on proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications</span>
<span class="sd"> (LQ 2022), ECML/PKDD 2022, Grenoble (France) &lt;https://lq-2022.github.io/proceedings/CompleteVolume.pdf&gt;`_.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">solver</span><span class="o">=</span><span class="s1">&#39;minimize&#39;</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">solver</span> <span class="o">=</span> <span class="n">solver</span>
<span class="k">def</span> <span class="nf">_check_init_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">solver</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;exact&#39;</span><span class="p">,</span> <span class="s1">&#39;minimize&#39;</span><span class="p">],</span> <span class="s2">&quot;unknown solver; valid ones are &#39;exact&#39;, &#39;minimize&#39;&quot;</span>
<div class="viewcode-block" id="ACC.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.ACC.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Estimates the misclassification rates.</span>
<span class="sd"> :param classif_predictions: classifier predictions with true labels</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">pred_labels</span><span class="p">,</span> <span class="n">true_labels</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
<span class="bp">self</span><span class="o">.</span><span class="n">cc</span> <span class="o">=</span> <span class="n">CC</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pte_cond_estim_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">getPteCondEstim</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">classes_</span><span class="p">,</span> <span class="n">true_labels</span><span class="p">,</span> <span class="n">pred_labels</span><span class="p">)</span></div>
<div class="viewcode-block" id="ACC.getPteCondEstim">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.ACC.getPteCondEstim">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">getPteCondEstim</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">classes</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">y_</span><span class="p">):</span>
<span class="c1"># estimate the matrix with entry (i,j) being the estimate of P(hat_yi|yj), that is, the probability that a</span>
<span class="c1"># document that belongs to yj ends up being classified as belonging to yi</span>
<span class="n">conf</span> <span class="o">=</span> <span class="n">confusion_matrix</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">y_</span><span class="p">,</span> <span class="n">labels</span><span class="o">=</span><span class="n">classes</span><span class="p">)</span><span class="o">.</span><span class="n">T</span>
<span class="n">conf</span> <span class="o">=</span> <span class="n">conf</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span>
<span class="n">class_counts</span> <span class="o">=</span> <span class="n">conf</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">classes</span><span class="p">):</span>
<span class="k">if</span> <span class="n">class_counts</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">conf</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="mi">1</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">conf</span><span class="p">[:,</span> <span class="n">i</span><span class="p">]</span> <span class="o">/=</span> <span class="n">class_counts</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
<span class="k">return</span> <span class="n">conf</span></div>
<div class="viewcode-block" id="ACC.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.ACC.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">):</span>
<span class="n">prevs_estim</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">cc</span><span class="o">.</span><span class="n">aggregate</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">)</span>
<span class="k">return</span> <span class="n">ACC</span><span class="o">.</span><span class="n">solve_adjustment</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pte_cond_estim_</span><span class="p">,</span> <span class="n">prevs_estim</span><span class="p">,</span> <span class="n">solver</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">solver</span><span class="p">)</span></div>
<div class="viewcode-block" id="ACC.solve_adjustment">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.ACC.solve_adjustment">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">solve_adjustment</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">PteCondEstim</span><span class="p">,</span> <span class="n">prevs_estim</span><span class="p">,</span> <span class="n">solver</span><span class="o">=</span><span class="s1">&#39;exact&#39;</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Solves the system linear system :math:`Ax = B` with :math:`A` = `PteCondEstim` and :math:`B` = `prevs_estim`</span>
<span class="sd"> :param PteCondEstim: a `np.ndarray` of shape `(n_classes,n_classes,)` with entry `(i,j)` being the estimate</span>
<span class="sd"> of :math:`P(y_i|y_j)`, that is, the probability that an instance that belongs to :math:`y_j` ends up being</span>
<span class="sd"> classified as belonging to :math:`y_i`</span>
<span class="sd"> :param prevs_estim: a `np.ndarray` of shape `(n_classes,)` with the class prevalence estimates</span>
<span class="sd"> :param solver: indicates the method to use for solving the system of linear equations. Valid options are</span>
<span class="sd"> &#39;exact&#39; (tries to solve the system --may fail if the misclassificatin matrix has rank &lt; n_classes) or</span>
<span class="sd"> &#39;optim_minimize&#39; (minimizes a norm --always exists). </span>
<span class="sd"> :return: an adjusted `np.ndarray` of shape `(n_classes,)` with the corrected class prevalence estimates</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">A</span> <span class="o">=</span> <span class="n">PteCondEstim</span>
<span class="n">B</span> <span class="o">=</span> <span class="n">prevs_estim</span>
<span class="k">if</span> <span class="n">solver</span> <span class="o">==</span> <span class="s1">&#39;exact&#39;</span><span class="p">:</span>
<span class="c1"># attempts an exact solution of the linear system (may fail)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">adjusted_prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linalg</span><span class="o">.</span><span class="n">solve</span><span class="p">(</span><span class="n">A</span><span class="p">,</span> <span class="n">B</span><span class="p">)</span>
<span class="n">adjusted_prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="n">adjusted_prevs</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">adjusted_prevs</span> <span class="o">/=</span> <span class="n">adjusted_prevs</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="k">except</span> <span class="n">np</span><span class="o">.</span><span class="n">linalg</span><span class="o">.</span><span class="n">LinAlgError</span><span class="p">:</span>
<span class="n">adjusted_prevs</span> <span class="o">=</span> <span class="n">prevs_estim</span> <span class="c1"># no way to adjust them!</span>
<span class="k">return</span> <span class="n">adjusted_prevs</span>
<span class="k">elif</span> <span class="n">solver</span> <span class="o">==</span> <span class="s1">&#39;minimize&#39;</span><span class="p">:</span>
<span class="c1"># poses the problem as an optimization one, and tries to minimize the norm of the differences</span>
<span class="k">def</span> <span class="nf">loss</span><span class="p">(</span><span class="n">prev</span><span class="p">):</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">linalg</span><span class="o">.</span><span class="n">norm</span><span class="p">(</span><span class="n">A</span> <span class="o">@</span> <span class="n">prev</span> <span class="o">-</span> <span class="n">B</span><span class="p">)</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">optim_minimize</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="o">=</span><span class="n">A</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span></div>
</div>
<div class="viewcode-block" id="PCC">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.PCC">[docs]</a>
<span class="k">class</span> <span class="nc">PCC</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> `Probabilistic Classify &amp; Count &lt;https://ieeexplore.ieee.org/abstract/document/5694031&gt;`_,</span>
<span class="sd"> the probabilistic variant of CC that relies on the posterior probabilities returned by a probabilistic classifier.</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<div class="viewcode-block" id="PCC.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.PCC.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Nothing to do here!</span>
<span class="sd"> :param classif_predictions: this is actually None</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">pass</span></div>
<div class="viewcode-block" id="PCC.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.PCC.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_posteriors</span><span class="p">):</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">prevalence_from_probabilities</span><span class="p">(</span><span class="n">classif_posteriors</span><span class="p">,</span> <span class="n">binarize</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="PACC">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.PACC">[docs]</a>
<span class="k">class</span> <span class="nc">PACC</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> `Probabilistic Adjusted Classify &amp; Count &lt;https://ieeexplore.ieee.org/abstract/document/5694031&gt;`_,</span>
<span class="sd"> the probabilistic variant of ACC that relies on the posterior probabilities returned by a probabilistic classifier.</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> :param val_split: specifies the data used for generating classifier predictions. This specification</span>
<span class="sd"> can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to</span>
<span class="sd"> be extracted from the training set; or as an integer (default 5), indicating that the predictions</span>
<span class="sd"> are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value</span>
<span class="sd"> for `k`). Alternatively, this set can be specified at fit time by indicating the exact set of data</span>
<span class="sd"> on which the predictions are to be generated.</span>
<span class="sd"> :param n_jobs: number of parallel workers</span>
<span class="sd"> :param solver: indicates the method to be used for obtaining the final estimates. The choice</span>
<span class="sd"> &#39;exact&#39; comes down to solving the system of linear equations :math:`Ax=B` where `A` is a</span>
<span class="sd"> matrix containing the class-conditional probabilities of the predictions (e.g., the tpr and fpr in</span>
<span class="sd"> binary) and `B` is the vector of prevalence values estimated via CC, as :math:`x=A^{-1}B`. This solution</span>
<span class="sd"> might not exist for degenerated classifiers, in which case the method defaults to classify and count</span>
<span class="sd"> (i.e., does not attempt any adjustment).</span>
<span class="sd"> Another option is to search for the prevalence vector that minimizes the L2 norm of :math:`|Ax-B|`. The latter</span>
<span class="sd"> is achieved by indicating solver=&#39;minimize&#39;. This one generally works better, and is the default parameter.</span>
<span class="sd"> More details about this can be consulted in `Bunse, M. &quot;On Multi-Class Extensions of Adjusted Classify and</span>
<span class="sd"> Count&quot;, on proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications</span>
<span class="sd"> (LQ 2022), ECML/PKDD 2022, Grenoble (France) &lt;https://lq-2022.github.io/proceedings/CompleteVolume.pdf&gt;`_.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">solver</span><span class="o">=</span><span class="s1">&#39;minimize&#39;</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">solver</span> <span class="o">=</span> <span class="n">solver</span>
<span class="k">def</span> <span class="nf">_check_init_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">solver</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;exact&#39;</span><span class="p">,</span> <span class="s1">&#39;minimize&#39;</span><span class="p">],</span> <span class="s2">&quot;unknown solver; valid ones are &#39;exact&#39;, &#39;minimize&#39;&quot;</span>
<div class="viewcode-block" id="PACC.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.PACC.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Estimates the misclassification rates</span>
<span class="sd"> :param classif_predictions: classifier soft predictions with true labels</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">posteriors</span><span class="p">,</span> <span class="n">true_labels</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pcc</span> <span class="o">=</span> <span class="n">PCC</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pte_cond_estim_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">getPteCondEstim</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">classes_</span><span class="p">,</span> <span class="n">true_labels</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">)</span></div>
<div class="viewcode-block" id="PACC.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.PACC.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_posteriors</span><span class="p">):</span>
<span class="n">prevs_estim</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">pcc</span><span class="o">.</span><span class="n">aggregate</span><span class="p">(</span><span class="n">classif_posteriors</span><span class="p">)</span>
<span class="k">return</span> <span class="n">ACC</span><span class="o">.</span><span class="n">solve_adjustment</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pte_cond_estim_</span><span class="p">,</span> <span class="n">prevs_estim</span><span class="p">,</span> <span class="n">solver</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">solver</span><span class="p">)</span></div>
<div class="viewcode-block" id="PACC.getPteCondEstim">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.PACC.getPteCondEstim">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">getPteCondEstim</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">classes</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">y_</span><span class="p">):</span>
<span class="c1"># estimate the matrix with entry (i,j) being the estimate of P(hat_yi|yj), that is, the probability that a</span>
<span class="c1"># document that belongs to yj ends up being classified as belonging to yi</span>
<span class="n">n_classes</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">classes</span><span class="p">)</span>
<span class="n">confusion</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">eye</span><span class="p">(</span><span class="n">n_classes</span><span class="p">)</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">class_</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">classes</span><span class="p">):</span>
<span class="n">idx</span> <span class="o">=</span> <span class="n">y</span> <span class="o">==</span> <span class="n">class_</span>
<span class="k">if</span> <span class="n">idx</span><span class="o">.</span><span class="n">any</span><span class="p">():</span>
<span class="n">confusion</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">y_</span><span class="p">[</span><span class="n">idx</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="k">return</span> <span class="n">confusion</span><span class="o">.</span><span class="n">T</span></div>
</div>
<div class="viewcode-block" id="EMQ">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.EMQ">[docs]</a>
<span class="k">class</span> <span class="nc">EMQ</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> `Expectation Maximization for Quantification &lt;https://ieeexplore.ieee.org/abstract/document/6789744&gt;`_ (EMQ),</span>
<span class="sd"> aka `Saerens-Latinne-Decaestecker` (SLD) algorithm.</span>
<span class="sd"> EMQ consists of using the well-known `Expectation Maximization algorithm` to iteratively update the posterior</span>
<span class="sd"> probabilities generated by a probabilistic classifier and the class prevalence estimates obtained via</span>
<span class="sd"> maximum-likelihood estimation, in a mutually recursive way, until convergence.</span>
<span class="sd"> This implementation also gives access to the heuristics proposed by `Alexandari et al. paper</span>
<span class="sd"> &lt;http://proceedings.mlr.press/v119/alexandari20a.html&gt;`_. These heuristics consist of using, as the training</span>
<span class="sd"> prevalence, an estimate of it obtained via k-fold cross validation (instead of the true training prevalence),</span>
<span class="sd"> and to recalibrate the posterior probabilities of the classifier.</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> :param val_split: specifies the data used for generating classifier predictions. This specification</span>
<span class="sd"> can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to</span>
<span class="sd"> be extracted from the training set; or as an integer, indicating that the predictions</span>
<span class="sd"> are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value</span>
<span class="sd"> for `k`, default 5); or as a collection defining the specific set of data to use for validation.</span>
<span class="sd"> Alternatively, this set can be specified at fit time by indicating the exact set of data</span>
<span class="sd"> on which the predictions are to be generated. This hyperparameter is only meant to be used when the</span>
<span class="sd"> heuristics are to be applied, i.e., if a recalibration is required. The default value is None (meaning</span>
<span class="sd"> the recalibration is not required). In case this hyperparameter is set to a value other than None, but</span>
<span class="sd"> the recalibration is not required (recalib=None), a warning message will be raised.</span>
<span class="sd"> :param exact_train_prev: set to True (default) for using the true training prevalence as the initial observation;</span>
<span class="sd"> set to False for computing the training prevalence as an estimate of it, i.e., as the expected</span>
<span class="sd"> value of the posterior probabilities of the training instances.</span>
<span class="sd"> :param recalib: a string indicating the method of recalibration.</span>
<span class="sd"> Available choices include &quot;nbvs&quot; (No-Bias Vector Scaling), &quot;bcts&quot; (Bias-Corrected Temperature Scaling,</span>
<span class="sd"> default), &quot;ts&quot; (Temperature Scaling), and &quot;vs&quot; (Vector Scaling). Default is None (no recalibration).</span>
<span class="sd"> :param n_jobs: number of parallel workers. Only used for recalibrating the classifier if `val_split` is set to</span>
<span class="sd"> an integer `k` --the number of folds.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">MAX_ITER</span> <span class="o">=</span> <span class="mi">1000</span>
<span class="n">EPSILON</span> <span class="o">=</span> <span class="mf">1e-4</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">exact_train_prev</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">recalib</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">exact_train_prev</span> <span class="o">=</span> <span class="n">exact_train_prev</span>
<span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="o">=</span> <span class="n">recalib</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
<div class="viewcode-block" id="EMQ.EMQ_BCTS">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.EMQ.EMQ_BCTS">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">EMQ_BCTS</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Constructs an instance of EMQ using the best configuration found in the `Alexandari et al. paper</span>
<span class="sd"> &lt;http://proceedings.mlr.press/v119/alexandari20a.html&gt;`_, i.e., one that relies on Bias-Corrected Temperature</span>
<span class="sd"> Scaling (BCTS) as a recalibration function, and that uses an estimate of the training prevalence instead of</span>
<span class="sd"> the true training prevalence.</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a classifier</span>
<span class="sd"> :param n_jobs: number of parallel workers.</span>
<span class="sd"> :return: An instance of EMQ with BCTS</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">EMQ</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">exact_train_prev</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">recalib</span><span class="o">=</span><span class="s1">&#39;bcts&#39;</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_check_init_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">exact_train_prev</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeWarning</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;The parameter </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">val_split</span><span class="si">=}</span><span class="s1"> was specified for EMQ, while the parameters &#39;</span>
<span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">exact_train_prev</span><span class="si">=}</span><span class="s1"> and </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">recalib</span><span class="si">=}</span><span class="s1">. This has no effect and causes an unnecessary &#39;</span>
<span class="sa">f</span><span class="s1">&#39;overload.&#39;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;[warning] The parameter </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">recalib</span><span class="si">=}</span><span class="s1"> requires the val_split be different from None. &#39;</span>
<span class="sa">f</span><span class="s1">&#39;This parameter will be set to 5. To avoid this warning, set this value to a float value &#39;</span>
<span class="sa">f</span><span class="s1">&#39;indicating the proportion of training data to be used as validation, or to an integer &#39;</span>
<span class="sa">f</span><span class="s1">&#39;indicating the number of folds for kFCV.&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span><span class="o">=</span><span class="mi">5</span>
<div class="viewcode-block" id="EMQ.classify">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.EMQ.classify">[docs]</a>
<span class="k">def</span> <span class="nf">classify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Provides the posterior probabilities for the given instances. If the classifier was required</span>
<span class="sd"> to be recalibrated, then these posteriors are recalibrated accordingly.</span>
<span class="sd"> :param instances: array-like of shape `(n_instances, n_dimensions,)`</span>
<span class="sd"> :return: np.ndarray of shape `(n_instances, n_classes,)` with posterior probabilities</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">&#39;calibration_function&#39;</span><span class="p">)</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span><span class="p">(</span><span class="n">posteriors</span><span class="p">)</span>
<span class="k">return</span> <span class="n">posteriors</span></div>
<div class="viewcode-block" id="EMQ.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.EMQ.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">P</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="o">==</span> <span class="s1">&#39;nbvs&#39;</span><span class="p">:</span>
<span class="n">calibrator</span> <span class="o">=</span> <span class="n">NoBiasVectorScaling</span><span class="p">()</span>
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="o">==</span> <span class="s1">&#39;bcts&#39;</span><span class="p">:</span>
<span class="n">calibrator</span> <span class="o">=</span> <span class="n">TempScaling</span><span class="p">(</span><span class="n">bias_positions</span><span class="o">=</span><span class="s1">&#39;all&#39;</span><span class="p">)</span>
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="o">==</span> <span class="s1">&#39;ts&#39;</span><span class="p">:</span>
<span class="n">calibrator</span> <span class="o">=</span> <span class="n">TempScaling</span><span class="p">()</span>
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="o">==</span> <span class="s1">&#39;vs&#39;</span><span class="p">:</span>
<span class="n">calibrator</span> <span class="o">=</span> <span class="n">VectorScaling</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;invalid param argument for recalibration method; available ones are &#39;</span>
<span class="s1">&#39;&quot;nbvs&quot;, &quot;bcts&quot;, &quot;ts&quot;, and &quot;vs&quot;.&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span> <span class="o">=</span> <span class="n">calibrator</span><span class="p">(</span><span class="n">P</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">eye</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">)[</span><span class="n">y</span><span class="p">],</span> <span class="n">posterior_supplied</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">exact_train_prev</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">train_prevalence</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">train_posteriors</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">X</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">train_posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span><span class="p">(</span><span class="n">train_posteriors</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">train_prevalence</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">prevalence_from_probabilities</span><span class="p">(</span><span class="n">train_posteriors</span><span class="p">)</span></div>
<div class="viewcode-block" id="EMQ.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.EMQ.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_posteriors</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="n">EPSILON</span><span class="p">):</span>
<span class="n">priors</span><span class="p">,</span> <span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">EM</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">train_prevalence</span><span class="p">,</span> <span class="n">classif_posteriors</span><span class="p">,</span> <span class="n">epsilon</span><span class="p">)</span>
<span class="k">return</span> <span class="n">priors</span></div>
<div class="viewcode-block" id="EMQ.predict_proba">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.EMQ.predict_proba">[docs]</a>
<span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="n">EPSILON</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the posterior probabilities updated by the EM algorithm.</span>
<span class="sd"> :param instances: np.ndarray of shape `(n_instances, n_dimensions)`</span>
<span class="sd"> :param epsilon: error tolerance</span>
<span class="sd"> :return: np.ndarray of shape `(n_instances, n_classes)`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">classif_posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
<span class="n">priors</span><span class="p">,</span> <span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">EM</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">train_prevalence</span><span class="p">,</span> <span class="n">classif_posteriors</span><span class="p">,</span> <span class="n">epsilon</span><span class="p">)</span>
<span class="k">return</span> <span class="n">posteriors</span></div>
<div class="viewcode-block" id="EMQ.EM">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.EMQ.EM">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">EM</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">tr_prev</span><span class="p">,</span> <span class="n">posterior_probabilities</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="n">EPSILON</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Computes the `Expectation Maximization` routine.</span>
<span class="sd"> :param tr_prev: array-like, the training prevalence</span>
<span class="sd"> :param posterior_probabilities: `np.ndarray` of shape `(n_instances, n_classes,)` with the</span>
<span class="sd"> posterior probabilities</span>
<span class="sd"> :param epsilon: float, the threshold different between two consecutive iterations</span>
<span class="sd"> to reach before stopping the loop</span>
<span class="sd"> :return: a tuple with the estimated prevalence values (shape `(n_classes,)`) and</span>
<span class="sd"> the corrected posterior probabilities (shape `(n_instances, n_classes,)`)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">Px</span> <span class="o">=</span> <span class="n">posterior_probabilities</span>
<span class="n">Ptr</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">tr_prev</span><span class="p">)</span>
<span class="n">qs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">Ptr</span><span class="p">)</span> <span class="c1"># qs (the running estimate) is initialized as the training prevalence</span>
<span class="n">s</span><span class="p">,</span> <span class="n">converged</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> <span class="kc">False</span>
<span class="n">qs_prev_</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">while</span> <span class="ow">not</span> <span class="n">converged</span> <span class="ow">and</span> <span class="n">s</span> <span class="o">&lt;</span> <span class="n">EMQ</span><span class="o">.</span><span class="n">MAX_ITER</span><span class="p">:</span>
<span class="c1"># E-step: ps is Ps(y|xi)</span>
<span class="n">ps_unnormalized</span> <span class="o">=</span> <span class="p">(</span><span class="n">qs</span> <span class="o">/</span> <span class="n">Ptr</span><span class="p">)</span> <span class="o">*</span> <span class="n">Px</span>
<span class="n">ps</span> <span class="o">=</span> <span class="n">ps_unnormalized</span> <span class="o">/</span> <span class="n">ps_unnormalized</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">keepdims</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># M-step:</span>
<span class="n">qs</span> <span class="o">=</span> <span class="n">ps</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="k">if</span> <span class="n">qs_prev_</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">qs</span><span class="p">,</span> <span class="n">qs_prev_</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">epsilon</span> <span class="ow">and</span> <span class="n">s</span> <span class="o">&gt;</span> <span class="mi">10</span><span class="p">:</span>
<span class="n">converged</span> <span class="o">=</span> <span class="kc">True</span>
<span class="n">qs_prev_</span> <span class="o">=</span> <span class="n">qs</span>
<span class="n">s</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">converged</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;[warning] the method has reached the maximum number of iterations; it might have not converged&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">qs</span><span class="p">,</span> <span class="n">ps</span></div>
</div>
<div class="viewcode-block" id="HDy">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.HDy">[docs]</a>
<span class="k">class</span> <span class="nc">HDy</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">,</span> <span class="n">BinaryAggregativeQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> `Hellinger Distance y &lt;https://www.sciencedirect.com/science/article/pii/S0020025512004069&gt;`_ (HDy).</span>
<span class="sd"> HDy is a probabilistic method for training binary quantifiers, that models quantification as the problem of</span>
<span class="sd"> minimizing the divergence (in terms of the Hellinger Distance) between two distributions of posterior</span>
<span class="sd"> probabilities returned by the classifier. One of the distributions is generated from the unlabelled examples and</span>
<span class="sd"> the other is generated from a validation set. This latter distribution is defined as a mixture of the</span>
<span class="sd"> class-conditional distributions of the posterior probabilities returned for the positive and negative validation</span>
<span class="sd"> examples, respectively. The parameters of the mixture thus represent the estimates of the class prevalence values.</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a binary classifier</span>
<span class="sd"> :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out</span>
<span class="sd"> validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself), or an integer indicating the number of folds (default 5).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<div class="viewcode-block" id="HDy.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.HDy.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Trains a HDy quantifier.</span>
<span class="sd"> :param classif_predictions: a :class:`quapy.data.base.LabelledCollection` containing, as instances, the</span>
<span class="sd"> posterior probabilities issued by the classifier on the validation split, and, as labels, the true labels</span>
<span class="sd"> :param data: the training set</span>
<span class="sd"> :return: self</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">P</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
<span class="n">Px</span> <span class="o">=</span> <span class="n">P</span><span class="p">[:,</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span> <span class="c1"># takes only the P(y=+1|x)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy1</span> <span class="o">=</span> <span class="n">Px</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy0</span> <span class="o">=</span> <span class="n">Px</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">neg_label</span><span class="p">]</span>
<span class="c1"># pre-compute the histogram for positive and negative examples, one per bin-count candidate</span>
<span class="bp">self</span><span class="o">.</span><span class="n">bins</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">110</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">)</span> <span class="c1"># [10, 20, 30, ..., 100, 110]</span>
<span class="k">def</span> <span class="nf">hist</span><span class="p">(</span><span class="n">P</span><span class="p">,</span> <span class="n">bins</span><span class="p">):</span>
<span class="n">h</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">P</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="n">bins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">density</span><span class="o">=</span><span class="kc">True</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">return</span> <span class="n">h</span> <span class="o">/</span> <span class="n">h</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="c1"># renormalize so the histogram sums to 1</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy1_density</span> <span class="o">=</span> <span class="p">{</span><span class="n">bins</span><span class="p">:</span> <span class="n">hist</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pxy1</span><span class="p">,</span> <span class="n">bins</span><span class="p">)</span> <span class="k">for</span> <span class="n">bins</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">bins</span><span class="p">}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy0_density</span> <span class="o">=</span> <span class="p">{</span><span class="n">bins</span><span class="p">:</span> <span class="n">hist</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pxy0</span><span class="p">,</span> <span class="n">bins</span><span class="p">)</span> <span class="k">for</span> <span class="n">bins</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">bins</span><span class="p">}</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="HDy.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.HDy.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_posteriors</span><span class="p">):</span>
<span class="c1"># &quot;In this work, the number of bins b used in HDx and HDy was chosen from 10 to 110 in steps of 10,</span>
<span class="c1"># and the final estimated a priori probability was taken as the median of these 11 estimates.&quot;</span>
<span class="c1"># (González-Castro, et al., 2013).</span>
<span class="n">Px</span> <span class="o">=</span> <span class="n">classif_posteriors</span><span class="p">[:,</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span> <span class="c1"># takes only the P(y=+1|x)</span>
<span class="n">prev_estimations</span> <span class="o">=</span> <span class="p">[]</span>
<span class="c1"># for bins in np.linspace(10, 110, 11, dtype=int): #[10, 20, 30, ..., 100, 110]</span>
<span class="c1"># Pxy0_density, _ = np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True)</span>
<span class="c1"># Pxy1_density, _ = np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True)</span>
<span class="k">for</span> <span class="n">bins</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">bins</span><span class="p">:</span>
<span class="n">Pxy0_density</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">Pxy0_density</span><span class="p">[</span><span class="n">bins</span><span class="p">]</span>
<span class="n">Pxy1_density</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">Pxy1_density</span><span class="p">[</span><span class="n">bins</span><span class="p">]</span>
<span class="n">Px_test</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">Px</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="n">bins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">density</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><!-- NOTE(review): Px_test keeps the raw density (sums to the number of bins over range (0,1)), while Pxy0_density/Pxy1_density were renormalized to sum to 1 in hist(); the two HellingerDistance operands are therefore on different scales. Verify against the quapy sources whether Px_test should also be divided by its sum. -->
<span class="c1"># the authors proposed to search for the prevalence yielding the best matching as a linear search</span>
<span class="c1"># at small steps (modern implementations resort to an optimization procedure,</span>
<span class="c1"># see class DistributionMatching)</span>
<span class="n">prev_selected</span><span class="p">,</span> <span class="n">min_dist</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span>
<span class="k">for</span> <span class="n">prev</span> <span class="ow">in</span> <span class="n">F</span><span class="o">.</span><span class="n">prevalence_linspace</span><span class="p">(</span><span class="n">n_prevalences</span><span class="o">=</span><span class="mi">101</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">smooth_limits_epsilon</span><span class="o">=</span><span class="mf">0.0</span><span class="p">):</span>
<span class="n">Px_train</span> <span class="o">=</span> <span class="n">prev</span> <span class="o">*</span> <span class="n">Pxy1_density</span> <span class="o">+</span> <span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">prev</span><span class="p">)</span> <span class="o">*</span> <span class="n">Pxy0_density</span> <span class="c1"># mixture of the class-conditional histograms</span>
<span class="n">hdy</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">HellingerDistance</span><span class="p">(</span><span class="n">Px_train</span><span class="p">,</span> <span class="n">Px_test</span><span class="p">)</span>
<span class="k">if</span> <span class="n">prev_selected</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">hdy</span> <span class="o">&lt;</span> <span class="n">min_dist</span><span class="p">:</span>
<span class="n">prev_selected</span><span class="p">,</span> <span class="n">min_dist</span> <span class="o">=</span> <span class="n">prev</span><span class="p">,</span> <span class="n">hdy</span>
<span class="n">prev_estimations</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">prev_selected</span><span class="p">)</span>
<span class="n">class1_prev</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">prev_estimations</span><span class="p">)</span> <span class="c1"># median across the 11 bin-count candidates</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">as_binary_prevalence</span><span class="p">(</span><span class="n">class1_prev</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="DyS">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.DyS">[docs]</a>
<span class="k">class</span> <span class="nc">DyS</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">,</span> <span class="n">BinaryAggregativeQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> `DyS framework &lt;https://ojs.aaai.org/index.php/AAAI/article/view/4376&gt;`_ (DyS).</span>
<span class="sd"> DyS is a generalization of HDy method, using a Ternary Search in order to find the prevalence that</span>
<span class="sd"> minimizes the distance between distributions.</span>
<span class="sd"> Details for the ternary search have been got from &lt;https://dl.acm.org/doi/pdf/10.1145/3219819.3220059&gt;</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a binary classifier</span>
<span class="sd"> :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out</span>
<span class="sd"> validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself), or an integer indicating the number of folds (default 5).</span>
<span class="sd"> :param n_bins: an int with the number of bins to use to compute the histograms.</span>
<span class="sd"> :param divergence: a str indicating the name of divergence (currently supported ones are &quot;HD&quot; or &quot;topsoe&quot;), or a</span>
<span class="sd"> callable function computes the divergence between two distributions (two equally sized arrays).</span>
<span class="sd"> :param tol: a float with the tolerance for the ternary search algorithm.</span>
<span class="sd"> :param n_jobs: number of parallel workers.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_bins</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">divergence</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">]</span><span class="o">=</span> <span class="s1">&#39;HD&#39;</span><span class="p">,</span> <span class="n">tol</span><span class="o">=</span><span class="mf">1e-05</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">tol</span> <span class="o">=</span> <span class="n">tol</span>
<span class="bp">self</span><span class="o">.</span><span class="n">divergence</span> <span class="o">=</span> <span class="n">divergence</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_bins</span> <span class="o">=</span> <span class="n">n_bins</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
<span class="k">def</span> <span class="nf">_ternary_search</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">f</span><span class="p">,</span> <span class="n">left</span><span class="p">,</span> <span class="n">right</span><span class="p">,</span> <span class="n">tol</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Find the minimum of the unimodal function f() within [left, right], to within tolerance tol</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">while</span> <span class="nb">abs</span><span class="p">(</span><span class="n">right</span> <span class="o">-</span> <span class="n">left</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="n">tol</span><span class="p">:</span>
<span class="n">left_third</span> <span class="o">=</span> <span class="n">left</span> <span class="o">+</span> <span class="p">(</span><span class="n">right</span> <span class="o">-</span> <span class="n">left</span><span class="p">)</span> <span class="o">/</span> <span class="mi">3</span>
<span class="n">right_third</span> <span class="o">=</span> <span class="n">right</span> <span class="o">-</span> <span class="p">(</span><span class="n">right</span> <span class="o">-</span> <span class="n">left</span><span class="p">)</span> <span class="o">/</span> <span class="mi">3</span>
<span class="k">if</span> <span class="n">f</span><span class="p">(</span><span class="n">left_third</span><span class="p">)</span> <span class="o">&gt;</span> <span class="n">f</span><span class="p">(</span><span class="n">right_third</span><span class="p">):</span> <span class="c1"># the minimum cannot lie in [left, left_third]</span>
<span class="n">left</span> <span class="o">=</span> <span class="n">left_third</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">right</span> <span class="o">=</span> <span class="n">right_third</span>
<span class="c1"># Left and right are the current bounds; the minimum is between them</span>
<span class="k">return</span> <span class="p">(</span><span class="n">left</span> <span class="o">+</span> <span class="n">right</span><span class="p">)</span> <span class="o">/</span> <span class="mi">2</span>
<div class="viewcode-block" id="DyS.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.DyS.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="n">Px</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
<span class="n">Px</span> <span class="o">=</span> <span class="n">Px</span><span class="p">[:,</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span> <span class="c1"># takes only the P(y=+1|x)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy1</span> <span class="o">=</span> <span class="n">Px</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy0</span> <span class="o">=</span> <span class="n">Px</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">neg_label</span><span class="p">]</span>
<span class="c1"># pre-compute the class-conditional histograms of the positive posteriors (n_bins fixed, unlike HDy)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy1_density</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pxy1</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_bins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">density</span><span class="o">=</span><span class="kc">True</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy0_density</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pxy0</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_bins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">density</span><span class="o">=</span><span class="kc">True</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="DyS.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.DyS.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_posteriors</span><span class="p">):</span>
<span class="n">Px</span> <span class="o">=</span> <span class="n">classif_posteriors</span><span class="p">[:,</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span> <span class="c1"># takes only the P(y=+1|x)</span>
<span class="n">Px_test</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">Px</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_bins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">density</span><span class="o">=</span><span class="kc">True</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">divergence</span> <span class="o">=</span> <span class="n">get_divergence</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">divergence</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">distribution_distance</span><span class="p">(</span><span class="n">prev</span><span class="p">):</span>
<span class="n">Px_train</span> <span class="o">=</span> <span class="n">prev</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">Pxy1_density</span> <span class="o">+</span> <span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">prev</span><span class="p">)</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">Pxy0_density</span> <span class="c1"># mixture parameterized by the prevalence</span>
<span class="k">return</span> <span class="n">divergence</span><span class="p">(</span><span class="n">Px_train</span><span class="p">,</span> <span class="n">Px_test</span><span class="p">)</span>
<span class="n">class1_prev</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ternary_search</span><span class="p">(</span><span class="n">f</span><span class="o">=</span><span class="n">distribution_distance</span><span class="p">,</span> <span class="n">left</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">right</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">tol</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">tol</span><span class="p">)</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">as_binary_prevalence</span><span class="p">(</span><span class="n">class1_prev</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="SMM">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.SMM">[docs]</a>
<span class="k">class</span> <span class="nc">SMM</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">,</span> <span class="n">BinaryAggregativeQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> `SMM method &lt;https://ieeexplore.ieee.org/document/9260028&gt;`_ (SMM).</span>
<span class="sd"> SMM is a simplification of matching distribution methods where the representation of the examples</span>
<span class="sd"> is created using the mean instead of a histogram (conceptually equivalent to PACC).</span>
<span class="sd"> :param classifier: a sklearn&#39;s Estimator that generates a binary classifier.</span>
<span class="sd"> :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out</span>
<span class="sd"> validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself), or an integer indicating the number of folds (default 5).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<div class="viewcode-block" id="SMM.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.SMM.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="n">Px</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
<span class="n">Px</span> <span class="o">=</span> <span class="n">Px</span><span class="p">[:,</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span> <span class="c1"># takes only the P(y=+1|x)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy1</span> <span class="o">=</span> <span class="n">Px</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy0</span> <span class="o">=</span> <span class="n">Px</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">neg_label</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy1_mean</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pxy1</span><span class="p">)</span> <span class="c1"># mean posterior among positives; equiv. TPR </span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy0_mean</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pxy0</span><span class="p">)</span> <span class="c1"># mean posterior among negatives; equiv. FPR</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="SMM.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.SMM.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_posteriors</span><span class="p">):</span>
<span class="n">Px</span> <span class="o">=</span> <span class="n">classif_posteriors</span><span class="p">[:,</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span> <span class="c1"># takes only the P(y=+1|x)</span>
<span class="n">Px_mean</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">Px</span><span class="p">)</span>
<span class="n">class1_prev</span> <span class="o">=</span> <span class="p">(</span><span class="n">Px_mean</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">Pxy0_mean</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pxy1_mean</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">Pxy0_mean</span><span class="p">)</span><!-- NOTE(review): divides by zero when Pxy1_mean == Pxy0_mean (uninformative classifier); clip_if_necessary below only clips out-of-range values and does not guard this case. Confirm against the quapy sources. -->
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">as_binary_prevalence</span><span class="p">(</span><span class="n">class1_prev</span><span class="p">,</span> <span class="n">clip_if_necessary</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="DMy">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.DMy">[docs]</a>
<span class="k">class</span> <span class="nc">DMy</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of posterior</span>
<span class="sd"> probabilities. This implementation takes the number of bins, the divergence, and the possibility to work on CDF</span>
<span class="sd"> as hyperparameters.</span>
<span class="sd"> :param classifier: a `sklearn`&#39;s Estimator that generates a probabilistic classifier</span>
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set to model the</span>
<span class="sd"> validation distribution.</span>
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
<span class="sd"> validation data, or as an integer, indicating that the validation distribution should be estimated via</span>
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a</span>
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
<span class="sd"> :param nbins: number of bins used to discretize the distributions (default 8)</span>
<span class="sd"> :param divergence: a string representing a divergence measure (currently, &quot;HD&quot; and &quot;topsoe&quot; are implemented)</span>
<span class="sd"> or a callable function taking two ndarrays of the same dimension as input (default &quot;HD&quot;, meaning Hellinger</span>
<span class="sd"> Distance)</span>
<span class="sd"> :param cdf: whether to use CDF instead of PDF (default False)</span>
<span class="sd"> :param n_jobs: number of parallel workers (default None)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">nbins</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">divergence</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">]</span><span class="o">=</span><span class="s1">&#39;HD&#39;</span><span class="p">,</span>
<span class="n">cdf</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">search</span><span class="o">=</span><span class="s1">&#39;optim_minimize&#39;</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">nbins</span> <span class="o">=</span> <span class="n">nbins</span>
<span class="bp">self</span><span class="o">.</span><span class="n">divergence</span> <span class="o">=</span> <span class="n">divergence</span>
<span class="bp">self</span><span class="o">.</span><span class="n">cdf</span> <span class="o">=</span> <span class="n">cdf</span>
<span class="bp">self</span><span class="o">.</span><span class="n">search</span> <span class="o">=</span> <span class="n">search</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
<span class="c1"># @classmethod</span>
<span class="c1"># def HDy(cls, classifier, val_split=5, n_jobs=None):</span>
<span class="c1"># from quapy.method.meta import MedianEstimator</span>
<span class="c1">#</span>
<span class="c1"># hdy = DMy(classifier=classifier, val_split=val_split, search=&#39;linear_search&#39;, divergence=&#39;HD&#39;)</span>
<span class="c1"># hdy = AggregativeMedianEstimator(hdy, param_grid={&#39;nbins&#39;: np.linspace(10, 110, 11).astype(int)}, n_jobs=n_jobs)</span>
<span class="c1"># return hdy</span>
<span class="k">def</span> <span class="nf">_get_distributions</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">):</span>
<span class="n">histograms</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">post_dims</span> <span class="o">=</span> <span class="n">posteriors</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="k">if</span> <span class="n">post_dims</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
<span class="c1"># in binary quantification we can use only one class, since the other one is its complement</span>
<span class="n">post_dims</span> <span class="o">=</span> <span class="mi">1</span>
<span class="k">for</span> <span class="n">dim</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">post_dims</span><span class="p">):</span>
<span class="n">hist</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">posteriors</span><span class="p">[:,</span> <span class="n">dim</span><span class="p">],</span> <span class="n">bins</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">nbins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">))[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">histograms</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">hist</span><span class="p">)</span>
<span class="n">counts</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">vstack</span><span class="p">(</span><span class="n">histograms</span><span class="p">)</span>
<span class="n">distributions</span> <span class="o">=</span> <span class="n">counts</span><span class="o">/</span><span class="n">counts</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)[:,</span><span class="n">np</span><span class="o">.</span><span class="n">newaxis</span><span class="p">]</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cdf</span><span class="p">:</span>
<span class="n">distributions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">cumsum</span><span class="p">(</span><span class="n">distributions</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="k">return</span> <span class="n">distributions</span>
<div class="viewcode-block" id="DMy.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.DMy.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Trains the classifier (if requested) and generates the validation distributions out of the training data.</span>
<span class="sd"> The validation distributions have shape `(n, ch, nbins)`, with `n` the number of classes, `ch` the number of</span>
<span class="sd"> channels, and `nbins` the number of bins. In particular, let `V` be the validation distributions; then `di=V[i]`</span>
<span class="sd"> are the distributions obtained from training data labelled with class `i`; while `dij = di[j]` is the discrete</span>
<span class="sd"> distribution of posterior probabilities `P(Y=j|X=x)` for training data labelled with class `i`, and `dij[k]`</span>
<span class="sd"> is the fraction of instances with a value in the `k`-th bin.</span>
<span class="sd"> :param data: the training set</span>
<span class="sd"> :param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit)</span>
<span class="sd"> :param val_split: either a float in (0,1) indicating the proportion of training instances to use for</span>
<span class="sd"> validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection</span>
<span class="sd"> indicating the validation set itself, or an int indicating the number k of folds to be used in kFCV</span>
<span class="sd"> to estimate the parameters</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">posteriors</span><span class="p">,</span> <span class="n">true_labels</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
<span class="n">n_classes</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">validation_distribution</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="n">func</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_get_distributions</span><span class="p">,</span>
<span class="n">args</span><span class="o">=</span><span class="p">[</span><span class="n">posteriors</span><span class="p">[</span><span class="n">true_labels</span><span class="o">==</span><span class="n">cat</span><span class="p">]</span> <span class="k">for</span> <span class="n">cat</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_classes</span><span class="p">)],</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
<span class="n">backend</span><span class="o">=</span><span class="s1">&#39;threading&#39;</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="DMy.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.DMy.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Searches for the mixture model parameter (the sought prevalence values) that yields a validation distribution</span>
<span class="sd"> (the mixture) that best matches the test distribution, in terms of the divergence measure of choice.</span>
<span class="sd"> In the multiclass case, with `n` the number of classes, the test and mixture distributions contain</span>
<span class="sd"> `n` channels (proper distributions of binned posterior probabilities), on which the divergence is computed</span>
<span class="sd"> independently. The matching is computed as an average of the divergence across all channels.</span>
<span class="sd"> :param posteriors: posterior probabilities of the instances in the sample</span>
<span class="sd"> :return: a vector of class prevalence estimates</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">test_distribution</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_distributions</span><span class="p">(</span><span class="n">posteriors</span><span class="p">)</span>
<span class="n">divergence</span> <span class="o">=</span> <span class="n">get_divergence</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">divergence</span><span class="p">)</span>
<span class="n">n_classes</span><span class="p">,</span> <span class="n">n_channels</span><span class="p">,</span> <span class="n">nbins</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">validation_distribution</span><span class="o">.</span><span class="n">shape</span>
<span class="k">def</span> <span class="nf">loss</span><span class="p">(</span><span class="n">prev</span><span class="p">):</span>
<span class="n">prev</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">expand_dims</span><span class="p">(</span><span class="n">prev</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">mixture_distribution</span> <span class="o">=</span> <span class="p">(</span><span class="n">prev</span> <span class="o">@</span> <span class="bp">self</span><span class="o">.</span><span class="n">validation_distribution</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="n">n_classes</span><span class="p">,</span><span class="o">-</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="n">n_channels</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="n">divs</span> <span class="o">=</span> <span class="p">[</span><span class="n">divergence</span><span class="p">(</span><span class="n">test_distribution</span><span class="p">[</span><span class="n">ch</span><span class="p">],</span> <span class="n">mixture_distribution</span><span class="p">[</span><span class="n">ch</span><span class="p">])</span> <span class="k">for</span> <span class="n">ch</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_channels</span><span class="p">)]</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">divs</span><span class="p">)</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">argmin_prevalence</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">search</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="newELM">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.newELM">[docs]</a>
<span class="k">def</span> <span class="nf">newELM</span><span class="p">(</span><span class="n">svmperf_base</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">&#39;01&#39;</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Explicit Loss Minimization (ELM) quantifiers.</span>
<span class="sd"> Quantifiers based on ELM represent a family of methods based on structured output learning;</span>
<span class="sd"> these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss</span>
<span class="sd"> measure. This implementation relies on</span>
<span class="sd"> `Joachims SVM perf &lt;https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html&gt;`_ structured output</span>
<span class="sd"> learning algorithm, which has to be installed and patched for the purpose (see this</span>
<span class="sd"> `script &lt;https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh&gt;`_).</span>
<span class="sd"> This function is equivalent to:</span>
<span class="sd"> &gt;&gt;&gt; CC(SVMperf(svmperf_base, loss, C))</span>
<span class="sd"> :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)</span>
<span class="sd"> this path will be obtained from qp.environ[&#39;SVMPERF_HOME&#39;]</span>
<span class="sd"> :param loss: the loss to optimize (see :attr:`quapy.classification.svmperf.SVMperf.valid_losses`)</span>
<span class="sd"> :param C: trade-off between training error and margin (default 1)</span>
<span class="sd"> :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the</span>
<span class="sd"> underlying classifier</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">svmperf_base</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">svmperf_base</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">&#39;SVMPERF_HOME&#39;</span><span class="p">]</span>
<span class="k">assert</span> <span class="n">svmperf_base</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">,</span> \
<span class="s1">&#39;param svmperf_base was not specified, and the variable SVMPERF_HOME has not been set in the environment&#39;</span>
<span class="k">return</span> <span class="n">CC</span><span class="p">(</span><span class="n">SVMperf</span><span class="p">(</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="n">loss</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="n">C</span><span class="p">))</span></div>
<div class="viewcode-block" id="newSVMQ">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.newSVMQ">[docs]</a>
<span class="k">def</span> <span class="nf">newSVMQ</span><span class="p">(</span><span class="n">svmperf_base</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> SVM(Q) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the `Q` loss combining a</span>
<span class="sd"> classification-oriented loss and a quantification-oriented loss, as proposed by</span>
<span class="sd"> `Barranquero et al. 2015 &lt;https://www.sciencedirect.com/science/article/pii/S003132031400291X&gt;`_.</span>
<span class="sd"> Equivalent to:</span>
<span class="sd"> &gt;&gt;&gt; CC(SVMperf(svmperf_base, loss=&#39;q&#39;, C=C))</span>
<span class="sd"> Quantifiers based on ELM represent a family of methods based on structured output learning;</span>
<span class="sd"> these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss</span>
<span class="sd"> measure. This implementation relies on</span>
<span class="sd"> `Joachims SVM perf &lt;https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html&gt;`_ structured output</span>
<span class="sd"> learning algorithm, which has to be installed and patched for the purpose (see this</span>
<span class="sd"> `script &lt;https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh&gt;`_).</span>
<span class="sd"> This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))</span>
<span class="sd"> :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)</span>
<span class="sd"> this path will be obtained from qp.environ[&#39;SVMPERF_HOME&#39;]</span>
<span class="sd"> :param C: trade-off between training error and margin (default 1)</span>
<span class="sd"> :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the</span>
<span class="sd"> underlying classifier</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">newELM</span><span class="p">(</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">&#39;q&#39;</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="n">C</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">newSVMKLD</span><span class="p">(</span><span class="n">svmperf_base</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> SVM(KLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence</span>
<span class="sd"> as proposed by `Esuli et al. 2015 &lt;https://dl.acm.org/doi/abs/10.1145/2700406&gt;`_.</span>
<span class="sd"> Equivalent to:</span>
<span class="sd"> &gt;&gt;&gt; CC(SVMperf(svmperf_base, loss=&#39;kld&#39;, C=C))</span>
<span class="sd"> Quantifiers based on ELM represent a family of methods based on structured output learning;</span>
<span class="sd"> these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss</span>
<span class="sd"> measure. This implementation relies on</span>
<span class="sd"> `Joachims SVM perf &lt;https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html&gt;`_ structured output</span>
<span class="sd"> learning algorithm, which has to be installed and patched for the purpose (see this</span>
<span class="sd"> `script &lt;https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh&gt;`_).</span>
<span class="sd"> This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))</span>
<span class="sd"> :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)</span>
<span class="sd"> this path will be obtained from qp.environ[&#39;SVMPERF_HOME&#39;]</span>
<span class="sd"> :param C: trade-off between training error and margin (default 1)</span>
<span class="sd"> :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the</span>
<span class="sd"> underlying classifier</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">newELM</span><span class="p">(</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">&#39;kld&#39;</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="n">C</span><span class="p">)</span>
<div class="viewcode-block" id="newSVMNKLD">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.newSVMNKLD">[docs]</a>
<span class="k">def</span> <span class="nf">newSVMNKLD</span><span class="p">(</span><span class="n">svmperf_base</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
<span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> SVM(NKLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence</span>
<span class="sd"> normalized via the logistic function, as proposed by</span>
<span class="sd"> `Esuli et al. 2015 &lt;https://dl.acm.org/doi/abs/10.1145/2700406&gt;`_.</span>
<span class="sd"> Equivalent to:</span>
<span class="sd"> &gt;&gt;&gt; CC(SVMperf(svmperf_base, loss=&#39;nkld&#39;, C=C))</span>
<span class="sd"> Quantifiers based on ELM represent a family of methods based on structured output learning;</span>
<span class="sd"> these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss</span>
<span class="sd"> measure. This implementation relies on</span>
<span class="sd"> `Joachims SVM perf &lt;https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html&gt;`_ structured output</span>
<span class="sd"> learning algorithm, which has to be installed and patched for the purpose (see this</span>
<span class="sd"> `script &lt;https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh&gt;`_).</span>
<span class="sd"> This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))</span>
<span class="sd"> :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)</span>
<span class="sd"> this path will be obtained from qp.environ[&#39;SVMPERF_HOME&#39;]</span>
<span class="sd"> :param C: trade-off between training error and margin (default 1)</span>
<span class="sd"> :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the</span>
<span class="sd"> underlying classifier</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">newELM</span><span class="p">(</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">&#39;nkld&#39;</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="n">C</span><span class="p">)</span></div>
<div class="viewcode-block" id="newSVMAE">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.newSVMAE">[docs]</a>
<span class="k">def</span> <span class="nf">newSVMAE</span><span class="p">(</span><span class="n">svmperf_base</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> SVM(AE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Absolute Error as first used by</span>
<span class="sd"> `Moreo and Sebastiani, 2021 &lt;https://arxiv.org/abs/2011.02552&gt;`_.</span>
<span class="sd"> Equivalent to:</span>
<span class="sd"> &gt;&gt;&gt; CC(SVMperf(svmperf_base, loss=&#39;mae&#39;, C=C))</span>
<span class="sd"> Quantifiers based on ELM represent a family of methods based on structured output learning;</span>
<span class="sd"> these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss</span>
<span class="sd"> measure. This implementation relies on</span>
<span class="sd"> `Joachims SVM perf &lt;https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html&gt;`_ structured output</span>
<span class="sd"> learning algorithm, which has to be installed and patched for the purpose (see this</span>
<span class="sd"> `script &lt;https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh&gt;`_).</span>
<span class="sd"> This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))</span>
<span class="sd"> :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)</span>
<span class="sd"> this path will be obtained from qp.environ[&#39;SVMPERF_HOME&#39;]</span>
<span class="sd"> :param C: trade-off between training error and margin (default 1)</span>
<span class="sd"> :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the</span>
<span class="sd"> underlying classifier</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">newELM</span><span class="p">(</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">&#39;mae&#39;</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="n">C</span><span class="p">)</span></div>
<div class="viewcode-block" id="newSVMRAE">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.newSVMRAE">[docs]</a>
<span class="k">def</span> <span class="nf">newSVMRAE</span><span class="p">(</span><span class="n">svmperf_base</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> SVM(RAE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Relative Absolute Error as first</span>
<span class="sd"> used by `Moreo and Sebastiani, 2021 &lt;https://arxiv.org/abs/2011.02552&gt;`_.</span>
<span class="sd"> Equivalent to:</span>
<span class="sd"> &gt;&gt;&gt; CC(SVMperf(svmperf_base, loss=&#39;mrae&#39;, C=C))</span>
<span class="sd"> Quantifiers based on ELM represent a family of methods based on structured output learning;</span>
<span class="sd"> these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss</span>
<span class="sd"> measure. This implementation relies on</span>
<span class="sd"> `Joachims SVM perf &lt;https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html&gt;`_ structured output</span>
<span class="sd"> learning algorithm, which has to be installed and patched for the purpose (see this</span>
<span class="sd"> `script &lt;https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh&gt;`_).</span>
<span class="sd"> This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))</span>
<span class="sd"> :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)</span>
<span class="sd"> this path will be obtained from qp.environ[&#39;SVMPERF_HOME&#39;]</span>
<span class="sd"> :param C: trade-off between training error and margin (default 1)</span>
<span class="sd"> :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the</span>
<span class="sd"> underlying classifier</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">newELM</span><span class="p">(</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">&#39;mrae&#39;</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="n">C</span><span class="p">)</span></div>
<div class="viewcode-block" id="OneVsAllAggregative">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.OneVsAllAggregative">[docs]</a>
<span class="k">class</span> <span class="nc">OneVsAllAggregative</span><span class="p">(</span><span class="n">OneVsAllGeneric</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Allows any binary quantifier to perform quantification on single-label datasets.</span>
<span class="sd"> The method maintains one binary quantifier for each class, and then l1-normalizes the outputs so that the</span>
<span class="sd"> class prevalences sum up to 1.</span>
<span class="sd"> This variant was used, along with the :class:`EMQ` quantifier, in</span>
<span class="sd"> `Gao and Sebastiani, 2016 &lt;https://link.springer.com/content/pdf/10.1007/s13278-016-0327-z.pdf&gt;`_.</span>
<span class="sd"> :param binary_quantifier: a quantifier (binary) that will be employed to work on multiclass model in a</span>
<span class="sd"> one-vs-all manner</span>
<span class="sd"> :param n_jobs: number of parallel workers</span>
<span class="sd"> :param parallel_backend: the parallel backend for joblib (default &quot;multiprocessing&quot;); this is helpful for some quantifiers</span>
<span class="sd"> (e.g., ELM-based ones) that cannot be run with multiprocessing, since the temp dir they create during fit</span>
<span class="sd"> is removed and no longer available at predict time.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">parallel_backend</span><span class="o">=</span><span class="s1">&#39;multiprocessing&#39;</span><span class="p">):</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">BaseQuantifier</span><span class="p">),</span> \
<span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">binary_quantifier</span><span class="si">}</span><span class="s1"> does not seem to be a Quantifier&#39;</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">),</span> \
<span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">binary_quantifier</span><span class="si">}</span><span class="s1"> does not seem to be of type Aggregative&#39;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">binary_quantifier</span> <span class="o">=</span> <span class="n">binary_quantifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">parallel_backend</span> <span class="o">=</span> <span class="n">parallel_backend</span>
<div class="viewcode-block" id="OneVsAllAggregative.classify">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.OneVsAllAggregative.classify">[docs]</a>
<span class="k">def</span> <span class="nf">classify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> If the base quantifier is not probabilistic, returns a matrix of shape `(n,m,)` with `n` the number of</span>
<span class="sd"> instances and `m` the number of classes. The entry `(i,j)` is a binary value indicating whether instance</span>
<span class="sd"> `i` belongs to class `j`. The binary classifications are independent of each other, meaning that an instance</span>
<span class="sd"> can end up being attributed to 0, 1, or more classes.</span>
<span class="sd"> If the base quantifier is probabilistic, returns a matrix of shape `(n,m,2)` with `n` the number of instances</span>
<span class="sd"> and `m` the number of classes. The entry `(i,j,1)` (resp. `(i,j,0)`) is a value in [0,1] indicating the</span>
<span class="sd"> posterior probability that instance `i` belongs (resp. does not belong) to class `j`. The posterior</span>
<span class="sd"> probabilities are independent of each other, meaning that, in general, they do not sum up to one.</span>
<span class="sd"> :param instances: array-like</span>
<span class="sd"> :return: `np.ndarray`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">classif_predictions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_parallel</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_delayed_binary_classification</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">AggregativeSoftQuantifier</span><span class="p">):</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">swapaxes</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">T</span></div>
<div class="viewcode-block" id="OneVsAllAggregative.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.OneVsAllAggregative.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">):</span>
<span class="n">prevalences</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_parallel</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_delayed_binary_aggregate</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">)</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">normalize_prevalence</span><span class="p">(</span><span class="n">prevalences</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_delayed_binary_classification</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dict_binary_quantifiers</span><span class="p">[</span><span class="n">c</span><span class="p">]</span><span class="o">.</span><span class="n">classify</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_delayed_binary_aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">):</span>
<span class="c1"># the estimation for the positive class prevalence</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dict_binary_quantifiers</span><span class="p">[</span><span class="n">c</span><span class="p">]</span><span class="o">.</span><span class="n">aggregate</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">[:,</span> <span class="n">c</span><span class="p">])[</span><span class="mi">1</span><span class="p">]</span></div>
<div class="viewcode-block" id="AggregativeMedianEstimator">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeMedianEstimator">[docs]</a>
<span class="k">class</span> <span class="nc">AggregativeMedianEstimator</span><span class="p">(</span><span class="n">BinaryQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the</span>
<span class="sd"> estimation returned by differently (hyper)parameterized base quantifiers.</span>
<span class="sd"> The median of unit-vectors is only guaranteed to be a unit-vector for n=2 dimensions,</span>
<span class="sd"> i.e., in cases of binary quantification.</span>
<span class="sd"> :param base_quantifier: the base, binary quantifier</span>
<span class="sd"> :param random_state: a seed to be set before fitting any base quantifier (default None)</span>
<span class="sd"> :param param_grid: the grid or parameters towards which the median will be computed</span>
<span class="sd"> :param n_jobs: number of parallel workers</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">base_quantifier</span><span class="p">:</span> <span class="n">AggregativeQuantifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">base_quantifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span> <span class="o">=</span> <span class="n">param_grid</span>
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
<div class="viewcode-block" id="AggregativeMedianEstimator.get_params">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeMedianEstimator.get_params">[docs]</a>
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">(</span><span class="n">deep</span><span class="p">)</span></div>
<div class="viewcode-block" id="AggregativeMedianEstimator.set_params">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeMedianEstimator.set_params">[docs]</a>
<span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">params</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_delayed_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
<span class="n">params</span><span class="p">,</span> <span class="n">training</span> <span class="o">=</span> <span class="n">args</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
<span class="k">return</span> <span class="n">model</span>
<span class="k">def</span> <span class="nf">_delayed_fit_classifier</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;enter job&#39;</span><span class="p">)</span>
<span class="n">cls_params</span><span class="p">,</span> <span class="n">training</span><span class="p">,</span> <span class="n">kwargs</span> <span class="o">=</span> <span class="n">args</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">cls_params</span><span class="p">)</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">classifier_fit_predict</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;exit job&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_delayed_fit_aggregation</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
<span class="p">((</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">),</span> <span class="n">q_params</span><span class="p">),</span> <span class="n">training</span> <span class="o">=</span> <span class="n">args</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">q_params</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">aggregation_fit</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span>
<span class="k">return</span> <span class="n">model</span>
<div class="viewcode-block" id="AggregativeMedianEstimator.fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeMedianEstimator.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="kn">import</span> <span class="nn">itertools</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_check_binary</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">):</span>
<span class="n">cls_configs</span><span class="p">,</span> <span class="n">q_configs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">model_selection</span><span class="o">.</span><span class="n">group_params</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">cls_configs</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">models_preds</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit_classifier</span><span class="p">,</span>
<span class="p">((</span><span class="n">params</span><span class="p">,</span> <span class="n">training</span><span class="p">,</span> <span class="n">kwargs</span><span class="p">)</span> <span class="k">for</span> <span class="n">params</span> <span class="ow">in</span> <span class="n">cls_configs</span><span class="p">),</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;_R_SEED&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">backend</span><span class="o">=</span><span class="s1">&#39;threading&#39;</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;only 1&#39;</span><span class="p">)</span>
<span class="n">model</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span>
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">cls_configs</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">classifier_fit_predict</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="n">models_preds</span> <span class="o">=</span> <span class="p">[(</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">)]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">models</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit_aggregation</span><span class="p">,</span>
<span class="p">((</span><span class="n">setup</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span> <span class="k">for</span> <span class="n">setup</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">product</span><span class="p">(</span><span class="n">models_preds</span><span class="p">,</span> <span class="n">q_configs</span><span class="p">)),</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;_R_SEED&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
<span class="n">backend</span><span class="o">=</span><span class="s1">&#39;threading&#39;</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">configs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">model_selection</span><span class="o">.</span><span class="n">expand_grid</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">models</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit</span><span class="p">,</span>
<span class="p">((</span><span class="n">params</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span> <span class="k">for</span> <span class="n">params</span> <span class="ow">in</span> <span class="n">configs</span><span class="p">),</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;_R_SEED&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
<span class="n">backend</span><span class="o">=</span><span class="s1">&#39;threading&#39;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<span class="k">def</span> <span class="nf">_delayed_predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
<span class="n">model</span><span class="p">,</span> <span class="n">instances</span> <span class="o">=</span> <span class="n">args</span>
<span class="k">return</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
<div class="viewcode-block" id="AggregativeMedianEstimator.quantify">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeMedianEstimator.quantify">[docs]</a>
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="n">prev_preds</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_predict</span><span class="p">,</span>
<span class="p">((</span><span class="n">model</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span> <span class="k">for</span> <span class="n">model</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">models</span><span class="p">),</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;_R_SEED&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
<span class="n">backend</span><span class="o">=</span><span class="s1">&#39;threading&#39;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">prev_preds</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span></div>
</div>
<span class="c1">#---------------------------------------------------------------</span>
<span class="c1"># imports</span>
<span class="c1">#---------------------------------------------------------------</span>
<span class="kn">from</span> <span class="nn">.</span> <span class="kn">import</span> <span class="n">_threshold_optim</span>
<span class="n">T50</span> <span class="o">=</span> <span class="n">_threshold_optim</span><span class="o">.</span><span class="n">T50</span>
<span class="n">MAX</span> <span class="o">=</span> <span class="n">_threshold_optim</span><span class="o">.</span><span class="n">MAX</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">_threshold_optim</span><span class="o">.</span><span class="n">X</span>
<span class="n">MS</span> <span class="o">=</span> <span class="n">_threshold_optim</span><span class="o">.</span><span class="n">MS</span>
<span class="n">MS2</span> <span class="o">=</span> <span class="n">_threshold_optim</span><span class="o">.</span><span class="n">MS2</span>
<span class="kn">from</span> <span class="nn">.</span> <span class="kn">import</span> <span class="n">_kdey</span>
<span class="n">KDEyML</span> <span class="o">=</span> <span class="n">_kdey</span><span class="o">.</span><span class="n">KDEyML</span>
<span class="n">KDEyHD</span> <span class="o">=</span> <span class="n">_kdey</span><span class="o">.</span><span class="n">KDEyHD</span>
<span class="n">KDEyCS</span> <span class="o">=</span> <span class="n">_kdey</span><span class="o">.</span><span class="n">KDEyCS</span>
<span class="c1">#---------------------------------------------------------------</span>
<span class="c1"># aliases</span>
<span class="c1">#---------------------------------------------------------------</span>
<span class="n">ClassifyAndCount</span> <span class="o">=</span> <span class="n">CC</span>
<span class="n">AdjustedClassifyAndCount</span> <span class="o">=</span> <span class="n">ACC</span>
<span class="n">ProbabilisticClassifyAndCount</span> <span class="o">=</span> <span class="n">PCC</span>
<span class="n">ProbabilisticAdjustedClassifyAndCount</span> <span class="o">=</span> <span class="n">PACC</span>
<span class="n">ExpectationMaximizationQuantifier</span> <span class="o">=</span> <span class="n">EMQ</span>
<span class="n">DistributionMatchingY</span> <span class="o">=</span> <span class="n">DMy</span>
<span class="n">SLD</span> <span class="o">=</span> <span class="n">EMQ</span>
<span class="n">HellingerDistanceY</span> <span class="o">=</span> <span class="n">HDy</span>
<span class="n">MedianSweep</span> <span class="o">=</span> <span class="n">MS</span>
<span class="n">MedianSweep2</span> <span class="o">=</span> <span class="n">MS2</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, Alejandro Moreo.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>