Source code for quapy.method.aggregative

from abc import ABC, abstractmethod
from copy import deepcopy
from typing import Callable, Union
import numpy as np
from abstention.calibration import NoBiasVectorScaling, TempScaling, VectorScaling
from scipy import optimize
from sklearn.base import BaseEstimator
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict

import quapy as qp
import quapy.functional as F
from quapy.functional import get_divergence
from quapy.classification.calibration import NBVSCalibration, BCTSCalibration, TSCalibration, VSCalibration
from quapy.classification.svmperf import SVMperf
from quapy.data import LabelledCollection
from quapy.method.base import BaseQuantifier, BinaryQuantifier, OneVsAllGeneric


# Abstract classes
# ------------------------------------

<div class="viewcode-block" id="AggregativeQuantifier">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeQuantifier">[docs]</a>
|
||
<span class="k">class</span> <span class="nc">AggregativeQuantifier</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">,</span> <span class="n">ABC</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Abstract class for quantification methods that base their estimations on the aggregation of classification</span>
|
||
<span class="sd"> results. Aggregative quantifiers implement a pipeline that consists of generating classification predictions</span>
|
||
<span class="sd"> and aggregating them. For this reason, the training phase is implemented by :meth:`classification_fit` followed</span>
|
||
<span class="sd"> by :meth:`aggregation_fit`, while the testing phase is implemented by :meth:`classify` followed by</span>
|
||
<span class="sd"> :meth:`aggregate`. Subclasses of this abstract class must provide implementations for these methods.</span>
|
||
<span class="sd"> Aggregative quantifiers also maintain a :attr:`classifier` attribute.</span>
|
||
|
||
<span class="sd"> The method :meth:`fit` comes with a default implementation based on :meth:`classification_fit`</span>
|
||
<span class="sd"> and :meth:`aggregation_fit`.</span>
|
||
|
||
<span class="sd"> The method :meth:`quantify` comes with a default implementation based on :meth:`classify`</span>
|
||
<span class="sd"> and :meth:`aggregate`.</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="n">val_split_</span> <span class="o">=</span> <span class="kc">None</span>
|
||
|
||
<span class="nd">@property</span>
|
||
<span class="k">def</span> <span class="nf">val_split</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">val_split_</span>
|
||
|
||
<span class="nd">@val_split</span><span class="o">.</span><span class="n">setter</span>
|
||
<span class="k">def</span> <span class="nf">val_split</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">val_split</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">val_split</span><span class="p">,</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="s1">'warning: setting val_split with a LabelledCollection will be inefficient in'</span>
|
||
<span class="s1">'model selection. Rather pass the LabelledCollection at fit time'</span><span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split_</span> <span class="o">=</span> <span class="n">val_split</span>
|
||
|
||
<span class="k">def</span> <span class="nf">_check_init_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Implements any check to be performed in the parameters of the init method before undertaking</span>
|
||
<span class="sd"> the training of the quantifier. This is made as to allow for a quick execution stop when the</span>
|
||
<span class="sd"> parameters are not valid.</span>
|
||
|
||
<span class="sd"> :return: Nothing. May raise an exception.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">pass</span>
|
||
|
||
<span class="k">def</span> <span class="nf">_check_non_empty_classes</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Asserts all classes have positive instances.</span>
|
||
|
||
<span class="sd"> :param data: LabelledCollection</span>
|
||
<span class="sd"> :return: Nothing. May raise an exception.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="n">sample_prevs</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
||
<span class="n">empty_classes</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argwhere</span><span class="p">(</span><span class="n">sample_prevs</span><span class="o">==</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
|
||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">empty_classes</span><span class="p">)</span><span class="o">></span><span class="mi">0</span><span class="p">:</span>
|
||
<span class="n">empty_class_names</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">classes_</span><span class="p">[</span><span class="n">empty_classes</span><span class="p">]</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'classes </span><span class="si">{</span><span class="n">empty_class_names</span><span class="si">}</span><span class="s1"> have no training examples'</span><span class="p">)</span>
|
||
|
||
<div class="viewcode-block" id="AggregativeQuantifier.fit">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeQuantifier.fit">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function.</span>
|
||
|
||
<span class="sd"> :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data</span>
|
||
<span class="sd"> :param fit_classifier: whether to train the learner (default is True). Set to False if the</span>
|
||
<span class="sd"> learner has been trained outside the quantifier.</span>
|
||
<span class="sd"> :return: self</span>
|
||
<span class="sd"> """</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_check_init_parameters</span><span class="p">()</span>
|
||
<span class="n">classif_predictions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier_fit_predict</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="p">,</span> <span class="n">predict_on</span><span class="o">=</span><span class="n">val_split</span><span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">aggregation_fit</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="bp">self</span></div>
|
||
|
||
|
||
<div class="viewcode-block" id="AggregativeQuantifier.classifier_fit_predict">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeQuantifier.classifier_fit_predict">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">classifier_fit_predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">predict_on</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Trains the classifier if requested (`fit_classifier=True`) and generate the necessary predictions to</span>
|
||
<span class="sd"> train the aggregation function.</span>
|
||
|
||
<span class="sd"> :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data</span>
|
||
<span class="sd"> :param fit_classifier: whether to train the learner (default is True). Set to False if the</span>
|
||
<span class="sd"> learner has been trained outside the quantifier.</span>
|
||
<span class="sd"> :param predict_on: specifies the set on which predictions need to be issued. This parameter can</span>
|
||
<span class="sd"> be specified as None (default) to indicate no prediction is needed; a float in (0, 1) to</span>
|
||
<span class="sd"> indicate the proportion of instances to be used for predictions (the remainder is used for</span>
|
||
<span class="sd"> training); an integer >1 to indicate that the predictions must be generated via k-fold</span>
|
||
<span class="sd"> cross-validation, using this integer as k; or the data sample itself on which to generate</span>
|
||
<span class="sd"> the predictions.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">fit_classifier</span><span class="p">,</span> <span class="nb">bool</span><span class="p">),</span> <span class="s1">'unexpected type for "fit_classifier", must be boolean'</span>
|
||
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_check_classifier</span><span class="p">(</span><span class="n">adapt_if_necessary</span><span class="o">=</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_classifier_method</span><span class="p">()</span> <span class="o">==</span> <span class="s1">'predict_proba'</span><span class="p">))</span>
|
||
|
||
<span class="k">if</span> <span class="n">fit_classifier</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_check_non_empty_classes</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
|
||
|
||
<span class="k">if</span> <span class="n">predict_on</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="n">predict_on</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">val_split</span>
|
||
|
||
<span class="k">if</span> <span class="n">predict_on</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="n">fit_classifier</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="o">*</span><span class="n">data</span><span class="o">.</span><span class="n">Xy</span><span class="p">)</span>
|
||
<span class="n">predictions</span> <span class="o">=</span> <span class="kc">None</span>
|
||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">predict_on</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="n">fit_classifier</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="mf">0.</span> <span class="o"><</span> <span class="n">predict_on</span> <span class="o"><</span> <span class="mf">1.</span><span class="p">):</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'proportion </span><span class="si">{</span><span class="n">predict_on</span><span class="si">=}</span><span class="s1"> out of range, must be in (0,1)'</span><span class="p">)</span>
|
||
<span class="n">train</span><span class="p">,</span> <span class="n">val</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">predict_on</span><span class="p">))</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="o">*</span><span class="n">train</span><span class="o">.</span><span class="n">Xy</span><span class="p">)</span>
|
||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classify</span><span class="p">(</span><span class="n">val</span><span class="o">.</span><span class="n">X</span><span class="p">),</span> <span class="n">val</span><span class="o">.</span><span class="n">y</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'wrong type for predict_on: since fit_classifier=False, '</span>
|
||
<span class="sa">f</span><span class="s1">'the set on which predictions have to be issued must be '</span>
|
||
<span class="sa">f</span><span class="s1">'explicitly indicated'</span><span class="p">)</span>
|
||
|
||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">predict_on</span><span class="p">,</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="n">fit_classifier</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="o">*</span><span class="n">data</span><span class="o">.</span><span class="n">Xy</span><span class="p">)</span>
|
||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classify</span><span class="p">(</span><span class="n">predict_on</span><span class="o">.</span><span class="n">X</span><span class="p">),</span> <span class="n">predict_on</span><span class="o">.</span><span class="n">y</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">predict_on</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||
|
||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">predict_on</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="n">fit_classifier</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="n">predict_on</span> <span class="o"><=</span> <span class="mi">1</span><span class="p">:</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'invalid value </span><span class="si">{</span><span class="n">predict_on</span><span class="si">}</span><span class="s1"> in fit. '</span>
|
||
<span class="sa">f</span><span class="s1">'Specify a integer >1 for kFCV estimation.'</span><span class="p">)</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="n">n_jobs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'n_jobs'</span><span class="p">)</span> <span class="k">else</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span>
|
||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">cross_val_predict</span><span class="p">(</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">,</span> <span class="o">*</span><span class="n">data</span><span class="o">.</span><span class="n">Xy</span><span class="p">,</span> <span class="n">cv</span><span class="o">=</span><span class="n">predict_on</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_classifier_method</span><span class="p">())</span>
|
||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">y</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="o">*</span><span class="n">data</span><span class="o">.</span><span class="n">Xy</span><span class="p">)</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'wrong type for predict_on: since fit_classifier=False, '</span>
|
||
<span class="sa">f</span><span class="s1">'the set on which predictions have to be issued must be '</span>
|
||
<span class="sa">f</span><span class="s1">'explicitly indicated'</span><span class="p">)</span>
|
||
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
|
||
<span class="sa">f</span><span class="s1">'error: param "predict_on" (</span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">predict_on</span><span class="p">)</span><span class="si">}</span><span class="s1">) not understood; '</span>
|
||
<span class="sa">f</span><span class="s1">'use either a float indicating the split proportion, or a '</span>
|
||
<span class="sa">f</span><span class="s1">'tuple (X,y) indicating the validation partition'</span><span class="p">)</span>
|
||
|
||
<span class="k">return</span> <span class="n">predictions</span></div>

    @abstractmethod
    def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
        """
        Trains the aggregation function.

        :param classif_predictions: a LabelledCollection containing the label predictions issued
            by the classifier
        :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data
        """
        ...
<span class="nd">@property</span>
|
||
<span class="k">def</span> <span class="nf">classifier</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Gives access to the classifier</span>
|
||
|
||
<span class="sd"> :return: the classifier (typically an sklearn's Estimator)</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier_</span>
|
||
|
||
<span class="nd">@classifier</span><span class="o">.</span><span class="n">setter</span>
|
||
<span class="k">def</span> <span class="nf">classifier</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Setter for the classifier</span>
|
||
|
||
<span class="sd"> :param classifier: the classifier</span>
|
||
<span class="sd"> """</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier_</span> <span class="o">=</span> <span class="n">classifier</span>
|
||
|
||
<div class="viewcode-block" id="AggregativeQuantifier.classify">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeQuantifier.classify">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">classify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Provides the label predictions for the given instances. The predictions should respect the format expected by</span>
|
||
<span class="sd"> :meth:`aggregate`, e.g., posterior probabilities for probabilistic quantifiers, or crisp predictions for</span>
|
||
<span class="sd"> non-probabilistic quantifiers. The default one is "decision_function".</span>
|
||
|
||
<span class="sd"> :param instances: array-like of shape `(n_instances, n_features,)`</span>
|
||
<span class="sd"> :return: np.ndarray of shape `(n_instances,)` with label predictions</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">return</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_classifier_method</span><span class="p">())(</span><span class="n">instances</span><span class="p">)</span></div>
|
||
|
||
|
||
<span class="k">def</span> <span class="nf">_classifier_method</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Name of the method that must be used for issuing label predictions. The default one is "decision_function".</span>
|
||
|
||
<span class="sd"> :return: string</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">return</span> <span class="s1">'decision_function'</span>
|
||
|
||
<span class="k">def</span> <span class="nf">_check_classifier</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">adapt_if_necessary</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Guarantees that the underlying classifier implements the method required for issuing predictions, i.e.,</span>
|
||
<span class="sd"> the method indicated by the :meth:`_classifier_method`</span>
|
||
|
||
<span class="sd"> :param adapt_if_necessary: if True, the method will try to comply with the required specifications</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_classifier_method</span><span class="p">()),</span> \
|
||
<span class="sa">f</span><span class="s2">"the method does not implement the required </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">_classifier_method</span><span class="p">()</span><span class="si">}</span><span class="s2"> method"</span>
|
||
|
||
<div class="viewcode-block" id="AggregativeQuantifier.quantify">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeQuantifier.quantify">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Generate class prevalence estimates for the sample's instances by aggregating the label predictions generated</span>
|
||
<span class="sd"> by the classifier.</span>
|
||
|
||
<span class="sd"> :param instances: array-like</span>
|
||
<span class="sd"> :return: `np.ndarray` of shape `(n_classes)` with class prevalence estimates.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="n">classif_predictions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">aggregate</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">)</span></div>
|
||
|
||
|
||
<div class="viewcode-block" id="AggregativeQuantifier.aggregate">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeQuantifier.aggregate">[docs]</a>
|
||
<span class="nd">@abstractmethod</span>
|
||
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Implements the aggregation of label predictions.</span>
|
||
|
||
<span class="sd"> :param classif_predictions: `np.ndarray` of label predictions</span>
|
||
<span class="sd"> :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="o">...</span></div>
|
||
|
||
|
||
<span class="nd">@property</span>
|
||
<span class="k">def</span> <span class="nf">classes_</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Class labels, in the same order in which class prevalence values are to be computed.</span>
|
||
<span class="sd"> This default implementation actually returns the class labels of the learner.</span>
|
||
|
||
<span class="sd"> :return: array-like</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">classes_</span></div>


class AggregativeCrispQuantifier(AggregativeQuantifier, ABC):
    """
    Abstract class for quantification methods that base their estimations on the aggregation of crisp decisions
    as returned by a hard classifier. Aggregative crisp quantifiers thus extend Aggregative
    Quantifiers by implementing specifications about crisp predictions.
    """

    def _classifier_method(self):
        """
        Name of the method that must be used for issuing label predictions. For crisp quantifiers, the method
        is 'predict', that returns an array of shape `(n_instances,)` of label predictions.

        :return: the string "predict", i.e., the standard method name for scikit-learn hard predictions
        """
        return 'predict'
<div class="viewcode-block" id="AggregativeSoftQuantifier">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeSoftQuantifier">[docs]</a>
|
||
<span class="k">class</span> <span class="nc">AggregativeSoftQuantifier</span><span class="p">(</span><span class="n">AggregativeQuantifier</span><span class="p">,</span> <span class="n">ABC</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Abstract class for quantification methods that base their estimations on the aggregation of posterior</span>
|
||
<span class="sd"> probabilities as returned by a probabilistic classifier.</span>
|
||
<span class="sd"> Aggregative soft quantifiers thus extend Aggregative Quantifiers by implementing specifications</span>
|
||
<span class="sd"> about soft predictions.</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="k">def</span> <span class="nf">_classifier_method</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Name of the method that must be used for issuing label predictions. For probabilistic quantifiers, the method</span>
|
||
<span class="sd"> is 'predict_proba', that returns an array of shape `(n_instances, n_dimensions,)` with posterior</span>
|
||
<span class="sd"> probabilities.</span>
|
||
|
||
<span class="sd"> :return: the string "predict_proba", i.e., the standard method name for scikit-learn soft predictions</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">return</span> <span class="s1">'predict_proba'</span>
|
||
|
||
<span class="k">def</span> <span class="nf">_check_classifier</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">adapt_if_necessary</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Guarantees that the underlying classifier implements the method indicated by the :meth:`_classifier_method`.</span>
|
||
<span class="sd"> In case it does not, the classifier is calibrated (by means of the Platt's calibration method implemented by</span>
|
||
<span class="sd"> scikit-learn in CalibratedClassifierCV, with cv=5). This calibration is only allowed if `adapt_if_necessary`</span>
|
||
<span class="sd"> is set to True. If otherwise (i.e., the classifier is not probabilistic, and `adapt_if_necessary` is set</span>
|
||
<span class="sd"> to False), an exception will be raised.</span>
|
||
|
||
<span class="sd"> :param adapt_if_necessary: a hard classifier is turned into a soft classifier if `adapt_if_necessary==True`</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_classifier_method</span><span class="p">()):</span>
|
||
<span class="k">if</span> <span class="n">adapt_if_necessary</span><span class="p">:</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'warning: The learner </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> does not seem to be '</span>
|
||
<span class="sa">f</span><span class="s1">'probabilistic. The learner will be calibrated (using CalibratedClassifierCV).'</span><span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">CalibratedClassifierCV</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">,</span> <span class="n">cv</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="k">raise</span> <span class="ne">AssertionError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error: The learner </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> does not '</span>
|
||
<span class="sa">f</span><span class="s1">'seem to be probabilistic. The learner cannot be calibrated since '</span>
|
||
<span class="sa">f</span><span class="s1">'fit_classifier is set to False'</span><span class="p">)</span></div>


class BinaryAggregativeQuantifier(AggregativeQuantifier, BinaryQuantifier):
    """
    Abstract class for binary aggregative quantifiers; provides the positive and negative class labels
    and restricts :meth:`fit` to binary datasets.
    """

    @property
    def pos_label(self):
        return self.classifier.classes_[1]

    @property
    def neg_label(self):
        return self.classifier.classes_[0]

    def fit(self, data: LabelledCollection, fit_classifier=True, val_split=None):
        self._check_binary(data, self.__class__.__name__)
        return super().fit(data, fit_classifier, val_split)


# Methods
# ------------------------------------
class CC(AggregativeCrispQuantifier):
    """
    The most basic Quantification method. One that simply classifies all instances and counts how many have been
    attributed to each of the classes in order to compute class prevalence estimates.

    :param classifier: a sklearn's Estimator that generates a classifier
    """

    def __init__(self, classifier: BaseEstimator):
        self.classifier = classifier

    def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
        """
        Nothing to do here!

        :param classif_predictions: this is actually None
        :param data: unused
        """
        pass

    def aggregate(self, classif_predictions: np.ndarray):
        """
        Computes class prevalence estimates by counting the prevalence of each of the predicted labels.

        :param classif_predictions: array-like with label predictions
        :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates.
        """
        return F.prevalence_from_labels(classif_predictions, self.classes_)
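
# Illustrative usage sketch (not part of the original module); assumes scikit-learn's LogisticRegression
# and two LabelledCollection objects `train` and `test` obtained elsewhere (hypothetical names):
#
#   from sklearn.linear_model import LogisticRegression
#
#   cc = CC(LogisticRegression())
#   cc.fit(train)                     # only the classifier is trained; there is no aggregation function to fit
#   estim_prev = cc.quantify(test.X)  # np.ndarray of shape (n_classes,): fraction of predictions per class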


class ACC(AggregativeCrispQuantifier):
    """
    `Adjusted Classify & Count <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_,
    the "adjusted" variant of :class:`CC`, that corrects the predictions of CC
    according to the `misclassification rates`.

    :param classifier: a sklearn's Estimator that generates a classifier
    :param val_split: specifies the data used for generating classifier predictions. This specification
        can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
        be extracted from the training set; or as an integer (default 5), indicating that the predictions
        are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
        for `k`); or as a collection defining the specific set of data to use for validation.
        Alternatively, this set can be specified at fit time by indicating the exact set of data
        on which the predictions are to be generated.
    :param n_jobs: number of parallel workers
    :param solver: indicates the method to be used for obtaining the final estimates. The choice
        'exact' comes down to solving the system of linear equations :math:`Ax=B` where `A` is a
        matrix containing the class-conditional probabilities of the predictions (e.g., the tpr and fpr in
        binary) and `B` is the vector of prevalence values estimated via CC, as :math:`x=A^{-1}B`. This solution
        might not exist for degenerated classifiers, in which case the method defaults to classify and count
        (i.e., does not attempt any adjustment).
        Another option is to search for the prevalence vector that minimizes the L2 norm of :math:`|Ax-B|`. The
        latter is achieved by indicating solver='minimize'. This one generally works better, and is the default
        parameter. More details about this can be consulted in `Bunse, M. "On Multi-Class Extensions of Adjusted
        Classify and Count", on proceedings of the 2nd International Workshop on Learning to Quantify: Methods
        and Applications (LQ 2022), ECML/PKDD 2022, Grenoble (France)
        <https://lq-2022.github.io/proceedings/CompleteVolume.pdf>`_.
    """
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">solver</span><span class="o">=</span><span class="s1">'minimize'</span><span class="p">):</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">solver</span> <span class="o">=</span> <span class="n">solver</span>
|
||
|
||
<span class="k">def</span> <span class="nf">_check_init_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">solver</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'exact'</span><span class="p">,</span> <span class="s1">'minimize'</span><span class="p">],</span> <span class="s2">"unknown solver; valid ones are 'exact', 'minimize'"</span>
|
||
|
||
<div class="viewcode-block" id="ACC.aggregation_fit">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.ACC.aggregation_fit">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Estimates the misclassification rates.</span>
|
||
|
||
<span class="sd"> :param classif_predictions: classifier predictions with true labels</span>
|
||
<span class="sd"> """</span>
|
||
<span class="n">pred_labels</span><span class="p">,</span> <span class="n">true_labels</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">cc</span> <span class="o">=</span> <span class="n">CC</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">Pte_cond_estim_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">getPteCondEstim</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">classes_</span><span class="p">,</span> <span class="n">true_labels</span><span class="p">,</span> <span class="n">pred_labels</span><span class="p">)</span></div>
|
||
|
||
|
||
<div class="viewcode-block" id="ACC.getPteCondEstim">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.ACC.getPteCondEstim">[docs]</a>
|
||
<span class="nd">@classmethod</span>
|
||
<span class="k">def</span> <span class="nf">getPteCondEstim</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">classes</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">y_</span><span class="p">):</span>
|
||
<span class="c1"># estimate the matrix with entry (i,j) being the estimate of P(hat_yi|yj), that is, the probability that a</span>
|
||
<span class="c1"># document that belongs to yj ends up being classified as belonging to yi</span>
|
||
<span class="n">conf</span> <span class="o">=</span> <span class="n">confusion_matrix</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">y_</span><span class="p">,</span> <span class="n">labels</span><span class="o">=</span><span class="n">classes</span><span class="p">)</span><span class="o">.</span><span class="n">T</span>
|
||
<span class="n">conf</span> <span class="o">=</span> <span class="n">conf</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span>
|
||
<span class="n">class_counts</span> <span class="o">=</span> <span class="n">conf</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">classes</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="n">class_counts</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||
<span class="n">conf</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="mi">1</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="n">conf</span><span class="p">[:,</span> <span class="n">i</span><span class="p">]</span> <span class="o">/=</span> <span class="n">class_counts</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
|
||
<span class="k">return</span> <span class="n">conf</span></div>
|
||
|
||
|
||
<div class="viewcode-block" id="ACC.aggregate">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.ACC.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">):</span>
<span class="n">prevs_estim</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">cc</span><span class="o">.</span><span class="n">aggregate</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">)</span>
<span class="k">return</span> <span class="n">ACC</span><span class="o">.</span><span class="n">solve_adjustment</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pte_cond_estim_</span><span class="p">,</span> <span class="n">prevs_estim</span><span class="p">,</span> <span class="n">solver</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">solver</span><span class="p">)</span></div>
<div class="viewcode-block" id="ACC.solve_adjustment">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.ACC.solve_adjustment">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">solve_adjustment</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">PteCondEstim</span><span class="p">,</span> <span class="n">prevs_estim</span><span class="p">,</span> <span class="n">solver</span><span class="o">=</span><span class="s1">'exact'</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""</span>
<span class="sd"> Solves the linear system :math:`Ax = B` with :math:`A` = `PteCondEstim` and :math:`B` = `prevs_estim`</span>
<span class="sd"> :param PteCondEstim: a `np.ndarray` of shape `(n_classes,n_classes,)` with entry `(i,j)` being the estimate</span>
<span class="sd"> of :math:`P(y_i|y_j)`, that is, the probability that an instance that belongs to :math:`y_j` ends up being</span>
<span class="sd"> classified as belonging to :math:`y_i`</span>
<span class="sd"> :param prevs_estim: a `np.ndarray` of shape `(n_classes,)` with the class prevalence estimates</span>
<span class="sd"> :param solver: indicates the method to use for solving the system of linear equations. Valid options are</span>
<span class="sd"> 'exact' (tries to solve the system --may fail if the misclassification matrix has rank < n_classes) or</span>
<span class="sd"> 'minimize' (minimizes a norm --always exists).</span>
<span class="sd"> :return: an adjusted `np.ndarray` of shape `(n_classes,)` with the corrected class prevalence estimates</span>
<span class="sd"> """</span>
<span class="n">A</span> <span class="o">=</span> <span class="n">PteCondEstim</span>
<span class="n">B</span> <span class="o">=</span> <span class="n">prevs_estim</span>
<span class="k">if</span> <span class="n">solver</span> <span class="o">==</span> <span class="s1">'exact'</span><span class="p">:</span>
<span class="c1"># attempts an exact solution of the linear system (may fail)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">adjusted_prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linalg</span><span class="o">.</span><span class="n">solve</span><span class="p">(</span><span class="n">A</span><span class="p">,</span> <span class="n">B</span><span class="p">)</span>
<span class="n">adjusted_prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="n">adjusted_prevs</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">adjusted_prevs</span> <span class="o">/=</span> <span class="n">adjusted_prevs</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="k">except</span> <span class="n">np</span><span class="o">.</span><span class="n">linalg</span><span class="o">.</span><span class="n">LinAlgError</span><span class="p">:</span>
<span class="n">adjusted_prevs</span> <span class="o">=</span> <span class="n">prevs_estim</span> <span class="c1"># no way to adjust them!</span>
<span class="k">return</span> <span class="n">adjusted_prevs</span>
<span class="k">elif</span> <span class="n">solver</span> <span class="o">==</span> <span class="s1">'minimize'</span><span class="p">:</span>
<span class="c1"># poses the problem as an optimization one, and tries to minimize the norm of the differences</span>
<span class="k">def</span> <span class="nf">loss</span><span class="p">(</span><span class="n">prev</span><span class="p">):</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">linalg</span><span class="o">.</span><span class="n">norm</span><span class="p">(</span><span class="n">A</span> <span class="o">@</span> <span class="n">prev</span> <span class="o">-</span> <span class="n">B</span><span class="p">)</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">optim_minimize</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="o">=</span><span class="n">A</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span></div>
</div>
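# Usage sketch (illustration only; the numbers are made up): correcting a CC
# estimate with the misclassification matrix via the classmethod defined above.
def _example_acc_adjustment():
    import numpy as np
    # P(hat_y=i | y=j): each column sums to 1 (tpr=0.8, fpr=0.1 in binary terms)
    Pte_cond = np.asarray([[0.9, 0.2],
                           [0.1, 0.8]])
    cc_estimate = np.asarray([0.4, 0.6])  # prevalence as counted by CC
    adjusted = ACC.solve_adjustment(Pte_cond, cc_estimate, solver='exact')
    # adjusted is approximately [0.286, 0.714]: the correction removes the bias
    # introduced by the classifier's errors
    return adjusted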
<div class="viewcode-block" id="PCC">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.PCC">[docs]</a>
<span class="k">class</span> <span class="nc">PCC</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""</span>
<span class="sd"> `Probabilistic Classify & Count <https://ieeexplore.ieee.org/abstract/document/5694031>`_,</span>
<span class="sd"> the probabilistic variant of CC that relies on the posterior probabilities returned by a probabilistic classifier.</span>
<span class="sd"> :param classifier: a sklearn's Estimator that generates a classifier</span>
<span class="sd"> """</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<div class="viewcode-block" id="PCC.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.PCC.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""</span>
<span class="sd"> Nothing to do here!</span>
<span class="sd"> :param classif_predictions: this is actually None</span>
<span class="sd"> """</span>
<span class="k">pass</span></div>
<div class="viewcode-block" id="PCC.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.PCC.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_posteriors</span><span class="p">):</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">prevalence_from_probabilities</span><span class="p">(</span><span class="n">classif_posteriors</span><span class="p">,</span> <span class="n">binarize</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span></div>
</div>
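# Minimal sketch (illustration only): PCC's aggregate step amounts to averaging the
# posterior probabilities column by column, which is essentially what
# F.prevalence_from_probabilities(classif_posteriors, binarize=False) computes.
def _example_pcc_aggregate():
    import numpy as np
    posteriors = np.asarray([[0.9, 0.1],
                             [0.6, 0.4],
                             [0.3, 0.7]])
    return posteriors.mean(axis=0)  # array([0.6, 0.4])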
<div class="viewcode-block" id="PACC">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.PACC">[docs]</a>
<span class="k">class</span> <span class="nc">PACC</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""</span>
<span class="sd"> `Probabilistic Adjusted Classify & Count <https://ieeexplore.ieee.org/abstract/document/5694031>`_,</span>
<span class="sd"> the probabilistic variant of ACC that relies on the posterior probabilities returned by a probabilistic classifier.</span>
<span class="sd"> :param classifier: a sklearn's Estimator that generates a classifier</span>
<span class="sd"> :param val_split: specifies the data used for generating classifier predictions. This specification</span>
<span class="sd"> can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to</span>
<span class="sd"> be extracted from the training set; or as an integer (default 5), indicating that the predictions</span>
<span class="sd"> are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value</span>
<span class="sd"> for `k`). Alternatively, this set can be specified at fit time by indicating the exact set of data</span>
<span class="sd"> on which the predictions are to be generated.</span>
<span class="sd"> :param n_jobs: number of parallel workers</span>
<span class="sd"> :param solver: indicates the method to be used for obtaining the final estimates. The choice</span>
<span class="sd"> 'exact' comes down to solving the system of linear equations :math:`Ax=B` where `A` is a</span>
<span class="sd"> matrix containing the class-conditional probabilities of the predictions (e.g., the tpr and fpr in</span>
<span class="sd"> binary) and `B` is the vector of prevalence values estimated via CC, as :math:`x=A^{-1}B`. This solution</span>
<span class="sd"> might not exist for degenerate classifiers, in which case the method defaults to classify and count</span>
<span class="sd"> (i.e., does not attempt any adjustment).</span>
<span class="sd"> Another option is to search for the prevalence vector that minimizes the L2 norm of :math:`|Ax-B|`. The latter</span>
<span class="sd"> is achieved by indicating solver='minimize'. This one generally works better, and is the default parameter.</span>
<span class="sd"> More details about this can be consulted in `Bunse, M. "On Multi-Class Extensions of Adjusted Classify and</span>
<span class="sd"> Count", on proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications</span>
<span class="sd"> (LQ 2022), ECML/PKDD 2022, Grenoble (France) <https://lq-2022.github.io/proceedings/CompleteVolume.pdf>`_.</span>
<span class="sd"> """</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">solver</span><span class="o">=</span><span class="s1">'minimize'</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">solver</span> <span class="o">=</span> <span class="n">solver</span>
<span class="k">def</span> <span class="nf">_check_init_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">solver</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'exact'</span><span class="p">,</span> <span class="s1">'minimize'</span><span class="p">],</span> <span class="s2">"unknown solver; valid ones are 'exact', 'minimize'"</span>
<div class="viewcode-block" id="PACC.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.PACC.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""</span>
<span class="sd"> Estimates the misclassification rates</span>
<span class="sd"> :param classif_predictions: classifier soft predictions with true labels</span>
<span class="sd"> """</span>
<span class="n">posteriors</span><span class="p">,</span> <span class="n">true_labels</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pcc</span> <span class="o">=</span> <span class="n">PCC</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pte_cond_estim_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">getPteCondEstim</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">classes_</span><span class="p">,</span> <span class="n">true_labels</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">)</span></div>
<div class="viewcode-block" id="PACC.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.PACC.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_posteriors</span><span class="p">):</span>
<span class="n">prevs_estim</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">pcc</span><span class="o">.</span><span class="n">aggregate</span><span class="p">(</span><span class="n">classif_posteriors</span><span class="p">)</span>
<span class="k">return</span> <span class="n">ACC</span><span class="o">.</span><span class="n">solve_adjustment</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pte_cond_estim_</span><span class="p">,</span> <span class="n">prevs_estim</span><span class="p">,</span> <span class="n">solver</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">solver</span><span class="p">)</span></div>
<div class="viewcode-block" id="PACC.getPteCondEstim">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.PACC.getPteCondEstim">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">getPteCondEstim</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">classes</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">y_</span><span class="p">):</span>
<span class="c1"># estimate the matrix with entry (i,j) being the estimate of P(hat_yi|yj), that is, the probability that a</span>
<span class="c1"># document that belongs to yj ends up being classified as belonging to yi</span>
<span class="n">n_classes</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">classes</span><span class="p">)</span>
<span class="n">confusion</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">eye</span><span class="p">(</span><span class="n">n_classes</span><span class="p">)</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">class_</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">classes</span><span class="p">):</span>
<span class="n">idx</span> <span class="o">=</span> <span class="n">y</span> <span class="o">==</span> <span class="n">class_</span>
<span class="k">if</span> <span class="n">idx</span><span class="o">.</span><span class="n">any</span><span class="p">():</span>
<span class="n">confusion</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">y_</span><span class="p">[</span><span class="n">idx</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="k">return</span> <span class="n">confusion</span><span class="o">.</span><span class="n">T</span></div>
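# Worked example (illustrative note, not part of the original source): with
# classes=[0, 1], true labels y=[0, 0, 1] and posteriors y_=[[0.8, 0.2], [0.6, 0.4],
# [0.3, 0.7]], each row of `confusion` becomes the average posterior of one true
# class, i.e. [0.7, 0.3] and [0.3, 0.7]; the returned transpose plays the same role
# as the hard-count matrix used by ACC, but is estimated from soft predictions.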
</div>
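# Usage sketch (illustration only; assumes the fit/quantify interface that the
# aggregative quantifiers of this module expose):
def _example_pacc_usage(training_collection, test_instances):
    from sklearn.linear_model import LogisticRegression
    quantifier = PACC(LogisticRegression(), val_split=5, solver='minimize')
    quantifier.fit(training_collection)          # training_collection: a LabelledCollection
    return quantifier.quantify(test_instances)   # estimated class prevalence values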
<div class="viewcode-block" id="EMQ">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.EMQ">[docs]</a>
<span class="k">class</span> <span class="nc">EMQ</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""</span>
<span class="sd"> `Expectation Maximization for Quantification <https://ieeexplore.ieee.org/abstract/document/6789744>`_ (EMQ),</span>
<span class="sd"> aka `Saerens-Latinne-Decaestecker` (SLD) algorithm.</span>
<span class="sd"> EMQ consists of using the well-known `Expectation Maximization algorithm` to iteratively update the posterior</span>
<span class="sd"> probabilities generated by a probabilistic classifier and the class prevalence estimates obtained via</span>
<span class="sd"> maximum-likelihood estimation, in a mutually recursive way, until convergence.</span>
<span class="sd"> This implementation also gives access to the heuristics proposed by `Alexandari et al. paper</span>
<span class="sd"> <http://proceedings.mlr.press/v119/alexandari20a.html>`_. These heuristics consist of using, as the training</span>
<span class="sd"> prevalence, an estimate of it obtained via k-fold cross validation (instead of the true training prevalence),</span>
<span class="sd"> and to recalibrate the posterior probabilities of the classifier.</span>
<span class="sd"> :param classifier: a sklearn's Estimator that generates a classifier</span>
<span class="sd"> :param val_split: specifies the data used for generating classifier predictions. This specification</span>
<span class="sd"> can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to</span>
<span class="sd"> be extracted from the training set; or as an integer, indicating that the predictions</span>
<span class="sd"> are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value</span>
<span class="sd"> for `k`, default 5); or as a collection defining the specific set of data to use for validation.</span>
<span class="sd"> Alternatively, this set can be specified at fit time by indicating the exact set of data</span>
<span class="sd"> on which the predictions are to be generated. This hyperparameter is only meant to be used when the</span>
<span class="sd"> heuristics are to be applied, i.e., if a recalibration is required. The default value is None (meaning</span>
<span class="sd"> the recalibration is not required). In case this hyperparameter is set to a value other than None, but</span>
<span class="sd"> the recalibration is not required (recalib=None), a warning message will be raised.</span>
<span class="sd"> :param exact_train_prev: set to True (default) for using the true training prevalence as the initial observation;</span>
<span class="sd"> set to False for computing the training prevalence as an estimate of it, i.e., as the expected</span>
<span class="sd"> value of the posterior probabilities of the training instances.</span>
<span class="sd"> :param recalib: a string indicating the method of recalibration.</span>
<span class="sd"> Available choices include "nbvs" (No-Bias Vector Scaling), "bcts" (Bias-Corrected Temperature Scaling,</span>
<span class="sd"> default), "ts" (Temperature Scaling), and "vs" (Vector Scaling). Default is None (no recalibration).</span>
<span class="sd"> :param n_jobs: number of parallel workers. Only used for recalibrating the classifier if `val_split` is set to</span>
<span class="sd"> an integer `k` --the number of folds.</span>
<span class="sd"> """</span>
<span class="n">MAX_ITER</span> <span class="o">=</span> <span class="mi">1000</span>
<span class="n">EPSILON</span> <span class="o">=</span> <span class="mf">1e-4</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">exact_train_prev</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">recalib</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<span class="bp">self</span><span class="o">.</span><span class="n">exact_train_prev</span> <span class="o">=</span> <span class="n">exact_train_prev</span>
<span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="o">=</span> <span class="n">recalib</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
<div class="viewcode-block" id="EMQ.EMQ_BCTS">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.EMQ.EMQ_BCTS">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">EMQ_BCTS</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""</span>
<span class="sd"> Constructs an instance of EMQ using the best configuration found in the `Alexandari et al. paper</span>
<span class="sd"> <http://proceedings.mlr.press/v119/alexandari20a.html>`_, i.e., one that relies on Bias-Corrected Temperature</span>
<span class="sd"> Scaling (BCTS) as a recalibration function, and that uses an estimate of the training prevalence instead of</span>
<span class="sd"> the true training prevalence.</span>
<span class="sd"> :param classifier: a sklearn's Estimator that generates a classifier</span>
<span class="sd"> :param n_jobs: number of parallel workers.</span>
<span class="sd"> :return: An instance of EMQ with BCTS</span>
<span class="sd"> """</span>
<span class="k">return</span> <span class="n">EMQ</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">exact_train_prev</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">recalib</span><span class="o">=</span><span class="s1">'bcts'</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_check_init_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">exact_train_prev</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeWarning</span><span class="p">(</span><span class="sa">f</span><span class="s1">'The parameter </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">val_split</span><span class="si">=}</span><span class="s1"> was specified for EMQ, while the parameters '</span>
<span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">exact_train_prev</span><span class="si">=}</span><span class="s1"> and </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">recalib</span><span class="si">=}</span><span class="s1"> were left at their default values. This has no effect and causes unnecessary '</span>
<span class="sa">f</span><span class="s1">'overhead.'</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'[warning] The parameter </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">recalib</span><span class="si">=}</span><span class="s1"> requires the val_split be different from None. '</span>
<span class="sa">f</span><span class="s1">'This parameter will be set to 5. To avoid this warning, set this value to a float value '</span>
<span class="sa">f</span><span class="s1">'indicating the proportion of training data to be used as validation, or to an integer '</span>
<span class="sa">f</span><span class="s1">'indicating the number of folds for kFCV.'</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span><span class="o">=</span><span class="mi">5</span>
<div class="viewcode-block" id="EMQ.classify">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.EMQ.classify">[docs]</a>
<span class="k">def</span> <span class="nf">classify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""</span>
<span class="sd"> Provides the posterior probabilities for the given instances. If the classifier was required</span>
<span class="sd"> to be recalibrated, then these posteriors are recalibrated accordingly.</span>
<span class="sd"> :param instances: array-like of shape `(n_instances, n_dimensions,)`</span>
<span class="sd"> :return: np.ndarray of shape `(n_instances, n_classes,)` with posterior probabilities</span>
<span class="sd"> """</span>
<span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'calibration_function'</span><span class="p">)</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span><span class="p">(</span><span class="n">posteriors</span><span class="p">)</span>
<span class="k">return</span> <span class="n">posteriors</span></div>
<div class="viewcode-block" id="EMQ.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.EMQ.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">P</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="o">==</span> <span class="s1">'nbvs'</span><span class="p">:</span>
<span class="n">calibrator</span> <span class="o">=</span> <span class="n">NoBiasVectorScaling</span><span class="p">()</span>
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="o">==</span> <span class="s1">'bcts'</span><span class="p">:</span>
<span class="n">calibrator</span> <span class="o">=</span> <span class="n">TempScaling</span><span class="p">(</span><span class="n">bias_positions</span><span class="o">=</span><span class="s1">'all'</span><span class="p">)</span>
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="o">==</span> <span class="s1">'ts'</span><span class="p">:</span>
<span class="n">calibrator</span> <span class="o">=</span> <span class="n">TempScaling</span><span class="p">()</span>
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="o">==</span> <span class="s1">'vs'</span><span class="p">:</span>
<span class="n">calibrator</span> <span class="o">=</span> <span class="n">VectorScaling</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'invalid param argument for recalibration method; available ones are '</span>
<span class="s1">'"nbvs", "bcts", "ts", and "vs".'</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span> <span class="o">=</span> <span class="n">calibrator</span><span class="p">(</span><span class="n">P</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">eye</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">)[</span><span class="n">y</span><span class="p">],</span> <span class="n">posterior_supplied</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">exact_train_prev</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">train_prevalence</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">train_posteriors</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">X</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">recalib</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">train_posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span><span class="p">(</span><span class="n">train_posteriors</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">train_prevalence</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">prevalence_from_probabilities</span><span class="p">(</span><span class="n">train_posteriors</span><span class="p">)</span></div>
<div class="viewcode-block" id="EMQ.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.EMQ.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_posteriors</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="n">EPSILON</span><span class="p">):</span>
<span class="n">priors</span><span class="p">,</span> <span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">EM</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">train_prevalence</span><span class="p">,</span> <span class="n">classif_posteriors</span><span class="p">,</span> <span class="n">epsilon</span><span class="p">)</span>
<span class="k">return</span> <span class="n">priors</span></div>
<div class="viewcode-block" id="EMQ.predict_proba">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.EMQ.predict_proba">[docs]</a>
<span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="n">EPSILON</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""</span>
<span class="sd"> Returns the posterior probabilities updated by the EM algorithm.</span>
<span class="sd"> :param instances: np.ndarray of shape `(n_instances, n_dimensions)`</span>
<span class="sd"> :param epsilon: error tolerance</span>
<span class="sd"> :return: np.ndarray of shape `(n_instances, n_classes)`</span>
<span class="sd"> """</span>
<span class="n">classif_posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
<span class="n">priors</span><span class="p">,</span> <span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">EM</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">train_prevalence</span><span class="p">,</span> <span class="n">classif_posteriors</span><span class="p">,</span> <span class="n">epsilon</span><span class="p">)</span>
<span class="k">return</span> <span class="n">posteriors</span></div>
<div class="viewcode-block" id="EMQ.EM">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.EMQ.EM">[docs]</a>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">EM</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">tr_prev</span><span class="p">,</span> <span class="n">posterior_probabilities</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="n">EPSILON</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""</span>
<span class="sd"> Computes the `Expectation Maximization` routine.</span>
<span class="sd"> :param tr_prev: array-like, the training prevalence</span>
<span class="sd"> :param posterior_probabilities: `np.ndarray` of shape `(n_instances, n_classes,)` with the</span>
<span class="sd"> posterior probabilities</span>
<span class="sd"> :param epsilon: float, the threshold difference between two consecutive iterations</span>
<span class="sd"> below which the loop stops</span>
<span class="sd"> :return: a tuple with the estimated prevalence values (shape `(n_classes,)`) and</span>
<span class="sd"> the corrected posterior probabilities (shape `(n_instances, n_classes,)`)</span>
<span class="sd"> """</span>
<span class="n">Px</span> <span class="o">=</span> <span class="n">posterior_probabilities</span>
<span class="n">Ptr</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">tr_prev</span><span class="p">)</span>
<span class="n">qs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">Ptr</span><span class="p">)</span> <span class="c1"># qs (the running estimate) is initialized as the training prevalence</span>
<span class="n">s</span><span class="p">,</span> <span class="n">converged</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> <span class="kc">False</span>
<span class="n">qs_prev_</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">while</span> <span class="ow">not</span> <span class="n">converged</span> <span class="ow">and</span> <span class="n">s</span> <span class="o"><</span> <span class="n">EMQ</span><span class="o">.</span><span class="n">MAX_ITER</span><span class="p">:</span>
<span class="c1"># E-step: ps is Ps(y|xi)</span>
<span class="n">ps_unnormalized</span> <span class="o">=</span> <span class="p">(</span><span class="n">qs</span> <span class="o">/</span> <span class="n">Ptr</span><span class="p">)</span> <span class="o">*</span> <span class="n">Px</span>
<span class="n">ps</span> <span class="o">=</span> <span class="n">ps_unnormalized</span> <span class="o">/</span> <span class="n">ps_unnormalized</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">keepdims</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># M-step:</span>
<span class="n">qs</span> <span class="o">=</span> <span class="n">ps</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="k">if</span> <span class="n">qs_prev_</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">qs</span><span class="p">,</span> <span class="n">qs_prev_</span><span class="p">)</span> <span class="o"><</span> <span class="n">epsilon</span> <span class="ow">and</span> <span class="n">s</span> <span class="o">></span> <span class="mi">10</span><span class="p">:</span>
<span class="n">converged</span> <span class="o">=</span> <span class="kc">True</span>
<span class="n">qs_prev_</span> <span class="o">=</span> <span class="n">qs</span>
<span class="n">s</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">converged</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">'[warning] the method has reached the maximum number of iterations; it might not have converged'</span><span class="p">)</span>
<span class="k">return</span> <span class="n">qs</span><span class="p">,</span> <span class="n">ps</span></div>
</div>
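# Minimal sketch (illustration only): the EM classmethod can also be called on its
# own, given a training prevalence and a matrix of test posterior probabilities.
def _example_emq_em():
    import numpy as np
    train_prev = np.asarray([0.5, 0.5])
    test_posteriors = np.asarray([[0.7, 0.3],
                                  [0.8, 0.2],
                                  [0.6, 0.4]])
    priors, corrected = EMQ.EM(train_prev, test_posteriors)
    # priors: estimated test prevalence, shape (2,)
    # corrected: posteriors rescaled towards that prevalence, shape (3, 2)
    return priors, corrected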
<div class="viewcode-block" id="HDy">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.HDy">[docs]</a>
<span class="k">class</span> <span class="nc">HDy</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">,</span> <span class="n">BinaryAggregativeQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""</span>
<span class="sd"> `Hellinger Distance y <https://www.sciencedirect.com/science/article/pii/S0020025512004069>`_ (HDy).</span>
<span class="sd"> HDy is a probabilistic method for training binary quantifiers, that models quantification as the problem of</span>
<span class="sd"> minimizing the divergence (in terms of the Hellinger Distance) between two distributions of posterior</span>
<span class="sd"> probabilities returned by the classifier. One of the distributions is generated from the unlabelled examples and</span>
<span class="sd"> the other is generated from a validation set. This latter distribution is defined as a mixture of the</span>
<span class="sd"> class-conditional distributions of the posterior probabilities returned for the positive and negative validation</span>
<span class="sd"> examples, respectively. The parameters of the mixture thus represent the estimates of the class prevalence values.</span>
<span class="sd"> :param classifier: a sklearn's Estimator that generates a binary classifier</span>
<span class="sd"> :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out</span>
<span class="sd"> validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself), or an integer indicating the number of folds (default 5).</span>
<span class="sd"> """</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
<div class="viewcode-block" id="HDy.aggregation_fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.HDy.aggregation_fit">[docs]</a>
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""</span>
<span class="sd"> Trains an HDy quantifier.</span>
<span class="sd"> :param classif_predictions: classifier soft predictions with true labels</span>
<span class="sd"> :param data: the training set</span>
<span class="sd"> :return: self</span>
<span class="sd"> """</span>
<span class="n">P</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
<span class="n">Px</span> <span class="o">=</span> <span class="n">P</span><span class="p">[:,</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span> <span class="c1"># takes only the P(y=+1|x)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy1</span> <span class="o">=</span> <span class="n">Px</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy0</span> <span class="o">=</span> <span class="n">Px</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">neg_label</span><span class="p">]</span>
<span class="c1"># pre-compute the histogram for positive and negative examples</span>
<span class="bp">self</span><span class="o">.</span><span class="n">bins</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">110</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">)</span> <span class="c1"># [10, 20, 30, ..., 100, 110]</span>
<span class="k">def</span> <span class="nf">hist</span><span class="p">(</span><span class="n">P</span><span class="p">,</span> <span class="n">bins</span><span class="p">):</span>
<span class="n">h</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">P</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="n">bins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">density</span><span class="o">=</span><span class="kc">True</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">return</span> <span class="n">h</span> <span class="o">/</span> <span class="n">h</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy1_density</span> <span class="o">=</span> <span class="p">{</span><span class="n">bins</span><span class="p">:</span> <span class="n">hist</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pxy1</span><span class="p">,</span> <span class="n">bins</span><span class="p">)</span> <span class="k">for</span> <span class="n">bins</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">bins</span><span class="p">}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy0_density</span> <span class="o">=</span> <span class="p">{</span><span class="n">bins</span><span class="p">:</span> <span class="n">hist</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pxy0</span><span class="p">,</span> <span class="n">bins</span><span class="p">)</span> <span class="k">for</span> <span class="n">bins</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">bins</span><span class="p">}</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="HDy.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.HDy.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_posteriors</span><span class="p">):</span>
<span class="c1"># "In this work, the number of bins b used in HDx and HDy was chosen from 10 to 110 in steps of 10,</span>
<span class="c1"># and the final estimated a priori probability was taken as the median of these 11 estimates."</span>
<span class="c1"># (González-Castro, et al., 2013).</span>
<span class="n">Px</span> <span class="o">=</span> <span class="n">classif_posteriors</span><span class="p">[:,</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span> <span class="c1"># takes only the P(y=+1|x)</span>
<span class="n">prev_estimations</span> <span class="o">=</span> <span class="p">[]</span>
<span class="c1"># for bins in np.linspace(10, 110, 11, dtype=int): #[10, 20, 30, ..., 100, 110]</span>
<span class="c1"># Pxy0_density, _ = np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True)</span>
<span class="c1"># Pxy1_density, _ = np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True)</span>
<span class="k">for</span> <span class="n">bins</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">bins</span><span class="p">:</span>
<span class="n">Pxy0_density</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">Pxy0_density</span><span class="p">[</span><span class="n">bins</span><span class="p">]</span>
<span class="n">Pxy1_density</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">Pxy1_density</span><span class="p">[</span><span class="n">bins</span><span class="p">]</span>
<span class="n">Px_test</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">Px</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="n">bins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">density</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># the authors proposed to search for the prevalence yielding the best matching as a linear search</span>
<span class="c1"># at small steps (modern implementations resort to an optimization procedure,</span>
<span class="c1"># see class DistributionMatching)</span>
<span class="n">prev_selected</span><span class="p">,</span> <span class="n">min_dist</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span>
<span class="k">for</span> <span class="n">prev</span> <span class="ow">in</span> <span class="n">F</span><span class="o">.</span><span class="n">prevalence_linspace</span><span class="p">(</span><span class="n">n_prevalences</span><span class="o">=</span><span class="mi">101</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">smooth_limits_epsilon</span><span class="o">=</span><span class="mf">0.0</span><span class="p">):</span>
<span class="n">Px_train</span> <span class="o">=</span> <span class="n">prev</span> <span class="o">*</span> <span class="n">Pxy1_density</span> <span class="o">+</span> <span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">prev</span><span class="p">)</span> <span class="o">*</span> <span class="n">Pxy0_density</span>
<span class="n">hdy</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">HellingerDistance</span><span class="p">(</span><span class="n">Px_train</span><span class="p">,</span> <span class="n">Px_test</span><span class="p">)</span>
<span class="k">if</span> <span class="n">prev_selected</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">hdy</span> <span class="o"><</span> <span class="n">min_dist</span><span class="p">:</span>
<span class="n">prev_selected</span><span class="p">,</span> <span class="n">min_dist</span> <span class="o">=</span> <span class="n">prev</span><span class="p">,</span> <span class="n">hdy</span>
<span class="n">prev_estimations</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">prev_selected</span><span class="p">)</span>
<span class="n">class1_prev</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">prev_estimations</span><span class="p">)</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">as_binary_prevalence</span><span class="p">(</span><span class="n">class1_prev</span><span class="p">)</span></div>
</div>
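# Simplified sketch (illustration only, plain numpy): the core of HDy is to find the
# mixture weight alpha for which alpha*P(score|positive) + (1-alpha)*P(score|negative)
# is closest, in Hellinger distance, to the histogram of test scores; HDy.aggregate
# above repeats this over several binnings and reports the median.
def _example_hellinger_mixture_search(pos_hist, neg_hist, test_hist):
    import numpy as np
    def hellinger(p, q):
        return np.sqrt(np.sum((np.sqrt(p) - np.sqrt(q)) ** 2))
    best_alpha, best_dist = 0.0, float('inf')
    for alpha in np.linspace(0., 1., 101):
        mixture = alpha * pos_hist + (1. - alpha) * neg_hist
        dist = hellinger(mixture, test_hist)
        if dist < best_dist:
            best_alpha, best_dist = alpha, dist
    return best_alpha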
<div class="viewcode-block" id="DyS">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.DyS">[docs]</a>
<span class="k">class</span> <span class="nc">DyS</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">,</span> <span class="n">BinaryAggregativeQuantifier</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> `DyS framework <https://ojs.aaai.org/index.php/AAAI/article/view/4376>`_ (DyS).</span>
|
||
<span class="sd"> DyS is a generalization of HDy method, using a Ternary Search in order to find the prevalence that</span>
|
||
<span class="sd"> minimizes the distance between distributions.</span>
|
||
<span class="sd"> Details for the ternary search have been got from <https://dl.acm.org/doi/pdf/10.1145/3219819.3220059></span>
|
||
|
||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a binary classifier</span>
|
||
<span class="sd"> :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out</span>
|
||
<span class="sd"> validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself), or an integer indicating the number of folds (default 5)..</span>
|
||
<span class="sd"> :param n_bins: an int with the number of bins to use to compute the histograms.</span>
|
||
<span class="sd"> :param divergence: a str indicating the name of divergence (currently supported ones are "HD" or "topsoe"), or a</span>
|
||
<span class="sd"> callable function computes the divergence between two distributions (two equally sized arrays).</span>
|
||
<span class="sd"> :param tol: a float with the tolerance for the ternary search algorithm.</span>
|
||
<span class="sd"> :param n_jobs: number of parallel workers.</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_bins</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">divergence</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">]</span><span class="o">=</span> <span class="s1">'HD'</span><span class="p">,</span> <span class="n">tol</span><span class="o">=</span><span class="mf">1e-05</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">tol</span> <span class="o">=</span> <span class="n">tol</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">divergence</span> <span class="o">=</span> <span class="n">divergence</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">n_bins</span> <span class="o">=</span> <span class="n">n_bins</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
||
|
||
<span class="k">def</span> <span class="nf">_ternary_search</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">f</span><span class="p">,</span> <span class="n">left</span><span class="p">,</span> <span class="n">right</span><span class="p">,</span> <span class="n">tol</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Find maximum of unimodal function f() within [left, right]</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">while</span> <span class="nb">abs</span><span class="p">(</span><span class="n">right</span> <span class="o">-</span> <span class="n">left</span><span class="p">)</span> <span class="o">>=</span> <span class="n">tol</span><span class="p">:</span>
|
||
<span class="n">left_third</span> <span class="o">=</span> <span class="n">left</span> <span class="o">+</span> <span class="p">(</span><span class="n">right</span> <span class="o">-</span> <span class="n">left</span><span class="p">)</span> <span class="o">/</span> <span class="mi">3</span>
|
||
<span class="n">right_third</span> <span class="o">=</span> <span class="n">right</span> <span class="o">-</span> <span class="p">(</span><span class="n">right</span> <span class="o">-</span> <span class="n">left</span><span class="p">)</span> <span class="o">/</span> <span class="mi">3</span>
|
||
|
||
<span class="k">if</span> <span class="n">f</span><span class="p">(</span><span class="n">left_third</span><span class="p">)</span> <span class="o">></span> <span class="n">f</span><span class="p">(</span><span class="n">right_third</span><span class="p">):</span>
|
||
<span class="n">left</span> <span class="o">=</span> <span class="n">left_third</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="n">right</span> <span class="o">=</span> <span class="n">right_third</span>
|
||
|
||
<span class="c1"># Left and right are the current bounds; the maximum is between them</span>
|
||
<span class="k">return</span> <span class="p">(</span><span class="n">left</span> <span class="o">+</span> <span class="n">right</span><span class="p">)</span> <span class="o">/</span> <span class="mi">2</span>
|
||
|
||
<div class="viewcode-block" id="DyS.aggregation_fit">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.DyS.aggregation_fit">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||
<span class="n">Px</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
|
||
<span class="n">Px</span> <span class="o">=</span> <span class="n">Px</span><span class="p">[:,</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span> <span class="c1"># takes only the P(y=+1|x)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy1</span> <span class="o">=</span> <span class="n">Px</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy0</span> <span class="o">=</span> <span class="n">Px</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">neg_label</span><span class="p">]</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy1_density</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pxy1</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_bins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">density</span><span class="o">=</span><span class="kc">True</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy0_density</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pxy0</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_bins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">density</span><span class="o">=</span><span class="kc">True</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||
<span class="k">return</span> <span class="bp">self</span></div>
|
||
|
||
|
||
<div class="viewcode-block" id="DyS.aggregate">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.DyS.aggregate">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_posteriors</span><span class="p">):</span>
|
||
<span class="n">Px</span> <span class="o">=</span> <span class="n">classif_posteriors</span><span class="p">[:,</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span> <span class="c1"># takes only the P(y=+1|x)</span>
|
||
|
||
<span class="n">Px_test</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">Px</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_bins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">density</span><span class="o">=</span><span class="kc">True</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||
<span class="n">divergence</span> <span class="o">=</span> <span class="n">get_divergence</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">divergence</span><span class="p">)</span>
|
||
|
||
<span class="k">def</span> <span class="nf">distribution_distance</span><span class="p">(</span><span class="n">prev</span><span class="p">):</span>
|
||
<span class="n">Px_train</span> <span class="o">=</span> <span class="n">prev</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">Pxy1_density</span> <span class="o">+</span> <span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">prev</span><span class="p">)</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">Pxy0_density</span>
|
||
<span class="k">return</span> <span class="n">divergence</span><span class="p">(</span><span class="n">Px_train</span><span class="p">,</span> <span class="n">Px_test</span><span class="p">)</span>
|
||
|
||
<span class="n">class1_prev</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ternary_search</span><span class="p">(</span><span class="n">f</span><span class="o">=</span><span class="n">distribution_distance</span><span class="p">,</span> <span class="n">left</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">right</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">tol</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">tol</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">as_binary_prevalence</span><span class="p">(</span><span class="n">class1_prev</span><span class="p">)</span></div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="SMM">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.SMM">[docs]</a>
|
||
<span class="k">class</span> <span class="nc">SMM</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">,</span> <span class="n">BinaryAggregativeQuantifier</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> `SMM method <https://ieeexplore.ieee.org/document/9260028>`_ (SMM).</span>
|
||
<span class="sd"> SMM is a simplification of matching distribution methods where the representation of the examples</span>
|
||
<span class="sd"> is created using the mean instead of a histogram (conceptually equivalent to PACC).</span>
|
||
|
||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a binary classifier.</span>
|
||
<span class="sd"> :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out</span>
|
||
<span class="sd"> validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself), or an integer indicating the number of folds (default 5)..</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
||
|
||
<div class="viewcode-block" id="SMM.aggregation_fit">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.SMM.aggregation_fit">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||
<span class="n">Px</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
|
||
<span class="n">Px</span> <span class="o">=</span> <span class="n">Px</span><span class="p">[:,</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span> <span class="c1"># takes only the P(y=+1|x)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy1</span> <span class="o">=</span> <span class="n">Px</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy0</span> <span class="o">=</span> <span class="n">Px</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">neg_label</span><span class="p">]</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy1_mean</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pxy1</span><span class="p">)</span> <span class="c1"># equiv. TPR </span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">Pxy0_mean</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pxy0</span><span class="p">)</span> <span class="c1"># equiv. FPR</span>
|
||
<span class="k">return</span> <span class="bp">self</span></div>
|
||
|
||
|
||
<div class="viewcode-block" id="SMM.aggregate">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.SMM.aggregate">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_posteriors</span><span class="p">):</span>
|
||
<span class="n">Px</span> <span class="o">=</span> <span class="n">classif_posteriors</span><span class="p">[:,</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">]</span> <span class="c1"># takes only the P(y=+1|x)</span>
|
||
<span class="n">Px_mean</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">Px</span><span class="p">)</span>
|
||
|
||
<span class="n">class1_prev</span> <span class="o">=</span> <span class="p">(</span><span class="n">Px_mean</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">Pxy0_mean</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">Pxy1_mean</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">Pxy0_mean</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">as_binary_prevalence</span><span class="p">(</span><span class="n">class1_prev</span><span class="p">,</span> <span class="n">clip_if_necessary</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="DMy">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.DMy">[docs]</a>
|
||
<span class="k">class</span> <span class="nc">DMy</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of posterior</span>
|
||
<span class="sd"> probabilities. This implementation takes the number of bins, the divergence, and the possibility to work on CDF</span>
|
||
<span class="sd"> as hyperparameters.</span>
|
||
|
||
<span class="sd"> :param classifier: a `sklearn`'s Estimator that generates a probabilistic classifier</span>
|
||
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set to model the</span>
|
||
<span class="sd"> validation distribution.</span>
|
||
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
|
||
<span class="sd"> validation data, or as an integer, indicating that the validation distribution should be estimated via</span>
|
||
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a</span>
|
||
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
|
||
<span class="sd"> :param nbins: number of bins used to discretize the distributions (default 8)</span>
|
||
<span class="sd"> :param divergence: a string representing a divergence measure (currently, "HD" and "topsoe" are implemented)</span>
|
||
<span class="sd"> or a callable function taking two ndarrays of the same dimension as input (default "HD", meaning Hellinger</span>
|
||
<span class="sd"> Distance)</span>
|
||
<span class="sd"> :param cdf: whether to use CDF instead of PDF (default False)</span>
|
||
<span class="sd"> :param n_jobs: number of parallel workers (default None)</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">nbins</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">divergence</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">]</span><span class="o">=</span><span class="s1">'HD'</span><span class="p">,</span>
|
||
<span class="n">cdf</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">search</span><span class="o">=</span><span class="s1">'optim_minimize'</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">nbins</span> <span class="o">=</span> <span class="n">nbins</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">divergence</span> <span class="o">=</span> <span class="n">divergence</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">cdf</span> <span class="o">=</span> <span class="n">cdf</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">search</span> <span class="o">=</span> <span class="n">search</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
||
|
||
<span class="c1"># @classmethod</span>
|
||
<span class="c1"># def HDy(cls, classifier, val_split=5, n_jobs=None):</span>
|
||
<span class="c1"># from quapy.method.meta import MedianEstimator</span>
|
||
<span class="c1">#</span>
|
||
<span class="c1"># hdy = DMy(classifier=classifier, val_split=val_split, search='linear_search', divergence='HD')</span>
|
||
<span class="c1"># hdy = AggregativeMedianEstimator(hdy, param_grid={'nbins': np.linspace(10, 110, 11).astype(int)}, n_jobs=n_jobs)</span>
|
||
<span class="c1"># return hdy</span>
|
||
|
||
<span class="k">def</span> <span class="nf">_get_distributions</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">):</span>
|
||
<span class="n">histograms</span> <span class="o">=</span> <span class="p">[]</span>
|
||
<span class="n">post_dims</span> <span class="o">=</span> <span class="n">posteriors</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||
<span class="k">if</span> <span class="n">post_dims</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
|
||
<span class="c1"># in binary quantification we can use only one class, since the other one is its complement</span>
|
||
<span class="n">post_dims</span> <span class="o">=</span> <span class="mi">1</span>
|
||
<span class="k">for</span> <span class="n">dim</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">post_dims</span><span class="p">):</span>
|
||
<span class="n">hist</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">posteriors</span><span class="p">[:,</span> <span class="n">dim</span><span class="p">],</span> <span class="n">bins</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">nbins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">))[</span><span class="mi">0</span><span class="p">]</span>
|
||
<span class="n">histograms</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">hist</span><span class="p">)</span>
|
||
|
||
<span class="n">counts</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">vstack</span><span class="p">(</span><span class="n">histograms</span><span class="p">)</span>
|
||
<span class="n">distributions</span> <span class="o">=</span> <span class="n">counts</span><span class="o">/</span><span class="n">counts</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)[:,</span><span class="n">np</span><span class="o">.</span><span class="n">newaxis</span><span class="p">]</span>
|
||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cdf</span><span class="p">:</span>
|
||
<span class="n">distributions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">cumsum</span><span class="p">(</span><span class="n">distributions</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="n">distributions</span>
|
||
|
||
<div class="viewcode-block" id="DMy.aggregation_fit">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.DMy.aggregation_fit">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Trains the classifier (if requested) and generates the validation distributions out of the training data.</span>
|
||
<span class="sd"> The validation distributions have shape `(n, ch, nbins)`, with `n` the number of classes, `ch` the number of</span>
|
||
<span class="sd"> channels, and `nbins` the number of bins. In particular, let `V` be the validation distributions; then `di=V[i]`</span>
|
||
<span class="sd"> are the distributions obtained from training data labelled with class `i`; while `dij = di[j]` is the discrete</span>
|
||
<span class="sd"> distribution of posterior probabilities `P(Y=j|X=x)` for training data labelled with class `i`, and `dij[k]`</span>
|
||
<span class="sd"> is the fraction of instances with a value in the `k`-th bin.</span>
|
||
|
||
<span class="sd"> :param data: the training set</span>
|
||
<span class="sd"> :param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit)</span>
|
||
<span class="sd"> :param val_split: either a float in (0,1) indicating the proportion of training instances to use for</span>
|
||
<span class="sd"> validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection</span>
|
||
<span class="sd"> indicating the validation set itself, or an int indicating the number k of folds to be used in kFCV</span>
|
||
<span class="sd"> to estimate the parameters</span>
|
||
<span class="sd"> """</span>
|
||
<span class="n">posteriors</span><span class="p">,</span> <span class="n">true_labels</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
|
||
<span class="n">n_classes</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">validation_distribution</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
||
<span class="n">func</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_get_distributions</span><span class="p">,</span>
|
||
<span class="n">args</span><span class="o">=</span><span class="p">[</span><span class="n">posteriors</span><span class="p">[</span><span class="n">true_labels</span><span class="o">==</span><span class="n">cat</span><span class="p">]</span> <span class="k">for</span> <span class="n">cat</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_classes</span><span class="p">)],</span>
|
||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
|
||
<span class="n">backend</span><span class="o">=</span><span class="s1">'threading'</span>
|
||
<span class="p">)</span></div>
|
||
|
||
|
||
<div class="viewcode-block" id="DMy.aggregate">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.DMy.aggregate">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Searches for the mixture model parameter (the sought prevalence values) that yields a validation distribution</span>
|
||
<span class="sd"> (the mixture) that best matches the test distribution, in terms of the divergence measure of choice.</span>
|
||
<span class="sd"> In the multiclass case, with `n` the number of classes, the test and mixture distributions contain</span>
|
||
<span class="sd"> `n` channels (proper distributions of binned posterior probabilities), on which the divergence is computed</span>
|
||
<span class="sd"> independently. The matching is computed as an average of the divergence across all channels.</span>
|
||
|
||
<span class="sd"> :param posteriors: posterior probabilities of the instances in the sample</span>
|
||
<span class="sd"> :return: a vector of class prevalence estimates</span>
|
||
<span class="sd"> """</span>
|
||
<span class="n">test_distribution</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_distributions</span><span class="p">(</span><span class="n">posteriors</span><span class="p">)</span>
|
||
<span class="n">divergence</span> <span class="o">=</span> <span class="n">get_divergence</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">divergence</span><span class="p">)</span>
|
||
<span class="n">n_classes</span><span class="p">,</span> <span class="n">n_channels</span><span class="p">,</span> <span class="n">nbins</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">validation_distribution</span><span class="o">.</span><span class="n">shape</span>
|
||
<span class="k">def</span> <span class="nf">loss</span><span class="p">(</span><span class="n">prev</span><span class="p">):</span>
|
||
<span class="n">prev</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">expand_dims</span><span class="p">(</span><span class="n">prev</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||
<span class="n">mixture_distribution</span> <span class="o">=</span> <span class="p">(</span><span class="n">prev</span> <span class="o">@</span> <span class="bp">self</span><span class="o">.</span><span class="n">validation_distribution</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="n">n_classes</span><span class="p">,</span><span class="o">-</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="n">n_channels</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
|
||
<span class="n">divs</span> <span class="o">=</span> <span class="p">[</span><span class="n">divergence</span><span class="p">(</span><span class="n">test_distribution</span><span class="p">[</span><span class="n">ch</span><span class="p">],</span> <span class="n">mixture_distribution</span><span class="p">[</span><span class="n">ch</span><span class="p">])</span> <span class="k">for</span> <span class="n">ch</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_channels</span><span class="p">)]</span>
|
||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">divs</span><span class="p">)</span>
|
||
|
||
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">argmin_prevalence</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">search</span><span class="p">)</span></div>
|
||
</div>
|
||
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="newELM">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.newELM">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">newELM</span><span class="p">(</span><span class="n">svmperf_base</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">'01'</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Explicit Loss Minimization (ELM) quantifiers.</span>
|
||
<span class="sd"> Quantifiers based on ELM represent a family of methods based on structured output learning;</span>
|
||
<span class="sd"> these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss</span>
|
||
<span class="sd"> measure. This implementation relies on</span>
|
||
<span class="sd"> `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output</span>
|
||
<span class="sd"> learning algorithm, which has to be installed and patched for the purpose (see this</span>
|
||
<span class="sd"> `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).</span>
|
||
<span class="sd"> This function equivalent to:</span>
|
||
|
||
<span class="sd"> >>> CC(SVMperf(svmperf_base, loss, C))</span>
|
||
|
||
<span class="sd"> :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)</span>
|
||
<span class="sd"> this path will be obtained from qp.environ['SVMPERF_HOME']</span>
|
||
<span class="sd"> :param loss: the loss to optimize (see :attr:`quapy.classification.svmperf.SVMperf.valid_losses`)</span>
|
||
<span class="sd"> :param C: trade-off between training error and margin (default 0.01)</span>
|
||
<span class="sd"> :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the</span>
|
||
<span class="sd"> underlying classifier</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">if</span> <span class="n">svmperf_base</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="n">svmperf_base</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SVMPERF_HOME'</span><span class="p">]</span>
|
||
<span class="k">assert</span> <span class="n">svmperf_base</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">,</span> \
|
||
<span class="s1">'param svmperf_base was not specified, and the variable SVMPERF_HOME has not been set in the environment'</span>
|
||
<span class="k">return</span> <span class="n">CC</span><span class="p">(</span><span class="n">SVMperf</span><span class="p">(</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="n">loss</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="n">C</span><span class="p">))</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="newSVMQ">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.newSVMQ">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">newSVMQ</span><span class="p">(</span><span class="n">svmperf_base</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> SVM(Q) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the `Q` loss combining a</span>
|
||
<span class="sd"> classification-oriented loss and a quantification-oriented loss, as proposed by</span>
|
||
<span class="sd"> `Barranquero et al. 2015 <https://www.sciencedirect.com/science/article/pii/S003132031400291X>`_.</span>
|
||
<span class="sd"> Equivalent to:</span>
|
||
|
||
<span class="sd"> >>> CC(SVMperf(svmperf_base, loss='q', C=C))</span>
|
||
|
||
<span class="sd"> Quantifiers based on ELM represent a family of methods based on structured output learning;</span>
|
||
<span class="sd"> these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss</span>
|
||
<span class="sd"> measure. This implementation relies on</span>
|
||
<span class="sd"> `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output</span>
|
||
<span class="sd"> learning algorithm, which has to be installed and patched for the purpose (see this</span>
|
||
<span class="sd"> `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).</span>
|
||
<span class="sd"> This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))</span>
|
||
|
||
<span class="sd"> :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)</span>
|
||
<span class="sd"> this path will be obtained from qp.environ['SVMPERF_HOME']</span>
|
||
<span class="sd"> :param C: trade-off between training error and margin (default 0.01)</span>
|
||
<span class="sd"> :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the</span>
|
||
<span class="sd"> underlying classifier</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">return</span> <span class="n">newELM</span><span class="p">(</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">'q'</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="n">C</span><span class="p">)</span></div>
|
||
|
||
|
||
<span class="k">def</span> <span class="nf">newSVMKLD</span><span class="p">(</span><span class="n">svmperf_base</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> SVM(KLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence</span>
|
||
<span class="sd"> as proposed by `Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.</span>
|
||
<span class="sd"> Equivalent to:</span>
|
||
|
||
<span class="sd"> >>> CC(SVMperf(svmperf_base, loss='kld', C=C))</span>
|
||
|
||
<span class="sd"> Quantifiers based on ELM represent a family of methods based on structured output learning;</span>
|
||
<span class="sd"> these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss</span>
|
||
<span class="sd"> measure. This implementation relies on</span>
|
||
<span class="sd"> `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output</span>
|
||
<span class="sd"> learning algorithm, which has to be installed and patched for the purpose (see this</span>
|
||
<span class="sd"> `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).</span>
|
||
<span class="sd"> This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))</span>
|
||
|
||
<span class="sd"> :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)</span>
|
||
<span class="sd"> this path will be obtained from qp.environ['SVMPERF_HOME']</span>
|
||
<span class="sd"> :param C: trade-off between training error and margin (default 0.01)</span>
|
||
<span class="sd"> :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the</span>
|
||
<span class="sd"> underlying classifier</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">return</span> <span class="n">newELM</span><span class="p">(</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">'kld'</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="n">C</span><span class="p">)</span>
|
||
|
||
|
||
<div class="viewcode-block" id="newSVMKLD">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.newSVMKLD">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">newSVMKLD</span><span class="p">(</span><span class="n">svmperf_base</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> SVM(KLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence</span>
|
||
<span class="sd"> normalized via the logistic function, as proposed by</span>
|
||
<span class="sd"> `Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.</span>
|
||
<span class="sd"> Equivalent to:</span>
|
||
|
||
<span class="sd"> >>> CC(SVMperf(svmperf_base, loss='nkld', C=C))</span>
|
||
|
||
<span class="sd"> Quantifiers based on ELM represent a family of methods based on structured output learning;</span>
|
||
<span class="sd"> these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss</span>
|
||
<span class="sd"> measure. This implementation relies on</span>
|
||
<span class="sd"> `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output</span>
|
||
<span class="sd"> learning algorithm, which has to be installed and patched for the purpose (see this</span>
|
||
<span class="sd"> `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).</span>
|
||
<span class="sd"> This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))</span>
|
||
|
||
<span class="sd"> :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)</span>
|
||
<span class="sd"> this path will be obtained from qp.environ['SVMPERF_HOME']</span>
|
||
<span class="sd"> :param C: trade-off between training error and margin (default 0.01)</span>
|
||
<span class="sd"> :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the</span>
|
||
<span class="sd"> underlying classifier</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">return</span> <span class="n">newELM</span><span class="p">(</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">'nkld'</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="n">C</span><span class="p">)</span></div>
|
||
|
||
|
||
<div class="viewcode-block" id="newSVMAE">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.newSVMAE">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">newSVMAE</span><span class="p">(</span><span class="n">svmperf_base</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> SVM(KLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Absolute Error as first used by</span>
|
||
<span class="sd"> `Moreo and Sebastiani, 2021 <https://arxiv.org/abs/2011.02552>`_.</span>
|
||
<span class="sd"> Equivalent to:</span>
|
||
|
||
<span class="sd"> >>> CC(SVMperf(svmperf_base, loss='mae', C=C))</span>
|
||
|
||
<span class="sd"> Quantifiers based on ELM represent a family of methods based on structured output learning;</span>
|
||
<span class="sd"> these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss</span>
|
||
<span class="sd"> measure. This implementation relies on</span>
|
||
<span class="sd"> `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output</span>
|
||
<span class="sd"> learning algorithm, which has to be installed and patched for the purpose (see this</span>
|
||
<span class="sd"> `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).</span>
|
||
<span class="sd"> This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))</span>
|
||
|
||
<span class="sd"> :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)</span>
|
||
<span class="sd"> this path will be obtained from qp.environ['SVMPERF_HOME']</span>
|
||
<span class="sd"> :param C: trade-off between training error and margin (default 0.01)</span>
|
||
<span class="sd"> :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the</span>
|
||
<span class="sd"> underlying classifier</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">return</span> <span class="n">newELM</span><span class="p">(</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="n">C</span><span class="p">)</span></div>
|
||
|
||
|
||
<div class="viewcode-block" id="newSVMRAE">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.newSVMRAE">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">newSVMRAE</span><span class="p">(</span><span class="n">svmperf_base</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> SVM(KLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Relative Absolute Error as first</span>
|
||
<span class="sd"> used by `Moreo and Sebastiani, 2021 <https://arxiv.org/abs/2011.02552>`_.</span>
|
||
<span class="sd"> Equivalent to:</span>
|
||
|
||
<span class="sd"> >>> CC(SVMperf(svmperf_base, loss='mrae', C=C))</span>
|
||
|
||
<span class="sd"> Quantifiers based on ELM represent a family of methods based on structured output learning;</span>
|
||
<span class="sd"> these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss</span>
|
||
<span class="sd"> measure. This implementation relies on</span>
|
||
<span class="sd"> `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output</span>
|
||
<span class="sd"> learning algorithm, which has to be installed and patched for the purpose (see this</span>
|
||
<span class="sd"> `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).</span>
|
||
<span class="sd"> This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))</span>
|
||
|
||
<span class="sd"> :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)</span>
|
||
<span class="sd"> this path will be obtained from qp.environ['SVMPERF_HOME']</span>
|
||
<span class="sd"> :param C: trade-off between training error and margin (default 0.01)</span>
|
||
<span class="sd"> :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the</span>
|
||
<span class="sd"> underlying classifier</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">return</span> <span class="n">newELM</span><span class="p">(</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">'mrae'</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="n">C</span><span class="p">)</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="OneVsAllAggregative">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.OneVsAllAggregative">[docs]</a>
|
||
<span class="k">class</span> <span class="nc">OneVsAllAggregative</span><span class="p">(</span><span class="n">OneVsAllGeneric</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Allows any binary quantifier to perform quantification on single-label datasets.</span>
|
||
<span class="sd"> The method maintains one binary quantifier for each class, and then l1-normalizes the outputs so that the</span>
|
||
<span class="sd"> class prevelences sum up to 1.</span>
|
||
<span class="sd"> This variant was used, along with the :class:`EMQ` quantifier, in</span>
|
||
<span class="sd"> `Gao and Sebastiani, 2016 <https://link.springer.com/content/pdf/10.1007/s13278-016-0327-z.pdf>`_.</span>
|
||
|
||
<span class="sd"> :param binary_quantifier: a quantifier (binary) that will be employed to work on multiclass model in a</span>
|
||
<span class="sd"> one-vs-all manner</span>
|
||
<span class="sd"> :param n_jobs: number of parallel workers</span>
|
||
<span class="sd"> :param parallel_backend: the parallel backend for joblib (default "loky"); this is helpful for some quantifiers</span>
|
||
<span class="sd"> (e.g., ELM-based ones) that cannot be run with multiprocessing, since the temp dir they create during fit will</span>
|
||
<span class="sd"> is removed and no longer available at predict time.</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">parallel_backend</span><span class="o">=</span><span class="s1">'multiprocessing'</span><span class="p">):</span>
|
||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">BaseQuantifier</span><span class="p">),</span> \
|
||
<span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">binary_quantifier</span><span class="si">}</span><span class="s1"> does not seem to be a Quantifier'</span>
|
||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">),</span> \
|
||
<span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">binary_quantifier</span><span class="si">}</span><span class="s1"> does not seem to be of type Aggregative'</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">binary_quantifier</span> <span class="o">=</span> <span class="n">binary_quantifier</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">parallel_backend</span> <span class="o">=</span> <span class="n">parallel_backend</span>
|
||
|
||
<div class="viewcode-block" id="OneVsAllAggregative.classify">
|
||
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.OneVsAllAggregative.classify">[docs]</a>
|
||
<span class="k">def</span> <span class="nf">classify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> If the base quantifier is not probabilistic, returns a matrix of shape `(n,m,)` with `n` the number of</span>
|
||
<span class="sd"> instances and `m` the number of classes. The entry `(i,j)` is a binary value indicating whether instance</span>
|
||
<span class="sd"> `i `belongs to class `j`. The binary classifications are independent of each other, meaning that an instance</span>
|
||
<span class="sd"> can end up be attributed to 0, 1, or more classes.</span>
|
||
<span class="sd"> If the base quantifier is probabilistic, returns a matrix of shape `(n,m,2)` with `n` the number of instances</span>
|
||
<span class="sd"> and `m` the number of classes. The entry `(i,j,1)` (resp. `(i,j,0)`) is a value in [0,1] indicating the</span>
|
||
<span class="sd"> posterior probability that instance `i` belongs (resp. does not belong) to class `j`. The posterior</span>
|
||
<span class="sd"> probabilities are independent of each other, meaning that, in general, they do not sum up to one.</span>
|
||
|
||
<span class="sd"> :param instances: array-like</span>
|
||
<span class="sd"> :return: `np.ndarray`</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="n">classif_predictions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_parallel</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_delayed_binary_classification</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">AggregativeSoftQuantifier</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">swapaxes</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">T</span></div>
<div class="viewcode-block" id="OneVsAllAggregative.aggregate">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.OneVsAllAggregative.aggregate">[docs]</a>
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">):</span>
<span class="n">prevalences</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_parallel</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_delayed_binary_aggregate</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">)</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">normalize_prevalence</span><span class="p">(</span><span class="n">prevalences</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_delayed_binary_classification</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dict_binary_quantifiers</span><span class="p">[</span><span class="n">c</span><span class="p">]</span><span class="o">.</span><span class="n">classify</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_delayed_binary_aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">):</span>
<span class="c1"># the estimation for the positive class prevalence</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dict_binary_quantifiers</span><span class="p">[</span><span class="n">c</span><span class="p">]</span><span class="o">.</span><span class="n">aggregate</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">[:,</span> <span class="n">c</span><span class="p">])[</span><span class="mi">1</span><span class="p">]</span></div>
<div class="viewcode-block" id="AggregativeMedianEstimator">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeMedianEstimator">[docs]</a>
<span class="k">class</span> <span class="nc">AggregativeMedianEstimator</span><span class="p">(</span><span class="n">BinaryQuantifier</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""</span>
<span class="sd"> This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the</span>
<span class="sd"> estimation returned by differently (hyper)parameterized base quantifiers.</span>
|
||
<span class="sd"> The median of unit-vectors is only guaranteed to be a unit-vector for n=2 dimensions,</span>
<span class="sd"> i.e., in cases of binary quantification.</span>
<span class="sd"> :param base_quantifier: the base, binary quantifier</span>
<span class="sd"> :param random_state: a seed to be set before fitting any base quantifier (default None)</span>
<span class="sd"> :param param_grid: the grid or parameters towards which the median will be computed</span>
|
||
<span class="sd"> :param n_jobs: number of parllel workes</span>
|
||
<span class="sd"> """</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">base_quantifier</span><span class="p">:</span> <span class="n">AggregativeQuantifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">base_quantifier</span>
<span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span> <span class="o">=</span> <span class="n">param_grid</span>
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
<div class="viewcode-block" id="AggregativeMedianEstimator.get_params">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeMedianEstimator.get_params">[docs]</a>
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">(</span><span class="n">deep</span><span class="p">)</span></div>
<div class="viewcode-block" id="AggregativeMedianEstimator.set_params">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeMedianEstimator.set_params">[docs]</a>
<span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">params</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_delayed_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
<span class="n">params</span><span class="p">,</span> <span class="n">training</span> <span class="o">=</span> <span class="n">args</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
<span class="k">return</span> <span class="n">model</span>
<span class="k">def</span> <span class="nf">_delayed_fit_classifier</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">'enter job'</span><span class="p">)</span>
|
||
<span class="n">cls_params</span><span class="p">,</span> <span class="n">training</span><span class="p">,</span> <span class="n">kwargs</span> <span class="o">=</span> <span class="n">args</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">cls_params</span><span class="p">)</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">classifier_fit_predict</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">'exit job'</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_delayed_fit_aggregation</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
<span class="p">((</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">),</span> <span class="n">q_params</span><span class="p">),</span> <span class="n">training</span> <span class="o">=</span> <span class="n">args</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">q_params</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">aggregation_fit</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span>
<span class="k">return</span> <span class="n">model</span>
<div class="viewcode-block" id="AggregativeMedianEstimator.fit">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeMedianEstimator.fit">[docs]</a>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="kn">import</span> <span class="nn">itertools</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_check_binary</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">):</span>
<span class="n">cls_configs</span><span class="p">,</span> <span class="n">q_configs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">model_selection</span><span class="o">.</span><span class="n">group_params</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">cls_configs</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
<span class="n">models_preds</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit_classifier</span><span class="p">,</span>
<span class="p">((</span><span class="n">params</span><span class="p">,</span> <span class="n">training</span><span class="p">,</span> <span class="n">kwargs</span><span class="p">)</span> <span class="k">for</span> <span class="n">params</span> <span class="ow">in</span> <span class="n">cls_configs</span><span class="p">),</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">backend</span><span class="o">=</span><span class="s1">'threading'</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">'only 1'</span><span class="p">)</span>
|
||
<span class="n">model</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span>
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">cls_configs</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">classifier_fit_predict</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="n">models_preds</span> <span class="o">=</span> <span class="p">[(</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">)]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">models</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit_aggregation</span><span class="p">,</span>
<span class="p">((</span><span class="n">setup</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span> <span class="k">for</span> <span class="n">setup</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">product</span><span class="p">(</span><span class="n">models_preds</span><span class="p">,</span> <span class="n">q_configs</span><span class="p">)),</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
<span class="n">backend</span><span class="o">=</span><span class="s1">'threading'</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">configs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">model_selection</span><span class="o">.</span><span class="n">expand_grid</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">models</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit</span><span class="p">,</span>
<span class="p">((</span><span class="n">params</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span> <span class="k">for</span> <span class="n">params</span> <span class="ow">in</span> <span class="n">configs</span><span class="p">),</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
<span class="n">backend</span><span class="o">=</span><span class="s1">'threading'</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<span class="k">def</span> <span class="nf">_delayed_predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
<span class="n">model</span><span class="p">,</span> <span class="n">instances</span> <span class="o">=</span> <span class="n">args</span>
<span class="k">return</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
<div class="viewcode-block" id="AggregativeMedianEstimator.quantify">
<a class="viewcode-back" href="../../../quapy.method.html#quapy.method.aggregative.AggregativeMedianEstimator.quantify">[docs]</a>
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
<span class="n">prev_preds</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_predict</span><span class="p">,</span>
<span class="p">((</span><span class="n">model</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span> <span class="k">for</span> <span class="n">model</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">models</span><span class="p">),</span>
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
<span class="n">backend</span><span class="o">=</span><span class="s1">'threading'</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">prev_preds</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span></div>
</div>
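
# --- Editor's sketch (illustrative addition, not part of the QuaPy source) ---
# Hypothetical usage of AggregativeMedianEstimator: `training` is assumed to be
# a binary LabelledCollection and `test_instances` an array-like of test items;
# the param_grid below is only an example of a grid the base quantifier accepts.
def _example_median_estimator(training, test_instances):
    from sklearn.linear_model import LogisticRegression
    base = PACC(LogisticRegression())
    median_q = AggregativeMedianEstimator(
        base_quantifier=base,
        param_grid={'classifier__C': [0.1, 1.0, 10.0]},  # hypothetical grid
        random_state=0
    )
    median_q.fit(training)
    # the result is the component-wise median of the per-configuration estimates
    return median_q.quantify(test_instances)
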
<span class="c1">#---------------------------------------------------------------</span>
<span class="c1"># imports</span>
<span class="c1">#---------------------------------------------------------------</span>
<span class="kn">from</span> <span class="nn">.</span> <span class="kn">import</span> <span class="n">_threshold_optim</span>
<span class="n">T50</span> <span class="o">=</span> <span class="n">_threshold_optim</span><span class="o">.</span><span class="n">T50</span>
<span class="n">MAX</span> <span class="o">=</span> <span class="n">_threshold_optim</span><span class="o">.</span><span class="n">MAX</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">_threshold_optim</span><span class="o">.</span><span class="n">X</span>
<span class="n">MS</span> <span class="o">=</span> <span class="n">_threshold_optim</span><span class="o">.</span><span class="n">MS</span>
<span class="n">MS2</span> <span class="o">=</span> <span class="n">_threshold_optim</span><span class="o">.</span><span class="n">MS2</span>
<span class="kn">from</span> <span class="nn">.</span> <span class="kn">import</span> <span class="n">_kdey</span>
<span class="n">KDEyML</span> <span class="o">=</span> <span class="n">_kdey</span><span class="o">.</span><span class="n">KDEyML</span>
<span class="n">KDEyHD</span> <span class="o">=</span> <span class="n">_kdey</span><span class="o">.</span><span class="n">KDEyHD</span>
<span class="n">KDEyCS</span> <span class="o">=</span> <span class="n">_kdey</span><span class="o">.</span><span class="n">KDEyCS</span>
<span class="c1">#---------------------------------------------------------------</span>
<span class="c1"># aliases</span>
<span class="c1">#---------------------------------------------------------------</span>
<span class="n">ClassifyAndCount</span> <span class="o">=</span> <span class="n">CC</span>
<span class="n">AdjustedClassifyAndCount</span> <span class="o">=</span> <span class="n">ACC</span>
<span class="n">ProbabilisticClassifyAndCount</span> <span class="o">=</span> <span class="n">PCC</span>
<span class="n">ProbabilisticAdjustedClassifyAndCount</span> <span class="o">=</span> <span class="n">PACC</span>
<span class="n">ExpectationMaximizationQuantifier</span> <span class="o">=</span> <span class="n">EMQ</span>
<span class="n">DistributionMatchingY</span> <span class="o">=</span> <span class="n">DMy</span>
<span class="n">SLD</span> <span class="o">=</span> <span class="n">EMQ</span>
<span class="n">HellingerDistanceY</span> <span class="o">=</span> <span class="n">HDy</span>
<span class="n">MedianSweep</span> <span class="o">=</span> <span class="n">MS</span>
<span class="n">MedianSweep2</span> <span class="o">=</span> <span class="n">MS2</span>
</pre></div>
          </div>
        </div>
      <footer>

          <hr/>

        <div role="contentinfo">
          <p>© Copyright 2024, Alejandro Moreo.</p>
        </div>

        Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
        <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
        provided by <a href="https://readthedocs.org">Read the Docs</a>.

      </footer>
        </div>
      </div>
    </section>
  </div>
  <script>
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>

</body>
</html> |