fixing bugs in one-vs-all

This commit is contained in:
Alejandro Moreo Fernandez 2023-02-10 19:02:17 +01:00
parent 33a21db52c
commit 952cf5e767
16 changed files with 308 additions and 174 deletions

View File

@ -113,7 +113,7 @@
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.HDy.aggregate">(quapy.method.aggregative.HDy method)</a>
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.OneVsAll.aggregate">(quapy.method.aggregative.OneVsAll method)</a>
<li><a href="quapy.method.html#quapy.method.aggregative.OneVsAllAggregative.aggregate">(quapy.method.aggregative.OneVsAllAggregative method)</a>
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.PACC.aggregate">(quapy.method.aggregative.PACC method)</a>
</li>
@ -177,8 +177,6 @@
<li><a href="quapy.method.html#quapy.method.aggregative.CC">CC (class in quapy.method.aggregative)</a>
</li>
<li><a href="quapy.html#quapy.functional.check_prevalence_vector">check_prevalence_vector() (in module quapy.functional)</a>
</li>
<li><a href="quapy.method.html#quapy.method.base.OneVsAllGeneric.classes">classes (quapy.method.base.OneVsAllGeneric property)</a>
</li>
<li><a href="quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.classes_">classes_ (quapy.classification.calibration.RecalibratedProbabilisticClassifierBase property)</a>
@ -187,7 +185,7 @@
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.AggregativeQuantifier.classes_">(quapy.method.aggregative.AggregativeQuantifier property)</a>
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.OneVsAll.classes_">(quapy.method.aggregative.OneVsAll property)</a>
<li><a href="quapy.method.html#quapy.method.base.OneVsAllGeneric.classes_">(quapy.method.base.OneVsAllGeneric property)</a>
</li>
<li><a href="quapy.method.html#quapy.method.neural.QuaNetTrainer.classes_">(quapy.method.neural.QuaNetTrainer property)</a>
</li>
@ -203,7 +201,7 @@
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.ELM.classify">(quapy.method.aggregative.ELM method)</a>
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.OneVsAll.classify">(quapy.method.aggregative.OneVsAll method)</a>
<li><a href="quapy.method.html#quapy.method.aggregative.OneVsAllAggregative.classify">(quapy.method.aggregative.OneVsAllAggregative method)</a>
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.PACC.classify">(quapy.method.aggregative.PACC method)</a>
</li>
@ -358,8 +356,6 @@
<li><a href="quapy.method.html#quapy.method.aggregative.EMQ.fit">(quapy.method.aggregative.EMQ method)</a>
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.HDy.fit">(quapy.method.aggregative.HDy method)</a>
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.OneVsAll.fit">(quapy.method.aggregative.OneVsAll method)</a>
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.PACC.fit">(quapy.method.aggregative.PACC method)</a>
</li>
@ -426,10 +422,6 @@
<li><a href="quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.get_params">(quapy.classification.neural.NeuralClassifierTrainer method)</a>
</li>
<li><a href="quapy.classification.html#quapy.classification.neural.TextClassifierNet.get_params">(quapy.classification.neural.TextClassifierNet method)</a>
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.OneVsAll.get_params">(quapy.method.aggregative.OneVsAll method)</a>
</li>
<li><a href="quapy.method.html#quapy.method.base.OneVsAllGeneric.get_params">(quapy.method.base.OneVsAllGeneric method)</a>
</li>
<li><a href="quapy.method.html#quapy.method.meta.Ensemble.get_params">(quapy.method.meta.Ensemble method)</a>
</li>
@ -443,6 +435,8 @@
<li><a href="quapy.method.html#quapy.method.meta.get_probability_distribution">get_probability_distribution() (in module quapy.method.meta)</a>
</li>
<li><a href="quapy.html#quapy.util.get_quapy_home">get_quapy_home() (in module quapy.util)</a>
</li>
<li><a href="quapy.method.html#quapy.method.base.getOneVsAll">getOneVsAll() (in module quapy.method.base)</a>
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.ACC.getPteCondEstim">getPteCondEstim() (quapy.method.aggregative.ACC class method)</a>
@ -539,6 +533,8 @@
<li><a href="quapy.method.html#quapy.method.aggregative.MedianSweep">MedianSweep (in module quapy.method.aggregative)</a>
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.MedianSweep2">MedianSweep2 (in module quapy.method.aggregative)</a>
</li>
<li><a href="quapy.data.html#quapy.data.base.LabelledCollection.mix">mix() (quapy.data.base.LabelledCollection class method)</a>
</li>
<li><a href="quapy.html#quapy.error.mkld">mkld() (in module quapy.error)</a>
</li>
@ -641,10 +637,12 @@
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="quapy.html#quapy.protocol.OnLabelledCollectionProtocol.on_preclassified_instances">on_preclassified_instances() (quapy.protocol.OnLabelledCollectionProtocol method)</a>
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.OneVsAll">OneVsAll (class in quapy.method.aggregative)</a>
<li><a href="quapy.method.html#quapy.method.base.OneVsAll">OneVsAll (class in quapy.method.base)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="quapy.method.html#quapy.method.aggregative.OneVsAllAggregative">OneVsAllAggregative (class in quapy.method.aggregative)</a>
</li>
<li><a href="quapy.method.html#quapy.method.base.OneVsAllGeneric">OneVsAllGeneric (class in quapy.method.base)</a>
</li>
<li><a href="quapy.html#quapy.protocol.OnLabelledCollectionProtocol">OnLabelledCollectionProtocol (class in quapy.protocol)</a>
@ -975,10 +973,6 @@
<li><a href="quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.set_params">(quapy.classification.neural.NeuralClassifierTrainer method)</a>
</li>
<li><a href="quapy.classification.html#quapy.classification.svmperf.SVMperf.set_params">(quapy.classification.svmperf.SVMperf method)</a>
</li>
<li><a href="quapy.method.html#quapy.method.aggregative.OneVsAll.set_params">(quapy.method.aggregative.OneVsAll method)</a>
</li>
<li><a href="quapy.method.html#quapy.method.base.OneVsAllGeneric.set_params">(quapy.method.base.OneVsAllGeneric method)</a>
</li>
<li><a href="quapy.method.html#quapy.method.meta.Ensemble.set_params">(quapy.method.meta.Ensemble method)</a>
</li>

Binary file not shown.

View File

@ -168,7 +168,7 @@ training set afterwards. Default value is 5.</p></li>
<span class="sig-name descname"><span class="pre">fit_cv</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">y</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit_cv" title="Permalink to this definition"></a></dt>
<dd><p>Fits the calibration in a cross-validation manner, i.e., it generates posterior probabilities for all
training instances via cross-validation, and then retrains the classifier on all training instances.
The posterior probabilities thus generated are used for calibrating the outpus of the classifier.</p>
The posterior probabilities thus generated are used for calibrating the outputs of the classifier.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">

View File

@ -222,10 +222,10 @@ the collection), <cite>prevs</cite> (the prevalence values for each class)</p>
<dl class="py class">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.data.base.</span></span><span class="sig-name descname"><span class="pre">LabelledCollection</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">labels</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classes_</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection" title="Permalink to this definition"></a></dt>
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.data.base.</span></span><span class="sig-name descname"><span class="pre">LabelledCollection</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">labels</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">classes</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<p>A LabelledCollection is a set of objects each with a label associated to it. This class implements many sampling
routines.</p>
<p>A LabelledCollection is a set of objects each with a label attached to each of them.
This class implements several sampling routines and other utilities.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
@ -252,7 +252,7 @@ from the labels. The classes must be indicated in cases in which some of the lab
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.Xp">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">Xp</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.Xp" title="Permalink to this definition"></a></dt>
<dd><p>Gets the instances and the true prevalence. This is useful when implementing evaluation protocols from
a <cite>LabelledCollection</cite> object.</p>
a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> object.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>a tuple <cite>(instances, prevalence)</cite> from this collection</p>
@ -299,7 +299,7 @@ as listed by <cite>self.classes_</cite></p>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.kFCV">
<span class="sig-name descname"><span class="pre">kFCV</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">nfolds</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">nrepeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.kFCV" title="Permalink to this definition"></a></dt>
<span class="sig-name descname"><span class="pre">kFCV</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">nfolds</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">nrepeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.kFCV" title="Permalink to this definition"></a></dt>
<dd><p>Generator of stratified folds to be used in k-fold cross validation.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
@ -338,6 +338,23 @@ these arguments are used to call <cite>loader_func(path, **loader_kwargs)</cite>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.mix">
<em class="property"><span class="pre">classmethod</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">mix</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">a</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">b</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.mix" title="Permalink to this definition"></a></dt>
<dd><p>Returns a new <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> as the union of this collection with another collection.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>a</strong> instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a></p></li>
<li><p><strong>b</strong> instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a></p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>a <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a> representing the union of both collections</p>
</dd>
</dl>
</dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.n_classes">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">n_classes</span></span><a class="headerlink" href="#quapy.data.base.LabelledCollection.n_classes" title="Permalink to this definition"></a></dt>
@ -374,7 +391,7 @@ as listed by <cite>self.classes_</cite></p>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.sampling">
<span class="sig-name descname"><span class="pre">sampling</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shuffle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling" title="Permalink to this definition"></a></dt>
<span class="sig-name descname"><span class="pre">sampling</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shuffle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling" title="Permalink to this definition"></a></dt>
<dd><p>Return a random sample (an instance of <a class="reference internal" href="#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">LabelledCollection</span></code></a>) of desired size and desired prevalence
values. For each class, the sampling is drawn without replacement if the requested prevalence is larger than
the actual prevalence of the class, or with replacement otherwise.</p>
@ -386,6 +403,7 @@ the actual prevalence of the class, or with replacement otherwise.</p>
it is constrained. E.g., for binary collections, only the prevalence <cite>p</cite> for the first class (as listed in
<cite>self.classes_</cite> can be specified, while the other class takes prevalence value <cite>1-p</cite></p></li>
<li><p><strong>shuffle</strong> if set to True (default), shuffles the index before returning it</p></li>
<li><p><strong>random_state</strong> seed for reproducing sampling</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
@ -412,7 +430,7 @@ index.</p>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.data.base.LabelledCollection.sampling_index">
<span class="sig-name descname"><span class="pre">sampling_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shuffle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling_index" title="Permalink to this definition"></a></dt>
<span class="sig-name descname"><span class="pre">sampling_index</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">size</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">prevs</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shuffle</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.data.base.LabelledCollection.sampling_index" title="Permalink to this definition"></a></dt>
<dd><p>Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the
prevalence values are not specified, then returns the index of a uniform sampling.
For each class, the sampling is drawn without replacement if the requested prevalence is larger than
@ -425,6 +443,7 @@ the actual prevalence of the class, or with replacement otherwise.</p>
it is constrained. E.g., for binary collections, only the prevalence <cite>p</cite> for the first class (as listed in
<cite>self.classes_</cite> can be specified, while the other class takes prevalence value <cite>1-p</cite></p></li>
<li><p><strong>shuffle</strong> if set to True (default), shuffles the index before returning it</p></li>
<li><p><strong>random_state</strong> seed for reproducing sampling</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>

View File

@ -502,7 +502,7 @@ will be taken from the environment variable <cite>SAMPLE_SIZE</cite> (which has
</div>
<span class="target" id="module-quapy.protocol"></span><dl class="py class">
<dt class="sig sig-object py" id="quapy.protocol.APP">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.protocol.</span></span><span class="sig-name descname"><span class="pre">APP</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_prevalences</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">21</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">smooth_limits_epsilon</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">return_type</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'sample_prev'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.protocol.APP" title="Permalink to this definition"></a></dt>
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.protocol.</span></span><span class="sig-name descname"><span class="pre">APP</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_prevalences</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">21</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">10</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">smooth_limits_epsilon</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">return_type</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'sample_prev'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.protocol.APP" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#quapy.protocol.AbstractStochasticSeededProtocol" title="quapy.protocol.AbstractStochasticSeededProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">AbstractStochasticSeededProtocol</span></code></a>, <a class="reference internal" href="#quapy.protocol.OnLabelledCollectionProtocol" title="quapy.protocol.OnLabelledCollectionProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">OnLabelledCollectionProtocol</span></code></a></p>
<p>Implementation of the artificial prevalence protocol (APP).
The APP consists of exploring a grid of prevalence values containing <cite>n_prevalences</cite> points (e.g.,
@ -520,7 +520,8 @@ qp.environ[“SAMPLE_SIZE”]. If this is not set, a ValueError exception is rai
grid (default is 21)</p></li>
<li><p><strong>repeats</strong> number of copies for each valid prevalence vector (default is 10)</p></li>
<li><p><strong>smooth_limits_epsilon</strong> the quantity to add and subtract to the limits 0 and 1</p></li>
<li><p><strong>random_state</strong> allows replicating samples across runs (default None)</p></li>
<li><p><strong>random_state</strong> allows replicating samples across runs (default 0, meaning that the sequence of samples
will be the same every time the protocol is called)</p></li>
<li><p><strong>return_type</strong> set to “sample_prev” (default) to get the pairs of (sample, prevalence) at each iteration, or
to “labelled_collection” to get instead instances of LabelledCollection</p></li>
</ul>
@ -604,7 +605,7 @@ in the grid multiplied by <cite>repeat</cite></p>
<dl class="py class">
<dt class="sig sig-object py" id="quapy.protocol.AbstractStochasticSeededProtocol">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.protocol.</span></span><span class="sig-name descname"><span class="pre">AbstractStochasticSeededProtocol</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.protocol.AbstractStochasticSeededProtocol" title="Permalink to this definition"></a></dt>
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.protocol.</span></span><span class="sig-name descname"><span class="pre">AbstractStochasticSeededProtocol</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.protocol.AbstractStochasticSeededProtocol" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#quapy.protocol.AbstractProtocol" title="quapy.protocol.AbstractProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">AbstractProtocol</span></code></a></p>
<p>An <cite>AbstractStochasticSeededProtocol</cite> is a protocol that generates, via any random procedure (e.g.,
via random sampling), sequences of <a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.LabelledCollection</span></code></a> samples.
@ -616,8 +617,8 @@ needed for extracting the samples, and <a class="reference internal" href="#quap
deterministically generates a sample.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>random_state</strong> the seed for allowing to replicate any sequence of samples. Default is None, meaning that
the sequence will be different every time the protocol is called.</p>
<dd class="field-odd"><p><strong>random_state</strong> the seed for allowing to replicate any sequence of samples. Default is 0, meaning that
the sequence will be consistent every time the protocol is called.</p>
</dd>
</dl>
<dl class="py method">
@ -659,7 +660,7 @@ the sequence will be different every time the protocol is called.</p>
<dl class="py class">
<dt class="sig sig-object py" id="quapy.protocol.DomainMixer">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.protocol.</span></span><span class="sig-name descname"><span class="pre">DomainMixer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">domainA</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">domainB</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevalence</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mixture_points</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">11</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">return_type</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'sample_prev'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.protocol.DomainMixer" title="Permalink to this definition"></a></dt>
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.protocol.</span></span><span class="sig-name descname"><span class="pre">DomainMixer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">domainA</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">domainB</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prevalence</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mixture_points</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">11</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">return_type</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'sample_prev'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.protocol.DomainMixer" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#quapy.protocol.AbstractStochasticSeededProtocol" title="quapy.protocol.AbstractStochasticSeededProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">AbstractStochasticSeededProtocol</span></code></a></p>
<p>Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence.</p>
<dl class="field-list simple">
@ -675,7 +676,8 @@ will be taken from the domain A (default).</p></li>
<li><p><strong>mixture_points</strong> an integer indicating the number of points to take from a linear scale (e.g., 21 will
generate the mixture points [1, 0.95, 0.9, …, 0]), or the array of mixture values itself.
the specific points</p></li>
<li><p><strong>random_state</strong> </p></li>
<li><p><strong>random_state</strong> allows replicating samples across runs (default 0, meaning that the sequence of samples
will be the same every time the protocol is called)</p></li>
</ul>
</dd>
</dl>
@ -719,7 +721,7 @@ the specific points</p></li>
<dl class="py class">
<dt class="sig sig-object py" id="quapy.protocol.NPP">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.protocol.</span></span><span class="sig-name descname"><span class="pre">NPP</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">100</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">return_type</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'sample_prev'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.protocol.NPP" title="Permalink to this definition"></a></dt>
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.protocol.</span></span><span class="sig-name descname"><span class="pre">NPP</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">100</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">return_type</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'sample_prev'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.protocol.NPP" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#quapy.protocol.AbstractStochasticSeededProtocol" title="quapy.protocol.AbstractStochasticSeededProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">AbstractStochasticSeededProtocol</span></code></a>, <a class="reference internal" href="#quapy.protocol.OnLabelledCollectionProtocol" title="quapy.protocol.OnLabelledCollectionProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">OnLabelledCollectionProtocol</span></code></a></p>
<p>A generator of samples that implements the natural prevalence protocol (NPP). The NPP consists of drawing
samples uniformly at random, therefore approximately preserving the natural prevalence of the collection.</p>
@ -730,7 +732,8 @@ samples uniformly at random, therefore approximately preserving the natural prev
<li><p><strong>sample_size</strong> integer, the number of instances in each sample; if None (default) then it is taken from
qp.environ[“SAMPLE_SIZE”]. If this is not set, a ValueError exception is raised.</p></li>
<li><p><strong>repeats</strong> the number of samples to generate. Default is 100.</p></li>
<li><p><strong>random_state</strong> allows replicating samples across runs (default None)</p></li>
<li><p><strong>random_state</strong> allows replicating samples across runs (default 0, meaning that the sequence of samples
will be the same every time the protocol is called)</p></li>
<li><p><strong>return_type</strong> set to “sample_prev” (default) to get the pairs of (sample, prevalence) at each iteration, or
to “labelled_collection” to get instead instances of LabelledCollection</p></li>
</ul>
@ -802,7 +805,7 @@ to “labelled_collection” to get instead instances of LabelledCollection</p><
<dl class="py class">
<dt class="sig sig-object py" id="quapy.protocol.USimplexPP">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.protocol.</span></span><span class="sig-name descname"><span class="pre">USimplexPP</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">100</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">return_type</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'sample_prev'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.protocol.USimplexPP" title="Permalink to this definition"></a></dt>
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.protocol.</span></span><span class="sig-name descname"><span class="pre">USimplexPP</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_size</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">repeats</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">100</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">return_type</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'sample_prev'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.protocol.USimplexPP" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#quapy.protocol.AbstractStochasticSeededProtocol" title="quapy.protocol.AbstractStochasticSeededProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">AbstractStochasticSeededProtocol</span></code></a>, <a class="reference internal" href="#quapy.protocol.OnLabelledCollectionProtocol" title="quapy.protocol.OnLabelledCollectionProtocol"><code class="xref py py-class docutils literal notranslate"><span class="pre">OnLabelledCollectionProtocol</span></code></a></p>
<p>A variant of <a class="reference internal" href="#quapy.protocol.APP" title="quapy.protocol.APP"><code class="xref py py-class docutils literal notranslate"><span class="pre">APP</span></code></a> that, instead of using a grid of equidistant prevalence values,
relies on the Kraemer algorithm for sampling unit (k-1)-simplex uniformly at random, with
@ -817,7 +820,8 @@ combinations of the grid values of APP makes this endeavour intractable.</p>
<li><p><strong>sample_size</strong> integer, the number of instances in each sample; if None (default) then it is taken from
qp.environ[“SAMPLE_SIZE”]. If this is not set, a ValueError exception is raised.</p></li>
<li><p><strong>repeats</strong> the number of samples to generate. Default is 100.</p></li>
<li><p><strong>random_state</strong> allows replicating samples across runs (default None)</p></li>
<li><p><strong>random_state</strong> allows replicating samples across runs (default 0, meaning that the sequence of samples
will be the same every time the protocol is called)</p></li>
<li><p><strong>return_type</strong> set to “sample_prev” (default) to get the pairs of (sample, prevalence) at each iteration, or
to “labelled_collection” to get instead instances of LabelledCollection</p></li>
</ul>

View File

@ -781,9 +781,9 @@ validation data, or as an integer, indicating that the misclassification rates s
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="quapy.method.aggregative.OneVsAll">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.method.aggregative.</span></span><span class="sig-name descname"><span class="pre">OneVsAll</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">binary_quantifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.OneVsAll" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#quapy.method.aggregative.AggregativeQuantifier" title="quapy.method.aggregative.AggregativeQuantifier"><code class="xref py py-class docutils literal notranslate"><span class="pre">AggregativeQuantifier</span></code></a></p>
<dt class="sig sig-object py" id="quapy.method.aggregative.OneVsAllAggregative">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.method.aggregative.</span></span><span class="sig-name descname"><span class="pre">OneVsAllAggregative</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">binary_quantifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">parallel_backend</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'loky'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.OneVsAllAggregative" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#quapy.method.base.OneVsAllGeneric" title="quapy.method.base.OneVsAllGeneric"><code class="xref py py-class docutils literal notranslate"><span class="pre">OneVsAllGeneric</span></code></a>, <a class="reference internal" href="#quapy.method.aggregative.AggregativeQuantifier" title="quapy.method.aggregative.AggregativeQuantifier"><code class="xref py py-class docutils literal notranslate"><span class="pre">AggregativeQuantifier</span></code></a></p>
<p>Allows any binary quantifier to perform quantification on single-label datasets.
The method maintains one binary quantifier for each class, and then l1-normalizes the outputs so that the
class prevelences sum up to 1.
@ -795,12 +795,15 @@ This variant was used, along with the <a class="reference internal" href="#quapy
<li><p><strong>binary_quantifier</strong> a quantifier (binary) that will be employed to work on multiclass model in a
one-vs-all manner</p></li>
<li><p><strong>n_jobs</strong> number of parallel workers</p></li>
<li><p><strong>parallel_backend</strong> the parallel backend for joblib (default “loky”); this is helpful for some quantifiers
(e.g., ELM-based ones) that cannot be run with multiprocessing, since the temp dir they create during fit will
is removed and no longer available at predict time.</p></li>
</ul>
</dd>
</dl>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.method.aggregative.OneVsAll.aggregate">
<span class="sig-name descname"><span class="pre">aggregate</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">classif_predictions</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.OneVsAll.aggregate" title="Permalink to this definition"></a></dt>
<dt class="sig sig-object py" id="quapy.method.aggregative.OneVsAllAggregative.aggregate">
<span class="sig-name descname"><span class="pre">aggregate</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">classif_predictions</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.OneVsAllAggregative.aggregate" title="Permalink to this definition"></a></dt>
<dd><p>Implements the aggregation of label predictions.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
@ -812,21 +815,9 @@ one-vs-all manner</p></li>
</dl>
</dd></dl>
<dl class="py property">
<dt class="sig sig-object py" id="quapy.method.aggregative.OneVsAll.classes_">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">classes_</span></span><a class="headerlink" href="#quapy.method.aggregative.OneVsAll.classes_" title="Permalink to this definition"></a></dt>
<dd><p>Class labels, in the same order in which class prevalence values are to be computed.
This default implementation actually returns the class labels of the learner.</p>
<dl class="field-list simple">
<dt class="field-odd">Returns<span class="colon">:</span></dt>
<dd class="field-odd"><p>array-like</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.method.aggregative.OneVsAll.classify">
<span class="sig-name descname"><span class="pre">classify</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.OneVsAll.classify" title="Permalink to this definition"></a></dt>
<dt class="sig sig-object py" id="quapy.method.aggregative.OneVsAllAggregative.classify">
<span class="sig-name descname"><span class="pre">classify</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.OneVsAllAggregative.classify" title="Permalink to this definition"></a></dt>
<dd><p>If the base quantifier is not probabilistic, returns a matrix of shape <cite>(n,m,)</cite> with <cite>n</cite> the number of
instances and <cite>m</cite> the number of classes. The entry <cite>(i,j)</cite> is a binary value indicating whether instance
<cite>i `belongs to class `j</cite>. The binary classifications are independent of each other, meaning that an instance
@ -845,63 +836,6 @@ probabilities are independent of each other, meaning that, in general, they do n
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.method.aggregative.OneVsAll.fit">
<span class="sig-name descname"><span class="pre">fit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">fit_classifier</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.OneVsAll.fit" title="Permalink to this definition"></a></dt>
<dd><p>Trains the aggregative quantifier</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>data</strong> a <a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.LabelledCollection</span></code></a> consisting of the training data</p></li>
<li><p><strong>fit_classifier</strong> whether or not to train the learner (default is True). Set to False if the
learner has been trained outside the quantifier.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>self</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.method.aggregative.OneVsAll.get_params">
<span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">deep</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.OneVsAll.get_params" title="Permalink to this definition"></a></dt>
<dd><p>Get parameters for this estimator.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>deep</strong> (<em>bool</em><em>, </em><em>default=True</em>) If True, will return the parameters for this estimator and
contained subobjects that are estimators.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p><strong>params</strong> Parameter names mapped to their values.</p>
</dd>
<dt class="field-odd">Return type<span class="colon">:</span></dt>
<dd class="field-odd"><p>dict</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.method.aggregative.OneVsAll.set_params">
<span class="sig-name descname"><span class="pre">set_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">parameters</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.aggregative.OneVsAll.set_params" title="Permalink to this definition"></a></dt>
<dd><p>Set the parameters of this estimator.</p>
<p>The method works on simple estimators as well as on nested objects
(such as <code class="xref py py-class docutils literal notranslate"><span class="pre">Pipeline</span></code>). The latter have
parameters of the form <code class="docutils literal notranslate"><span class="pre">&lt;component&gt;__&lt;parameter&gt;</span></code> so that its
possible to update each component of a nested object.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>**params</strong> (<em>dict</em>) Estimator parameters.</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p><strong>self</strong> Estimator instance.</p>
</dd>
<dt class="field-odd">Return type<span class="colon">:</span></dt>
<dd class="field-odd"><p>estimator instance</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
<dl class="py class">
@ -1362,38 +1296,57 @@ validation data, or as an integer, indicating that the misclassification rates s
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="quapy.method.base.OneVsAllGeneric">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.method.base.</span></span><span class="sig-name descname"><span class="pre">OneVsAllGeneric</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">binary_quantifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.base.OneVsAllGeneric" title="Permalink to this definition"></a></dt>
<dt class="sig sig-object py" id="quapy.method.base.OneVsAll">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.method.base.</span></span><span class="sig-name descname"><span class="pre">OneVsAll</span></span><a class="headerlink" href="#quapy.method.base.OneVsAll" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="quapy.method.base.OneVsAllGeneric">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">quapy.method.base.</span></span><span class="sig-name descname"><span class="pre">OneVsAllGeneric</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">binary_quantifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">parallel_backend</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'loky'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.base.OneVsAllGeneric" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#quapy.method.base.OneVsAll" title="quapy.method.base.OneVsAll"><code class="xref py py-class docutils literal notranslate"><span class="pre">OneVsAll</span></code></a>, <a class="reference internal" href="#quapy.method.base.BaseQuantifier" title="quapy.method.base.BaseQuantifier"><code class="xref py py-class docutils literal notranslate"><span class="pre">BaseQuantifier</span></code></a></p>
<p>Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary
quantifier for each class, and then l1-normalizes the outputs so that the class prevelence values sum up to 1.</p>
<dl class="py property">
<dt class="sig sig-object py" id="quapy.method.base.OneVsAllGeneric.classes">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">classes</span></span><a class="headerlink" href="#quapy.method.base.OneVsAllGeneric.classes" title="Permalink to this definition"></a></dt>
<dt class="sig sig-object py" id="quapy.method.base.OneVsAllGeneric.classes_">
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">classes_</span></span><a class="headerlink" href="#quapy.method.base.OneVsAllGeneric.classes_" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.method.base.OneVsAllGeneric.fit">
<span class="sig-name descname"><span class="pre">fit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.base.OneVsAllGeneric.fit" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.method.base.OneVsAllGeneric.get_params">
<span class="sig-name descname"><span class="pre">get_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">deep</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.base.OneVsAllGeneric.get_params" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<span class="sig-name descname"><span class="pre">fit</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><span class="pre">LabelledCollection</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">fit_classifier</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.base.OneVsAllGeneric.fit" title="Permalink to this definition"></a></dt>
<dd><p>Trains a quantifier.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>data</strong> a <a class="reference internal" href="quapy.data.html#quapy.data.base.LabelledCollection" title="quapy.data.base.LabelledCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">quapy.data.base.LabelledCollection</span></code></a> consisting of the training data</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>self</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.method.base.OneVsAllGeneric.quantify">
<span class="sig-name descname"><span class="pre">quantify</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">X</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.base.OneVsAllGeneric.quantify" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="quapy.method.base.OneVsAllGeneric.set_params">
<span class="sig-name descname"><span class="pre">set_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">parameters</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.base.OneVsAllGeneric.set_params" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<span class="sig-name descname"><span class="pre">quantify</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">instances</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.base.OneVsAllGeneric.quantify" title="Permalink to this definition"></a></dt>
<dd><p>Generate class prevalence estimates for the samples instances</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>instances</strong> array-like</p>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p><cite>np.ndarray</cite> of shape <cite>(n_classes,)</cite> with class prevalence estimates.</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="quapy.method.base.getOneVsAll">
<span class="sig-prename descclassname"><span class="pre">quapy.method.base.</span></span><span class="sig-name descname"><span class="pre">getOneVsAll</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">binary_quantifier</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">n_jobs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">parallel_backend</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'loky'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#quapy.method.base.getOneVsAll" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</section>
<section id="module-quapy.method.meta">
<span id="quapy-method-meta"></span><h2>quapy.method.meta<a class="headerlink" href="#module-quapy.method.meta" title="Permalink to this heading"></a></h2>

File diff suppressed because one or more lines are too long

View File

@ -11,7 +11,7 @@ from sklearn.linear_model import LogisticRegression
# Define a custom quantifier: for this example, we will consider a new quantification algorithm that uses a
# logistic regressor for generating posterior probabilities, and then applies a custom threshold value to the
# posteriors. Since the quantifier internally uses a classifier, it is an aggregative quantifier; and since it
# relies on posterior probabilities, then it is a probabilistic aggregative quantifier. Note also it has an
# relies on posterior probabilities, it is a probabilistic-aggregative quantifier. Note also it has an
# internal hyperparameter (let say, alpha) which is the decision threshold. Let's also assume the quantifier
# is binary, for simplicity.
@ -47,13 +47,13 @@ if __name__ == '__main__':
# load the IMDb dataset
train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test
train, val = train.split_stratified(train_prop=0.75)
# model selection
# let us assume we want to explore our hyperparameter alpha along with one hyperparameter of the classifier
train, val = train.split_stratified(train_prop=0.75)
param_grid = {
'alpha': np.linspace(0,1,11), # quantifier-dependent hyperparameter
'classifier__C': np.logspace(-2,2,5) # classifier-dependent hyperparameter
'alpha': np.linspace(0, 1, 11), # quantifier-dependent hyperparameter
'classifier__C': np.logspace(-2, 2, 5) # classifier-dependent hyperparameter
}
quantifier = GridSearchQ(quantifier, param_grid, protocol=APP(val), n_jobs=-1, verbose=True).fit(train)

View File

@ -9,19 +9,37 @@ from method.aggregative import EMQ
from model_selection import GridSearchQ
import pandas as pd
"""
This example shows hoy to use the LeQua datasets (new in v0.1.7). For more information about the datasets, and the
LeQua competition itself, check:
https://lequa2022.github.io/index (the site of the competition)
https://ceur-ws.org/Vol-3180/paper-146.pdf (the overview paper)
"""
# there are 4 tasks (T1A, T1B, T2A, T2B)
task = 'T1A'
# set the sample size in the environment. The sample size is task-dendendent and can be consulted by doing:
qp.environ['SAMPLE_SIZE'] = LEQUA2022_SAMPLE_SIZE[task]
qp.environ['N_JOBS'] = -1
# the fetch method returns a training set (an instance of LabelledCollection) and two generators: one for the
# validation set and another for the test sets. These generators are both instances of classes that extend
# AbstractProtocol (i.e., classes that implement sampling generation procedures) and, in particular, are instances
# of SamplesFromDir, a protocol that simply iterates over pre-generated samples (those provided for the competition)
# stored in a directory.
training, val_generator, test_generator = fetch_lequa2022(task=task)
# define the quantifier
learner = CalibratedClassifierCV(LogisticRegression())
quantifier = EMQ(classifier=learner)
quantifier = EMQ(classifier=LogisticRegression())
# model selection
param_grid = {'C': np.logspace(-3, 3, 7), 'class_weight': ['balanced', None]}
model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, n_jobs=-1, refit=False, verbose=True)
param_grid = {
'classifier__C': np.logspace(-3, 3, 7), # classifier-dependent: inverse of regularization strength
'classifier__class_weight': ['balanced', None], # classifier-dependent: weights of each class
'recalib': ['bcts', 'platt', None] # quantifier-dependent: recalibration method (new in v0.1.7)
}
model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, error='mrae', refit=False, verbose=True)
quantifier = model_selection.fit(training)
# evaluation

View File

@ -12,6 +12,7 @@ and positive. We will use a one-vs-all approach using a binary quantifier for de
"""
qp.environ['SAMPLE_SIZE'] = 100
qp.environ['N_JOBS'] = -1
"""
Any binary quantifier can be turned into a single-label quantifier by means of getOneVsAll function.
@ -21,7 +22,7 @@ an instance of AggregativeQuantifier. Although OneVsAllGeneric works in all case
some additional advantages (namely, all the advantages that AggregativeQuantifiers enjoy, i.e., faster predictions
during evaluation).
"""
quantifier = getOneVsAll(MS2(LogisticRegression()), parallel_backend="loky")
quantifier = getOneVsAll(MS2(LogisticRegression()))
print(f'the quantifier is an instance of {quantifier.__class__.__name__}')
# load a ternary dataset
@ -38,8 +39,8 @@ param_grid = {
'binary_quantifier__classifier__class_weight': ['balanced', None] # classifier-dependent hyperparameter
}
print('starting model selection')
gs = GridSearchQ(quantifier, param_grid, protocol=USimplexPP(val), n_jobs=-1, verbose=True, refit=False)
quantifier = gs.fit(train_modsel).best_model()
model_selection = GridSearchQ(quantifier, param_grid, protocol=USimplexPP(val), verbose=True, refit=False)
quantifier = model_selection.fit(train_modsel).best_model()
print('training on the whole training set')
train, test = qp.datasets.fetch_twitter('hcr', for_model_selection=False, pickle=True).train_test

View File

@ -84,7 +84,5 @@ Change Log 0.1.7
Things to fix:
--------------
- update unit tests
- update Wikis...
- improve plots
- documentation of protocols is incomplete

View File

@ -1262,7 +1262,7 @@ class OneVsAllAggregative(OneVsAllGeneric, AggregativeQuantifier):
is removed and no longer available at predict time.
"""
def __init__(self, binary_quantifier, n_jobs=None, parallel_backend='loky'):
def __init__(self, binary_quantifier, n_jobs=None, parallel_backend='multiprocessing'):
assert isinstance(binary_quantifier, BaseQuantifier), \
f'{self.binary_quantifier} does not seem to be a Quantifier'
assert isinstance(binary_quantifier, AggregativeQuantifier), \

View File

@ -54,13 +54,13 @@ class OneVsAll:
pass
def getOneVsAll(binary_quantifier, n_jobs=None, parallel_backend='loky'):
def getOneVsAll(binary_quantifier, n_jobs=None):
assert isinstance(binary_quantifier, BaseQuantifier), \
f'{binary_quantifier} does not seem to be a Quantifier'
if isinstance(binary_quantifier, qp.method.aggregative.AggregativeQuantifier):
return qp.method.aggregative.OneVsAllAggregative(binary_quantifier, n_jobs, parallel_backend)
return qp.method.aggregative.OneVsAllAggregative(binary_quantifier, n_jobs)
else:
return OneVsAllGeneric(binary_quantifier, n_jobs, parallel_backend)
return OneVsAllGeneric(binary_quantifier, n_jobs)
class OneVsAllGeneric(OneVsAll,BaseQuantifier):
@ -69,7 +69,7 @@ class OneVsAllGeneric(OneVsAll,BaseQuantifier):
quantifier for each class, and then l1-normalizes the outputs so that the class prevelence values sum up to 1.
"""
def __init__(self, binary_quantifier, n_jobs=None, parallel_backend='loky'):
def __init__(self, binary_quantifier, n_jobs=None):
assert isinstance(binary_quantifier, BaseQuantifier), \
f'{binary_quantifier} does not seem to be a Quantifier'
if isinstance(binary_quantifier, qp.method.aggregative.AggregativeQuantifier):
@ -77,7 +77,6 @@ class OneVsAllGeneric(OneVsAll,BaseQuantifier):
f'you might prefer instantiating {qp.method.aggregative.OneVsAllAggregative.__name__}')
self.binary_quantifier = binary_quantifier
self.n_jobs = qp._get_njobs(n_jobs)
self.parallel_backend = parallel_backend
def fit(self, data: LabelledCollection, fit_classifier=True):
assert not data.binary, f'{self.__class__.__name__} expect non-binary data'
@ -89,7 +88,7 @@ class OneVsAllGeneric(OneVsAll,BaseQuantifier):
def _parallel(self, func, *args, **kwargs):
return np.asarray(
Parallel(n_jobs=self.n_jobs, backend=self.parallel_backend)(
Parallel(n_jobs=self.n_jobs, backend='threading')(
delayed(func)(c, *args, **kwargs) for c in self.classes_
)
)

View File

@ -86,7 +86,8 @@ class GridSearchQ(BaseQuantifier):
tinit = time()
hyper = [dict({k: values[i] for i, k in enumerate(params_keys)}) for values in itertools.product(*params_values)]
hyper = [dict({k: val[i] for i, k in enumerate(params_keys)}) for val in itertools.product(*params_values)]
self._sout(f'starting model selection with {self.n_jobs =}')
#pass a seed to parallel so it is set in clild processes
scores = qp.util.parallel(
self._delayed_eval,

View File

@ -45,13 +45,13 @@ class AbstractStochasticSeededProtocol(AbstractProtocol):
needed for extracting the samples, and :meth:`sample` that, given some parameters as input,
deterministically generates a sample.
:param random_state: the seed for allowing to replicate any sequence of samples. Default is None, meaning that
the sequence will be different every time the protocol is called.
:param random_state: the seed for allowing to replicate any sequence of samples. Default is 0, meaning that
the sequence will be consistent every time the protocol is called.
"""
_random_state = -1 # means "not set"
def __init__(self, random_state=None):
def __init__(self, random_state=0):
self.random_state = random_state
@property
@ -82,6 +82,13 @@ class AbstractStochasticSeededProtocol(AbstractProtocol):
...
def __call__(self):
"""
Yields one sample at a time. The type of object returned depends on the `collator` function. The
default behaviour returns tuples of the form `(sample, prevalence)`.
:return: a tuple `(sample, prevalence)` if return_type='sample_prev', or an instance of
:class:`qp.data.LabelledCollection` if return_type='labelled_collection'
"""
with ExitStack() as stack:
if self.random_state == -1:
raise ValueError('The random seed has never been initialized. '
@ -96,13 +103,33 @@ class AbstractStochasticSeededProtocol(AbstractProtocol):
class OnLabelledCollectionProtocol:
"""
Protocols that generate samples from a :class:`qp.data.LabelledCollection` object.
"""
RETURN_TYPES = ['sample_prev', 'labelled_collection']
def get_labelled_collection(self):
"""
Returns the labelled collection on which this protocol acts.
:return: an object of type :class:`qp.data.LabelledCollection`
"""
return self.data
def on_preclassified_instances(self, pre_classifications, in_place=False):
"""
Returns a copy of this protocol that acts on a modified version of the original
:class:`qp.data.LabelledCollection` in which the original instances have been replaced
with the outputs of a classifier for each instance. (This is convenient for speeding-up
the evaluation procedures for many samples, by pre-classifying the instances in advance.)
:param pre_classifications: the predictions issued by a classifier, typically an array-like
with shape `(n_instances,)` when the classifier is a hard one, or with shape
`(n_instances, n_classes)` when the classifier is a probabilistic one.
:param in_place: whether or not to apply the modification in-place or in a new copy (default).
:return: a copy of this protocol
"""
assert len(pre_classifications) == len(self.data), \
f'error: the pre-classified data has different shape ' \
f'(expected {len(self.data)}, found {len(pre_classifications)})'
@ -115,6 +142,15 @@ class OnLabelledCollectionProtocol:
@classmethod
def get_collator(cls, return_type='sample_prev'):
"""
Returns a collator function, i.e., a function that prepares the yielded data
:param return_type: either 'sample_prev' (default) if the collator is requested to yield tuples of
`(sample, prevalence)`, or 'labelled_collection' when it is requested to yield instances of
:class:`qp.data.LabelledCollection`
:return: the collator function (a callable function that takes as input an instance of
:class:`qp.data.LabelledCollection`)
"""
assert return_type in cls.RETURN_TYPES, \
f'unknown return type passed as argument; valid ones are {cls.RETURN_TYPES}'
if return_type=='sample_prev':
@ -139,13 +175,14 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
grid (default is 21)
:param repeats: number of copies for each valid prevalence vector (default is 10)
:param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1
:param random_state: allows replicating samples across runs (default None)
:param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples
will be the same every time the protocol is called)
:param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or
to "labelled_collection" to get instead instances of LabelledCollection
"""
def __init__(self, data:LabelledCollection, sample_size=None, n_prevalences=21, repeats=10,
smooth_limits_epsilon=0, random_state=None, return_type='sample_prev'):
smooth_limits_epsilon=0, random_state=0, return_type='sample_prev'):
super(APP, self).__init__(random_state)
self.data = data
self.sample_size = qp._get_sample_size(sample_size)
@ -179,6 +216,11 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
return prevs
def samples_parameters(self):
"""
Return all the necessary parameters to replicate the samples as according to the APP protocol.
:return: a list of indexes that realize the APP sampling
"""
indexes = []
for prevs in self.prevalence_grid():
index = self.data.sampling_index(self.sample_size, *prevs)
@ -186,9 +228,20 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
return indexes
def sample(self, index):
"""
Realizes the sample given the index of the instances.
:param index: indexes of the instances to select
:return: an instance of :class:`qp.data.LabelledCollection`
"""
return self.data.sampling_from_index(index)
def total(self):
"""
Returns the number of samples that will be generated
:return: int
"""
return F.num_prevalence_combinations(self.n_prevalences, self.data.n_classes, self.repeats)
@ -201,12 +254,14 @@ class NPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
:param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from
qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.
:param repeats: the number of samples to generate. Default is 100.
:param random_state: allows replicating samples across runs (default None)
:param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples
will be the same every time the protocol is called)
:param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or
to "labelled_collection" to get instead instances of LabelledCollection
"""
def __init__(self, data:LabelledCollection, sample_size=None, repeats=100, random_state=None, return_type='sample_prev'):
def __init__(self, data:LabelledCollection, sample_size=None, repeats=100, random_state=0,
return_type='sample_prev'):
super(NPP, self).__init__(random_state)
self.data = data
self.sample_size = qp._get_sample_size(sample_size)
@ -215,6 +270,11 @@ class NPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
self.collator = OnLabelledCollectionProtocol.get_collator(return_type)
def samples_parameters(self):
"""
Return all the necessary parameters to replicate the samples as according to the NPP protocol.
:return: a list of indexes that realize the NPP sampling
"""
indexes = []
for _ in range(self.repeats):
index = self.data.uniform_sampling_index(self.sample_size)
@ -222,9 +282,20 @@ class NPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
return indexes
def sample(self, index):
"""
Realizes the sample given the index of the instances.
:param index: indexes of the instances to select
:return: an instance of :class:`qp.data.LabelledCollection`
"""
return self.data.sampling_from_index(index)
def total(self):
"""
Returns the number of samples that will be generated (equals to "repeats")
:return: int
"""
return self.repeats
@ -241,12 +312,13 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol)
:param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from
qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.
:param repeats: the number of samples to generate. Default is 100.
:param random_state: allows replicating samples across runs (default None)
:param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples
will be the same every time the protocol is called)
:param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or
to "labelled_collection" to get instead instances of LabelledCollection
"""
def __init__(self, data: LabelledCollection, sample_size=None, repeats=100, random_state=None,
def __init__(self, data: LabelledCollection, sample_size=None, repeats=100, random_state=0,
return_type='sample_prev'):
super(USimplexPP, self).__init__(random_state)
self.data = data
@ -256,6 +328,11 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol)
self.collator = OnLabelledCollectionProtocol.get_collator(return_type)
def samples_parameters(self):
"""
Return all the necessary parameters to replicate the samples as according to the USimplexPP protocol.
:return: a list of indexes that realize the USimplexPP sampling
"""
indexes = []
for prevs in F.uniform_simplex_sampling(n_classes=self.data.n_classes, size=self.repeats):
index = self.data.sampling_index(self.sample_size, *prevs)
@ -263,9 +340,20 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol)
return indexes
def sample(self, index):
"""
Realizes the sample given the index of the instances.
:param index: indexes of the instances to select
:return: an instance of :class:`qp.data.LabelledCollection`
"""
return self.data.sampling_from_index(index)
def total(self):
"""
Returns the number of samples that will be generated (equals to "repeats")
:return: int
"""
return self.repeats
@ -273,17 +361,19 @@ class DomainMixer(AbstractStochasticSeededProtocol):
"""
Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence.
:param domainA:
:param domainB:
:param sample_size:
:param repeats:
:param domainA: one domain, an object of :class:`qp.data.LabelledCollection`
:param domainB: another domain, an object of :class:`qp.data.LabelledCollection`
:param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from
qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.
:param repeats: int, number of samples to draw for every mixture rate
:param prevalence: the prevalence to preserv along the mixtures. If specified, should be an array containing
one prevalence value (positive float) for each class and summing up to one. If not specified, the prevalence
will be taken from the domain A (default).
:param mixture_points: an integer indicating the number of points to take from a linear scale (e.g., 21 will
generate the mixture points [1, 0.95, 0.9, ..., 0]), or the array of mixture values itself.
the specific points
:param random_state:
:param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples
will be the same every time the protocol is called)
"""
def __init__(
@ -294,7 +384,7 @@ class DomainMixer(AbstractStochasticSeededProtocol):
repeats=1,
prevalence=None,
mixture_points=11,
random_state=None,
random_state=0,
return_type='sample_prev'):
super(DomainMixer, self).__init__(random_state)
self.A = domainA
@ -319,6 +409,11 @@ class DomainMixer(AbstractStochasticSeededProtocol):
self.collator = OnLabelledCollectionProtocol.get_collator(return_type)
def samples_parameters(self):
"""
Return all the necessary parameters to replicate the samples as according to the this protocol.
:return: a list of zipped indexes (from A and B) that realize the sampling
"""
indexesA, indexesB = [], []
for propA in self.mixture_points:
for _ in range(self.repeats):
@ -331,12 +426,23 @@ class DomainMixer(AbstractStochasticSeededProtocol):
return list(zip(indexesA, indexesB))
def sample(self, indexes):
"""
Realizes the sample given a pair of indexes of the instances from A and B.
:param indexes: indexes of the instances to select from A and B
:return: an instance of :class:`qp.data.LabelledCollection`
"""
indexesA, indexesB = indexes
sampleA = self.A.sampling_from_index(indexesA)
sampleB = self.B.sampling_from_index(indexesB)
return sampleA+sampleB
def total(self):
"""
Returns the number of samples that will be generated (equals to "repeats * mixture_points")
:return: int
"""
return self.repeats * len(self.mixture_points)

View File

@ -28,13 +28,27 @@ class TestProtocols(unittest.TestCase):
self.assertEqual(samples1, samples2)
def test_app_not_replicate(self):
data = mock_labelled_collection()
p = APP(data, sample_size=5, n_prevalences=11)
p = APP(data, sample_size=5, n_prevalences=11) # <- random_state is by default set to 0
samples1 = samples_to_str(p)
samples2 = samples_to_str(p)
self.assertEqual(samples1, samples2)
def test_app_not_replicate(self):
data = mock_labelled_collection()
p = APP(data, sample_size=5, n_prevalences=11, random_state=None)
samples1 = samples_to_str(p)
samples2 = samples_to_str(p)
self.assertNotEqual(samples1, samples2)
p = APP(data, sample_size=5, n_prevalences=11, random_state=42)
samples1 = samples_to_str(p)
p = APP(data, sample_size=5, n_prevalences=11, random_state=0)
samples2 = samples_to_str(p)
self.assertNotEqual(samples1, samples2)
def test_app_number(self):
@ -64,13 +78,26 @@ class TestProtocols(unittest.TestCase):
self.assertEqual(samples1, samples2)
def test_npp_not_replicate(self):
data = mock_labelled_collection()
p = NPP(data, sample_size=5, repeats=5)
p = NPP(data, sample_size=5, repeats=5) # <- random_state is by default set to 0
samples1 = samples_to_str(p)
samples2 = samples_to_str(p)
self.assertEqual(samples1, samples2)
def test_npp_not_replicate(self):
data = mock_labelled_collection()
p = NPP(data, sample_size=5, repeats=5, random_state=None)
samples1 = samples_to_str(p)
samples2 = samples_to_str(p)
self.assertNotEqual(samples1, samples2)
p = NPP(data, sample_size=5, repeats=5, random_state=42)
samples1 = samples_to_str(p)
p = NPP(data, sample_size=5, repeats=5, random_state=0)
samples2 = samples_to_str(p)
self.assertNotEqual(samples1, samples2)
def test_kraemer_replicate(self):
@ -82,9 +109,16 @@ class TestProtocols(unittest.TestCase):
self.assertEqual(samples1, samples2)
p = USimplexPP(data, sample_size=5, repeats=10) # <- random_state is by default set to 0
samples1 = samples_to_str(p)
samples2 = samples_to_str(p)
self.assertEqual(samples1, samples2)
def test_kraemer_not_replicate(self):
data = mock_labelled_collection()
p = USimplexPP(data, sample_size=5, repeats=10)
p = USimplexPP(data, sample_size=5, repeats=10, random_state=None)
samples1 = samples_to_str(p)
samples2 = samples_to_str(p)
@ -101,10 +135,17 @@ class TestProtocols(unittest.TestCase):
self.assertEqual(samples1, samples2)
p = DomainMixer(dataA, dataB, sample_size=10, mixture_points=11) # <- random_state is by default set to 0
samples1 = samples_to_str(p)
samples2 = samples_to_str(p)
self.assertEqual(samples1, samples2)
def test_covariate_shift_not_replicate(self):
dataA = mock_labelled_collection('domA')
dataB = mock_labelled_collection('domB')
p = DomainMixer(dataA, dataB, sample_size=10, mixture_points=11)
p = DomainMixer(dataA, dataB, sample_size=10, mixture_points=11, random_state=None)
samples1 = samples_to_str(p)
samples2 = samples_to_str(p)