Merge branch 'master' of gitea-s2i2s.isti.cnr.it:moreo/QuaPy

2021-06-15 10:06:44 +02:00 · 2021-06-15 10:06:44 +02:00 · 2869effe22
parent fd55de4a36 e8c3e29911
commit 2869effe22
3 changed files with 13 additions and 2 deletions
--- a/quapy/classification/methods.py
+++ b/quapy/classification/methods.py
@@ -32,7 +32,7 @@ class PCALR(BaseEstimator):
            self.pca = TruncatedSVD(self.n_components).fit(X, y)
        self.classes_ = self.learner.classes_
        return self
-    
+

    def predict(self, X):
        # X = self.transform(X)
--- a/quapy/classification/svmperf.py
+++ b/quapy/classification/svmperf.py
@@ -58,6 +58,8 @@ class SVMperf(BaseEstimator, ClassifierMixin):
        if self.verbose:
            print('[Running]', cmd)
        p = subprocess.run(cmd.split(), stdout=PIPE, stderr=STDOUT)
+        if not exists(self.model):
+            print(p.stderr.decode('utf-8'))
        remove(traindat)

        if self.verbose:
@@ -102,5 +104,5 @@ class SVMperf(BaseEstimator, ClassifierMixin):

    def __del__(self):
        if hasattr(self, 'tmpdir'):
-            shutil.rmtree(self.tmpdir)
+            pass # shutil.rmtree(self.tmpdir, ignore_errors=True)

--- a/quapy/plot.py
+++ b/quapy/plot.py
@@ -172,6 +172,7 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e
    # join all data, and keep the order in which the methods appeared for the first time
    data = defaultdict(lambda:{'x':np.empty(shape=(0)), 'y':np.empty(shape=(0))})
    method_order = []
+
    for method, test_prevs_i, estim_prevs_i, tr_prev_i in zip(method_names, true_prevs, estim_prevs, tr_prevs):
        tr_prev_i = np.repeat(tr_prev_i.reshape(1,-1), repeats=test_prevs_i.shape[0], axis=0)

@@ -185,6 +186,7 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e
            method_order.append(method)

    bins = np.linspace(0, 1, n_bins+1)
+    inds_histogram_global = np.zeros(n_bins, dtype=np.float)  # we use this to keep track of how many datapoints contribute to each bin
    binwidth = 1 / n_bins
    min_x, max_x = None, None
    for method in method_order:
@@ -194,6 +196,8 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e
            method_drifts=np.log(1+method_drifts)

        inds = np.digitize(tr_test_drifts, bins, right=True)
+        inds_histogram_global += np.histogram(tr_test_drifts, density=True, bins=bins)[0]
+
        xs, ys, ystds = [], [], []
        for ind in range(len(bins)):
            selected = inds==ind
@@ -214,6 +218,11 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e
        if show_std:
            ax.fill_between(xs, ys-ystds, ys+ystds, alpha=0.25)

+    # xs = bins[:-1]
+    # ys = inds_histogram_global
+    # print(xs.shape, ys.shape)
+    # ax.errorbar(xs, ys, label='density')
+
    ax.set(xlabel=f'Distribution shift between training set and test sample',
           ylabel=f'{error_name.upper()} (true distribution, predicted distribution)',
           title=title)