diff --git a/quapy/classification/methods.py b/quapy/classification/methods.py index bf1d8d6..b313f57 100644 --- a/quapy/classification/methods.py +++ b/quapy/classification/methods.py @@ -32,7 +32,7 @@ class PCALR(BaseEstimator): self.pca = TruncatedSVD(self.n_components).fit(X, y) self.classes_ = self.learner.classes_ return self - + def predict(self, X): # X = self.transform(X) diff --git a/quapy/classification/svmperf.py b/quapy/classification/svmperf.py index a52d9ba..b5a4e85 100644 --- a/quapy/classification/svmperf.py +++ b/quapy/classification/svmperf.py @@ -58,6 +58,8 @@ class SVMperf(BaseEstimator, ClassifierMixin): if self.verbose: print('[Running]', cmd) p = subprocess.run(cmd.split(), stdout=PIPE, stderr=STDOUT) + if not exists(self.model): + print(p.stderr.decode('utf-8')) remove(traindat) if self.verbose: @@ -102,5 +104,5 @@ class SVMperf(BaseEstimator, ClassifierMixin): def __del__(self): if hasattr(self, 'tmpdir'): - shutil.rmtree(self.tmpdir) + pass # shutil.rmtree(self.tmpdir, ignore_errors=True) diff --git a/quapy/plot.py b/quapy/plot.py index ff93237..7b105e2 100644 --- a/quapy/plot.py +++ b/quapy/plot.py @@ -172,6 +172,7 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e # join all data, and keep the order in which the methods appeared for the first time data = defaultdict(lambda:{'x':np.empty(shape=(0)), 'y':np.empty(shape=(0))}) method_order = [] + for method, test_prevs_i, estim_prevs_i, tr_prev_i in zip(method_names, true_prevs, estim_prevs, tr_prevs): tr_prev_i = np.repeat(tr_prev_i.reshape(1,-1), repeats=test_prevs_i.shape[0], axis=0) @@ -185,6 +186,7 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e method_order.append(method) bins = np.linspace(0, 1, n_bins+1) + inds_histogram_global = np.zeros(n_bins, dtype=np.float) # we use this to keep track of how many datapoits contribute to each bin binwidth = 1 / n_bins min_x, max_x = None, None for method in method_order: @@ -194,6 +196,8 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e method_drifts=np.log(1+method_drifts) inds = np.digitize(tr_test_drifts, bins, right=True) + inds_histogram_global += np.histogram(tr_test_drifts, density=True, bins=bins)[0] + xs, ys, ystds = [], [], [] for ind in range(len(bins)): selected = inds==ind @@ -214,6 +218,11 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e if show_std: ax.fill_between(xs, ys-ystds, ys+ystds, alpha=0.25) + # xs = bins[:-1] + # ys = inds_histogram_global + # print(xs.shape, ys.shape) + # ax.errorbar(xs, ys, label='density') + ax.set(xlabel=f'Distribution shift between training set and test sample', ylabel=f'{error_name.upper()} (true distribution, predicted distribution)', title=title)