diff --git a/quacc/main_test.py b/quacc/main_test.py deleted file mode 100644 index 488f6fe..0000000 --- a/quacc/main_test.py +++ /dev/null @@ -1,137 +0,0 @@ -import logging -from logging.handlers import QueueHandler -from multiprocessing import Manager, Queue -from threading import Thread -from time import sleep, time - -import numpy as np -import scipy.sparse as sp -from joblib import Parallel, delayed -from quapy.protocol import APP -from sklearn.linear_model import LinearRegression, LogisticRegression -from sklearn.metrics import accuracy_score - -from baselines.mandoline import estimate_performance -from quacc.dataset import Dataset -from quacc.logger import logger, logger_manager, setup_logger, setup_worker_logger - - -def test_lr(): - d = Dataset(name="rcv1", target="CCAT", n_prevalences=1).get_raw() - - classifier = LogisticRegression() - classifier.fit(*d.train.Xy) - - val, _ = d.validation.split_stratified(0.5, random_state=0) - val_X, val_y = val.X, val.y - val_probs = classifier.predict_proba(val_X) - - reg_X = sp.hstack([val_X, val_probs]) - reg_y = val_probs[np.arange(val_probs.shape[0]), val_y] - reg = LinearRegression() - reg.fit(reg_X, reg_y) - - _test_num = 10000 - test_X = d.test.X[:_test_num, :] - test_probs = classifier.predict_proba(test_X) - test_reg_X = sp.hstack([test_X, test_probs]) - reg_pred = reg.predict(test_reg_X) - - def threshold(pred): - # return np.mean( - # (reg.predict(test_reg_X) >= pred) - # == ( - # test_probs[np.arange(_test_num), d.test.y[:_test_num]] == np.max(test_probs, axis=1) - # ) - # ) - return np.mean( - (reg.predict(test_reg_X) >= pred) - == (np.argmax(test_probs, axis=1) == d.test.y[:_test_num]) - ) - - max_p, max_acc = 0, 0 - for p in reg_pred: - acc = threshold(p) - if acc > max_acc: - max_acc = acc - max_p = p - - print(f"{max_p = }, {max_acc = }") - reg_pred = reg_pred - max_p + 0.5 - print(reg_pred) - print(np.mean(reg_pred >= 0.5)) - print(np.mean(np.argmax(test_probs, axis=1) == d.test.y[:_test_num])) - - -def entropy(probas): - return -np.sum(np.multiply(probas, np.log(probas + 1e-20)), axis=1) - - -def get_slices(probas): - ln, ncl = probas.shape - preds = np.argmax(probas, axis=1) - pred_slices = np.full((ln, ncl), fill_value=-1, dtype="