QuaPy/quapy/tests/test_replicability.py

112 lines
3.8 KiB
Python
Raw Normal View History

import unittest
import quapy as qp
2023-02-09 19:39:16 +01:00
from quapy.data import LabelledCollection
from quapy.functional import strprev
from sklearn.linear_model import LogisticRegression
2024-01-29 09:43:29 +01:00
import numpy as np
2023-02-09 19:39:16 +01:00
from quapy.method.aggregative import PACC
2024-01-29 09:43:29 +01:00
import quapy.functional as F
class MyTestCase(unittest.TestCase):
2024-01-29 09:43:29 +01:00
2023-02-09 19:39:16 +01:00
def test_prediction_replicability(self):
2024-02-07 18:31:34 +01:00
dataset = qp.datasets.fetch_UCIBinaryDataset('yeast')
with qp.util.temp_seed(0):
lr = LogisticRegression(random_state=0, max_iter=10000)
pacc = PACC(lr)
prev = pacc.fit(dataset.training).quantify(dataset.test.X)
str_prev1 = strprev(prev, prec=5)
with qp.util.temp_seed(0):
lr = LogisticRegression(random_state=0, max_iter=10000)
pacc = PACC(lr)
prev2 = pacc.fit(dataset.training).quantify(dataset.test.X)
str_prev2 = strprev(prev2, prec=5)
self.assertEqual(str_prev1, str_prev2) # add assertion here
2024-01-29 09:43:29 +01:00
2023-02-09 19:39:16 +01:00
def test_samping_replicability(self):
def equal_collections(c1, c2, value=True):
2024-02-14 12:27:19 +01:00
self.assertEqual(np.all(c1.X == c2.X), value)
self.assertEqual(np.all(c1.y == c2.y), value)
2023-02-09 19:39:16 +01:00
if value:
self.assertEqual(np.all(c1.classes_ == c2.classes_), value)
X = list(map(str, range(100)))
y = np.random.randint(0, 2, 100)
data = LabelledCollection(instances=X, labels=y)
sample1 = data.sampling(50)
sample2 = data.sampling(50)
equal_collections(sample1, sample2, False)
sample1 = data.sampling(50, random_state=0)
sample2 = data.sampling(50, random_state=0)
equal_collections(sample1, sample2, True)
sample1 = data.sampling(50, *[0.7, 0.3], random_state=0)
sample2 = data.sampling(50, *[0.7, 0.3], random_state=0)
equal_collections(sample1, sample2, True)
with qp.util.temp_seed(0):
sample1 = data.sampling(50, *[0.7, 0.3])
with qp.util.temp_seed(0):
sample2 = data.sampling(50, *[0.7, 0.3])
equal_collections(sample1, sample2, True)
sample1 = data.sampling(50, *[0.7, 0.3], random_state=0)
sample2 = data.sampling(50, *[0.7, 0.3], random_state=0)
equal_collections(sample1, sample2, True)
sample1_tr, sample1_te = data.split_stratified(train_prop=0.7, random_state=0)
sample2_tr, sample2_te = data.split_stratified(train_prop=0.7, random_state=0)
equal_collections(sample1_tr, sample2_tr, True)
equal_collections(sample1_te, sample2_te, True)
with qp.util.temp_seed(0):
sample1_tr, sample1_te = data.split_stratified(train_prop=0.7)
with qp.util.temp_seed(0):
sample2_tr, sample2_te = data.split_stratified(train_prop=0.7)
equal_collections(sample1_tr, sample2_tr, True)
equal_collections(sample1_te, sample2_te, True)
2024-01-29 09:43:29 +01:00
def test_parallel_replicability(self):
train, test = qp.datasets.fetch_UCIMulticlassDataset('dry-bean').train_test
test = test.sampling(500, *[0.1, 0.0, 0.1, 0.1, 0.2, 0.5, 0.0])
with qp.util.temp_seed(10):
pacc = PACC(LogisticRegression(), val_split=2, n_jobs=2)
pacc.fit(train, val_split=0.5)
prev1 = F.strprev(pacc.quantify(test.instances))
with qp.util.temp_seed(0):
pacc = PACC(LogisticRegression(), val_split=2, n_jobs=2)
pacc.fit(train, val_split=0.5)
prev2 = F.strprev(pacc.quantify(test.instances))
with qp.util.temp_seed(0):
pacc = PACC(LogisticRegression(), val_split=2, n_jobs=2)
pacc.fit(train, val_split=0.5)
prev3 = F.strprev(pacc.quantify(test.instances))
print(prev1)
print(prev2)
print(prev3)
self.assertNotEqual(prev1, prev2)
self.assertEqual(prev2, prev3)
if __name__ == '__main__':
unittest.main()