180 lines
6.0 KiB
Python
180 lines
6.0 KiB
Python
import unittest
|
|
import numpy as np
|
|
from quapy.data import LabelledCollection
|
|
from quapy.protocol import APP, NPP, USimplexPP, DomainMixer, AbstractStochasticSeededProtocol
|
|
|
|
|
|
def mock_labelled_collection(prefix=''):
    """Build a toy labelled collection with 4 classes and 250 instances each.

    Every instance is a string made of ``prefix``, the position of the
    instance in the collection, a dash, and the instance label, so that the
    content of a sample can be recognized when it is printed.

    :param prefix: string prepended to every generated instance
    :return: a LabelledCollection holding the 1000 generated instances
    """
    # labels 0..3, each repeated 250 times, in increasing order
    labels = [label for label in range(4) for _ in range(250)]
    instances = [
        prefix + f'{position}-{label}'
        for position, label in enumerate(labels)
    ]
    label_set = sorted(np.unique(labels))
    return LabelledCollection(instances, labels, classes=label_set)
|
|
|
|
|
|
def samples_to_str(protocol):
    """Serialize all the samples generated by a protocol into one string.

    :param protocol: a callable that, invoked without arguments, returns an
        iterable of ``(instances, prevalence)`` pairs
    :return: a string with one line per sample, in generation order; each
        line holds the instances and the prevalence separated by a tab and
        is terminated by a newline. The empty string is returned if the
        protocol generates no samples.
    """
    # join builds the result in a single pass instead of growing a string
    # with repeated concatenations
    return ''.join(
        f'{instances}\t{prev}\n' for instances, prev in protocol()
    )
|
|
|
|
|
|
class TestProtocols(unittest.TestCase):
    """Replicability and sample-count tests for the sampling protocols.

    Most tests iterate over the same protocol instance twice and compare the
    two runs after serializing them with ``samples_to_str``.
    """

    def _assert_replicates(self, protocol):
        """Assert that two consecutive runs of ``protocol`` are identical."""
        first_run = samples_to_str(protocol)
        second_run = samples_to_str(protocol)
        self.assertEqual(first_run, second_run)

    def _assert_does_not_replicate(self, protocol):
        """Assert that two consecutive runs of ``protocol`` differ."""
        first_run = samples_to_str(protocol)
        second_run = samples_to_str(protocol)
        self.assertNotEqual(first_run, second_run)

    def test_app_replicate(self):
        # APP with an explicit seed must generate the same samples twice
        collection = mock_labelled_collection()
        seeded = APP(collection, sample_size=5, n_prevalences=11, random_state=42)
        self._assert_replicates(seeded)

        # random_state is not given here: by default it is set to 0
        default_seeded = APP(collection, sample_size=5, n_prevalences=11)
        self._assert_replicates(default_seeded)

    def test_app_not_replicate(self):
        # with random_state=None two runs of the same APP must differ
        collection = mock_labelled_collection()
        unseeded = APP(collection, sample_size=5, n_prevalences=11, random_state=None)
        self._assert_does_not_replicate(unseeded)

        # two different seeds must not produce the same samples
        run_seed_42 = samples_to_str(
            APP(collection, sample_size=5, n_prevalences=11, random_state=42)
        )
        run_seed_0 = samples_to_str(
            APP(collection, sample_size=5, n_prevalences=11, random_state=0)
        )
        self.assertNotEqual(run_seed_42, run_seed_0)

    def test_app_number(self):
        # the number of generated samples must match the one APP announces
        collection = mock_labelled_collection()
        protocol = APP(collection, sample_size=100, n_prevalences=10, repeats=1)

        # Surprisingly enough, for some values of n_prevalences this test
        # fails even though everything is correct. The problem is that in
        # APP.prevalence_grid() a rounding error sometimes accumulates and
        # the sum surpasses 1.0 (by a very small float value,
        # 0.0000000000002 or something like that), so these tuples are
        # mistakenly removed. np.close and other workarounds have been
        # tried, but eventually some negative probability shows up in the
        # sampling function.
        n_generated = sum(1 for _ in protocol())

        self.assertEqual(n_generated, protocol.total())

    def test_npp_replicate(self):
        # NPP with an explicit seed must generate the same samples twice
        collection = mock_labelled_collection()
        seeded = NPP(collection, sample_size=5, repeats=5, random_state=42)
        self._assert_replicates(seeded)

        # random_state is not given here: by default it is set to 0
        default_seeded = NPP(collection, sample_size=5, repeats=5)
        self._assert_replicates(default_seeded)

    def test_npp_not_replicate(self):
        # with random_state=None two runs of the same NPP must differ
        collection = mock_labelled_collection()
        unseeded = NPP(collection, sample_size=5, repeats=5, random_state=None)
        self._assert_does_not_replicate(unseeded)

        # two different seeds must not produce the same samples
        run_seed_42 = samples_to_str(
            NPP(collection, sample_size=5, repeats=5, random_state=42)
        )
        run_seed_0 = samples_to_str(
            NPP(collection, sample_size=5, repeats=5, random_state=0)
        )
        self.assertNotEqual(run_seed_42, run_seed_0)

    def test_kraemer_replicate(self):
        # USimplexPP with an explicit seed must generate the same samples twice
        collection = mock_labelled_collection()
        seeded = USimplexPP(collection, sample_size=5, repeats=10, random_state=42)
        self._assert_replicates(seeded)

        # random_state is not given here: by default it is set to 0
        default_seeded = USimplexPP(collection, sample_size=5, repeats=10)
        self._assert_replicates(default_seeded)

    def test_kraemer_not_replicate(self):
        # with random_state=None two runs of the same USimplexPP must differ
        collection = mock_labelled_collection()
        unseeded = USimplexPP(collection, sample_size=5, repeats=10, random_state=None)
        self._assert_does_not_replicate(unseeded)

    def test_covariate_shift_replicate(self):
        # DomainMixer with an explicit seed must generate the same samples twice
        domain_a = mock_labelled_collection('domA')
        domain_b = mock_labelled_collection('domB')
        seeded = DomainMixer(
            domain_a, domain_b, sample_size=10, mixture_points=11, random_state=1
        )
        self._assert_replicates(seeded)

        # random_state is not given here: by default it is set to 0
        default_seeded = DomainMixer(
            domain_a, domain_b, sample_size=10, mixture_points=11
        )
        self._assert_replicates(default_seeded)

    def test_covariate_shift_not_replicate(self):
        # with random_state=None two runs of the same DomainMixer must differ
        domain_a = mock_labelled_collection('domA')
        domain_b = mock_labelled_collection('domB')
        unseeded = DomainMixer(
            domain_a, domain_b, sample_size=10, mixture_points=11, random_state=None
        )
        self._assert_does_not_replicate(unseeded)

    def test_no_seed_init(self):
        # a seeded protocol whose __init__ never calls the parent constructor
        class NoSeedInit(AbstractStochasticSeededProtocol):

            def __init__(self):
                # deliberately does not call super().__init__(...)
                self.data = mock_labelled_collection()

            def samples_parameters(self):
                # return a matrix containing sampling indexes in the rows
                n_rows, n_cols = 10, 10
                flat_indexes = np.random.randint(0, len(self.data), n_rows * n_cols)
                return flat_indexes.reshape(n_rows, n_cols)

            def sample(self, params):
                unique_indexes = np.unique(params)
                return self.data.sampling_from_index(unique_indexes)

        protocol = NoSeedInit()

        # Iterating should raise a ValueError: the class declares itself an
        # AbstractStochasticSeededProtocol, but the random seed has never
        # been passed to the parent constructor.
        with self.assertRaises(ValueError):
            for _ in protocol():
                pass

        print('done')
|
|
|
|
|
|
# Run the whole test suite when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()
|