diff --git a/CHANGE_LOG.txt b/CHANGE_LOG.txt
index 9f0b885..3c43e5d 100644
--- a/CHANGE_LOG.txt
+++ b/CHANGE_LOG.txt
@@ -1,6 +1,8 @@
 Change Log 0.1.10
 -----------------
 
+CLEAN TODO-FILE
+
 - Base code Refactor:
   - Removing coupling between LabelledCollection and quantification methods. E.g.:
     def fit(data:LabelledCollection): -> def fit(X, y):
diff --git a/TODO.txt b/TODO.txt
index 1da8b68..8430eea 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,3 +1,18 @@
+Add the fix suggested by Alexander:
+
+For a more general application, I would maybe first establish a per-class threshold of plausible prevalence
+based on the number of actual positives and the required sample size; e.g., for sample_size=100 and actual
+positives [10, 100, 500] -> [0.1, 1.0, 1.0], meaning that class 0 can be sampled at most at 0.1 prevalence, while
+the others can be sampled up to 1.0 prevalence. Then, when a prevalence value is requested, e.g., [0.33, 0.33, 0.33],
+we may either clip each value and normalize (as you suggest for the extreme case, e.g., [0.1, 0.33, 0.33]/sum) or
+scale each value by the per-class thresholds, i.e., [0.33*0.1, 0.33*1, 0.33*1]/sum.
+- This affects LabelledCollection
+- This functionality should be accessible via sampling protocols and evaluation functions
+
+Solve the pre-trained classifier issues. An example is the coptic-codes script I did, which needed a mock_lr in
+order to have access to classes_; think also of the case in which the precomputed outputs are already generated,
+as in the unifying-problems code.
+
 To remove the labelledcollection from the methods:
 
 - The trouble comes from the confusing semantics of fit in aggregative methods, which receives 3 parameters:
diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py
index 71753c8..aa609bc 100644
--- a/quapy/tests/test_methods.py
+++ b/quapy/tests/test_methods.py
@@ -106,7 +106,7 @@ class TestMethods(unittest.TestCase):
         from quapy.method.meta import QuaNet
         model = QuaNet(learner, device='cpu', n_epochs=2, tr_iter_per_poch=10, va_iter_per_poch=10, patience=2)
 
-        model.fit(dataset.training)
+        model.fit(*dataset.training.Xy)
         estim_prevalences = model.predict(dataset.test.instances)
         self.assertTrue(check_prevalence_vector(estim_prevalences))
 
@@ -114,7 +114,7 @@ class TestMethods(unittest.TestCase):
         for dataset in TestMethods.datasets:
             for q in COMPOSABLE_METHODS:
                 print('testing', q)
-                q.fit(dataset.training)
+                q.fit(*dataset.training.Xy)
                 estim_prevalences = q.predict(dataset.test.X)
                 self.assertTrue(check_prevalence_vector(estim_prevalences))
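
A minimal sketch of the per-class threshold idea from the TODO entry above, written in plain NumPy; the function names are illustrative and nothing here is existing QuaPy API. Each class gets a maximum plausible prevalence derived from its number of positives and the sample size, and a requested prevalence vector is then mapped either by clip-and-normalize or by scale-by-threshold, exactly on the [10, 100, 500] / sample_size=100 example quoted above.

import numpy as np

def per_class_thresholds(n_positives, sample_size):
    # maximum plausible prevalence per class, e.g. [10, 100, 500] with
    # sample_size=100 -> [0.1, 1.0, 1.0]
    return np.minimum(np.asarray(n_positives, dtype=float) / sample_size, 1.0)

def clip_and_normalize(requested, thresholds):
    # clip each requested prevalence to its threshold, then renormalize,
    # e.g. [0.33, 0.33, 0.33] -> [0.1, 0.33, 0.33] / sum
    clipped = np.minimum(np.asarray(requested, dtype=float), thresholds)
    return clipped / clipped.sum()

def scale_by_thresholds(requested, thresholds):
    # scale each requested prevalence by its threshold, then renormalize,
    # e.g. [0.33, 0.33, 0.33] -> [0.33*0.1, 0.33*1, 0.33*1] / sum
    scaled = np.asarray(requested, dtype=float) * thresholds
    return scaled / scaled.sum()

thr = per_class_thresholds([10, 100, 500], sample_size=100)   # [0.1, 1.0, 1.0]
print(clip_and_normalize([0.33, 0.33, 0.33], thr))            # ~[0.132, 0.434, 0.434]
print(scale_by_thresholds([0.33, 0.33, 0.33], thr))           # ~[0.048, 0.476, 0.476]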
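A usage sketch of the decoupled fit(X, y) interface targeted by the refactor, following the pattern adopted in the updated tests (model.fit(*dataset.training.Xy)). The dataset name and the PACC/LogisticRegression choice are illustrative, and predict()/.X are taken from the tests above; the snippet assumes the refactored API rather than a released QuaPy version.

import quapy as qp
from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import PACC

dataset = qp.datasets.fetch_reviews('kindle', tfidf=True)
quantifier = PACC(LogisticRegression())

# before the refactor: quantifier.fit(dataset.training)
quantifier.fit(*dataset.training.Xy)             # Xy unpacks into (instances, labels)
estim_prev = quantifier.predict(dataset.test.X)  # estimated class prevalence values
print(estim_prev)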
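A hypothetical illustration of the "mock classifier" workaround mentioned in the pre-trained-classifier TODO entry; the class and attribute layout below are invented, not QuaPy API. The idea is a sklearn-style stub that exposes classes_ and serves precomputed posteriors, so a quantifier can be built on top of an already-trained classifier or on outputs generated offline.

import numpy as np

class PrecomputedClassifier:
    # pretends to be a trained probabilistic classifier; fit() is a no-op

    def __init__(self, classes, posteriors):
        self.classes_ = np.asarray(classes)   # what the real classifier would expose
        self._posteriors = np.asarray(posteriors)  # row i holds the posterior of instance i

    def fit(self, X, y=None):
        return self                           # already "trained" elsewhere

    def predict_proba(self, X):
        # X is assumed to carry indices into the precomputed posterior matrix
        return self._posteriors[np.asarray(X)]

    def predict(self, X):
        return self.classes_[self.predict_proba(X).argmax(axis=1)]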