From a6c69264b65f51a73edc74aba246177b4ca75fd1 Mon Sep 17 00:00:00 2001 From: Lorenzo Volpi Date: Fri, 27 Oct 2023 17:05:01 +0200 Subject: [PATCH] TODO updated, dataset imdb parameter added --- TODO.md | 5 +++++ quacc/dataset.py | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/TODO.md b/TODO.md index f5ce0a2..e2a47de 100644 --- a/TODO.md +++ b/TODO.md @@ -29,3 +29,8 @@ - [x] salvare il best score ottenuto da ogni applicazione di GridSearchQ - nel caso di bin fare media dei due best score - [x] import baselines + +- [ ] testare anche su imdb +- [ ] plot avg con train prevalence sull'asse x e media su test prevalence +- [ ] realizzare grid search per task specifico partendo da GridSearchQ +- [ ] provare PACC come quantificatore \ No newline at end of file diff --git a/quacc/dataset.py b/quacc/dataset.py index 9362da8..2d3228c 100644 --- a/quacc/dataset.py +++ b/quacc/dataset.py @@ -42,8 +42,9 @@ class Dataset: def __spambase(self): return qp.datasets.fetch_UCIDataset("spambase", verbose=False).train_test + # provare min_df=5 def __imdb(self): - return qp.datasets.fetch_reviews("imdb", tfidf=True).train_test + return qp.datasets.fetch_reviews("imdb", tfidf=True, min_df=3).train_test def __rcv1(self): n_train = 23149