From d86c402916fd895d38ffa3ab654ec8399874a553 Mon Sep 17 00:00:00 2001 From: Andrea Esuli Date: Thu, 29 Apr 2021 16:07:39 +0200 Subject: [PATCH 01/14] Added first tests --- quapy/tests/__init__.py | 0 quapy/tests/test_base.py | 5 +++++ quapy/tests/test_datasets.py | 18 ++++++++++++++++++ quapy/tests/test_methods.py | 29 +++++++++++++++++++++++++++++ 4 files changed, 52 insertions(+) create mode 100644 quapy/tests/__init__.py create mode 100644 quapy/tests/test_base.py create mode 100644 quapy/tests/test_datasets.py create mode 100644 quapy/tests/test_methods.py diff --git a/quapy/tests/__init__.py b/quapy/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/quapy/tests/test_base.py b/quapy/tests/test_base.py new file mode 100644 index 0000000..4fd9faa --- /dev/null +++ b/quapy/tests/test_base.py @@ -0,0 +1,5 @@ +import pytest + +def test_import(): + import quapy as qp + assert qp.__version__ is not None diff --git a/quapy/tests/test_datasets.py b/quapy/tests/test_datasets.py new file mode 100644 index 0000000..3f8f673 --- /dev/null +++ b/quapy/tests/test_datasets.py @@ -0,0 +1,18 @@ +import pytest + +from quapy.data.datasets import REVIEWS_SENTIMENT_DATASETS, TWITTER_SENTIMENT_DATASETS_TEST, \ + TWITTER_SENTIMENT_DATASETS_TRAIN, UCI_DATASETS, fetch_reviews, fetch_twitter, fetch_UCIDataset + + +@pytest.mark.parametrize('dataset_name', REVIEWS_SENTIMENT_DATASETS) +def test_fetch_reviews(dataset_name): + fetch_reviews(dataset_name) + + +@pytest.mark.parametrize('dataset_name', TWITTER_SENTIMENT_DATASETS_TEST + TWITTER_SENTIMENT_DATASETS_TRAIN) +def test_fetch_twitter(dataset_name): + fetch_twitter(dataset_name) + +@pytest.mark.parametrize('dataset_name', UCI_DATASETS) +def test_fetch_UCIDataset(dataset_name): + fetch_UCIDataset(dataset_name) diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py new file mode 100644 index 0000000..660c45e --- /dev/null +++ b/quapy/tests/test_methods.py @@ -0,0 +1,29 @@ +import numpy +import pytest +from sklearn.linear_model import LogisticRegression +from sklearn.naive_bayes import MultinomialNB +from sklearn.svm import LinearSVC + +import quapy as qp + +datasets = [qp.datasets.fetch_twitter('semeval16')] + +aggregative_methods = [qp.method.aggregative.CC, qp.method.aggregative.ACC, qp.method.aggregative.ELM] + +learners = [LogisticRegression, MultinomialNB, LinearSVC] + + +@pytest.mark.parametrize('dataset', datasets) +@pytest.mark.parametrize('aggregative_method', aggregative_methods) +@pytest.mark.parametrize('learner', learners) +def test_aggregative_methods(dataset, aggregative_method, learner): + model = aggregative_method(learner()) + + model.fit(dataset.training) + + estim_prevalences = model.quantify(dataset.test.instances) + + true_prevalences = dataset.test.prevalence() + error = qp.error.mae(true_prevalences, estim_prevalences) + + assert type(error) == numpy.float64 From 44cec7a0462cbcd7c603f3c9c4416e7f614b9800 Mon Sep 17 00:00:00 2001 From: Andrea Esuli Date: Fri, 30 Apr 2021 17:00:46 +0200 Subject: [PATCH 02/14] Added encoding option with default to utf-8. --- quapy/data/reader.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/quapy/data/reader.py b/quapy/data/reader.py index 743b99e..5b4d115 100644 --- a/quapy/data/reader.py +++ b/quapy/data/reader.py @@ -3,7 +3,7 @@ from scipy.sparse import dok_matrix from tqdm import tqdm -def from_text(path): +def from_text(path, encoding='utf-8'): """ Reas a labelled colletion of documents. File fomart <0 or 1>\t\n @@ -11,7 +11,7 @@ def from_text(path): :return: a list of sentences, and a list of labels """ all_sentences, all_labels = [], [] - for line in tqdm(open(path, 'rt').readlines(), f'loading {path}'): + for line in tqdm(open(path, 'rt', encoding=encoding).readlines(), f'loading {path}'): line = line.strip() if line: label, sentence = line.split('\t') @@ -25,8 +25,8 @@ def from_text(path): def from_sparse(path): """ - Reas a labelled colletion of real-valued instances expressed in sparse format - File fomart <-1 or 0 or 1>[\s col(int):val(float)]\n + Reads a labelled collection of real-valued instances expressed in sparse format + File format <-1 or 0 or 1>[\s col(int):val(float)]\n :param path: path to the labelled collection :return: a csr_matrix containing the instances (rows), and a ndarray containing the labels """ @@ -56,16 +56,16 @@ def from_sparse(path): return X, y -def from_csv(path): +def from_csv(path, encoding='utf-8'): """ - Reas a csv file in which columns are separated by ','. - File fomart