QuaPy/quapy/tests/test_datasets.py

54 lines
2.0 KiB
Python
Raw Normal View History

2021-04-29 16:07:39 +02:00
import pytest
from quapy.data.datasets import REVIEWS_SENTIMENT_DATASETS, TWITTER_SENTIMENT_DATASETS_TEST, \
2022-06-01 18:28:59 +02:00
TWITTER_SENTIMENT_DATASETS_TRAIN, UCI_DATASETS, LEQUA2022_TASKS, \
fetch_reviews, fetch_twitter, fetch_UCIDataset, fetch_lequa2022
2021-04-29 16:07:39 +02:00
@pytest.mark.parametrize('dataset_name', REVIEWS_SENTIMENT_DATASETS)
def test_fetch_reviews(dataset_name):
    """Fetch one reviews dataset and print the stats of both of its splits.

    The test has no explicit assertions: it passes as long as fetching the
    dataset and calling ``stats()`` on the training and test splits do not
    raise.
    """
    reviews = fetch_reviews(dataset_name)
    print(f'Dataset {dataset_name}')
    # The split attribute is looked up only after its heading is printed,
    # so the output order is heading -> stats, heading -> stats.
    for heading, split_name in (('Training set stats', 'training'),
                                ('Test set stats', 'test')):
        print(heading)
        getattr(reviews, split_name).stats()
2021-04-29 16:07:39 +02:00
@pytest.mark.parametrize('dataset_name', TWITTER_SENTIMENT_DATASETS_TEST + TWITTER_SENTIMENT_DATASETS_TRAIN)
def test_fetch_twitter(dataset_name):
    """Fetch one Twitter sentiment dataset and print the stats of its splits.

    When ``fetch_twitter`` rejects "semeval" with the "can only be used for
    model selection" ``ValueError``, the fetch is retried with
    ``for_model_selection=True``. Any other ``ValueError`` is re-raised so
    that the real failure is reported instead of an unrelated
    ``UnboundLocalError`` on ``dataset`` further down.
    """
    try:
        dataset = fetch_twitter(dataset_name)
    except ValueError as ve:
        # Guard against exceptions raised without arguments.
        message = str(ve.args[0]) if ve.args else ''
        semeval_needs_model_selection = dataset_name == 'semeval' and message.startswith(
            'dataset "semeval" can only be used for model selection.')
        if not semeval_needs_model_selection:
            raise
        dataset = fetch_twitter(dataset_name, for_model_selection=True)
    print(f'Dataset {dataset_name}')
    print('Training set stats')
    dataset.training.stats()
    print('Test set stats')
    # Mirror test_fetch_reviews: actually show the stats announced above.
    dataset.test.stats()
2021-04-29 16:07:39 +02:00
2021-04-30 17:22:58 +02:00
@pytest.mark.parametrize('dataset_name', UCI_DATASETS)
def test_fetch_UCIDataset(dataset_name):
    """Fetch one UCI dataset and print the stats of its splits.

    "pageblocks.5" is skipped when ``fetch_UCIDataset`` raises the
    ``FileNotFoundError`` whose message says that the dataset has to be
    prepared by hand first. Any other ``FileNotFoundError`` is re-raised so
    that the real failure is reported instead of an unrelated
    ``UnboundLocalError`` on ``dataset`` further down.
    """
    try:
        dataset = fetch_UCIDataset(dataset_name)
    except FileNotFoundError as fnfe:
        # args[0] is an int errno when the error comes from the OS, and args
        # may be empty; normalise to text before searching it.
        message = str(fnfe.args[0]) if fnfe.args else ''
        needs_hand_processing = (
            dataset_name == 'pageblocks.5'
            and 'If this is the first time you attempt to load this dataset' in message
        )
        if not needs_hand_processing:
            raise
        # Report the test as skipped rather than silently passing.
        pytest.skip('The pageblocks.5 dataset requires some hand processing to be usable, skipping this test.')
    print(f'Dataset {dataset_name}')
    print('Training set stats')
    dataset.training.stats()
    print('Test set stats')
    # Mirror test_fetch_reviews: actually show the stats announced above.
    dataset.test.stats()
2022-06-01 18:28:59 +02:00
@pytest.mark.parametrize('dataset_name', LEQUA2022_TASKS)
def test_fetch_lequa2022(dataset_name):
    """Smoke test: fetching a LeQua 2022 task must complete without raising.

    The value returned by ``fetch_lequa2022`` is deliberately not inspected.
    """
    # Unlike the other dataset tests, no training/test stats are printed
    # here; only the fetch call itself is exercised.
    fetch_lequa2022(dataset_name)