diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..1ba6d09 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,33 @@ +name: CI + +on: + pull_request: + push: + branches: + - main + - devel + +jobs: + + # run unit tests + test: + name: Unit tests (Python ${{ matrix.python-version }}) + runs-on: ubuntu-latest + strategy: + matrix: + python-version: + - "3.11" + env: + QUAPY_TESTS_OMIT_LARGE_DATASETS: True + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip setuptools wheel + python -m pip install -e .[bayes,tests] + - name: Test with unittest + run: python -m unittest diff --git a/quapy/data/base.py b/quapy/data/base.py index e52230e..ceb7402 100644 --- a/quapy/data/base.py +++ b/quapy/data/base.py @@ -549,7 +549,7 @@ class Dataset: yield Dataset(train, test, name=f'fold {(i % nfolds) + 1}/{nfolds} (round={(i // nfolds) + 1})') - def reduce(self, n_train=100, n_test=100): + def reduce(self, n_train=100, n_test=100, random_state=None): """ Reduce the number of instances in place for quick experiments. Preserves the prevalence of each set. 
@@ -557,6 +557,14 @@ class Dataset: :param n_test: number of test documents to keep (default 100) :return: self """ - self.training = self.training.sampling(n_train, *self.training.prevalence()) - self.test = self.test.sampling(n_test, *self.test.prevalence()) + self.training = self.training.sampling( + n_train, + *self.training.prevalence(), + random_state=random_state + ) + self.test = self.test.sampling( + n_test, + *self.test.prevalence(), + random_state=random_state + ) return self \ No newline at end of file diff --git a/quapy/tests/test_datasets.py b/quapy/tests/test_datasets.py index daa9207..a8587b2 100644 --- a/quapy/tests/test_datasets.py +++ b/quapy/tests/test_datasets.py @@ -1,3 +1,4 @@ +import os import unittest from sklearn.feature_extraction.text import TfidfVectorizer @@ -77,6 +78,9 @@ class TestDatasets(unittest.TestCase): self._check_dataset(dataset) def test_lequa2022(self): + if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'): + print("omitting test_lequa2022 because QUAPY_TESTS_OMIT_LARGE_DATASETS is set") + return for dataset_name in LEQUA2022_VECTOR_TASKS: print(f'loading dataset {dataset_name}...', end='') @@ -104,6 +108,10 @@ class TestDatasets(unittest.TestCase): def test_IFCB(self): + if os.environ.get('QUAPY_TESTS_OMIT_LARGE_DATASETS'): + print("omitting test_IFCB because QUAPY_TESTS_OMIT_LARGE_DATASETS is set") + return + print(f'loading dataset IFCB.') for mod_sel in [False, True]: train, gen = fetch_IFCB(single_sample_train=True, for_model_selection=mod_sel) diff --git a/quapy/tests/test_modsel.py b/quapy/tests/test_modsel.py index fe416c7..64b0ff4 100644 --- a/quapy/tests/test_modsel.py +++ b/quapy/tests/test_modsel.py @@ -4,7 +4,6 @@ import numpy as np from sklearn.linear_model import LogisticRegression import quapy as qp -import util from quapy.method.aggregative import PACC from quapy.model_selection import GridSearchQ from quapy.protocol import APP @@ -20,7 +19,7 @@ class ModselTestCase(unittest.TestCase): q = 
PACC(LogisticRegression(random_state=1, max_iter=5000)) - data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce() + data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(random_state=1) training, validation = data.training.split_stratified(0.7, random_state=1) param_grid = {'classifier__C': [0.000001, 10.]} @@ -42,7 +41,7 @@ class ModselTestCase(unittest.TestCase): q = PACC(LogisticRegression(random_state=1, max_iter=5000)) - data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(n_train=500) + data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(n_train=500, random_state=1) training, validation = data.training.split_stratified(0.7, random_state=1) param_grid = {'classifier__C': np.logspace(-3,3,7)} @@ -80,7 +79,7 @@ class ModselTestCase(unittest.TestCase): q = PACC(SlowLR()) - data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce() + data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10).reduce(random_state=1) training, validation = data.training.split_stratified(0.7, random_state=1) param_grid = {'classifier__C': np.logspace(-1,1,3)} diff --git a/setup.py b/setup.py index 1f6c6fb..9f7df5c 100644 --- a/setup.py +++ b/setup.py @@ -125,6 +125,7 @@ setup( # projects. extras_require={ # Optional 'bayes': ['jax', 'jaxlib', 'numpyro'], + 'tests': ['certifi'], }, # If there are data files included in your packages that need to be