diff --git a/src/data/__pycache__/__init__.cpython-37.pyc b/src/data/__pycache__/__init__.cpython-37.pyc deleted file mode 100644 index 12306d0..0000000 Binary files a/src/data/__pycache__/__init__.cpython-37.pyc and /dev/null differ diff --git a/src/data/__pycache__/embeddings.cpython-37.pyc b/src/data/__pycache__/embeddings.cpython-37.pyc deleted file mode 100644 index 88eb734..0000000 Binary files a/src/data/__pycache__/embeddings.cpython-37.pyc and /dev/null differ diff --git a/src/data/__pycache__/languages.cpython-37.pyc b/src/data/__pycache__/languages.cpython-37.pyc deleted file mode 100644 index 0526949..0000000 Binary files a/src/data/__pycache__/languages.cpython-37.pyc and /dev/null differ diff --git a/src/data/__pycache__/supervised.cpython-37.pyc b/src/data/__pycache__/supervised.cpython-37.pyc deleted file mode 100644 index 3ac82eb..0000000 Binary files a/src/data/__pycache__/supervised.cpython-37.pyc and /dev/null differ diff --git a/src/data/__pycache__/text_preprocessor.cpython-37.pyc b/src/data/__pycache__/text_preprocessor.cpython-37.pyc deleted file mode 100644 index 81f2690..0000000 Binary files a/src/data/__pycache__/text_preprocessor.cpython-37.pyc and /dev/null differ diff --git a/src/data/__pycache__/tsr_function__.cpython-37.pyc b/src/data/__pycache__/tsr_function__.cpython-37.pyc deleted file mode 100644 index b695d10..0000000 Binary files a/src/data/__pycache__/tsr_function__.cpython-37.pyc and /dev/null differ diff --git a/src/data/reader/__pycache__/__init__.cpython-37.pyc b/src/data/reader/__pycache__/__init__.cpython-37.pyc deleted file mode 100644 index 317ad21..0000000 Binary files a/src/data/reader/__pycache__/__init__.cpython-37.pyc and /dev/null differ diff --git a/src/data/reader/__pycache__/jrcacquis_reader.cpython-37.pyc b/src/data/reader/__pycache__/jrcacquis_reader.cpython-37.pyc deleted file mode 100644 index 939ec96..0000000 Binary files a/src/data/reader/__pycache__/jrcacquis_reader.cpython-37.pyc and /dev/null differ 
from tqdm import tqdm
import re
import sys


def mask_numbers(data, number_mask='numbermask'):
    """Replace number-like substrings in every text with a placeholder.

    A match begins at a word boundary with an ASCII digit, continues through
    any run of digits, dots, commas and hyphens, and must end at a word
    boundary.

    :param data: iterable of strings to process; it is wrapped in a tqdm
        progress bar labelled 'masking numbers'.
    :param number_mask: replacement passed to ``re.sub`` for each match.
    :return: a new list holding one masked string per input text, in the
        order the inputs were iterated.
    """
    number_pattern = re.compile(r'\b[0-9][0-9.,-]*\b')
    progress = tqdm(data, desc='masking numbers')
    return [number_pattern.sub(number_mask, doc) for doc in progress]