From 2a5d0243db7b7e5c89ae1fdca1944d7e4b3a2455 Mon Sep 17 00:00:00 2001 From: andrea Date: Thu, 28 Nov 2019 18:39:19 +0100 Subject: [PATCH] first commit --- .idea/deployment.xml | 20 + .idea/encodings.xml | 4 + .idea/misc.xml | 10 + .idea/modules.xml | 8 + .idea/tesi_funneling.iml | 14 + .idea/vcs.xml | 6 + .idea/webServers.xml | 15 + .idea/workspace.xml | 655 ++++++++++++++++++ src/.gitignore | 1 + src/FPEC_andrea.py | 151 ++++ .../dataset_builder.cpython-37.pyc | Bin 0 -> 24002 bytes src/data/__init__.py | 0 src/data/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 144 bytes .../__pycache__/embeddings.cpython-37.pyc | Bin 0 -> 9387 bytes src/data/__pycache__/languages.cpython-37.pyc | Bin 0 -> 1048 bytes .../__pycache__/supervised.cpython-37.pyc | Bin 0 -> 2286 bytes .../text_preprocessor.cpython-37.pyc | Bin 0 -> 1698 bytes .../__pycache__/tsr_function__.cpython-37.pyc | Bin 0 -> 9820 bytes src/data/embeddings.py | 196 ++++++ src/data/languages.py | 42 ++ src/data/reader/__init__.py | 0 .../__pycache__/__init__.cpython-37.pyc | Bin 0 -> 151 bytes .../jrcacquis_reader.cpython-37.pyc | Bin 0 -> 10949 bytes .../__pycache__/rcv_reader.cpython-37.pyc | Bin 0 -> 6717 bytes .../wikipedia_tools.cpython-37.pyc | Bin 0 -> 10598 bytes src/data/reader/jrcacquis_reader.py | 321 +++++++++ src/data/reader/rcv_reader.py | 225 ++++++ src/data/reader/wikipedia_tools.py | 304 ++++++++ src/data/supervised.py | 75 ++ src/data/text_preprocessor.py | 33 + src/data/tsr_function__.py | 270 ++++++++ src/dataset_builder.py | 567 +++++++++++++++ .../__pycache__/learners.cpython-37.pyc | Bin 0 -> 27631 bytes src/learning/learners.py | 646 +++++++++++++++++ src/results/results.csv | 7 + src/transformers/__init__.py | 0 src/transformers/clesa.py | 110 +++ src/transformers/dci.py | 154 ++++ src/transformers/riboc.py | 53 ++ .../__pycache__/evaluation.cpython-37.pyc | Bin 0 -> 4195 bytes src/util/__pycache__/file.cpython-37.pyc | Bin 0 -> 1718 bytes src/util/__pycache__/metrics.cpython-37.pyc | Bin 0 -> 6430 bytes src/util/__pycache__/results.cpython-37.pyc | Bin 0 -> 2041 bytes src/util/evaluation.py | 95 +++ src/util/file.py | 36 + src/util/metrics.py | 168 +++++ src/util/results.py | 33 + 47 files changed, 4219 insertions(+) create mode 100644 .idea/deployment.xml create mode 100644 .idea/encodings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/tesi_funneling.iml create mode 100644 .idea/vcs.xml create mode 100644 .idea/webServers.xml create mode 100644 .idea/workspace.xml create mode 100644 src/.gitignore create mode 100644 src/FPEC_andrea.py create mode 100644 src/__pycache__/dataset_builder.cpython-37.pyc create mode 100644 src/data/__init__.py create mode 100644 src/data/__pycache__/__init__.cpython-37.pyc create mode 100644 src/data/__pycache__/embeddings.cpython-37.pyc create mode 100644 src/data/__pycache__/languages.cpython-37.pyc create mode 100644 src/data/__pycache__/supervised.cpython-37.pyc create mode 100644 src/data/__pycache__/text_preprocessor.cpython-37.pyc create mode 100644 src/data/__pycache__/tsr_function__.cpython-37.pyc create mode 100644 src/data/embeddings.py create mode 100644 src/data/languages.py create mode 100644 src/data/reader/__init__.py create mode 100644 src/data/reader/__pycache__/__init__.cpython-37.pyc create mode 100644 src/data/reader/__pycache__/jrcacquis_reader.cpython-37.pyc create mode 100644 src/data/reader/__pycache__/rcv_reader.cpython-37.pyc create mode 100644 src/data/reader/__pycache__/wikipedia_tools.cpython-37.pyc create mode 100644 src/data/reader/jrcacquis_reader.py create mode 100644 src/data/reader/rcv_reader.py create mode 100644 src/data/reader/wikipedia_tools.py create mode 100755 src/data/supervised.py create mode 100644 src/data/text_preprocessor.py create mode 100755 src/data/tsr_function__.py create mode 100644 src/dataset_builder.py create mode 100644 src/learning/__pycache__/learners.cpython-37.pyc create mode 100644 src/learning/learners.py create mode 100644 src/results/results.csv create mode 100644 src/transformers/__init__.py create mode 100644 src/transformers/clesa.py create mode 100644 src/transformers/dci.py create mode 100644 src/transformers/riboc.py create mode 100644 src/util/__pycache__/evaluation.cpython-37.pyc create mode 100644 src/util/__pycache__/file.cpython-37.pyc create mode 100644 src/util/__pycache__/metrics.cpython-37.pyc create mode 100644 src/util/__pycache__/results.cpython-37.pyc create mode 100644 src/util/evaluation.py create mode 100644 src/util/file.py create mode 100644 src/util/metrics.py create mode 100644 src/util/results.py diff --git a/.idea/deployment.xml b/.idea/deployment.xml new file mode 100644 index 0000000..d6ba90a --- /dev/null +++ b/.idea/deployment.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..15a15b2 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..9fa0db4 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,10 @@ + + + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..5ddd5cc --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/tesi_funneling.iml b/.idea/tesi_funneling.iml new file mode 100644 index 0000000..828b8c4 --- /dev/null +++ b/.idea/tesi_funneling.iml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/webServers.xml b/.idea/webServers.xml new file mode 100644 index 0000000..56d5a5b --- /dev/null +++ b/.idea/webServers.xml @@ -0,0 +1,15 @@ + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000..92b2f97 --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,655 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NaivePolylingualClassifier + tra + base_l + tfidf + proba + we + Wordembeddings + hstack + ha + timeit + ti + time + dot + vec + _fit_binary + oneVs + embed + no tf- + embedding_matrix + WordEm + WordEmbeddings + # pretrai + # [pre + joblib + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +