From b32243542308889ad1a8ff935e6f5d8e633eb81a Mon Sep 17 00:00:00 2001
From: andreapdr <andrea.pedrotti@isti.cnr.it>
Date: Tue, 12 Mar 2024 13:52:34 +0100
Subject: [PATCH] examples and readme update

---
 example.sh | 4 ++++
 infer.py   | 2 +-
 readme.md  | 5 +++--
 3 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/example.sh b/example.sh
index 122cab6..e1a574d 100644
--- a/example.sh
+++ b/example.sh
@@ -1,3 +1,7 @@
 #!bin/bash
 
+# carica il dataset da examples/dataset/sample-dataset.csv
+# le predizioni vengono salvate in examples/results/sample-dataset_<timestamp>.csv
+# --category_map specifica il path del file di mapping. Nel file di output sono salvati i nomi delle categorie predette.
+
 python infer.py --datapath examples/dataset/sample-dataset.csv --outdir examples/results --category_map examples/dataset/dataset-mapping.csv
\ No newline at end of file
diff --git a/infer.py b/infer.py
index 004eaa4..dffc6ef 100644
--- a/infer.py
+++ b/infer.py
@@ -103,7 +103,7 @@ if __name__ == "__main__":
     parser = ArgumentParser()
     parser.add_argument("--datapath", required=True, type=str, help="path to csv file containing the documents to be classified")
     parser.add_argument("--outdir", type=str, default="results/inference-preds", help="path to store csv file containing gfun predictions")
-    parser.add_argument("--category_map", type=str, default="models/category_mappers/rai-mapping.csv", help="path to csv file containing the mapping from label name to label id [str: id]")
+    parser.add_argument("--category_map", type=str, default=None, help="path to csv file containing the mapping from label name to label id [str: id]")
     parser.add_argument("--nlabels", type=int, default=28)
     parser.add_argument("--muse_dir", type=str, default="embeddings", help="path to muse embeddings")
     parser.add_argument("--trained_gfun", type=str, default="rai_pmt_mean_231029", help="name of the trained gfun instance")
diff --git a/readme.md b/readme.md
index 4085acb..52c41c4 100644
--- a/readme.md
+++ b/readme.md
@@ -9,7 +9,7 @@ mkdir resources
 # optional
 mkdir models/category_mappers
 ```
-In `models`, scaricare i modelli pre-trained condivisi. La directory `models` contiene 4 subdir `metaclassifier, vgfs, vectorizer, category_mappers`.
+In `models`, scaricare i modelli pre-trained condivisi. La directory `models` contiene 3 subdir `metaclassifier, vgfs, vectorizer`.
 In `resources` estrarre i muse-embeddings.
 In `models/category_mappers` estrarre il file csv che contiene il mapping da category label a category id (opzionale).
 
@@ -22,13 +22,14 @@ python infer.py --datapth <path/to/the/csv_file.csv>
 
 I risultati saranno salvati di default nella cartella `results/inference-preds`, in un file csv denominato a seconda input file specificato in `--datapath` + il timetamp della run (e.g., `<csv_file>_<240312_13345>.csv`) (è possibile cambiare directory di output tramite `--outdir <my/output/dir/>`)
 
+NB: per ottenere i nomi (stringhe) delle classi predette è necessario specificare il path del file csv che contiene il mapping class id -> class label (argomento `--category_map`).
 
 ```
 optional arguments:
   -h, --help            show this help message and exit
   --datapath            path to csv file containing the documents to be classified
   --outdir              path to store csv file containing gfun predictions (default=results/inference-preds)
-  --category_map         path to csv file containing the mapping from label name to label id [str: id] (default=models/category_mappers/rai-mapping.csv)
+  --category_map        path to csv file containing the mapping from label name to label id [str: id] (default=None)
   --nlabels             number of target classes defined in the annotation schema (default=28)
   --muse_dir            path to muse embeddings
   --trained_gfun        name of the trained gfun instance