From d486fd2c004360363221f662f511a053f9d0e364 Mon Sep 17 00:00:00 2001 From: cesare Date: Thu, 16 Dec 2021 18:27:41 +0100 Subject: [PATCH] spaces removed --- ...tadata_final.csv => Metadata_final_v0.csv} | 0 ...ogy_final.csv => Terminology_final_v0.csv} | 0 sshoc_31_skos.ipynb | 89 ++++++++++--------- 3 files changed, 45 insertions(+), 44 deletions(-) rename data/input/{Metadata_final.csv => Metadata_final_v0.csv} (100%) rename data/input/{Terminology_final.csv => Terminology_final_v0.csv} (100%) diff --git a/data/input/Metadata_final.csv b/data/input/Metadata_final_v0.csv similarity index 100% rename from data/input/Metadata_final.csv rename to data/input/Metadata_final_v0.csv diff --git a/data/input/Terminology_final.csv b/data/input/Terminology_final_v0.csv similarity index 100% rename from data/input/Terminology_final.csv rename to data/input/Terminology_final_v0.csv diff --git a/sshoc_31_skos.ipynb b/sshoc_31_skos.ipynb index a52ae0a..8e1e758 100644 --- a/sshoc_31_skos.ipynb +++ b/sshoc_31_skos.ipynb @@ -159,7 +159,7 @@ { "data": { "text/plain": [ - ")>" + ")>" ] }, "execution_count": 7, @@ -220,7 +220,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "4371\n" + "4366\n" ] } ], @@ -229,14 +229,14 @@ "for index, row in df_data.iterrows():\n", " \n", " if row.Subject.lower()==\"preflabel\":\n", - " label=row[\"Concept ID\"]\n", - " enlabel=Literal(row[\"Term\"], lang='en')\n", - " frlabel=Literal(row[\"French\"], lang='fr')\n", - " nllabel=Literal(row['Dutch'], lang='nl')\n", - " delabel=Literal(row['German'], lang='de')\n", - " itlabel=Literal(row['Italian'], lang='it')\n", - " sllabel=Literal(row['Slovenian'], lang='sl')\n", - " ellabel=Literal(row['Greek'], lang='el')\n", + " label=row[\"Concept ID\"].strip()\n", + " enlabel=Literal(row[\"Term\"].strip(), lang='en')\n", + " frlabel=Literal(row[\"French\"].strip(), lang='fr')\n", + " nllabel=Literal(row['Dutch'].strip(), lang='nl')\n", + " delabel=Literal(row['German'].strip(), lang='de')\n", + " itlabel=Literal(row['Italian'].strip(), lang='it')\n", + " sllabel=Literal(row['Slovenian'].strip(), lang='sl')\n", + " ellabel=Literal(row['Greek'].strip(), lang='el')\n", " \n", " c1rdf.add((sshocterm[label], RDF.type, SKOS.Concept))\n", " c1rdf.add((sshocterm[label], SKOS.inScheme, sshocterm['']))\n", @@ -250,28 +250,28 @@ " c1rdf.add((sshocterm[label], SKOS.prefLabel, ellabel))\n", " if row.Subject.lower()==\"altlabel\":\n", " if not pd.isna(row['Term']):\n", - " c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Term\"], lang='en')))\n", + " c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Term\"].strip(), lang='en')))\n", " if not pd.isna(row['French']):\n", - " c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"French\"], lang='fr')))\n", + " c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"French\"].strip(), lang='fr')))\n", " if not pd.isna(row['Dutch']):\n", - " c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Dutch\"], lang='nl')))\n", + " c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Dutch\"].strip(), lang='nl')))\n", " if not pd.isna(row['German']):\n", - " c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"German\"], lang='de')))\n", + " c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"German\"].strip(), lang='de')))\n", " if not pd.isna(row['Italian']):\n", - " c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Italian\"], lang='it')))\n", + " c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Italian\"].strip(), lang='it')))\n", " if not pd.isna(row['Slovenian']):\n", - " c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Slovenian\"], lang='sl')))\n", + " c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Slovenian\"].strip(), lang='sl')))\n", " if not pd.isna(row['Greek']):\n", - " c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Greek\"], lang='el')))\n", + " c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Greek\"].strip(), lang='el')))\n", " \n", " if row.Subject.lower()==\"definition\":\n", - " endef=Literal(row[\"Term\"], lang='en')\n", - " frdef=Literal(row[\"French\"], lang='fr')\n", - " nldef=Literal(row['Dutch'], lang='nl')\n", - " dedef=Literal(row['German'], lang='de')\n", - " itdef=Literal(row['Italian'], lang='it')\n", - " sldef=Literal(row['Slovenian'], lang='sl')\n", - " eldef=Literal(row['Greek'], lang='el')\n", + " endef=Literal(row[\"Term\"].strip(), lang='en')\n", + " frdef=Literal(row[\"French\"].strip(), lang='fr')\n", + " nldef=Literal(row['Dutch'].strip(), lang='nl')\n", + " dedef=Literal(row['German'].strip(), lang='de')\n", + " itdef=Literal(row['Italian'].strip(), lang='it')\n", + " sldef=Literal(row['Slovenian'].strip(), lang='sl')\n", + " eldef=Literal(row['Greek'].strip(), lang='el')\n", " \n", " c1rdf.add((sshocterm[label], SKOS.definition, endef))\n", " c1rdf.add((sshocterm[label], SKOS.definition, frdef))\n", @@ -281,7 +281,7 @@ " c1rdf.add((sshocterm[label], SKOS.definition, sldef))\n", " c1rdf.add((sshocterm[label], SKOS.definition, eldef))\n", " if not pd.isna(row['Source of definition']):\n", - " source=Literal(row['Source of definition'])\n", + " source=Literal(row['Source of definition'].strip())\n", " #print (f'{label}, {source}')\n", " c1rdf.add((sshocterm[label], dct.source, source))\n", " if not pd.isna(row['Loterre Open Science Thesaurus']):\n", @@ -297,10 +297,10 @@ " c1rdf.add((sshocterm[label], SKOS.exactMatch, lov2))\n", " #Terms4FAIRSkills ISO \n", " if not pd.isna(row['Terms4FAIRSkills']):\n", - " t4fs=Literal(row['Terms4FAIRSkills'])\n", + " t4fs=Literal(row['Terms4FAIRSkills'].strip())\n", " c1rdf.add((sshocterm[label], SKOS.note, t4fs))\n", " if not pd.isna(row['ISO']):\n", - " tiso=Literal(row['ISO'])\n", + " tiso=Literal(row['ISO'].strip())\n", " c1rdf.add((sshocterm[label], SKOS.note, tiso))\n", " if not pd.isna(row['Broader Concept']):\n", " broc=URIRef(row['Broader Concept'])\n", @@ -335,8 +335,8 @@ "metadata": {}, "outputs": [], "source": [ - "c1rdf.serialize(destination='data/mdstskos.rdf', format=\"n3\");#format=\"pretty-xml\")\n", - "#comrdf.serialize(destination='data/parsed_rdf/prima_cantica_forme_com.rdf', format=\"n3\");" + "c1rdf.serialize(destination='data/mdstskos.ttl', format=\"n3\");#format=\"pretty-xml\")\n", + "c1rdf.serialize(destination='data/mdstskos.rdf', format=\"pretty-xml\");#format=\"pretty-xml\")" ] }, { @@ -409,7 +409,7 @@ { "data": { "text/plain": [ - ")>" + ")>" ] }, "execution_count": 15, @@ -459,7 +459,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "3027\n" + "3030\n" ] } ], @@ -476,22 +476,22 @@ " \n", " strsource=strsource.replace('(source: ','')\n", " strsource=strsource.replace(')','')\n", - " source=Literal(strsource)\n", - " enterm=Literal(row[\"Englishterm\"], lang='en')\n", - " frterm=Literal(row[\"Frenchterm\"], lang='fr')\n", - " nlterm=Literal(row['Dutchterm'], lang='nl')\n", + " source=Literal(strsource.strip())\n", + " enterm=Literal(row[\"Englishterm\"].strip(), lang='en')\n", + " frterm=Literal(row[\"Frenchterm\"].strip(), lang='fr')\n", + " nlterm=Literal(row['Dutchterm'].strip(), lang='nl')\n", " #determ=Literal(row['Germanterm'], lang='de')\n", - " itterm=Literal(row['Italianterm'], lang='it')\n", - " #slterm=Literal(row['Slovenianterm'], lang='sl')\n", - " elterm=Literal(row['Greekterm'], lang='el')\n", + " itterm=Literal(row['Italianterm'].strip(), lang='it')\n", + " #slterm=Literal(row['Slovenianterm'].strip(), lang='sl')\n", + " elterm=Literal(row['Greekterm'].strip(), lang='el')\n", " \n", - " endef=Literal(row[\"Englishdefinition\"], lang='en')\n", - " frdef=Literal(row[\"Frenchdefinition\"], lang='fr')\n", - " nldef=Literal(row['Dutchdefinition'], lang='nl')\n", + " endef=Literal(row[\"Englishdefinition\"].strip(), lang='en')\n", + " frdef=Literal(row[\"Frenchdefinition\"].strip(), lang='fr')\n", + " nldef=Literal(row['Dutchdefinition'].strip(), lang='nl')\n", " #dedef=Literal(row['Germandefinition'], lang='de')\n", - " itdef=Literal(row['Italiandefinition'], lang='it')\n", + " itdef=Literal(row['Italiandefinition'].strip(), lang='it')\n", " #sldef=Literal(row['Sloveniandefinition'], lang='sl')\n", - " eldef=Literal(row['Greekdefinition'], lang='el')\n", + " eldef=Literal(row['Greekdefinition'].strip(), lang='el')\n", " \n", " ccr.add((sshoccmd[label], RDF.type, SKOS.Concept))\n", " ccr.add((sshoccmd[label], SKOS.prefLabel, enterm))\n", @@ -532,7 +532,8 @@ "metadata": {}, "outputs": [], "source": [ - "ccr.serialize(destination='data/skosccr.rdf', format=\"n3\");#format=\"pretty-xml\")" + "ccr.serialize(destination='data/skosccr.rdf', format=\"pretty-xml\");#format=\"n3\")\n", + "ccr.serialize(destination='data/skosccr.ttl', format=\"n3\");#format=\"n3\")" ] }, {