spaces removed

This commit is contained in:
cesare 2021-12-16 18:27:41 +01:00
parent 1bfde8a489
commit d486fd2c00
3 changed files with 45 additions and 44 deletions

View File

@ -159,7 +159,7 @@
{
"data": {
"text/plain": [
"<Graph identifier=N154f08dbddf34364b307d99f1e8be418 (<class 'rdflib.graph.Graph'>)>"
"<Graph identifier=N354cb351c0e54b49974ca1568175540a (<class 'rdflib.graph.Graph'>)>"
]
},
"execution_count": 7,
@ -220,7 +220,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"4371\n"
"4366\n"
]
}
],
@ -229,14 +229,14 @@
"for index, row in df_data.iterrows():\n",
" \n",
" if row.Subject.lower()==\"preflabel\":\n",
" label=row[\"Concept ID\"]\n",
" enlabel=Literal(row[\"Term\"], lang='en')\n",
" frlabel=Literal(row[\"French\"], lang='fr')\n",
" nllabel=Literal(row['Dutch'], lang='nl')\n",
" delabel=Literal(row['German'], lang='de')\n",
" itlabel=Literal(row['Italian'], lang='it')\n",
" sllabel=Literal(row['Slovenian'], lang='sl')\n",
" ellabel=Literal(row['Greek'], lang='el')\n",
" label=row[\"Concept ID\"].strip()\n",
" enlabel=Literal(row[\"Term\"].strip(), lang='en')\n",
" frlabel=Literal(row[\"French\"].strip(), lang='fr')\n",
" nllabel=Literal(row['Dutch'].strip(), lang='nl')\n",
" delabel=Literal(row['German'].strip(), lang='de')\n",
" itlabel=Literal(row['Italian'].strip(), lang='it')\n",
" sllabel=Literal(row['Slovenian'].strip(), lang='sl')\n",
" ellabel=Literal(row['Greek'].strip(), lang='el')\n",
" \n",
" c1rdf.add((sshocterm[label], RDF.type, SKOS.Concept))\n",
" c1rdf.add((sshocterm[label], SKOS.inScheme, sshocterm['']))\n",
@ -250,28 +250,28 @@
" c1rdf.add((sshocterm[label], SKOS.prefLabel, ellabel))\n",
" if row.Subject.lower()==\"altlabel\":\n",
" if not pd.isna(row['Term']):\n",
" c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Term\"], lang='en')))\n",
" c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Term\"].strip(), lang='en')))\n",
" if not pd.isna(row['French']):\n",
" c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"French\"], lang='fr')))\n",
" c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"French\"].strip(), lang='fr')))\n",
" if not pd.isna(row['Dutch']):\n",
" c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Dutch\"], lang='nl')))\n",
" c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Dutch\"].strip(), lang='nl')))\n",
" if not pd.isna(row['German']):\n",
" c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"German\"], lang='de')))\n",
" c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"German\"].strip(), lang='de')))\n",
" if not pd.isna(row['Italian']):\n",
" c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Italian\"], lang='it')))\n",
" c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Italian\"].strip(), lang='it')))\n",
" if not pd.isna(row['Slovenian']):\n",
" c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Slovenian\"], lang='sl')))\n",
" c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Slovenian\"].strip(), lang='sl')))\n",
" if not pd.isna(row['Greek']):\n",
" c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Greek\"], lang='el')))\n",
" c1rdf.add((sshocterm[label], SKOS.altLabel, Literal(row[\"Greek\"].strip(), lang='el')))\n",
" \n",
" if row.Subject.lower()==\"definition\":\n",
" endef=Literal(row[\"Term\"], lang='en')\n",
" frdef=Literal(row[\"French\"], lang='fr')\n",
" nldef=Literal(row['Dutch'], lang='nl')\n",
" dedef=Literal(row['German'], lang='de')\n",
" itdef=Literal(row['Italian'], lang='it')\n",
" sldef=Literal(row['Slovenian'], lang='sl')\n",
" eldef=Literal(row['Greek'], lang='el')\n",
" endef=Literal(row[\"Term\"].strip(), lang='en')\n",
" frdef=Literal(row[\"French\"].strip(), lang='fr')\n",
" nldef=Literal(row['Dutch'].strip(), lang='nl')\n",
" dedef=Literal(row['German'].strip(), lang='de')\n",
" itdef=Literal(row['Italian'].strip(), lang='it')\n",
" sldef=Literal(row['Slovenian'].strip(), lang='sl')\n",
" eldef=Literal(row['Greek'].strip(), lang='el')\n",
" \n",
" c1rdf.add((sshocterm[label], SKOS.definition, endef))\n",
" c1rdf.add((sshocterm[label], SKOS.definition, frdef))\n",
@ -281,7 +281,7 @@
" c1rdf.add((sshocterm[label], SKOS.definition, sldef))\n",
" c1rdf.add((sshocterm[label], SKOS.definition, eldef))\n",
" if not pd.isna(row['Source of definition']):\n",
" source=Literal(row['Source of definition'])\n",
" source=Literal(row['Source of definition'].strip())\n",
" #print (f'{label}, {source}')\n",
" c1rdf.add((sshocterm[label], dct.source, source))\n",
" if not pd.isna(row['Loterre Open Science Thesaurus']):\n",
@ -297,10 +297,10 @@
" c1rdf.add((sshocterm[label], SKOS.exactMatch, lov2))\n",
" #Terms4FAIRSkills ISO \n",
" if not pd.isna(row['Terms4FAIRSkills']):\n",
" t4fs=Literal(row['Terms4FAIRSkills'])\n",
" t4fs=Literal(row['Terms4FAIRSkills'].strip())\n",
" c1rdf.add((sshocterm[label], SKOS.note, t4fs))\n",
" if not pd.isna(row['ISO']):\n",
" tiso=Literal(row['ISO'])\n",
" tiso=Literal(row['ISO'].strip())\n",
" c1rdf.add((sshocterm[label], SKOS.note, tiso))\n",
" if not pd.isna(row['Broader Concept']):\n",
" broc=URIRef(row['Broader Concept'])\n",
@ -335,8 +335,8 @@
"metadata": {},
"outputs": [],
"source": [
"c1rdf.serialize(destination='data/mdstskos.rdf', format=\"n3\");#format=\"pretty-xml\")\n",
"#comrdf.serialize(destination='data/parsed_rdf/prima_cantica_forme_com.rdf', format=\"n3\");"
"c1rdf.serialize(destination='data/mdstskos.ttl', format=\"n3\");#format=\"pretty-xml\")\n",
"c1rdf.serialize(destination='data/mdstskos.rdf', format=\"pretty-xml\");#format=\"pretty-xml\")"
]
},
{
@ -409,7 +409,7 @@
{
"data": {
"text/plain": [
"<Graph identifier=Nc8bd6a6227614ea78529b51b43dbda88 (<class 'rdflib.graph.Graph'>)>"
"<Graph identifier=N876b7db85e864943a1e0342d4e4dafdc (<class 'rdflib.graph.Graph'>)>"
]
},
"execution_count": 15,
@ -459,7 +459,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"3027\n"
"3030\n"
]
}
],
@ -476,22 +476,22 @@
" \n",
" strsource=strsource.replace('(source: ','')\n",
" strsource=strsource.replace(')','')\n",
" source=Literal(strsource)\n",
" enterm=Literal(row[\"Englishterm\"], lang='en')\n",
" frterm=Literal(row[\"Frenchterm\"], lang='fr')\n",
" nlterm=Literal(row['Dutchterm'], lang='nl')\n",
" source=Literal(strsource.strip())\n",
" enterm=Literal(row[\"Englishterm\"].strip(), lang='en')\n",
" frterm=Literal(row[\"Frenchterm\"].strip(), lang='fr')\n",
" nlterm=Literal(row['Dutchterm'].strip(), lang='nl')\n",
" #determ=Literal(row['Germanterm'], lang='de')\n",
" itterm=Literal(row['Italianterm'], lang='it')\n",
" #slterm=Literal(row['Slovenianterm'], lang='sl')\n",
" elterm=Literal(row['Greekterm'], lang='el')\n",
" itterm=Literal(row['Italianterm'].strip(), lang='it')\n",
" #slterm=Literal(row['Slovenianterm'].strip(), lang='sl')\n",
" elterm=Literal(row['Greekterm'].strip(), lang='el')\n",
" \n",
" endef=Literal(row[\"Englishdefinition\"], lang='en')\n",
" frdef=Literal(row[\"Frenchdefinition\"], lang='fr')\n",
" nldef=Literal(row['Dutchdefinition'], lang='nl')\n",
" endef=Literal(row[\"Englishdefinition\"].strip(), lang='en')\n",
" frdef=Literal(row[\"Frenchdefinition\"].strip(), lang='fr')\n",
" nldef=Literal(row['Dutchdefinition'].strip(), lang='nl')\n",
" #dedef=Literal(row['Germandefinition'], lang='de')\n",
" itdef=Literal(row['Italiandefinition'], lang='it')\n",
" itdef=Literal(row['Italiandefinition'].strip(), lang='it')\n",
" #sldef=Literal(row['Sloveniandefinition'], lang='sl')\n",
" eldef=Literal(row['Greekdefinition'], lang='el')\n",
" eldef=Literal(row['Greekdefinition'].strip(), lang='el')\n",
" \n",
" ccr.add((sshoccmd[label], RDF.type, SKOS.Concept))\n",
" ccr.add((sshoccmd[label], SKOS.prefLabel, enterm))\n",
@ -532,7 +532,8 @@
"metadata": {},
"outputs": [],
"source": [
"ccr.serialize(destination='data/skosccr.rdf', format=\"n3\");#format=\"pretty-xml\")"
"ccr.serialize(destination='data/skosccr.rdf', format=\"pretty-xml\");#format=\"n3\")\n",
"ccr.serialize(destination='data/skosccr.ttl', format=\"n3\");#format=\"n3\")"
]
},
{