sshoc-skosmapping/sshoc_lessico_panlatino.ipynb

830 lines
32 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "lightweight-detroit",
"metadata": {},
"source": [
"## Mapping *Pan-Latin Textile Fibres Vocabulary* from spreadsheet to SKOS resources\n",
"\n",
"This notebook implements a simple parser that transforms the Pan-Latin Textile Fibres Vocabulary, developed within the Realiter network and published as spreadsheets, into SKOS resources. The parser reads the spreadsheets and converts their content into SKOS data following a set of mapping rules; each resulting vocabulary is written to the *data* directory as a Turtle file and as an RDF/XML file.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "modified-vegetarian",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import rdflib\n",
"import itertools\n",
"import yaml\n",
"import datetime"
]
},
{
"cell_type": "markdown",
"id": "hundred-singles",
"metadata": {},
"source": [
"The file *config-lessico.yaml* contains the external information used in the parsing, including the location of the spreadsheets. Set the correct values before running the notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "stupid-lewis",
"metadata": {},
"outputs": [],
"source": [
"# Load the external settings (namespaces, spreadsheet locations, descriptive texts).\n",
"CONFIG_FILE = 'config-lessico.yaml'\n",
"try:\n",
"    # Read as UTF-8 explicitly so the result does not depend on the platform default encoding.\n",
"    with open(CONFIG_FILE, 'r', encoding='utf-8') as stream:\n",
"        conf = yaml.safe_load(stream)\n",
"except FileNotFoundError:\n",
"    # The message names the file that is actually opened.\n",
"    print('Warning ' + CONFIG_FILE + ' file not present! Please store it in the same directory as the notebook')\n",
"    # Later cells index `conf` unconditionally; stop here instead of failing later with a NameError.\n",
"    raise\n",
"except yaml.YAMLError as exc:\n",
"    print('Could not parse ' + CONFIG_FILE + ':', exc)\n",
"    raise\n",
"#print (conf)"
]
},
{
"cell_type": "markdown",
"id": "generic-thong",
"metadata": {},
"source": [
"The following cell defines the *Namespaces* used in the parsing."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "oriental-structure",
"metadata": {},
"outputs": [],
"source": [
"from rdflib.namespace import (\n",
"    DC, DCAT, DCTERMS, OWL,\n",
"    RDF, RDFS, SKOS,\n",
"    XMLNS, XSD,\n",
")\n",
"from rdflib import Namespace\n",
"from rdflib import URIRef, BNode, Literal\n",
"\n",
"# Base namespace of the vocabulary, read from the configuration file.\n",
"pltextile = Namespace(conf['Namespaces']['TEXTILETERM'])\n",
"dc11 = Namespace(\"http://purl.org/dc/elements/1.1/\")\n",
"dct = Namespace(\"http://purl.org/dc/terms/\")\n",
"# rdflib builds a term URI by appending the local name to the namespace string,\n",
"# so the base must end with '/'; without it iso369.eng is '.../iso639-3eng'.\n",
"# NOTE(review): confirm that this base URI is the intended authority for ISO 639-3 codes.\n",
"iso369 = Namespace(\"http://id.loc.gov/vocabulary/iso639-3/\")"
]
},
{
"cell_type": "markdown",
"id": "sacred-shopper",
"metadata": {},
"source": [
"Download the **Lessico** spreadsheet and display its first rows to check that the operation has been executed correctly."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "systematic-saudi",
"metadata": {},
"outputs": [],
"source": [
"# Read the spreadsheet from the location configured under Source/LESSICOSOURCE.\n",
"url = conf['Source']['LESSICOSOURCE']\n",
"df_data = pd.read_csv(filepath_or_buffer=url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "sunrise-reunion",
"metadata": {},
"outputs": [],
"source": [
"# Preview the first five rows to verify the download.\n",
"df_data.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "native-judges",
"metadata": {},
"outputs": [],
"source": [
"# Give the regional-variant columns the short names used by the mapping cell.\n",
"regional_columns = {\n",
"    'es [ARG]': 'es-arg',\n",
"    'es [MEX]': 'es-mex',\n",
"    'fr [CA]': 'fr-ca',\n",
"}\n",
"df_data.rename(columns=regional_columns, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "united-samoa",
"metadata": {},
"outputs": [],
"source": [
"# Sanity check: first word of the first line of the Italian cell in the first row.\n",
"first_it_cell = df_data.iloc[0]['it']\n",
"first_it_cell.split('\\n')[0].split(' ')[0]"
]
},
{
"cell_type": "markdown",
"id": "indonesian-curtis",
"metadata": {},
"source": [
"Create a graph for the SKOS data and bind the namespaces to it"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "parallel-bible",
"metadata": {},
"outputs": [],
"source": [
"c1rdf = rdflib.Graph()\n",
"# Register the prefixes used when the graph is serialised (same order as before).\n",
"for prefix, namespace in (\n",
"    ('pltextile', pltextile),\n",
"    ('dc11', dc11),\n",
"    ('dct', dct),\n",
"    ('iso369-3', iso369),\n",
"    ('skos', SKOS),\n",
"    ('dc', DC),\n",
"    ('rdf', RDF),\n",
"    ('rdfs', RDFS),\n",
"    ('owl', OWL),\n",
"    ('xsd', XSD),\n",
"):\n",
"    c1rdf.bind(prefix, namespace)\n"
]
},
{
"cell_type": "markdown",
"id": "quantitative-integer",
"metadata": {},
"source": [
"Insert in the graph the *SKOS.ConceptScheme*"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "protective-anxiety",
"metadata": {},
"outputs": [],
"source": [
"# Metadata of the concept scheme; the texts come from the 'Texts' section of the configuration.\n",
"now = datetime.datetime.today()\n",
"today_date = now.date()\n",
"title = Literal(conf['Texts']['LESSICOTITLE'], lang=conf['Texts']['LANG'])\n",
"description = Literal(conf['Texts']['LESSICODESCRIPTION'], lang=conf['Texts']['LANG'])\n",
"description_it = Literal(conf['Texts']['LESSICODESCRIPTION_IT'], lang='it')\n",
"identifier = Literal(conf['Texts']['LESSICOID'], lang=conf['Texts']['LANG'])\n",
"createddate = Literal(conf['Texts']['LESSICOCREATEDATE'], datatype=XSD.date)\n",
"# The modification date is the day the notebook is run.\n",
"moddate = Literal(today_date, datatype=XSD.date)\n",
"version = Literal(conf['Texts']['LESSICOVERSION'], datatype=XSD.string)\n",
"\n",
"# The scheme is identified by the bare vocabulary namespace.\n",
"scheme = pltextile['']\n",
"c1rdf.add((scheme, RDF.type, SKOS.ConceptScheme))\n",
"c1rdf.add((scheme, DC.title, title))\n",
"c1rdf.add((scheme, DC.identifier, identifier))\n",
"c1rdf.add((scheme, DC.description, description))\n",
"c1rdf.add((scheme, DC.description, description_it))\n",
"c1rdf.add((scheme, dct.created, createddate))\n",
"c1rdf.add((scheme, dct.modified, moddate))\n",
"c1rdf.add((scheme, OWL.versionInfo, version))\n",
"# ISO 639-3 identifiers are three-letter codes. The two-letter codes es, gl, ro, pt and ca\n",
"# are replaced by spa, glg, ron, por and cat so that every URI belongs to the same code set.\n",
"for code in ('eng', 'spa', 'fra', 'glg', 'ita', 'ron', 'por', 'cat'):\n",
"    c1rdf.add((scheme, dct.language, iso369[code]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "vertical-election",
"metadata": {},
"outputs": [],
"source": [
"# The mapping cell calls str.split on every cell, so missing values are replaced by empty strings first.\n",
"df_data.fillna(value='', inplace=True)\n",
"df_data.head()"
]
},
{
"cell_type": "markdown",
"id": "assigned-beijing",
"metadata": {},
"source": [
"The following cell implements the mapping rules for creating SKOS resources."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "typical-prompt",
"metadata": {},
"outputs": [],
"source": [
"# Spreadsheet column -> language tag given to the labels found in that column.\n",
"FIBRE_LABEL_COLUMNS = {\n",
"    'fr': 'fr', 'it': 'it', 'ca': 'ca', 'es': 'es', 'gl': 'gl', 'pt': 'pt', 'ro': 'ro', 'en': 'en',\n",
"    'es-arg': 'es-ar', 'es-mex': 'es-mx', 'fr-ca': 'fr-ca',\n",
"}\n",
"\n",
"for index, row in df_data.iterrows():\n",
"    # The concept identifier is the first Italian term, cut before any ' (' annotation,\n",
"    # with blanks replaced by underscores.\n",
"    strlabel = row['it'].split('\\n')[0].split(' (')[0].strip()\n",
"    label = strlabel.replace(' ', '_')\n",
"    if not label:\n",
"        # Without an Italian term the URI would be pltextile[''], i.e. the scheme itself.\n",
"        continue\n",
"    concept = pltextile[label]\n",
"\n",
"    c1rdf.add((pltextile[''], SKOS.hasTopConcept, concept))\n",
"    c1rdf.add((concept, RDF.type, SKOS.Concept))\n",
"    c1rdf.add((concept, SKOS.inScheme, pltextile['']))\n",
"    c1rdf.add((concept, SKOS.topConceptOf, pltextile['']))\n",
"\n",
"    for column, lang in FIBRE_LABEL_COLUMNS.items():\n",
"        # First line of a cell: preferred label; remaining lines: alternative labels.\n",
"        terms = [term.strip() for term in row[column].split('\\n')]\n",
"        if terms[0]:\n",
"            c1rdf.add((concept, SKOS.prefLabel, Literal(terms[0], lang=lang)))\n",
"        for alternative in terms[1:]:\n",
"            # Blank lines would otherwise become empty skos:altLabel literals.\n",
"            if alternative:\n",
"                c1rdf.add((concept, SKOS.altLabel, Literal(alternative, lang=lang)))\n",
"\n",
"    # The DEF column holds the Italian definition.\n",
"    definition = row['DEF'].strip()\n",
"    if definition:\n",
"        c1rdf.add((concept, SKOS.definition, Literal(definition, lang='it')))\n",
"\n",
"print(len(c1rdf))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "answering-latino",
"metadata": {},
"outputs": [],
"source": [
"# for s, p, o in c1rdf.triples((None, None, None)):\n",
"# print(\"{} {}\".format(s, o.n3))"
]
},
{
"cell_type": "markdown",
"id": "quality-scratch",
"metadata": {},
"source": [
"Create a *Turtle* file and an RDF/XML file in the **data** directory with the SKOS resources for the **Pan-Latin Textile Fibres Vocabulary**."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "equal-voice",
"metadata": {},
"outputs": [],
"source": [
"# Write the vocabulary as Turtle and as RDF/XML.\n",
"# N3 is a superset of Turtle, so the 'turtle' serializer is used for the .ttl file.\n",
"c1rdf.serialize(destination='data/lexpanlatskos_11.ttl', format='turtle')\n",
"# The trailing ';' keeps the notebook from echoing the return value.\n",
"c1rdf.serialize(destination='data/lexpanlatskos_11.rdf', format='pretty-xml');"
]
},
{
"cell_type": "markdown",
"id": "selected-enemy",
"metadata": {},
"source": [
"### Lessico panlatino delle Maniche"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "current-material",
"metadata": {},
"outputs": [],
"source": [
"# Read the sleeves (maniche) spreadsheet and prepare it for the mapping cell.\n",
"urlma = conf['Source']['LESSICOMANICHESOURCE']\n",
"df_data_maniche = pd.read_csv(filepath_or_buffer=urlma)\n",
"maniche_columns = {\n",
"    'es [ARG]': 'es-arg',\n",
"    'es [MEX]': 'es-mex',\n",
"    'pt [BR]': 'pt-br',\n",
"}\n",
"df_data_maniche.rename(columns=maniche_columns, inplace=True)\n",
"# Missing values become empty strings because the mapping cell calls str.split on every cell.\n",
"df_data_maniche.fillna(value='', inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "incorporated-creature",
"metadata": {},
"outputs": [],
"source": [
"cl_manicherdf = rdflib.Graph()\n",
"# Register the prefixes used when the graph is serialised.\n",
"for prefix, namespace in (\n",
"    ('pltextile', pltextile), ('dc11', dc11), ('dct', dct), ('iso369-3', iso369),\n",
"    ('skos', SKOS), ('dc', DC), ('rdf', RDF), ('rdfs', RDFS), ('owl', OWL), ('xsd', XSD),\n",
"):\n",
"    cl_manicherdf.bind(prefix, namespace)\n",
"\n",
"# Metadata of the concept scheme; the texts come from the 'Texts' section of the configuration.\n",
"now = datetime.datetime.today()\n",
"today_date = now.date()\n",
"title = Literal(conf['Texts']['LESSICOMANICHETITLE'], lang=conf['Texts']['LANG'])\n",
"description = Literal(conf['Texts']['LESSICOMANICHEDESCRIPTION'], lang=conf['Texts']['LANG'])\n",
"description_it = Literal(conf['Texts']['LESSICOMANICHEDESCRIPTION_IT'], lang='it')\n",
"identifier = Literal(conf['Texts']['LESSICOMANICHEID'], lang=conf['Texts']['LANG'])\n",
"createddate = Literal(conf['Texts']['LESSICOCREATEDATE'], datatype=XSD.date)\n",
"# The modification date is the day the notebook is run.\n",
"moddate = Literal(today_date, datatype=XSD.date)\n",
"version = Literal(conf['Texts']['LESSICOVERSION'], datatype=XSD.string)\n",
"\n",
"# The scheme is identified by the bare vocabulary namespace.\n",
"scheme = pltextile['']\n",
"cl_manicherdf.add((scheme, RDF.type, SKOS.ConceptScheme))\n",
"cl_manicherdf.add((scheme, DC.title, title))\n",
"cl_manicherdf.add((scheme, DC.identifier, identifier))\n",
"cl_manicherdf.add((scheme, DC.description, description))\n",
"cl_manicherdf.add((scheme, DC.description, description_it))\n",
"cl_manicherdf.add((scheme, dct.created, createddate))\n",
"cl_manicherdf.add((scheme, dct.modified, moddate))\n",
"cl_manicherdf.add((scheme, OWL.versionInfo, version))\n",
"# ISO 639-3 identifiers are three-letter codes. The two-letter codes es, ca and pt\n",
"# are replaced by spa, cat and por so that every URI belongs to the same code set.\n",
"for code in ('eng', 'spa', 'fra', 'cat', 'ita', 'por'):\n",
"    cl_manicherdf.add((scheme, dct.language, iso369[code]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "modular-realtor",
"metadata": {},
"outputs": [],
"source": [
"# Mapping\n",
"# Spreadsheet column -> language tag given to the labels found in that column.\n",
"MANICHE_LABEL_COLUMNS = {\n",
"    'fr': 'fr', 'it': 'it', 'ca': 'ca', 'es': 'es', 'pt': 'pt', 'en': 'en',\n",
"    'es-arg': 'es-ar', 'es-mex': 'es-mx', 'pt-br': 'pt-br',\n",
"}\n",
"\n",
"for index, row in df_data_maniche.iterrows():\n",
"    # The concept identifier is the first Italian term, cut before any '(' annotation,\n",
"    # with blanks replaced by underscores and apostrophes removed.\n",
"    strlabel = row['it'].split('\\n')[0].split('(')[0].strip()\n",
"    # NOTE(review): the first argument of the second replace() displays as an empty string (a no-op).\n",
"    # The repository viewer flags ambiguous Unicode characters in this file, so the original argument\n",
"    # may be a character that does not render here - confirm before removing the call.\n",
"    label = strlabel.replace(' ', '_').replace('', '').replace(\"'\", '').strip()\n",
"    if not label:\n",
"        # Without an Italian term the URI would be pltextile[''], i.e. the scheme itself.\n",
"        continue\n",
"    concept = pltextile[label]\n",
"\n",
"    cl_manicherdf.add((pltextile[''], SKOS.hasTopConcept, concept))\n",
"    cl_manicherdf.add((concept, RDF.type, SKOS.Concept))\n",
"    cl_manicherdf.add((concept, SKOS.inScheme, pltextile['']))\n",
"    cl_manicherdf.add((concept, SKOS.topConceptOf, pltextile['']))\n",
"\n",
"    # The DOI column, when filled, is linked through rdfs:seeAlso.\n",
"    imagedoi = row['DOI'].strip()\n",
"    if imagedoi != '':\n",
"        cl_manicherdf.add((concept, RDFS.seeAlso, URIRef('https://doi.org/' + imagedoi)))\n",
"\n",
"    for column, lang in MANICHE_LABEL_COLUMNS.items():\n",
"        # First line of a cell: preferred label; remaining lines: alternative labels.\n",
"        terms = [term.strip() for term in row[column].split('\\n')]\n",
"        if terms[0]:\n",
"            cl_manicherdf.add((concept, SKOS.prefLabel, Literal(terms[0], lang=lang)))\n",
"        for alternative in terms[1:]:\n",
"            # Blank lines would otherwise become empty skos:altLabel literals.\n",
"            if alternative:\n",
"                cl_manicherdf.add((concept, SKOS.altLabel, Literal(alternative, lang=lang)))\n",
"\n",
"    # The DEF column holds the Italian definition.\n",
"    definition = row['DEF'].strip()\n",
"    if definition:\n",
"        cl_manicherdf.add((concept, SKOS.definition, Literal(definition, lang='it')))\n",
"\n",
"print(len(cl_manicherdf))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "matched-mustang",
"metadata": {},
"outputs": [],
"source": [
"# Write the vocabulary as Turtle and as RDF/XML.\n",
"# N3 is a superset of Turtle, so the 'turtle' serializer is used for the .ttl file.\n",
"cl_manicherdf.serialize(destination='data/lexpanlatmanicheskos_11.ttl', format='turtle')\n",
"# The trailing ';' keeps the notebook from echoing the return value.\n",
"cl_manicherdf.serialize(destination='data/lexpanlatmanicheskos_11.rdf', format='pretty-xml');"
]
},
{
"cell_type": "markdown",
"id": "talented-making",
"metadata": {},
"source": [
"### Lessico panlatino dei Colli"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "centered-advantage",
"metadata": {},
"outputs": [],
"source": [
"# Read the collars (colli) spreadsheet from the location configured under Source/LESSICOCOLLISOURCE.\n",
"urlco = conf['Source']['LESSICOCOLLISOURCE']\n",
"df_data_colli = pd.read_csv(filepath_or_buffer=urlco)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "desperate-uruguay",
"metadata": {},
"outputs": [],
"source": [
"# Give the regional-variant columns the short names used by the mapping cell.\n",
"colli_columns = {\n",
"    'es [ARG]': 'es-arg',\n",
"    'es [MEX]': 'es-mex',\n",
"    'pt [BR]': 'pt-br',\n",
"}\n",
"df_data_colli.rename(columns=colli_columns, inplace=True)\n",
"# Missing values become empty strings because the mapping cell calls str.split on every cell.\n",
"df_data_colli.fillna(value='', inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "magnetic-stake",
"metadata": {},
"outputs": [],
"source": [
"cl_collirdf = rdflib.Graph()\n",
"# Register the prefixes used when the graph is serialised.\n",
"# 'rdfs' is bound as well: the mapping cell emits rdfs:seeAlso, and the other graphs in this notebook bind it.\n",
"for prefix, namespace in (\n",
"    ('pltextile', pltextile),\n",
"    ('dc11', dc11),\n",
"    ('dct', dct),\n",
"    ('iso369-3', iso369),\n",
"    ('skos', SKOS),\n",
"    ('dc', DC),\n",
"    ('rdf', RDF),\n",
"    ('rdfs', RDFS),\n",
"    ('owl', OWL),\n",
"    ('xsd', XSD),\n",
"):\n",
"    cl_collirdf.bind(prefix, namespace)\n"
]
},
{
"cell_type": "markdown",
"id": "hidden-purple",
"metadata": {},
"source": [
"SKOS concept scheme"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "christian-paste",
"metadata": {},
"outputs": [],
"source": [
"# Metadata of the concept scheme; the texts come from the 'Texts' section of the configuration.\n",
"now = datetime.datetime.today()\n",
"today_date = now.date()\n",
"title = Literal(conf['Texts']['LESSICOCOLLITITLE'], lang=conf['Texts']['LANG'])\n",
"description = Literal(conf['Texts']['LESSICOCOLLIDESCRIPTION'], lang=conf['Texts']['LANG'])\n",
"description_it = Literal(conf['Texts']['LESSICOCOLLIDESCRIPTION_IT'], lang='it')\n",
"identifier = Literal(conf['Texts']['LESSICOCOLLIID'], lang=conf['Texts']['LANG'])\n",
"createddate = Literal(conf['Texts']['LESSICOCREATEDATE'], datatype=XSD.date)\n",
"# The modification date is the day the notebook is run.\n",
"moddate = Literal(today_date, datatype=XSD.date)\n",
"version = Literal(conf['Texts']['LESSICOVERSION'], datatype=XSD.string)\n",
"\n",
"# The scheme is identified by the bare vocabulary namespace.\n",
"scheme = pltextile['']\n",
"cl_collirdf.add((scheme, RDF.type, SKOS.ConceptScheme))\n",
"cl_collirdf.add((scheme, DC.title, title))\n",
"cl_collirdf.add((scheme, DC.identifier, identifier))\n",
"cl_collirdf.add((scheme, DC.description, description))\n",
"cl_collirdf.add((scheme, DC.description, description_it))\n",
"cl_collirdf.add((scheme, dct.created, createddate))\n",
"cl_collirdf.add((scheme, dct.modified, moddate))\n",
"cl_collirdf.add((scheme, OWL.versionInfo, version))\n",
"# ISO 639-3 identifiers are three-letter codes. The two-letter codes es, pt and ca\n",
"# are replaced by spa, por and cat so that every URI belongs to the same code set.\n",
"for code in ('eng', 'spa', 'fra', 'ita', 'por', 'cat'):\n",
"    cl_collirdf.add((scheme, dct.language, iso369[code]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "incorporate-difference",
"metadata": {},
"outputs": [],
"source": [
"# Mapping\n",
"# Spreadsheet column -> language tag given to the labels found in that column.\n",
"COLLI_LABEL_COLUMNS = {\n",
"    'fr': 'fr', 'it': 'it', 'ca': 'ca', 'es': 'es', 'pt': 'pt', 'en': 'en',\n",
"    'es-arg': 'es-ar', 'es-mex': 'es-mx', 'pt-br': 'pt-br',\n",
"}\n",
"\n",
"for index, row in df_data_colli.iterrows():\n",
"    # The concept identifier is the first Italian term, cut before any ' (' annotation,\n",
"    # with blanks replaced by underscores.\n",
"    strlabel = row['it'].split('\\n')[0].split(' (')[0].strip()\n",
"    # NOTE(review): the first argument of the second replace() displays as an empty string (a no-op).\n",
"    # The repository viewer flags ambiguous Unicode characters in this file, so the original argument\n",
"    # may be a character that does not render here - confirm before removing the call.\n",
"    label = strlabel.replace(' ', '_').replace('', '')\n",
"    if not label:\n",
"        # Without an Italian term the URI would be pltextile[''], i.e. the scheme itself.\n",
"        continue\n",
"    concept = pltextile[label]\n",
"\n",
"    cl_collirdf.add((pltextile[''], SKOS.hasTopConcept, concept))\n",
"    cl_collirdf.add((concept, RDF.type, SKOS.Concept))\n",
"    cl_collirdf.add((concept, SKOS.inScheme, pltextile['']))\n",
"    cl_collirdf.add((concept, SKOS.topConceptOf, pltextile['']))\n",
"\n",
"    # The DOI column, when filled, is linked through rdfs:seeAlso.\n",
"    imagedoi = row['DOI'].strip()\n",
"    if imagedoi != '':\n",
"        cl_collirdf.add((concept, RDFS.seeAlso, URIRef('https://doi.org/' + imagedoi)))\n",
"\n",
"    for column, lang in COLLI_LABEL_COLUMNS.items():\n",
"        # First line of a cell: preferred label; remaining lines: alternative labels.\n",
"        terms = [term.strip() for term in row[column].split('\\n')]\n",
"        if terms[0]:\n",
"            cl_collirdf.add((concept, SKOS.prefLabel, Literal(terms[0], lang=lang)))\n",
"        for alternative in terms[1:]:\n",
"            # Blank lines would otherwise become empty skos:altLabel literals.\n",
"            if alternative:\n",
"                cl_collirdf.add((concept, SKOS.altLabel, Literal(alternative, lang=lang)))\n",
"\n",
"    # The DEF column holds the Italian definition.\n",
"    definition = row['DEF'].strip()\n",
"    if definition:\n",
"        cl_collirdf.add((concept, SKOS.definition, Literal(definition, lang='it')))\n",
"\n",
"print(len(cl_collirdf))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "applicable-commissioner",
"metadata": {},
"outputs": [],
"source": [
"# Write the vocabulary as Turtle and as RDF/XML.\n",
"# N3 is a superset of Turtle, so the 'turtle' serializer is used for the .ttl file.\n",
"cl_collirdf.serialize(destination='data/lexpanlatcolliskos_11.ttl', format='turtle')\n",
"# The trailing ';' keeps the notebook from echoing the return value.\n",
"cl_collirdf.serialize(destination='data/lexpanlatcolliskos_11.rdf', format='pretty-xml');"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "limiting-duration",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}