830 lines
32 KiB
Plaintext
830 lines
32 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "lightweight-detroit",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Mapping *Pan-Latin Textile Fibres Vocabulary* from spreadsheet to SKOS resources\n",
|
||
"\n",
|
||
"This Notebook implements a simple parser used to transform the Pan-Latin Textile Fibres Vocabulary, developed within the Realiter network and published as spreadsheets, into SKOS resources. The parser reads the spreadsheets and transforms their content into SKOS data following a set of mapping rules; the result for each vocabulary is stored in the *data* directory as a `.ttl` file and an RDF/XML `.rdf` file.\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "modified-vegetarian",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import rdflib\n",
|
||
"import itertools\n",
|
||
"import yaml\n",
|
||
"import datetime"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "hundred-singles",
|
||
"metadata": {},
|
||
"source": [
|
||
"The file *config-lessico.yaml* contains the external information used in the parsing, including the location of the spreadsheets. Set the correct values before running the Notebook."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "stupid-lewis",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"try:\n",
|
||
" with open(\"config-lessico.yaml\", 'r') as stream:\n",
|
||
" try:\n",
|
||
" conf=yaml.safe_load(stream)\n",
|
||
" except yaml.YAMLError as exc:\n",
|
||
" print(exc)\n",
|
||
"except FileNotFoundError:\n",
|
||
" print('Warning config.yaml file not present! Please store it in the same directory as the notebook')\n",
|
||
"#print (conf)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "generic-thong",
|
||
"metadata": {},
|
||
"source": [
|
||
"The following cell defines the *Namespaces* used in the parsing."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "oriental-structure",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from rdflib.namespace import DC, DCAT, DCTERMS, OWL, \\\n",
|
||
" RDF, RDFS, SKOS, \\\n",
|
||
" XMLNS, XSD, XMLNS\n",
|
||
"from rdflib import Namespace\n",
|
||
"from rdflib import URIRef, BNode, Literal\n",
|
||
"\n",
|
||
"pltextile = Namespace(conf['Namespaces']['TEXTILETERM'])\n",
|
||
"dc11=Namespace(\"http://purl.org/dc/elements/1.1/\");\n",
|
||
"dct = Namespace(\"http://purl.org/dc/terms/\")\n",
|
||
"iso369=Namespace(\"http://id.loc.gov/vocabulary/iso639-3\");"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "sacred-shopper",
|
||
"metadata": {},
|
||
"source": [
|
||
"Download the **Lessico** spreadsheet and display its first rows to check that the operation has been executed correctly."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "systematic-saudi",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"url=conf['Source']['LESSICOSOURCE']\n",
|
||
"df_data=pd.read_csv(url)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "sunrise-reunion",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df_data.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "native-judges",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df_data.rename(columns = {'es [ARG]': 'es-arg', 'es [MEX]': 'es-mex', 'fr [CA]': 'fr-ca'}, inplace = True)\n",
|
||
"#df_data.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "united-samoa",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df_data.iloc[0].it.split('\\n')[0].split(' ')[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "indonesian-curtis",
|
||
"metadata": {},
|
||
"source": [
|
||
"Create a graph for the SKOS data and bind the namespaces to it"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "parallel-bible",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"c1rdf = rdflib.Graph()\n",
|
||
"c1rdf.bind(\"pltextile\", pltextile)\n",
|
||
"c1rdf.bind(\"dc11\", dc11)\n",
|
||
"c1rdf.bind(\"dct\", dct)\n",
|
||
"c1rdf.bind(\"iso369-3\", iso369)\n",
|
||
"c1rdf.bind(\"skos\", SKOS)\n",
|
||
"c1rdf.bind(\"dc\", DC)\n",
|
||
"c1rdf.bind(\"rdf\", RDF)\n",
|
||
"c1rdf.bind(\"rdfs\", RDFS)\n",
|
||
"c1rdf.bind(\"owl\", OWL)\n",
|
||
"c1rdf.bind(\"xsd\", XSD)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "quantitative-integer",
|
||
"metadata": {},
|
||
"source": [
|
||
"Insert the *SKOS.ConceptScheme* into the graph."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "protective-anxiety",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"now = datetime.datetime.today()\n",
|
||
"today_date=now.date()\n",
|
||
"title=Literal(conf['Texts']['LESSICOTITLE'], lang=conf['Texts']['LANG'])\n",
|
||
"description=Literal(conf['Texts']['LESSICODESCRIPTION'], lang=conf['Texts']['LANG'])\n",
|
||
"description_it=Literal(conf['Texts']['LESSICODESCRIPTION_IT'], lang='it')\n",
|
||
"identifier=Literal(conf['Texts']['LESSICOID'], lang=conf['Texts']['LANG'])\n",
|
||
"#identifier=URIRef(conf['Texts']['VOCABULARYID'])\n",
|
||
"createddate= Literal(conf['Texts']['LESSICOCREATEDATE'],datatype=XSD.date)\n",
|
||
"moddate= Literal(today_date,datatype=XSD.date)\n",
|
||
"version= Literal(conf['Texts']['LESSICOVERSION'],datatype=XSD.string)\n",
|
||
"\n",
|
||
"c1rdf.add((pltextile[''], RDF.type, SKOS.ConceptScheme))\n",
|
||
"c1rdf.add((pltextile[''], DC.title, title))\n",
|
||
"c1rdf.add((pltextile[''], DC.identifier, identifier))\n",
|
||
"c1rdf.add((pltextile[''], DC.description, description))\n",
|
||
"c1rdf.add((pltextile[''], DC.description, description_it))\n",
|
||
"c1rdf.add((pltextile[''], dct.created, createddate))\n",
|
||
"c1rdf.add((pltextile[''], dct.modified, moddate))\n",
|
||
"c1rdf.add((pltextile[''], OWL.versionInfo, version))\n",
|
||
"c1rdf.add((pltextile[''], dct.language, iso369.eng))\n",
|
||
"c1rdf.add((pltextile[''], dct.language, iso369.es))\n",
|
||
"c1rdf.add((pltextile[''], dct.language, iso369.fra))\n",
|
||
"c1rdf.add((pltextile[''], dct.language, iso369.gl))\n",
|
||
"c1rdf.add((pltextile[''], dct.language, iso369.ita))\n",
|
||
"c1rdf.add((pltextile[''], dct.language, iso369.ro))\n",
|
||
"c1rdf.add((pltextile[''], dct.language, iso369.pt))\n",
|
||
"c1rdf.add((pltextile[''], dct.language, iso369.ca))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "vertical-election",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#c1rdf.serialize(destination='data/skostest.rdf', format=\"n3\");#format=\"pretty-xml\")\n",
|
||
"#comrdf.serialize(destination='data/parsed_rdf/prima_cantica_forme_com.rdf', format=\"n3\");\n",
|
||
"df_data.fillna('', inplace=True)\n",
|
||
"df_data.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "assigned-beijing",
|
||
"metadata": {},
|
||
"source": [
|
||
"The following cell implements the mapping rules for creating SKOS resources."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "typical-prompt",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#df_data.iloc[0].it.split('\\n')[0].split(' ')[0]\n",
|
||
"for index, row in df_data.iterrows():\n",
|
||
" \n",
|
||
" strlabel=row.it.split('\\n')[0].split(' (')[0].strip()\n",
|
||
" label=strlabel.replace(\" \", \"_\")\n",
|
||
" #label=URIRef(row.it.split('\\n')[0].split(' (')[0].strip())\n",
|
||
" c1rdf.add((pltextile[''], SKOS.hasTopConcept, pltextile[label])) \n",
|
||
" frlabel=Literal(row[\"fr\"].split('\\n')[0].strip(), lang='fr')\n",
|
||
" fraltlabels=row[\"fr\"].split('\\n')[1:]\n",
|
||
" itlabel=Literal(row['it'].split('\\n')[0].strip(), lang='it')\n",
|
||
" italtlabels=row[\"it\"].split('\\n')[1:] \n",
|
||
" calabel=Literal(row['ca'].split('\\n')[0].strip(), lang='ca')\n",
|
||
" caaltlabels=row[\"ca\"].split('\\n')[1:]\n",
|
||
" eslabel=Literal(row['es'].split('\\n')[0].strip(), lang='es')\n",
|
||
" esaltlabels=row[\"es\"].split('\\n')[1:]\n",
|
||
" gllabel=Literal(row['gl'].split('\\n')[0].strip(), lang='gl')\n",
|
||
" glaltlabels=row[\"gl\"].split('\\n')[1:]\n",
|
||
" ptlabel=Literal(row['pt'].split('\\n')[0].strip(), lang='pt')\n",
|
||
" ptaltlabels=row[\"pt\"].split('\\n')[1:]\n",
|
||
" rolabel=Literal(row['ro'].split('\\n')[0].strip(), lang='ro')\n",
|
||
" roaltlabels=row[\"ro\"].split('\\n')[1:]\n",
|
||
" enlabel=Literal(row['en'].split('\\n')[0].strip(), lang='en')\n",
|
||
" enaltlabels=row[\"en\"].split('\\n')[1:]\n",
|
||
" \n",
|
||
" esarglabel=Literal(row['es-arg'].split('\\n')[0].strip(), lang='es-ar')\n",
|
||
" esargaltlabels=row[\"es-arg\"].split('\\n')[1:]\n",
|
||
" #es-arg-mex\n",
|
||
"# esargmexarglabel=Literal(row['es-arg-mex'].split('\\n')[0].strip(), lang='es-ar')\n",
|
||
"# esargmexmexlabel=Literal(row['es-arg-mex'].split('\\n')[0].strip(), lang='es-mx')\n",
|
||
"# esargmexaltlabels=row[\"es-arg-mex\"].split('\\n')[1:]\n",
|
||
" \n",
|
||
" esmexlabel=Literal(row['es-mex'].split('\\n')[0].strip(), lang='es-mx')\n",
|
||
" esmexaltlabels=row[\"es-mex\"].split('\\n')[1:]\n",
|
||
" frcalabel=Literal(row['fr-ca'].split('\\n')[0].strip(), lang='fr-ca')\n",
|
||
" frcaaltlabels=row[\"fr-ca\"].split('\\n')[1:]\n",
|
||
" \n",
|
||
" #definition\n",
|
||
" itdef=Literal(row[\"DEF\"].strip(), lang='it')\n",
|
||
" \n",
|
||
" \n",
|
||
" \n",
|
||
" c1rdf.add((pltextile[label], RDF.type, SKOS.Concept))\n",
|
||
" c1rdf.add((pltextile[label], SKOS.inScheme, pltextile['']))\n",
|
||
" c1rdf.add((pltextile[label], SKOS.topConceptOf, pltextile['']))\n",
|
||
" \n",
|
||
" for alab in esargaltlabels:\n",
|
||
" c1rdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es-ar')))\n",
|
||
" \n",
|
||
"# for alab in esargmexaltlabels:\n",
|
||
"# c1rdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es-ar')))\n",
|
||
"# c1rdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es-mx')))\n",
|
||
" \n",
|
||
" for alab in esmexaltlabels:\n",
|
||
" c1rdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es-mx')))\n",
|
||
" \n",
|
||
" for alab in frcaaltlabels:\n",
|
||
" c1rdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='fr-ca')))\n",
|
||
" \n",
|
||
" for alab in esaltlabels:\n",
|
||
" c1rdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es')))\n",
|
||
" \n",
|
||
" for alab in glaltlabels:\n",
|
||
" c1rdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='gl')))\n",
|
||
" \n",
|
||
" for alab in ptaltlabels:\n",
|
||
" c1rdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='pt')))\n",
|
||
" \n",
|
||
" for alab in roaltlabels:\n",
|
||
" c1rdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='ro')))\n",
|
||
" \n",
|
||
" for alab in enaltlabels:\n",
|
||
" c1rdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='en')))\n",
|
||
" \n",
|
||
" for alab in caaltlabels:\n",
|
||
" c1rdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='ca')))\n",
|
||
" \n",
|
||
" for alab in fraltlabels:\n",
|
||
" #print (\"tt \"+alab)\n",
|
||
" c1rdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='fr')))\n",
|
||
" for alab in italtlabels:\n",
|
||
" c1rdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='it')))\n",
|
||
" \n",
|
||
" \n",
|
||
" if(frlabel):\n",
|
||
" c1rdf.add((pltextile[label], SKOS.prefLabel, frlabel))\n",
|
||
" if(itlabel):\n",
|
||
" c1rdf.add((pltextile[label], SKOS.prefLabel, itlabel))\n",
|
||
" if(gllabel):\n",
|
||
" c1rdf.add((pltextile[label], SKOS.prefLabel, gllabel))\n",
|
||
" \n",
|
||
" if(ptlabel):\n",
|
||
" c1rdf.add((pltextile[label], SKOS.prefLabel, ptlabel))\n",
|
||
" if(rolabel):\n",
|
||
" c1rdf.add((pltextile[label], SKOS.prefLabel, rolabel))\n",
|
||
" if(enlabel):\n",
|
||
" c1rdf.add((pltextile[label], SKOS.prefLabel, enlabel))\n",
|
||
" \n",
|
||
" if(calabel): \n",
|
||
" c1rdf.add((pltextile[label], SKOS.prefLabel, calabel))\n",
|
||
" if(eslabel): \n",
|
||
" c1rdf.add((pltextile[label], SKOS.prefLabel, eslabel))\n",
|
||
" if(esarglabel):\n",
|
||
" c1rdf.add((pltextile[label], SKOS.prefLabel, esarglabel))\n",
|
||
" \n",
|
||
"# if(esargmexarglabel):\n",
|
||
"# c1rdf.add((pltextile[label], SKOS.prefLabel, esargmexarglabel))\n",
|
||
"# c1rdf.add((pltextile[label], SKOS.prefLabel, esargmexmexlabel))\n",
|
||
" \n",
|
||
" if(esmexlabel):\n",
|
||
" c1rdf.add((pltextile[label], SKOS.prefLabel, esmexlabel))\n",
|
||
" if(frcalabel):\n",
|
||
" c1rdf.add((pltextile[label], SKOS.prefLabel, frcalabel))\n",
|
||
" \n",
|
||
" if (itdef):\n",
|
||
" c1rdf.add((pltextile[label], SKOS.definition, itdef))\n",
|
||
"\n",
|
||
"print(len(c1rdf))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "answering-latino",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# for s, p, o in c1rdf.triples((None, None, None)):\n",
|
||
"# print(\"{} {}\".format(s, o.n3))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "quality-scratch",
|
||
"metadata": {},
|
||
"source": [
|
||
"Create the output files (`.ttl` and RDF/XML `.rdf`) in the **data** directory with the SKOS resources for the **Pan-Latin Textile Fibres Vocabulary**."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "equal-voice",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"c1rdf.serialize(destination='data/lexpanlatskos_11.ttl', format=\"n3\");#format=\"pretty-xml\")\n",
|
||
"c1rdf.serialize(destination='data/lexpanlatskos_11.rdf', format=\"pretty-xml\");#format=\"pretty-xml\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "selected-enemy",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Lessico panlatino delle Maniche"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "current-material",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"urlma=conf['Source']['LESSICOMANICHESOURCE']\n",
|
||
"df_data_maniche=pd.read_csv(urlma)\n",
|
||
"df_data_maniche.rename(columns = {'es [ARG]': 'es-arg', 'es [MEX]': 'es-mex', 'pt [BR]': 'pt-br'}, inplace = True)\n",
|
||
"df_data_maniche.fillna('', inplace=True)\n",
|
||
"#df_data_maniche.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "incorporated-creature",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"cl_manicherdf = rdflib.Graph()\n",
|
||
"cl_manicherdf.bind(\"pltextile\", pltextile)\n",
|
||
"cl_manicherdf.bind(\"dc11\", dc11)\n",
|
||
"cl_manicherdf.bind(\"dct\", dct)\n",
|
||
"cl_manicherdf.bind(\"iso369-3\", iso369)\n",
|
||
"cl_manicherdf.bind(\"skos\", SKOS)\n",
|
||
"cl_manicherdf.bind(\"dc\", DC)\n",
|
||
"cl_manicherdf.bind(\"rdf\", RDF)\n",
|
||
"cl_manicherdf.bind(\"rdfs\", RDFS)\n",
|
||
"cl_manicherdf.bind(\"owl\", OWL)\n",
|
||
"cl_manicherdf.bind(\"xsd\", XSD)\n",
|
||
"now = datetime.datetime.today()\n",
|
||
"today_date=now.date()\n",
|
||
"title=Literal(conf['Texts']['LESSICOMANICHETITLE'], lang=conf['Texts']['LANG'])\n",
|
||
"description=Literal(conf['Texts']['LESSICOMANICHEDESCRIPTION'], lang=conf['Texts']['LANG'])\n",
|
||
"description_it=Literal(conf['Texts']['LESSICOMANICHEDESCRIPTION_IT'], lang='it')\n",
|
||
"identifier=Literal(conf['Texts']['LESSICOMANICHEID'], lang=conf['Texts']['LANG'])\n",
|
||
"#identifier=URIRef(conf['Texts']['VOCABULARYID'])\n",
|
||
"createddate= Literal(conf['Texts']['LESSICOCREATEDATE'],datatype=XSD.date)\n",
|
||
"moddate= Literal(today_date,datatype=XSD.date)\n",
|
||
"version= Literal(conf['Texts']['LESSICOVERSION'],datatype=XSD.string)\n",
|
||
"\n",
|
||
"cl_manicherdf.add((pltextile[''], RDF.type, SKOS.ConceptScheme))\n",
|
||
"cl_manicherdf.add((pltextile[''], DC.title, title))\n",
|
||
"cl_manicherdf.add((pltextile[''], DC.identifier, identifier))\n",
|
||
"cl_manicherdf.add((pltextile[''], DC.description, description))\n",
|
||
"cl_manicherdf.add((pltextile[''], DC.description, description_it))\n",
|
||
"cl_manicherdf.add((pltextile[''], dct.created, createddate))\n",
|
||
"cl_manicherdf.add((pltextile[''], dct.modified, moddate))\n",
|
||
"cl_manicherdf.add((pltextile[''], OWL.versionInfo, version))\n",
|
||
"cl_manicherdf.add((pltextile[''], dct.language, iso369.eng))\n",
|
||
"cl_manicherdf.add((pltextile[''], dct.language, iso369.es))\n",
|
||
"cl_manicherdf.add((pltextile[''], dct.language, iso369.fra))\n",
|
||
"cl_manicherdf.add((pltextile[''], dct.language, iso369.ca))\n",
|
||
"cl_manicherdf.add((pltextile[''], dct.language, iso369.ita))\n",
|
||
"cl_manicherdf.add((pltextile[''], dct.language, iso369.pt))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "modular-realtor",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Mapping\n",
|
||
"for index, row in df_data_maniche.iterrows():\n",
|
||
" \n",
|
||
" strlabel=row.it.split('\\n')[0].split('(')[0].strip()\n",
|
||
" label=strlabel.replace(\" \", \"_\").replace(\"’\",\"\").replace(\"'\",\"\").strip()\n",
|
||
" #label=URIRef(row.it.split('\\n')[0].split(' (')[0].strip())\n",
|
||
" cl_manicherdf.add((pltextile[''], SKOS.hasTopConcept, pltextile[label])) \n",
|
||
" frlabel=Literal(row[\"fr\"].split('\\n')[0].strip(), lang='fr')\n",
|
||
" fraltlabels=row[\"fr\"].split('\\n')[1:]\n",
|
||
" itlabel=Literal(row['it'].split('\\n')[0].strip(), lang='it')\n",
|
||
" italtlabels=row[\"it\"].split('\\n')[1:] \n",
|
||
" calabel=Literal(row['ca'].split('\\n')[0].strip(), lang='ca')\n",
|
||
" caaltlabels=row[\"ca\"].split('\\n')[1:]\n",
|
||
" eslabel=Literal(row['es'].split('\\n')[0].strip(), lang='es')\n",
|
||
" esaltlabels=row[\"es\"].split('\\n')[1:]\n",
|
||
" #gllabel=Literal(row['gl'].split('\\n')[0].strip(), lang='gl')\n",
|
||
" #glaltlabels=row[\"gl\"].split('\\n')[1:]\n",
|
||
" ptlabel=Literal(row['pt'].split('\\n')[0].strip(), lang='pt')\n",
|
||
" ptaltlabels=row[\"pt\"].split('\\n')[1:]\n",
|
||
"# rolabel=Literal(row['ro'].split('\\n')[0].strip(), lang='ro')\n",
|
||
"# roaltlabels=row[\"ro\"].split('\\n')[1:]\n",
|
||
" enlabel=Literal(row['en'].split('\\n')[0].strip(), lang='en')\n",
|
||
" enaltlabels=row[\"en\"].split('\\n')[1:]\n",
|
||
" \n",
|
||
" esarglabel=Literal(row['es-arg'].split('\\n')[0].strip(), lang='es-ar')\n",
|
||
" esargaltlabels=row[\"es-arg\"].split('\\n')[1:]\n",
|
||
" \n",
|
||
"\n",
|
||
" esmexlabel=Literal(row['es-mex'].split('\\n')[0].strip(), lang='es-mx')\n",
|
||
" esmexaltlabels=row[\"es-mex\"].split('\\n')[1:]\n",
|
||
" ptbrlabel=Literal(row['pt-br'].split('\\n')[0].strip(), lang='pt-br')\n",
|
||
" ptbraltlabels=row[\"pt-br\"].split('\\n')[1:]\n",
|
||
" \n",
|
||
" #definition\n",
|
||
" itdef=Literal(row[\"DEF\"].strip(), lang='it')\n",
|
||
" #DOI as rdfs:seeAlso\n",
|
||
" imagedoi=row[\"DOI\"].strip()\n",
|
||
" \n",
|
||
" cl_manicherdf.add((pltextile[label], RDF.type, SKOS.Concept))\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.inScheme, pltextile['']))\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.topConceptOf, pltextile['']))\n",
|
||
" if (imagedoi!=''):\n",
|
||
" cl_manicherdf.add((pltextile[label], RDFS.seeAlso, URIRef('https://doi.org/'+imagedoi)))\n",
|
||
" \n",
|
||
" for alab in esargaltlabels:\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es-ar')))\n",
|
||
" \n",
|
||
" \n",
|
||
" for alab in esmexaltlabels:\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es-mx')))\n",
|
||
" \n",
|
||
" for alab in ptbraltlabels:\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='pt-br')))\n",
|
||
" \n",
|
||
" for alab in esaltlabels:\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es')))\n",
|
||
" \n",
|
||
"# for alab in glaltlabels:\n",
|
||
"# cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='gl')))\n",
|
||
" \n",
|
||
" for alab in ptaltlabels:\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='pt')))\n",
|
||
" \n",
|
||
"# for alab in roaltlabels:\n",
|
||
"# cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='ro')))\n",
|
||
" \n",
|
||
" for alab in enaltlabels:\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='en')))\n",
|
||
" \n",
|
||
" for alab in caaltlabels:\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='ca')))\n",
|
||
" \n",
|
||
" for alab in fraltlabels:\n",
|
||
" #print (\"tt \"+alab)\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='fr')))\n",
|
||
" for alab in italtlabels:\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='it')))\n",
|
||
" \n",
|
||
" \n",
|
||
" if(frlabel):\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.prefLabel, frlabel))\n",
|
||
" if(itlabel):\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.prefLabel, itlabel))\n",
|
||
"# if(gllabel):\n",
|
||
"# cl_manicherdf.add((pltextile[label], SKOS.prefLabel, gllabel))\n",
|
||
" \n",
|
||
" if(ptlabel):\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.prefLabel, ptlabel))\n",
|
||
"# if(rolabel):\n",
|
||
"# cl_manicherdf.add((pltextile[label], SKOS.prefLabel, rolabel))\n",
|
||
" if(enlabel):\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.prefLabel, enlabel))\n",
|
||
" \n",
|
||
" if(calabel): \n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.prefLabel, calabel))\n",
|
||
" if(eslabel): \n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.prefLabel, eslabel))\n",
|
||
" if(esarglabel):\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.prefLabel, esarglabel))\n",
|
||
" \n",
|
||
"\n",
|
||
" if(esmexlabel):\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.prefLabel, esmexlabel))\n",
|
||
" if(ptbrlabel):\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.prefLabel, ptbrlabel))\n",
|
||
" \n",
|
||
" if (itdef):\n",
|
||
" cl_manicherdf.add((pltextile[label], SKOS.definition, itdef))\n",
|
||
"\n",
|
||
"print(len(cl_manicherdf))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "matched-mustang",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"cl_manicherdf.serialize(destination='data/lexpanlatmanicheskos_11.ttl', format=\"n3\");#format=\"pretty-xml\")\n",
|
||
"cl_manicherdf.serialize(destination='data/lexpanlatmanicheskos_11.rdf', format=\"pretty-xml\");#format=\"pretty-xml\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "talented-making",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Lessico panlatino dei Colli"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "centered-advantage",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"urlco=conf['Source']['LESSICOCOLLISOURCE']\n",
|
||
"df_data_colli=pd.read_csv(urlco)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "desperate-uruguay",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df_data_colli.rename(columns = {'es [ARG]': 'es-arg', 'es [MEX]': 'es-mex', 'pt [BR]': 'pt-br'}, inplace = True)\n",
|
||
"df_data_colli.fillna('', inplace=True)\n",
|
||
"#df_data_colli.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "magnetic-stake",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"cl_collirdf = rdflib.Graph()\n",
|
||
"cl_collirdf.bind(\"pltextile\", pltextile)\n",
|
||
"cl_collirdf.bind(\"dc11\", dc11)\n",
|
||
"cl_collirdf.bind(\"dct\", dct)\n",
|
||
"cl_collirdf.bind(\"iso369-3\", iso369)\n",
|
||
"cl_collirdf.bind(\"skos\", SKOS)\n",
|
||
"cl_collirdf.bind(\"dc\", DC)\n",
|
||
"cl_collirdf.bind(\"rdf\", RDF)\n",
|
||
"cl_collirdf.bind(\"owl\", OWL)\n",
|
||
"cl_collirdf.bind(\"xsd\", XSD)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "hidden-purple",
|
||
"metadata": {},
|
||
"source": [
|
||
"SKOS concept scheme"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "christian-paste",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"now = datetime.datetime.today()\n",
|
||
"today_date=now.date()\n",
|
||
"title=Literal(conf['Texts']['LESSICOCOLLITITLE'], lang=conf['Texts']['LANG'])\n",
|
||
"description=Literal(conf['Texts']['LESSICOCOLLIDESCRIPTION'], lang=conf['Texts']['LANG'])\n",
|
||
"description_it=Literal(conf['Texts']['LESSICOCOLLIDESCRIPTION_IT'], lang='it')\n",
|
||
"identifier=Literal(conf['Texts']['LESSICOCOLLIID'], lang=conf['Texts']['LANG'])\n",
|
||
"#identifier=URIRef(conf['Texts']['VOCABULARYID'])\n",
|
||
"createddate= Literal(conf['Texts']['LESSICOCREATEDATE'],datatype=XSD.date)\n",
|
||
"moddate= Literal(today_date,datatype=XSD.date)\n",
|
||
"version= Literal(conf['Texts']['LESSICOVERSION'],datatype=XSD.string)\n",
|
||
"\n",
|
||
"cl_collirdf.add((pltextile[''], RDF.type, SKOS.ConceptScheme))\n",
|
||
"cl_collirdf.add((pltextile[''], DC.title, title))\n",
|
||
"cl_collirdf.add((pltextile[''], DC.identifier, identifier))\n",
|
||
"cl_collirdf.add((pltextile[''], DC.description, description))\n",
|
||
"cl_collirdf.add((pltextile[''], DC.description, description_it))\n",
|
||
"cl_collirdf.add((pltextile[''], dct.created, createddate))\n",
|
||
"cl_collirdf.add((pltextile[''], dct.modified, moddate))\n",
|
||
"cl_collirdf.add((pltextile[''], OWL.versionInfo, version))\n",
|
||
"cl_collirdf.add((pltextile[''], dct.language, iso369.eng))\n",
|
||
"cl_collirdf.add((pltextile[''], dct.language, iso369.es))\n",
|
||
"cl_collirdf.add((pltextile[''], dct.language, iso369.fra))\n",
|
||
"cl_collirdf.add((pltextile[''], dct.language, iso369.ita))\n",
|
||
"cl_collirdf.add((pltextile[''], dct.language, iso369.pt))\n",
|
||
"cl_collirdf.add((pltextile[''], dct.language, iso369.ca))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "incorporate-difference",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Mapping\n",
|
||
"for index, row in df_data_colli.iterrows():\n",
|
||
" \n",
|
||
" strlabel=row.it.split('\\n')[0].split(' (')[0].strip()\n",
|
||
" label=strlabel.replace(\" \", \"_\").replace(\"’\",\"\")\n",
|
||
" #label=URIRef(row.it.split('\\n')[0].split(' (')[0].strip())\n",
|
||
" cl_collirdf.add((pltextile[''], SKOS.hasTopConcept, pltextile[label])) \n",
|
||
" frlabel=Literal(row[\"fr\"].split('\\n')[0].strip(), lang='fr')\n",
|
||
" fraltlabels=row[\"fr\"].split('\\n')[1:]\n",
|
||
" itlabel=Literal(row['it'].split('\\n')[0].strip(), lang='it')\n",
|
||
" italtlabels=row[\"it\"].split('\\n')[1:] \n",
|
||
" calabel=Literal(row['ca'].split('\\n')[0].strip(), lang='ca')\n",
|
||
" caaltlabels=row[\"ca\"].split('\\n')[1:]\n",
|
||
" eslabel=Literal(row['es'].split('\\n')[0].strip(), lang='es')\n",
|
||
" esaltlabels=row[\"es\"].split('\\n')[1:]\n",
|
||
" #gllabel=Literal(row['gl'].split('\\n')[0].strip(), lang='gl')\n",
|
||
" #glaltlabels=row[\"gl\"].split('\\n')[1:]\n",
|
||
" ptlabel=Literal(row['pt'].split('\\n')[0].strip(), lang='pt')\n",
|
||
" ptaltlabels=row[\"pt\"].split('\\n')[1:]\n",
|
||
"# rolabel=Literal(row['ro'].split('\\n')[0].strip(), lang='ro')\n",
|
||
"# roaltlabels=row[\"ro\"].split('\\n')[1:]\n",
|
||
" enlabel=Literal(row['en'].split('\\n')[0].strip(), lang='en')\n",
|
||
" enaltlabels=row[\"en\"].split('\\n')[1:]\n",
|
||
" \n",
|
||
" esarglabel=Literal(row['es-arg'].split('\\n')[0].strip(), lang='es-ar')\n",
|
||
" esargaltlabels=row[\"es-arg\"].split('\\n')[1:]\n",
|
||
" \n",
|
||
"\n",
|
||
" esmexlabel=Literal(row['es-mex'].split('\\n')[0].strip(), lang='es-mx')\n",
|
||
" esmexaltlabels=row[\"es-mex\"].split('\\n')[1:]\n",
|
||
" ptbrlabel=Literal(row['pt-br'].split('\\n')[0].strip(), lang='pt-br')\n",
|
||
" ptbraltlabels=row[\"pt-br\"].split('\\n')[1:]\n",
|
||
" \n",
|
||
" #definition\n",
|
||
" itdef=Literal(row[\"DEF\"].strip(), lang='it')\n",
|
||
" #DOI as rdfs:seeAlso\n",
|
||
" imagedoi=row[\"DOI\"].strip()\n",
|
||
" \n",
|
||
" cl_collirdf.add((pltextile[label], RDF.type, SKOS.Concept))\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.inScheme, pltextile['']))\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.topConceptOf, pltextile['']))\n",
|
||
" if (imagedoi!=''):\n",
|
||
" cl_collirdf.add((pltextile[label], RDFS.seeAlso, URIRef('https://doi.org/'+imagedoi)))\n",
|
||
" \n",
|
||
" for alab in esargaltlabels:\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es-ar')))\n",
|
||
" \n",
|
||
" \n",
|
||
" for alab in esmexaltlabels:\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es-mx')))\n",
|
||
" \n",
|
||
" for alab in ptbraltlabels:\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='pt-br')))\n",
|
||
" \n",
|
||
" for alab in esaltlabels:\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es')))\n",
|
||
" \n",
|
||
"# for alab in glaltlabels:\n",
|
||
"# cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='gl')))\n",
|
||
" \n",
|
||
" for alab in ptaltlabels:\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='pt')))\n",
|
||
" \n",
|
||
"# for alab in roaltlabels:\n",
|
||
"# cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='ro')))\n",
|
||
" \n",
|
||
" for alab in enaltlabels:\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='en')))\n",
|
||
" \n",
|
||
" for alab in caaltlabels:\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='ca')))\n",
|
||
" \n",
|
||
" for alab in fraltlabels:\n",
|
||
" #print (\"tt \"+alab)\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='fr')))\n",
|
||
" for alab in italtlabels:\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='it')))\n",
|
||
" \n",
|
||
" \n",
|
||
" if(frlabel):\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.prefLabel, frlabel))\n",
|
||
" if(itlabel):\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.prefLabel, itlabel))\n",
|
||
"# if(gllabel):\n",
|
||
"# cl_collirdf.add((pltextile[label], SKOS.prefLabel, gllabel))\n",
|
||
" \n",
|
||
" if(ptlabel):\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.prefLabel, ptlabel))\n",
|
||
"# if(rolabel):\n",
|
||
"# cl_collirdf.add((pltextile[label], SKOS.prefLabel, rolabel))\n",
|
||
" if(enlabel):\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.prefLabel, enlabel))\n",
|
||
" \n",
|
||
" if(calabel): \n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.prefLabel, calabel))\n",
|
||
" if(eslabel): \n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.prefLabel, eslabel))\n",
|
||
" if(esarglabel):\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.prefLabel, esarglabel))\n",
|
||
" \n",
|
||
"\n",
|
||
" if(esmexlabel):\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.prefLabel, esmexlabel))\n",
|
||
" if(ptbrlabel):\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.prefLabel, ptbrlabel))\n",
|
||
" \n",
|
||
" if (itdef):\n",
|
||
" cl_collirdf.add((pltextile[label], SKOS.definition, itdef))\n",
|
||
"\n",
|
||
"print(len(cl_collirdf))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "applicable-commissioner",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"cl_collirdf.serialize(destination='data/lexpanlatcolliskos_11.ttl', format=\"n3\");#format=\"pretty-xml\")\n",
|
||
"cl_collirdf.serialize(destination='data/lexpanlatcolliskos_11.rdf', format=\"pretty-xml\");#format=\"pretty-xml\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "limiting-duration",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.1"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|