sshoc-skosmapping/sshoc_lessico_panlatino.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "lightweight-detroit",
   "metadata": {},
   "source": [
    "## Mapping *Pan-Latin Textile Fibres Vocabulary* from spreadsheet to SKOS resources\n",
    "\n",
    "This notebook implements a simple parser that transforms the Pan-Latin Textile Fibres Vocabulary, developed within the Realiter network and published as spreadsheets, into SKOS resources. The parser reads the spreadsheets and transforms their content into SKOS data following a set of mapping rules; the results are stored as Turtle and RDF/XML files.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "modified-vegetarian",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import rdflib\n",
    "import itertools\n",
    "import yaml\n",
    "import datetime"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "hundred-singles",
   "metadata": {},
   "source": [
    "The file *config-lessico.yaml* contains the external information used in the parsing, including the location of the spreadsheets. Set the correct values before running the notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "stupid-lewis",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Path of the YAML configuration read by this notebook.\n",
    "CONFIG_PATH = 'config-lessico.yaml'\n",
    "\n",
    "# Load the configuration into `conf`. Failures are re-raised after printing a hint:\n",
    "# every later cell reads `conf`, so carrying on would only fail further down with an\n",
    "# unrelated NameError (or silently reuse a stale `conf` left in the kernel).\n",
    "try:\n",
    "    with open(CONFIG_PATH, 'r', encoding='utf-8') as stream:\n",
    "        conf = yaml.safe_load(stream)\n",
    "except FileNotFoundError:\n",
    "    print('Warning ' + CONFIG_PATH + ' file not present! Please store it in the same directory as the notebook')\n",
    "    raise\n",
    "except yaml.YAMLError as exc:\n",
    "    print(exc)\n",
    "    raise"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "generic-thong",
   "metadata": {},
   "source": [
    "The following cell defines the *Namespaces* used in the parsing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "oriental-structure",
   "metadata": {},
   "outputs": [],
   "source": [
    "from rdflib import BNode, Literal, Namespace, URIRef\n",
    "from rdflib.namespace import (\n",
    "    DC, DCAT, DCTERMS, OWL, RDF, RDFS, SKOS, XMLNS, XSD,\n",
    ")\n",
    "\n",
    "# Base namespace of the vocabulary, read from the configuration file.\n",
    "pltextile = Namespace(conf['Namespaces']['TEXTILETERM'])\n",
    "dc11 = Namespace('http://purl.org/dc/elements/1.1/')\n",
    "dct = Namespace('http://purl.org/dc/terms/')\n",
    "# The trailing slash is required: a Namespace builds a term URI by appending the term\n",
    "# name to this string, so without it `iso369.eng` became '.../iso639-3eng'.\n",
    "# NOTE(review): confirm that id.loc.gov really publishes an iso639-3 vocabulary.\n",
    "iso369 = Namespace('http://id.loc.gov/vocabulary/iso639-3/')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "sacred-shopper",
   "metadata": {},
   "source": [
    "Download the **Lessico** spreadsheet and show it to check that the operation has been executed correctly"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "systematic-saudi",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the Lessico spreadsheet from the location given in the configuration\n",
    "url = conf['Source']['LESSICOSOURCE']\n",
    "df_data = pd.read_csv(filepath_or_buffer=url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "sunrise-reunion",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the first rows to check that the spreadsheet was read correctly\n",
    "df_data.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "native-judges",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Give the regional-variant columns plain names that the mapping cell can index by\n",
    "df_data = df_data.rename(\n",
    "    columns={\n",
    "        'es [ARG]': 'es-arg',\n",
    "        'es [MEX]': 'es-mex',\n",
    "        'fr [CA]': 'fr-ca',\n",
    "    }\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "united-samoa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# First word of the first line of the first row's Italian cell\n",
    "first_it_cell = df_data.iloc[0]['it']\n",
    "first_it_cell.split('\\n')[0].split(' ')[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "indonesian-curtis",
   "metadata": {},
   "source": [
    "Create a graph for the SKOS data and bind the namespaces to it"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "parallel-bible",
   "metadata": {},
   "outputs": [],
   "source": [
    "c1rdf = rdflib.Graph()\n",
    "\n",
    "# Register the prefixes, in this order, for use when the graph is serialised\n",
    "for prefix, namespace in [\n",
    "    ('pltextile', pltextile),\n",
    "    ('dc11', dc11),\n",
    "    ('dct', dct),\n",
    "    ('iso369-3', iso369),\n",
    "    ('skos', SKOS),\n",
    "    ('dc', DC),\n",
    "    ('rdf', RDF),\n",
    "    ('rdfs', RDFS),\n",
    "    ('owl', OWL),\n",
    "    ('xsd', XSD),\n",
    "]:\n",
    "    c1rdf.bind(prefix, namespace)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "quantitative-integer",
   "metadata": {},
   "source": [
    "Insert in the graph the *SKOS.ConceptScheme*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "protective-anxiety",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Metadata of the concept scheme; the texts come from the configuration file and the\n",
    "# modification date is the day the notebook is run.\n",
    "now = datetime.datetime.today()\n",
    "today_date = now.date()\n",
    "title = Literal(conf['Texts']['LESSICOTITLE'], lang=conf['Texts']['LANG'])\n",
    "description = Literal(conf['Texts']['LESSICODESCRIPTION'], lang=conf['Texts']['LANG'])\n",
    "description_it = Literal(conf['Texts']['LESSICODESCRIPTION_IT'], lang='it')\n",
    "identifier = Literal(conf['Texts']['LESSICOID'], lang=conf['Texts']['LANG'])\n",
    "createddate = Literal(conf['Texts']['LESSICOCREATEDATE'], datatype=XSD.date)\n",
    "moddate = Literal(today_date, datatype=XSD.date)\n",
    "version = Literal(conf['Texts']['LESSICOVERSION'], datatype=XSD.string)\n",
    "\n",
    "# The scheme is identified by the bare vocabulary namespace.\n",
    "scheme = pltextile['']\n",
    "c1rdf.add((scheme, RDF.type, SKOS.ConceptScheme))\n",
    "c1rdf.add((scheme, DC.title, title))\n",
    "c1rdf.add((scheme, DC.identifier, identifier))\n",
    "c1rdf.add((scheme, DC.description, description))\n",
    "c1rdf.add((scheme, DC.description, description_it))\n",
    "c1rdf.add((scheme, dct.created, createddate))\n",
    "c1rdf.add((scheme, dct.modified, moddate))\n",
    "c1rdf.add((scheme, OWL.versionInfo, version))\n",
    "\n",
    "# Languages of the vocabulary as ISO 639-3 (three-letter) codes. The two-letter codes\n",
    "# used before (es, gl, ro, pt, ca) are ISO 639-1 codes, not ISO 639-3 identifiers.\n",
    "for code in ('eng', 'spa', 'fra', 'glg', 'ita', 'ron', 'por', 'cat'):\n",
    "    c1rdf.add((scheme, dct.language, iso369[code]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "vertical-election",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace missing cells with empty strings, then show the first rows\n",
    "df_data = df_data.fillna('')\n",
    "df_data.head(n=5)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "assigned-beijing",
   "metadata": {},
   "source": [
    "The following cell implements the mapping rules for creating SKOS resources."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "typical-prompt",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Spreadsheet column -> language tag given to the labels found in that column\n",
    "label_columns = {\n",
    "    'fr': 'fr', 'it': 'it', 'ca': 'ca', 'es': 'es', 'gl': 'gl', 'pt': 'pt',\n",
    "    'ro': 'ro', 'en': 'en', 'es-arg': 'es-ar', 'es-mex': 'es-mx', 'fr-ca': 'fr-ca',\n",
    "}\n",
    "scheme = pltextile['']\n",
    "\n",
    "for index, row in df_data.iterrows():\n",
    "    # The concept identifier is the first line of the Italian cell, cut before any ' ('\n",
    "    # qualifier, with spaces replaced by underscores.\n",
    "    strlabel = row['it'].split('\\n')[0].split(' (')[0].strip()\n",
    "    label = strlabel.replace(' ', '_')\n",
    "    if not label:\n",
    "        # An empty identifier would make the concept URI equal to the scheme URI and\n",
    "        # turn the scheme itself into a skos:Concept, so such rows are skipped.\n",
    "        print('Skipping row {}: no Italian term to build the concept URI from'.format(index))\n",
    "        continue\n",
    "    concept = pltextile[label]\n",
    "\n",
    "    c1rdf.add((scheme, SKOS.hasTopConcept, concept))\n",
    "    c1rdf.add((concept, RDF.type, SKOS.Concept))\n",
    "    c1rdf.add((concept, SKOS.inScheme, scheme))\n",
    "    c1rdf.add((concept, SKOS.topConceptOf, scheme))\n",
    "\n",
    "    for column, lang in label_columns.items():\n",
    "        # The first line of a cell is the preferred term; each further line is a variant.\n",
    "        terms = [term.strip() for term in row[column].split('\\n')]\n",
    "        if terms[0]:\n",
    "            c1rdf.add((concept, SKOS.prefLabel, Literal(terms[0], lang=lang)))\n",
    "        for variant in terms[1:]:\n",
    "            # Variants are stripped like the preferred term; blank lines yield no label.\n",
    "            if variant:\n",
    "                c1rdf.add((concept, SKOS.altLabel, Literal(variant, lang=lang)))\n",
    "\n",
    "    # The definition column is tagged as Italian\n",
    "    itdef = row['DEF'].strip()\n",
    "    if itdef:\n",
    "        c1rdf.add((concept, SKOS.definition, Literal(itdef, lang='it')))\n",
    "\n",
    "print(len(c1rdf))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "answering-latino",
   "metadata": {},
   "outputs": [],
   "source": [
    "# for s, p, o in c1rdf.triples((None,  None, None)):\n",
    "#    print(\"{}  {}\".format(s, o.n3))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "quality-scratch",
   "metadata": {},
   "source": [
    "Create *Turtle* and *RDF/XML* files in the **data/** directory with the SKOS resources for the **Pan-Latin Textile Fibres Vocabulary**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "equal-voice",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the graph as Turtle and as RDF/XML. The .ttl file uses the Turtle serializer\n",
    "# (it was previously written with the N3 one). The data/ directory must already exist.\n",
    "c1rdf.serialize(destination='data/lexpanlatskos_11.ttl', format='turtle');\n",
    "c1rdf.serialize(destination='data/lexpanlatskos_11.rdf', format='pretty-xml');"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "selected-enemy",
   "metadata": {},
   "source": [
    "### Lessico panlatino delle Maniche"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "current-material",
   "metadata": {},
   "outputs": [],
   "source": [
    "urlma = conf['Source']['LESSICOMANICHESOURCE']\n",
    "# Read the Maniche spreadsheet, rename the regional-variant columns and replace\n",
    "# missing cells with empty strings\n",
    "df_data_maniche = (\n",
    "    pd.read_csv(urlma)\n",
    "    .rename(columns={'es [ARG]': 'es-arg', 'es [MEX]': 'es-mex', 'pt [BR]': 'pt-br'})\n",
    "    .fillna('')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "incorporated-creature",
   "metadata": {},
   "outputs": [],
   "source": [
    "cl_manicherdf = rdflib.Graph()\n",
    "\n",
    "# Register the prefixes for use when the graph is serialised\n",
    "for prefix, namespace in [\n",
    "    ('pltextile', pltextile),\n",
    "    ('dc11', dc11),\n",
    "    ('dct', dct),\n",
    "    ('iso369-3', iso369),\n",
    "    ('skos', SKOS),\n",
    "    ('dc', DC),\n",
    "    ('rdf', RDF),\n",
    "    ('rdfs', RDFS),\n",
    "    ('owl', OWL),\n",
    "    ('xsd', XSD),\n",
    "]:\n",
    "    cl_manicherdf.bind(prefix, namespace)\n",
    "\n",
    "# Metadata of the concept scheme; the texts come from the configuration file and the\n",
    "# modification date is the day the notebook is run.\n",
    "# NOTE(review): creation date and version reuse the LESSICO* keys - confirm intended.\n",
    "now = datetime.datetime.today()\n",
    "today_date = now.date()\n",
    "title = Literal(conf['Texts']['LESSICOMANICHETITLE'], lang=conf['Texts']['LANG'])\n",
    "description = Literal(conf['Texts']['LESSICOMANICHEDESCRIPTION'], lang=conf['Texts']['LANG'])\n",
    "description_it = Literal(conf['Texts']['LESSICOMANICHEDESCRIPTION_IT'], lang='it')\n",
    "identifier = Literal(conf['Texts']['LESSICOMANICHEID'], lang=conf['Texts']['LANG'])\n",
    "createddate = Literal(conf['Texts']['LESSICOCREATEDATE'], datatype=XSD.date)\n",
    "moddate = Literal(today_date, datatype=XSD.date)\n",
    "version = Literal(conf['Texts']['LESSICOVERSION'], datatype=XSD.string)\n",
    "\n",
    "# The scheme is identified by the bare vocabulary namespace.\n",
    "scheme = pltextile['']\n",
    "cl_manicherdf.add((scheme, RDF.type, SKOS.ConceptScheme))\n",
    "cl_manicherdf.add((scheme, DC.title, title))\n",
    "cl_manicherdf.add((scheme, DC.identifier, identifier))\n",
    "cl_manicherdf.add((scheme, DC.description, description))\n",
    "cl_manicherdf.add((scheme, DC.description, description_it))\n",
    "cl_manicherdf.add((scheme, dct.created, createddate))\n",
    "cl_manicherdf.add((scheme, dct.modified, moddate))\n",
    "cl_manicherdf.add((scheme, OWL.versionInfo, version))\n",
    "\n",
    "# Languages of the vocabulary as ISO 639-3 (three-letter) codes. The two-letter codes\n",
    "# used before (es, ca, pt) are ISO 639-1 codes, not ISO 639-3 identifiers.\n",
    "for code in ('eng', 'spa', 'fra', 'cat', 'ita', 'por'):\n",
    "    cl_manicherdf.add((scheme, dct.language, iso369[code]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "modular-realtor",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Mapping\n",
    "for index, row in df_data_maniche.iterrows():\n",
    "    \n",
    "    strlabel=row.it.split('\\n')[0].split('(')[0].strip()\n",
    "    label=strlabel.replace(\" \", \"_\").replace(\"’\",\"\").replace(\"'\",\"\").strip()\n",
    "    #label=URIRef(row.it.split('\\n')[0].split(' (')[0].strip())\n",
    "    cl_manicherdf.add((pltextile[''], SKOS.hasTopConcept, pltextile[label]))    \n",
    "    frlabel=Literal(row[\"fr\"].split('\\n')[0].strip(), lang='fr')\n",
    "    fraltlabels=row[\"fr\"].split('\\n')[1:]\n",
    "    itlabel=Literal(row['it'].split('\\n')[0].strip(), lang='it')\n",
    "    italtlabels=row[\"it\"].split('\\n')[1:]                    \n",
    "    calabel=Literal(row['ca'].split('\\n')[0].strip(), lang='ca')\n",
    "    caaltlabels=row[\"ca\"].split('\\n')[1:]\n",
    "    eslabel=Literal(row['es'].split('\\n')[0].strip(), lang='es')\n",
    "    esaltlabels=row[\"es\"].split('\\n')[1:]\n",
    "    #gllabel=Literal(row['gl'].split('\\n')[0].strip(), lang='gl')\n",
    "    #glaltlabels=row[\"gl\"].split('\\n')[1:]\n",
    "    ptlabel=Literal(row['pt'].split('\\n')[0].strip(), lang='pt')\n",
    "    ptaltlabels=row[\"pt\"].split('\\n')[1:]\n",
    "#     rolabel=Literal(row['ro'].split('\\n')[0].strip(), lang='ro')\n",
    "#     roaltlabels=row[\"ro\"].split('\\n')[1:]\n",
    "    enlabel=Literal(row['en'].split('\\n')[0].strip(), lang='en')\n",
    "    enaltlabels=row[\"en\"].split('\\n')[1:]\n",
    "    \n",
    "    esarglabel=Literal(row['es-arg'].split('\\n')[0].strip(), lang='es-ar')\n",
    "    esargaltlabels=row[\"es-arg\"].split('\\n')[1:]\n",
    "  \n",
    "\n",
    "    esmexlabel=Literal(row['es-mex'].split('\\n')[0].strip(), lang='es-mx')\n",
    "    esmexaltlabels=row[\"es-mex\"].split('\\n')[1:]\n",
    "    ptbrlabel=Literal(row['pt-br'].split('\\n')[0].strip(), lang='pt-br')\n",
    "    ptbraltlabels=row[\"pt-br\"].split('\\n')[1:]\n",
    "    \n",
    "    #definition\n",
    "    itdef=Literal(row[\"DEF\"].strip(), lang='it')\n",
    "    #DOI as rdfs:seeAlso\n",
    "    imagedoi=row[\"DOI\"].strip()\n",
    "    \n",
    "    cl_manicherdf.add((pltextile[label], RDF.type, SKOS.Concept))\n",
    "    cl_manicherdf.add((pltextile[label], SKOS.inScheme, pltextile['']))\n",
    "    cl_manicherdf.add((pltextile[label], SKOS.topConceptOf, pltextile['']))\n",
    "    if (imagedoi!=''):\n",
    "        cl_manicherdf.add((pltextile[label], RDFS.seeAlso, URIRef('https://doi.org/'+imagedoi)))\n",
    "    \n",
    "    for alab in esargaltlabels:\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es-ar')))\n",
    "    \n",
    "     \n",
    "    for alab in esmexaltlabels:\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es-mx')))\n",
    "        \n",
    "    for alab in ptbraltlabels:\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='pt-br')))\n",
    "    \n",
    "    for alab in esaltlabels:\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es')))\n",
    "    \n",
    "#     for alab in glaltlabels:\n",
    "#         cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='gl')))\n",
    "    \n",
    "    for alab in ptaltlabels:\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='pt')))\n",
    "    \n",
    "#     for alab in roaltlabels:\n",
    "#         cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='ro')))\n",
    "    \n",
    "    for alab in enaltlabels:\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='en')))\n",
    "        \n",
    "    for alab in caaltlabels:\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='ca')))\n",
    "        \n",
    "    for alab in fraltlabels:\n",
    "        #print (\"tt \"+alab)\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='fr')))\n",
    "    for alab in italtlabels:\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='it')))\n",
    "   \n",
    "    \n",
    "    if(frlabel):\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.prefLabel, frlabel))\n",
    "    if(itlabel):\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.prefLabel, itlabel))\n",
    "#     if(gllabel):\n",
    "#         cl_manicherdf.add((pltextile[label], SKOS.prefLabel, gllabel))\n",
    "    \n",
    "    if(ptlabel):\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.prefLabel, ptlabel))\n",
    "#     if(rolabel):\n",
    "#         cl_manicherdf.add((pltextile[label], SKOS.prefLabel, rolabel))\n",
    "    if(enlabel):\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.prefLabel, enlabel))\n",
    "        \n",
    "    if(calabel):    \n",
    "        cl_manicherdf.add((pltextile[label], SKOS.prefLabel, calabel))\n",
    "    if(eslabel):  \n",
    "        cl_manicherdf.add((pltextile[label], SKOS.prefLabel, eslabel))\n",
    "    if(esarglabel):\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.prefLabel, esarglabel))\n",
    "    \n",
    "\n",
    "    if(esmexlabel):\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.prefLabel, esmexlabel))\n",
    "    if(ptbrlabel):\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.prefLabel, ptbrlabel))\n",
    "    \n",
    "    if (itdef):\n",
    "        cl_manicherdf.add((pltextile[label], SKOS.definition, itdef))\n",
    "\n",
    "print(len(cl_manicherdf))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "matched-mustang",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the graph as Turtle and as RDF/XML. The .ttl file uses the Turtle serializer\n",
    "# (it was previously written with the N3 one). The data/ directory must already exist.\n",
    "cl_manicherdf.serialize(destination='data/lexpanlatmanicheskos_11.ttl', format='turtle');\n",
    "cl_manicherdf.serialize(destination='data/lexpanlatmanicheskos_11.rdf', format='pretty-xml');"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "talented-making",
   "metadata": {},
   "source": [
    "### Lessico panlatino dei Colli"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "centered-advantage",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the Colli spreadsheet from the location given in the configuration\n",
    "urlco = conf['Source']['LESSICOCOLLISOURCE']\n",
    "df_data_colli = pd.read_csv(filepath_or_buffer=urlco)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "desperate-uruguay",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rename the regional-variant columns and replace missing cells with empty strings\n",
    "df_data_colli = (\n",
    "    df_data_colli\n",
    "    .rename(columns={'es [ARG]': 'es-arg', 'es [MEX]': 'es-mex', 'pt [BR]': 'pt-br'})\n",
    "    .fillna('')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "magnetic-stake",
   "metadata": {},
   "outputs": [],
   "source": [
    "cl_collirdf = rdflib.Graph()\n",
    "\n",
    "# Register the prefixes for use when the graph is serialised. 'rdfs' is bound here as\n",
    "# in the other two graphs, because the mapping emits rdfs:seeAlso triples.\n",
    "for prefix, namespace in [\n",
    "    ('pltextile', pltextile),\n",
    "    ('dc11', dc11),\n",
    "    ('dct', dct),\n",
    "    ('iso369-3', iso369),\n",
    "    ('skos', SKOS),\n",
    "    ('dc', DC),\n",
    "    ('rdf', RDF),\n",
    "    ('rdfs', RDFS),\n",
    "    ('owl', OWL),\n",
    "    ('xsd', XSD),\n",
    "]:\n",
    "    cl_collirdf.bind(prefix, namespace)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "hidden-purple",
   "metadata": {},
   "source": [
    "SKOS concept scheme"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "christian-paste",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Metadata of the concept scheme; the texts come from the configuration file and the\n",
    "# modification date is the day the notebook is run.\n",
    "# NOTE(review): creation date and version reuse the LESSICO* keys - confirm intended.\n",
    "now = datetime.datetime.today()\n",
    "today_date = now.date()\n",
    "title = Literal(conf['Texts']['LESSICOCOLLITITLE'], lang=conf['Texts']['LANG'])\n",
    "description = Literal(conf['Texts']['LESSICOCOLLIDESCRIPTION'], lang=conf['Texts']['LANG'])\n",
    "description_it = Literal(conf['Texts']['LESSICOCOLLIDESCRIPTION_IT'], lang='it')\n",
    "identifier = Literal(conf['Texts']['LESSICOCOLLIID'], lang=conf['Texts']['LANG'])\n",
    "createddate = Literal(conf['Texts']['LESSICOCREATEDATE'], datatype=XSD.date)\n",
    "moddate = Literal(today_date, datatype=XSD.date)\n",
    "version = Literal(conf['Texts']['LESSICOVERSION'], datatype=XSD.string)\n",
    "\n",
    "# The scheme is identified by the bare vocabulary namespace.\n",
    "scheme = pltextile['']\n",
    "cl_collirdf.add((scheme, RDF.type, SKOS.ConceptScheme))\n",
    "cl_collirdf.add((scheme, DC.title, title))\n",
    "cl_collirdf.add((scheme, DC.identifier, identifier))\n",
    "cl_collirdf.add((scheme, DC.description, description))\n",
    "cl_collirdf.add((scheme, DC.description, description_it))\n",
    "cl_collirdf.add((scheme, dct.created, createddate))\n",
    "cl_collirdf.add((scheme, dct.modified, moddate))\n",
    "cl_collirdf.add((scheme, OWL.versionInfo, version))\n",
    "\n",
    "# Languages of the vocabulary as ISO 639-3 (three-letter) codes. The two-letter codes\n",
    "# used before (es, pt, ca) are ISO 639-1 codes, not ISO 639-3 identifiers.\n",
    "for code in ('eng', 'spa', 'fra', 'ita', 'por', 'cat'):\n",
    "    cl_collirdf.add((scheme, dct.language, iso369[code]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "incorporate-difference",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Mapping\n",
    "for index, row in df_data_colli.iterrows():\n",
    "    \n",
    "    strlabel=row.it.split('\\n')[0].split(' (')[0].strip()\n",
    "    label=strlabel.replace(\" \", \"_\").replace(\"’\",\"\")\n",
    "    #label=URIRef(row.it.split('\\n')[0].split(' (')[0].strip())\n",
    "    cl_collirdf.add((pltextile[''], SKOS.hasTopConcept, pltextile[label]))    \n",
    "    frlabel=Literal(row[\"fr\"].split('\\n')[0].strip(), lang='fr')\n",
    "    fraltlabels=row[\"fr\"].split('\\n')[1:]\n",
    "    itlabel=Literal(row['it'].split('\\n')[0].strip(), lang='it')\n",
    "    italtlabels=row[\"it\"].split('\\n')[1:]                    \n",
    "    calabel=Literal(row['ca'].split('\\n')[0].strip(), lang='ca')\n",
    "    caaltlabels=row[\"ca\"].split('\\n')[1:]\n",
    "    eslabel=Literal(row['es'].split('\\n')[0].strip(), lang='es')\n",
    "    esaltlabels=row[\"es\"].split('\\n')[1:]\n",
    "    #gllabel=Literal(row['gl'].split('\\n')[0].strip(), lang='gl')\n",
    "    #glaltlabels=row[\"gl\"].split('\\n')[1:]\n",
    "    ptlabel=Literal(row['pt'].split('\\n')[0].strip(), lang='pt')\n",
    "    ptaltlabels=row[\"pt\"].split('\\n')[1:]\n",
    "#     rolabel=Literal(row['ro'].split('\\n')[0].strip(), lang='ro')\n",
    "#     roaltlabels=row[\"ro\"].split('\\n')[1:]\n",
    "    enlabel=Literal(row['en'].split('\\n')[0].strip(), lang='en')\n",
    "    enaltlabels=row[\"en\"].split('\\n')[1:]\n",
    "    \n",
    "    esarglabel=Literal(row['es-arg'].split('\\n')[0].strip(), lang='es-ar')\n",
    "    esargaltlabels=row[\"es-arg\"].split('\\n')[1:]\n",
    "  \n",
    "\n",
    "    esmexlabel=Literal(row['es-mex'].split('\\n')[0].strip(), lang='es-mx')\n",
    "    esmexaltlabels=row[\"es-mex\"].split('\\n')[1:]\n",
    "    ptbrlabel=Literal(row['pt-br'].split('\\n')[0].strip(), lang='pt-br')\n",
    "    ptbraltlabels=row[\"pt-br\"].split('\\n')[1:]\n",
    "    \n",
    "    #definition\n",
    "    itdef=Literal(row[\"DEF\"].strip(), lang='it')\n",
    "    #DOI as rdfs:seeAlso\n",
    "    imagedoi=row[\"DOI\"].strip()\n",
    "    \n",
    "    cl_collirdf.add((pltextile[label], RDF.type, SKOS.Concept))\n",
    "    cl_collirdf.add((pltextile[label], SKOS.inScheme, pltextile['']))\n",
    "    cl_collirdf.add((pltextile[label], SKOS.topConceptOf, pltextile['']))\n",
    "    if (imagedoi!=''):\n",
    "        cl_collirdf.add((pltextile[label], RDFS.seeAlso, URIRef('https://doi.org/'+imagedoi)))\n",
    "    \n",
    "    for alab in esargaltlabels:\n",
    "        cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es-ar')))\n",
    "    \n",
    "     \n",
    "    for alab in esmexaltlabels:\n",
    "        cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es-mx')))\n",
    "        \n",
    "    for alab in ptbraltlabels:\n",
    "        cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='pt-br')))\n",
    "    \n",
    "    for alab in esaltlabels:\n",
    "        cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='es')))\n",
    "    \n",
    "#     for alab in glaltlabels:\n",
    "#         cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='gl')))\n",
    "    \n",
    "    for alab in ptaltlabels:\n",
    "        cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='pt')))\n",
    "    \n",
    "#     for alab in roaltlabels:\n",
    "#         cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='ro')))\n",
    "    \n",
    "    for alab in enaltlabels:\n",
    "        cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='en')))\n",
    "        \n",
    "    for alab in caaltlabels:\n",
    "        cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='ca')))\n",
    "        \n",
    "    for alab in fraltlabels:\n",
    "        #print (\"tt \"+alab)\n",
    "        cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='fr')))\n",
    "    for alab in italtlabels:\n",
    "        cl_collirdf.add((pltextile[label], SKOS.altLabel, Literal(alab, lang='it')))\n",
    "   \n",
    "    \n",
    "    if(frlabel):\n",
    "        cl_collirdf.add((pltextile[label], SKOS.prefLabel, frlabel))\n",
    "    if(itlabel):\n",
    "        cl_collirdf.add((pltextile[label], SKOS.prefLabel, itlabel))\n",
    "#     if(gllabel):\n",
    "#         cl_collirdf.add((pltextile[label], SKOS.prefLabel, gllabel))\n",
    "    \n",
    "    if(ptlabel):\n",
    "        cl_collirdf.add((pltextile[label], SKOS.prefLabel, ptlabel))\n",
    "#     if(rolabel):\n",
    "#         cl_collirdf.add((pltextile[label], SKOS.prefLabel, rolabel))\n",
    "    if(enlabel):\n",
    "        cl_collirdf.add((pltextile[label], SKOS.prefLabel, enlabel))\n",
    "        \n",
    "    if(calabel):    \n",
    "        cl_collirdf.add((pltextile[label], SKOS.prefLabel, calabel))\n",
    "    if(eslabel):  \n",
    "        cl_collirdf.add((pltextile[label], SKOS.prefLabel, eslabel))\n",
    "    if(esarglabel):\n",
    "        cl_collirdf.add((pltextile[label], SKOS.prefLabel, esarglabel))\n",
    "    \n",
    "\n",
    "    if(esmexlabel):\n",
    "        cl_collirdf.add((pltextile[label], SKOS.prefLabel, esmexlabel))\n",
    "    if(ptbrlabel):\n",
    "        cl_collirdf.add((pltextile[label], SKOS.prefLabel, ptbrlabel))\n",
    "    \n",
    "    if (itdef):\n",
    "        cl_collirdf.add((pltextile[label], SKOS.definition, itdef))\n",
    "\n",
    "print(len(cl_collirdf))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "applicable-commissioner",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the graph as Turtle and as RDF/XML. The .ttl file uses the Turtle serializer\n",
    "# (it was previously written with the N3 one). The data/ directory must already exist.\n",
    "cl_collirdf.serialize(destination='data/lexpanlatcolliskos_11.ttl', format='turtle');\n",
    "cl_collirdf.serialize(destination='data/lexpanlatcolliskos_11.rdf', format='pretty-xml');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "limiting-duration",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}