From 51983970771586baa98740a4feaefdf6f67d8748 Mon Sep 17 00:00:00 2001 From: Cesare Concordia Date: Sat, 6 Feb 2021 17:34:38 +0100 Subject: [PATCH] Upload files to '' --- Progetto_Lett.ipynb | 1394 ++++++++++--------------------------------- 1 file changed, 303 insertions(+), 1091 deletions(-) diff --git a/Progetto_Lett.ipynb b/Progetto_Lett.ipynb index 6440163..c529e56 100644 --- a/Progetto_Lett.ipynb +++ b/Progetto_Lett.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -17,6 +17,7 @@ "import sys\n", "import numpy as np\n", "import pandas as pd\n", + "import rdflib\n", "import matplotlib.pyplot as plt\n", "# importing useful Python utility libraries we'll need\n", "from collections import Counter, defaultdict\n", @@ -25,7 +26,24 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#from rdflib.namespace import CSVW, DC, DCAT, DCTERMS, DOAP, FOAF, ODRL2, ORG, OWL, \\\n", + "# PROF, PROV, RDF, RDFS, SDO, SH, SKOS, SOSA, SSN, TIME, \\\n", + "# VOID, XMLNS, XSD\n", + "from rdflib.namespace import DC, DCAT, DCTERMS, OWL, \\\n", + " RDF, RDFS, SKOS, \\\n", + " XMLNS, XSD\n", + "from rdflib import Namespace\n", + "from rdflib import URIRef, BNode, Literal\n", + "n = Namespace(\"http://hdn.dantenetwork.it/resource/work/commedia/cantica/\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -34,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -43,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -52,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -61,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -74,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -87,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -148,16 +166,18 @@ }, { "cell_type": "code", - "execution_count": 141, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class TEIFile(object):\n", " def __init__(self, filename, idres=0):\n", + " self.g = rdflib.Graph()\n", " self.filename = filename\n", " self.soup = read_tei(filename)\n", " self._text = None\n", " self.idres=idres;\n", + " self.InFor = URIRef(\"http://example.org/word/InflectedForm\")\n", " # self._lemmas = None\n", " # self._lemma_lemmas = None\n", " # self._categ_lemmas = None\n", @@ -304,7 +324,7 @@ " IRIff_text= \"http://hdn.dantenetwork.it/resource/work/commedia/cantica/\"+str(self.idres)+\"/\"+\"/\".join(canto.lower().split())+\"/verso/\"+str(i)+\"#\"+str(j);\n", " IRIff_text_type= IRIff_text +' rdf:type InflectedForm . '\n", " IRIff_text_pos= IRIff_text +' isInPosition '+str(j);\n", - " IRIff_text_exp= IRIff_text +' hasExpression '+(\" \".join(lm_text.split()))+\"^^xsd:string . \";\n", + " IRIff_text_exp= IRIff_text +' hasExpression \"'+(\" \".join(lm_text.split()))+'\"^^xsd:string .' ;\n", " IRIff_text_oo= IRIff_text +' isOccurrenceOf ulem . ';\n", " IRIff_text_co= IRIff_text +\" http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/\"+str(self.idres)+\"/\"+\"/\".join(canto.lower().split())+\"/verso/\"+str(i);\n", " lstiri.append(IRIff_text);\n", @@ -321,6 +341,70 @@ " \n", " \n", " return iriffs\n", + " \n", + " #IRI forma flessa RDF\n", + " @property\n", + " def IRIffRDF(self):\n", + " iriffs = []\n", + " i=0\n", + " for div in self.soup.body.find_all(\"div1\"):\n", + " for verso in div.find_all('l'):\n", + " i=i+1;\n", + " j=0;\n", + " for lm in verso.find_all(\"lm\"):\n", + " lstctg=[];\n", + " lstlms=[];\n", + " lstiri=[];\n", + " j=j+1;\n", + " lm_text=elem_to_text(lm).strip();\n", + " #ctg=lm.get('catg');\n", + " if (lm.get('catg')!=None):\n", + " ctg=lm.get('catg');\n", + " else:\n", + " ctg=\"non_spec\";\n", + " \n", + " lstctg.append(\" \".join(ctg.split())); \n", + " \n", + " if (lm.get('lemma')!=None):\n", + " lemma=lm.get('lemma');\n", + " else:\n", + " lemma=\"non_spec\";\n", + " lstlms.append(\" \".join(lemma.split())); \n", + " for parent in lm.parents:\n", + " if (parent.name=='div1'):\n", + " canto = parent.contents[0];\n", + " if (parent.name=='lm1' and iriffs[-1][0]==\" \".join(lm_text.split())):\n", + " j=j-1;\n", + " #lstctg=lstctg+iriffs[-1][1];\n", + " #lstlms=lstlms+iriffs[-1][2];\n", + " iriffs.pop();\n", + " #g.add((bob, RDF.type, FOAF.Person))\n", + " #bob = URIRef(\"http://example.org/people/Bob\")\n", + " IRIff_text= URIRef(\"http://hdn.dantenetwork.it/resource/work/commedia/cantica/\"+str(self.idres)+\"/\"+\"/\".join(canto.lower().split())+\"/verso/\"+str(i)+\"#\"+str(j));\n", + " self.g.remove((IRIff_text, None, None))\n", + " self.g.add((IRIff_text, RDF.type, self.InFor))\n", + " \n", + " IRIff_text= \"http://hdn.dantenetwork.it/resource/work/commedia/cantica/\"+str(self.idres)+\"/\"+\"/\".join(canto.lower().split())+\"/verso/\"+str(i)+\"#\"+str(j);\n", + " IRIff_text_type= IRIff_text +' rdf:type InflectedForm . '\n", + " IRIff_text_pos= IRIff_text +' isInPosition '+str(j);\n", + " IRIff_text_exp= IRIff_text +' hasExpression \"'+(\" \".join(lm_text.split()))+'\"^^xsd:string .' ;\n", + " IRIff_text_oo= IRIff_text +' isOccurrenceOf ulem . ';\n", + " IRIff_text_co= IRIff_text +\" http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/\"+str(self.idres)+\"/\"+\"/\".join(canto.lower().split())+\"/verso/\"+str(i);\n", + " lstiri.append(IRIff_text);\n", + " lstiri.append(IRIff_text_type);\n", + " lstiri.append(IRIff_text_co);\n", + " lstiri.append(IRIff_text_pos);\n", + " lstiri.append(IRIff_text_exp);\n", + " lstiri.append(IRIff_text_oo);\n", + " iriffs.append((\" \".join(lm_text.split()), canto.replace('\\n','').strip(), i, j, lstiri));\n", + " \n", + " \n", + " # ordr_lms.append((\" \".join(lm_text.split()), \" \".join(ctg.split()), \" \".join(lemma.split()), canto.replace('\\n','').strip(), i, j, \"hdn:Works/Commedia/Cantica/1/\"+str(i),\n", + " # \"hdn:Works/Commedia/Cantica/1/\"+str(i)+\"/#\"+str(j)));\n", + " \n", + " \n", + " return self.g\n", + " \n", " #IRI del verso\n", " @property\n", " def IRIverso(self):\n", @@ -362,6 +446,7 @@ " \n", " \n", " return iris\n", + " #IRI del verso\n", " \n", " \n", " #test\n", @@ -406,7 +491,7 @@ }, { "cell_type": "code", - "execution_count": 142, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -414,7 +499,7 @@ " tei = TEIFile(tei_file, idres)\n", " print(f\"Handled {tei_file}\")\n", " base_name = tei_file\n", - " return tei.orderedlemma, tei.IRIverso, tei.IRIff, tei.categ_lemma, tei.lemma_lemma #, tei.abstract" + " return tei.orderedlemma, tei.IRIverso, tei.IRIff, tei.IRIffRDF, tei.categ_lemma, tei.lemma_lemma #, tei.abstract" ] }, { @@ -427,43 +512,9 @@ }, { "cell_type": "code", - "execution_count": 112, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('Nel', 'rdms', 'il', 'Canto 1')\n", - "\n", - "('mezzo', 'eilaksl', 'in mezzo di', 'Canto 1')\n", - "\n", - "('del', 'rdms', 'il', 'Canto 1')\n", - "\n", - "('cammin', 'sm2ms', 'cammino', 'Canto 1')\n", - "\n", - "('di', 'epskg', 'di', 'Canto 1')\n", - "\n", - "('nostra', 'as1fs', 'nostro', 'Canto 1')\n", - "\n", - "('vita', 'sf1fs', 'vita', 'Canto 1')\n", - "\n", - "('mi', 'pf1sypr', 'mi', 'Canto 1')\n", - "\n", - "('ritrovai', 'vta+1irs1', 'ritrovare', 'Canto 1')\n", - "\n", - "('per', 'epskpl', 'per', 'Canto 1')\n", - "\n", - "('una', 'rifs', 'una', 'Canto 1')\n", - "\n", - "('selva', 'sf1fs', 'selva', 'Canto 1')\n", - "\n", - "('oscura', 'a1fs', 'oscuro', 'Canto 1')\n", - "\n", - "...\n" - ] - } - ], + "outputs": [], "source": [ "tei = TEIFile('/Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/inferno_forparsing.xml', 1)\n", "bbs=tei.ff_ea\n", @@ -474,6 +525,21 @@ " break" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g1=tei.IRIffRDF\n", + " \n", + "print(len(g1)) # prints 2\n", + "\n", + "import pprint\n", + "for stmt in g1:\n", + " pprint.pprint(stmt)\n" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -484,43 +550,18 @@ }, { "cell_type": "code", - "execution_count": 143, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Handled /Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/inferno_forparsing.xml\n" - ] - } - ], + "outputs": [], "source": [ "mytesto=tei_to_csv_entry('/Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/inferno_forparsing.xml', 1)" ] }, { "cell_type": "code", - "execution_count": 144, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "FormaFlessa 33400\n", - "Categoria 33400\n", - "Lemma 33400\n", - "Canto 33400\n", - "Verso 33400\n", - "PosizioneFFNelVerso 33400\n", - "dtype: int64" - ] - }, - "execution_count": 144, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "data = [mytesto[0]]\n", "#data[0]\n", @@ -531,152 +572,9 @@ }, { "cell_type": "code", - "execution_count": 145, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
FormaFlessaCategoriaLemmaCantoVersoPosizioneFFNelVerso
33390un[rims][uno]Canto 3447206
33391pertugio[sm2ms][pertugio]Canto 3447207
33392tondo[a1ms][tondo]Canto 3447208
33393E[cscc][e]Canto 3447211
33394quindi[b][quindi]Canto 3447212
33395uscimmo[vi3irp1][uscire]Canto 3447213
33396a[epsf][a]Canto 3447214
33397riveder[vta2fp][rivedere]Canto 3447215
33398le[rdfp][la]Canto 3447216
33399stelle[sf1fp][stella]Canto 3447217
\n", - "
" - ], - "text/plain": [ - " FormaFlessa Categoria Lemma Canto Verso PosizioneFFNelVerso\n", - "33390 un [rims] [uno] Canto 34 4720 6\n", - "33391 pertugio [sm2ms] [pertugio] Canto 34 4720 7\n", - "33392 tondo [a1ms] [tondo] Canto 34 4720 8\n", - "33393 E [cscc] [e] Canto 34 4721 1\n", - "33394 quindi [b] [quindi] Canto 34 4721 2\n", - "33395 uscimmo [vi3irp1] [uscire] Canto 34 4721 3\n", - "33396 a [epsf] [a] Canto 34 4721 4\n", - "33397 riveder [vta2fp] [rivedere] Canto 34 4721 5\n", - "33398 le [rdfp] [la] Canto 34 4721 6\n", - "33399 stelle [sf1fp] [stella] Canto 34 4721 7" - ] - }, - "execution_count": 145, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "testo_tabella.tail(10)" ] @@ -703,23 +601,9 @@ }, { "cell_type": "code", - "execution_count": 146, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "NumeroVerso 4721\n", - "Verso 4721\n", - "IRIVerso 4721\n", - "dtype: int64" - ] - }, - "execution_count": 146, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "data_IRI_versi_inf = [mytesto[1]]\n", "#data_IRI_versi\n", @@ -729,82 +613,9 @@ }, { "cell_type": "code", - "execution_count": 147, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NumeroVerso Verso IRIVerso
01Nel mezzo del cammin di nostra vitahttp://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1\n", - " a efrbroo:F2_Expression ,\n", - " rdfs:Resource ; \n", - "http://erlangen-crm.org/current/P190_has_symbolic_content \"Nel mezzo del cammin di nostra vita\"^^xsd:string ;\n", - " http://erlangen-crm.org/current/P3_has_note \"1\"^^xsd:int ;\n", - " http://hdn.dantenetwork.it/resource/has_number \"1\"^^xsd:int .
12mi ritrovai per una selva oscurahttp://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/2\n", - " a efrbroo:F2_Expression ,\n", - " rdfs:Resource ; \n", - "http://erlangen-crm.org/current/P190_has_symbolic_content \"mi ritrovai per una selva oscura\"^^xsd:string ;\n", - " http://erlangen-crm.org/current/P3_has_note \"2\"^^xsd:int ;\n", - " http://hdn.dantenetwork.it/resource/has_number \"2\"^^xsd:int .
23ché la diritta via era smarrita.http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/3\n", - " a efrbroo:F2_Expression ,\n", - " rdfs:Resource ; \n", - "http://erlangen-crm.org/current/P190_has_symbolic_content \"ché la diritta via era smarrita.\"^^xsd:string ;\n", - " http://erlangen-crm.org/current/P3_has_note \"3\"^^xsd:int ;\n", - " http://hdn.dantenetwork.it/resource/has_number \"3\"^^xsd:int .
34Ahi quanto a dir qual era è cosa durahttp://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/4\n", - " a efrbroo:F2_Expression ,\n", - " rdfs:Resource ; \n", - "http://erlangen-crm.org/current/P190_has_symbolic_content \"Ahi quanto a dir qual era è cosa dura\"^^xsd:string ;\n", - " http://erlangen-crm.org/current/P3_has_note \"4\"^^xsd:int ;\n", - " http://hdn.dantenetwork.it/resource/has_number \"4\"^^xsd:int .
45esta selva selvaggia e aspra e fortehttp://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/5\n", - " a efrbroo:F2_Expression ,\n", - " rdfs:Resource ; \n", - "http://erlangen-crm.org/current/P190_has_symbolic_content \"esta selva selvaggia e aspra e forte\"^^xsd:string ;\n", - " http://erlangen-crm.org/current/P3_has_note \"5\"^^xsd:int ;\n", - " http://hdn.dantenetwork.it/resource/has_number \"5\"^^xsd:int .
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 147, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_IRI_versi_inf.head().style.set_properties(subset=['IRIVerso'], **{'width': '400px'})" ] @@ -820,25 +631,9 @@ }, { "cell_type": "code", - "execution_count": 148, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "FormaFlessa 33400\n", - "Canto 33400\n", - "NumeroVerso 33400\n", - "Offset 33400\n", - "IRIFF 33400\n", - "dtype: int64" - ] - }, - "execution_count": 148, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "data_IRI_ff_inf = [mytesto[2]]\n", "#data_IRI_versi\n", @@ -848,69 +643,160 @@ }, { "cell_type": "code", - "execution_count": 149, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
FormaFlessa Canto NumeroVerso Offset IRIFF
0NelCanto 111['http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#1', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#1 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#1 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#1 isInPosition 1', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#1 hasExpression Nel^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#1 isOccurrenceOf ulem . ']
1mezzoCanto 112['http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#2', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#2 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#2 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#2 isInPosition 2', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#2 hasExpression mezzo^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#2 isOccurrenceOf ulem . ']
2delCanto 113['http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#3', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#3 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#3 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#3 isInPosition 3', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#3 hasExpression del^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#3 isOccurrenceOf ulem . ']
3camminCanto 114['http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#4', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#4 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#4 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#4 isInPosition 4', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#4 hasExpression cammin^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#4 isOccurrenceOf ulem . ']
4diCanto 115['http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#5', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#5 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#5 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#5 isInPosition 5', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#5 hasExpression di^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#5 isOccurrenceOf ulem . ']
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 149, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "df_IRI_ff_inf.head().style.set_properties(subset=['IRIFF'], **{'width': '400px'})" + "df_IRI_ff_inf.tail().style.set_properties(subset=['IRIFF'], **{'width': '400px'})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Forse non tutti sanno che... \n", + "\n", + "\n", + "*Nota: i risultati delle prossime elaborazioni considerano diverse tra loro due parole parole anche se differiscono per la presenza di maiuscole/minuscole*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_inf_per_test=df_IRI_ff_inf[['FormaFlessa', 'Canto', 'NumeroVerso', 'Offset']]\n", + "df_num_ff=df_inf_per_test[df_inf_per_test['FormaFlessa'].str.len()>3]['FormaFlessa'].value_counts()\n", + "print(\"Le 10 parole (più lunghe di 3 caratteri) usate con maggiore frequenza nella prima Cantica sono:\", end=\"\\n\"*2)\n", + "print('{:<10}{}'.format('Parola', 'Frequenza'))\n", + "df_num_ff.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_inf_versi=df_inf_per_test.groupby('NumeroVerso')['FormaFlessa'].apply(list).reset_index(name='parole')\n", + "#test_inf_versi.head()\n", + "parole_counter = Counter(itertools.chain(*test_inf_versi['parole']))\n", + "print('\\nCi sono {} parole diverse nella prima Cantica.\\n'.format(len(parole_counter)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"\\nLe 10 parole più frequenti nella prima Cantica, indipendentemente dalla lunghezza in caratteri, sono: \\n\")\n", + "print('{:<30}Frequenza\\n'.format(\"Parola\"))\n", + "for k, v in parole_counter.most_common(10):\n", + " print(f'{k:<30}{v}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nel risultato della cella qui sotto si vede che alcune parole hanno il segno di punteggiatura, nella creazione degli IRI dovremmo toglierlo?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "least_common_parole = parole_counter.most_common()[-30:]\n", + "print(\"\\nAlcune parole che compaiono una sola volta nella prima Cantica: \\n\")\n", + "print('{:<30}Frequenza\\n'.format(\"Parola\"))\n", + "for lk, lv in least_common_parole:\n", + " print(f'{lk:<30}{lv}')\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Frequenza delle parole palindrome\n", + "def is_palindrome(s):\n", + " return s==s[::-1]\n", + "\n", + "for k, v in parole_counter.most_common():\n", + " if(len(k)>1 and is_palindrome(k)):\n", + " print(f'{k:<30}{v}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#test_versi_1=test_inf_versi['parole']\n", + "#for tve in test_versi_1:\n", + "# if(is_palindrome((\"\".join(tve)))):\n", + "# print (\"\".join(tve))\n", + " #print ((\" \".join(tve)[::-1]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cooccurrences = []\n", + "\n", + "for parole in test_inf_versi['parole']:\n", + " parole_pairs = itertools.combinations(parole, 2)\n", + " for pair in parole_pairs:\n", + " if(len(pair[0])>3 and len(pair[1])>3):\n", + " cooccurrences.append(tuple((pair)))\n", + " # cooccurrences.append(tuple(sorted(pair)))\n", + "\n", + "# Conto la frequenza di ciascuna cooccorrenza\n", + "parole_co_counter = Counter(cooccurrences)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"La frequenza delle co-occorrenze di due parole (non necessariamente consecutive e formate da almeno 4 caratteri) \\nin uno stesso verso della prima Cantica\", '\\n')\n", + "print('{:<50}{}'.format('Co-ooccorrenza', 'Frequenza\\n'))\n", + "for k, v in parole_co_counter.most_common(20):\n", + " parole = '['+k[0] + ' , ' + k[1]+']'\n", + " print(f'{parole:<50}{v}')\n", + "print('\\n')\n", + "#print('\\nMedia:')\n", + "#print(np.median(list(parole_co_counter.values())))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Cominciamo a lavorare con RDF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "#g.parse(\"/Users/cesare/Projects/hdn/triple/DanteTriple/xml/DaMa/Commedia.rdf\", format=\"nt\")\n" ] }, { @@ -923,7 +809,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -939,43 +825,18 @@ }, { "cell_type": "code", - "execution_count": 157, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Handled /Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/purgatorio.xml\n" - ] - } - ], + "outputs": [], "source": [ "parsed_purgatorio=tei_to_csv_entry('/Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/purgatorio.xml', 2)" ] }, { "cell_type": "code", - "execution_count": 158, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "FormaFlessa 33245\n", - "Categoria 33245\n", - "Lemma 33245\n", - "Canto 33245\n", - "Verso 33245\n", - "PosizioneFFNelVerso 33245\n", - "dtype: int64" - ] - }, - "execution_count": 158, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "data_purgatorio = [parsed_purgatorio[0]]\n", "#dfObj_purgatorio = pd.DataFrame(data_purgatorio[0]) \n", @@ -985,109 +846,9 @@ }, { "cell_type": "code", - "execution_count": 159, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
FormaFlessaCategoriaLemmaCantoVersoPosizioneFFNelVerso
33240disposto[vtp2pra1ms][disporre]Canto 3347553
33241a[epsb][a]Canto 3347554
33242salire[vi3fp][salire]Canto 3347555
33243alle[rdfp, epakml][la, a]Canto 3347556
33244stelle[sf1fp][stella]Canto 3347557
\n", - "
" - ], - "text/plain": [ - " FormaFlessa Categoria Lemma Canto Verso \\\n", - "33240 disposto [vtp2pra1ms] [disporre] Canto 33 4755 \n", - "33241 a [epsb] [a] Canto 33 4755 \n", - "33242 salire [vi3fp] [salire] Canto 33 4755 \n", - "33243 alle [rdfp, epakml] [la, a] Canto 33 4755 \n", - "33244 stelle [sf1fp] [stella] Canto 33 4755 \n", - "\n", - " PosizioneFFNelVerso \n", - "33240 3 \n", - "33241 4 \n", - "33242 5 \n", - "33243 6 \n", - "33244 7 " - ] - }, - "execution_count": 159, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "testo_purgatorio_tabella.tail()" ] @@ -1103,23 +864,9 @@ }, { "cell_type": "code", - "execution_count": 160, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "NumeroVerso 4755\n", - "Verso 4755\n", - "IRIVerso 4755\n", - "dtype: int64" - ] - }, - "execution_count": 160, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "data_IRI_versi_pur = [parsed_purgatorio[1]]\n", "#data_IRI_versi\n", @@ -1129,107 +876,18 @@ }, { "cell_type": "code", - "execution_count": 161, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NumeroVerso Verso IRIVerso
01Per correr miglior acque alza le velehttp://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/1/verso/1\n", - " a efrbroo:F2_Expression ,\n", - " rdfs:Resource ; \n", - "http://erlangen-crm.org/current/P190_has_symbolic_content \"Per correr miglior acque alza le vele\"^^xsd:string ;\n", - " http://erlangen-crm.org/current/P3_has_note \"1\"^^xsd:int ;\n", - " http://hdn.dantenetwork.it/resource/has_number \"1\"^^xsd:int .
12omai la navicella del mio ingegno,http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/1/verso/2\n", - " a efrbroo:F2_Expression ,\n", - " rdfs:Resource ; \n", - "http://erlangen-crm.org/current/P190_has_symbolic_content \"omai la navicella del mio ingegno,\"^^xsd:string ;\n", - " http://erlangen-crm.org/current/P3_has_note \"2\"^^xsd:int ;\n", - " http://hdn.dantenetwork.it/resource/has_number \"2\"^^xsd:int .
23che lascia dietro a sé mar sì crudele;http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/1/verso/3\n", - " a efrbroo:F2_Expression ,\n", - " rdfs:Resource ; \n", - "http://erlangen-crm.org/current/P190_has_symbolic_content \"che lascia dietro a sé mar sì crudele;\"^^xsd:string ;\n", - " http://erlangen-crm.org/current/P3_has_note \"3\"^^xsd:int ;\n", - " http://hdn.dantenetwork.it/resource/has_number \"3\"^^xsd:int .
34e canterò di quel secondo regnohttp://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/1/verso/4\n", - " a efrbroo:F2_Expression ,\n", - " rdfs:Resource ; \n", - "http://erlangen-crm.org/current/P190_has_symbolic_content \"e canterò di quel secondo regno\"^^xsd:string ;\n", - " http://erlangen-crm.org/current/P3_has_note \"4\"^^xsd:int ;\n", - " http://hdn.dantenetwork.it/resource/has_number \"4\"^^xsd:int .
45dove l'umano spirito si purgahttp://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/1/verso/5\n", - " a efrbroo:F2_Expression ,\n", - " rdfs:Resource ; \n", - "http://erlangen-crm.org/current/P190_has_symbolic_content \"dove l'umano spirito si purga\"^^xsd:string ;\n", - " http://erlangen-crm.org/current/P3_has_note \"5\"^^xsd:int ;\n", - " http://hdn.dantenetwork.it/resource/has_number \"5\"^^xsd:int .
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 161, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_IRI_versi_pur.head().style.set_properties(subset=['IRIVerso'], **{'width': '400px'})" ] }, { "cell_type": "code", - "execution_count": 162, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "FormaFlessa 33245\n", - "Canto 33245\n", - "NumeroVerso 33245\n", - "Offset 33245\n", - "IRIFF 33245\n", - "dtype: int64" - ] - }, - "execution_count": 162, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "data_IRI_ff_pur = [parsed_purgatorio[2]]\n", "#data_IRI_versi\n", @@ -1239,67 +897,9 @@ }, { "cell_type": "code", - "execution_count": 163, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
FormaFlessa Canto NumeroVerso Offset IRIFF
33240dispostoCanto 3347553['http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#3', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#3 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#3 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#3 isInPosition 3', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#3 hasExpression disposto^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#3 isOccurrenceOf ulem . ']
33241aCanto 3347554['http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#4', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#4 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#4 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#4 isInPosition 4', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#4 hasExpression a^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#4 isOccurrenceOf ulem . ']
33242salireCanto 3347555['http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#5', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#5 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#5 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#5 isInPosition 5', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#5 hasExpression salire^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#5 isOccurrenceOf ulem . ']
33243alleCanto 3347556['http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#6', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#6 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#6 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#6 isInPosition 6', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#6 hasExpression alle^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#6 isOccurrenceOf ulem . ']
33244stelleCanto 3347557['http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#7', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#7 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#7 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#7 isInPosition 7', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#7 hasExpression stelle^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#7 isOccurrenceOf ulem . ']
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 163, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_IRI_ff_pur.tail().style.set_properties(subset=['IRIFF'], **{'width': '400px'})" ] @@ -1314,43 +914,18 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Handled /Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/paradiso.xml\n" - ] - } - ], + "outputs": [], "source": [ "parsed_paradiso=tei_to_csv_entry('/Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/paradiso.xml', 3)" ] }, { "cell_type": "code", - "execution_count": 69, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "FormaFlessa 32747\n", - "Categoria 32747\n", - "Lemma 32747\n", - "Canto 32747\n", - "Verso 32747\n", - "PosizioneFFNelVerso 32747\n", - "dtype: int64" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "data_paradiso = [parsed_paradiso[0]]\n", "testo_paradiso_tabella=pd.DataFrame(data_paradiso[0], columns = ['FormaFlessa' , 'Categoria', 'Lemma', 'Canto', 'Verso', 'PosizioneFFNelVerso']) \n", @@ -1359,285 +934,9 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
FormaFlessaCategoriaLemmaCantoVersoPosizioneFFNelVerso
0La[rdfs][la]Canto 111
1gloria[sf1fs][gloria]Canto 112
2di[epskg][di]Canto 113
3colui[pdms][colui]Canto 114
4che[pr][che]Canto 115
5tutto[pi1ms][tutto]Canto 116
6move[vta2ips3][muovere]Canto 117
7per[epskpl][per]Canto 121
8l'[rdms][lo]Canto 122
9universo[sm2ms][universo]Canto 123
10penetra[vi1ips3][penetrare]Canto 124
11e[cscc][e]Canto 125
12risplende[vi2ips3][risplendere]Canto 126
13in[epsksl][in]Canto 131
14una[rifs][una]Canto 132
15parte[sf3fs][parte]Canto 133
16più[b][più]Canto 134
17e[cscc][e]Canto 135
18meno[b][meno]Canto 136
19altrove[b][altrove]Canto 137
20Nel[epaksl, rdms][in, il]Canto 141
\n", - "
" - ], - "text/plain": [ - " FormaFlessa Categoria Lemma Canto Verso \\\n", - "0 La [rdfs] [la] Canto 1 1 \n", - "1 gloria [sf1fs] [gloria] Canto 1 1 \n", - "2 di [epskg] [di] Canto 1 1 \n", - "3 colui [pdms] [colui] Canto 1 1 \n", - "4 che [pr] [che] Canto 1 1 \n", - "5 tutto [pi1ms] [tutto] Canto 1 1 \n", - "6 move [vta2ips3] [muovere] Canto 1 1 \n", - "7 per [epskpl] [per] Canto 1 2 \n", - "8 l' [rdms] [lo] Canto 1 2 \n", - "9 universo [sm2ms] [universo] Canto 1 2 \n", - "10 penetra [vi1ips3] [penetrare] Canto 1 2 \n", - "11 e [cscc] [e] Canto 1 2 \n", - "12 risplende [vi2ips3] [risplendere] Canto 1 2 \n", - "13 in [epsksl] [in] Canto 1 3 \n", - "14 una [rifs] [una] Canto 1 3 \n", - "15 parte [sf3fs] [parte] Canto 1 3 \n", - "16 più [b] [più] Canto 1 3 \n", - "17 e [cscc] [e] Canto 1 3 \n", - "18 meno [b] [meno] Canto 1 3 \n", - "19 altrove [b] [altrove] Canto 1 3 \n", - "20 Nel [epaksl, rdms] [in, il] Canto 1 4 \n", - "\n", - " PosizioneFFNelVerso \n", - "0 1 \n", - "1 2 \n", - "2 3 \n", - "3 4 \n", - "4 5 \n", - "5 6 \n", - "6 7 \n", - "7 1 \n", - "8 2 \n", - "9 3 \n", - "10 4 \n", - "11 5 \n", - "12 6 \n", - "13 1 \n", - "14 2 \n", - "15 3 \n", - "16 4 \n", - "17 5 \n", - "18 6 \n", - "19 7 \n", - "20 1 " - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "testo_paradiso_tabella.head(21)" ] @@ -1652,23 +951,9 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "NumeroVerso 4757\n", - "Verso 4757\n", - "IRIVerso 4757\n", - "dtype: int64" - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "data_IRI_versi_par = [parsed_paradiso[1]]\n", "#data_IRI_versi\n", @@ -1678,82 +963,9 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NumeroVerso Verso IRIVerso
01La gloria di colui che tutto move\n", - " a efrbroo:F2_Expression ,\n", - " rdfs:Resource ; \n", - " \"La gloria di colui che tutto move\"^^xsd:string ;\n", - " \"1\"^^xsd:int ;\n", - " \"1\"^^xsd:int .
12per l'universo penetra, e risplende\n", - " a efrbroo:F2_Expression ,\n", - " rdfs:Resource ; \n", - " \"per l'universo penetra, e risplende\"^^xsd:string ;\n", - " \"2\"^^xsd:int ;\n", - " \"2\"^^xsd:int .
23in una parte più e meno altrove.\n", - " a efrbroo:F2_Expression ,\n", - " rdfs:Resource ; \n", - " \"in una parte più e meno altrove.\"^^xsd:string ;\n", - " \"3\"^^xsd:int ;\n", - " \"3\"^^xsd:int .
34Nel ciel che più de la sua luce prende\n", - " a efrbroo:F2_Expression ,\n", - " rdfs:Resource ; \n", - " \"Nel ciel che più de la sua luce prende\"^^xsd:string ;\n", - " \"4\"^^xsd:int ;\n", - " \"4\"^^xsd:int .
45fu' io, e vidi cose che ridire\n", - " a efrbroo:F2_Expression ,\n", - " rdfs:Resource ; \n", - " \"fu' io, e vidi cose che ridire\"^^xsd:string ;\n", - " \"5\"^^xsd:int ;\n", - " \"5\"^^xsd:int .
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 72, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_IRI_versi_par.head().style.set_properties(subset=['IRIVerso'], **{'width': '400px'})" ]