diff --git a/Progetto_Lett.ipynb b/Progetto_Lett.ipynb
index 6440163..c529e56 100644
--- a/Progetto_Lett.ipynb
+++ b/Progetto_Lett.ipynb
@@ -9,7 +9,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -17,6 +17,7 @@
"import sys\n",
"import numpy as np\n",
"import pandas as pd\n",
+ "import rdflib\n",
"import matplotlib.pyplot as plt\n",
"# importing useful Python utility libraries we'll need\n",
"from collections import Counter, defaultdict\n",
@@ -25,7 +26,24 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#from rdflib.namespace import CSVW, DC, DCAT, DCTERMS, DOAP, FOAF, ODRL2, ORG, OWL, \\\n",
+ "# PROF, PROV, RDF, RDFS, SDO, SH, SKOS, SOSA, SSN, TIME, \\\n",
+ "# VOID, XMLNS, XSD\n",
+ "from rdflib.namespace import DC, DCAT, DCTERMS, OWL, \\\n",
+ " RDF, RDFS, SKOS, \\\n",
+ " XMLNS, XSD\n",
+ "from rdflib import Namespace\n",
+ "from rdflib import URIRef, BNode, Literal\n",
+ "n = Namespace(\"http://hdn.dantenetwork.it/resource/work/commedia/cantica/\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -34,7 +52,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -43,7 +61,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -52,7 +70,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -61,7 +79,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -74,7 +92,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -87,7 +105,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -148,16 +166,18 @@
},
{
"cell_type": "code",
- "execution_count": 141,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class TEIFile(object):\n",
" def __init__(self, filename, idres=0):\n",
+ " self.g = rdflib.Graph()\n",
" self.filename = filename\n",
" self.soup = read_tei(filename)\n",
" self._text = None\n",
" self.idres=idres;\n",
+ " self.InFor = URIRef(\"http://example.org/word/InflectedForm\")\n",
" # self._lemmas = None\n",
" # self._lemma_lemmas = None\n",
" # self._categ_lemmas = None\n",
@@ -304,7 +324,7 @@
" IRIff_text= \"http://hdn.dantenetwork.it/resource/work/commedia/cantica/\"+str(self.idres)+\"/\"+\"/\".join(canto.lower().split())+\"/verso/\"+str(i)+\"#\"+str(j);\n",
" IRIff_text_type= IRIff_text +' rdf:type InflectedForm . '\n",
" IRIff_text_pos= IRIff_text +' isInPosition '+str(j);\n",
- " IRIff_text_exp= IRIff_text +' hasExpression '+(\" \".join(lm_text.split()))+\"^^xsd:string . \";\n",
+ " IRIff_text_exp= IRIff_text +' hasExpression \"'+(\" \".join(lm_text.split()))+'\"^^xsd:string .' ;\n",
" IRIff_text_oo= IRIff_text +' isOccurrenceOf ulem . ';\n",
" IRIff_text_co= IRIff_text +\" http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/\"+str(self.idres)+\"/\"+\"/\".join(canto.lower().split())+\"/verso/\"+str(i);\n",
" lstiri.append(IRIff_text);\n",
@@ -321,6 +341,70 @@
" \n",
" \n",
" return iriffs\n",
+ " \n",
+ " #IRI forma flessa RDF\n",
+ " @property\n",
+ " def IRIffRDF(self):\n",
+ " iriffs = []\n",
+ " i=0\n",
+ " for div in self.soup.body.find_all(\"div1\"):\n",
+ " for verso in div.find_all('l'):\n",
+ " i=i+1;\n",
+ " j=0;\n",
+ " for lm in verso.find_all(\"lm\"):\n",
+ " lstctg=[];\n",
+ " lstlms=[];\n",
+ " lstiri=[];\n",
+ " j=j+1;\n",
+ " lm_text=elem_to_text(lm).strip();\n",
+ " #ctg=lm.get('catg');\n",
+ " if (lm.get('catg')!=None):\n",
+ " ctg=lm.get('catg');\n",
+ " else:\n",
+ " ctg=\"non_spec\";\n",
+ " \n",
+ " lstctg.append(\" \".join(ctg.split())); \n",
+ " \n",
+ " if (lm.get('lemma')!=None):\n",
+ " lemma=lm.get('lemma');\n",
+ " else:\n",
+ " lemma=\"non_spec\";\n",
+ " lstlms.append(\" \".join(lemma.split())); \n",
+ " for parent in lm.parents:\n",
+ " if (parent.name=='div1'):\n",
+ " canto = parent.contents[0];\n",
+ " if (parent.name=='lm1' and iriffs[-1][0]==\" \".join(lm_text.split())):\n",
+ " j=j-1;\n",
+ " #lstctg=lstctg+iriffs[-1][1];\n",
+ " #lstlms=lstlms+iriffs[-1][2];\n",
+ " iriffs.pop();\n",
+ " #g.add((bob, RDF.type, FOAF.Person))\n",
+ " #bob = URIRef(\"http://example.org/people/Bob\")\n",
+ " IRIff_text= URIRef(\"http://hdn.dantenetwork.it/resource/work/commedia/cantica/\"+str(self.idres)+\"/\"+\"/\".join(canto.lower().split())+\"/verso/\"+str(i)+\"#\"+str(j));\n",
+ " self.g.remove((IRIff_text, None, None))\n",
+ " self.g.add((IRIff_text, RDF.type, self.InFor))\n",
+ " \n",
+ " IRIff_text= \"http://hdn.dantenetwork.it/resource/work/commedia/cantica/\"+str(self.idres)+\"/\"+\"/\".join(canto.lower().split())+\"/verso/\"+str(i)+\"#\"+str(j);\n",
+ " IRIff_text_type= IRIff_text +' rdf:type InflectedForm . '\n",
+ " IRIff_text_pos= IRIff_text +' isInPosition '+str(j);\n",
+ " IRIff_text_exp= IRIff_text +' hasExpression \"'+(\" \".join(lm_text.split()))+'\"^^xsd:string .' ;\n",
+ " IRIff_text_oo= IRIff_text +' isOccurrenceOf ulem . ';\n",
+ " IRIff_text_co= IRIff_text +\" http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/\"+str(self.idres)+\"/\"+\"/\".join(canto.lower().split())+\"/verso/\"+str(i);\n",
+ " lstiri.append(IRIff_text);\n",
+ " lstiri.append(IRIff_text_type);\n",
+ " lstiri.append(IRIff_text_co);\n",
+ " lstiri.append(IRIff_text_pos);\n",
+ " lstiri.append(IRIff_text_exp);\n",
+ " lstiri.append(IRIff_text_oo);\n",
+ " iriffs.append((\" \".join(lm_text.split()), canto.replace('\\n','').strip(), i, j, lstiri));\n",
+ " \n",
+ " \n",
+ " # ordr_lms.append((\" \".join(lm_text.split()), \" \".join(ctg.split()), \" \".join(lemma.split()), canto.replace('\\n','').strip(), i, j, \"hdn:Works/Commedia/Cantica/1/\"+str(i),\n",
+ " # \"hdn:Works/Commedia/Cantica/1/\"+str(i)+\"/#\"+str(j)));\n",
+ " \n",
+ " \n",
+ " return self.g\n",
+ " \n",
" #IRI del verso\n",
" @property\n",
" def IRIverso(self):\n",
@@ -362,6 +446,7 @@
" \n",
" \n",
" return iris\n",
+ " #IRI del verso\n",
" \n",
" \n",
" #test\n",
@@ -406,7 +491,7 @@
},
{
"cell_type": "code",
- "execution_count": 142,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -414,7 +499,7 @@
" tei = TEIFile(tei_file, idres)\n",
" print(f\"Handled {tei_file}\")\n",
" base_name = tei_file\n",
- " return tei.orderedlemma, tei.IRIverso, tei.IRIff, tei.categ_lemma, tei.lemma_lemma #, tei.abstract"
+ " return tei.orderedlemma, tei.IRIverso, tei.IRIff, tei.IRIffRDF, tei.categ_lemma, tei.lemma_lemma #, tei.abstract"
]
},
{
@@ -427,43 +512,9 @@
},
{
"cell_type": "code",
- "execution_count": 112,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "('Nel', 'rdms', 'il', 'Canto 1')\n",
- "\n",
- "('mezzo', 'eilaksl', 'in mezzo di', 'Canto 1')\n",
- "\n",
- "('del', 'rdms', 'il', 'Canto 1')\n",
- "\n",
- "('cammin', 'sm2ms', 'cammino', 'Canto 1')\n",
- "\n",
- "('di', 'epskg', 'di', 'Canto 1')\n",
- "\n",
- "('nostra', 'as1fs', 'nostro', 'Canto 1')\n",
- "\n",
- "('vita', 'sf1fs', 'vita', 'Canto 1')\n",
- "\n",
- "('mi', 'pf1sypr', 'mi', 'Canto 1')\n",
- "\n",
- "('ritrovai', 'vta+1irs1', 'ritrovare', 'Canto 1')\n",
- "\n",
- "('per', 'epskpl', 'per', 'Canto 1')\n",
- "\n",
- "('una', 'rifs', 'una', 'Canto 1')\n",
- "\n",
- "('selva', 'sf1fs', 'selva', 'Canto 1')\n",
- "\n",
- "('oscura', 'a1fs', 'oscuro', 'Canto 1')\n",
- "\n",
- "...\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"tei = TEIFile('/Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/inferno_forparsing.xml', 1)\n",
"bbs=tei.ff_ea\n",
@@ -474,6 +525,21 @@
" break"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "g1=tei.IRIffRDF\n",
+ " \n",
+ "print(len(g1)) # number of triples in the graph\n",
+ "\n",
+ "import pprint\n",
+ "for stmt in g1:\n",
+ " pprint.pprint(stmt)\n"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -484,43 +550,18 @@
},
{
"cell_type": "code",
- "execution_count": 143,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Handled /Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/inferno_forparsing.xml\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"mytesto=tei_to_csv_entry('/Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/inferno_forparsing.xml', 1)"
]
},
{
"cell_type": "code",
- "execution_count": 144,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "FormaFlessa 33400\n",
- "Categoria 33400\n",
- "Lemma 33400\n",
- "Canto 33400\n",
- "Verso 33400\n",
- "PosizioneFFNelVerso 33400\n",
- "dtype: int64"
- ]
- },
- "execution_count": 144,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"data = [mytesto[0]]\n",
"#data[0]\n",
@@ -531,152 +572,9 @@
},
{
"cell_type": "code",
- "execution_count": 145,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " FormaFlessa | \n",
- " Categoria | \n",
- " Lemma | \n",
- " Canto | \n",
- " Verso | \n",
- " PosizioneFFNelVerso | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 33390 | \n",
- " un | \n",
- " [rims] | \n",
- " [uno] | \n",
- " Canto 34 | \n",
- " 4720 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " 33391 | \n",
- " pertugio | \n",
- " [sm2ms] | \n",
- " [pertugio] | \n",
- " Canto 34 | \n",
- " 4720 | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " 33392 | \n",
- " tondo | \n",
- " [a1ms] | \n",
- " [tondo] | \n",
- " Canto 34 | \n",
- " 4720 | \n",
- " 8 | \n",
- "
\n",
- " \n",
- " 33393 | \n",
- " E | \n",
- " [cscc] | \n",
- " [e] | \n",
- " Canto 34 | \n",
- " 4721 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 33394 | \n",
- " quindi | \n",
- " [b] | \n",
- " [quindi] | \n",
- " Canto 34 | \n",
- " 4721 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 33395 | \n",
- " uscimmo | \n",
- " [vi3irp1] | \n",
- " [uscire] | \n",
- " Canto 34 | \n",
- " 4721 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 33396 | \n",
- " a | \n",
- " [epsf] | \n",
- " [a] | \n",
- " Canto 34 | \n",
- " 4721 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " 33397 | \n",
- " riveder | \n",
- " [vta2fp] | \n",
- " [rivedere] | \n",
- " Canto 34 | \n",
- " 4721 | \n",
- " 5 | \n",
- "
\n",
- " \n",
- " 33398 | \n",
- " le | \n",
- " [rdfp] | \n",
- " [la] | \n",
- " Canto 34 | \n",
- " 4721 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " 33399 | \n",
- " stelle | \n",
- " [sf1fp] | \n",
- " [stella] | \n",
- " Canto 34 | \n",
- " 4721 | \n",
- " 7 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " FormaFlessa Categoria Lemma Canto Verso PosizioneFFNelVerso\n",
- "33390 un [rims] [uno] Canto 34 4720 6\n",
- "33391 pertugio [sm2ms] [pertugio] Canto 34 4720 7\n",
- "33392 tondo [a1ms] [tondo] Canto 34 4720 8\n",
- "33393 E [cscc] [e] Canto 34 4721 1\n",
- "33394 quindi [b] [quindi] Canto 34 4721 2\n",
- "33395 uscimmo [vi3irp1] [uscire] Canto 34 4721 3\n",
- "33396 a [epsf] [a] Canto 34 4721 4\n",
- "33397 riveder [vta2fp] [rivedere] Canto 34 4721 5\n",
- "33398 le [rdfp] [la] Canto 34 4721 6\n",
- "33399 stelle [sf1fp] [stella] Canto 34 4721 7"
- ]
- },
- "execution_count": 145,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"testo_tabella.tail(10)"
]
@@ -703,23 +601,9 @@
},
{
"cell_type": "code",
- "execution_count": 146,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "NumeroVerso 4721\n",
- "Verso 4721\n",
- "IRIVerso 4721\n",
- "dtype: int64"
- ]
- },
- "execution_count": 146,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"data_IRI_versi_inf = [mytesto[1]]\n",
"#data_IRI_versi\n",
@@ -729,82 +613,9 @@
},
{
"cell_type": "code",
- "execution_count": 147,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- " | NumeroVerso | Verso | IRIVerso |
\n",
- " \n",
- " 0 | \n",
- " 1 | \n",
- " Nel mezzo del cammin di nostra vita | \n",
- " http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1\n",
- " a efrbroo:F2_Expression ,\n",
- " rdfs:Resource ; \n",
- "http://erlangen-crm.org/current/P190_has_symbolic_content \"Nel mezzo del cammin di nostra vita\"^^xsd:string ;\n",
- " http://erlangen-crm.org/current/P3_has_note \"1\"^^xsd:int ;\n",
- " http://hdn.dantenetwork.it/resource/has_number \"1\"^^xsd:int . | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2 | \n",
- " mi ritrovai per una selva oscura | \n",
- " http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/2\n",
- " a efrbroo:F2_Expression ,\n",
- " rdfs:Resource ; \n",
- "http://erlangen-crm.org/current/P190_has_symbolic_content \"mi ritrovai per una selva oscura\"^^xsd:string ;\n",
- " http://erlangen-crm.org/current/P3_has_note \"2\"^^xsd:int ;\n",
- " http://hdn.dantenetwork.it/resource/has_number \"2\"^^xsd:int . | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 3 | \n",
- " ché la diritta via era smarrita. | \n",
- " http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/3\n",
- " a efrbroo:F2_Expression ,\n",
- " rdfs:Resource ; \n",
- "http://erlangen-crm.org/current/P190_has_symbolic_content \"ché la diritta via era smarrita.\"^^xsd:string ;\n",
- " http://erlangen-crm.org/current/P3_has_note \"3\"^^xsd:int ;\n",
- " http://hdn.dantenetwork.it/resource/has_number \"3\"^^xsd:int . | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 4 | \n",
- " Ahi quanto a dir qual era è cosa dura | \n",
- " http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/4\n",
- " a efrbroo:F2_Expression ,\n",
- " rdfs:Resource ; \n",
- "http://erlangen-crm.org/current/P190_has_symbolic_content \"Ahi quanto a dir qual era è cosa dura\"^^xsd:string ;\n",
- " http://erlangen-crm.org/current/P3_has_note \"4\"^^xsd:int ;\n",
- " http://hdn.dantenetwork.it/resource/has_number \"4\"^^xsd:int . | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 5 | \n",
- " esta selva selvaggia e aspra e forte | \n",
- " http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/5\n",
- " a efrbroo:F2_Expression ,\n",
- " rdfs:Resource ; \n",
- "http://erlangen-crm.org/current/P190_has_symbolic_content \"esta selva selvaggia e aspra e forte\"^^xsd:string ;\n",
- " http://erlangen-crm.org/current/P3_has_note \"5\"^^xsd:int ;\n",
- " http://hdn.dantenetwork.it/resource/has_number \"5\"^^xsd:int . | \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- ""
- ]
- },
- "execution_count": 147,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df_IRI_versi_inf.head().style.set_properties(subset=['IRIVerso'], **{'width': '400px'})"
]
@@ -820,25 +631,9 @@
},
{
"cell_type": "code",
- "execution_count": 148,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "FormaFlessa 33400\n",
- "Canto 33400\n",
- "NumeroVerso 33400\n",
- "Offset 33400\n",
- "IRIFF 33400\n",
- "dtype: int64"
- ]
- },
- "execution_count": 148,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"data_IRI_ff_inf = [mytesto[2]]\n",
"#data_IRI_versi\n",
@@ -848,69 +643,160 @@
},
{
"cell_type": "code",
- "execution_count": 149,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- " | FormaFlessa | Canto | NumeroVerso | Offset | IRIFF |
\n",
- " \n",
- " 0 | \n",
- " Nel | \n",
- " Canto 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " ['http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#1', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#1 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#1 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#1 isInPosition 1', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#1 hasExpression Nel^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#1 isOccurrenceOf ulem . '] | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " mezzo | \n",
- " Canto 1 | \n",
- " 1 | \n",
- " 2 | \n",
- " ['http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#2', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#2 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#2 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#2 isInPosition 2', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#2 hasExpression mezzo^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#2 isOccurrenceOf ulem . '] | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " del | \n",
- " Canto 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " ['http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#3', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#3 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#3 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#3 isInPosition 3', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#3 hasExpression del^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#3 isOccurrenceOf ulem . '] | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " cammin | \n",
- " Canto 1 | \n",
- " 1 | \n",
- " 4 | \n",
- " ['http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#4', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#4 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#4 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#4 isInPosition 4', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#4 hasExpression cammin^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#4 isOccurrenceOf ulem . '] | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " di | \n",
- " Canto 1 | \n",
- " 1 | \n",
- " 5 | \n",
- " ['http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#5', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#5 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#5 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#5 isInPosition 5', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#5 hasExpression di^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1#5 isOccurrenceOf ulem . '] | \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- ""
- ]
- },
- "execution_count": 149,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "df_IRI_ff_inf.head().style.set_properties(subset=['IRIFF'], **{'width': '400px'})"
+ "df_IRI_ff_inf.tail().style.set_properties(subset=['IRIFF'], **{'width': '400px'})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Forse non tutti sanno che... \n",
+ "\n",
+ "\n",
+ "*Nota: i risultati delle prossime elaborazioni considerano diverse tra loro due parole anche se differiscono solo per la presenza di maiuscole/minuscole*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_inf_per_test=df_IRI_ff_inf[['FormaFlessa', 'Canto', 'NumeroVerso', 'Offset']]\n",
+ "df_num_ff=df_inf_per_test[df_inf_per_test['FormaFlessa'].str.len()>3]['FormaFlessa'].value_counts()\n",
+ "print(\"Le 10 parole (più lunghe di 3 caratteri) usate con maggiore frequenza nella prima Cantica sono:\", end=\"\\n\"*2)\n",
+ "print('{:<10}{}'.format('Parola', 'Frequenza'))\n",
+ "df_num_ff.head(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_inf_versi=df_inf_per_test.groupby('NumeroVerso')['FormaFlessa'].apply(list).reset_index(name='parole')\n",
+ "#test_inf_versi.head()\n",
+ "parole_counter = Counter(itertools.chain(*test_inf_versi['parole']))\n",
+ "print('\\nCi sono {} parole diverse nella prima Cantica.\\n'.format(len(parole_counter)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"\\nLe 10 parole più frequenti nella prima Cantica, indipendentemente dalla lunghezza in caratteri, sono: \\n\")\n",
+ "print('{:<30}Frequenza\\n'.format(\"Parola\"))\n",
+ "for k, v in parole_counter.most_common(10):\n",
+ " print(f'{k:<30}{v}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Nel risultato della cella qui sotto si vede che alcune parole includono un segno di punteggiatura: nella creazione degli IRI dovremmo toglierlo?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "least_common_parole = parole_counter.most_common()[-30:]\n",
+ "print(\"\\nAlcune parole che compaiono una sola volta nella prima Cantica: \\n\")\n",
+ "print('{:<30}Frequenza\\n'.format(\"Parola\"))\n",
+ "for lk, lv in least_common_parole:\n",
+ " print(f'{lk:<30}{lv}')\n",
+ " \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Frequenza delle parole palindrome\n",
+ "def is_palindrome(s):\n",
+ " return s==s[::-1]\n",
+ "\n",
+ "for k, v in parole_counter.most_common():\n",
+ " if(len(k)>1 and is_palindrome(k)):\n",
+ " print(f'{k:<30}{v}')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#test_versi_1=test_inf_versi['parole']\n",
+ "#for tve in test_versi_1:\n",
+ "# if(is_palindrome((\"\".join(tve)))):\n",
+ "# print (\"\".join(tve))\n",
+ " #print ((\" \".join(tve)[::-1]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cooccurrences = []\n",
+ "\n",
+ "for parole in test_inf_versi['parole']:\n",
+ " parole_pairs = itertools.combinations(parole, 2)\n",
+ " for pair in parole_pairs:\n",
+ " if(len(pair[0])>3 and len(pair[1])>3):\n",
+ " cooccurrences.append(tuple((pair)))\n",
+ " # cooccurrences.append(tuple(sorted(pair)))\n",
+ "\n",
+ "# Conto la frequenza di ciascuna cooccorrenza\n",
+ "parole_co_counter = Counter(cooccurrences)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"La frequenza delle co-occorrenze di due parole (non necessariamente consecutive e formate da almeno 4 caratteri) \\nin uno stesso verso della prima Cantica\", '\\n')\n",
+ "print('{:<50}{}'.format('Co-ooccorrenza', 'Frequenza\\n'))\n",
+ "for k, v in parole_co_counter.most_common(20):\n",
+ " parole = '['+k[0] + ' , ' + k[1]+']'\n",
+ " print(f'{parole:<50}{v}')\n",
+ "print('\\n')\n",
+ "#print('\\nMedia:')\n",
+ "#print(np.median(list(parole_co_counter.values())))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Cominciamo a lavorare con RDF"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "\n",
+ "#g.parse(\"/Users/cesare/Projects/hdn/triple/DanteTriple/xml/DaMa/Commedia.rdf\", format=\"nt\")\n"
]
},
{
@@ -923,7 +809,7 @@
},
{
"cell_type": "code",
- "execution_count": 62,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -939,43 +825,18 @@
},
{
"cell_type": "code",
- "execution_count": 157,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Handled /Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/purgatorio.xml\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"parsed_purgatorio=tei_to_csv_entry('/Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/purgatorio.xml', 2)"
]
},
{
"cell_type": "code",
- "execution_count": 158,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "FormaFlessa 33245\n",
- "Categoria 33245\n",
- "Lemma 33245\n",
- "Canto 33245\n",
- "Verso 33245\n",
- "PosizioneFFNelVerso 33245\n",
- "dtype: int64"
- ]
- },
- "execution_count": 158,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"data_purgatorio = [parsed_purgatorio[0]]\n",
"#dfObj_purgatorio = pd.DataFrame(data_purgatorio[0]) \n",
@@ -985,109 +846,9 @@
},
{
"cell_type": "code",
- "execution_count": 159,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " FormaFlessa | \n",
- " Categoria | \n",
- " Lemma | \n",
- " Canto | \n",
- " Verso | \n",
- " PosizioneFFNelVerso | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 33240 | \n",
- " disposto | \n",
- " [vtp2pra1ms] | \n",
- " [disporre] | \n",
- " Canto 33 | \n",
- " 4755 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 33241 | \n",
- " a | \n",
- " [epsb] | \n",
- " [a] | \n",
- " Canto 33 | \n",
- " 4755 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " 33242 | \n",
- " salire | \n",
- " [vi3fp] | \n",
- " [salire] | \n",
- " Canto 33 | \n",
- " 4755 | \n",
- " 5 | \n",
- "
\n",
- " \n",
- " 33243 | \n",
- " alle | \n",
- " [rdfp, epakml] | \n",
- " [la, a] | \n",
- " Canto 33 | \n",
- " 4755 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " 33244 | \n",
- " stelle | \n",
- " [sf1fp] | \n",
- " [stella] | \n",
- " Canto 33 | \n",
- " 4755 | \n",
- " 7 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " FormaFlessa Categoria Lemma Canto Verso \\\n",
- "33240 disposto [vtp2pra1ms] [disporre] Canto 33 4755 \n",
- "33241 a [epsb] [a] Canto 33 4755 \n",
- "33242 salire [vi3fp] [salire] Canto 33 4755 \n",
- "33243 alle [rdfp, epakml] [la, a] Canto 33 4755 \n",
- "33244 stelle [sf1fp] [stella] Canto 33 4755 \n",
- "\n",
- " PosizioneFFNelVerso \n",
- "33240 3 \n",
- "33241 4 \n",
- "33242 5 \n",
- "33243 6 \n",
- "33244 7 "
- ]
- },
- "execution_count": 159,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"testo_purgatorio_tabella.tail()"
]
@@ -1103,23 +864,9 @@
},
{
"cell_type": "code",
- "execution_count": 160,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "NumeroVerso 4755\n",
- "Verso 4755\n",
- "IRIVerso 4755\n",
- "dtype: int64"
- ]
- },
- "execution_count": 160,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"data_IRI_versi_pur = [parsed_purgatorio[1]]\n",
"#data_IRI_versi\n",
@@ -1129,107 +876,18 @@
},
{
"cell_type": "code",
- "execution_count": 161,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- " | NumeroVerso | Verso | IRIVerso |
\n",
- " \n",
- " 0 | \n",
- " 1 | \n",
- " Per correr miglior acque alza le vele | \n",
- " http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/1/verso/1\n",
- " a efrbroo:F2_Expression ,\n",
- " rdfs:Resource ; \n",
- "http://erlangen-crm.org/current/P190_has_symbolic_content \"Per correr miglior acque alza le vele\"^^xsd:string ;\n",
- " http://erlangen-crm.org/current/P3_has_note \"1\"^^xsd:int ;\n",
- " http://hdn.dantenetwork.it/resource/has_number \"1\"^^xsd:int . | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2 | \n",
- " omai la navicella del mio ingegno, | \n",
- " http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/1/verso/2\n",
- " a efrbroo:F2_Expression ,\n",
- " rdfs:Resource ; \n",
- "http://erlangen-crm.org/current/P190_has_symbolic_content \"omai la navicella del mio ingegno,\"^^xsd:string ;\n",
- " http://erlangen-crm.org/current/P3_has_note \"2\"^^xsd:int ;\n",
- " http://hdn.dantenetwork.it/resource/has_number \"2\"^^xsd:int . | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 3 | \n",
- " che lascia dietro a sé mar sì crudele; | \n",
- " http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/1/verso/3\n",
- " a efrbroo:F2_Expression ,\n",
- " rdfs:Resource ; \n",
- "http://erlangen-crm.org/current/P190_has_symbolic_content \"che lascia dietro a sé mar sì crudele;\"^^xsd:string ;\n",
- " http://erlangen-crm.org/current/P3_has_note \"3\"^^xsd:int ;\n",
- " http://hdn.dantenetwork.it/resource/has_number \"3\"^^xsd:int . | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 4 | \n",
- " e canterò di quel secondo regno | \n",
- " http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/1/verso/4\n",
- " a efrbroo:F2_Expression ,\n",
- " rdfs:Resource ; \n",
- "http://erlangen-crm.org/current/P190_has_symbolic_content \"e canterò di quel secondo regno\"^^xsd:string ;\n",
- " http://erlangen-crm.org/current/P3_has_note \"4\"^^xsd:int ;\n",
- " http://hdn.dantenetwork.it/resource/has_number \"4\"^^xsd:int . | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 5 | \n",
- " dove l'umano spirito si purga | \n",
- " http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/1/verso/5\n",
- " a efrbroo:F2_Expression ,\n",
- " rdfs:Resource ; \n",
- "http://erlangen-crm.org/current/P190_has_symbolic_content \"dove l'umano spirito si purga\"^^xsd:string ;\n",
- " http://erlangen-crm.org/current/P3_has_note \"5\"^^xsd:int ;\n",
- " http://hdn.dantenetwork.it/resource/has_number \"5\"^^xsd:int . | \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- ""
- ]
- },
- "execution_count": 161,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df_IRI_versi_pur.head().style.set_properties(subset=['IRIVerso'], **{'width': '400px'})"
]
},
{
"cell_type": "code",
- "execution_count": 162,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "FormaFlessa 33245\n",
- "Canto 33245\n",
- "NumeroVerso 33245\n",
- "Offset 33245\n",
- "IRIFF 33245\n",
- "dtype: int64"
- ]
- },
- "execution_count": 162,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"data_IRI_ff_pur = [parsed_purgatorio[2]]\n",
"#data_IRI_versi\n",
@@ -1239,67 +897,9 @@
},
{
"cell_type": "code",
- "execution_count": 163,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- " | FormaFlessa | Canto | NumeroVerso | Offset | IRIFF |
\n",
- " \n",
- " 33240 | \n",
- " disposto | \n",
- " Canto 33 | \n",
- " 4755 | \n",
- " 3 | \n",
- " ['http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#3', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#3 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#3 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#3 isInPosition 3', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#3 hasExpression disposto^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#3 isOccurrenceOf ulem . '] | \n",
- "
\n",
- " \n",
- " 33241 | \n",
- " a | \n",
- " Canto 33 | \n",
- " 4755 | \n",
- " 4 | \n",
- " ['http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#4', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#4 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#4 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#4 isInPosition 4', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#4 hasExpression a^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#4 isOccurrenceOf ulem . '] | \n",
- "
\n",
- " \n",
- " 33242 | \n",
- " salire | \n",
- " Canto 33 | \n",
- " 4755 | \n",
- " 5 | \n",
- " ['http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#5', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#5 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#5 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#5 isInPosition 5', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#5 hasExpression salire^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#5 isOccurrenceOf ulem . '] | \n",
- "
\n",
- " \n",
- " 33243 | \n",
- " alle | \n",
- " Canto 33 | \n",
- " 4755 | \n",
- " 6 | \n",
- " ['http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#6', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#6 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#6 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#6 isInPosition 6', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#6 hasExpression alle^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#6 isOccurrenceOf ulem . '] | \n",
- "
\n",
- " \n",
- " 33244 | \n",
- " stelle | \n",
- " Canto 33 | \n",
- " 4755 | \n",
- " 7 | \n",
- " ['http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#7', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#7 rdf:type InflectedForm . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#7 http://erlangen-crm.org/current/P148_is_component_of http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#7 isInPosition 7', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#7 hasExpression stelle^^xsd:string . ', 'http://hdn.dantenetwork.it/resource/work/commedia/cantica/2/canto/33/verso/4755#7 isOccurrenceOf ulem . '] | \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- ""
- ]
- },
- "execution_count": 163,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df_IRI_ff_pur.tail().style.set_properties(subset=['IRIFF'], **{'width': '400px'})"
]
@@ -1314,43 +914,18 @@
},
{
"cell_type": "code",
- "execution_count": 68,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Handled /Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/paradiso.xml\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"parsed_paradiso=tei_to_csv_entry('/Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/paradiso.xml', 3)"
]
},
{
"cell_type": "code",
- "execution_count": 69,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "FormaFlessa 32747\n",
- "Categoria 32747\n",
- "Lemma 32747\n",
- "Canto 32747\n",
- "Verso 32747\n",
- "PosizioneFFNelVerso 32747\n",
- "dtype: int64"
- ]
- },
- "execution_count": 69,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"data_paradiso = [parsed_paradiso[0]]\n",
"testo_paradiso_tabella=pd.DataFrame(data_paradiso[0], columns = ['FormaFlessa' , 'Categoria', 'Lemma', 'Canto', 'Verso', 'PosizioneFFNelVerso']) \n",
@@ -1359,285 +934,9 @@
},
{
"cell_type": "code",
- "execution_count": 70,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " FormaFlessa | \n",
- " Categoria | \n",
- " Lemma | \n",
- " Canto | \n",
- " Verso | \n",
- " PosizioneFFNelVerso | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " La | \n",
- " [rdfs] | \n",
- " [la] | \n",
- " Canto 1 | \n",
- " 1 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " gloria | \n",
- " [sf1fs] | \n",
- " [gloria] | \n",
- " Canto 1 | \n",
- " 1 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " di | \n",
- " [epskg] | \n",
- " [di] | \n",
- " Canto 1 | \n",
- " 1 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " colui | \n",
- " [pdms] | \n",
- " [colui] | \n",
- " Canto 1 | \n",
- " 1 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " che | \n",
- " [pr] | \n",
- " [che] | \n",
- " Canto 1 | \n",
- " 1 | \n",
- " 5 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " tutto | \n",
- " [pi1ms] | \n",
- " [tutto] | \n",
- " Canto 1 | \n",
- " 1 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " move | \n",
- " [vta2ips3] | \n",
- " [muovere] | \n",
- " Canto 1 | \n",
- " 1 | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " per | \n",
- " [epskpl] | \n",
- " [per] | \n",
- " Canto 1 | \n",
- " 2 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " l' | \n",
- " [rdms] | \n",
- " [lo] | \n",
- " Canto 1 | \n",
- " 2 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " universo | \n",
- " [sm2ms] | \n",
- " [universo] | \n",
- " Canto 1 | \n",
- " 2 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " penetra | \n",
- " [vi1ips3] | \n",
- " [penetrare] | \n",
- " Canto 1 | \n",
- " 2 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " e | \n",
- " [cscc] | \n",
- " [e] | \n",
- " Canto 1 | \n",
- " 2 | \n",
- " 5 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " risplende | \n",
- " [vi2ips3] | \n",
- " [risplendere] | \n",
- " Canto 1 | \n",
- " 2 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " in | \n",
- " [epsksl] | \n",
- " [in] | \n",
- " Canto 1 | \n",
- " 3 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " una | \n",
- " [rifs] | \n",
- " [una] | \n",
- " Canto 1 | \n",
- " 3 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " parte | \n",
- " [sf3fs] | \n",
- " [parte] | \n",
- " Canto 1 | \n",
- " 3 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " più | \n",
- " [b] | \n",
- " [più] | \n",
- " Canto 1 | \n",
- " 3 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " e | \n",
- " [cscc] | \n",
- " [e] | \n",
- " Canto 1 | \n",
- " 3 | \n",
- " 5 | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " meno | \n",
- " [b] | \n",
- " [meno] | \n",
- " Canto 1 | \n",
- " 3 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " altrove | \n",
- " [b] | \n",
- " [altrove] | \n",
- " Canto 1 | \n",
- " 3 | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " Nel | \n",
- " [epaksl, rdms] | \n",
- " [in, il] | \n",
- " Canto 1 | \n",
- " 4 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " FormaFlessa Categoria Lemma Canto Verso \\\n",
- "0 La [rdfs] [la] Canto 1 1 \n",
- "1 gloria [sf1fs] [gloria] Canto 1 1 \n",
- "2 di [epskg] [di] Canto 1 1 \n",
- "3 colui [pdms] [colui] Canto 1 1 \n",
- "4 che [pr] [che] Canto 1 1 \n",
- "5 tutto [pi1ms] [tutto] Canto 1 1 \n",
- "6 move [vta2ips3] [muovere] Canto 1 1 \n",
- "7 per [epskpl] [per] Canto 1 2 \n",
- "8 l' [rdms] [lo] Canto 1 2 \n",
- "9 universo [sm2ms] [universo] Canto 1 2 \n",
- "10 penetra [vi1ips3] [penetrare] Canto 1 2 \n",
- "11 e [cscc] [e] Canto 1 2 \n",
- "12 risplende [vi2ips3] [risplendere] Canto 1 2 \n",
- "13 in [epsksl] [in] Canto 1 3 \n",
- "14 una [rifs] [una] Canto 1 3 \n",
- "15 parte [sf3fs] [parte] Canto 1 3 \n",
- "16 più [b] [più] Canto 1 3 \n",
- "17 e [cscc] [e] Canto 1 3 \n",
- "18 meno [b] [meno] Canto 1 3 \n",
- "19 altrove [b] [altrove] Canto 1 3 \n",
- "20 Nel [epaksl, rdms] [in, il] Canto 1 4 \n",
- "\n",
- " PosizioneFFNelVerso \n",
- "0 1 \n",
- "1 2 \n",
- "2 3 \n",
- "3 4 \n",
- "4 5 \n",
- "5 6 \n",
- "6 7 \n",
- "7 1 \n",
- "8 2 \n",
- "9 3 \n",
- "10 4 \n",
- "11 5 \n",
- "12 6 \n",
- "13 1 \n",
- "14 2 \n",
- "15 3 \n",
- "16 4 \n",
- "17 5 \n",
- "18 6 \n",
- "19 7 \n",
- "20 1 "
- ]
- },
- "execution_count": 70,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"testo_paradiso_tabella.head(21)"
]
@@ -1652,23 +951,9 @@
},
{
"cell_type": "code",
- "execution_count": 71,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "NumeroVerso 4757\n",
- "Verso 4757\n",
- "IRIVerso 4757\n",
- "dtype: int64"
- ]
- },
- "execution_count": 71,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"data_IRI_versi_par = [parsed_paradiso[1]]\n",
"#data_IRI_versi\n",
@@ -1678,82 +963,9 @@
},
{
"cell_type": "code",
- "execution_count": 72,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- " | NumeroVerso | Verso | IRIVerso |
\n",
- " \n",
- " 0 | \n",
- " 1 | \n",
- " La gloria di colui che tutto move | \n",
- " \n",
- " a efrbroo:F2_Expression ,\n",
- " rdfs:Resource ; \n",
- " \"La gloria di colui che tutto move\"^^xsd:string ;\n",
- " \"1\"^^xsd:int ;\n",
- " \"1\"^^xsd:int . | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2 | \n",
- " per l'universo penetra, e risplende | \n",
- " \n",
- " a efrbroo:F2_Expression ,\n",
- " rdfs:Resource ; \n",
- " \"per l'universo penetra, e risplende\"^^xsd:string ;\n",
- " \"2\"^^xsd:int ;\n",
- " \"2\"^^xsd:int . | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 3 | \n",
- " in una parte più e meno altrove. | \n",
- " \n",
- " a efrbroo:F2_Expression ,\n",
- " rdfs:Resource ; \n",
- " \"in una parte più e meno altrove.\"^^xsd:string ;\n",
- " \"3\"^^xsd:int ;\n",
- " \"3\"^^xsd:int . | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 4 | \n",
- " Nel ciel che più de la sua luce prende | \n",
- " \n",
- " a efrbroo:F2_Expression ,\n",
- " rdfs:Resource ; \n",
- " \"Nel ciel che più de la sua luce prende\"^^xsd:string ;\n",
- " \"4\"^^xsd:int ;\n",
- " \"4\"^^xsd:int . | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 5 | \n",
- " fu' io, e vidi cose che ridire | \n",
- " \n",
- " a efrbroo:F2_Expression ,\n",
- " rdfs:Resource ; \n",
- " \"fu' io, e vidi cose che ridire\"^^xsd:string ;\n",
- " \"5\"^^xsd:int ;\n",
- " \"5\"^^xsd:int . | \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- ""
- ]
- },
- "execution_count": 72,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df_IRI_versi_par.head().style.set_properties(subset=['IRIVerso'], **{'width': '400px'})"
]