Upload files to ''

This commit is contained in:
Cesare Concordia 2021-02-03 18:34:33 +01:00
parent 8101fc1287
commit 48a8d295b2
1 changed files with 462 additions and 78 deletions

View File

@ -1,8 +1,15 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Test per Parsing e generazione IRI"
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@ -18,7 +25,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@ -27,7 +34,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@ -36,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@ -45,7 +52,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@ -54,7 +61,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@ -67,7 +74,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@ -80,7 +87,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@ -93,9 +100,49 @@
" surname: str"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Parser\n",
"\n",
"Provo a creare un parser.\n",
"\n",
"Un estratto dal file inferno.xml:\n",
"\n",
"~~~~\n",
"<div1> <head>Canto 1</head>\n",
"<lg type=\"canto\">\n",
" <l>\n",
" <LM lemma=\"il\" catg=\"rdms\">Nel</LM>\n",
" <LM lemma=\"in mezzo di\" catg=\"eilaksl\">mezzo</LM>\n",
" <LM lemma=\"il\" catg=\"rdms\">del</LM>\n",
" <LM lemma=\"cammino\" catg=\"sm2ms\">cammin</LM>\n",
" <LM lemma=\"di\" catg=\"epskg\">di</LM>\n",
" <LM lemma=\"nostro\" catg=\"as1fs\">nostra</LM>\n",
" <LM lemma=\"vita\" catg=\"sf1fs\">vita</LM>\n",
" </l>\n",
" <l>\n",
" <LM lemma=\"mi\" catg=\"pf1sypr\">mi</LM>\n",
" <LM lemma=\"ritrovare\" catg=\"vta+1irs1\">ritrovai</LM>\n",
" <LM lemma=\"per\" catg=\"epskpl\">per</LM>\n",
" <LM lemma=\"una\" catg=\"rifs\">una</LM>\n",
" <LM lemma=\"selva\" catg=\"sf1fs\">selva</LM>\n",
" <LM lemma=\"oscuro\" catg=\"a1fs\">oscura</LM>\n",
" </l>\n",
" <l>\n",
" ...\n",
"~~~~\n",
"\n",
" \n",
"Il tag \\<div1\\> individua la porzione di file di un *Canto*, il tag \\<l\\> individua un verso, il tag \\<LM\\> individua un termine/parola/lemma(?) ciascuno dei quali ha 1 o 2 attributi.\n",
"\n",
"Per questa implementazione uso la libreria Python [Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)."
]
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 160,
"metadata": {},
"outputs": [],
"source": [
@ -176,23 +223,44 @@
" i=i+1;\n",
" j=0;\n",
" for lm in verso.find_all(\"lm\"):\n",
" lstctg=[];\n",
" lstlms=[];\n",
" j=j+1;\n",
" lm_text=elem_to_text(lm).strip();\n",
" ctg=lm.get('catg');\n",
" #ctg=lm.get('catg');\n",
" if (lm.get('catg')!=None):\n",
" ctg=lm.get('catg');\n",
" else:\n",
" ctg=\"non_spec\";\n",
" \n",
" lstctg.append(\" \".join(ctg.split())); \n",
" \n",
" if (lm.get('lemma')!=None):\n",
" lemma=lm.get('lemma');\n",
" else:\n",
" lemma=\"non_spec\";\n",
" lstlms.append(\" \".join(lemma.split())); \n",
" for parent in lm.parents:\n",
" if (parent.name=='div1'):\n",
" canto = parent.contents[0]\n",
" break;\n",
" ordr_lms.append((\" \".join(lm_text.split()), \" \".join(ctg.split()), \" \".join(lemma.split()), canto.replace('\\n','').strip(), i, j)); \n",
" \n",
" canto = parent.contents[0];\n",
" if (parent.name=='lm1' and ordr_lms[-1][0]==\" \".join(lm_text.split())):\n",
" j=j-1;\n",
" lstctg=lstctg+ordr_lms[-1][1];\n",
" lstlms=lstlms+ordr_lms[-1][2];\n",
" ordr_lms.pop();\n",
" \n",
" ordr_lms.append((\" \".join(lm_text.split()), lstctg, lstlms, canto.replace('\\n','').strip(), i, j, \"hdn:Works/Commedia/Cantica/1/\"+str(i),\n",
" \"hdn:Works/Commedia/Cantica/1/\"+str(i)+\"/#\"+str(j)));\n",
" \n",
" \n",
" # ordr_lms.append((\" \".join(lm_text.split()), \" \".join(ctg.split()), \" \".join(lemma.split()), canto.replace('\\n','').strip(), i, j, \"hdn:Works/Commedia/Cantica/1/\"+str(i),\n",
" # \"hdn:Works/Commedia/Cantica/1/\"+str(i)+\"/#\"+str(j)));\n",
" \n",
" \n",
" return ordr_lms\n",
" \n",
" @property\n",
" def lemma(self):\n",
" def ff_ea(self):\n",
" lms_text = []\n",
" lms_tupl=()\n",
" for lm in self.soup.body.find_all(\"lm\"):\n",
@ -232,7 +300,7 @@
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": 101,
"metadata": {},
"outputs": [],
"source": [
@ -248,43 +316,43 @@
"metadata": {},
"source": [
"### Provo a vedere se il parser funziona\n",
"Dovrebbe arrivare sino al termine 'oscuro'"
"Dovrebbe arrivare sino al termine 'oscuro', controllare!"
]
},
{
"cell_type": "code",
"execution_count": 61,
"execution_count": 164,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('Nel', 'rdms', 'il', 'Canto 1', 1, 1)\n",
"('Nel', 'rdms', 'il', 'Canto 1')\n",
"\n",
"('mezzo', 'eilaksl', 'in mezzo di', 'Canto 1', 1, 2)\n",
"('mezzo', 'eilaksl', 'in mezzo di', 'Canto 1')\n",
"\n",
"('del', 'rdms', 'il', 'Canto 1', 1, 3)\n",
"('del', 'rdms', 'il', 'Canto 1')\n",
"\n",
"('cammin', 'sm2ms', 'cammino', 'Canto 1', 1, 4)\n",
"('cammin', 'sm2ms', 'cammino', 'Canto 1')\n",
"\n",
"('di', 'epskg', 'di', 'Canto 1', 1, 5)\n",
"('di', 'epskg', 'di', 'Canto 1')\n",
"\n",
"('nostra', 'as1fs', 'nostro', 'Canto 1', 1, 6)\n",
"('nostra', 'as1fs', 'nostro', 'Canto 1')\n",
"\n",
"('vita', 'sf1fs', 'vita', 'Canto 1', 1, 7)\n",
"('vita', 'sf1fs', 'vita', 'Canto 1')\n",
"\n",
"('mi', 'pf1sypr', 'mi', 'Canto 1', 2, 1)\n",
"('mi', 'pf1sypr', 'mi', 'Canto 1')\n",
"\n",
"('ritrovai', 'vta+1irs1', 'ritrovare', 'Canto 1', 2, 2)\n",
"('ritrovai', 'vta+1irs1', 'ritrovare', 'Canto 1')\n",
"\n",
"('per', 'epskpl', 'per', 'Canto 1', 2, 3)\n",
"('per', 'epskpl', 'per', 'Canto 1')\n",
"\n",
"('una', 'rifs', 'una', 'Canto 1', 2, 4)\n",
"('una', 'rifs', 'una', 'Canto 1')\n",
"\n",
"('selva', 'sf1fs', 'selva', 'Canto 1', 2, 5)\n",
"('selva', 'sf1fs', 'selva', 'Canto 1')\n",
"\n",
"('oscura', 'a1fs', 'oscuro', 'Canto 1', 2, 6)\n",
"('oscura', 'a1fs', 'oscuro', 'Canto 1')\n",
"\n",
"...\n"
]
@ -292,7 +360,7 @@
],
"source": [
"tei = TEIFile('/Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/inferno_forparsing.xml')\n",
"bbs=tei.orderedlemma\n",
"bbs=tei.ff_ea\n",
"for re in bbs:\n",
" print (re, end=\"\\n\"*2)\n",
" if (re[0].startswith('oscura')):\n",
@ -304,13 +372,13 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Carico il testo e creo una tabella\n",
"faccio il parsing del testo e creo una tabella con ha 3 colonne: *lemma, categoria, lemma italiano\""
"### Carico il testo *inferno.xml* e creo una tabella\n",
"Eseguo il parsing del testo presente nel file e creo una tabella con le seguenti colonne: *lemma, categoria, lemma italiano, canto, verso, posizione lemma nel verso, IRIVerso, IRIParola*"
]
},
{
"cell_type": "code",
"execution_count": 63,
"execution_count": 165,
"metadata": {},
"outputs": [
{
@ -327,22 +395,24 @@
},
{
"cell_type": "code",
"execution_count": 66,
"execution_count": 166,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Lemma 34280\n",
"Categoria 34280\n",
"LemmaItaliano 34280\n",
"Canto 34280\n",
"Verso 34280\n",
"PosizioneLemmaNelVerso 34280\n",
"FormaFlessa 33400\n",
"Categoria 33400\n",
"LemmaItaliano 33400\n",
"Canto 33400\n",
"Verso 33400\n",
"PosizioneLemmaNelVerso 33400\n",
"IRIVerso 33400\n",
"IRIParola 33400\n",
"dtype: int64"
]
},
"execution_count": 66,
"execution_count": 166,
"metadata": {},
"output_type": "execute_result"
}
@ -351,13 +421,34 @@
"data = [mytesto[0]]\n",
"#data[0]\n",
"dfObj = pd.DataFrame(data[0]) \n",
"testo_tabella=pd.DataFrame(data[0], columns = ['Lemma' , 'Categoria', 'LemmaItaliano', 'Canto', 'Verso', 'PosizioneLemmaNelVerso']) \n",
"testo_tabella=pd.DataFrame(data[0], columns = ['FormaFlessa' , 'Categoria', 'LemmaItaliano', 'Canto', 'Verso', 'PosizioneLemmaNelVerso', 'IRIVerso', 'IRIParola']) \n",
"testo_tabella.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Generiamo una tabella\n",
"\n",
"La tabella contiene tutti i lemmi della cantica, insieme con le categorie, il lemma in italiano, il numero del verso, la posizione all'interno del verso e l'IRI del verso. Per l'IRI del verso mi son basato su quanto riportato nel *Manifesto*, Appendice 7: \n",
"\n",
"> l'IRI del primo verso del sesto libro dell'Eneide: hdn:Works\Eneide\Liber\VI\1\n",
"\n",
"una possibile struttura delle IRI per i versi della Cantica Inferno della Commedia \n",
"\n",
"hdn:Works\\Commedia\\Cantica\\1\\n\n",
"\n",
"dove n è il numero del verso. \n",
"\n",
"Si tratta di un esempio, è facilmente modificabile.\n",
"Link al criterio di definizione dell'IRI del verso\n",
"http://hdn.dantenetwork.it/resource/work/commedia/cantica/1/canto/1/verso/1"
]
},
{
"cell_type": "code",
"execution_count": 67,
"execution_count": 167,
"metadata": {},
"outputs": [
{
@ -381,124 +472,170 @@
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Lemma</th>\n",
" <th>FormaFlessa</th>\n",
" <th>Categoria</th>\n",
" <th>LemmaItaliano</th>\n",
" <th>Canto</th>\n",
" <th>Verso</th>\n",
" <th>PosizioneLemmaNelVerso</th>\n",
" <th>IRIVerso</th>\n",
" <th>IRIParola</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Nel</td>\n",
" <td>rdms</td>\n",
" <td>il</td>\n",
" <td>[rdms]</td>\n",
" <td>[il]</td>\n",
" <td>Canto 1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/1</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/1/#1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>mezzo</td>\n",
" <td>eilaksl</td>\n",
" <td>in mezzo di</td>\n",
" <td>[eilaksl]</td>\n",
" <td>[in mezzo di]</td>\n",
" <td>Canto 1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/1</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/1/#2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>del</td>\n",
" <td>rdms</td>\n",
" <td>il</td>\n",
" <td>[rdms]</td>\n",
" <td>[il]</td>\n",
" <td>Canto 1</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/1</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/1/#3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>cammin</td>\n",
" <td>sm2ms</td>\n",
" <td>cammino</td>\n",
" <td>[sm2ms]</td>\n",
" <td>[cammino]</td>\n",
" <td>Canto 1</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/1</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/1/#4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>di</td>\n",
" <td>epskg</td>\n",
" <td>di</td>\n",
" <td>[epskg]</td>\n",
" <td>[di]</td>\n",
" <td>Canto 1</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/1</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/1/#5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>nostra</td>\n",
" <td>as1fs</td>\n",
" <td>nostro</td>\n",
" <td>[as1fs]</td>\n",
" <td>[nostro]</td>\n",
" <td>Canto 1</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/1</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/1/#6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>vita</td>\n",
" <td>sf1fs</td>\n",
" <td>vita</td>\n",
" <td>[sf1fs]</td>\n",
" <td>[vita]</td>\n",
" <td>Canto 1</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/1</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/1/#7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>mi</td>\n",
" <td>pf1sypr</td>\n",
" <td>mi</td>\n",
" <td>[pf1sypr]</td>\n",
" <td>[mi]</td>\n",
" <td>Canto 1</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/2</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/2/#1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>ritrovai</td>\n",
" <td>vta+1irs1</td>\n",
" <td>ritrovare</td>\n",
" <td>[vta+1irs1]</td>\n",
" <td>[ritrovare]</td>\n",
" <td>Canto 1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/2</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/2/#2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>per</td>\n",
" <td>epskpl</td>\n",
" <td>per</td>\n",
" <td>[epskpl]</td>\n",
" <td>[per]</td>\n",
" <td>Canto 1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/2</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/2/#3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Lemma Categoria LemmaItaliano Canto Verso PosizioneLemmaNelVerso\n",
"0 Nel rdms il Canto 1 1 1\n",
"1 mezzo eilaksl in mezzo di Canto 1 1 2\n",
"2 del rdms il Canto 1 1 3\n",
"3 cammin sm2ms cammino Canto 1 1 4\n",
"4 di epskg di Canto 1 1 5\n",
"5 nostra as1fs nostro Canto 1 1 6\n",
"6 vita sf1fs vita Canto 1 1 7\n",
"7 mi pf1sypr mi Canto 1 2 1\n",
"8 ritrovai vta+1irs1 ritrovare Canto 1 2 2\n",
"9 per epskpl per Canto 1 2 3"
" FormaFlessa Categoria LemmaItaliano Canto Verso \\\n",
"0 Nel [rdms] [il] Canto 1 1 \n",
"1 mezzo [eilaksl] [in mezzo di] Canto 1 1 \n",
"2 del [rdms] [il] Canto 1 1 \n",
"3 cammin [sm2ms] [cammino] Canto 1 1 \n",
"4 di [epskg] [di] Canto 1 1 \n",
"5 nostra [as1fs] [nostro] Canto 1 1 \n",
"6 vita [sf1fs] [vita] Canto 1 1 \n",
"7 mi [pf1sypr] [mi] Canto 1 2 \n",
"8 ritrovai [vta+1irs1] [ritrovare] Canto 1 2 \n",
"9 per [epskpl] [per] Canto 1 2 \n",
"\n",
" PosizioneLemmaNelVerso IRIVerso \\\n",
"0 1 hdn:Works/Commedia/Cantica/1/1 \n",
"1 2 hdn:Works/Commedia/Cantica/1/1 \n",
"2 3 hdn:Works/Commedia/Cantica/1/1 \n",
"3 4 hdn:Works/Commedia/Cantica/1/1 \n",
"4 5 hdn:Works/Commedia/Cantica/1/1 \n",
"5 6 hdn:Works/Commedia/Cantica/1/1 \n",
"6 7 hdn:Works/Commedia/Cantica/1/1 \n",
"7 1 hdn:Works/Commedia/Cantica/1/2 \n",
"8 2 hdn:Works/Commedia/Cantica/1/2 \n",
"9 3 hdn:Works/Commedia/Cantica/1/2 \n",
"\n",
" IRIParola \n",
"0 hdn:Works/Commedia/Cantica/1/1/#1 \n",
"1 hdn:Works/Commedia/Cantica/1/1/#2 \n",
"2 hdn:Works/Commedia/Cantica/1/1/#3 \n",
"3 hdn:Works/Commedia/Cantica/1/1/#4 \n",
"4 hdn:Works/Commedia/Cantica/1/1/#5 \n",
"5 hdn:Works/Commedia/Cantica/1/1/#6 \n",
"6 hdn:Works/Commedia/Cantica/1/1/#7 \n",
"7 hdn:Works/Commedia/Cantica/1/2/#1 \n",
"8 hdn:Works/Commedia/Cantica/1/2/#2 \n",
"9 hdn:Works/Commedia/Cantica/1/2/#3 "
]
},
"execution_count": 67,
"execution_count": 167,
"metadata": {},
"output_type": "execute_result"
}
@ -507,6 +644,253 @@
"testo_tabella.head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### File *purgatorio.xml*\n",
"\n",
"Eseguiamo un test, la stampa dovrebbe fermarsi al termine *ingegno*.\n"
]
},
{
"cell_type": "code",
"execution_count": 168,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('Per', 'epsf', 'per', 'Canto 1')\n",
"\n",
"('correr', 'vta2fp', 'correre', 'Canto 1')\n",
"\n",
"('miglior', 'a2fp', 'migliore', 'Canto 1')\n",
"\n",
"('acque', 'sf1fp', 'acqua', 'Canto 1')\n",
"\n",
"('alza', 'vta1ips3', 'alzare', 'Canto 1')\n",
"\n",
"('le', 'adfp', 'la', 'Canto 1')\n",
"\n",
"('vele', 'sf1fp', 'vela', 'Canto 1')\n",
"\n",
"('omai', 'b', 'ormai', 'Canto 1')\n",
"\n",
"('la', 'rdfs', 'la', 'Canto 1')\n",
"\n",
"('navicella', 'sf1fs', 'navicella', 'Canto 1')\n",
"\n",
"('del', 'epa', 'di', 'Canto 1')\n",
"\n",
"('del', 'rdms', 'il', 'Canto 1')\n",
"\n",
"('mio', 'as1ms', 'mio', 'Canto 1')\n",
"\n",
"('ingegno', 'sm2ms', 'ingegno', 'Canto 1')\n",
"\n",
"...\n"
]
}
],
"source": [
"tei_purgatorio = TEIFile('/Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/purgatorio.xml')\n",
"bbs_pu=tei_purgatorio.ff_ea\n",
"for repu in bbs_pu:\n",
" print (repu, end=\"\\n\"*2)\n",
" if (repu[0].startswith('ingegno')):\n",
" print('...')\n",
" break"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Carico il testo *purgatorio.xml* e creo una tabella\n",
"Eseguo il parsing del testo presente nel file e creo una tabella simile alla precedente.\n",
"\n",
"Nota: il tag \\<LM1\\> indica una forma flessa con due lemmi; va gestito nel parser."
]
},
{
"cell_type": "code",
"execution_count": 169,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Handled /Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/purgatorio.xml\n"
]
}
],
"source": [
"parsed_purgatorio=tei_to_csv_entry('/Users/cesare/Projects/hdn/triple/DanteTriple/xml/DanteSearch/grammaticale/purgatorio.xml')"
]
},
{
"cell_type": "code",
"execution_count": 170,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"FormaFlessa 33245\n",
"Categoria 33245\n",
"Lemma 33245\n",
"Canto 33245\n",
"Verso 33245\n",
"PosizioneLemmaNelVerso 33245\n",
"IRIVerso 33245\n",
"IRIParola 33245\n",
"dtype: int64"
]
},
"execution_count": 170,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#DA COMPLETARE CON IRI CORRETTO!\n",
"\n",
"data_purgatorio = [parsed_purgatorio[0]]\n",
"#dfObj_purgatorio = pd.DataFrame(data_purgatorio[0]) \n",
"testo_purgatorio_tabella=pd.DataFrame(data_purgatorio[0], columns = ['FormaFlessa' , 'Categoria', 'Lemma', 'Canto', 'Verso', 'PosizioneLemmaNelVerso', 'IRIVerso', 'IRIParola']) \n",
"testo_purgatorio_tabella.count()"
]
},
{
"cell_type": "code",
"execution_count": 175,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>FormaFlessa</th>\n",
" <th>Categoria</th>\n",
" <th>Lemma</th>\n",
" <th>Canto</th>\n",
" <th>Verso</th>\n",
" <th>PosizioneLemmaNelVerso</th>\n",
" <th>IRIVerso</th>\n",
" <th>IRIParola</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>33240</th>\n",
" <td>disposto</td>\n",
" <td>[vtp2pra1ms]</td>\n",
" <td>[disporre]</td>\n",
" <td>Canto 33</td>\n",
" <td>4755</td>\n",
" <td>3</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/4755</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/4755/#3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33241</th>\n",
" <td>a</td>\n",
" <td>[epsb]</td>\n",
" <td>[a]</td>\n",
" <td>Canto 33</td>\n",
" <td>4755</td>\n",
" <td>4</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/4755</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/4755/#4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33242</th>\n",
" <td>salire</td>\n",
" <td>[vi3fp]</td>\n",
" <td>[salire]</td>\n",
" <td>Canto 33</td>\n",
" <td>4755</td>\n",
" <td>5</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/4755</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/4755/#5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33243</th>\n",
" <td>alle</td>\n",
" <td>[rdfp, epakml]</td>\n",
" <td>[la, a]</td>\n",
" <td>Canto 33</td>\n",
" <td>4755</td>\n",
" <td>6</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/4755</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/4755/#6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33244</th>\n",
" <td>stelle</td>\n",
" <td>[sf1fp]</td>\n",
" <td>[stella]</td>\n",
" <td>Canto 33</td>\n",
" <td>4755</td>\n",
" <td>7</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/4755</td>\n",
" <td>hdn:Works/Commedia/Cantica/1/4755/#7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" FormaFlessa Categoria Lemma Canto Verso \\\n",
"33240 disposto [vtp2pra1ms] [disporre] Canto 33 4755 \n",
"33241 a [epsb] [a] Canto 33 4755 \n",
"33242 salire [vi3fp] [salire] Canto 33 4755 \n",
"33243 alle [rdfp, epakml] [la, a] Canto 33 4755 \n",
"33244 stelle [sf1fp] [stella] Canto 33 4755 \n",
"\n",
" PosizioneLemmaNelVerso IRIVerso \\\n",
"33240 3 hdn:Works/Commedia/Cantica/1/4755 \n",
"33241 4 hdn:Works/Commedia/Cantica/1/4755 \n",
"33242 5 hdn:Works/Commedia/Cantica/1/4755 \n",
"33243 6 hdn:Works/Commedia/Cantica/1/4755 \n",
"33244 7 hdn:Works/Commedia/Cantica/1/4755 \n",
"\n",
" IRIParola \n",
"33240 hdn:Works/Commedia/Cantica/1/4755/#3 \n",
"33241 hdn:Works/Commedia/Cantica/1/4755/#4 \n",
"33242 hdn:Works/Commedia/Cantica/1/4755/#5 \n",
"33243 hdn:Works/Commedia/Cantica/1/4755/#6 \n",
"33244 hdn:Works/Commedia/Cantica/1/4755/#7 "
]
},
"execution_count": 175,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"testo_purgatorio_tabella.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,