Eliminare 'CItationDHres.ipynb'

This commit is contained in:
Cesare Concordia 2021-12-14 08:22:41 +01:00
parent 1fefe236ac
commit a21eef6d94
1 changed files with 0 additions and 809 deletions

View File

@ -1,809 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import ast\n",
"import sys\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from bokeh.io import output_notebook, show\n",
"from bokeh.plotting import figure\n",
"from bs4 import BeautifulSoup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def read_tei(tei_file):\n",
"    \"\"\"Parse a TEI/XML file into a BeautifulSoup tree.\n",
"\n",
"    The file is opened in binary mode so that the parser detects the encoding\n",
"    from the document instead of using the platform default text encoding.\n",
"\n",
"    Args:\n",
"        tei_file: path of the TEI file to read.\n",
"\n",
"    Returns:\n",
"        The BeautifulSoup object built with the 'lxml' parser.\n",
"\n",
"    Raises:\n",
"        OSError: if the file cannot be opened.\n",
"    \"\"\"\n",
"    # No fallback `raise` after the `with` block: the body either returns the\n",
"    # soup or propagates its own exception, so nothing after it can run.\n",
"    with open(tei_file, 'rb') as tei:\n",
"        return BeautifulSoup(tei, 'lxml')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def elem_to_text(elem, default=''):\n",
"    \"\"\"Return the text of `elem`, or `default` when `elem` is falsy.\n",
"\n",
"    The text is obtained with `getText(separator=' ', strip=True)`.\n",
"    \"\"\"\n",
"    if not elem:\n",
"        return default\n",
"    return elem.getText(separator=' ', strip=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from dataclasses import dataclass\n",
"\n",
"@dataclass\n",
"class Person:\n",
"    \"\"\"Name parts of one author, as plain strings.\"\"\"\n",
"\n",
"    firstname: str   # given name\n",
"    middlename: str  # middle name, '' when not available\n",
"    surname: str     # family name"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class TEIFile(object):\n",
"    \"\"\"Accessors for the metadata of one TEI document.\n",
"\n",
"    The file is parsed once by `read_tei` in the constructor; each property\n",
"    then queries the resulting soup.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, filename):\n",
"        \"\"\"Parse `filename` and initialise the per-property caches.\"\"\"\n",
"        self.filename = filename\n",
"        self.soup = read_tei(filename)\n",
"        # Caches filled on first access of the matching property.\n",
"        self._text = None\n",
"        self._title = ''\n",
"        self._abstract = ''\n",
"\n",
"    @property\n",
"    def doi(self):\n",
"        \"\"\"Text of the first `idno` element with type='DOI', or '' if there is none.\"\"\"\n",
"        idno_elem = self.soup.find('idno', type='DOI')\n",
"        if not idno_elem:\n",
"            return ''\n",
"        return idno_elem.getText()\n",
"\n",
"    @property\n",
"    def title(self):\n",
"        \"\"\"Text of the first `title` element, or the placeholder 'na' if there is none.\"\"\"\n",
"        if not self._title:\n",
"            title_elem = self.soup.title\n",
"            self._title = title_elem.getText() if title_elem else \"na\"\n",
"        return self._title\n",
"\n",
"    @property\n",
"    def abstract(self):\n",
"        \"\"\"Text of the first `abstract` element, or '' if the document has none.\"\"\"\n",
"        if not self._abstract:\n",
"            abstract_elem = self.soup.abstract\n",
"            # `soup.abstract` is None when the element is missing; calling\n",
"            # getText() on it would raise AttributeError.\n",
"            if abstract_elem is not None:\n",
"                self._abstract = abstract_elem.getText(separator=' ', strip=True)\n",
"        return self._abstract\n",
"\n",
"    @property\n",
"    def authors(self):\n",
"        \"\"\"List of `Person`, one per `author` element that contains a `persname`.\"\"\"\n",
"        result = []\n",
"        # Authors are searched in the whole document, not only under `analytic`.\n",
"        for author in self.soup.find_all('author'):\n",
"            persname = author.persname\n",
"            if not persname:\n",
"                continue\n",
"            # No type filter on the first lookup: the first `forename` of any type is used.\n",
"            firstname = elem_to_text(persname.find(\"forename\"))\n",
"            middlename = elem_to_text(persname.find(\"forename\", type=\"middle\"))\n",
"            surname = elem_to_text(persname.surname)\n",
"            result.append(Person(firstname, middlename, surname))\n",
"        return result\n",
"\n",
"    @property\n",
"    def bibliography(self):\n",
"        \"\"\"List with the text of every `bibl` element of the document.\"\"\"\n",
"        return [elem_to_text(bibl) for bibl in self.soup.find_all('bibl') if bibl]\n",
"\n",
"    @property\n",
"    def text(self):\n",
"        \"\"\"Space-joined text of the body `div` elements that have no `type` attribute.\n",
"\n",
"        Returns '' when the document has no body.\n",
"        \"\"\"\n",
"        if not self._text:\n",
"            body = self.soup.body\n",
"            if body is None:\n",
"                return ''\n",
"            # A div without a type is neither an appendix nor references, just plain text.\n",
"            self._text = \" \".join(\n",
"                div.get_text(separator=' ', strip=True)\n",
"                for div in body.find_all(\"div\")\n",
"                if not div.get(\"type\")\n",
"            )\n",
"        return self._text"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import multiprocessing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from os.path import basename, splitext\n",
"\n",
"def basename_without_ext(path):\n",
"    \"\"\"Return the file name of `path` without its extension.\n",
"\n",
"    When the remaining name ends with '.tei' (as in 'paper.tei.xml'), that\n",
"    suffix is removed as well.\n",
"    \"\"\"\n",
"    name_root = splitext(basename(path))[0]\n",
"    return name_root[:-4] if name_root.endswith('.tei') else name_root"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def tei_to_csv_entry(tei_file):\n",
"    \"\"\"Build the (ID, authors, title, bibliography) tuple for one TEI file.\n",
"\n",
"    The ID is the file name without extension; the abstract is not included.\n",
"    A progress line is printed once the file has been parsed.\n",
"    \"\"\"\n",
"    document = TEIFile(tei_file)\n",
"    print(f\"Handled {tei_file}\")\n",
"    paper_id = basename_without_ext(tei_file)\n",
"    return (paper_id, document.authors, document.title, document.bibliography)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import glob\n",
"from pathlib import Path\n",
"# Folder holding one sub-folder of TEI files per conference year.\n",
"# NOTE: machine-specific absolute path; point it at the local ToolXtractor checkout.\n",
"XML_ROOT = Path(\"/Users/cesare/git/SSHOCCitationService/dataset/ToolXtractor/data/xml\")\n",
"\n",
"\n",
"def list_papers(year, root=XML_ROOT):\n",
"    \"\"\"Return the sorted list of '*.xml' paths found in the `year` sub-folder of `root`.\"\"\"\n",
"    return sorted((Path(root) / str(year)).glob('*.xml'))\n",
"\n",
"\n",
"papers15 = list_papers(2015)\n",
"papers16 = list_papers(2016)\n",
"papers17 = list_papers(2017)\n",
"papers18 = list_papers(2018)\n",
"papers19 = list_papers(2019)\n",
"papers20 = list_papers(2020)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from multiprocessing.pool import Pool\n",
"pool = Pool()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import the DH conference papers (2015-2020)\n",
"\n",
"The papers are downloaded from https://github.com/lehkost/ToolXtractor/"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Parse every yearly batch with the worker pool, one batch after the other in year order.\n",
"yearly_batches = (papers15, papers16, papers17, papers18, papers19, papers20)\n",
"(csv_entries15, csv_entries16, csv_entries17,\n",
" csv_entries18, csv_entries19, csv_entries20) = [\n",
"    pool.map(tei_to_csv_entry, batch) for batch in yearly_batches\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ID 2359\n",
"Authors 2359\n",
"Title 2359\n",
"Bibliography 2359\n",
"dtype: int64"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat\n",
"# stacks the yearly frames in one step and, like append, keeps each frame's row labels.\n",
"CSV_COLUMNS = ['ID', 'Authors', 'Title', 'Bibliography']\n",
"result_csv = pd.concat(\n",
"    [\n",
"        pd.DataFrame(entries, columns=CSV_COLUMNS)\n",
"        for entries in (csv_entries15, csv_entries16, csv_entries17,\n",
"                        csv_entries18, csv_entries19, csv_entries20)\n",
"    ]\n",
")\n",
"result_csv.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Select the papers having the TEI \\<bibl\\> elements.\n",
"\n",
"The \\<bibl\\> element (bibliographic citation) contains a loosely-structured bibliographic citation of which the sub-components may or may not be explicitly tagged. There are 1195 papers having this element, and in total there are 11746 citations."
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ID 1195\n",
"Authors 1195\n",
"Title 1195\n",
"Bibliography 1195\n",
"dtype: int64"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the papers whose list of bibliography entries is not empty.\n",
"has_bibl = result_csv['Bibliography'].str.len() > 0\n",
"test_csv = result_csv[has_bibl]\n",
"test_csv.count()"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ID 11746\n",
"Title 11746\n",
"Bibliography 11746\n",
"dtype: int64"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# All citations: one row per (paper, reference) pair, obtained by exploding the per-paper lists.\n",
"my_df = test_csv.loc[:, ['ID', 'Title', 'Bibliography']]\n",
"my_exp_df = my_df.explode('Bibliography')\n",
"my_exp_df.count()"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"134"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Curiosity: there are at least 134 references cited more than once\n",
"is_repeated = my_exp_df.duplicated(subset=['Bibliography'], keep=\"last\")\n",
"df_p_d = my_exp_df[is_repeated].sort_values(by='Bibliography')\n",
"df_p_d['Bibliography'].drop_duplicates().count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Citations with DOI\n",
"There are 821 (of 11746) citations with a DOI"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Reference 821\n",
"DOI 821\n",
"dtype: int64"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import re\n",
"regex = re.compile(r'\\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![\"&\\'<>])\\S)+)\\b', re.IGNORECASE)\n",
"df_refs = my_exp_df.Bibliography.values\n",
"# Search every reference once, then keep the ones in which the DOI pattern matched\n",
"# together with the captured DOI (group 1).\n",
"doi_hits = [(reference, regex.search(reference)) for reference in df_refs]\n",
"references = [reference for reference, hit in doi_hits if hit]\n",
"DOIs = [hit[1] for _, hit in doi_hits if hit]\n",
"df_refs_with_doi = pd.DataFrame(columns=[\"Reference\", \"DOI\"])\n",
"df_refs_with_doi['Reference'] = references\n",
"df_refs_with_doi['DOI'] = DOIs\n",
"df_refs_with_doi.count()"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Reference</th>\n",
" <th>DOI</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Byrne, G., and Goddard, L. (2010). The Stronge...</td>\n",
" <td>10.1045/november2010-byrne</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Lampert, C. K., and Southwick, S. B. (2013). L...</td>\n",
" <td>10.1080/19386389.2013.826095</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Singer, R. (2009). Linked Library Data Now! Jo...</td>\n",
" <td>10.1080/19411260903035809</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Thomas, L. and Solomon, D. (2014). Active User...</td>\n",
" <td>10.1353/cea.2014.0014</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Farquhar, A. and Baker, J. (2014). Interoperab...</td>\n",
" <td>10.6084/m9.figshare.1092550%20</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Reference \\\n",
"0 Byrne, G., and Goddard, L. (2010). The Stronge... \n",
"1 Lampert, C. K., and Southwick, S. B. (2013). L... \n",
"2 Singer, R. (2009). Linked Library Data Now! Jo... \n",
"3 Thomas, L. and Solomon, D. (2014). Active User... \n",
"4 Farquhar, A. and Baker, J. (2014). Interoperab... \n",
"\n",
" DOI \n",
"0 10.1045/november2010-byrne \n",
"1 10.1080/19386389.2013.826095 \n",
"2 10.1080/19411260903035809 \n",
"3 10.1353/cea.2014.0014 \n",
"4 10.6084/m9.figshare.1092550%20 "
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Example: five citations that have DOIs\n",
"df_refs_with_doi.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Retrieve citation DOIs using CrossRef API\n",
"The Crossref API allows querying its database with free-text strings that contain bibliographic references. The reference string does not need to be a well-formed reference: Crossref parses the input string using machine learning techniques and tries to match it with the metadata stored in its database. \n",
"\n",
"An important feature of the Crossref API is the confidence score that it returns alongside the metadata of each document. For each request, the score indicates how confident Crossref is about the retrieved entities: if the score is high, the retrieved metadata are probably the correct ones; if the score is low, the retrieved metadata might be wrong."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a first test, we use the Crossref API to check the citations that already contain a DOI; we chose '110' as the minimum score value."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import urllib.error\n",
"import urllib.parse\n",
"import urllib.request\n",
"\n",
"CROSSREF_WORKS_URL = \"https://api.crossref.org/works\"\n",
"# Contact address sent to Crossref with every request (`mailto` query parameter).\n",
"CROSSREF_MAILTO = \"cesare.concordia@gmail.com\"\n",
"# A match is kept only when its score is strictly above this value.\n",
"# NOTE(review): the text above this cell mentions 110 as minimum score, this cell uses 115 - confirm.\n",
"MIN_SCORE = 115\n",
"# The loop stops once more than this many queries have been answered.\n",
"MAX_QUERIES = 1000\n",
"\n",
"references = df_refs_with_doi['Reference']\n",
"originalCitations = []\n",
"crossrefCitations = []\n",
"DOIs = []\n",
"score = []\n",
"i = 0  # citations matched with a score above MIN_SCORE\n",
"j = 0  # queries answered so far\n",
"for cite in references:\n",
"    cit = urllib.parse.quote_plus(cite)\n",
"    query_url = f\"{CROSSREF_WORKS_URL}?query.bibliographic={cit}&sort=score&mailto={CROSSREF_MAILTO}\"\n",
"    try:\n",
"        with urllib.request.urlopen(query_url) as url:\n",
"            data16 = json.loads(url.read().decode())\n",
"    except urllib.error.URLError:\n",
"        # HTTPError is a subclass of URLError, so this single clause covers both;\n",
"        # the failed (URL-encoded) citation is printed and the loop moves on.\n",
"        print(cit)\n",
"        continue\n",
"    j = j + 1\n",
"    if j % 25 == 0:\n",
"        print(f\"{j}, ({i})\")\n",
"    items = data16[\"message\"][\"items\"]\n",
"    # Only the first item (results are requested sorted by score) is considered.\n",
"    if len(items) > 0 and items[0]['score'] > MIN_SCORE:\n",
"        best_match = items[0]\n",
"        originalCitations.append(cite)\n",
"        crossrefCitations.append(best_match)\n",
"        DOIs.append(best_match['DOI'])\n",
"        score.append(best_match['score'])\n",
"        i = i + 1\n",
"    if j > MAX_QUERIES:\n",
"        break\n",
"\n",
"df_citations16 = pd.DataFrame(columns=[\"Orig\", \"Crossref\", \"DOI\"])\n",
"df_citations16[\"Orig\"] = originalCitations\n",
"df_citations16[\"Crossref\"] = crossrefCitations\n",
"df_citations16[\"DOI\"] = DOIs\n",
"df_citations16[\"Score\"] = score\n",
"df_citations16.head()"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Orig 327\n",
"Crossref 327\n",
"DOI_CR 327\n",
"Score 327\n",
"DOI 327\n",
"dtype: int64"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Put the DOI extracted from the citation text (column 'DOI') next to the one\n",
"# already in df_citations16, which gets the '_CR' suffix.\n",
"dois_by_reference = df_refs_with_doi.set_index('Reference')\n",
"df_cit_datasets = df_citations16.join(dois_by_reference, on='Orig', lsuffix='_CR')\n",
"df_cit_datasets.count()"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Orig 278\n",
"Crossref 278\n",
"DOI 278\n",
"Score 278\n",
"dtype: int64"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Remove duplicates: keep the first row for each distinct DOI.\n",
"test = df_citations16\n",
"df_temp_dois = test.drop_duplicates(subset=['DOI'], keep='first')\n",
"df_temp_dois.count()"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"278"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Discard the rows whose DOI is the empty string and keep the remaining DOIs as an array.\n",
"has_doi = df_temp_dois['DOI'].ne('')\n",
"df_dois = df_temp_dois[has_doi]\n",
"df_dois_values = df_dois['DOI'].values\n",
"df_dois_values.size"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"\n",
"#headers_dict = {\"Accept\": \"application/x-bibtex\"}\n",
"headers_dict = {\"Accept\": \"text/x-bibliography\", \"locale\":\"en-EN\"}\n",
"\n",
"# One record per DOI. `cn_citation` holds the text returned by doi.org, or an\n",
"# integer code when no text could be fetched: 408 timeout, 503 connection\n",
"# error, 400 empty/missing DOI.\n",
"cn_records = []\n",
"for var in df_dois_values:\n",
"    if var != \"\" and var is not None:\n",
"        print(var)\n",
"        try:\n",
"            r = requests.get(\"https://doi.org/\" + var, headers=headers_dict, timeout=20)\n",
"            cn_citation = r.content.decode(\"utf-8\")\n",
"        except requests.exceptions.Timeout:\n",
"            # Must come before ConnectionError: ConnectTimeout derives from both\n",
"            # classes and would otherwise be recorded as 503.\n",
"            cn_citation = 408\n",
"        except requests.exceptions.ConnectionError:\n",
"            cn_citation = 503\n",
"    else:\n",
"        cn_citation = 400\n",
"    cn_records.append({'doi': var, 'cn_citation': cn_citation})\n",
"\n",
"# Build the frame once instead of appending row by row (DataFrame.append was\n",
"# removed in pandas 2.0 and copies the whole frame on every call).\n",
"df_cn_citations = pd.DataFrame(cn_records, columns=['doi', 'cn_citation'])\n",
"df_cn_citations.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Orig</th>\n",
" <th>DOI</th>\n",
" <th>cn_citation</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Lampert, C. K., and Southwick, S. B. (2013). L...</td>\n",
" <td>10.1080/19386389.2013.826095</td>\n",
" <td>Lampert, C. K., &amp; Southwick, S. B. (2013). Lea...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Thomas, L. and Solomon, D. (2014). Active User...</td>\n",
" <td>10.1353/cea.2014.0014</td>\n",
" <td>Thomas, L., &amp; Solomon, D. (2014). Active Users...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Omid, M. (2011). Design of an Expert System fo...</td>\n",
" <td>10.1016/j.eswa.2010.09.103</td>\n",
" <td>Omid, M. (2011). Design of an expert system fo...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Trelea , I. C. (2003). The Particle Swarm Opti...</td>\n",
" <td>10.1016/s0020-0190(02)00447-7</td>\n",
" <td>Trelea, I. C. (2003). The particle swarm optim...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Kenderdine, S. (2013). Pure Land: Inhabiting...</td>\n",
" <td>10.1111/cura.12020</td>\n",
" <td>Kenderdine, S. (2013). “Pure Land”: Inhabiting...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Haentjens Dekker , R., van Hulle , D. , Middel...</td>\n",
" <td>10.1093/llc/fqu007</td>\n",
" <td>Haentjens Dekker, R., van Hulle, D., Middell, ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Orig \\\n",
"0 Lampert, C. K., and Southwick, S. B. (2013). L... \n",
"1 Thomas, L. and Solomon, D. (2014). Active User... \n",
"2 Omid, M. (2011). Design of an Expert System fo... \n",
"3 Trelea , I. C. (2003). The Particle Swarm Opti... \n",
"4 Kenderdine, S. (2013). Pure Land: Inhabiting... \n",
"5 Haentjens Dekker , R., van Hulle , D. , Middel... \n",
"\n",
" DOI \\\n",
"0 10.1080/19386389.2013.826095 \n",
"1 10.1353/cea.2014.0014 \n",
"2 10.1016/j.eswa.2010.09.103 \n",
"3 10.1016/s0020-0190(02)00447-7 \n",
"4 10.1111/cura.12020 \n",
"5 10.1093/llc/fqu007 \n",
"\n",
" cn_citation \n",
"0 Lampert, C. K., & Southwick, S. B. (2013). Lea... \n",
"1 Thomas, L., & Solomon, D. (2014). Active Users... \n",
"2 Omid, M. (2011). Design of an expert system fo... \n",
"3 Trelea, I. C. (2003). The particle swarm optim... \n",
"4 Kenderdine, S. (2013). “Pure Land”: Inhabiting... \n",
"5 Haentjens Dekker, R., van Hulle, D., Middell, ... "
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Add the `cn_citation` column to every row of df_citations16, matching on the DOI.\n",
"citations_by_doi = df_cn_citations.set_index('doi')\n",
"df_cit_datasets = df_citations16.join(citations_by_doi, on='DOI')\n",
"df_cit_datasets.loc[:, ['Orig', 'DOI', 'cn_citation']].head(6)"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Orig', 'Score', 'DOI', 'cn_citation'], dtype='object')"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Columns kept for the table, in display order.\n",
"table_columns = ['Orig', 'Score', 'DOI', 'cn_citation']\n",
"df_cit_table = df_cit_datasets[table_columns]\n",
"df_cit_table.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import plotly.graph_objects as go\n",
"# Render df_cit_table as a plotly table: one table column per DataFrame column.\n",
"table_header = dict(values=list(df_cit_table.columns), line=dict(color='black'))\n",
"table_cells = dict(values=[df_cit_table[name] for name in ('Orig', 'Score', 'DOI', 'cn_citation')])\n",
"fig = go.Figure(data=[go.Table(header=table_header, cells=table_cells)])\n",
"fig.show(\"notebook\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}