sshoc-skosmapping/SSHOC_Data_Repositories_ana...

823 lines
645 KiB
Plaintext
Raw Normal View History

2021-10-18 13:44:23 +02:00
{
"cells": [
{
"cell_type": "markdown",
"id": "veterinary-compatibility",
"metadata": {},
"source": [
"### Welcome to the D 3.5 Repository Evaluation Notebook\n",
"\n",
"This notebook serves as supplemental material to the analysis contained in SSHOC Deliverable D 3.5. The analysis uses the information collected in the document [D 3.5 Repository Evaluation](https://docs.google.com/spreadsheets/d/1h3ZKLv_aR3sqjpKQ67uyzzsJXLWV7Yo-clX4BzVxJGE/edit#gid=1723227300) to create a database of information about repositories. For a number of these repositories, the data is enriched with the information provided by [Re3Data](https://www.re3data.org/). "
]
},
{
"cell_type": "markdown",
"id": "plain-working",
"metadata": {},
"source": [
"### Install and download data"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "civic-outdoors",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import requests\n",
"import re\n",
"import pickle\n",
"import os.path\n",
"import os\n",
"import json\n",
"import numpy as np\n",
"import xml.etree.ElementTree as ET"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "discrete-bookmark",
"metadata": {},
"outputs": [],
"source": [
"def getRe3data(repid, base_url=None):\n",
"    \"\"\"Download re3data records and collect selected fields into a DataFrame.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    repid : str or iterable of str\n",
"        re3data repository id(s); each id is appended to the base URL.\n",
"        A single string is treated as one id.\n",
"    base_url : str, optional\n",
"        Endpoint prefix. When omitted, the notebook-level ``repurl`` is used.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        One row per requested id and one column per entry of ``labels``; every\n",
"        cell holds the list of text values of the matching XML elements\n",
"        (an empty list when the record has no such element).\n",
"\n",
"    Raises\n",
"    ------\n",
"    requests.HTTPError\n",
"        If the API answers with an error status.\n",
"    \"\"\"\n",
"    headers_dict = {'locale': 'en-EN'}\n",
"    labels = ['re3data.orgIdentifier', 'institutionName', 'repositoryName',\n",
"              'repositoryURL', 'contentType', 'type', 'keyword',\n",
"              'additionalName', 'enhancedPublication',\n",
"              'pidSystem', 'versioning',\n",
"              'dataLicenseURL', 'dataAccessType', 'metadataStandardName', 'api']\n",
"    namespace = '{http://www.re3data.org/schema/2-2}'\n",
"\n",
"    if base_url is None:\n",
"        base_url = repurl  # notebook-level default, assigned in a later cell\n",
"    if isinstance(repid, str):\n",
"        repid = [repid]  # otherwise the loop would walk over single characters\n",
"\n",
"    # Collect plain records and build the frame once: appending to a DataFrame\n",
"    # inside the loop copies it every time, and DataFrame.append no longer\n",
"    # exists in pandas 2.x.\n",
"    rows = []\n",
"    for rid in repid:\n",
"        rd = requests.get(base_url + rid, headers=headers_dict, timeout=3)\n",
"        rd.raise_for_status()  # do not try to parse an error page as a record\n",
"        root = ET.XML(rd.text)\n",
"        rows.append({\n",
"            label: [elem.text for elem in root.iter(namespace + label)]\n",
"            for label in labels\n",
"        })\n",
"    return pd.DataFrame(rows, columns=labels)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "knowing-watershed",
"metadata": {},
"outputs": [],
"source": [
"url='https://www.re3data.org/api/v1/repositories'\n",
"df_desc_par=pd.read_xml(url)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "allied-reform",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2749, 3)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#r3d100010136\n",
"df_desc_par.shape"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "monetary-sharing",
"metadata": {},
"outputs": [],
"source": [
"repurl='https://www.re3data.org/api/v1/repository/'\n",
"repids=df_desc_par['id'].values"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "turkish-telephone",
"metadata": {},
"outputs": [],
"source": [
"# Build the local re3data cache only when it is missing: the download issues\n",
"# one HTTP request per repository id, so it should not run on every execution.\n",
"if not os.path.exists('../data/re3data.pickle'):\n",
"    getRe3data(repids).to_pickle('../data/re3data.pickle')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "intellectual-window",
"metadata": {},
"outputs": [],
"source": [
"df_n1 = pd.read_pickle('../data/re3data.pickle')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "deadly-leeds",
"metadata": {},
"outputs": [],
"source": [
"# Load the repository evaluation table from the local CSV file.\n",
"df_re = pd.read_csv('../data/datarepo.csv')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "favorite-division",
"metadata": {},
"outputs": [],
"source": [
"# Keep only the columns used below. The explicit .copy() makes this an\n",
"# independent frame, so later column assignments do not trigger\n",
"# SettingWithCopyWarning.\n",
"# NOTE: 'Discipline_subject ' is spelled with a trailing space.\n",
"df_sshoc_repo = df_re[[\n",
"    'Community', 'Repository name', 'Abbreviation', 'Host organisation',\n",
"    'Country', 'Website', 're3data_id', 'Discipline_subject ',\n",
"    'example_id', 'PID system', 'model_citation_lp',\n",
"]].copy()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "magnetic-orleans",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/tljh/user/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
}
],
"source": [
"# Label repositories without a PID system as 'na'. fillna catches every\n",
"# missing value, and assign() returns a new frame, so nothing is written\n",
"# into a slice of df_re.\n",
"df_sshoc_repo = df_sshoc_repo.assign(**{'PID system': df_sshoc_repo['PID system'].fillna('na')})"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "hollywood-karma",
"metadata": {},
"outputs": [],
"source": [
"# Records without a metadata standard get the explicit label ['none'].\n",
"df_n1['metadataStandardName'] = df_n1['metadataStandardName'].apply(\n",
"    lambda names: names if len(names) != 0 else ['none']\n",
")\n",
"# A pidSystem list that joins to exactly 'none' becomes NaN. The isinstance\n",
"# guard leaves already-converted NaN cells alone, so the cell can be re-run\n",
"# without raising on map(str, nan).\n",
"df_n1['pidSystem'] = df_n1['pidSystem'].apply(\n",
"    lambda systems: np.nan if isinstance(systems, list) and ','.join(map(str, systems)) == 'none' else systems\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "injured-niger",
"metadata": {},
"outputs": [],
"source": [
"# Prefix the joined re3data identifier parts with the repository page URL.\n",
"re3data_page = 'https://www.re3data.org/repository/'\n",
"df_n1['identifier'] = [\n",
"    re3data_page + ''.join(str(part) for part in id_parts)\n",
"    for id_parts in df_n1['re3data.orgIdentifier']\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "hazardous-government",
"metadata": {},
"outputs": [],
"source": [
"test=pd.merge(left=df_sshoc_repo, right=df_n1, left_on='re3data_id', right_on='identifier', how='left').fillna(np.nan)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "collaborative-making",
"metadata": {},
"outputs": [],
"source": [
"test_ex=test.explode('metadataStandardName')"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "compact-engineer",
"metadata": {},
"outputs": [],
"source": [
"# Rows without a metadata standard value (NaN) are labelled 'na'. The check\n",
"# uses np.isnan on floats instead of identity with np.nan, which only matches\n",
"# the np.nan object itself.\n",
"test_ex['metadataStandardName'] = test_ex['metadataStandardName'].apply(\n",
"    lambda y: 'na' if isinstance(y, float) and np.isnan(y) else y\n",
")"
]
},
{
"cell_type": "markdown",
"id": "brave-liverpool",
"metadata": {},
"source": [
"### Number of repositories by Metadata Standards"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "ideal-ribbon",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"na 78\n",
"Dublin Core 29\n",
"DDI - Data Documentation Initiative 15\n",
"none 11\n",
"DataCite Metadata Schema 5\n",
"Name: metadataStandardName, dtype: int64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_ms_ex=test_ex['metadataStandardName'].value_counts()\n",
"df_ms_ex.head()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "median-facing",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA3kAAAK5CAYAAAACb+fVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAB0f0lEQVR4nOzdd5hkZZ238fs7BMlgABRUgoCAigKDgvoaQEwoqGBAVIysa8K0hl3XnLNiBEUxYXZhzcgiJhRJAgKCCigIEgREMszv/eOchpqmh5nurqozffr+XFddXeep9O2ne3rqV+cJqSokSZIkSf2woOsAkiRJkqThsciTJEmSpB6xyJMkSZKkHrHIkyRJkqQesciTJEmSpB6xyJMkSZKkHrHIk7TcSPLmJJXkR1Pc9s0kPx1jloe1We49rtecjiRbJfl5kqvanBt3nWmmkmzcfg+PG2h7TZKHDfl13pzkkmE+5xJe59nt97PGiJ6/2sszprjtGRO3z+B5h9rnSR43k9/NUfzsb+O11mt/LzYex+tNeu1Lkrx5hM+/Rtv/zx7Va0haflnkSVoePTLJDl2HWM69D1gH2B3YCbig0zSzcwHN9/CLgbbXAA8b8ut8BnjUkJ+zK/8CnjZF+97tbTMxij6fiXHmWA94E7DxmF5PksbCIk/S8uYfwCnAf3UdZJSSrDLLp9gSOKKqjqyqX1fVdcv4uqvO8nWHrqqua7+Hy0fx/ElWSrJCVZ1XVceP4jU68L80H4bcfqIhyR2AXYHDO0ulsUpjtn9LJPWQRZ6k5U0B7wB2T3KfJd1pSUPv2uFJLxk4PifJ+5O8LskFSa5I8oH2zdFjk/w+yZVJ/mfwDfOADZJ8tx0W+ZckL5ziNf9fkqOTXJ3k0iQHJVlz4PaJ4Xv3T/LTJNcA/3Eb39v9khzZPt9lSb6cZP32to3boXj3AF7RPu9Pb+O5Kskrk3w4ycU0BTRJVkny3iR/TXJdkt8leeykx0703X8nuTDJv9osa0+63yZt//2z7cv/TbLZpPs8L8lpSa5ph6kdneReg9/TxHDNJOcAdwTeNDA08WHtbasl+Wib59okv03yyEmv9dM0w3v3S/In4Nr253ir35kkd0hyYJK/t8/3qyQPWNbsSzExpPaaJGcmeeLAc76o7c/FhnTmlmHC913Kcx8D/A3Yc6Btz7btmMl3XtrPeyl9/qq2n69o+2mqn2/a/r2o/R34ArDWFDneneSU9ns/r/19uvOwckxlST+/NEM0T2nvdlQGhrkmWT3Jx5L8Ic2/w7OTfDzJWpOeu5Lsn+SdSS5uv/+PJ7ndpPs9pO3za5Mcn+SBU+TcLckR7XP8M8mvp/jdfnP7PTw4yW9pfref3N62Z/t7dk2Sn9F8ECRpnrLIk7Q8+gZwFsM7m/c04P7Ac4D3Aq8EPgi8Dfhv4IXAQ4F3TfHYzwInA08Cvg98MovPHXsQ8BPgQmAv4OXAY4HPTfFch9KcgXks8N2pgiZZF/gpsBrwdOClbbYjkqzMLUMbLwS+0l5/0VK+//8A7gI8E3hZ2/ZN4NnAO4HHA78FDk9yv0mP3Rt4BPACmn7bjWbY40Te2wFHAlu193k2sAlwdJozSyR5CPAp4IvAY4DnAr8CFisWBzwRuIKm73dqLye0tx1E83N8R3u/vwLfS/LgSc/xIODfgde2398Vk1+kzf6T9vv7D+AJwMXATyYKjxlkH/Q14DCa351TgG8MFG9fAVag+Z0Z9BzghKr63VKeu9rn33ugbW/gq0u4/9J+3rfV53cFPgbsQfMzXgH4VRYv9l8GvBE4sP2erqH5tzbZem2G3Wj+rWwK/F+Sifcjs82xmKX8/C4A9mnv+uKB14Pm398KNH+DHkPzd2Jnmr9Nk70K2AB4Bs0w6n8D9h/IsAHwA5pRCnsBnwa+3L7GoE1o/j48k6Zg/xXwg/ZvzKDVgENo/h0+Gjg2yXY0vw+/o/l9+1/g60vqF0nzQFV58eLFy3
JxAd4MXNJefzZwE7BFe/xN4KdT3XfScxTwkoHjc4A/AisMtB0L3AhsMtD2XuDvA8cPa5/rwEnPfwTw64HjnwNHTbrPzu1j7z3wvRSw/zL0wbuBy4G1Btoe0D5+70nf1/uX4fmKpmgYbNulbX/opPafAd+Y9Br/ANYYaNsHWARs1R6/sO3LTQfuc1fgeuD17fGrgeNvI+PGbZ7HDbRdArx50v22al9734G2BcCpwI8G2n5KU2Ssv6Tfr/b4eW3OzQfaVgT+BLxvWbIv4fuZ+Hn/56ScZwBfHWj7EnD0wPEaNPPpXrKU5y/gJcC2bd+vD9yZ5t/L/drbagY/71v1+RSvvQKwKnAl8KyBtr8Bn5zi30oBG9/Gc23Y3uchs82xhPst7Xfv3u3rP2wpr7cizQcHBdx90s/iZ5Pu+z8s/jfivcClwGqT/h3Vkr7P9vdlReBHwMGTfocL2GPS/b8OnAZkoO2/2vs+ezq/v168eOnHxTN5kpZXXwL+Arx+CM/106q6aeD4j8A5VXX2pLZ127Nlg74z6fjbwPZJVkiyGs0n/19PsuLEhWYBkRuA7Sc99nvLkPX+wI+r6p8TDVX1G5qCa/LZqmX1/UnHj6A5E/jLSbmPBBZOuu8RVTW4kMd3gAATC+Pcn6aI/PNA3vOAXw7kPQnYNsmH2mFrk/t4We3QvvbNZ1OqalF7PLlvjq+qvy/l+R4BHA+cPdAHAEdzSz/MJvvNvzttzsNo+mvCZ4H/l2TT9vgpNG/sv7IsT15VJ9L83j6lvZxZVSdNcdfp/LxvJcmO7TDCS2mKyqtpCtIt2rvcjeZM8WGTHvrtKZ7rMWmGxF7RPtd57U1bTL7vDHJM5SRm+PNL8swkJyb5F82/54mFgSa/3o8nHZ9G80HHhPvT/Du6eqBt8t8Vktw1ySFJzqf5/m4AHjnF6xXNmcFB9wcOr6rBVVVv1f+S5g+LPEnLpaq6keYT8Gck2WiWT3f5pOPrl9AWYPKbwIumOF4RuBNwe5ozCp+geUM2cbkOWInmze+gpRUd0LxZnup+fwfusAyPn8rk57sTzZmfGyZd3sytMy/2/bdvVP/V5lymvFX1E5phiA+hOct2STtvafVpfh93Af416c3yxGutNmke1LL09Z2AHbl1PzyHth9mmX2q3527DBz/FPgzzZk/2tc5rKr+sQzPPeFrNMORn9Zen8p0ft6LSXJ3miImNMMQH0RTbF8ETCz4MTGnbqrvd/C5dqBZFOY8miGJO9H0PwPPNZsctzLTn1+a+ZNfoJnf+OQ258Scysmvd/mk4+sn3efOLPnf0cTrLaDpmwfSDHt9ePv9/WCK17usqq6f1Har15jiWNI8suLS7yJJnTkYeAPNvKrJrmVSQZapF06ZrfWmOL6RZkjZKrRDrrj12TJohrANWpa9yy6Y4jWhGZI305UhJ7/uP4DzaeagLc1iWdqzl2twy5YNFwBTLUKyfvs6TYCqQ4BD2jmHTwI+RDPU7nXLkGHCBcAaSVabVOitD1xdi68wuix9/Q/gOJq5e5Pd/FyzyL4ezTC9weObt7qoqkpyMLBfki/RnI18zDLkHvRVmvli0Mw3m8p0ft6TPZpmDtgeVXUVQHsWcPADhwvbr1P9Wxn0RJo5j0+dOOM0jQ9wliXHlGb483sy8Juqunm+a5KHLmPWyS5kyf+OJmxGM/z2MVX1w4H7TbUa7lS/27d6jSmOJc0jnsmTtNxq37S/n+bN610m3XwesGaSDQfaHsnwPXGK4+Or6qb2zeavgXtW1XFTXCYXecviN8CjsvjqnDvQzFv7xZIeNE1H0nzy/6+pck+6765ZfAXIJ9K8yZy4329ohq9uMpB3Q5ozErfKW1UXV9WnaeYybn0bGSefDYFmsZBiYLGSJGmPZ9I3R9K8uf7LFP1wyuQ7TyP7hMHVNBfQLBhy7KT7fJ5maN9naQqxI6bzDVTV6TSLnRxYVWcs4W7L+vOeqs9XpZkHeeNA28Sw0gl/pSky9pj02C
dN8Vw3TBpSuA+3NtMct2kJP7+JM2JTvd7kbUmmyrosfkvz72hwoZXJf1cmirmbX7MtgCcvunJbr7F7++9hwuT+lzS
"text/plain": [
"<Figure size 1080x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Bar chart of the per-standard counts in df_ms_ex, drawn on an explicit Axes.\n",
"fig, ax = plt.subplots(figsize=(15, 6))\n",
"df_ms_ex.plot(kind='bar', ax=ax)\n",
"ax.grid(alpha=0.6)\n",
"ax.set_title('Number of repositories by Metadata standard', fontsize=15)\n",
"ax.set_xlabel('Metadata standard', fontsize=14)\n",
"ax.set_ylabel('Repositories', fontsize=14)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "casual-speaking",
"metadata": {},
"source": [
"### Number of repositories by PID Type"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "interesting-seeker",
"metadata": {},
"outputs": [],
"source": [
"# Where pidSystem is missing, fall back to the 'PID system' column.\n",
"# Assigning the result back, instead of calling fillna(inplace=True) on a\n",
"# column selection, guarantees that the frame itself is updated.\n",
"test['pidSystem'] = test['pidSystem'].fillna(test['PID system'])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "wanted-sector",
"metadata": {},
"outputs": [],
"source": [
"# Give every cell the same shape: lists stay as they are, the scalar\n",
"# spellings 'None'/'none' become ['none'], any other scalar is wrapped.\n",
"test['pidSystem'] = test['pidSystem'].apply(\n",
"    lambda pid: pid if isinstance(pid, list)\n",
"    else (['none'] if (pid == 'None' or pid == 'none') else [pid])\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "foster-spencer",
"metadata": {},
"outputs": [],
"source": [
"test_pis_ex=test.explode('pidSystem')"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "verbal-greek",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"hdl 35\n",
"na 31\n",
"none 28\n",
"DOI 23\n",
"Can't find 10\n",
"URN 4\n",
"Other (local) 1\n",
"PURL 1\n",
"Other - permanent URLs 1\n",
"Permalink 1\n",
"Name: pidSystem, dtype: int64"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pippo_pis_ex=test_pis_ex['pidSystem'].value_counts()\n",
"pippo_pis_ex.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "reflected-vegetation",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA3kAAAH4CAYAAAD3gySBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAABCU0lEQVR4nO3deZhkZXn///cHUEGQIAEJQ1RQGBUVUcctGAUM7vv2jQYURYlRE9cYND8DajSouMRoEkFGiWtccEXFkQCGuCCgsguooKIgo6IgiMDcvz/OaSiKnpmunjN9us68X9dVV9V5znbXPTXddffznOekqpAkSZIkDcNGfQcgSZIkSeqORZ4kSZIkDYhFniRJkiQNiEWeJEmSJA2IRZ4kSZIkDYhFniRJkiQNiEWeJM1RkkOSVJJjZ1n3ySQnLGAse7ax3GOhzjmJJHdL8r9JftfGuWPfMc1Xkh3b9/DYkbZXJdmz4/MckmRll8dczXn2b9/PFuvp+DXyuDrJGUlemGSjsW1ePLL8gZF9rkvyyyQnJTkoyR/N4ZwPT/LS9fF+JGkaWeRJ0uQenuR+fQexyL0V2Ap4PPAg4Oe9RrNufk7zHk4aaXsVsGfH53kf8IiOj9mXt9Hk7DHA8cB7gBeuZZ9z233+HHhWu98rge/M4Y8EDwdeOv9wJWlYNuk7AEmaMr8CLgb+EXhiv6GsP0k2rarfr8Mh7gp8rqqOm/C8m1XV1etw3s5V1TXAN9fX8ZPcAlhVVT8Ffrq+zrPALqyqmZz9T5Jdgb8B3r2GfX43sg/AMUneC5wMvB/Ya/2EKknDY0+eJE2mgDcCj09yz9VttLqhd7MMU7swyWHtsLSfJ/lNkrel8egkZyW5Islnktx2llMtSfKFdljkj5O8YJZz/nmSE5Nc1Q6DOyLJbUbWzwzfu3+SE5JcDfz9Gt7b7kmOa4/36yQfTrJdu27HJAXcGXhZe9wT1nCsSvLyJO9MchlwRtu+aZK3JPlJkmuSfC/Jo8f2ncnda5NckuTKNpY/GttupzZ/v21z+fkkO49tc0CSs9vhhSvbfN199D3NDNdMciHwx8DBI0MM92zX3TrJu9p4fp/k20kePnauE9IM7z0wyQ+A37f/jjf7zCTZOsnhSS5tj/f1JA+Ya+xrMTOk9uok5yV50sgxX9jm8yZDOnPjMOF7zeH4o04FdpxwH9rC9/XAnknuOts2SQ4BXgHcceTf4wPt/59VSXYa236ntv0J7fLov8eFbT6OSbLD2H5r/UxK0mJhkSdJk/sEcD5Nb14X/hK4P/Ac4C3Ay4G3A28AXgu8AHgo8C+z7HskcDrwZOCLwH/kpteO7QF8FbgEeCrNkLZH0/SMjPso8Pl2/RdmCzTJtsAJwK2BZwJ/28a2IsktuXFo4yXAR9rXaxum9/fA9sB+wN+1bZ8E9gfeBDwO+DbwuSS7j+37DOAvgOfT5O0xNMMeZ+K9FXAccLd2m/2BnYATk2zdbvMQ4D+BDwKPAp4LfB1Y3bVgTwJ+Q5P7B7WP09p1R9D8O76x3e4nND1SDx47xh40PVv/0L6/34yfpI39q+37+3uanuPLgK8m+ZN5xj7qv4HP0nx2zgA+MVK8fQTYmOYzM+o5wGlV9b05HH/UjjSfiflY0T4/cDXr30cT7yXc+O/xBuBY4GfAs8e23x/4BXDMSNuDaD7LLwcOAHYDPjO231w/k5LUv6ry4cOHDx9zeACHACvb1/sD1wNL2+VPAifMtu3YMQp48cjyhcAFwMYjbScD1wE7jbS9Bbh0ZHnP9liHjx1/BfDNkeX/BY4f22bvdt97jLyXAl4yhxwcClwObDnS9oB2/2eMva/D5nC8oikaRtse1rY/dKz9a8Anxs7xK2CLkba/AlYBd2uXX9Dm8k4j2/wp8Afg1e3yK4FT1xDjjm08jx1pWwkcMrbd3dpzP3ukbSPgTODYkbYTgKuB7Vb3+WqXD2jj3GWkbRPgB8Bb5xL7at7PzL
/3a8biPBf42Ejbh4ATR5a3AK4c/fyu4d/079pYb0Nzfd11o58Hbv7/4APAKas53q3a7f9hDec8jGaI6Hj7PwM/AtIuZ/yz2f57XAvcYaRtj/acj5zkM+nDhw8fi+VhT54kzc+HgB8Dr+7gWCdU1fUjyxfQfGH90Vjbtm1v2ahPjy0fDdw3ycZJbk3TQ/HxJJvMPGgmELkWuO/YvsewdvcHvlJVv51pqKpv0XxxHu+tmqsvji3/BU2vzP+NxX0csGxs2xVVdeXI8qdpvsjPTIxzf5oi8ocj8f4U+L+ReL8L3DvJO5I8ZJYcz9X92nN/YuRcq9rl8dycWlWXruV4f0EzzPFHIzkAOJEb87Ausd/w2Wnj/CxNvmYcCfx5kju1y0+nKdw+Modj/yvNZ+y3NAXch2iK2PnIPPcDWA7ckRsnydmrXR7vyT6tqn48s1BV/0fT2zeTj0k+k5LUO4s8SZqHqrqOpndt3yR3XMfDXT62/IfVtAUY/xL/i1mWNwG2AW5LM+Tu32m+cM88rgFuAdx+bN+1FR3QDKucbbtLga3nsP9sxo+3DfAn3DTma2mKhPGYb/L+q+oqmt6m7ecab1V9lWYY4kNoenVWJnlPks0nfB/bA1e2MYyf69bt8MvRtrXZhmaI4ngenkObh3WMfbbPzvYjyycAP6Tp+aM9z2er6ldzOPZbaYreewCbV9X+Y8X4JGaujZtLzm6iLe5PoImd9vnkqjprbNPxXMy0zeRjks+kJPXO2TUlaf6WA/8fzXVV437PWEGW2SdOWVe3m2X5OprhhJvSDDE7hJv3lkFzvdKomsP5fj7LOQG2o+l1mo/x887MYPrEOex7k1ja3sstuPGWDT8HZpuEZLv2PE0AVUcBR7XXHD4ZeAdwBXDQHGKY8XNgiyS3Hiv0tgOuqmaWzhtOOYfj/Qo4hebavXE3HGsdYr8d8Mux5RtudVFVlWQ5cGCSD9H0Rj5qDnED/LiqTpnjtmszM3HNN+a5//uAI5K8miY/r5hlm9k+06P5mOQzKUm9sydPkuap/dJ+GM1kF9uPrf4pcJuxGfoeTveeNMvyqVV1fVX9jmbq/7tU1SmzPMaLvLn4FvCI3HR2zvvRXLd20up2mtBxNL0mV84W99i2+4zNAPkkmgJqZrtv0QxfvWGGxfbf5M9mi7eqLquq99Jcy7jrGmL8A00RPerb7blvmKwkSdrl+eTmOGBn2oJp7HHGOsQ+Y3Q2zY2AJ9BcDzrqAzTXMB5JU+SsYAEl+VOayYeOr6rvr2HT2f49Zhzdrv8Yzfeej82yzX2S3GHkvHvQFHkz+ZjkMylJvbMnT5LWzXuB19AUDSeOtH+ZZnKN5UneRjOj481ub9CBRyV5Y3vuJwP70HxZn/Eq4Lgkq2gmh7kCuAPNLJT/WFXnTXi+t9P0LB2b5M00vWaH0szO+Kl1eSMjVtDMjLiiPcdZwJbA7sCmVTV6HeTVNLNXvpWm0H4r8OmqOrtd/wGantYvJfknmslyDqbp6XwvQJLX0QzdPKFtvzfNjKFr6gk7F3hMki/TDA/9flWdk+SjwLvbIvgHNDN63pXZe+PW5r9oPjMnJDmMZujkH9NcJ3ZJVb1jnrHPeF6SP9BMDPM8moLyGaMbVNXP2vf4GOBfxq4d7drmSR5IMyx5K5r/Uy+g+cw+Zw37QfPvsV2S/Wnez8qquhCgqn6f5MPAi4CPVtXls+x/Gc3n6GCaYvHNNNfpfbldP8lnUpJ6Z0+eJK2DdljeO2ZpXwk8haYX5DPAvjS3HOja84D7tOd4LPCiqvrcSBwn0VyvtS3NNPufpyn8fsL8rnG6jGbyit/T3HLhPTQ9R/tU1R/W5Y2MnKNoCtblNLd8OJamIHsQN+8R+xhwPE1P0zuBL9HMSjlzrGtoJs04t93mKJoJc/Ycubbs2zQ9X//ZnutvaIa4/usawvx74Hc0k9V8mxsnsXl+e45/opnI5I40s3JO3JNXzc3o96IpMF4HfKWNaRdu7GGaT+
wz/pKmN+8zwL2A/1dV35llu8+0z7PddqNLd6UZknkS8GGaWWDfBty7qi5ay74fpyno30KTk0PG1n+mfV6+mv2/TvN
"text/plain": [
"<Figure size 1080x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the per-PID-type counts in pippo_pis_ex, drawn on an explicit Axes.\n",
"fig, ax = plt.subplots(figsize=(15, 6))\n",
"pippo_pis_ex.plot(kind='bar', ax=ax)\n",
"ax.grid(alpha=0.6)\n",
"ax.set_title('Number of repositories by PID type', fontsize=15)\n",
"# The x axis lists PID types; the label previously read 'Metadata standard'.\n",
"ax.set_xlabel('PID type', fontsize=14)\n",
"ax.set_ylabel('Repositories', fontsize=14)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "vanilla-advertiser",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnAAAAHyCAYAAABrg/PtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAACHHUlEQVR4nOzdd3hb1f0G8Pdoy0vedraySJzEZELCSiBswoaWtlBMB10BSkuHu91F019bSmmBtLSUsFpoS1uKaYFSIJMsMpTEzt7TcWJrWPPe8/vjKs5yEseWdDTez/P4wZav7n0V4virc+45XyGlBBERERFlDpPqAERERER0dljAEREREWUYFnBEREREGYYFHBEREVGGYQFHRERElGEsqgMQ0dlx1zeaYbz5EvH/nupDAIgB8G+bNSOqJi0RESWD4DYiRCnW4BIAKgD0BdAPQBWAkmM/nojdePj/Yh85F0AhgIL4fwsBOHt41SiAwDEf/i6+9gM4CGBf/GPvkc+3zZoR7uF1iYgoCTgCR5RoDa4qAMMBDIJRoB0p1I583geA7XSnGCAOvAvg4gSmsgIojn+cNXd9YxuOKejiH7sBbAKwAcDmbbNmRBKQk4iIuoEFHFFPNLjyAJwDYMQJ/z0HgKu3py+BX/T2HAlWHP+oOcX3NXd94w4AG2EUdMf+d9u2WTO0FGQkIsoZLOCITseY7hwOYCKACQDGwShi+sK4xywpXMKfaQuMzAAGxz+uOuF7EXd941YAawAsP/KxbdaM1tRGJCLKHrwHjuiIBpcJwEgcLdYmwijYClMdZZtetejSyC8vSPV1U2w7jinoYBR1B9VGIiLKDCzgKHc1uFww7jObBuAiGMVanspIR7RI1/Lzwk9OVJ1DgR0wirmlAOYCWMIVtEREJ2MBR7mjwVUOYGr8YxqAc5GmeyH6pHNtbfgPo1XnSAMBAAsAvAvgHQDLts2aEVOaiIgoDbCAo+zV4CoDcAWMYm0ajHvX0m1xQJfC0rJlRPjZIapzpCE/gPk4WtAt5wIJIspFLOAouzS4xgGYEf+YjDQdYTsTTYr9Q8MvVKnOkQF8AOYBeB3Aq9tmzdipOA8RUUqwgKPM1uAqgDHKNgPAtTD2Wst4UqJjcPjFtLgfL8OsAPBPGMXcCtVhiIiShQUcZZ4GV38AtwK4HsbU6Gk3xc1UQ0PPxTSYudVPz+0A8CqMgu49LoYgomzCAo4yg9Hd4EMAPgLgQmTIvWy9MT40+9BhFJWqzpEl2gH8G0ZB99q2WTN8ivMQEfUKCzhKX8YihNsA3AFjpM2sNlBqXRb++Y6tsu9A1TmyUAjAvwC8AODfbAFGRJmIBRylF2NvtltgFG1XIIe7hdwabmj+QJ4zUnWOLHcYwF8BPA9g3rZZM/gPIhFlBBZwlB4aXNMA3AtjxM2hOE1auDfy5ZVv6ZPGqc6RQ7YCeA7AnG2zZmxRHYaI6HRYwJE6Da5KAPcA+DSMfqN0jPropxf/WZs+WXWOHCRhbE0yB8DL22bN8CvOQ0R0kpydniJFjH6jV8Mo2m4AYFUbKH2VwctVk2oIHO3Y8Yi7vnEOgMe3zZqxQW0sIqKjWMBRajS4+sGYIv0EAN6Y3w1lwqurzkBwAXgAwP3u+sa3APwawOvbZs3g/xsiUooFHCVXg2sigC/D2AKEo21noUT4WCSkDwHgqvjHFnd945MA/rBt1ozDamMRUa7iPXCUeMY06Q0wCrepitNkrPe0c9+ti9ZfqjoHnVIQxlYkv942a8Zq1WGIKLewgKPEaXDlwZgi/SK4KKHXVupD590c+eElqnNQt8wD8HMA/+JWJESUCizgqPcaXH0B3A/gswBKFKfJGlv0PgunR35xoeocdFZWAXgYwF95nxwRJRMLOOo5oyfpNwF8Clnaj1Sl/bJ42eTwE5NU56AeaQbwEwAvbps1I6Y6DBFlHxZwdPZYuKWEV+Z5zg3/vlZ1DuqVLQB+CuAZtuwiokRiAUfdx8
ItpULSunlkeM5Q1TkoIXYB+BmAp7bNmhFUHYaIMh8LODozFm5KaNK0d2j4+T6qc1BC7Yex2OE322bNCKkOQ0SZiwUcnVqDqxrAd2B0TWDhlmJSwj84/GKB6hyUFDsAfBvA81y1SkQ9wQKOTmZsB/IVAF8FwAJCoSGh5zUdJrPqHJQ0KwB8bdusGf9VHYSIMgsLODrK2ID3bgA/AtBPcRoCMDb0u/Z2FLhU56CkewPAV7fNmuFRHYSIMoNJdQBKEw2uywEsB/BHsHhLGy4R8KnOQClxNYCV7vrGP7rrG/nzR0RnxBG4XNfgqoGxOm6G6ih0spvCP9iwSg47R3UOSqkggEcBzNo2a4ZXcRYiSlMcgctVDa4yNLieALAaLN7SVqnwccuJ3OME8A0Am9z1jfcozkJEaYoFXC5qcH0Cxk7xnwdgUZyGTqNMeMOqM5AyFQD+6K5vfNdd31ijOgwRpRf+8s4lDa5RAJ4EMFV1FOqeMnijqjOQctMArHLXN/4cwA+5ETARARyByw0NLicaXA8DWAkWbxmlTLRrqjNQWrDCmFZd665vvE51GCJSjwVctmtwXQtgLYx//K2K09BZKhM+XXUGSiuDATS66xv/xtWqRLmNBVy2anD1RYPrLwBeh/GPPmWgEnAXEerSrQCa3PWNX3LXN3KjZ6IcxAIuGxmLFJoA3K46CvVOsfDzZ5ROpRDAIwCWuusba1WHIaLU4iKGbGL0Lv0dgBtUR6HEKEIHf0bpTMbDKOK+A+AX22bN4LQ7UQ7gu/ts0eD6EIA1YPGWVfJFkPctUnfYAfwfgHfd9Y28ZYIoB7CAy3QNrlI0uP4E4GUAZarjUGI5EbGrzkAZ5RIAq931jZ9WHYSIkosFXCZrcF0HY9TtI6qjUHLYEXWqzkAZpwDAU+76xn+56xurVIfpLiGEWwixprvHCCEuFUK8lpp0ROmHBVwmanAVosH1ewCNAPqojkPJY0UsT3UGyljXA1jjrm+8VXUQIko8FnCZpsE1DsAHAD6lOAmlgBl6geoMlNHKAfzNXd/4nLu+0aU6TDeYhRBPCSHWCiHeFEI4hRAThRCrhBCrAMxUHZAoXbCAyyQNrs8DeB/AMNVRKGUKASlVh6CMdxeAFe76xomqg5zBcACPSylHA2gDcBuAPwK4X0o5VmUwonTDAi4TGFOmfwbwBIzVZpQjhICpAEHu5kuJMBjAAnd942dVBzmNrVLKlfHPlwNwAyiWUs6NP/acilBE6YgFXLo7OmV6h+IkpEixCPhVZ6CsYQcw213f+Ky7vjEd768MH/O5BmMKmIi6kLUFXHy1UlMX91PcK4RYGr+n4m9CiHT8R8zAKVMCUAJfQHUGyjofB7DYXd94juogZ9AGoE0IcXH86zsVZiFKK1lbwMV1dT/FK1LK8+L3UzQhHRcDcMqUjlEifEHVGSgrjQGwzF3f+CHVQc7gEwAeF0KsBCAUZyFKG0Jm6f3RQgg3gLeklMPjX38dgBXAPAA/AlAMY7+kN6SUn1MU82QNrhEAXgWQ7u+MKUW+FPn80r/rl5ynOgdltccAfGXbrBlR1UGIqHuyfQTuxPspLACeAXCflLIWwPcBOBTk6lqD62oYU6Ys3qhTmfDylyol2wMA5rrrGweoDkJE3ZPtBVxXCgHsFUJYkU73UzS4vgxjY95ixUkozZQJb0x1BsoJUwAsd9c3Xqg6CBGdWS4WcN8BsBjAAgDNirMADS4bGlxPA/gFALPqOJR+SuHVVWegnFEB4H/u+saPqg5CRKeXtffAZYQGVxWAVwDwHS+d0lvahHfvjX7lUtU5KKdIAA3bZs34geogRNS1XByBSw8NrvEAloLFG51BsfDz55RSTQD4frwFF1fCE6Uh/mJQocF1O4D5AHjDMJ1RETo4tU6q3AXgv+76Rm6oS5RmWMClWoPriwBeBpC+GwhTWikQQavqDJTTLgbwvru+cYTqIER0FAu4VGpwzQLwKLgZJZ2FPIQ5hUWqDQWwyF3fOF
11ECIysIBLhQaXBQ2uZwB8XXUUyjx2RNJnr0LKZSUA/uOub/yE6iBExAIu+Rpc+TA6K9SpjkKZyQotX3UGojgrgKf
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig = plt.figure()\n",
"ax = fig.add_axes([0,0,1,1])\n",
"ax.axis('equal')\n",
"explode = [0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3]\n",
"\n",
"ax.pie(pippo_pis_ex.values, labels = pippo_pis_ex.index, autopct='%1.2f%%', radius = 2)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "current-cornwall",
"metadata": {},
"source": [
"### Inspect PID Type"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "detailed-hearts",
"metadata": {},
"outputs": [],
"source": [
"df_pis_kw=test_pis_ex.explode('metadataStandardName')"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "formed-maldives",
"metadata": {},
"outputs": [],
"source": [
"# Rows without a metadata standard value (NaN) are labelled 'none'. The check\n",
"# uses np.isnan on floats instead of identity with np.nan, which only matches\n",
"# the np.nan object itself.\n",
"# NOTE(review): test_ex labels the same gap 'na'; here it is merged with the\n",
"# 'none' that marks re3data records without a standard - confirm this is intended.\n",
"df_pis_kw['metadataStandardName'] = df_pis_kw['metadataStandardName'].apply(\n",
"    lambda y: 'none' if isinstance(y, float) and np.isnan(y) else y\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "reflected-warrior",
"metadata": {},
"outputs": [],
"source": [
"# Cross-tabulate metadata standard (rows) against PID system (columns); each\n",
"# cell counts the non-null 'Repository name' entries of that pair.\n",
"tt_a_df = (\n",
"    df_pis_kw\n",
"    .groupby(['metadataStandardName', 'pidSystem'])['Repository name']\n",
"    .count()\n",
"    .unstack('pidSystem')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "solved-domestic",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA+0AAAI4CAYAAAAfwwg7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAB+xElEQVR4nOzdebxnc/3A8debIfu+VMQgknVirKVQKqUsyVqiIr+SkKUiYypFRCFbyihCtkglobFlG/tOtpA1u6zj/fvj8/m6X9f3LsPMfM+deT0fj/u433vO55zzPp/v+d573+eznMhMJEmSJElS80zT7QAkSZIkSVJnJu2SJEmSJDWUSbskSZIkSQ1l0i5JkiRJUkOZtEuSJEmS1FAm7ZIkSZIkNZRJuyRpihERW0dERsTW3Y5lShQRw2v9jul2LFOjWvdjuxzDvRFxbzdjkKSpjUm7JL1FETGm/hPd/vV8RNwUEftFxJx9lN96gP2Mj4inI+KuiPhjROwQEXO/hfi6/g/+1CQi1mx7D++JiOij3CwR8Uxb2eFv87hjJsZ+Jqe2utqn27G0NDEmSZIAhnU7AEmaApwJXFdfvxP4DLAHsHFErJyZT7yF/cwKvAdYA1gf2DcivpWZYyZSzJp0XgWGA+sA53ZYvxnl/X0V/w5LkqQB+M+CJL19f2xPpiNiV+AKYCngm8Dot7Kfuq9hwJeBXwDHRsRLmXnixAhak8x5wFrAtnRO2rcFHgL+DawyGeOSJElDkN3jJWkiy8zngOPqjyu/zX29mplHA1+viw6KiBn726Y1rrv++JFeXe/3iYgl6+t/9LOPGyPilYh4V/15zbbtV4uI82oX/mcj4m8RMbKP/QyLiK9HxOW1S/j/IuLa2uX/TX+DIuKzEXF+RDwUES9FxH8i4sKI+Hqn/Q9QD5+OiH/WIQtPRsSpEbF4rzIn1vP6SB/7+Fxdf9gEHPq/wOnA+hExb6/9LUe5Jo6ltLT3FfuStdv7/RHxckQ8EhG/j4j39SqXwJfqj/e0vc/3tpVZMSJ+ERHXR8QTEfFiRNwZET/rPYSjbZtZI+KgiHiglr8tInahj/8bImKJOiRkXEQ8Vt+7+yLi6IhYsFfZMUDr2hvV6/pcs5aZPSJ2i4gLagwv1/2eFRGr9VVvfcQ2f0QcGBG312vhqfp6TEQsOiljqtuPjYh5al20ruubI2KbPraZPiK+H2V4zEtRhlr8KCLe0Uf5d0fE3hFxaUQ8XOP6T71elupQ/vV5Cer7dnJEPBoRr7Wda0T5jN5c3/8HI+KwiJh9QupekjRx2NIuSZNGazxz9ltq8I4DRgELA2sDf+6n7HWU1v1RwH3AmLZ1YzPztigJ+1oRsURm3tG+cUSsDiwDnJaZD/Xa9yrAdymtyb8E3gtsBHw4Ij6emRe37Wc64E/AJ4Dbgd8DL1JaoQ+t+/piW/ntgKOAh+t2jwPzAcsB2wCH93POvW0ErAucAYwFRgCfq+e8embeXssdQemuvh1wYYf9fK1+P3ICjg3wK2BzSkJ9YNvybSnXxK+Bvm4UfJKS9Lfq71/AgvWcPh0Ra2XmNbX4aGADYHlKb4yn6vKnevbItsCGlPM7j5J4rwjsAqwbEatk5rNtx38HcD6wEnA9cAIwB/D9vmKusW1PSXz/CbwMLA18FfhMRIzMzAdr2T/W71+qMY1t28+99fv7gX2BiyjX+pPAQsBna8yfycxz+ojldRExE3ApsBjwd0p9BuVztD5wKnD3JI5pjhrDy/V47wA+D/wmIl7LzNYNPiIigD/U2O4CDgOmp/S2WbaP0/ww8B1K3Z8GPAcsDmwMfDYiPpiZ13fYbjFKj6A7KO/xjMAzdd3PgR0pPUKOBl6pMa1S43m5j1gkSZNCZvrll19++fUWvijJcAJb91o+C3BLXff9QZTvuLzD8X5Xy40eZHxJSdI7rdu4rj+wn/Nap23ZmnVZAjv0Kr9+XX4nME3b8n3q8kOBaduWT0tJWh
NYv2351cBLwHwdYppnkOe8dVuc6/Va9626/Pxey2+i3EyYu9fyRYHXgEsHeexWHR1PSQzvBG5rWz8jJdH7e/35klp+eFuZOWuZx4Gleu1/GUpCdk0f79fwPuJauL3+25Z/pW63R6/l36vLT+v1fi4CPFHXjem1zQLAOzoc4+PAeOCIPupqnz5inr3Te065efEf4NZBviefqcc5uMO66YFZJ2VMbdfiMb0+A0tRelrc0qv8FrX8ZcAMbcvnoiTxb/pMU25szdrh2MvX6+WvvZYPb4vrxx22W72u+xcwV9vyGWpcCdw7mPr3yy+//PJr4nzZPV6S3r4NonQb3ycijqC0Kr+fnpayiaXVUjlvv6UG54+UVrSt27vdRsQcwCaU2M/rsN2/6NXinZlnUlon30uZOI8oXd+/SWk13zkzx7eVHw98m/LP/5a99v8qpVXvDTLz8Qk5OeCCzDy717LDKOe1dkQs3Lb8CErr59a9ym9LSb6PmsBjk5mtRO19EfHhunhjSqvrr/rZdKtaZlRm3tJrnzfVbT/QqdtzP7Hc117/bX5DaVn9RK/l21BuVuyema+17ece4JA+jvFgZr7UYfm5wM0djjFQzE93es8z8wFKa/WSEbHQBOzyhQ77ejnbehhMwpj+B+zS6zNwC6X1/f0RMUtb2VaX+e9l5ott5Z8AfthHXI92Oo8sresXUHqXTNdh00foPN9GK4Z9s20SzRrPdzvFIEmatOweL0lv3/r1C0pycC+lu+l+mfnkRDzOROtyn5mvRsSvgL0p3cZ/X1d9kdIifHRNPHu7uD2RazOW0nX6A5QEfglK6+CdwF7R+elnL1BubrScAPwMuCUiTqr7uTQzH5uwswM6dHXPzPERcQmlW/AHKEMHAH4L7EfpIv8zeL1r/9aUVu8/vIXjQ2kB/yEl+b+o7v9xerpid9IaG718dH702BL1+/spvTkGVM/la5RhAEtRWozbb9ov0FZ2VsrNl/sz864OuxtLGXbR+xhBuQGzNaWFd05Kj4qWCe5OHREfpPSOWI3Smjx9ryILUCbz68+FlJtd34mIFYC/UJLl6/q4kTEpYrozM5/hze6v3+ektIgDrEC5YXJJh/Jj+4nr05ThCSOBeXjz/3fzUG7Stbu+042WGgN0Hi5yCaXnhCRpMjJpl6S3b5ucPI9ie3f9/laS2E6OBvakJHStpH07SoJ1bB/bPNLH8ofr99ZEVa3nyi9OhySvzeutjJl5UEQ8Tpl0b0dgJyAj4kJgt8wc189+3mqcZOazEXE8sH0dL/4PyjjldwI/b2/xnBCZ+UhE/An4XEQcDnwI+Flm9pfAtupt2wF2P8sA69udTBnTfjflsYIPU4YhQKnj9gnOWvUyUP31dlDd10PA3yiJcqt1e2tKF/1Bi4gNKa3XL1LGot8FPE9JaNek3CDqODFbu8x8JiJWpbQof5aeFv/H63vyo8x8U8+OiRzTU33ssjURYfvNjdmBJ/qIqWPdR8S3KGPQn6xx/ZvSup/0zHfQKa6+3ss+r4F6s29Ce71Ikt4mk3ZJGgJqd/NWN+srJsY+M/PBiDgL2DAilqS0jC8DnNxP6/b8fSx/Z/3+dK/vZ2TmRhMQ02+B39Zu+qtTks0vA3+LiCUnoNV9sHG2HEFpqfwaZUKv1gR0Rw/yeH05mjJJW6u1vr+u8e1xLZ+ZN7zNYxNlVv8NKUMd1s3MV9vWTQPs3sfxB6q/9mPMR7nJchOweu+u2hGx+VsI/YeUm0cjM/PWXvs7ir4nxHuT2n39K7U3wFKUiRy/QellMg1lgr3JGlM/ngbmiojpOiTunep+GGXuiIeBFbLXxJF9zWpf9dVjp/0auLvD8eYBHuhnv5Kkicwx7ZI0NGxNman6IXoeTTWQ13hjK14nrfHpX6O0skP/Y7g/FB0e1UZpaQS4tn6/jdLCuGof42n7lZlPZeZfMnNbSjfzuei5aTEYb0qgImJaSmt3e5yt491A6Ta9YUSsAnwMuKh3cvYW/J
3SDX/Bur/bByh/ef2+xgQco9VdudN7/d76/az2hL1amTIU4nU14f4XsEBELNZhf2t2WLYo5f+Jczsk7AvW9RMScyv
"text/plain": [
"<Figure size 864x576 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"import seaborn as sns\n",
"\n",
"# Annotated heatmap of tt_a_df (metadata standard rows, PID system columns).\n",
"fig, ax = plt.subplots(figsize=(12, 8))\n",
"sns.heatmap(\n",
"    tt_a_df, ax=ax, annot=True, fmt='.0f', cmap='Blues',\n",
"    cbar=None, linewidths=0.1, linecolor='gray',\n",
")\n",
"ax.set_xticklabels(ax.get_xticklabels(), rotation=30, ha='right')\n",
"ax.set_yticklabels(ax.get_yticklabels(), rotation=0)\n",
"ax.invert_yaxis()\n",
"ax.set_xlabel(None)\n",
"ax.set_ylabel(None)\n",
"ax.set_title(' PID types by Metadata standard\\n', loc='left', fontsize=20)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "motivated-outdoors",
"metadata": {},
"outputs": [],
"source": [
"# Cross-tabulate community (rows) against PID system (columns); each cell\n",
"# counts the non-null 'Repository name' entries of that pair.\n",
"comm_a_df = (\n",
"    df_pis_kw\n",
"    .groupby(['Community', 'pidSystem'])['Repository name']\n",
"    .count()\n",
"    .unstack('pidSystem')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "honey-solid",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAuoAAAMRCAYAAAC6RZPIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAABaZ0lEQVR4nO3dd5wdVf3/8deHhBJDiUAoUqQ3EQIEQZQuiopURRBRbAgK0puAIYgIiqCCP4pKlWahf+00EaQEDB0pgtJB6aGYhM/vjzNLLstuSCDZObt5PR+PfWR37tzZz53M3vueM+ecicxEkiRJUl1marsASZIkSW9kUJckSZIqZFCXJEmSKmRQlyRJkipkUJckSZIqZFCXJEmSKmRQ14ARETtEREbEDm3Xounnrf4/N8+5YvpUJUnStGdQf4si4tTmg7/za1xE3BYRR0TEO3tZf4c32c7EiHg2Iu6LiAsiYpeImOct1GcoaUlEbBQRZ0bE/RHxYkS8FBH3RsQZEfHRtusbqCLigYh4oO06JEmaVga3XcAAcCEwtvl+AeATwH7AJyPifZn51FvYzhzAIsDawGbAdyJit8w8dRrVrOkgIuYATgc2B14GLgPOA8YDiwMfAz4bET/IzL3bqnMAOB+4Fnh0Kp+3PPDitC9HkqTpw6D+9l3QGaAjYm/gOmAFYFdg9FvZTrOtwcAXgR8Bp0TEK5l59rQoWtNWRMwE/Ar4CHA58NnMfKTbOrMCOwHL9H2FA0dmPgs8+xaed9d0KEeSpOnGri/TWGa+AJzW/Pi+t7mtCZl5EvC1ZtHRETFkcs/p6r/b/Lhut241h0TEcs33l09mG7dGxPiIWLD5eb2O578/Iv7cdM95PiL+EBEje9nO4Ij4WkRcGxHPNd1A/t5053nDsRcRm0bEpRHxaES8EhGPRMSVEfG1nrb/Jvvh4xFxTdMd6emI+HVELN1tnbOb17VuL9vYqnn8uCn4ldtSQvq9wCe6h3SAzHwlM38E7Nnt98waEfs3+/3FZl9dFRFb91DTYk1Np0bEks3r+m/zf/HHiFixWW94RJzU7MuXI+KGiFi/h+0d0mxvvYjYNiJubGp4JCKObk4uiIgNIuKKpranm248b+iSNbkuVx3dvBbr5fUsFhHnRMR/mprHRMQmPWzndX3Uu45P4N3Au7sd86e+WW1tHqeSJE2OQX36iObfnOxaU+404F+UrjUbvMm6Y5nUiv+v5vuuryuaVsXLgfUi4g0tuxGxFrAicGFmdu9asAZwBfAK8BPgd8CGwFURsXa37cwMXNKsNww4CziJcswdy6STma71d6R0/1kBuBj4AfBbYAjwhTd5zd1tCVwAPES5GvE3YCvg2ohYtmO945t/d+xlO19t/j1hCn5n1zaOysxxk1sxM1/p+j4iZgH+AHyXcoXrJ8AZlFb3cyPi8F42sxjlys38wKnAH4EPAVc0JyTXAqsD5wK/BFYGfhcRi/ayvV2BnwP/oOyX/wJ7ACdGxBaU/+unKP+HdwKfBX4xudc5ld4NXN+8rjOaulcELuzpBKObByjHd1dLe+cxf8HkntjycSpJ0uRlpl9v4YsSjhLYodvy2YE7mscOnoL1e1zew+87o1lv9BTWl5Rg3tNjn2weP2oyr2ujjmXrNcsS2KXb+ps1y+8BZupYfkiz/FhgUMfyQZRAmMBmHctvpJwAzNdDTfNO4WveoaPOTbo9tluz/NJuy2+j9Cefp9vyJYBXgaun4PcObmpPYKmpPI4OaJ73W2Bwx/L5KAE0gbU6li/W8RoP7Latg5vlT1FOLjr/P7ZvHjum23O6/p+eBZbvWD4rcDswkRLa1+14bCbgT83zRkzFcdd1bC3Wy+sZ1W39j3Ttm17+n7v/LT0APDA1fxNtHKd++eWXX375NaVftqi/fZs33QcOiYjjKS2SywP3AVPSZWJKPdz8O3wabOsCykC8Hbq6NgBExDBga0rtf+7hefcC/69zQWZeCFwJLE
UZ/NrVX3tX4DFgj8yc2LH+RGAvSgDartv2J1AGXr5OZv5nal4ccFlmXtJt2XGU17VBRLy7Y/nxlFC6Q7f1v0K5MnLiFPy+uYFZmu8fmspav0jZF3tm5oSuhZn5BPDt5scv9/C8B4Ajui3rav2dFdgnM1/teOwsyv4d0UsdP87MOzt+/yuUVu2ZgP/LzCs7HnuVSa3pK/f2wqbSv4DDOhdk5h+Af/M2u5D1poLjVJKkyXIw6du3WfMF8BIlQJ0JHJGZT0/D3zPNutNk5oSI+CnwLUqXkLOah7anXMI/KTN7+j1XdQt/Xa4A1gVWoYT2ZSjh9R7goIjo4Sm8RDmh6XImpRvBHRFxTrOdqzPzyal7ddA893Uyc2JE/BVYsqnzX81Dp1MC747N7+/qDrED8DSl28h0EWWWmKWAh7PngY6XNf+u0sNjYzuDZaOrX/zdmfl85wPN638cWLiXcsb0sKxrezf28FjXiWNv25taPb0egAeB90+j39Fd28epJEmTZVB/+76QfTNt4ruaf6dVIDgJOJDSD7srqO8I/A84pZfnPN7L8seaf+dq/u0aZLg0MGoyNcze9U1mHh0R/6EMnP0GsDuQEXElpXW4pyDZmymtk8x8PiJ+AewUEetn5uXAppTxAD/MzJen4Pc9RdlvswALUVrup0RXHb1NM9i1fFgPj71h1pPmBKzHxxoTgJl7eayn50yYgsd6297UeqaX5ROYfmNp2j5OJUmaLLu+9APNJfp1mh+vmxbbzMyHgYuAdaLMBNM1iPT8ybQOzt/L8gWaf5/t9u/5mRmT+Vq8W02nZ+aalAD1cUof4XWAP0TE1HT5mdI6u3QNKv1qt39PmpJf1nRZubb5ccMpeU63Ohbo5fEFu63XHyS9NwAM68M6pkTbx6kkSZNlUO8fdgAWpbSw9jqtYjevUgbETU5Xf/OvMmnWksn1yf5gT9PVUQabAvy9+fcuSgvpmk03kqmSmc9k5m8z8yuUAYhzM+lEZUq8YbrFiBgEfLBbnV2/7xbgamCLiFiDMnvKXzr7bE+BrlC/d0S8Y3Irdo0LaLqn3AcsFN2mjmx0zXZy01TU0banKTfrep1m/4+Yzr97Im9+zHdq+ziVJGmyDOoVa+Z3/gpl6rikDHibkq4YUGbqeENg6uZS4G7g85RBpP9oun70ZmkmzeneVeNmlGB8L3AVvNbCfCylRfjH0cPc7xGxYESs0PHz+tFzJ+H5mn+n5o6SG/Qw//YulP7pl2fmv3p4zvGUriu/oYwHmJIpGTudTZlmcWnKlIILdl8hImaJiK/T9IVvnNz8vu83YbZr3Xkps7h0rdNfXA8sGhEf7rb8IMoUjNPTf4HhPR1vPangOJUkabLso16PzTtuBDOU0oK+NiVEPAvsmJnnTsX2LgW2iYiLKS2y4ymtxH/pWiEzMyJOAI5uFr1ZV4/fAz+IiI8CN1MGQm5Jmd7wi90Gmn6bMiPITsAnIuIyygDE+Shh9gOUPvJ3NOufD7wQEddSBuRG8/pXpwxm7GkWmt5cDJwfEedTTiBGAB+l9CXv7aY0vwKOofQx/w9w3lT8PjLz1Yj4FGUazc2Af0bEpZQ5xydSpiHcgDJrz1EdTz2qqW0z4OaI+C3wDuBTlH31vcz869TU0rKjKNMqXhgR51L2+VrA4pRBx+tNx999KeV4+X1E/IUyjeLNmXnxZJ7T5nEqSdJkGdTr0TV7zKvAOMqg0espH/xnZeZTU7m9rnnDNwQ+Rrl6Mhr4S7f1TqWEq//R7eYuPbgOOJQSbnahhJTLKPN539C5YmaOj4jNKTfG2QHYhDIo70ngfkpr8ZkdT9mfEvBWbep9mTIzy37A8Zn5hunwJuM8Jg2W/TjlJOU84IDMvLunJ2Tm/yLiTMrgwFOz46ZEU6rpyrJ505q8A2W2kg0p++kRyv/l6Zn5+26/dyPK3Uo/Q5kucALlRGj3zDx7autoU2Ze2vy/fwvYhnIs/wn4NJNuxDW9HEbpB/8JSsAeRD
mmew3qLR+nkiRNVvQ8C59mFBGxHqXf+y8yc/s3WWd0Zh7SR6X1ueb28usAy2bmPS2XI0mSZnD2Ude+zb/T8uZM/U5
"text/plain": [
"<Figure size 864x864 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"fig, ax = plt.subplots(figsize=(12, 12))\n",
"sns.heatmap(comm_a_df, annot=True, linewidths=0.1, fmt='.0f', ax=ax, cbar=None, cmap='Blues', linecolor='gray')\n",
"ax.set_xticklabels(ax.get_xticklabels(), rotation=30, ha='right')\n",
"ax.set_yticklabels(ax.get_yticklabels(), rotation=0)\n",
"ax.invert_yaxis()\n",
"ax.set_xlabel(None)\n",
"ax.set_ylabel(None)\n",
"#sns.heatmap(df, linewidths=2, linecolor='yellow')\n",
"title = ' PID types by Communities\\n'\n",
"plt.title(title, loc='left', fontsize=20)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "loaded-tattoo",
"metadata": {},
"outputs": [],
"source": [
"ds_a_df=df_pis_kw.groupby(['Discipline_subject ', 'pidSystem']).count()['Repository name'].unstack('pidSystem')\n"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "optical-imagination",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABGwAAAUxCAYAAADdlxDBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdebhkVXm//fsroKAgKg7BsRUHlKmBbhQVBESjhggiioGIrYmIiTi9KCqE5mgwgFNEFASijTNBEGdQkUkZpBmawVmBX8QJoiCEGZ73j73KUxR1pqabUw3357rOdXatvfdaz15714F6eq1VqSokSZIkSZI0Ou432wFIkiRJkiTpzkzYSJIkSZIkjRgTNpIkSZIkSSPGhI0kSZIkSdKIMWEjSZIkSZI0YkzYSJIkSZIkjRgTNpIk6V4hyYIklWTBbMcyUy3uU++Bdua0thYtg7ouT3L5QNkKew8kSRo1JmwkSVoKSRa1D6b9P/+X5JIkByZ56ATHL5iintuTXJvkV0lOSPKmJGstRXz3SAJAnSRbDXkebkjyuySnJ/lAko1nO05JkrTiWHm2A5AkaQX3VeDCtv03wN8DewM7Jdmsqv60FPWsATwO2ALYHjggyVuqatEyilnLzxXAorZ9f+ARwCbAXsBeSb4AvKGqrh847+nADfdAfFe2tq5dTvV/BTgb+N1yql+SpPsMEzaSJN09J/QnUpLsBZwDPAPYExhbmnpaXSsDrwM+Cnw6yc1V9cVlEbSWm8urav/BwiRzgc8AuwAPA17cv7+qfnpPBFdVtwLLra2qupbllwySJOk+xSlRkiQtQ23kxNHt5WZ3s67bquoI4F9a0YeTrDbZOb01RNrL5w1M0dk/ybpt+5RJ6rg4ya1J1m6vt+o7f/Mk32vTtq5LclKSeRPUs3KSf0lydpK/tClCF7RpXnf5f5AkL01ycptGdHOS3yY5Lcm/DKt/in74uyRntmlqf07y5SRPGTjmi+26njdBHS9v+w+dafuDqupCYFvgKuBFSXYYaOsuU9iSrJHk39o0u7+0/v5VkmOSbDok3s3avitb//0uyXeSvLLvmKFr2PRNzXtSkrcn+WmSm5L8JslHkjx4Otc50Ro2aevdJHlQmx72/1qMv0yyd5JMUN8z2737fZJbkvxPkk8mefR04pEkaUVmwkaSpGWv9+GzJj1q+o6mm2rzN8A2Uxx7IeOjeq5o272fU9tIjlOArZI8dfDkJM8G1ge+WlWD01qeCZwK3Ax8HPg28HzgjCRbDNSzCvCNdtxDgC8AR9D9v8fHGE9q9Y7fnW5a2DOArwMfAr4FrAa8doprHrQjcALwG7rRSWcBLwfOTvK0vuMOa793n6CeN7Tfh8+w/aGq6o/AJ9vLXSc7tiUwTgTeC/wFOIou3nOALYHNB45/PXAmsEP7/SHgm8AjGU/4TcdHgH8DTqPru6uBtwLfT7LqDOoZZhXgJLp78W26a1oNOBDYb/DgJK8Dfkg3GukU4D+BxcA/A4uTPP5uxiNJ0khzSpQkSctQktWB17SX5yyLOqvqjiRnAE+gG7XzzUmOvRC4MMlCJpieA3wC2JouUbHXwL5e8uKT3NWLgD2r6q8jTpJsT5cc+VSSp1XVHW3XPsDfAocCb62q29vxK9Elbl6X5MtV9dV2/BuAW4CNWmKDvjYePtH1TuDvgb+vqm/01fEWug/8n6BLMlFVpye5FHh5kjdX1f/2Hf8kuhExZ1bVJTNsfzKnAvsy9eir9YFn002Ve1n/jjY6ac2+18+gu66/AFtU1aUDxz92BvE9B5hbVVe0c98NHEuXBHsH8L4Z1DXo0cAS4AVVdWOrfwz4OfC2JO9vU7ZoycTDgcuB51XVlX3X83zgO3QJpTv1jSRJ9yaOsJEk6e7ZoU0V2j/JYcDP6BZ1/RVdsmJZ6X1gfcQyqOsEukVhFyR5QK8wyUOAV9LF/r0h5/2SLjHwVy3hchrwZLpFknsJhT2B3wNv6y
Vr2vG3A/8f3eijwVEmtwG3DjZaVVfP5OKA7/cna5pD6a5rmyRP6Cs/DHgAsGDg+NfTjZQalri6O2Z6H28cLKiqO6rqz31Fb6T7R7j3DSZr2vG/mUF8H+0la3pt0SVq7qBbT+nuenMvWdPq/yPdyKo1gf7RT2+kG5Hzlv5kTTvnZOBrwN8nWWMZxCRJ0khyhI0kSXfP9u0Hug/XlwOfBw4c+FB9dy2zaVZVdVuSI+mmobycbroSwKvppqgcUVXD2jmjbwRNv1OB5wEb0yVvnkq3sO4vgH0nWJ7kRrrEVs/n6abx/DjJl1o9P6yqq2Z2ddDOvZOquj3JD4B1Wpy9pMRn6Kbk7N7a703nWgD8GfjvpWh/MtO9jz+mm972Dy3B9FXgB8Diqrpl4Nhntd/fXgbxDeu7Xyf5H2BOkodU1TVLWfe1VfXLIeX/034/tK+sN+XreUnmDznnkcBKdM/aeUsZjyRJI82EjSRJd89r76Gv2+4tsro0CYxhjqCbtvQGxhM2u9NNS/r0BOf8YYLy37ffvWk6a7XfTwEWThLD6r2Nqvpwkqvp1lt5M926KZXkNOAdVbV4knqWNk6q6roknwP2SLJ1VZ0CvJRuvaD/rKqbZtDudEzrPrYE0zZ0SbWdgIParuuSHA28u++rwR/Sfl/J3TdZ3z2Bru+uWcq6JzrvtvZ7pb6y3jP0jinqXH2K/ZIkrbCcEiVJ0ohrU4y2bC+X1bo4V9JNK9ky3TdH9RYb/soko1oeNUH537Tf1w78/kpVZZKfJw7E9Jmqehbdh/W/A/6L7rpPSjKTqWDTjbOnt/jwGwZ+HzGDNqdr6/Z7yvtYVX+uqrdV1ePokl//TPeV3G9iPGYYT4Q8ZhnEN9O+W1567aw5xTN0lxFBkiTdW5iwkSRp9C0AHk+37syEX8c94A7uPGJhmN56NG9g8sWGe56bIV/HDWzVfl/Qfv+ULonwrDa9aEaq6pqq+lZVvR5YRDe9asvJz7qTu3xNd1vs+LkDcfbau4ju24heluSZdIsNn15VP5lp7JNJ8kjGk0Gfn8m5VfXLqvovumu7nvFpeABnt98vvttBDu+7JwGPo1vE+ppl0MZ09K5pi0mPkiTpXsyEjSRJIyrJyu3rmj9Ot+bJ22YwRed/6T5kT+Zkum/oeQ3dYsM/a1OCJvIUBr4iun1L1PPoFiQ+A7o1cui+untt4JAkqw1WlGTt9u1GvddbZ/hiN49sv2+Y4lr6bZNku4GyN9GtX3NK/6K6fQ4D7g8cR7fOzDL5Ku+eJBsB3wUeDnyrqr42xfFPbImSQQ+lWyS5fzHiw+imFf1bf5/21TWTb4l6S/+izC1B9wG6/2ecaKrc8nAo3QLUH5ng6+fvP/hV8pIk3du4ho0kSaNhhyRz2vaD6EbUbEGX9LgW2L2qjplBfScDr0rydeB8ug+/p1fV6b0DqqqSHA58uBVNNQXoROBDSV5M9/XMT6b7uuebgNcNLEj8PmAjYA+6b/P5Pt0aK4+kS/w8h24NnR+3478CXJ/kbLqFm9Oufz7dorLDvrVqIl8HvpLkK3SJpLl0o0/+xEDCqc+xwEfophVdDRw/g/b6zUmyf9tehS5Bs2n7AfgcXZ9MZSPg+CTnAj8Bfkv3zVLbt3p7a9pQVT9O8i90SaYLknyVbsHntej67y+MT8Wayg/pvhb+GLrn7m9bLOcBB0+zjrutqn6a5HXAp4BLk5xIl1xchfH3xlXAuvdUTJIk3dNM2EiSNBp63zZ1B/B/dB9Gf0SXqPhCVf1phvW9hW5UzvOBl9CNkBgDTh84bhHwQbrFho+eos5zgPfSJWPeRJdU+T6wT1Wd239gVd2aZAfgH+mmdG1Ht0DsVcBlwL9x52lB76JLDmzS4r2J7puc9gYOq6q7fN33JI5nfFHlv6NLVh1Pt1Dvz4edUFW3JPk83WLHi6rq5hm01+8JjC+0fBPd1LBf0PXx56vqwmnWs5ju26ueB7yIbmTNVXSJk0Oq6k7fCFVVRy
a5BNiLboraDnSJp4uAo2YQ/9uAl9F9rfkcupFaHwX2Ww4LME+qqj6XZAnd18BvDbyQ7r3xW+DLwEwSmJIkrXAy/Fs
"text/plain": [
"<Figure size 864x1584 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"import textwrap\n",
"fig, ax = plt.subplots(figsize=(12, 22))\n",
"sns.heatmap(ds_a_df, annot=True, linewidths=0.1, fmt='.0f', ax=ax, cbar=None, cmap='Blues', linecolor='gray')\n",
"ax.set_xticklabels(ax.get_xticklabels(), rotation=30, ha='right')\n",
"#ax.set_yticklabels([textwrap.fill(e, 7) for e in data['Client Name'].head()])\n",
"ax.set_yticklabels([textwrap.fill(e, 85) for e in ds_a_df.index], rotation=0)\n",
"#ax.set_yticklabels(ax.get_yticklabels(), rotation=0)\n",
"ax.invert_yaxis()\n",
"ax.set_xlabel(None)\n",
"ax.set_ylabel(None)\n",
"#sns.heatmap(df, linewidths=2, linecolor='yellow')\n",
"title = ' PID types by Discipline\\n'\n",
"plt.title(title, loc='left', fontsize=20)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "brown-license",
"metadata": {},
"outputs": [],
"source": [
"co_a_df=df_pis_kw.groupby(['Country', 'pidSystem']).count()['Repository name'].unstack('pidSystem')\n"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "usual-therapist",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAyUAAAMRCAYAAADyfxkkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAACbyUlEQVR4nOzdeZgdZZn38e+PCLIHWWTUUYKKMoCCpFFQwYigo4YJgzAKYkRnDDgKo8gYEaRpHAZwEB1BI8goqIgMgqDgi3sAZW12UVAUGEUUkH0LJLnfP061fWy7s6erm3w/13WuU+epp6ruulOdPnc/taSqkCRJkqS2rNR2AJIkSZJWbBYlkiRJklplUSJJkiSpVRYlkiRJklplUSJJkiSpVRYlkiRJklplUSKNQ0n2SVJJ9mk7lhWd/xaSJC09ixItVJJTmi9d3a9HkvwsydFJnjFC/30Wsp55SR5I8usk5yR5f5L1liC+SjJ76fZSSyLJzklOS3JrkkeTPJbkliRfSfLGtuNbFF3H5aS2Y5EkaUX1tLYD0LhyLnBtM/03wC7ATGD3JC+vqnuXYD1rAc8FtgemAUcm+beqOmUZxazlIMlawJeBXYHHgR8BZwNPAhsDbwL2TvLJqjqorThHyTeBy4A72w5EkqTxyqJEi+Oc7mIhyUHA5cBmwP5A35Ksp1nX04B3A/8NfCnJnKo6fVkErWUryUrAmcAbgB8De1fV74f0eTqwH/Ci0Y9wdFXVA8ADbcchSdJ45ulbWmJV9TBwavPx5Uu5rrlVdRLwr03TcUlWW9AyA+fyNx9fM+TUsMOTbNpM/3gB67ghyZNJntV8ntK1/HZJftCcYvZQku8m6RlhPU9L8q9JLkvyYHMq0zXNKWl/9XOW5B+S/DDJnUnmJPl9kguT/Otw619IHt6c5JLmlLr7knwjySZD+pze7NdrRljHW5r5JyzCJvekU5DcAuwytCABqKo5VfXfwIFDtvP0JB9p8v5ok6uLk/zTMDH9+d9ihJhvS3LbkLY/X9+R5LVJZjf/dg8mOT/J3w3pX8A7m4+3dh0/t3X1md20rZLksCQ3N/9mpwzd5jAx/m2SE5L8plnmT0m+lWSbYfquleRj6ZwW+WAT96+TnJFk8nA5kCTpqcKiREsrzXstsNeiOxW4nc7pYTsupO+1DI7O3N5MD7xmV9VNdP6SPyXJX/3FPskrgS2Ac6tq6Kk3rwBmA3OAzwL/D3gdcHGS7YesZ2XgvKbfOsDXgJPo/Hwdz2DhNtB/Bp1T2DYDvg18EvgOsBrwroXs81C7AecAv6MzynQp8BbgsiQv7uo3q3mfMcJ69m3eP78I2xxYx7FV9ciCOlbVnIHpJKsA3wWOojNK+1ngK3RGU85I8p+LsO1FNRX4HvAgnX26mM4pZRcmWb+rXx9wXTP93wweP58eZp1n0SmaL2nm37CgAJJsTecY/VfgZjrHwreBHYCfJHlTV98AFwBHNDGfTOff7PKm/3aLsM+SJI1fVeXL1wJfwCl0io59hrSvCfy8mfexReg/bPsw2/tK069vEeMrOkXIcPN2b+Yfu4D92rmrbUrTVsD7h/Sf1rT/Clipq/3wpv14YEJX+wTgf5p507rar6JT7DxzmJjWX8R93qcrzqlD5v1b0/7DIe0/o3P9x3pD2p8PzAd+ugjbfVoTewEvXMzj6OBmue8AT+tqfyZwWzPvlcP8Wxw+wvpuA24bIS9zgdcNmXdUM+/DIxwHk0bYzuxm/vXD/ft0bXOfrran0RlJehx4zZD+zwbuoHMNytObtpc06/jmMOtfCXjG4uTaly9fvnz5Gm8vR0q0OHZtTms6PMksOn/9/Tvg18CinPazqO5o3jdYBus6h86Xv32a6xwASLIO8E90Yv/BMMvdAnyuu6GqzgUuBF5I58L8gesr9gf+AHywquZ19Z8HfIjOl823D1n/XDoXhf+FqrpncXYO+FFVnTek7QQ6+7Vjko262mcBT6fzJb
rbe+iMeJ24CNtbF1ilmf7dYsb6bjq5OLCq5g40VtVdwMebj/+ymOscyder6odD2k5q3pf0VMOPLca/z5uBFwDHV9WF3TOqc7rbJ+iMBr5uyHKPDV1RVc2vqvuWIF5JksYNL3TX4pjWvKDz5ek24DTg6GX8pWmZnRJWVXOTfAE4jM5pTV9rZr2DzulSJ1XVcNu5uKrmD9M+G3gN8DI6BcqL6HxR/xVwaOcsnL/yGJ3ibcBpdE7Z+nmSrzfr+WlV3b14ewfNsn+hquYl+QmdL8Uvo3NqG3TulnU0ndOvPgl/PvVsH+A+4H+XYPuLJJ27db0QuKM6p9UN9aPm/WXLaJP9w7T9tnl/xjDzFsUVi9F34HSrjUa4Jmbgmp+/ozNy9HM6p3rt2RSS5wI/Afqr6oklilaSpHHEokSL4101OrfqfXbzviRf0odzEnAInesmBoqSGcATwJdGWOaPI7T/oXmf2LwPPFdlE6B3ATGsOTBRVccluYfOtQYHAB8AKsmFwL9X1XBfqEeyqHFSVQ8l+SqwX5LXVtWPgX+g8xf7T1fV44uwvXvp5G0V4Dl0RmQWxUAcI902d6B9nUVc38LcP7ShKVChc1rdkvjDwrv82cBxscdC+q0Jfy4kd6RTPO8OHNPMfyjJqcDB1bmxhCRJT0mevqUxpTkdaofm4+XLYp1VdQfwLWCHdO7INXCB+zcXMDqx4Qjtf9O8PzDk/ZtVlQW8Nh4S05erals6X17fTOfakx2A7yZZnNPWFjXOAQMXvO875P0kFkFz2tVlzcehpx4tyEAcfzPC/GcN6Qed61xg5D+erLMY219qI4yojWRgP6Yt5Lj48220q+q+qvpgVT2XTpH7L8BNwPsZ/HeTJOkpyaJEY80+wPPo/OV8xFv5DjGfhf/1e+D6kH0ZvHvUgq6hePVwt/Klc/E1wDXN+010/iq/bXMq1GKpqvur6jtV9R46F1yvy2BRtij+6ha/SSYArx4S58D2rgd+CvxjklcAOwEXVdUvFmObAwXMQUlWX1DHget4quohOqMqzxl6u+LGa5v3q7vaBk4JfO4w630hXaNAS2ngOqAlHUEZzkDhtv0Ce42gqm6pqv+h8+/7MIOnTUqS9JRkUaIxoXnOx3vo3Ca26Fw0viinEwH8iWG+uA7xQ+CXdJ5J8U/Azc3pSyPZhMFnpgzEOI3Ol8Rb6NxidmDk4Hg6f+n/TIZ5tkqSZyXZrOvzazP8xSfPbN4fXci+dNsxydQhbe+ncz3Jj6vq9mGWmUXn9Kuz6Fy/syi3Ae52Op1b+24CnJvmGS/dmmd6vI/m2pXGF5vt/VdTOA30XR/4WFefATfRuT3utCTP7Oq/GvCZxYx5Qf7UvD9vGa7zXDpF2Pu6b/3bLZ3n4KzeTG+c5PnDdHsGnZsT/NUF8JIkPZV4TYnasGuSSc30GnS+DG5P54v9A8CMqjpjMdb3Q+BtSb5N5y/tT9L56/9FAx2qqpJ8HjiuaVrY6UoXAJ9M8kY6z7F4IZ1ngjwOvHvIRfAfB7ak8wTzXZL8iM4dxJ5J54v7q+hc0/Lzpv83gYeTXEbnZgFp9n8bOrcLHu5uYCP5NvDNJN+kUyxtBbyRzrUfIz2I8UzgU3SuCbkHOHsxtkdVzU+yB51bN08DfpPkh8Av6Iw6TKLzjJkNgGO7Fj22iW0acF2S7wCr07nu4pnAJ6rqJ13beTLJf9MpWK5p9vFpwM7A75vXsvBD4N+BLyQ5C3gIuL+qlviOck3su9Ep3s5PcgmdC9kfpVNAb0PnVszPatq2BM5OciWdPP6eTv6mASszeI2JJElPSRYlasPAXbzmA4/QuaD9Cjpfxr9WVfcu5voGnsvxOjoPyFuJzgPwLhrS7xQ6X4yfYMgDDYdxOZ0H2X2czshD6Nwh6pCqurK7Y/MFdFdgbzqnn02lcwHz3cCtdL5Un9a1yEfoPBF96ybex+ncIWsmMKuq/upWwQtwNoMX8r+ZTkF2Np0Lo3853AJV9USS0+hcYH
9KdT3gcFE1p2PtmuT1dPZ5Ozr5D50v1D8AvlxVFwzZ7s50nvK+F51bKc+lU/R9oKpOH2ZTvXS+tL+Hzml3fwC+Tuf
"text/plain": [
"<Figure size 864x864 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"fig, ax = plt.subplots(figsize=(12, 12))\n",
"sns.heatmap(co_a_df, annot=True, linewidths=0.1, fmt='.0f', ax=ax, cbar=None, cmap='Blues', linecolor='gray')\n",
"ax.set_xticklabels(ax.get_xticklabels(), rotation=30, ha='right')\n",
"ax.set_yticklabels(ax.get_yticklabels(), rotation=0)\n",
"ax.invert_yaxis()\n",
"ax.set_xlabel(None)\n",
"ax.set_ylabel(None)\n",
"#sns.heatmap(df, linewidths=2, linecolor='yellow')\n",
"title = ' PID types by Countries\\n'\n",
"plt.title(title, loc='left', fontsize=20)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "quick-quilt",
"metadata": {},
"outputs": [],
"source": [
"mc_a_df=df_pis_kw.groupby(['Discipline_subject ', 'model_citation_lp']).count()['Repository name'].unstack('model_citation_lp')"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "constant-price",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABGwAAAUNCAYAAACuL9cXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdd7hlVX3/8feHoqAgBjUGK4oFpQ0woKggINagIIIYC0GNiIn9hwED4XI1KKCxIIqC0cEWEUXUaAAFKUqRoQxFLJGS2EEFRTp8f3/sdeRwOLfN3Jl7Zni/nuc+95y1917ru9uZ2d+71jqpKiRJkiRJkjQ6VprrACRJkiRJknR3JmwkSZIkSZJGjAkbSZIkSZKkEWPCRpIkSZIkacSYsJEkSZIkSRoxJmwkSZIkSZJGjAkbSZKWoSSnJak5jmHdJJVkwVzGsbxKsm07fgfNdSwALZbT5jqOUZHkqiRXDZTt2Y7TnnMT1b3P8npdJlnQYl93GbQ1K/8eJDmoxbztQPlyeQ4k3cWEjSRJSyDJ/ZO8NcmpSX6b5NYk1yX5QZKDkzx2GnXMegLA/6hrtvRdn6fNdSwrmpZcqr6fO9vnx1lJ/inJKnMd4/JoyHG9LcnvklyS5LNJdktyn7mOU5Km4j8CkiQtpiRPBb4MPBz4OfAt4JfA/YFNgX2BdyR5alVd0DbbA7jfHITb7xfAk4Dr5ziO5dUP6I7ftXMdSPMk4Ma5DmLEfRU4B/jVXAcygQ8D1wErA48BXgJsBTwL2GXuwlru9Y7rSsADgCcCLwZeCfw0ySur6gcD27wTOITuc3JpW9r/HvjZIC3nTNhIkrQYkqwPnASsAewH/HtV3T6wzmOAQ+keFACoqv9dlnEOU1W3AT+a6ziWV1V1IyN0/KpqZGIZVVV1PaOdoPxQVV3Ve5PkvcB5wIuTPLOqTp+zyJZvdzuuAEnWAt4NvAk4uSXU/3IPVdWvWEaJvaX974GfDdLyzyFRkiQtno/QJWIOrapDB5M1AFV1ZVW9FDi7VzY4Z0GbR+a77e3YQDf+bds6ayV5Rxt29fM27OqaJF9PslV/m725OtrbZw7Ud1BbZ8I5bJKsk+SjbUhBr53jk2w+ZN2/zAuSZLu2b39K8sck30zypOkezHT+vg0FuSbJzUn+L8lJSXYfsv4jkhyR5Iokt7ThDl9PssWQdf8yv0OSlyc5N8kNbR+f2pZ9dZLYLm9trN3eTziELcnabSjcpUluTHJ9kkVJDkly/yHrvrfVf1Nb95Qkz5nucWv13GO40sA+75puiN6NSX6f5ItJHj6TNmYQy32SvDHJt5Jc3Y7b75N8J8nzJ9jmqvZz/yTvS/K/bbv/SbJvkgzZJq2dy9q18ot2Paw1QRtD57BZgrbfkuSHg21nyPw5i6OqLgNOa2+3bO1ul+So1u4f2zVzaZKxJKtNsN/rJPl0uuGaNyW5qN1nU13DS3xd9tX3sHTDkHoxnJ/k5QPrPLfF8+kJ6rhvkmvbz30XJ46eqrq+qt4MfAZYi643TX9bQ+ewSfKidhx+1a6RXyY5Pck/Dol3Wp8DGTKHTf+5SbJVu3euT/fZelKS+dPd18ziZ8NsXxeSpsceNpIkzVC6njM7ADcDh021flXdMsniE9rvvwdO566HNICr2u8nAQcDZwDfBP4APAp4EfD8JC+sqhPbuhcB48AYcDWwoK++/rrvoe3X94CHAacC/wk8EtgN+NskL6mq/xqy6Y7ATsB/Ax8Hngy8ANgiyZOrajpDhw6mG4pwJfAlut4Q6wBbtPaP7YtzM+BkYG26Xk7HAw8Gdga+l+TFVfWtIW38P+DZwDfokmRrVdU5SX4MvCDJg6rqdwPHZEtgfeArVfX7yXagHb/vAo8GzgeOpPvj2BOAt7Vj8+e27qPpzse6wJnAiXRD6XYETkzy+qo6erL2pukf6a6Tr9NdX08Bdgc2STJvimtzcaxNNwzlLODbwDV05/GFwLeSvK6qPj
lku1XpzuXD6K6j2+nO5yHAanTXdL8PAW+m6wlxFHAb3TX4FOA+wK0ziHmmbX8UeAPd8MejWlsvokusrNpimQ29ZFHvgX5fumvxLLrPgdWApwMHAdsm2aGq7vjLxslf0yWLH0332XEW8DfAx+jun3s2OPvX5V+1dq8DPg08EHgp8PkkD6+q97X1TgZ+Brw0yVtbj6h+LwEeRNeTcbau2XfRDUnaMckDquqPE62YZC/gE8Cv6T4/rgX+GtgYeDXdMe2tO+3PgSk8he4z8Tt019zj6IbHbZPkOVV15kx2dohpfzYsw88rSYOqyh9//PHHH3/8mcEP8Cq6h6jvLca2p3X//N6tbNtW30ETbLMW8OAh5Y+ge2i8fMiyAk6boL512/IFA+UntfL9B8qfRvcQ+ztgjb7yPdv6twPPGtjmvW3ZP0/zuPyObh6g+w1Z9uC+16sA/0OXLHvmwHoPo5t34lfAffvKD2qx/BnYdEj972zL3zhk2UfbshdOdb7oHkwLeOewfQBWG7gO7gReNrDeA+mSbjcBD53msbvHue7b5z8CGw0s+0Jb9tJp1r/tsDYmWPe+wCMmuIYvBX4PrD6w7KpW/7f6l9E9EF/XflYduB6rXQdr95WvRpegKOCqgTZ61+qeS9j21m39HwMP7Cu/D11S5B5tT3G8eu2vO1C+Ad3cIwVs3coeC2RIHe9u6+0+UP4frfzQgfJNgFsmuIZn+7osugTsSn3lj2nXwa3AY/vK92Hi+/C0tuwJS3Jch6z3f2297frKFgxuS5d4uQX46yF1PHjg/Uw/B2qC++0ex4IuKVnATweO6UGtfNsh52CJPxtm87rwxx9/ZvbjkChJkmZunfb758uiseq68N+jl0pV/Zxu0uP1kzxqSdpI8gjgOcD/MtBrqKrOouttszbDJ0D9YlWdMlB2VPu95QzCuA24Y7BwYN//FlgP+EgNzOtRVb9ssf8N3WStg46qqguHlH+W7mHk7/sL032LzMuA39L1vJhQuiFjW9E9vBw6bB+q6ua27ibAM+l67XxxYL3r6HpHrUbXq2BJHV5VlwyU9f4SPpNzMy1VdUu7LgfLrwc+Rdfj4h7D1po3V9VNfdv8FvgaXbLniX3rvbr9Prj6ej214/vOxQx9um33rpGD27nqrX/rErQN8NY2VOXdST5HN3/N6sBXq/WkqKorqqqGbPvB9vu5vYJ27f4dXU+1f+tfuaoW0Q0HupuldF3eAexbVXf21XUlcDhdb6RX9a37abpE7OsH4npii+u7VfWTGbQ9Hb2JhR8yjXVvZ0jvqf7Pp5l8DkzD/9DXc6dt/zW63jCPo0seLolpfTYs488rSQMcEiVJ0nIgydOBt9A9DPw13V/0+z2cLtmyuDZtv8+sblLiQafSfbPKptzzYW/hkPX/r/3+q2m2/3m6SUB/mORLdA8lZ9c9h0b05ux59LD5N4DHt99Pous10W/w22CALvGV5BTg2W0I1w/bohfSJak+WEPmKBrw1Pb7pP6H0wn09mGtCfah9/A47TmAJjEb52ZGkmwAvAPYhi65OTi/yrA5Mq6vqv8ZUj4s1s3a72ET8X6PIUm/Kcyk7d598r0h659D91C/ON7SfhdwA3Ax8Dm64TMAtLlP3kL3LUdPANbkrmFTcPfj+kS6hM/CqvrTkPa+B/zDQNnSuC7/tyVoBp1G96DfO55U1e/avb9Hkqe1RDHAXu33x5l9g8POJvJ54N/pPp++SHftfb+qrhlYbyafA1M5c4I6TqNLoGzK8Htguqb72bAsP68kDTBhI0nSzPW+QWSpTNw6KMmL6XrS3Ew3L8jP6Ib33EnXff6ZdENRlsRa7fdE347SK3/gkGXXDRZU1e3p5mtdeZrtvw24gq73xH7t5/Yk3wL+X98D9YPa792mqG+NIWW/nmT9BXTz2/w93VwhcFdvimOmaAvuOi7T+Srg3j48u/1MZNg+zNR1Q8p6SYXpnptpS/dV96fS/R/zFLr5Mf5Id63OoxvSMe
xaHRYnDI+1d63+ZnDldt3N9OvWZ6vtO5L8brB8mh5TA99m1C/JqnTHdUu6oWXH0s0P1EuujnH34zphnJOUL43rcqL
"text/plain": [
"<Figure size 864x1584 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"fig, ax = plt.subplots(figsize=(12, 22))\n",
"sns.heatmap(mc_a_df, annot=True, linewidths=0.1, fmt='.0f', ax=ax, cbar=None, cmap='Blues', linecolor='gray')\n",
"ax.set_xticklabels(ax.get_xticklabels(), rotation=30, ha='right')\n",
"ax.set_yticklabels([textwrap.fill(e, 85) for e in mc_a_df.index], rotation=0)\n",
"ax.invert_yaxis()\n",
"ax.set_xlabel(None)\n",
"ax.set_ylabel(None)\n",
"#sns.heatmap(df, linewidths=2, linecolor='yellow')\n",
"title = ' Citation service in Landing Page by Discipline\\n'\n",
"plt.title(title, loc='left', fontsize=20)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "announced-brook",
"metadata": {},
"outputs": [],
"source": [
"df_pis_kw[\"model_citation_lp\"].fillna('na', inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "comfortable-reform",
"metadata": {},
"outputs": [],
"source": [
"mcbc_a_df=df_pis_kw.groupby(['Community', 'model_citation_lp']).count()['Repository name'].unstack('model_citation_lp')"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "pleasant-decade",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAuoAAAIUCAYAAABM/wnPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAABANUlEQVR4nO3dd7gcZdnH8e8NoffeMXSQFiFSFKQpIK9IFQgKoq8CvqKAioCoIdJFBTtFMSgiqICgolggIsQAoQqiKBKkSJESigRCuN8/njmwOdmTRpJ5kvP9XNdee87M7My9u7O7v33mmWcjM5EkSZJUl7naLkCSJEnSpAzqkiRJUoUM6pIkSVKFDOqSJElShQzqkiRJUoUM6pIkSVKFDOqzWESMiIhWx8SMiIERkRExvM06ZlcRsW3z+B3fdi0ATS0j2q6jFhExJiLG9Jp2UPM4HdROVf2P+2X/4PMszVwG9dcpIhaKiCMi4uqIeCwiXoqIpyPixog4KSJWn4p1zPDg55unZpSO/XNE27XMaZovFdlxeaV5/xgZER+NiAFt1zg7i4hVIuLUiLg5Ip6KiPHN+/TvIuLwiFis7RrnRBFxfLM/b9t2LdLszg+B1yEitgB+CqwEPAhcCTwMLAS8CTgaOCoitsjMW5qbHQgs2EK5nR4C1gPGtlzH7OpGyuP3n7YLaawH/LftIip3GTAK+HfbhfThq8DTwNzAasBewJbADsCe7ZU1+4qIDwHfAOYDbgd+BDwFLAVsBZwJfA5YuqUS5xS+/0gzkUF9OkXEusBVwMLAMcCXM/PlXsusBpwGLNozLTP/NSvr7CYzxwN/bbuO2VVm/peKHr/MrKaWWmXmWOr+YnpmZo7p+SciTgFuAvaIiG0y8w+tVTYbioj3AudSgvlemfnLLsu8FfjmrK5tTuP7jzSTZaaX6bgAvwUSOHkqlp2v4+8R5WF/9f/hzXq6XbZtllkMOAq4mtJy/xLwOHAFsGWvbR00mfUd3ywzsPl/eJdaV6B8eI3p2M6lwKZdlu3Z1kHAds19exZ4BvglsN40PJ4BvB8Y2WxzHPAA5cvQvl2WX5nSWvZP4EXgiebxeHOXZY/veTyB/YEbgOea+7hFM++yydR2d7ONJZv/t+18PHstuyRwEnAnpZVpLKU171RgoS7LntKs/4Vm2d8DO07jvpjAiMnc570pRwH+CzwJXASsNA3r37bbNvpYdl7gMMrRpfubx+1J4HfAO/u4zZjmshBwOvCv5nb/oByVij72l8OAu5p95aFmf1isZ3197aszaNuHA3+Zmm1P4fEa09Q1sMu8K5t5RzX/bwec02z3mWafuRMYCszfx/pXAL4HPNYsfxvldTalfXiG7ZfAisAPOmq4Gdi/17I7Nct/r491zUc5gvUfOt5P+1h2Ecr7QU6p5m7rohzF+HWz374I3EN5/S7WZdkRzXbmAT4P3NvsE38DPtyx3KHAn5v7/yAwDJir17oGNusaDqxBOVr7BOU99TfABs1yyzT7wb+bbd0EbNeltuGT2be6Pv8d92cA8Bng781j8ACl0WneKb3/8No+Pcmlmf+j5v9t+nhO9mrmf2Na9jcvXubUiy3q06FpKX875U3yi1NaPjNfnMzsnzXX7wf+QHmj7DGmuV6PEv6upQTgp4BVgXcD74yIXTPz182yt1E+BIZSgtLwjvV1rnsSzf26jvLBejXlDXUV4D3A/0TEXpn5iy43fRewG/Ar4CzgjcAuwJsj4o2ZOTVdRE4CjgXuA35MCQcrAG9utn9xR52bUD64lqQE+Usph693B66LiD0y88ou2/gk8A7g58A1lA/eURHxN2CXiFgqM5/o9ZhsBqwLXJKZT07uDjSP3zXAGyhh5NuU80DWBo5sHpvnm2XfQHk+BgJ/pASDhSiP5a8j4pDMPHdy25tK/0fZT66g7F+bA/sCG0fEoCnsm9NjSUo3jpGUL7OPU57HXYErI+LDmfmdLrebh/JcrkjZj1
6mPJ+nAvNT9ulOZwIfp4SVc4DxlH1wc8qXhZemoeZp3fY3gY9Qurmd02zr3cBmzbrGT8O2Jyea656Tz4+m7IsjKe8D8wNvpXwp2zYi3p6ZE169ccSywJ8o++O1ze2WB75Fef1MusEZv18u0Wz3acoXhsWBfYAfRsRKmXl6s9xvKCF3n4g4IssRkE57UbqsfHkq9tm9KfvhqMzsej979F5XRBxCed0+D/yE8uViW8pjv2tEvDUzn+6yqoso+96VlOd/b+CciBgPbER5f/8F5QvPuymh/r+U8NvbQEpjwt2U9++BwB7AiIjYkvKcPEN5T1wS2A/4VUSsnTPuiO2FwNaU18MzlPfzTwPLAh+Ywm3PpLx+tgHO57XPsR7fbmo+mPKe1NshzfVZ01y1NCdq+5vC7HgBDqB8eF43HbcdQUeLejNtW/po3WrmLwYs3WX6ypSwcHeXeX22gNJHizolrCRwXK/pb6GElyeAhTumH9Qs/zKwQ6/bnNLM+/RUPi5PUFqaFuwyb+mOvwdQWjvH0atFhhK0HqKEt86jGMc3tTwPvKnL+o9t5h/WZd43m3m7Tun5ogSSBI7tdh/oaPVs9oNXgP16Lbc45cvWC8ByU/nYTa5F/Rlgw17zLmzm7TOV69+22zb6WHY+YOU+9uE7Ka2UC/SaN6ZZ/5Wd8yih4OnmMk+v/TGb/WDJjunzU4JpMm0t6tOy7a2b5f8GLN4xfV5KGJ5k21N4vHq2P7DX9PUpQS6BrZtpq9O9hf+EZrl9e03/bjP9tF7TN6a0kvbVojoj98ukfPGeq2P6as1+8BKwesf0T9H363BEM2/tqdhuz/0+cWqfh+Z2b2gel2eAdXvN+1azznP6qOumXvvD6s39e4rS+LBSx7zFKUcGHgcGdEwf2PGY9X4P/lwz/UlKgO18PHs+j87odZvh3fatXq/pbs9/UhoaOl9bC1FebxOA5bs8z329/2zbx2N9J+U9fKle01dv9r/rp+W58+JlTr446sv0WaG5fnBWbCwzx2aXVunMfJByeHTdiFj19WwjIlYGdqQc+p/oKEFmjqS0ri9J9xPbLsrM3/eadk5zvdk0lDGe8kEwkV73/X8oh4W/nr367Wbmw03ty1MOX/d2Tmbe2mX6DygfDu/vnBgR81Jafh6jtCz1KSI2pZz8dxtdWsky8z+ZOa5ZdmNKa9MlmXlRr+WepunKQGlFfL2+lpl/7jWtp0V0Wp6bqZKZLzb7Ze/pY4HzKC2sb+7j5h/PzBc6bvMYcDkl5K/TsVxPi95J2XGUo3l8j53O0qd22z37yEnZ0bKamS+9jm0DHNGMlHFCRFxACX4LULpk/bHZxj8zM7vc9ozmeqeeCc2+O4RyZOrEzoUz83bg+71XMpP2ywnA0Zn5Sse67gO+Rjn6cEDHst+jhLdDOlcQEes0dV2TmfdMxTan9/35fZQvXN/ISftdH0fpgnJARMzX5bbH9Nof/kk5Ork4cEJmPtQx72nKUb2lKQMR9DaGcjSn0/nN9XyUrlCvdMy7kNJYMqjPezbtju712noe+CHlCOHgGbD+b1Puy0G9pn+YciTp7BmwDWmOYNeX2URz4tPhlDC4LOUDpdNKlJA9vd7UXP8xy8mmvV1N+SB7E5N+yI/usvwDzfUSU7n9HwIfA/4SET+mHBL9U056CHzL5voNfQxnuVZzvR6llbTTjd02nJkPRsTvgXc0XXX+0szalfLl5IzsdaJwF1s011f1+hDtpuc+LNbHfVimuV5vCuuZGjPiuZkmEbE+5ZyKt1FC0/y9FukWTsZm5j+6TO9W6ybNdbfD5tfR5cveFEzLtnteJ9d1WX4UJTBNj8Ob66ScP3EHcAEdh/8jYqFmuT0o3akW4bXuMTDx47oOJeiPzsxnu2zvOuBDvabNjP3yX00w720EJfj3PJ5k5hPNa//AiHhL00AApYsEzPyuED371dW9Z2TmUxFxK2WfXpdy3kmnbq+zh5vrm7vM6wnuK1O6KHa6LTu6MP
Va1z29n8/MnBARjzbrmlFm9vvG9ylfRg4GvgwQEfNQgvtTlKMwkjCoT6+eId66BY4ZLiL2oLScj6P0+72X0o3jFco
"text/plain": [
"<Figure size 864x576 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"fig, ax = plt.subplots(figsize=(12, 8))\n",
"sns.heatmap(mcbc_a_df, annot=True, linewidths=0.1, fmt='.0f', ax=ax, cbar=None, cmap='Blues', linecolor='gray')\n",
"ax.set_xticklabels(ax.get_xticklabels(), rotation=30, ha='right')\n",
"ax.set_yticklabels([textwrap.fill(e, 85) for e in mcbc_a_df.index], rotation=0)\n",
"ax.invert_yaxis()\n",
"ax.set_xlabel(None)\n",
"ax.set_ylabel(None)\n",
"#sns.heatmap(df, linewidths=2, linecolor='yellow')\n",
"title = ' Citation service in Landing Page by Community\\n'\n",
"plt.title(title, loc='left', fontsize=20)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "coupled-answer",
"metadata": {},
"outputs": [],
"source": [
"msn_a_df=df_pis_kw.groupby(['Community', 'metadataStandardName']).count()['Repository name'].unstack('metadataStandardName')\n",
"#tt_a_df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "associate-lawrence",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.0, 1.0, ' Metadata Standard by Community\\n')"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAyIAAAKZCAYAAABEN59cAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAACTcUlEQVR4nOzddZhd1fXG8e+bBA8Q3Nvg7qHID3d3C+7S4hSXENwDlBYtTiFQoFixAkFKkeBWoLi7awjr98fal9xMZkISJufO5L6f55knmXvOPbPnzLn3nrX32msrIjAzMzMzM6tSl0Y3wMzMzMzMmo8DETMzMzMzq5wDETMzMzMzq5wDETMzMzMzq5wDETMzMzMzq5wDETMzMzMzq5wDEWtKko6QFJKWbXRbmln5GwxocBtek/TaSOzf8Dbb6Oe/s5nZ6OdAxIYh6aLyIRySjhzOflvX7TegHX5up/vgrztXPSv6eRtJulXSB5IGSfpY0nOSLpO0dYt9ly1tO6KKtll1JM0g6XhJj0r6tFwLH0j6l6Q9JU3c6DaOidyBYWbWvro1ugHWof0IbCupb0QMbmX7jmUfX0cVkHQuec6/BW4GXgUEzAGsBSwLXNyo9lk1JO0AnAmMAzwJXAF8CkwGLAmcBhwGTN6gJo4p5gS+aXQjzMzGZL6BtOG5CVgXWJW88f2ZpDmB/wOuA9arvGVNRtKSZBDyFrB4RLzVYvtYZCBiYzBJmwPnkYHHBhFxcyv7/B/w56rbNqaJiP82ug1mZmM6p2bZ8FxO9r7v2Mq22mPnD+8AknpLulvSZ5K+k/S8pEMljVO3zzaSony7TF2611BpRWW/ayS9IulbSV9I+rekLYbz8xcuqUxflv3/JWnx4ey/bklzelHS1+XrUUl7SOrSYt8AaulQr9a1+bUWP/90SU9K+qScg5cknSJpkuGduxaWKP9e0zIIAYiIQRFxR93PvQi4u3zbp8U5XbbsM7Gk/STdJektST9I+lDSDW2do1r6nKTJJZ0r6V1J30t6VtK2bTxnbEmHSXq57PuqpKPrr4EW+08r6fDyt32vtOsdSX+TNFcr+/cs7bpI0myS+pc0pZ/qfldJ2q208ztJb0s689ekMJV2Xlp+1rflOtmsxT6rlLZd2MYxxpH0Uflq9XzU7TshcEb5dtPWghCAiPg3sGgrz1+hvBY+KX+HF5XpXcOcg/I3Dkljlb/Fy+W8vSBpx7r9dpH0dPn935LUt5XXSf3fZ2ZJf1emFH4p6XZJ85T9pqi7pr6T9Iik5VppW5vpkGojHbHu9+km6eDyGvxe0puSTpA0divHGipVVPm67lO+vbv+NVW2X1G+X6blscr2Dcr2M1vbbmbWjDwiYsPzGXA1sJmkqSPiPcibJ2Ar4B7gxbaeLOkCYFuyF/+acrzFgKOAFSStFBE/Ak8AfckP+deBi+oOM6Du/2cBzwL3Au+SqSirA5dKmj0iDmvx85cA/gWMDVwL/A9YoBzzrjaafTzwE/AQ8DYwMbA8cDqwCLBl3b59yRGj+cv2z8rjn9XtsyM5YnRPaUsXYGFgH2A1SYtGxJdttKXex+XfWUdgX4B/lH+3Lj97QN2218q/cwLHkOfzZrKX/TfA2qVta0XEra0cuwfwb+AH4O9kitBGwAWSfoqIn9PDJAm4ClgHeJlMKRob2A6Yt422Lw0cSAZS1wBfkb/3hsDakv4vIp5s5Xkzk3+3F8kgejzgi7LtNGAP8ro5FxhU2rRoac8PbbSlLZMAD5B/6wvJc7IxcLmk6SLipLLf7eX33ljSXhHxeYvjbEBex6dExPe/8DM3BCYFHoyI24e3Y8tjSdqZfP18Tb6mPyBH0A4A1irn9LNWDnUleY7+SZ6zDYFzJQ0C5iOvr5uAO8nr5nAynemEVo7Vk/z7PE++xnuSr40BysD3VvLv1b/8npsCt0iaLSLeGN7vOxL+BiwF3FJ+1urA/sCU5HvV8JxGvt6XIVMgX2ux/azS5p
3I11xLO5d/zx7pVpuZjakiwl/+GuqLvEkIYEUy5zyAg+q2b1oe2xyYpfx/QItjbFMevxYYr8W2I8q2PVs8PsxxWmyfuZXHxiZvggYB09U9LuC/5ZjrtHjOnuXxAJYdgZ/RhbzxCGDRNs5Vzzba/FugayuPb1+ed8AI/k2mI296A7gB2Iy8OddwnrNs2f+INrZPDEzeyuPTA+8Az7eyrXbezq//vYC5yPlCz7XYf7Oy/3+Acesen5S8QW/t2pkSmLCVnz0/GZTc0uLxnnXtOraV5y1Rtv0PmLTu8XFLuwJ4bSReH7WfdRXQpe7xGYFPyKBmprrH/1j2362VYw0o22YbgZ/717Lv0SPa1rpr8HvyxnuOFtv+Uo55bhvtegToUff4TOX3+5Sco1T/musBfAR8CHRr4+9zSIufc1h5/BPyBr3+fG5ZtvUb0ddcW9d83e/zaItrYIJyXQwGpm7l79zy2jyCVt436rY/A3wHTNbi8ZnIDo5/j8zfzl/+8pe/xvQvp2bZcEXE/eQN/Q6ldxuyl/9Tsre6LXuSN6bbRcS3LbYdRfbwbz6SbXm5lcd+IPPhuwEr1G1aApgduDcirm/xtDPJm+AR/Rk/kSMeAKuMZJtfj9Yn+l9A3hiO0PEi4m2y9/hlcmL65WTP/+cl3WYLSV1Hsm2fR8RHrTz+FjnSMYek37Ty1G+Afep/r4h4jhwlmVNS97p9a73MB0fEd3X7f0JeB62164NoZZQochTkLmA55ZyYlt4nR6laqrXhmPJza8f7DjiotTaMgMFkEPlT3fFeJVOnxmLokbMLyZvTnesPIGl2snf97ohoc2SxzjTl32FS837BFmTAfmYMO+/hEOBLYMs2UsMOjLqRkoh4BbifDDqOKtdlbdtnwI3kJPnpWjnWa+SIY73a6Nk4wH7155McvfiRHMVsLwe0uAa+Jl9LXYBe7XD8s8jfZZsWj+9Ido6c0w4/w8xsjOFAxEbEeWSP3vKSZgGWAy6tv7GsJ2l8svf6U2AvZcnLn7/IXtDvydSgESbpN5L+LOm/kr6py8+uBUT1Nz8LlX+HSZEoN9D3t/EzJit5809J+qruZzzays8YkTaPpZybcH/JzR9cjvcTMNHIHC8i7gZmI1OXDiNHm74hg5lLgVt/aZ5BK+37P0lXlVz57+t+393LLq2176WI+KKVx98s/9bPfVmI/F1bO98DhtOuNSTdWOYLDKpr11rkjV5rFaGejNbTm9q8Fkq7WgsUf8kbJfBoaUD5d8HaAxHxMTl6Mk9JF6zZqfw7ulN1ar//MOmIEfEp8Dg5OjRHK88d2Mpj75R/H21lWy0wmb6VbU+0EpTXjvViy+Cz7Pt+G8caVa39Pq1dt6PqEnLUrva3rRWS2IZ8P7yqHX6GmdkYw3NEbERcAhwL7EDO4RAZnLRlkrLPFAyZ3PmrSJoJeLgc+z4y9/5z8iayJ5mrXn8TXpuA+34bh3yvlZ/Rg0xFmbH8rEvIlJEfyR7gPVv8jBHRnxzJeAW4vvzc2s3yXiN7vNJjfF/5qs3BWInsWV4R2JXMZf9FktYjRz6+A+4gR1u+JgOHZcne+tba91kbh/yx/Fs/MjMx8ElEDGpl/2H+BqVde5K/w6elXW+QAVcwZE5Oa+1q9XgM51qIiB8lDTMqNAJ+6bqauMXjfyHnVe0MPFACxq3JuRrXjeDPfLf8O1LBcF1b3m1je+3xHi03xLBzWmDI33l421obsRpm/3L+2zpW7XitHWuUROvzYFq7bkf1+F9KugzYRdJypfNgbWBq4LS2Om/MzJqVAxH7RRHxkaTryBvqL4D/RMQzw3lK7abi8YhYaDj7jYx9yEm920bERfUbJPVmSPWqlm2Yqo3jTd3KYzuQQUjfiDiixc9YnAxERpikXuQ5+xewWuTE/Nq2LuQk2V8lIgK4XdKh5LyN5RnBQIRMjfoB6BURz7do+zlkIPJrfQ5MKmmsVoKRYf4GkrqRefjvAQtFxLsttrdZ8YwMVNpqA+
S18EorP29yRj7d6Zeuq6FurCPiIUmPUyatA6uR1/MJbQRprbmfnOS/AjkiNqJqbZmaLPbQ0jQt9uvoaulbrX1+9ai
"text/plain": [
"<Figure size 864x576 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"fig, ax = plt.subplots(figsize=(12, 8))\n",
"sns.heatmap(msn_a_df, annot=True, linewidths=0.1, fmt='.0f', ax=ax, cbar=None, cmap='Blues', linecolor='gray')\n",
"ax.set_xticklabels(ax.get_xticklabels(), rotation=30, ha='right')\n",
"ax.set_yticklabels([textwrap.fill(e, 85) for e in mcbc_a_df.index], rotation=0)\n",
"ax.invert_yaxis()\n",
"ax.set_xlabel(None)\n",
"ax.set_ylabel(None)\n",
"#sns.heatmap(df, linewidths=2, linecolor='yellow')\n",
"title = ' Metadata Standard by Community\\n'\n",
"plt.title(title, loc='left', fontsize=20)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "critical-windows",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}