From 7a47672c14960d26f3ed04cf9dfaee821f29f50d Mon Sep 17 00:00:00 2001 From: Cesare Date: Fri, 22 Oct 2021 10:49:33 +0200 Subject: [PATCH] repo profiles added --- pom.xml | 180 +++++++++--------- .../service/services/CitationHarvester.java | 8 +- .../impl/CitationHarvesterImpl.java | 81 ++++++-- .../wfconfigurator/util/CSVHelper.java | 115 +++++++++++ .../wfconfigurator/util/HeuristicParsers.java | 132 ++++++++++++- src/main/resources/repoprofiles.csv | 128 +++++++++++++ 6 files changed, 537 insertions(+), 107 deletions(-) create mode 100644 src/main/java/eu/sshoc/citation/service/wfconfigurator/util/CSVHelper.java create mode 100644 src/main/resources/repoprofiles.csv diff --git a/pom.xml b/pom.xml index 018e3a7..19ab164 100644 --- a/pom.xml +++ b/pom.xml @@ -1,9 +1,11 @@ - - 4.0.0 - eu.sshoc - citation - 0.0.2-SNAPSHOT - war + + 4.0.0 + eu.sshoc + citation + 0.0.2-SNAPSHOT + war CitationService The Citation Service of the SSHOC system @@ -18,9 +20,9 @@ - - - + + + junit junit @@ -105,85 +107,93 @@ 1.2.16 - org.springframework.boot - spring-boot-starter-data-mongodb - + org.springframework.boot + spring-boot-starter-data-mongodb + - org.springframework.boot - spring-boot-starter-test - test - - - - org.apache.curator - apache-curator - 3.3.0 - pom - - - org.apache.curator - curator-client - 3.3.0 - - - org.apache.curator - curator-framework - 3.3.0 - - - org.json - json - 20180130 - - - org.apache.maven.plugins - maven-shade-plugin - 3.1.0 - - - com.auth0 - java-jwt - 3.3.0 + org.springframework.boot + spring-boot-starter-test + test + + + javax.json + javax.json-api + 1.1.4 + + + org.glassfish + javax.json + 1.0.4 + + + org.apache.curator + apache-curator + 3.3.0 + pom + + + org.apache.curator + curator-client + 3.3.0 + + + org.apache.curator + curator-framework + 3.3.0 + + + org.json + json + 20180130 + + + org.apache.maven.plugins + maven-shade-plugin + 3.1.0 + + + com.auth0 + java-jwt + 3.3.0 - - - org.apache.curator - curator-x-discovery - 4.0.1 - - - org.apache.curator - curator-x-async - 4.0.1 - - - org.apache.curator - curator-recipes - 4.0.1 - + + org.apache.curator + curator-x-discovery + 4.0.1 + - - org.jsoup - jsoup - 1.11.3 - + + org.apache.curator + curator-x-async + 4.0.1 + + + org.apache.curator + curator-recipes + 4.0.1 + + + + org.jsoup + jsoup + 1.11.3 + + + org.apache.commons + commons-csv + 1.6 + - - data-local - data - file://${project.basedir}/repo - - - + + data-local + data + file://${project.basedir}/repo + + + @@ -215,10 +225,8 @@ false - + true @@ -229,7 +237,7 @@ maven-shade-plugin 3.1.0 - + \ No newline at end of file diff --git a/src/main/java/eu/sshoc/citation/service/services/CitationHarvester.java b/src/main/java/eu/sshoc/citation/service/services/CitationHarvester.java index 7042f64..3bbe75f 100644 --- a/src/main/java/eu/sshoc/citation/service/services/CitationHarvester.java +++ b/src/main/java/eu/sshoc/citation/service/services/CitationHarvester.java @@ -85,11 +85,11 @@ public class CitationHarvester { } - @ApiOperation(value = "Returns a metadata record for a citation searching in the available metadata repositories", - notes = "A client with a valid identifier can invoke this web service to obtain metadata of a citation by searching in the available metadata repositories (DOI repositories, landing pages, etc...) ", + @ApiOperation(value = "Returns a metadata record for a citation searching in the landing page and on metadata repositories", + notes = "A client with a valid identifier can invoke this web service to obtain metadata of a citation by searching in metadata repositories and in landing pages", response = String.class) - @RequestMapping(value="/citharvester/getmetadatahtml", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE) - public String getCitationMetadataFromHTML(@RequestParam(value="pid") String pid, @RequestParam(value="token") String token) { + @RequestMapping(value="/citharvester/getcitationmetadata", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE) + public String getCitationMetadata(@RequestParam(value="pid") String pid, @RequestParam(value="token") String token) { return wfc.getCitationMetadataFromHTML(pid, token).toString(); } diff --git a/src/main/java/eu/sshoc/citation/service/wfconfigurator/impl/CitationHarvesterImpl.java b/src/main/java/eu/sshoc/citation/service/wfconfigurator/impl/CitationHarvesterImpl.java index 4add8cb..b4a35f4 100644 --- a/src/main/java/eu/sshoc/citation/service/wfconfigurator/impl/CitationHarvesterImpl.java +++ b/src/main/java/eu/sshoc/citation/service/wfconfigurator/impl/CitationHarvesterImpl.java @@ -13,6 +13,7 @@ import java.net.ProtocolException; import java.net.URL; import java.net.URLEncoder; import java.nio.charset.Charset; +import java.util.Iterator; import javax.net.ssl.HttpsURLConnection; @@ -112,7 +113,8 @@ public class CitationHarvesterImpl { myURL = new URL(sid); HttpURLConnection myURLConnection = (HttpURLConnection)myURL.openConnection(); - myURLConnection.setRequestProperty("Accept", "application/rdf+xml;q=0.5, application/vnd.citationstyles.csl+json;q=1.0"); + //myURLConnection.setRequestProperty("Accept", "application/rdf+xml;q=0.5, application/vnd.citationstyles.csl+json;q=1.0"); + myURLConnection.setRequestProperty("Accept", "application/vnd.citationstyles.csl+json, application/rdf+xml\\"); myURLConnection.setConnectTimeout(18000); InputStream mis = myURLConnection.getInputStream(); BufferedReader rd = new BufferedReader(new InputStreamReader(mis, Charset.forName("UTF-8"))); @@ -170,15 +172,33 @@ public class CitationHarvesterImpl { System.out.println("citation string "+ jsonText); jsondata = new JSONObject(); - if (!jsonText.isEmpty()) + if (!jsonText.isEmpty()) { jsondata.put("citation string", jsonText); - + //urllib.request.urlopen("https://api.crossref.org/works?query.bibliographic="+cit+"&sort=score&mailto=cesare.concordia@gmail.com#", timeout=18000) + String crurl="https://api.crossref.org/works?query.bibliographic="+ URLEncoder.encode(jsonText)+"&sort=score&mailto=cesare.concordia@gmail.com#"; + URL crActURL= new URL(crurl); + HttpURLConnection crURLConnection = (HttpURLConnection)crActURL.openConnection(); + crURLConnection.setConnectTimeout(18000); //set timeout to 18 seconds + InputStream cris = crURLConnection.getInputStream(); + BufferedReader crrd = new BufferedReader(new InputStreamReader(cris, Charset.forName("UTF-8"))); + String crjsonText = readAll(crrd); + //System.out.println(crjsonText); + + JSONObject jsmeta=new JSONObject(crjsonText); + JSONObject jsmessage = (JSONObject) jsmeta.get("message"); + JSONArray jsitems = (JSONArray) jsmessage.get("items"); + //if score > 110 ok + //System.out.println(jsitems.toString()); + } } catch (Exception e) { + System.out.println(e); + /* e.printStackTrace(pw); String sStackTrace = sw.toString(); // stack trace as a string if (sStackTrace.length()>200) - System.out.println(sStackTrace.substring(0, 199)); + System.out.println(sStackTrace.substring(0, 2000)); + */ System.out.println ("####################### ("+pid+") getCitationCSL, citation by content negotiation not available"); } @@ -188,14 +208,21 @@ public class CitationHarvesterImpl { JSONObject jsondata=null; + jsondata=heup.getAPIMetadata(pid); if (pid.startsWith("http://")) { pid=pid.replace("http://", "https://"); } + + //special cases + if (pid.contains("ien.bg.ac.rs")) + pid=pid.replace("https://", "http://"); //first landing pages - System.out.println("getZenodoMetadataJSONLD ("+pid+")"); - jsondata=heup.getZenodoMetadataJSONLD(pid); + if (jsondata==null || jsondata.length()==0) { + System.out.println("getZenodoMetadataJSONLD ("+pid+")"); + jsondata=heup.getZenodoMetadataJSONLD(pid); + } if (jsondata==null || jsondata.length()==0) { System.out.println("getMetaMetadata ("+pid+")"); @@ -208,17 +235,43 @@ public class CitationHarvesterImpl { } //then DOI service providers... - - if (jsondata==null || jsondata.length()==0) { - getCNCit=true; - System.out.println("getCitationMetadata ("+pid+")"); - jsondata=getCitationMetadata(pid, token); - getCNCit=true; + try { + //System.out.println("jsondata.length() "+((JSONObject) jsondata.get("properties")).length()); + //int jopl=((JSONObject) jsondata.get("properties")).length(); + + if ((jsondata==null || !(jsondata.has("properties")) || ((JSONObject) jsondata.get("properties")).length() < 3) && pid.contains("doi.org") ) { + getCNCit=true; + System.out.println("getCitationMetadata ("+pid+")"); + jsondata=getCitationMetadata(pid, token); + getCNCit=true; + } + } catch (JSONException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); } - if (jsondata==null || jsondata.length()==0) { + if (pid.contains("doi.org") && (jsondata==null || jsondata.length()==0 || jsondata.isNull("citation string"))) { System.out.println("getCitationCSL ("+pid+")"); - jsondata=getCitationCSL(pid, token); + JSONObject jsoncsldata=null; + jsoncsldata=getCitationCSL(pid, token); + if (jsondata==null) + jsondata=jsoncsldata; + else { + Iterator ite= jsoncsldata.keys(); + while (ite.hasNext()) { + String key=ite.next(); + try { + jsondata.put(key, jsoncsldata.get(key)); + } catch (JSONException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + } + + } + + if (jsondata==null || jsondata.length()==0) { try { jsondata=new JSONObject(); diff --git a/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/CSVHelper.java b/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/CSVHelper.java new file mode 100644 index 0000000..566dac4 --- /dev/null +++ b/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/CSVHelper.java @@ -0,0 +1,115 @@ +/******************************************************************************* + * Copyright (c) 2021 VRE4EIC Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ +package eu.sshoc.citation.service.wfconfigurator.util; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.net.URISyntaxException; +import java.net.URL; + +public class CSVHelper { + + static CSVHelper app = new CSVHelper(); + public static void main(String[] args) { + + try { + File repo=app.getFileFromResource("repoprofiles.csv"); + try( + BufferedReader br = new BufferedReader(new FileReader(repo)); + CSVParser parser = CSVFormat.DEFAULT.withDelimiter(';').withHeader().parse(br); + ) { + System.out.println("header "+parser.getHeaderMap()); + + for(CSVRecord record : parser) { + System.out.println(record.get("Repository name") +" - "+record.get("Website") +" - "+record.get("dns")+" - "+record.get("viewer")); + } + } catch (Exception e) { + System.out.println(e); + } + } catch (URISyntaxException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + private File getFileFromResource(String fileName) throws URISyntaxException{ + + ClassLoader classLoader = getClass().getClassLoader(); + URL resource = classLoader.getResource(fileName); + if (resource == null) { + throw new IllegalArgumentException("file not found! " + fileName); + } else { + + // failed if files have whitespaces or special characters + //return new File(resource.getFile()); + + return new File(resource.toURI()); + } + } + + public CSVParser getRepoProfiles() throws URISyntaxException{ + + File repo=app.getFileFromResource("repoprofiles.csv"); + try( + BufferedReader br = new BufferedReader(new FileReader(repo)); + CSVParser parser = CSVFormat.DEFAULT.withDelimiter(';').withHeader().parse(br); + ) { + System.out.println("header "+parser.getHeaderMap()); + return parser; + + } + catch (Exception e) { + System.out.println(e); + } + return null; + } + + public String getRepoApi(String repositoryurl) throws URISyntaxException{ + + File repo=app.getFileFromResource("repoprofiles.csv"); + try( + BufferedReader br = new BufferedReader(new FileReader(repo)); + CSVParser parser = CSVFormat.DEFAULT.withDelimiter(';').withHeader().parse(br); + ) { + + int slashpos= repositoryurl.indexOf('/', 8); + String repoid=repositoryurl.substring(0, slashpos); + System.out.println("repoid "+repoid); + //System.out.println("repositoryurl "+repositoryurl); + for(CSVRecord record : parser) { + String apiurl=record.get("api"); + //apiurl=apiurl.replace("[", ""); + //apiurl=apiurl.replace("]", ""); + //System.out.println("dns "+record.get("dns").trim()); + if(record.get("Website").trim().contains(repoid) || apiurl.contains(repoid) || record.get("dns").trim().contains(repoid)) { + System.out.println(record.get("Repository name") +" - "+record.get("api")+" - "+record.get("viewer")); + return record.get("viewer"); + } + + } + return "none"; + + } + catch (Exception e) { + System.out.println(e); + } + return null; + } + +} diff --git a/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/HeuristicParsers.java b/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/HeuristicParsers.java index b604116..f63c3bd 100644 --- a/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/HeuristicParsers.java +++ b/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/HeuristicParsers.java @@ -15,11 +15,18 @@ *******************************************************************************/ package eu.sshoc.citation.service.wfconfigurator.util; +import java.io.BufferedReader; import java.io.FileWriter; import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.io.PrintWriter; import java.io.StringReader; import java.io.StringWriter; +import java.net.HttpURLConnection; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.charset.Charset; import java.util.HashMap; import java.util.Stack; import java.util.Vector; @@ -38,7 +45,80 @@ public class HeuristicParsers { StringWriter sw = new StringWriter(); PrintWriter pw = new PrintWriter(sw); + CSVHelper repo=new CSVHelper(); + public JSONObject getAPIMetadata(String pid) { + JSONObject metajsondata=new JSONObject(); + JSONObject jsondata=new JSONObject(); + try { + //Check if it is a handle + String dnsPid=""; + + + if (pid.contains(".handle.net/")) { + //get the actual repository URL + String haid=pid.substring(22); + haid="https://hdl.handle.net/api/handles/"+haid; + haid=haid.replace("handles//", "handles/"); + System.out.println("haid "+haid); + Document doc = SSLHelper.getConnection(haid).ignoreContentType(true) + .userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0").timeout(20 * 1000).get(); + //System.out.println("doc "+doc.text()); + JSONObject jsonrepometa=new JSONObject(doc.text()); + JSONArray vals = new JSONArray(); + vals=jsonrepometa.getJSONArray("values"); + for (int i=0; i0 && metajsondata.has(child.tagName())) { + String tvalue=metajsondata.getString(child.tagName()); + metajsondata.put(child.tagName(), tvalue+", "+child.text()); + } + else + metajsondata.put(child.tagName(), child.text()); + + } + } + + } + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + if (metajsondata!=null && metajsondata.length()>0) { + try { + jsondata.put("citation string", "na"); + jsondata.put("metadata source", "Repository API"); + jsondata.put("properties", metajsondata); + } catch (JSONException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + return jsondata; + } public JSONObject getZenodoMetadataJSONLD(String pid) { JSONObject jsondata=new JSONObject(); try { @@ -73,7 +153,11 @@ public class HeuristicParsers { //System.out.println(jsonStr); //getGraph(jsonStr); JSONObject jsonproperties=new JSONObject(); + jsonproperties=new JSONObject(jsonStr.trim()); + if (jsonproperties.has("mainEntity")) { + jsonproperties=jsonproperties.getJSONObject("mainEntity"); + } if (jsondata.length()>0 && jsondata.has("properties")) { jsondata.put("additional_properties", jsonproperties); } @@ -102,6 +186,24 @@ public class HeuristicParsers { if (!citationStr.trim().isEmpty()) { jsondata.put("citation string", citationStr.trim()); } + else + if (pid.contains("doi.org/10") && jsondata.length()>0){ + URL myURL = new URL(pid); + HttpURLConnection csURLConnection = (HttpURLConnection)myURL.openConnection(); + csURLConnection.setRequestProperty("Accept", "text/x-bibliography"); + csURLConnection.setConnectTimeout(18000); //set timeout to 18 seconds + InputStream mis = csURLConnection.getInputStream(); + BufferedReader rd = new BufferedReader(new InputStreamReader(mis, Charset.forName("UTF-8"))); + StringBuilder sb = new StringBuilder(); + int cp; + while ((cp = rd.read()) != -1) { + sb.append((char) cp); + } + String jsonText = sb.toString(); + if (!jsonText.trim().isEmpty()) { + jsondata.put("citation string", jsonText.trim()); + } + } } catch (Exception e) { @@ -118,6 +220,9 @@ public class HeuristicParsers { return jsondata; } + + + public JSONObject getLinkMetadata(String pid) { @@ -184,9 +289,18 @@ public class HeuristicParsers { if (!name.trim().isEmpty() && !content.trim().isEmpty() && (name.toLowerCase().trim().startsWith("dc.") || - name.trim().startsWith("citation_"))){ - - jsonproperties.put(name, content); + name.trim().startsWith("citation_")) || + name.toLowerCase().trim().startsWith("eprints.")){ + if (jsonproperties.has(name)) { + String names= jsonproperties.get(name).toString(); + content=names+", "+content; + } + + if (name.equalsIgnoreCase("eprints.citation")) { + jsondata.put("citation string", content); + } + else + jsonproperties.put(name, content); } if (name.trim().contains("citation_author")) {//to be used to double check authors authors_affiliation.put(content, "na"); @@ -217,6 +331,18 @@ public class HeuristicParsers { if (citationstring!=null) { jsondata.put("citation string", citationstring.text().trim()); } + + //check if metadata is in the html elements europeana style + + Elements euRecord = doc.select("div[data-field-name]"); + for (Element divmeta: euRecord) { + String mdname = divmeta.attr("data-field-name"); + if (mdname != "") { + jsonproperties.put(mdname, divmeta.text().trim()); + } + + + } if (jsonproperties!=null && jsonproperties.length()>0) jsondata.put("properties", jsonproperties); diff --git a/src/main/resources/repoprofiles.csv b/src/main/resources/repoprofiles.csv new file mode 100644 index 0000000..5cafa6b --- /dev/null +++ b/src/main/resources/repoprofiles.csv @@ -0,0 +1,128 @@ +Community;Repository name;Website;metadataStandardName;dataAccessType;api;dns;viewer +DARIAH;"A web museum to show and share videos; documentaries and studies related with cultural manifestations of Intangible Cultural Heritage (ICH).";http://www.memoriamedia.net/index.php/en;;;;;none +CLARIN;ACDH - A Resource Centre for the HumanitiEs;https://centres.clarin.eu/centre/45;['Dublin Core'];['embargoed', 'open', 'restricted'];['https://arche.acdh.oeaw.ac.at/oaipmh/', 'https://arche.acdh.oeaw.ac.at/browser/api-access'];https://id.acdh.oeaw.ac.at/grundbuecher-facs;https://arche.acdh.oeaw.ac.at/oaipmh?verb=GetRecord&metadataPrefix=oai_dc&identifier=https://hdl.handle.net/viewerPid +CLARIN;ASV Leipzig;https://centres.clarin.eu/centre/4;['none'];['embargoed', 'open', 'restricted'];['https://clarinoai.informatik.uni-leipzig.de/oaiprovider/?verb=Identify', 'http://clarinoai.informatik.uni-leipzig.de:8080/oaiprovider/', 'http://api.corpora.uni-leipzig.de/ws/swagger-ui.html'];;none +E-RIHS;Accélérateur Grand Louvre d'Analyses Elémentaires (C2RMF);https://c2rmf.fr/analyser/un-laboratoire-de-haute-technologie-pour-les-collections-des-musees/aglae;;;;;none +DARIAH;Archive of the Italian Latinity of the Middle Ages;http://en.alim.unisi.it/;;;;;none +CESSDA;Aspiring SPs - need a list;;;;;;none +CESSDA-SP;Austrian Social Science Data Archive; https://aussda.at;['DDI - Data Documentation Initiative', 'Dublin Core'];['embargoed', 'open', 'restricted'];[];;none +E-RIHS;Basel Mission Archives;http://www.bmarchives.org/;;;;;none +CLARIN;Bayerisches Archiv für Sprachsignale;https://centres.clarin.eu/centre/5;['Dublin Core'];['restricted'];['ftp://ftp.bas.uni-muenchen.de/', 'https://www.phonetik.uni-muenchen.de/cgi-bin/BASRepository/oaipmh/oai.pl?verb=Identify', 'https://webapp.phonetik.uni-muenchen.de/BASSRU/', 'https://clarin.phonetik.uni-muenchen.de/BASWebServices/interface'];;none +CLARIN;Berlin-Brandenburg Academy of Sciences and Humanities;https://centres.clarin.eu/centre/6;['Dublin Core'];['open'];['https://vlo.clarin.eu/data/Berlin_Brandenburg_Academy_of_Sciences_and_Humanities_BBAW_.html', 'https://clarin.bbaw.de:8088/oaiprovider'];;none +DARIAH;Bibliographic database of world Slavic Linguistics publications;http://www.isybislaw.ispan.waw.pl/;;;;;none +DARIAH;Biblioteca Digitale Romanza;http://piccolabdr.humnet.unipi.it/;;;;;none +E-RIHS;BioArCh;https://www.york.ac.uk/archaeology/centres-facilities/bioarch/;;;;;none +E-RIHS;Budapest Neutron Centre;https://www.bnc.hu/;;;;;none +CLARIN;CLARIN-PL Language Technology Centre;https://centres.clarin.eu/centre/25;['Dublin Core', 'OAI-ORE - Open Archives Initiative Object Reuse and Exchange'];['closed', 'embargoed', 'open', 'restricted'];['https://vlo.clarin.eu/data/CLARIN_PL_digital_repository.html', 'https://clarin-pl.eu/oai/request?verb=Identify'];;none +CLARIN;CLARIN.SI Language Technology Centre;https://centres.clarin.eu/centre/30;['Dublin Core', 'OAI-ORE - Open Archives Initiative Object Reuse and Exchange'];['embargoed', 'open', 'restricted'];['http://www.clarin.si/repository/oai/request?verb=Identify', 'http://www.language-archives.org/archive_records/clarin.si'];https://www.clarin.si/repository/xmlui/handle/11356/1186;http://www.clarin.si/repository/oai/request?verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:www.clarin.si:viewerPid +CLARIN;CLARINO Bergen Center;https://centres.clarin.eu/centre/29;['none'];['embargoed', 'open', 'restricted'];['https://clarino.uib.no/oai'];https://repo.clarino.uib.no/xmlui/handle/11509/122;none +CLARIN;CMU-TalkBank;https://centres.clarin.eu/centre/18;['Repository-Developed Metadata Schemas'];['closed', 'open', 'restricted'];[];;none +;Center for Socio-Political Data / Sciences Po;https://data.sciencespo.fr/dataverse/cdsp https://cdsp.sciences-po.fr/en/;['DDI - Data Documentation Initiative', 'Dublin Core'];['embargoed', 'open', 'restricted'];[];;none +CLARIN;Center of Estonian Language Resources (CELR-EKK);https://centres.clarin.eu/centre/15;['none'];['open', 'restricted'];[];;none +E-RIHS;Centre de Recherche et de Restauration des Museés de France;https://c2rmf.fr/categorie/archives;;;;;none +E-RIHS;Centre de recherche sur la conservation;http://crc.mnhn.fr/?lang=en;;;;;none +E-RIHS;Centro Nacional de Investigación sobre la Evolución Humana;https://www.cenieh.es/;;;;;none +DARIAH;Clavius on the Web;http://claviusontheweb.it/;;;;;none +DARIAH;"Collection of documents (text; sheet music; audio; video; photo) about traditional and contemporary culture and society";https://repozitorij.dief.eu/;;;;;none +DARIAH;Corpus of French authentic text messages;http://88milsms.huma-num.fr/;;;;;none +DARIAH;Corpus of authentic contemporary Italian texts;http://www.corpusitaliano.it;;;;;none +E-RIHS;Corpus testuale OVI;http://www.ovi.cnr.it/index.php/it/il-corpus-testuale;;;;;none +DARIAH;Croatian encyclopaedia;http://enciklopedija.hr/;;;;;none +DARIAH;CulturaItalia;http://www.culturaitalia.it;;;;;none +CESSDA-SP;Czech Social Science Data Archive;http://archiv.soc.cas.cz/;['none'];['restricted'];[];http://nesstar.soc.cas.cz/webview/index/en/nesstar/-SDA.c.nesstar/English.d.2/ISSP.d.6/Social-Inequality-ISSP-2019-Czech-Republic/fStudy/ISSP00027en;none +DARIAH;DARIAH-DE Repository ingest tool;https://de.dariah.eu/en/publikator;['Dublin Core'];['open'];['https://repository.de.dariah.eu/1.0/oaipmh/oai', 'https://repository.de.dariah.eu/doc/services/'];;https://repository.de.dariah.eu/1.0/oaipmh/oai?verb=GetRecord&metadataPrefix=oai_dc&identifier=hdl:viewerPid +CESSDA-SP;Danish National Archives;https://www.sa.dk;['DDI - Data Documentation Initiative', 'Repository-Developed Metadata Schemas'];['restricted'];['http://dda.dk/search-api?'];;none +CESSDA-SP;Data Archiving and Networked Services;https://dans.knaw.nl/en;['DDI - Data Documentation Initiative', 'Dublin Core'];['embargoed', 'open', 'restricted'];[];https://easy.dans.knaw.nl/ui/datasets/id/easy-dataset:203175;none +CESSDA-SP;Data Center Serbia for Social Sciences;;;;;;none +DARIAH;Digital Archives for Medieval Culture;http://www.mirabileweb.it/;;;;;none +DARIAH;Digital Collections in the Cloud;https://locloudhosting.net/;;;;;none +DARIAH;Digital Library Federation;http://fbc.pionier.net.pl/;;;;;none +DARIAH;Digital Repository of Ireland;https://www.dri.ie/;['Dublin Core', 'MIDAS-Heritage'];['embargoed', 'open', 'restricted'];[];https://repository.dri.ie/objects/s465jx541/doi/s465jx541;none +DARIAH;Digital Repository of Scientific Institutes;http://rcin.org.pl;;;;https://rcin.org.pl/igipz/publication/180608;none +DARIAH;Digital library of late-antique latin texts;http://www.digiliblt.unipmn.it/index.php;;;;;none +CLARIN;Eberhard Karls Universität Tübingen;https://centres.clarin.eu/centre/1;['Dublin Core'];['closed', 'open', 'restricted', 'restricted'];['http://weblicht.sfs.uni-tuebingen.de/oaiprovider/', 'https://talar.sfb833.uni-tuebingen.de:8443/erdora/rest/oai', 'http://catalog.clarin.eu/oai-harvester/Clarin_D_Repository_Tubingen.html'];;none +DARIAH;Electronic Archive of Carlo Emilio Gadda’s Works;http://www.ilc.cnr.it/CEG/;;;;;none +E-RIHS;European Research Platform on Ancient Materials IPANEMA;http://ipanema.cnrs.fr/;;;;;none +E-RIHS;FORTH - Laboratory of Geophysical - Satellite Remote Sensing and Archaeo-environment;http://ims.forth.gr/index_main.php?c=33&l=e&s=&p=1&d=7;;;;;none +CESSDA-SP;Finnish Social Science Data Archive;www.fsd.tuni.fi;['DDI - Data Documentation Initiative', 'Dublin Core'];['embargoed', 'open', 'restricted'];['http://services.fsd.tuni.fi/v0/oai'];;none +CESSDA-SP;Finnish Social Science Data Archive;www.fsd.tuni.fi;['DDI - Data Documentation Initiative', 'Dublin Core'];['embargoed', 'open', 'restricted'];['http://services.fsd.tuni.fi/v0/oai'];;none +E-RIHS;Foundation for Research and Technology - Hellas;https://www.forth.gr/index.php?l=e#;;;;;none +CESSDA-SP;GESIS - Leibniz Institute for the Social Sciences;http://www.gesis.org/;['DDI - Data Documentation Initiative', 'DataCite Metadata Schema'];['closed', 'embargoed', 'open', 'restricted'];['https://dbk.gesis.org/dbkoai/?verb=Identify'];https://search.gesis.org/research_data/ZA5950?doi=10.4232/1.12312;none +DARIAH;Geisteswissenschaftliches Asset Management System;http://gams.uni-graz.at;['Dublin Core'];['open', 'restricted'];['http://gams.uni-graz.at/oaiprovider?verb=Identify'];http://gams.uni-graz.at/o:corema.b2/DC;graz.at/oaiprovider/?verb=GetRecord&metadataPrefix=oai_dc&identifier=hdl:viewerPid +CESSDA-SP;Greek research infrastructure for the social sciences;http://sodanet.gr;['DDI - Data Documentation Initiative'];['open', 'restricted'];[];https://datacatalogue.sodanet.gr/citation?persistentId=doi:10.17903/FK2/L4WTVC;none +E-RIHS;Groningen Institute of Archaeology - Rijksuniversiteit Groningen;https://www.rug.nl/research/groningen-institute-of-archaeology/;;;;;none +E-RIHS;HELgroup;https://www.helgroup.com/;;;;;none +CLARIN;Hamburger Zentrum für Sprachkorpora;https://centres.clarin.eu/centre/9;['Dublin Core'];['open', 'restricted'];['http://corpora.uni-hamburg.de:8080/oai/provider', 'https://vlo.clarin.eu/data/clarin/results/cmdi/HZSK_Repository/'];https://corpora.uni-hamburg.de/repository/spoken-corpus:slc-1.0.0;none +DARIAH;Hebrew Text Database;https://shebanq.ancient-data.org/;;;;;none +E-RIHS;"Herança Cultural; Estudos e Salvaguardia - Universidade de Évora";http://www.hercules.uevora.pt/www.hercules.uevora.pt;;;;;none +E-RIHS;Heritage Laboratory;https://www.raa.se/in-english/cultural-heritage/heritage-laboratory/;;;;;none +DARIAH;History of Slovenia;http://www.sistory.si/;;;;http://sistory.si/11686/37723;none +DARIAH;IAH online library catalogue and database;http://library.foi.hr/lib/index.php?B=561;;;;;none +E-RIHS;"Imaging and Sensing for Archaeology; Art History and Conservation - Nottingham Trent University";https://www.ntu.ac.uk/research/groups-and-centres/groups/isaac;;;;;none +CLARIN;Institut für Deutsche Sprache;https://centres.clarin.eu/centre/11;['Dublin Core'];['closed', 'open', 'restricted'];['http://repos.ids-mannheim.de/oaiprovider/'];;none +CLARIN;Institut für Maschinelle Sprachverarbeitung;https://centres.clarin.eu/centre/10;['Dublin Core'];['closed', 'open', 'restricted', 'restricted'];['https://vlo.clarin.eu/data/clarin/results/cmdi/IMS_Repository/', 'http://clarin04.ims.uni-stuttgart.de/oaiprovider/oai'];;none +E-RIHS;"Institute for Nuclear Research; Hungarian Academy of Sciences";http://w3.atomki.hu/index_en.html;;;;;none +E-RIHS;Institute of Theoretical and Applied Mechanics of the Czech Academy of Sciences;http://www.itam.cas.cz/;;;;;none +E-RIHS;Institute of the Protection of Cultural Heritage of Slovenia;https://www.zvkds.si/en;;;;;none +E-RIHS;Instituto de Química-Física Rocasolano;https://www.iqfr.csic.es/es/;;;;;none +E-RIHS;Instituto del Patrimonio Cultural de España;https://ipce.culturaydeporte.gob.es/inicio.html;;;;;none +CLARIN;Instituut voor de Nederlandse Taal;https://centres.clarin.eu/centre/22;['none'];['open', 'restricted'];['https://vlo.clarin.eu/data/Instituut_voor_Nederlandse_Lexicologie_INL_Metadata_Repository.html', 'https://portal.clarin.inl.nl/doc/information_about_deposition.pdf'];;none +E-RIHS;Istituto per i Beni Archeologici e Monumentali - Laboratorio di Fotogrammetria;http://www.ibam.cnr.it/en/2-non-categorizzato/158-photogrammetry-laboratory;;;;;none +E-RIHS;"Istituto per i Beni Archeologici e Monumentali - Laboratorio di di Topografia antica; Archeologia e Telerilevamento";http://www.ibam.cnr.it/2-non-categorizzato/152-laboratorio-di-topografia-antica-archeologia-e-telerilevamento;;;;;none +DARIAH;Italian WordNet;http://www.ilc.cnr.it/iwndb/iwndb_php/;;;;;none +E-RIHS;Koninklijk Instituut voor het Kunstpatrimonium;www.kikirpa.be;;;;;none +E-RIHS;Koninklijke Musea voor Kunst en Geschiedenis/Museés Royaux d’Art et d’Histoire;http://www.kmkg-mrah.be/fr/archives;;;;;none +CLARIN;LINDAT/CLARIN;https://centres.clarin.eu/centre/3;['none'];['open', 'restricted', 'restricted'];['https://centres.clarin.eu/oai_pmh'];https://lindat.mff.cuni.cz/repository/xmlui/handle/11234/1-3687;none +E-RIHS;Laboratoire de Recherche des Monuments Historiques;http://www.lrmh.fr/-Archives-70-.html;;;;;none +E-RIHS;Laboratory of Ion Beam Applications - Institute of Nuclear Research of the Hungarian Academy of Sciences;http://iba.atomki.hu/index_en.html;;;;;none +E-RIHS;Laboratório Nacional de Engenharia Civil;http://www.lnec.pt/en/;;;;;none +DARIAH;"Linguistic service; portal of many dictionaries";https://fran.si/;;;;;none +E-RIHS;MObile-laboratory VIsualization DAta;https://www.sciencedirect.com/science/article/pii/S1296207412000477;;;;;none +CLARIN;MPI for Psycholinguistics;https://centres.clarin.eu/centre/24;['none'];['open', 'restricted'];['https://archive.mpi.nl/oai2'];https://archive.mpi.nl/objects/lat:1839_00_0000_0000_0000_C816_3/datastreams/CMD/content?asOfDateTime=2018-03-02T11:00:00.000Z;none +CLARIN;Meertens Instituut/HUC;https://centres.clarin.eu/centre/23;['Dublin Core'];['embargoed', 'open', 'restricted', 'restricted'];['https://vlo.clarin.eu/data/Meertens_Institute_Metadata_Repository.html', 'http://www.meertens.knaw.nl/oai/oai_server.php', 'http://www.meertens.knaw.nl/oai/'];;none +DARIAH;NAKALA;https://www.nakala.fr/;['Dublin Core'];['open', 'restricted'];['https://documentation.huma-num.fr/content/14/141/en/how-to-use-nakala-.html', 'https://documentation.huma-num.fr/content/14/142/en/is-it-possible-to-upload-a-set-of-data-or-documents-in-nakala.html', 'https://www.nakala.fr/sparql'];;none +CESSDA-SP;NSD - Norwegian Centre for Research Data;http://www.nsd.no;['DDI - Data Documentation Initiative'];['closed', 'embargoed', 'open', 'restricted'];[];;none +E-RIHS;National Gallery;https://www.nationalgallery.org.uk/research/research-centre/archive;;;;;none +E-RIHS;National Institute for Heritage;https://patrimoniu.ro/;;;;;none +DARIAH;Online Digital Source and Annotation System;https://www.odsas.net/;;;;;none +DARIAH;Online dialectal resource;http://serverdbt.ilc.cnr.it/ALTWEB/;;;;;none +DARIAH;Online repository of texts in the field of Slavic studies;https://ispan.waw.pl/ireteslaw/;;;;https://ispan.waw.pl/ireteslaw/handle/20.500.12528/1906;none +DARIAH;Open Repository of Historical Sciences;http://lectorium.edu.pl/en/;;;;;none +E-RIHS;Opificio delle Pietre Dure - MIBAC;http://www.opificiodellepietredure.it/;;;;;none +CLARIN;PORTULAN CLARIN;https://centres.clarin.eu/centre/50;['none'];['open', 'restricted'];['https://portulanclarin.net/repository/oaipmh/?verb=Identify'];https://portulanclarin.net/repository/browse/yamcha-yet-another-multipurpose-chunk-annotator/ebf64a38d0e111e1a404080027e73ea2229d220cc2034f7989729aae7dfd25dd/;none +CESSDA-SP;PROGEDO Research Infrastructure;"http://www.progedo.fr; http://quetelet.progedo.fr/";['DDI - Data Documentation Initiative'];['closed', 'restricted'];[];;none +E-RIHS;Piattaforma Lessicografica Unica del Tesoro delle Origini;http://pluto.ovi.cnr.it/btv;;;;;none +DARIAH;Polish Literary Bibliography;https://pbl.ibl.waw.pl/;;;;;none +DARIAH;Portuguese Early Music Database;http://pemdatabase.eu/;;;;;none +CESSDA-SP;Portuguese Social Information Archive;http://www.apis.ics.ul.pt/;['DDI - Data Documentation Initiative', 'Dublin Core'];['embargoed', 'open', 'restricted'];[];;none +DARIAH;Repository of Institute of Slovenian Ethnology;http://isn3.zrc-sazu.si/etnofolk/OAI-2.0/oai.php?verb=ListRecords&metadataPrefix=eef;;;;;none +E-RIHS;Rijksdienst voor het Cultureel Erfgoed;https://www.cultureelerfgoed.nl/;;;;;none +DARIAH;Samuel Schwarz Library;http://bibliotecasamuelschwarz.fcsh.unl.pt/;;;;;none +DARIAH;Sapienza Digital Library;http://sapienzadigitallibrary.uniroma1.it/sito/;;;;;none +E-RIHS;Science for Life Laboratory;https://www.scilifelab.se/;['DataCite Metadata Schema', 'Dublin Core', 'RDF Data Cube Vocabulary'];['embargoed', 'open', 'restricted'];['https://docs.figshare.com'];https://scilifelab.figshare.com/articles/dataset/DNA_methylation_signatures_predict_cytogenetic_subtype_and_outcome_in_pediatric_acute_myeloid_leukemia_AML_/14666127/1;none +E-RIHS;Scientific Methodologies applied to Archaeology and Art - Università di Perugia;http://www.dcbb.unipg.it/23-risorse/137-smaart;;;;;none +CESSDA-SP;Slovak Archive of Social Data;http://sasd.sav.sk/sk/;;;;http://sasd.sav.sk/en/data_katalog_abs.php?id=sasd_2018001;none +CESSDA-SP;Social Science Data Archives;https://www.adp.fdv.uni-lj.si/;['DDI - Data Documentation Initiative'];['restricted'];[];http://www.adp.fdv.uni-lj.si/opisi/apc01/;none +CESSDA-SP;Social Sciences and Humanities Data Archive;https://www.sodha.be/;['DDI - Data Documentation Initiative', 'DataCite Metadata Schema', 'Dublin Core', 'OAI-ORE - Open Archives Initiative Object Reuse and Exchange', 'Repository-Developed Metadata Schemas'];['closed', 'embargoed', 'open', 'restricted'];[];https://www.sodha.be/citation?persistentId=doi:10.34934/DVN/GMZOXG;none +CLARIN;"Språkbanken; The Swedish language bank";https://centres.clarin.eu/centre/37;['none'];['open', 'restricted'];['http://www.lrec-conf.org/proceedings/lrec2012/pdf/248_Paper.pdf'];;none +E-RIHS;Stiftung Preussischer Kulturbesitz - Rathgen-Forschungslabor - Staatlichen Museen zu Berlin;https://www.smb.museum/museen-und-einrichtungen/rathgen-forschungslabor/service/dienstleistungen.html;;;;;none +CESSDA-SP;Swedish National Data Service;https://snd.gu.se/en;['DDI - Data Documentation Initiative', 'DataCite Metadata Schema', 'Dublin Core'];['open', 'restricted'];[];https://snd.gu.se/catalogue/study/2021-198/1/1;none +E-RIHS;Swedish National Heritage Board;https://www.raa.se/;;;;;none +CESSDA-SP;Swiss Centre of Expertise in the Social Sciences;http://forscenter.ch/en/;['DDI - Data Documentation Initiative'];['open', 'restricted'];[];https://forsbase.unil.ch/datasets/dataset-public-detail/10985/776/;none +E-RIHS;Synchrotron SOLEIL - Optimized Light Source of Intermediate Energy to LURE;https://www.synchrotron-soleil.fr/en;;;;;none +E-RIHS;Tesoro della Lingua Italiana delle Origini;http://tlio.ovi.cnr.it/TLIO/;;;;;none +CLARIN;The CLARIN Centre at University of Copenhagen;https://centres.clarin.eu/centre/14;['Repository-Developed Metadata Schemas'];['closed', 'open', 'restricted'];['http://clarin.dk/oaiprovider/'];https://repository.clarin.dk/repository/xmlui/handle/20.500.12115/45;http://repository.clarin.dk/repository/oai/request?verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:repository.clarin.dk:viewerPid +CLARIN;The ILC4CLARIN Centre at the Institute for Computational Linguistics;https://centres.clarin.eu/centre/34;['Dublin Core'];['closed', 'embargoed', 'open', 'restricted'];['http://dspace-clarin-it.ilc.cnr.it/repository/oai/openaire_data', 'http://dspace-clarin-it.ilc.cnr.it/repository/oai/request'];https://dspace-clarin-it.ilc.cnr.it/repository/xmlui/handle/20.500.11752/OPEN-556;http://dspace-clarin-it.ilc.cnr.it/repository/oai/request?verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:dspace-clarin-it.ilc.cnr.it:viewerPid +DARIAH;The ISIDORE research platform;https://isidore.science/;['Dublin Core'];['open'];['https://isidore.science/api', 'https://isidore.science/sparql'];;none +DARIAH;The Institutional Repository of the Universidade Nova de Lisboa;https://run.unl.pt/;;;;https://run.unl.pt/handle/10362/119340;none +CLARIN;The Language Bank of Finland;https://centres.clarin.eu/centre/17;['Dublin Core'];['closed', 'open', 'restricted'];['https://kielipankki.fi/md_api/que', 'https://www.kielipankki.fi/support/korpapi/'];;none +DARIAH;The TextGrid Repository is a digital preservation archive for human sciences research data;https://textgridrep.org/;['Dublin Core'];['open'];['https://www.textgridlab.org/doc/services/', 'https://www.textgridlab.org/doc/services/'];;none +DARIAH;The open archive HAL;https://hal.archives-ouvertes.fr/;;;;;none +DARIAH;This database of philosophical texts of the modern age;http://lie11.let.uniroma1.it:8777/iliesi/home.htm;;;;;none +CESSDA-SP;Tárki Data Archive;http://www.tarki.hu/en/;['none'];['restricted'];[];;none +CESSDA-SP;UK Data Service;http://ukdataservice.ac.uk;['none'];['closed', 'open', 'restricted', 'restricted', 'restricted'];['https://www.ukdataservice.ac.uk/media/455425/iassist_publicapisjws_1-0.pdf'];https://beta.ukdataservice.ac.uk/datacatalogue/doi/?id=7111#1;none +E-RIHS;University College London;https://www.ucl.ac.uk/;['DataCite Metadata Schema', 'Dublin Core'];['embargoed', 'open'];[];;none +E-RIHS;"University of Ljubljana; Faculty of Chemistry and Chemical Technology";https://www.uni-lj.si/academies_and_faculties/faculties/2013071111393229/;;;;;none +CLARIN;Universität des Saarlandes;https://centres.clarin.eu/centre/13;['Dublin Core'];['open', 'restricted'];['https://vlo.clarin.eu/data/UdS_CLARIN_D_Repository.html', 'http://fedora.clarin-d.uni-saarland.de/oaiprovider/'];;none +E-RIHS;Université de Lille;https://www.univ-lille.fr/home/;;;;https://lilloa.univ-lille.fr/handle/20.500.12210/9110;none +E-RIHS;Vrije Universiteit Amsterdam;https://www.vu.nl/en/;;;;;none