diff --git a/src/main/java/eu/sshoc/citation/service/wfconfigurator/impl/CitationHarvesterImpl.java b/src/main/java/eu/sshoc/citation/service/wfconfigurator/impl/CitationHarvesterImpl.java index b4a35f4..eb422a8 100644 --- a/src/main/java/eu/sshoc/citation/service/wfconfigurator/impl/CitationHarvesterImpl.java +++ b/src/main/java/eu/sshoc/citation/service/wfconfigurator/impl/CitationHarvesterImpl.java @@ -114,7 +114,8 @@ public class CitationHarvesterImpl { HttpURLConnection myURLConnection = (HttpURLConnection)myURL.openConnection(); //myURLConnection.setRequestProperty("Accept", "application/rdf+xml;q=0.5, application/vnd.citationstyles.csl+json;q=1.0"); - myURLConnection.setRequestProperty("Accept", "application/vnd.citationstyles.csl+json, application/rdf+xml\\"); + //curl -LH "Accept: application/vnd.citationstyles.csl+json, application/rdf+xml" https://doi.org/10.1080/01930826.2016.1186969 + myURLConnection.setRequestProperty("Accept", "application/vnd.citationstyles.csl+json, application/rdf+xml"); myURLConnection.setConnectTimeout(18000); InputStream mis = myURLConnection.getInputStream(); BufferedReader rd = new BufferedReader(new InputStreamReader(mis, Charset.forName("UTF-8"))); @@ -208,7 +209,9 @@ public class CitationHarvesterImpl { JSONObject jsondata=null; - jsondata=heup.getAPIMetadata(pid); + if (pid.contains(".handle.net/")) { + jsondata=heup.getAPIMetadata(pid); + } if (pid.startsWith("http://")) { pid=pid.replace("http://", "https://"); } @@ -222,12 +225,35 @@ public class CitationHarvesterImpl { if (jsondata==null || jsondata.length()==0) { System.out.println("getZenodoMetadataJSONLD ("+pid+")"); jsondata=heup.getZenodoMetadataJSONLD(pid); + } if (jsondata==null || jsondata.length()==0) { System.out.println("getMetaMetadata ("+pid+")"); jsondata=heup.getMetaMetadata(pid); - } + if (jsondata!=null && jsondata.has("properties")){ + try { + JSONObject testp=(JSONObject) jsondata.get("properties"); + String citdoi=""; + if (testp.has("citation_doi") && !(pid.contains("doi.org/"))) {//check if there is metadata in DOI RA + + citdoi = testp.getString("citation_doi"); + if (citdoi.trim()!="" && !citdoi.contains("doi.org/")) { + citdoi="https://doi.org/"+citdoi.trim(); + } + if (citdoi.trim()!="") + pid=citdoi; + + } + + } catch (JSONException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + } + + if (jsondata==null || jsondata.length()==0) { System.out.println("getLinkMetadata ("+pid+")"); @@ -249,6 +275,7 @@ public class CitationHarvesterImpl { // TODO Auto-generated catch block e1.printStackTrace(); } + if (pid.contains("doi.org") && (jsondata==null || jsondata.length()==0 || jsondata.isNull("citation string"))) { System.out.println("getCitationCSL ("+pid+")"); JSONObject jsoncsldata=null; @@ -256,14 +283,16 @@ public class CitationHarvesterImpl { if (jsondata==null) jsondata=jsoncsldata; else { - Iterator ite= jsoncsldata.keys(); - while (ite.hasNext()) { - String key=ite.next(); - try { - jsondata.put(key, jsoncsldata.get(key)); - } catch (JSONException e) { - // TODO Auto-generated catch block - e.printStackTrace(); + if(jsoncsldata!=null && jsoncsldata.length()>0) { + Iterator ite= jsoncsldata.keys(); + while (ite.hasNext()) { + String key=ite.next(); + try { + jsondata.put(key, jsoncsldata.get(key)); + } catch (JSONException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } } } } @@ -271,6 +300,25 @@ public class CitationHarvesterImpl { } + if(jsondata!=null && jsondata.has("properties") && pid.contains("doi.org")){ + + getCNCit=false; + JSONObject mydoijo=new JSONObject(); + mydoijo=getCitationMetadata(pid, token); + if (mydoijo!=null) { + try { + if (mydoijo.has("properties")) + jsondata.put("ra_properties", mydoijo.get("properties")); + + } catch (JSONException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + getCNCit=true; + + } + if (jsondata==null || jsondata.length()==0) { try { diff --git a/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/CSVHelper.java b/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/CSVHelper.java index 566dac4..88a1ecb 100644 --- a/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/CSVHelper.java +++ b/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/CSVHelper.java @@ -26,6 +26,11 @@ import java.net.URL; public class CSVHelper { static CSVHelper app = new CSVHelper(); + static BufferedReader br; + static CSVParser parser; + public CSVHelper() { + + } public static void main(String[] args) { try { @@ -62,7 +67,7 @@ public class CSVHelper { return new File(resource.toURI()); } } - + public CSVParser getRepoProfiles() throws URISyntaxException{ File repo=app.getFileFromResource("repoprofiles.csv"); @@ -79,7 +84,7 @@ public class CSVHelper { } return null; } - + public String getRepoApi(String repositoryurl) throws URISyntaxException{ File repo=app.getFileFromResource("repoprofiles.csv"); @@ -87,7 +92,7 @@ public class CSVHelper { BufferedReader br = new BufferedReader(new FileReader(repo)); CSVParser parser = CSVFormat.DEFAULT.withDelimiter(';').withHeader().parse(br); ) { - + int slashpos= repositoryurl.indexOf('/', 8); String repoid=repositoryurl.substring(0, slashpos); System.out.println("repoid "+repoid); @@ -101,7 +106,7 @@ public class CSVHelper { System.out.println(record.get("Repository name") +" - "+record.get("api")+" - "+record.get("viewer")); return record.get("viewer"); } - + } return "none"; @@ -111,5 +116,59 @@ public class CSVHelper { } return null; } + public String getRepoName(String repositoryurl) throws URISyntaxException{ + + File repo=app.getFileFromResource("repoprofiles.csv"); + try( + BufferedReader br = new BufferedReader(new FileReader(repo)); + CSVParser parser = CSVFormat.DEFAULT.withDelimiter(';').withHeader().parse(br); + ) { + + int slashpos= repositoryurl.indexOf('/', 8); + String repoid=repositoryurl.substring(0, slashpos); + //System.out.println("repoid "+repoid); + for(CSVRecord record : parser) { + String apiurl=record.get("api"); + if(record.get("Website").trim().contains(repoid) || apiurl.contains(repoid) || record.get("dns").trim().contains(repoid)) { + System.out.println(record.get("Repository name") +" - "+record.get("api")+" - "+record.get("viewer")); + return record.get("Repository name"); + } + + } + return "none"; + + } + catch (Exception e) { + System.out.println(e); + } + return "none"; + } + public String getRepoURL(String repositoryurl) throws URISyntaxException{ + + File repo=app.getFileFromResource("repoprofiles.csv"); + try( + BufferedReader br = new BufferedReader(new FileReader(repo)); + CSVParser parser = CSVFormat.DEFAULT.withDelimiter(';').withHeader().parse(br); + ) { + + int slashpos= repositoryurl.indexOf('/', 8); + String repoid=repositoryurl.substring(0, slashpos); + //System.out.println("repoid "+repoid); + for(CSVRecord record : parser) { + String apiurl=record.get("api"); + if(record.get("Website").trim().contains(repoid) || apiurl.contains(repoid) || record.get("dns").trim().contains(repoid)) { + System.out.println(record.get("Repository name") +" - "+record.get("api")+" - "+record.get("viewer")); + return record.get("Website"); + } + + } + return "none"; + + } + catch (Exception e) { + System.out.println(e); + } + return "none"; + } } diff --git a/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/HeuristicParsers.java b/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/HeuristicParsers.java index f63c3bd..f115195 100644 --- a/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/HeuristicParsers.java +++ b/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/HeuristicParsers.java @@ -50,6 +50,7 @@ public class HeuristicParsers { public JSONObject getAPIMetadata(String pid) { JSONObject metajsondata=new JSONObject(); JSONObject jsondata=new JSONObject(); + JSONObject jsonrepodata=new JSONObject(); try { //Check if it is a handle String dnsPid=""; @@ -73,10 +74,14 @@ public class HeuristicParsers { if (tmp.getString("type").contains("URL")) { JSONObject urlob=(JSONObject) tmp.get("data"); dnsPid=urlob.getString("value"); + } - } - + } } + + if (dnsPid.trim()=="") + return (jsondata); + System.out.println("pid "+dnsPid); String apiURL=repo.getRepoApi(dnsPid); @@ -103,6 +108,16 @@ public class HeuristicParsers { } } + + if (metajsondata!=null && metajsondata.length()>0) { + String rname=repo.getRepoName(dnsPid); + String rws=repo.getRepoURL(dnsPid); + if (rname!="none") + jsonrepodata.put("name", rname); + if (rws!="none") + jsonrepodata.put("URL", rws); + + } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -110,7 +125,8 @@ public class HeuristicParsers { if (metajsondata!=null && metajsondata.length()>0) { try { jsondata.put("citation string", "na"); - jsondata.put("metadata source", "Repository API"); + + jsondata.put("metadata source", jsonrepodata); jsondata.put("properties", metajsondata); } catch (JSONException e) { // TODO Auto-generated catch block @@ -125,7 +141,7 @@ public class HeuristicParsers { //Document doc = Jsoup.connect(pid).get(); Document doc = SSLHelper.getConnection(pid).ignoreContentType(true) - .userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0").timeout(20 * 1000).get(); + .userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0").referrer("http://www.google.com").timeout(20 * 1000).get(); Elements cmdheader=doc.getElementsByTag("cmd:Header"); if (cmdheader!=null && cmdheader.size()>0) { JSONObject talarproperties=new JSONObject(); @@ -135,10 +151,12 @@ public class HeuristicParsers { return jsondata; } + JSONObject jsonproperties=new JSONObject(); //application/ld+json Elements scripts = doc.getElementsByTag("script"); for (Element script: scripts) {//get metadata from