changes for handles
This commit is contained in:
parent
77c5fc1515
commit
7fbb65680c
|
@ -36,7 +36,7 @@ public class SwaggerConfig {
|
||||||
@Bean
|
@Bean
|
||||||
public Docket api() {
|
public Docket api() {
|
||||||
return new Docket(DocumentationType.SWAGGER_2)
|
return new Docket(DocumentationType.SWAGGER_2)
|
||||||
.groupName("citationservice-api")
|
.groupName("SSHOC-citationservice-api")
|
||||||
.apiInfo(apiInfo())
|
.apiInfo(apiInfo())
|
||||||
.select()
|
.select()
|
||||||
//.apis(!(RequestHandlerSelectors.withClassAnnotation(JsonIgnore.class))
|
//.apis(!(RequestHandlerSelectors.withClassAnnotation(JsonIgnore.class))
|
||||||
|
|
|
@ -47,7 +47,7 @@ public class CitationHarvester {
|
||||||
}*/
|
}*/
|
||||||
}
|
}
|
||||||
|
|
||||||
@ApiOperation(value = "Returns a list of citations from specific citation source",
|
@ApiOperation(value = "Returns a list of citations from specific citation source (implementation in progress)",
|
||||||
notes = "A client with a valid identifier can invoke this web service to obtain a list of citations from a specified source",
|
notes = "A client with a valid identifier can invoke this web service to obtain a list of citations from a specified source",
|
||||||
response = String.class)
|
response = String.class)
|
||||||
@RequestMapping(value="/citharvester/getcitationlist", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
|
@RequestMapping(value="/citharvester/getcitationlist", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
|
||||||
|
@ -66,7 +66,7 @@ public class CitationHarvester {
|
||||||
return wfc.getCitation(sourceid, pid, token).toString();
|
return wfc.getCitation(sourceid, pid, token).toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ApiOperation(value = "Returns a metadata of a citation via Content Negotiated requests",
|
@ApiOperation(value = "Returns a metadata record of a citation via Content Negotiated requests",
|
||||||
notes = "A client with a valid identifier can invoke this web service to to retrieve the citation metadata using DOI content negotiated requests",
|
notes = "A client with a valid identifier can invoke this web service to to retrieve the citation metadata using DOI content negotiated requests",
|
||||||
response = String.class)
|
response = String.class)
|
||||||
@RequestMapping(value="/citharvester/getmetadatacn", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
|
@RequestMapping(value="/citharvester/getmetadatacn", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
|
||||||
|
@ -75,7 +75,7 @@ public class CitationHarvester {
|
||||||
return wfc.getCitationMetadata(pid, token).toString();
|
return wfc.getCitationMetadata(pid, token).toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ApiOperation(value = "Returns formatted citation using content negotiated request",
|
@ApiOperation(value = "Returns formatted citation using content negotiated requests",
|
||||||
notes = "A client with a valid identifier can invoke this web service to obtain a formatted citation, the text/bibliography content type is used ",
|
notes = "A client with a valid identifier can invoke this web service to obtain a formatted citation, the text/bibliography content type is used ",
|
||||||
response = String.class)
|
response = String.class)
|
||||||
@RequestMapping(value="/citharvester/getformcit", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
|
@RequestMapping(value="/citharvester/getformcit", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
|
||||||
|
@ -85,8 +85,8 @@ public class CitationHarvester {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ApiOperation(value = "Returns a metadata of a citation parsing the HTML landing page",
|
@ApiOperation(value = "Returns a metadata record for a citation searching in the available metadata repositories",
|
||||||
notes = "A client with a valid identifier can invoke this web service to obtain metadata of a citation by parsing its HTML landing page",
|
notes = "A client with a valid identifier can invoke this web service to obtain metadata of a citation by searching in the available metadata repositories (DOI repositories, landing pages, etc...) ",
|
||||||
response = String.class)
|
response = String.class)
|
||||||
@RequestMapping(value="/citharvester/getmetadatahtml", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
|
@RequestMapping(value="/citharvester/getmetadatahtml", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
|
||||||
public String getCitationMetadataFromHTML(@RequestParam(value="pid") String pid, @RequestParam(value="token") String token) {
|
public String getCitationMetadataFromHTML(@RequestParam(value="pid") String pid, @RequestParam(value="token") String token) {
|
||||||
|
|
|
@ -64,7 +64,7 @@ public class CitationService {
|
||||||
}
|
}
|
||||||
|
|
||||||
@ApiOperation(value = "Returns a list of citations",
|
@ApiOperation(value = "Returns a list of citations",
|
||||||
notes = "A client with a valid identifier can invoke this web service to obtain a list of citations stored in SSHOC citation repository",
|
notes = "A client with a valid identifier can invoke this web service to obtain a list of citations",
|
||||||
response = String.class)
|
response = String.class)
|
||||||
@RequestMapping(value="/citservice/getcitation", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
|
@RequestMapping(value="/citservice/getcitation", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
|
||||||
public String getServiceDescriptions(@RequestParam(value="sshocid") String sshocid, @RequestParam(value="token") String token) {
|
public String getServiceDescriptions(@RequestParam(value="sshocid") String sshocid, @RequestParam(value="token") String token) {
|
||||||
|
@ -72,8 +72,8 @@ public class CitationService {
|
||||||
return wfc.getServiceDescriptions(sshocid, token);
|
return wfc.getServiceDescriptions(sshocid, token);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ApiOperation(value = "Search for citation",
|
@ApiOperation(value = "Search for citation (implementation in progress)",
|
||||||
notes = "A client with a valid identifier can invoke this web service to search the SSHOC repository for citations",
|
notes = "A client with a valid identifier can invoke this web service to search for citations",
|
||||||
response = String.class)
|
response = String.class)
|
||||||
@RequestMapping(value="/citservice/searchcitation", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
|
@RequestMapping(value="/citservice/searchcitation", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
|
||||||
public String searchServiceDescriptions(@RequestParam(value="sshocid") String sshocid, @RequestParam(value="query") String query, @RequestParam(value="token") String token){
|
public String searchServiceDescriptions(@RequestParam(value="sshocid") String sshocid, @RequestParam(value="query") String query, @RequestParam(value="token") String token){
|
||||||
|
@ -81,8 +81,8 @@ public class CitationService {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ApiOperation(value = "Save a citation",
|
@ApiOperation(value = "Save a citation (implementation in progress)",
|
||||||
notes = "A client with a valid identifier can invoke this web service to save the citation in the SSHOC repository",
|
notes = "A client with a valid identifier can invoke this web service to save the citation",
|
||||||
response = String.class)
|
response = String.class)
|
||||||
@RequestMapping(value="/citservice/savecitation", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
|
@RequestMapping(value="/citservice/savecitation", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
|
||||||
public String saveCitations(@RequestParam(value="sshocid") String sshocid, @RequestParam(value="description") String description, @RequestParam(value="token") String token){
|
public String saveCitations(@RequestParam(value="sshocid") String sshocid, @RequestParam(value="description") String description, @RequestParam(value="token") String token){
|
||||||
|
@ -90,7 +90,7 @@ public class CitationService {
|
||||||
|
|
||||||
return wfc.saveCitations(sshocid, description, token);
|
return wfc.saveCitations(sshocid, description, token);
|
||||||
}
|
}
|
||||||
@ApiOperation(value = "Search for citations ",
|
@ApiOperation(value = "Search for citations (implementation in progress)",
|
||||||
notes = "A client with a valid identifier can invoke this web service to search the SSHOC repository for citations",
|
notes = "A client with a valid identifier can invoke this web service to search the SSHOC repository for citations",
|
||||||
response = String.class)
|
response = String.class)
|
||||||
@RequestMapping(value="/citservice/searchcite", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
|
@RequestMapping(value="/citservice/searchcite", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
|
||||||
|
|
|
@ -4,7 +4,9 @@ import java.io.BufferedReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.PrintWriter;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
import java.io.StringWriter;
|
||||||
import java.net.HttpURLConnection;
|
import java.net.HttpURLConnection;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.ProtocolException;
|
import java.net.ProtocolException;
|
||||||
|
@ -22,15 +24,28 @@ import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
|
import eu.sshoc.citation.service.wfconfigurator.util.Consts;
|
||||||
import eu.sshoc.citation.service.wfconfigurator.util.HeuristicParsers;
|
import eu.sshoc.citation.service.wfconfigurator.util.HeuristicParsers;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public class CitationHarvesterImpl {
|
public class CitationHarvesterImpl {
|
||||||
|
|
||||||
|
HeuristicParsers heup= new HeuristicParsers();
|
||||||
|
StringWriter sw = new StringWriter();
|
||||||
|
PrintWriter pw = new PrintWriter(sw);
|
||||||
|
String sid="10.1007/s11082-018-1327-1"; //10.1126/science.169.3946.635
|
||||||
|
URL myURL;
|
||||||
|
Boolean getCNCit=true;
|
||||||
|
|
||||||
/**
 * Creates a harvester and initialises the {@code myURL} field with the
 * doi.org address of the sample DOI stored in {@code sid}.
 */
public CitationHarvesterImpl(){
    try {
        // Assign the field itself. Declaring a new local named myURL here
        // would shadow the field and leave it null for every later use.
        myURL = new URL("https://doi.org/"+sid);
    } catch (MalformedURLException e) {
        // Not expected: the protocol prefix is hard-coded.
        e.printStackTrace();
    }
}
|
||||||
|
|
||||||
|
@ -85,25 +100,46 @@ public class CitationHarvesterImpl {
|
||||||
return jsondata;
|
return jsondata;
|
||||||
}
|
}
|
||||||
public JSONObject getCitationMetadata(String pid, String token){
|
public JSONObject getCitationMetadata(String pid, String token){
|
||||||
JSONObject jsondata=null;
|
JSONObject jsondata=new JSONObject();
|
||||||
|
JSONObject jsonproperties=null;
|
||||||
|
//Consts myC= new Consts();
|
||||||
|
//String sid="10.1007/s11082-018-1327-1";
|
||||||
try {
|
try {
|
||||||
String sid="10.1007/s11082-018-1327-1";
|
|
||||||
if (pid!=null && pid.trim()!="" && !pid.trim().equalsIgnoreCase("test"))
|
if (pid!=null && pid.trim()!="" && !pid.trim().equalsIgnoreCase("test"))
|
||||||
sid=pid.trim();
|
sid=pid.trim();
|
||||||
URL myURL = new URL("https://doi.org/"+sid);
|
//URL myURL = new URL("https://doi.org/"+sid);
|
||||||
|
if (sid.startsWith("http://") || sid.startsWith("https://"))
|
||||||
|
myURL = new URL(sid);
|
||||||
|
|
||||||
HttpURLConnection myURLConnection = (HttpURLConnection)myURL.openConnection();
|
HttpURLConnection myURLConnection = (HttpURLConnection)myURL.openConnection();
|
||||||
myURLConnection.setRequestProperty("Accept", "application/rdf+xml;q=0.5, application/vnd.citationstyles.csl+json;q=1.0");
|
myURLConnection.setRequestProperty("Accept", "application/rdf+xml;q=0.5, application/vnd.citationstyles.csl+json;q=1.0");
|
||||||
|
myURLConnection.setConnectTimeout(18000);
|
||||||
InputStream mis = myURLConnection.getInputStream();
|
InputStream mis = myURLConnection.getInputStream();
|
||||||
BufferedReader rd = new BufferedReader(new InputStreamReader(mis, Charset.forName("UTF-8")));
|
BufferedReader rd = new BufferedReader(new InputStreamReader(mis, Charset.forName("UTF-8")));
|
||||||
String jsonText = readAll(rd);
|
String jsonText = readAll(rd);
|
||||||
jsondata = new JSONObject(jsonText);
|
jsonproperties = new JSONObject(jsonText);
|
||||||
|
//clean object
|
||||||
|
for (String field : Consts.unusedFields) {
|
||||||
|
if (jsonproperties.has(field))
|
||||||
|
jsonproperties.remove(field);
|
||||||
|
}
|
||||||
|
jsondata.put("properties", jsonproperties);
|
||||||
|
if (getCNCit)
|
||||||
|
jsondata.put("citation string", getCitationCSL(pid, token).get("citation string"));
|
||||||
System.out.println(jsondata.toString());
|
System.out.println(jsondata.toString());
|
||||||
//JSONObject jsoncit = new JSONObject();
|
//JSONObject jsoncit = new JSONObject();
|
||||||
//jsondata.put("citation", getStandardCitation("https://doi.org/"+sid));
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
|
||||||
e.printStackTrace();
|
//e.printStackTrace();
|
||||||
|
|
||||||
|
e.printStackTrace(pw);
|
||||||
|
String sStackTrace = sw.toString(); // stack trace as a string
|
||||||
|
if (sStackTrace.length()>800)
|
||||||
|
System.out.println(sStackTrace.substring(0, 799));
|
||||||
|
pw.flush();
|
||||||
|
System.out.println ("***************** ("+sid+") getCitationMetadata, content negotiation not available, maybe later?");
|
||||||
|
//return jsondata;
|
||||||
}
|
}
|
||||||
|
|
||||||
return jsondata;
|
return jsondata;
|
||||||
|
@ -112,32 +148,88 @@ public class CitationHarvesterImpl {
|
||||||
public JSONObject getCitationCSL(String pid, String token){
|
public JSONObject getCitationCSL(String pid, String token){
|
||||||
String jsonText="";
|
String jsonText="";
|
||||||
JSONObject jsondata=null;
|
JSONObject jsondata=null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
String sid="10.1007/s11082-018-1327-1"; //10.1126/science.169.3946.635
|
|
||||||
if (pid!=null && pid.trim()!="" && !pid.trim().equalsIgnoreCase("test"))
|
if (pid!=null && pid.trim()!="" && !pid.trim().equalsIgnoreCase("test"))
|
||||||
sid=pid.trim();
|
sid=pid.trim();
|
||||||
URL myURL = new URL("https://doi.org/"+sid);
|
|
||||||
|
if (sid.startsWith("http://") || sid.startsWith("https://")) {
|
||||||
|
if (sid.startsWith("http://")) {
|
||||||
|
sid=sid.replace("http://", "https://");
|
||||||
|
}
|
||||||
|
myURL = new URL(sid);
|
||||||
|
}
|
||||||
//URL myURL = new URL("https://doi.org/10.1126/science.169.3946.635");
|
//URL myURL = new URL("https://doi.org/10.1126/science.169.3946.635");
|
||||||
HttpURLConnection myURLConnection = (HttpURLConnection)myURL.openConnection();
|
HttpURLConnection myURLConnection = (HttpURLConnection)myURL.openConnection();
|
||||||
myURLConnection.setRequestProperty("Accept", "text/x-bibliography; style=harvard3; locale=en-EN");
|
myURLConnection.setRequestProperty("Accept", "text/x-bibliography");
|
||||||
|
myURLConnection.setConnectTimeout(18000); //set timeout to 18 seconds
|
||||||
InputStream mis = myURLConnection.getInputStream();
|
InputStream mis = myURLConnection.getInputStream();
|
||||||
BufferedReader rd = new BufferedReader(new InputStreamReader(mis, Charset.forName("UTF-8")));
|
BufferedReader rd = new BufferedReader(new InputStreamReader(mis, Charset.forName("UTF-8")));
|
||||||
jsonText = readAll(rd);
|
jsonText = readAll(rd);
|
||||||
|
|
||||||
System.out.println(jsonText);
|
System.out.println("citation string "+ jsonText);
|
||||||
jsondata = new JSONObject();
|
jsondata = new JSONObject();
|
||||||
jsondata.put("citation", jsonText);
|
if (!jsonText.isEmpty())
|
||||||
|
jsondata.put("citation string", jsonText);
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
|
||||||
e.printStackTrace();
|
e.printStackTrace(pw);
|
||||||
|
String sStackTrace = sw.toString(); // stack trace as a string
|
||||||
|
if (sStackTrace.length()>200)
|
||||||
|
System.out.println(sStackTrace.substring(0, 199));
|
||||||
|
System.out.println ("####################### ("+pid+") getCitationCSL, citation by content negotiation not available");
|
||||||
}
|
}
|
||||||
|
|
||||||
return jsondata;
|
return jsondata;
|
||||||
}
|
}
|
||||||
public JSONObject getCitationMetadataFromHTML(String pid, String token){
|
public JSONObject getCitationMetadataFromHTML(String pid, String token){
|
||||||
HeuristicParsers heup= new HeuristicParsers();
|
|
||||||
return heup.getZenodoMetadata(pid);
|
JSONObject jsondata=null;
|
||||||
|
|
||||||
|
if (pid.startsWith("http://")) {
|
||||||
|
pid=pid.replace("http://", "https://");
|
||||||
|
}
|
||||||
|
|
||||||
|
//first landing pages
|
||||||
|
|
||||||
|
System.out.println("getZenodoMetadataJSONLD ("+pid+")");
|
||||||
|
jsondata=heup.getZenodoMetadataJSONLD(pid);
|
||||||
|
|
||||||
|
if (jsondata==null || jsondata.length()==0) {
|
||||||
|
System.out.println("getMetaMetadata ("+pid+")");
|
||||||
|
jsondata=heup.getMetaMetadata(pid);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (jsondata==null || jsondata.length()==0) {
|
||||||
|
System.out.println("getLinkMetadata ("+pid+")");
|
||||||
|
jsondata= heup.getLinkMetadata(pid);
|
||||||
|
}
|
||||||
|
|
||||||
|
//then DOI service providers...
|
||||||
|
|
||||||
|
if (jsondata==null || jsondata.length()==0) {
|
||||||
|
getCNCit=true;
|
||||||
|
System.out.println("getCitationMetadata ("+pid+")");
|
||||||
|
jsondata=getCitationMetadata(pid, token);
|
||||||
|
getCNCit=true;
|
||||||
|
}
|
||||||
|
if (jsondata==null || jsondata.length()==0) {
|
||||||
|
System.out.println("getCitationCSL ("+pid+")");
|
||||||
|
jsondata=getCitationCSL(pid, token);
|
||||||
|
}
|
||||||
|
if (jsondata==null || jsondata.length()==0) {
|
||||||
|
try {
|
||||||
|
jsondata=new JSONObject();
|
||||||
|
jsondata.put("citation string", "na");
|
||||||
|
} catch (JSONException e) {
|
||||||
|
// TODO Auto-generated catch block
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return jsondata;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,48 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2020 VRE4EIC Consortium
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*******************************************************************************/
|
||||||
|
package eu.sshoc.citation.service.wfconfigurator.util;
|
||||||
|
|
||||||
|
import java.util.Vector;
|
||||||
|
|
||||||
|
/**
 * Shared lists of field names.
 *
 * Both lists are filled once, when the class is loaded, so they are complete
 * for callers that only read the static fields and never create an instance.
 */
public class Consts {

    /** Keys that callers strip from a retrieved metadata object. */
    public static final Vector<String> unusedFields = listOf(
            "published-print",
            "published-online",
            "is-referenced-by-count",

            "journal-issue",
            "content-domain",
            "alternative-id",
            "relation",
            "score",

            "member",
            "inference_count",
            "assertion",
            "issue",
            "indexed");

    /** Names carrying the {@code dcterms:} prefix. */
    public static final Vector<String> dcterms = listOf(
            "dcterms:title",
            "dcterms:language",
            "dcterms:accessRights",
            "dcterms:creator",
            "dcterms:service");

    /**
     * Kept for source compatibility with code that instantiates this class.
     * It no longer adds anything: filling the static lists here left them
     * empty until the first instantiation and duplicated every entry on each
     * further one.
     */
    public Consts () {
    }

    /** Builds a vector holding the given values in order. */
    private static Vector<String> listOf(String... values) {
        Vector<String> result = new Vector<String>(values.length);
        for (String value : values) {
            result.add(value);
        }
        return result;
    }
}
|
|
@ -15,17 +15,228 @@
|
||||||
*******************************************************************************/
|
*******************************************************************************/
|
||||||
package eu.sshoc.citation.service.wfconfigurator.util;
|
package eu.sshoc.citation.service.wfconfigurator.util;
|
||||||
|
|
||||||
|
import java.io.FileWriter;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.PrintWriter;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.io.StringWriter;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.Stack;
|
||||||
|
import java.util.Vector;
|
||||||
|
|
||||||
import org.json.JSONArray;
|
import org.json.JSONArray;
|
||||||
|
import org.json.JSONException;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
|
import javax.json.stream.JsonParser;
|
||||||
|
import javax.json.Json;
|
||||||
|
|
||||||
public class HeuristicParsers {
|
public class HeuristicParsers {
|
||||||
|
|
||||||
|
StringWriter sw = new StringWriter();
|
||||||
|
PrintWriter pw = new PrintWriter(sw);
|
||||||
|
|
||||||
|
public JSONObject getZenodoMetadataJSONLD(String pid) {
|
||||||
|
JSONObject jsondata=new JSONObject();
|
||||||
|
try {
|
||||||
|
//Document doc = Jsoup.connect(pid).get();
|
||||||
|
|
||||||
|
Document doc = SSLHelper.getConnection(pid).ignoreContentType(true)
|
||||||
|
.userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0").timeout(20 * 1000).get();
|
||||||
|
Elements cmdheader=doc.getElementsByTag("cmd:Header");
|
||||||
|
if (cmdheader!=null && cmdheader.size()>0) {
|
||||||
|
JSONObject talarproperties=new JSONObject();
|
||||||
|
talarproperties=getTalarData(doc);
|
||||||
|
if (talarproperties!=null)
|
||||||
|
jsondata.put("properties", talarproperties);
|
||||||
|
return jsondata;
|
||||||
|
}
|
||||||
|
|
||||||
|
//application/ld+json
|
||||||
|
Elements scripts = doc.getElementsByTag("script");
|
||||||
|
for (Element script: scripts) {//get metadata from <script>
|
||||||
|
String type = script.attr("type");
|
||||||
|
if (type!=null && !type.trim().isEmpty() &&
|
||||||
|
(type.trim().equalsIgnoreCase("application/ld+json") || type.trim().equalsIgnoreCase("application/json"))) {
|
||||||
|
|
||||||
|
String jsonStr=script.toString().trim();
|
||||||
|
int headerLimit=jsonStr.indexOf(">");
|
||||||
|
String tmpStr=jsonStr.trim().substring(headerLimit+1);
|
||||||
|
// String jsonStr=script.outerHtml();
|
||||||
|
// String tmpStr=jsonStr.replace("<script type=\"application/ld+json\">", "");
|
||||||
|
String jsonStrraw1=tmpStr.trim().replace("</script>", "");
|
||||||
|
String jsonStrraw = jsonStrraw1.replace("\r\n", "");
|
||||||
|
jsonStr = jsonStrraw.replace('\r', ' ');
|
||||||
|
//System.out.println(jsonStr);
|
||||||
|
//getGraph(jsonStr);
|
||||||
|
JSONObject jsonproperties=new JSONObject();
|
||||||
|
jsonproperties=new JSONObject(jsonStr.trim());
|
||||||
|
if (jsondata.length()>0 && jsondata.has("properties")) {
|
||||||
|
jsondata.put("additional_properties", jsonproperties);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
jsondata.put("properties", jsonproperties);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//get the citation string
|
||||||
|
|
||||||
|
Element citationid = doc.getElementById("invenio-csl");
|
||||||
|
//Elements citationclass = doc.getElementsByAttributeValue("class", "citation-select");
|
||||||
|
Element citationclass = doc.select("span.citation-select").first();
|
||||||
|
String citationStr="";
|
||||||
|
if (citationid!=null) {
|
||||||
|
Elements cits= citationid.getElementsByTag("invenio-csl");
|
||||||
|
|
||||||
|
Element cit=cits.first();
|
||||||
|
|
||||||
|
citationStr=cit.attr("ng-init");
|
||||||
|
}else {
|
||||||
|
if (citationclass!=null) {
|
||||||
|
citationStr= citationclass.text();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!citationStr.trim().isEmpty()) {
|
||||||
|
jsondata.put("citation string", citationStr.trim());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
|
||||||
|
//e.printStackTrace();
|
||||||
|
e.printStackTrace(pw);
|
||||||
|
String sStackTrace = sw.toString(); // stack trace as a string
|
||||||
|
pw.flush();
|
||||||
|
if (sStackTrace.length()>1500)
|
||||||
|
System.out.println(sStackTrace.substring(0, 1499));
|
||||||
|
System.out.println ("("+pid+") not available");
|
||||||
|
}
|
||||||
|
|
||||||
|
return jsondata;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public JSONObject getLinkMetadata(String pid) {
|
||||||
|
|
||||||
|
JSONObject jsondata=new JSONObject();
|
||||||
|
|
||||||
|
JSONObject jsonproperties=new JSONObject();
|
||||||
|
try {
|
||||||
|
//Document doc = Jsoup.connect(pid).timeout(15 * 1000).get();
|
||||||
|
Document doc = SSLHelper.getConnection(pid).timeout(15 * 1000).get();
|
||||||
|
Elements links = doc.getElementsByTag("link");
|
||||||
|
for (Element link: links) {//get metadata from <link>
|
||||||
|
String rel = link.attr("rel");
|
||||||
|
String href= link.attr("href");
|
||||||
|
if (!rel.trim().isEmpty() &&
|
||||||
|
rel.trim().equalsIgnoreCase("metadata")){
|
||||||
|
|
||||||
|
jsonproperties.put(rel, href);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
if (jsonproperties.length()>0)
|
||||||
|
jsondata.put("properties", jsonproperties);
|
||||||
|
System.out.println(jsondata);
|
||||||
|
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
|
||||||
|
e.printStackTrace(pw);
|
||||||
|
String sStackTrace = sw.toString(); // stack trace as a string
|
||||||
|
if (sStackTrace.length()>200)
|
||||||
|
System.out.println(sStackTrace.substring(0, 199));
|
||||||
|
pw.flush();
|
||||||
|
System.out.println ("&&&&&&&&&&&&&&&&&&&&&&&&& ("+pid+") link metadata not available");
|
||||||
|
}
|
||||||
|
|
||||||
|
return jsondata;
|
||||||
|
|
||||||
|
}
|
||||||
|
public JSONObject getMetaMetadata(String pid){
|
||||||
|
JSONObject jsondata=new JSONObject();
|
||||||
|
// JSONArray authors = new JSONArray();
|
||||||
|
JSONObject jsonproperties=new JSONObject();
|
||||||
|
|
||||||
|
HashMap<String, String> authors_affiliation = new HashMap<String, String>();
|
||||||
|
try {
|
||||||
|
//Document doc = Jsoup.connect(pid).timeout(10 * 1000).get();
|
||||||
|
Document doc = SSLHelper.getConnection(pid).timeout(15 * 1000).get();
|
||||||
|
//String title = doc.title();
|
||||||
|
Elements cmdheader=doc.getElementsByTag("cmd:Header");
|
||||||
|
if (cmdheader!=null && cmdheader.size()>0) {
|
||||||
|
JSONObject talarproperties=new JSONObject();
|
||||||
|
talarproperties=getTalarData(doc);
|
||||||
|
if (talarproperties!=null)
|
||||||
|
jsondata.put("properties", talarproperties);
|
||||||
|
return jsondata;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Elements metas = doc.getElementsByTag("meta");
|
||||||
|
for (Element meta: metas) {//get metadata from <meta>
|
||||||
|
String name = meta.attr("name");
|
||||||
|
String property = meta.attr("property");
|
||||||
|
String content = meta.attr("content");
|
||||||
|
if (!name.trim().isEmpty() &&
|
||||||
|
!content.trim().isEmpty() &&
|
||||||
|
(name.toLowerCase().trim().startsWith("dc.") ||
|
||||||
|
name.trim().startsWith("citation_"))){
|
||||||
|
|
||||||
|
jsonproperties.put(name, content);
|
||||||
|
}
|
||||||
|
if (name.trim().contains("citation_author")) {//to be used to double check authors
|
||||||
|
authors_affiliation.put(content, "na");
|
||||||
|
}
|
||||||
|
if (!property.trim().isEmpty() && !content.trim().isEmpty())
|
||||||
|
jsonproperties.put(property.trim(), content.trim());
|
||||||
|
|
||||||
|
}
|
||||||
|
//get metadata about authors and affiliation from <div vocab="http://schema.org/"> section
|
||||||
|
|
||||||
|
Element record = doc.select("div[vocab]").first();
|
||||||
|
//search for vocab
|
||||||
|
if (record!=null) {
|
||||||
|
Elements items = record.select("span[property]"); // span with property attribute
|
||||||
|
|
||||||
|
for (Element item: items) {
|
||||||
|
String name = item.attr("property");
|
||||||
|
String val=item.attr("value");
|
||||||
|
if (!name.trim().isEmpty() && !val.trim().isEmpty()) {
|
||||||
|
jsonproperties.put(name, val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//<div class="citation-popup" data-style-name="harvard" title="Harvard Citation" style="display:none;">
|
||||||
|
Element citationstring= doc.select("div[data-style-name]").first();
|
||||||
|
if (citationstring!=null) {
|
||||||
|
jsondata.put("citation string", citationstring.text().trim());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (jsonproperties!=null && jsonproperties.length()>0)
|
||||||
|
jsondata.put("properties", jsonproperties);
|
||||||
|
|
||||||
|
if (jsondata==null || jsondata.length()==0) {
|
||||||
|
jsondata=getZenodoData(doc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
|
||||||
|
e.printStackTrace(pw);
|
||||||
|
String sStackTrace = sw.toString(); // stack trace as a string
|
||||||
|
if (sStackTrace.length()>200)
|
||||||
|
System.out.println(sStackTrace.substring(0, 199));
|
||||||
|
pw.flush();
|
||||||
|
System.out.println ("^^^^^^^^^^^^^^^^^^^^^ ("+pid+") meta metadata not available");
|
||||||
|
}
|
||||||
|
|
||||||
|
return jsondata;
|
||||||
|
}
|
||||||
public JSONObject getZenodoMetadata(String pid){
|
public JSONObject getZenodoMetadata(String pid){
|
||||||
JSONObject jsondata=new JSONObject();
|
JSONObject jsondata=new JSONObject();
|
||||||
JSONArray authors = new JSONArray();
|
JSONArray authors = new JSONArray();
|
||||||
|
@ -70,7 +281,8 @@ public class HeuristicParsers {
|
||||||
}
|
}
|
||||||
|
|
||||||
jsondata.put("authors", authors);
|
jsondata.put("authors", authors);
|
||||||
//search for alternate link
|
|
||||||
|
|
||||||
|
|
||||||
Element head = doc.select("head").first();
|
Element head = doc.select("head").first();
|
||||||
Elements links= head.select("link[type]");
|
Elements links= head.select("link[type]");
|
||||||
|
@ -87,13 +299,419 @@ public class HeuristicParsers {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//get the citation string
|
||||||
|
|
||||||
|
Element citationid = doc.getElementById("invenio-csl");
|
||||||
|
|
||||||
|
Elements cits= citationid.getElementsByTag("invenio-csl");
|
||||||
|
|
||||||
|
Element cit=cits.first();
|
||||||
|
|
||||||
|
String citationStr=cit.attr("ng-init");
|
||||||
|
|
||||||
|
if (!citationStr.trim().isEmpty()) {
|
||||||
|
jsondata.put("citation string", citationStr.trim());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
|
||||||
e.printStackTrace();
|
e.printStackTrace(pw);
|
||||||
|
String sStackTrace = sw.toString(); // stack trace as a string
|
||||||
|
if (sStackTrace.length()>200)
|
||||||
|
System.out.println(sStackTrace.substring(0, 199));
|
||||||
|
pw.flush();
|
||||||
|
System.out.println ("@@@@@@@@@@@@@@@("+pid+") zenodo metadata not available");
|
||||||
}
|
}
|
||||||
|
|
||||||
return jsondata;
|
return jsondata;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
private JSONObject getTalarData (Document doc) throws JSONException {
|
||||||
|
JSONObject taljsondata=new JSONObject();
|
||||||
|
//header
|
||||||
|
Elements cmdheader=doc.getElementsByTag("cmd:Header");
|
||||||
|
if (cmdheader!=null && cmdheader.size()>0) {
|
||||||
|
for (Element header: cmdheader) {
|
||||||
|
Element mdcreator=header.getElementsByTag("cmd:MdCreator").first();
|
||||||
|
Element creationdate=header.getElementsByTag("cmd:MdCreationDate").first();
|
||||||
|
Element MdSelfLink=header.getElementsByTag("cmd:MdSelfLink").first();
|
||||||
|
taljsondata.put("cmd:MdCreator", mdcreator.text());
|
||||||
|
taljsondata.put("cmd:MdCreationDate", creationdate.text());
|
||||||
|
taljsondata.put("cmd:MdSelfLink", MdSelfLink.text());
|
||||||
|
taljsondata.put("cmd:MdProfile", header.getElementsByTag("cmd:MdProfile").first().text());
|
||||||
|
taljsondata.put("cmd:MdCollectionDisplayName", header.getElementsByTag("cmd:MdCollectionDisplayName").first().text());
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//cmd:ResourceProxy
|
||||||
|
Elements resproxys=doc.getElementsByTag("cmd:ResourceProxy");
|
||||||
|
JSONArray proxyarray = new JSONArray();
|
||||||
|
for (Element proxy: resproxys) {
|
||||||
|
|
||||||
|
JSONObject jsonproxy=new JSONObject();
|
||||||
|
Element restype=proxy.getElementsByTag("cmd:ResourceType").first();
|
||||||
|
Element resref=proxy.getElementsByTag("cmd:Resourceref").first();
|
||||||
|
if (restype!=null) {
|
||||||
|
String mimetype=restype.attr("mimetype");
|
||||||
|
String rtype=restype.text();
|
||||||
|
jsonproxy.put("resource", rtype);
|
||||||
|
jsonproxy.put("type", mimetype);
|
||||||
|
}
|
||||||
|
if (resref!=null) {
|
||||||
|
String resid=resref.text();
|
||||||
|
jsonproxy.put("reference", resid);
|
||||||
|
}
|
||||||
|
|
||||||
|
//search for cmdp:TypeSpecificSizeInfo and cmdp:ResourceProxyInfo
|
||||||
|
|
||||||
|
String proxyid = proxy.attr("id");
|
||||||
|
Elements pinfo=doc.getElementsByAttributeValue("cmd:ref", proxyid);
|
||||||
|
JSONObject jsonproxyinfo=new JSONObject();
|
||||||
|
|
||||||
|
for (Element info:pinfo) {
|
||||||
|
String tname=info.tagName();
|
||||||
|
|
||||||
|
Elements infochildren=info.children();
|
||||||
|
JSONArray proxinfoyarray = new JSONArray();
|
||||||
|
for (Element infochild:infochildren) {
|
||||||
|
//System.out.println("tttname "+infochild.tagName());
|
||||||
|
JSONObject jsonproxyinfochildren=new JSONObject();
|
||||||
|
jsonproxyinfochildren.put(infochild.tagName(), infochild.text());
|
||||||
|
proxinfoyarray.put(jsonproxyinfochildren);
|
||||||
|
|
||||||
|
}
|
||||||
|
jsonproxyinfo.put(tname, proxinfoyarray);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
if (jsonproxyinfo!=null) {
|
||||||
|
jsonproxy.put("info", jsonproxyinfo);
|
||||||
|
}
|
||||||
|
if (jsonproxy !=null)
|
||||||
|
proxyarray.put(jsonproxy);
|
||||||
|
|
||||||
|
}
|
||||||
|
if (proxyarray!=null) {
|
||||||
|
taljsondata.put("cmd:ResourceProxyList", proxyarray);
|
||||||
|
}
|
||||||
|
|
||||||
|
//cmdp:GeneralInfo
|
||||||
|
Elements generalInfo=doc.getElementsByTag("cmdp:GeneralInfo");
|
||||||
|
proxyarray=new JSONArray();
|
||||||
|
for (Element ginfo: generalInfo) {
|
||||||
|
JSONObject jsoninfo=new JSONObject();
|
||||||
|
Element location=ginfo.getElementsByTag("cmdp:Location").first();
|
||||||
|
if (location!=null) {
|
||||||
|
Element address= location.getElementsByTag("cmdp:Address").first();
|
||||||
|
Element country= location.getElementsByTag("cmdp:Country").first();
|
||||||
|
jsoninfo.put("cmdp:Address", address.text());
|
||||||
|
if (country!=null) {
|
||||||
|
JSONObject jsoncountry=new JSONObject();
|
||||||
|
JSONObject jsoncountryname=new JSONObject();
|
||||||
|
Element countryname= country.getElementsByTag("cmdp:CountryName").first();
|
||||||
|
String xmllang = countryname.attr("xml:lang");
|
||||||
|
jsoncountryname.put("xml:lang", xmllang);
|
||||||
|
jsoncountryname.put("cmdp:CountryName", countryname.text());
|
||||||
|
String countrycoding=country.getElementsByTag("cmdp:CountryCoding").first().text();
|
||||||
|
jsoncountry.put("cmdp:CountryCoding", countrycoding);
|
||||||
|
jsoncountry.put("cmdp:Countryname", jsoncountryname);
|
||||||
|
jsoninfo.put("cmdp:Country", jsoncountry);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
taljsondata.put("cmdp:location", jsoninfo);
|
||||||
|
//Tags
|
||||||
|
Element tags=ginfo.getElementsByTag("cmdp:Tags").first();
|
||||||
|
proxyarray=new JSONArray();
|
||||||
|
if (tags!=null) {
|
||||||
|
Elements taglist = tags.getElementsByTag("cmdp:tag");
|
||||||
|
for (Element tag:taglist){
|
||||||
|
String taglang="";
|
||||||
|
JSONObject jsontag=new JSONObject();
|
||||||
|
taglang = tag.attr("xml:lang");
|
||||||
|
jsontag.put("xml:lang", taglang);
|
||||||
|
jsontag.put("cmdp:tag", tag.text());
|
||||||
|
proxyarray.put(jsontag);
|
||||||
|
}
|
||||||
|
taljsondata.put("cmdp:Tags", proxyarray);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
//cmdp:Creators
|
||||||
|
|
||||||
|
Element creators=doc.getElementsByTag("cmdp:Creators").first();
|
||||||
|
proxyarray=new JSONArray();
|
||||||
|
if (creators!=null) {
|
||||||
|
//System.out.println(creators.getElementsByTag("cmdp:Person").first().text());
|
||||||
|
Elements creatorlist = creators.getElementsByTag("cmdp:Person");//cmdp:Person
|
||||||
|
for (Element person:creatorlist){
|
||||||
|
JSONObject jsonperson=new JSONObject();
|
||||||
|
jsonperson.put("cmdp:firstName", person.getElementsByTag("cmdp:firstName").first().text());
|
||||||
|
jsonperson.put("cmdp:lastName", person.getElementsByTag("cmdp:lastName").first().text());
|
||||||
|
jsonperson.put("cmdp:role", person.getElementsByTag("cmdp:role").first().text());
|
||||||
|
proxyarray.put(jsonperson);
|
||||||
|
}
|
||||||
|
taljsondata.put("cmdp:Creators", proxyarray);
|
||||||
|
}
|
||||||
|
//cmdp:Descriptions
|
||||||
|
Element descriptions=doc.getElementsByTag("cmdp:Descriptions").first();
|
||||||
|
proxyarray=new JSONArray();
|
||||||
|
if (descriptions!=null) {
|
||||||
|
Elements desclist = descriptions.getElementsByTag("cmdp:Description");
|
||||||
|
for (Element description:desclist){
|
||||||
|
JSONObject jsondesc=new JSONObject();
|
||||||
|
jsondesc.put("cmdp:Description", description.getElementsByTag("cmdp:Description").first().text());
|
||||||
|
jsondesc.put("xml:lang", description.getElementsByTag("cmdp:Description").first().attr("xml:lang"));
|
||||||
|
proxyarray.put(jsondesc);
|
||||||
|
}
|
||||||
|
taljsondata.put("cmdp:Descriptions", proxyarray);
|
||||||
|
}
|
||||||
|
//cmdp:ResourceName
|
||||||
|
Element resourcename=doc.getElementsByTag("cmdp:ResourceName").first();
|
||||||
|
if (resourcename!=null) {
|
||||||
|
taljsondata.put("cmdp:ResourceName", resourcename.text());
|
||||||
|
}
|
||||||
|
//cmdp:ResourceTitle
|
||||||
|
Element resourcetitle=doc.getElementsByTag("cmdp:ResourceTitle").first();
|
||||||
|
if (resourcetitle!=null) {
|
||||||
|
JSONObject jsonrestitle=new JSONObject();
|
||||||
|
jsonrestitle.put("cmdp:ResourceName", resourcetitle.text());
|
||||||
|
jsonrestitle.put("xml:lang", resourcetitle.attr("xml:lang"));
|
||||||
|
taljsondata.put("cmdp:ResourceTitle", jsonrestitle);
|
||||||
|
}
|
||||||
|
//cmdp:LegalOwner
|
||||||
|
|
||||||
|
Elements legalowners=doc.getElementsByTag("cmdp:LegalOwner");
|
||||||
|
proxyarray=new JSONArray();
|
||||||
|
for (Element legalown: legalowners) {
|
||||||
|
if (legalown!=null) {
|
||||||
|
JSONObject jsonlo=new JSONObject();
|
||||||
|
jsonlo.put("cmdp:LegalOwner", legalown.text());
|
||||||
|
jsonlo.put("xml:lang", legalown.attr("xml:lang"));
|
||||||
|
proxyarray.put(jsonlo);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
taljsondata.put("cmdp:LegalOwner", proxyarray);
|
||||||
|
|
||||||
|
//cmdp:TimeCoverage
|
||||||
|
Elements timecoves=doc.getElementsByTag("cmdp:TimeCoverage");
|
||||||
|
proxyarray=new JSONArray();
|
||||||
|
for (Element timecov: timecoves) {
|
||||||
|
if (timecov!=null) {
|
||||||
|
JSONObject jsonlo=new JSONObject();
|
||||||
|
jsonlo.put("cmdp:TimeCoverage", timecov.text());
|
||||||
|
jsonlo.put("xml:lang", timecov.attr("xml:lang"));
|
||||||
|
proxyarray.put(jsonlo);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
taljsondata.put("cmdp:TimeCoverage", proxyarray);
|
||||||
|
|
||||||
|
return taljsondata;
|
||||||
|
}
|
||||||
|
|
||||||
|
private JSONObject getZenodoData(Document doc) throws JSONException{
|
||||||
|
JSONObject jsondata=new JSONObject();
|
||||||
|
Elements scripts = doc.getElementsByTag("script");
|
||||||
|
for (Element script: scripts) {//get metadata from <script>
|
||||||
|
String type = script.attr("type");
|
||||||
|
if (type!=null && !type.trim().isEmpty() &&
|
||||||
|
(type.trim().equalsIgnoreCase("application/ld+json") || type.trim().equalsIgnoreCase("application/json"))) {
|
||||||
|
|
||||||
|
String jsonStr=script.toString().trim();
|
||||||
|
int headerLimit=jsonStr.indexOf(">");
|
||||||
|
String tmpStr=jsonStr.trim().substring(headerLimit+1);
|
||||||
|
// String jsonStr=script.outerHtml();
|
||||||
|
// String tmpStr=jsonStr.replace("<script type=\"application/ld+json\">", "");
|
||||||
|
String jsonStrraw1=tmpStr.trim().replace("</script>", "");
|
||||||
|
String jsonStrraw = jsonStrraw1.replace("\r\n", "");
|
||||||
|
jsonStr = jsonStrraw.replace('\r', ' ');
|
||||||
|
//System.out.println(jsonStr);
|
||||||
|
//getGraph(jsonStr);
|
||||||
|
JSONObject jsonproperties=new JSONObject();
|
||||||
|
jsonproperties=new JSONObject(jsonStr.trim());
|
||||||
|
if (jsondata.length()>0 && jsondata.has("properties")) {
|
||||||
|
jsondata.put("additional_properties", jsonproperties);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
jsondata.put("properties", jsonproperties);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//get the citation string
|
||||||
|
|
||||||
|
Element citationid = doc.getElementById("invenio-csl");
|
||||||
|
//Elements citationclass = doc.getElementsByAttributeValue("class", "citation-select");
|
||||||
|
Element citationclass = doc.select("span.citation-select").first();
|
||||||
|
String citationStr="";
|
||||||
|
if (citationid!=null) {
|
||||||
|
Elements cits= citationid.getElementsByTag("invenio-csl");
|
||||||
|
|
||||||
|
Element cit=cits.first();
|
||||||
|
|
||||||
|
citationStr=cit.attr("ng-init");
|
||||||
|
}else {
|
||||||
|
if (citationclass!=null) {
|
||||||
|
citationStr= citationclass.text();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!citationStr.trim().isEmpty()) {
|
||||||
|
jsondata.put("citation string", citationStr.trim());
|
||||||
|
}
|
||||||
|
return jsondata;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private JSONObject getGraph(String jsondata) {
|
||||||
|
JsonParser parser = Json.createParser(new StringReader(jsondata));
|
||||||
|
JSONArray jo = new JSONArray();
|
||||||
|
JSONArray links = new JSONArray();
|
||||||
|
JSONObject graph= new JSONObject();
|
||||||
|
String id="";
|
||||||
|
Stack<String> source = new Stack<>();
|
||||||
|
String target="";
|
||||||
|
boolean isarray=false;
|
||||||
|
int group=0;
|
||||||
|
int count=0;
|
||||||
|
try {
|
||||||
|
source.push("root");
|
||||||
|
while (parser.hasNext()) {
|
||||||
|
JsonParser.Event event = parser.next();
|
||||||
|
JSONObject joitem = new JSONObject();
|
||||||
|
JSONObject link = new JSONObject();
|
||||||
|
//target="";
|
||||||
|
switch(event) {
|
||||||
|
case START_ARRAY:
|
||||||
|
System.out.println("sa " + event.toString() +" - "+id+" "+group);
|
||||||
|
|
||||||
|
joitem.put("id", id+"_"+count);
|
||||||
|
joitem.put("name", id);
|
||||||
|
joitem.put("group", group);
|
||||||
|
joitem.put("value", id);
|
||||||
|
|
||||||
|
link.put("source", source.peek());
|
||||||
|
link.put("target", id+"_"+count);
|
||||||
|
link.put("value", 15);
|
||||||
|
links.put(link);
|
||||||
|
target="";
|
||||||
|
|
||||||
|
source.push(id+"_"+count);
|
||||||
|
group=group+1;
|
||||||
|
//id="";
|
||||||
|
isarray=true;
|
||||||
|
break;
|
||||||
|
case END_ARRAY:
|
||||||
|
group=group-1;
|
||||||
|
//source.push(id+"_"+count);
|
||||||
|
//isarray=false;
|
||||||
|
break;
|
||||||
|
case START_OBJECT:
|
||||||
|
System.out.println("so " + event.toString() +" - "+id+" "+group);
|
||||||
|
if (!isarray && !id.trim().isEmpty()) {
|
||||||
|
joitem.put("id", id+"_"+count);
|
||||||
|
joitem.put("name", id);
|
||||||
|
joitem.put("group", group);
|
||||||
|
joitem.put("value", id);
|
||||||
|
|
||||||
|
link.put("source", source.peek());
|
||||||
|
link.put("target", id+"_"+count);
|
||||||
|
link.put("value", 10);
|
||||||
|
links.put(link);
|
||||||
|
target="";
|
||||||
|
source.push(id+"_"+count);
|
||||||
|
group=group+1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (!isarray && id.trim().isEmpty())
|
||||||
|
group=group+1;
|
||||||
|
else {
|
||||||
|
isarray=false;
|
||||||
|
source.push(source.peek());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
break;
|
||||||
|
case END_OBJECT:
|
||||||
|
System.out.println("eo " + event.toString() +" - "+id+" "+group);
|
||||||
|
group=group-1;
|
||||||
|
|
||||||
|
if (!source.empty())
|
||||||
|
source.pop();
|
||||||
|
target="";
|
||||||
|
id="";
|
||||||
|
break;
|
||||||
|
case VALUE_FALSE:
|
||||||
|
case VALUE_NULL:
|
||||||
|
case VALUE_TRUE:
|
||||||
|
System.out.println("true " + event.toString());
|
||||||
|
break;
|
||||||
|
case KEY_NAME:
|
||||||
|
System.out.print("name " +event.toString() + " " +
|
||||||
|
parser.getString() + " - ");
|
||||||
|
|
||||||
|
id=parser.getString();
|
||||||
|
break;
|
||||||
|
case VALUE_STRING:
|
||||||
|
System.out.println("string " + event.toString() + " " +
|
||||||
|
parser.getString() +" "+group);
|
||||||
|
joitem.put("id", id+"_"+count);
|
||||||
|
joitem.put("name", id);
|
||||||
|
joitem.put("group", group);
|
||||||
|
joitem.put("value", parser.getString());
|
||||||
|
//id="";
|
||||||
|
target=id+"_"+count;
|
||||||
|
break;
|
||||||
|
case VALUE_NUMBER:
|
||||||
|
System.out.println("number " + event.toString() + " " +
|
||||||
|
parser.getString());
|
||||||
|
joitem.put("id", id+"_"+count);
|
||||||
|
joitem.put("name", id);
|
||||||
|
joitem.put("group", group);
|
||||||
|
joitem.put("value", parser.getString());
|
||||||
|
target=id+"_"+count;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if(joitem.length()>0) {
|
||||||
|
jo.put(joitem);
|
||||||
|
count++;
|
||||||
|
if (!target.trim().isEmpty()) {
|
||||||
|
link.put("source", source.peek());
|
||||||
|
link.put("target", target);
|
||||||
|
link.put("value", 20);
|
||||||
|
links.put(link);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
JSONObject temp= new JSONObject();
|
||||||
|
temp.put("id", "root");
|
||||||
|
temp.put("name", "root");
|
||||||
|
temp.put("group", 0);
|
||||||
|
temp.put("value", "root");
|
||||||
|
jo.put(temp);
|
||||||
|
|
||||||
|
graph.put("nodes", jo);
|
||||||
|
graph.put("links", links);
|
||||||
|
|
||||||
|
}
|
||||||
|
catch(Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
System.out.println(graph.toString());
|
||||||
|
// System.out.println(links.toString());
|
||||||
|
try (FileWriter file = new FileWriter("mygraphtest.json")) {
|
||||||
|
|
||||||
|
file.write(graph.toString());
|
||||||
|
file.flush();
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
|
||||||
|
package eu.sshoc.citation.service.wfconfigurator.util;
|
||||||
|
|
||||||
|
import java.security.KeyManagementException;
|
||||||
|
import java.security.NoSuchAlgorithmException;
|
||||||
|
|
||||||
|
import javax.net.ssl.SSLContext;
|
||||||
|
import javax.net.ssl.SSLSocketFactory;
|
||||||
|
import javax.net.ssl.TrustManager;
|
||||||
|
import javax.net.ssl.X509TrustManager;
|
||||||
|
import java.security.cert.X509Certificate;
|
||||||
|
|
||||||
|
import org.jsoup.Connection;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
|
||||||
|
public class SSLHelper {
|
||||||
|
|
||||||
|
static public Connection getConnection(String url){
|
||||||
|
return Jsoup.connect(url).followRedirects(true).timeout(15000).sslSocketFactory(SSLHelper.socketFactory());
|
||||||
|
}
|
||||||
|
|
||||||
|
static private SSLSocketFactory socketFactory() {
|
||||||
|
TrustManager[] trustAllCerts = new TrustManager[]{new X509TrustManager() {
|
||||||
|
public java.security.cert.X509Certificate[] getAcceptedIssuers() {
|
||||||
|
return new X509Certificate[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
public void checkClientTrusted(X509Certificate[] certs, String authType) {
|
||||||
|
}
|
||||||
|
|
||||||
|
public void checkServerTrusted(X509Certificate[] certs, String authType) {
|
||||||
|
}
|
||||||
|
}};
|
||||||
|
|
||||||
|
try {
|
||||||
|
SSLContext sslContext = SSLContext.getInstance("SSL");
|
||||||
|
sslContext.init(null, trustAllCerts, new java.security.SecureRandom());
|
||||||
|
SSLSocketFactory result = sslContext.getSocketFactory();
|
||||||
|
|
||||||
|
return result;
|
||||||
|
} catch (NoSuchAlgorithmException | KeyManagementException e) {
|
||||||
|
throw new RuntimeException("Failed to create a SSL socket factory", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -21,7 +21,6 @@
|
||||||
<li> <a href="http://139.91.183.48:8181/EVREMetadataServices">Metadata Service</a></li>
|
<li> <a href="http://139.91.183.48:8181/EVREMetadataServices">Metadata Service</a></li>
|
||||||
<li> <a href="http://www.unity-idm.eu/documentation/unity-1.9.4/manual.html#_how_to_work_">AAAI</a></li>
|
<li> <a href="http://www.unity-idm.eu/documentation/unity-1.9.4/manual.html#_how_to_work_">AAAI</a></li>
|
||||||
</ul>
|
</ul>
|
||||||
</p>
|
|
||||||
<p></p>
|
<p></p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -21,7 +21,7 @@
|
||||||
<div class="footer">
|
<div class="footer">
|
||||||
|
|
||||||
<hr/>
|
<hr/>
|
||||||
<p><i>Powered by</i> <img alt="SSHOC logo" src="logo.png" width="80" height="50" style="vertical-align:middle"> </img></p>
|
<p><i>Powered by</i> <img alt="SSHOC logo" src="logo.png" width="80" height="50" style="vertical-align:middle"></img> </p>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue