changes for handles

This commit is contained in:
Cesare Concordia 2021-06-01 11:15:35 +02:00
parent 77c5fc1515
commit 7fbb65680c
9 changed files with 946 additions and 144 deletions

View File

@ -36,7 +36,7 @@ public class SwaggerConfig {
@Bean
public Docket api() {
return new Docket(DocumentationType.SWAGGER_2)
.groupName("citationservice-api")
.groupName("SSHOC-citationservice-api")
.apiInfo(apiInfo())
.select()
//.apis(!(RequestHandlerSelectors.withClassAnnotation(JsonIgnore.class))

View File

@ -47,7 +47,7 @@ public class CitationHarvester {
}*/
}
@ApiOperation(value = "Returns a list of citations from specific citation source",
@ApiOperation(value = "Returns a list of citations from specific citation source (implementation in progress)",
notes = "A client with a valid identifier can invoke this web service to obtain a list of citations from a specified source",
response = String.class)
@RequestMapping(value="/citharvester/getcitationlist", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
@ -66,7 +66,7 @@ public class CitationHarvester {
return wfc.getCitation(sourceid, pid, token).toString();
}
@ApiOperation(value = "Returns a metadata of a citation via Content Negotiated requests",
@ApiOperation(value = "Returns a metadata record of a citation via Content Negotiated requests",
notes = "A client with a valid identifier can invoke this web service to to retrieve the citation metadata using DOI content negotiated requests",
response = String.class)
@RequestMapping(value="/citharvester/getmetadatacn", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
@ -75,7 +75,7 @@ public class CitationHarvester {
return wfc.getCitationMetadata(pid, token).toString();
}
@ApiOperation(value = "Returns formatted citation using content negotiated request",
@ApiOperation(value = "Returns formatted citation using content negotiated requests",
notes = "A client with a valid identifier can invoke this web service to obtain a formatted citation, the text/bibliography content type is used ",
response = String.class)
@RequestMapping(value="/citharvester/getformcit", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
@ -85,8 +85,8 @@ public class CitationHarvester {
}
@ApiOperation(value = "Returns a metadata of a citation parsing the HTML landing page",
notes = "A client with a valid identifier can invoke this web service to obtain metadata of a citation by parsing its HTML landing page",
@ApiOperation(value = "Returns a metadata record for a citation searching in the available metadata repositories",
notes = "A client with a valid identifier can invoke this web service to obtain metadata of a citation by searching in the available metadata repositories (DOI repositories, landing pages, etc...) ",
response = String.class)
@RequestMapping(value="/citharvester/getmetadatahtml", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
public String getCitationMetadataFromHTML(@RequestParam(value="pid") String pid, @RequestParam(value="token") String token) {

View File

@ -64,7 +64,7 @@ public class CitationService {
}
@ApiOperation(value = "Returns a list of citations",
notes = "A client with a valid identifier can invoke this web service to obtain a list of citations stored in SSHOC citation repository",
notes = "A client with a valid identifier can invoke this web service to obtain a list of citations",
response = String.class)
@RequestMapping(value="/citservice/getcitation", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
public String getServiceDescriptions(@RequestParam(value="sshocid") String sshocid, @RequestParam(value="token") String token) {
@ -72,8 +72,8 @@ public class CitationService {
return wfc.getServiceDescriptions(sshocid, token);
}
@ApiOperation(value = "Search for citation",
notes = "A client with a valid identifier can invoke this web service to search the SSHOC repository for citations",
@ApiOperation(value = "Search for citation (implementation in progress)",
notes = "A client with a valid identifier can invoke this web service to search for citations",
response = String.class)
@RequestMapping(value="/citservice/searchcitation", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
public String searchServiceDescriptions(@RequestParam(value="sshocid") String sshocid, @RequestParam(value="query") String query, @RequestParam(value="token") String token){
@ -81,8 +81,8 @@ public class CitationService {
return null;
}
@ApiOperation(value = "Save a citation",
notes = "A client with a valid identifier can invoke this web service to save the citation in the SSHOC repository",
@ApiOperation(value = "Save a citation (implementation in progress)",
notes = "A client with a valid identifier can invoke this web service to save the citation",
response = String.class)
@RequestMapping(value="/citservice/savecitation", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)
public String saveCitations(@RequestParam(value="sshocid") String sshocid, @RequestParam(value="description") String description, @RequestParam(value="token") String token){
@ -90,7 +90,7 @@ public class CitationService {
return wfc.saveCitations(sshocid, description, token);
}
@ApiOperation(value = "Search for citations ",
@ApiOperation(value = "Search for citations (implementation in progress)",
notes = "A client with a valid identifier can invoke this web service to search the SSHOC repository for citations",
response = String.class)
@RequestMapping(value="/citservice/searchcite", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE)

View File

@ -4,7 +4,9 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringWriter;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.ProtocolException;
@ -22,122 +24,212 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import eu.sshoc.citation.service.wfconfigurator.util.Consts;
import eu.sshoc.citation.service.wfconfigurator.util.HeuristicParsers;
/**
 * Retrieves citation metadata and formatted citation strings for a persistent
 * identifier, which may be either a bare DOI or an http(s) URL.
 *
 * Lookups are attempted through the DataCite test API, DOI content negotiation
 * and the landing-page parsers provided by {@link HeuristicParsers}.
 */
public class CitationHarvesterImpl {

    /** Identifier used when the caller passes null, a blank string or the literal "test". */
    private static final String DEFAULT_DOI = "10.1007/s11082-018-1327-1"; //10.1126/science.169.3946.635
    /** Prefix that turns a bare DOI into a resolvable URL. */
    private static final String DOI_RESOLVER = "https://doi.org/";
    /** Connect timeout, in milliseconds, for the content negotiation requests. */
    private static final int CONNECT_TIMEOUT_MS = 18000;

    HeuristicParsers heup= new HeuristicParsers();
    // sw/pw are no longer written to by this class (each failure now gets its own
    // buffer); the fields are kept so that existing references keep compiling.
    StringWriter sw = new StringWriter();
    PrintWriter pw = new PrintWriter(sw);
    String sid = DEFAULT_DOI;
    URL myURL;
    Boolean getCNCit=true;

    /**
     * Initialises {@link #myURL} with the resolver URL of the default DOI.
     * The previous code declared a local variable of the same name, which left
     * the field null.
     */
    public CitationHarvesterImpl(){
        try {
            myURL = new URL(DOI_RESOLVER + sid);
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
    }

    /** Returns the trimmed identifier, or the default DOI for null, blank or "test" input. */
    private static String resolveIdentifier(String pid) {
        if (pid == null) {
            return DEFAULT_DOI;
        }
        String trimmed = pid.trim();
        // isEmpty() instead of != "": the latter compares references, not content.
        if (trimmed.isEmpty() || trimmed.equalsIgnoreCase("test")) {
            return DEFAULT_DOI;
        }
        return trimmed;
    }

    /** Tells whether the identifier is already an http(s) URL rather than a bare DOI. */
    private static boolean isHttpUrl(String identifier) {
        return identifier.startsWith("http://") || identifier.startsWith("https://");
    }

    /**
     * Prints the stack trace of {@code e}, cut to {@code limit - 1} characters when
     * it is longer than {@code limit}. A fresh buffer is used on every call so that
     * traces of earlier failures are not printed again.
     */
    private static void printTruncatedTrace(Exception e, int limit) {
        StringWriter buffer = new StringWriter();
        PrintWriter writer = new PrintWriter(buffer);
        e.printStackTrace(writer);
        writer.flush();
        String trace = buffer.toString();
        System.out.println(trace.length() > limit ? trace.substring(0, limit - 1) : trace);
    }

    /**
     * Issues a GET request with the given Accept header and returns the response
     * body decoded as UTF-8. The reader (and with it the response stream) is closed
     * before returning.
     *
     * @param connectTimeoutMs connect timeout in milliseconds; 0 means no timeout
     * @throws IOException if the connection cannot be opened or read
     */
    private static String httpGet(URL url, String accept, int connectTimeoutMs) throws IOException {
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        connection.setRequestProperty("Accept", accept);
        connection.setConnectTimeout(connectTimeoutMs);
        try (BufferedReader rd = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8")))) {
            return readAll(rd);
        }
    }

    /** Reads the reader to exhaustion and returns everything it produced. */
    private static String readAll(Reader rd) throws IOException {
        StringBuilder sb = new StringBuilder();
        int cp;
        while ((cp = rd.read()) != -1) {
            sb.append((char) cp);
        }
        return sb.toString();
    }

    /**
     * Downloads the resource at {@code url} and parses it as a JSON object.
     *
     * @throws IOException if the resource cannot be read
     * @throws JSONException if the body is not a JSON object
     */
    public static JSONObject readJsonFromUrl(String url) throws IOException, JSONException {
        try (InputStream is = new URL(url).openStream()) {
            BufferedReader rd = new BufferedReader(new InputStreamReader(is, Charset.forName("UTF-8")));
            return new JSONObject(readAll(rd));
        }
    }

    /** Requests a bibliography-formatted citation (harvard3 style, fr-FR locale) from {@code citurl}. */
    private static String getStandardCitation(String citurl) throws IOException{
        return httpGet(new URL(citurl), "text/x-bibliography; style=harvard3; locale=fr-FR", 0);
    }

    /** Not implemented yet; always returns null. */
    public String getCitationList(String id, String token){
        return null;
    }

    /**
     * Fetches the DataCite (test API) record for {@code pid}.
     *
     * @return the parsed record, or null when the request or the parsing fails
     */
    public JSONObject getCitation(String id, String pid, String token){
        JSONObject jsondata=null;
        try {
            jsondata = readJsonFromUrl("https://api.test.datacite.org/dois/" + resolveIdentifier(pid));
            System.out.println(jsondata.toString());
        } catch (IOException | JSONException e) {
            e.printStackTrace();
        }
        return jsondata;
    }

    /**
     * Retrieves the metadata record of {@code pid} through content negotiation.
     *
     * The response is stored under "properties" after the fields listed in
     * {@code Consts.unusedFields} have been removed. When {@link #getCNCit} is set,
     * the formatted citation is added under "citation string" if it is available.
     *
     * @return the collected data; an empty object when the request fails
     */
    public JSONObject getCitationMetadata(String pid, String token){
        JSONObject jsondata=new JSONObject();
        String requested = resolveIdentifier(pid);
        try {
            // A bare DOI has to go through the resolver; previously only http(s)
            // identifiers produced a URL and everything else failed on a null URL.
            URL target = isHttpUrl(requested) ? new URL(requested) : new URL(DOI_RESOLVER + requested);
            String jsonText = httpGet(target,
                    "application/rdf+xml;q=0.5, application/vnd.citationstyles.csl+json;q=1.0",
                    CONNECT_TIMEOUT_MS);
            JSONObject jsonproperties = new JSONObject(jsonText);
            //clean object
            for (String field : Consts.unusedFields) {
                if (jsonproperties.has(field))
                    jsonproperties.remove(field);
            }
            jsondata.put("properties", jsonproperties);
            if (getCNCit) {
                // getCitationCSL returns null on failure and omits the key for an empty body.
                JSONObject csl = getCitationCSL(pid, token);
                if (csl != null && csl.has("citation string"))
                    jsondata.put("citation string", csl.get("citation string"));
            }
            System.out.println(jsondata.toString());
        } catch (Exception e) {
            printTruncatedTrace(e, 800);
            System.out.println ("***************** ("+requested+") getCitationMetadata, content negotiation not available, maybe later?");
        }
        return jsondata;
    }

    /**
     * Retrieves the formatted citation of {@code pid} by requesting
     * {@code text/x-bibliography} from the identifier's URL. http:// identifiers
     * are rewritten to https:// before the request.
     *
     * @return an object holding "citation string" (absent when the body is empty),
     *         or null when the request fails
     */
    public JSONObject getCitationCSL(String pid, String token){
        JSONObject jsondata=null;
        try {
            String requested = resolveIdentifier(pid);
            URL target;
            if (isHttpUrl(requested)) {
                if (requested.startsWith("http://")) {
                    requested = requested.replace("http://", "https://");
                }
                target = new URL(requested);
            } else {
                target = new URL(DOI_RESOLVER + requested);
            }
            String citation = httpGet(target, "text/x-bibliography", CONNECT_TIMEOUT_MS);
            System.out.println("citation string "+ citation);
            jsondata = new JSONObject();
            if (!citation.isEmpty())
                jsondata.put("citation string", citation);
        } catch (Exception e) {
            printTruncatedTrace(e, 200);
            System.out.println ("####################### ("+pid+") getCitationCSL, citation by content negotiation not available");
        }
        return jsondata;
    }

    /**
     * Looks for metadata of {@code pid} in every available source and returns the
     * first non-empty result. The order is: JSON-LD of the landing page, meta tags,
     * link tags, content-negotiated metadata, content-negotiated citation. When all
     * of them come back empty, an object with "citation string" set to "na" is returned.
     */
    public JSONObject getCitationMetadataFromHTML(String pid, String token){
        JSONObject jsondata=null;
        // Guard against a missing pid; the lookups below report their own failures.
        if (pid != null && pid.startsWith("http://")) {
            pid=pid.replace("http://", "https://");
        }
        //first landing pages
        System.out.println("getZenodoMetadataJSONLD ("+pid+")");
        jsondata=heup.getZenodoMetadataJSONLD(pid);
        if (jsondata==null || jsondata.length()==0) {
            System.out.println("getMetaMetadata ("+pid+")");
            jsondata=heup.getMetaMetadata(pid);
        }
        if (jsondata==null || jsondata.length()==0) {
            System.out.println("getLinkMetadata ("+pid+")");
            jsondata= heup.getLinkMetadata(pid);
        }
        //then DOI service providers...
        if (jsondata==null || jsondata.length()==0) {
            getCNCit=true;
            System.out.println("getCitationMetadata ("+pid+")");
            jsondata=getCitationMetadata(pid, token);
            getCNCit=true;
        }
        if (jsondata==null || jsondata.length()==0) {
            System.out.println("getCitationCSL ("+pid+")");
            jsondata=getCitationCSL(pid, token);
        }
        if (jsondata==null || jsondata.length()==0) {
            try {
                jsondata=new JSONObject();
                jsondata.put("citation string", "na");
            } catch (JSONException e) {
                e.printStackTrace();
            }
        }
        return jsondata;
    }
}

View File

@ -0,0 +1,48 @@
/*******************************************************************************
* Copyright (c) 2020 VRE4EIC Consortium
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package eu.sshoc.citation.service.wfconfigurator.util;
import java.util.Vector;
/**
 * Shared lists of field names.
 *
 * Both lists are filled once, when the class is loaded, so they can be read
 * through the static fields without creating an instance first.
 */
public class Consts {
    /** Names of metadata fields that are considered unused. */
    public static Vector<String> unusedFields = new Vector<String>();
    /** Names of the Dublin Core terms known to the service. */
    public static Vector<String> dcterms = new Vector<String>();

    static {
        unusedFields.add("published-print");
        unusedFields.add("published-online");
        unusedFields.add("is-referenced-by-count");
        unusedFields.add("journal-issue");
        unusedFields.add("content-domain");
        unusedFields.add("alternative-id");
        unusedFields.add("relation");
        unusedFields.add("score");
        unusedFields.add("member");
        unusedFields.add("inference_count");
        unusedFields.add("assertion");
        unusedFields.add("issue");
        unusedFields.add("indexed");
        //dcterms
        dcterms.add("dcterms:title");
        dcterms.add("dcterms:language");
        dcterms.add("dcterms:accessRights");
        dcterms.add("dcterms:creator");
        dcterms.add("dcterms:service");
    }

    /**
     * Kept so that existing {@code new Consts()} calls still compile. It no longer
     * touches the lists: filling them here left them empty until the first
     * instantiation and appended duplicates on every later one.
     */
    public Consts () {
    }
}

View File

@ -15,85 +15,703 @@
*******************************************************************************/
package eu.sshoc.citation.service.wfconfigurator.util;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.HashMap;
import java.util.Stack;
import java.util.Vector;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import javax.json.stream.JsonParser;
import javax.json.Json;
public class HeuristicParsers {
public JSONObject getZenodoMetadata(String pid){
JSONObject jsondata=new JSONObject();
JSONArray authors = new JSONArray();
HashMap<String, String> authors_affiliation = new HashMap<String, String>();
try {
Document doc = Jsoup.connect(pid).get();
//String title = doc.title();
Elements metas = doc.getElementsByTag("meta");
for (Element meta: metas) {//get metadata from <meta>
String name = meta.attr("name");
String property = meta.attr("property");
String content = meta.attr("content");
if (!name.trim().isEmpty() &&
!content.trim().isEmpty() &&
!name.trim().contains("-site-verification") &&
!name.trim().contains("citation_author")) {
jsondata.put(name, content);
}
if (name.trim().contains("citation_author")) {//to be used to double check authors
authors_affiliation.put(content, "na");
}
if (!property.trim().isEmpty() && !content.trim().isEmpty())
jsondata.put(property.trim(), content.trim());
}
//get metadata about authors and affiliation from <div class="container record-detail"> section
Element recordDetail = doc.select("div.container.record-detail").first();
Elements affiliations = recordDetail.select("span[title]"); // span with title attribute
for (Element affiliation: affiliations) {
String organisation = affiliation.attr("title");
String auth=affiliation.text();
if (!organisation.trim().isEmpty() && !auth.trim().isEmpty()) {
JSONObject org = new JSONObject();
org.put("organisation", organisation);
org.put("author_name", auth);
authors.put(org);
}
}
jsondata.put("authors", authors);
//search for alternate link
Element head = doc.select("head").first();
Elements links= head.select("link[type]");
for (Element link:links) {
String rel=link.attr("rel");
String type= link.attr("type");
String href= link.attr("href");
if (rel.trim().equalsIgnoreCase("alternate")) {
JSONObject alt = new JSONObject();
alt.put("type", type);
alt.put("href", href);
jsondata.put("alternate", alt);
}
}
} catch (Exception e) {
e.printStackTrace();
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw);
public JSONObject getZenodoMetadataJSONLD(String pid) {
JSONObject jsondata=new JSONObject();
try {
//Document doc = Jsoup.connect(pid).get();
Document doc = SSLHelper.getConnection(pid).ignoreContentType(true)
.userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0").timeout(20 * 1000).get();
Elements cmdheader=doc.getElementsByTag("cmd:Header");
if (cmdheader!=null && cmdheader.size()>0) {
JSONObject talarproperties=new JSONObject();
talarproperties=getTalarData(doc);
if (talarproperties!=null)
jsondata.put("properties", talarproperties);
return jsondata;
}
return jsondata;
}
//application/ld+json
Elements scripts = doc.getElementsByTag("script");
for (Element script: scripts) {//get metadata from <script>
String type = script.attr("type");
if (type!=null && !type.trim().isEmpty() &&
(type.trim().equalsIgnoreCase("application/ld+json") || type.trim().equalsIgnoreCase("application/json"))) {
String jsonStr=script.toString().trim();
int headerLimit=jsonStr.indexOf(">");
String tmpStr=jsonStr.trim().substring(headerLimit+1);
// String jsonStr=script.outerHtml();
// String tmpStr=jsonStr.replace("<script type=\"application/ld+json\">", "");
String jsonStrraw1=tmpStr.trim().replace("</script>", "");
String jsonStrraw = jsonStrraw1.replace("\r\n", "");
jsonStr = jsonStrraw.replace('\r', ' ');
//System.out.println(jsonStr);
//getGraph(jsonStr);
JSONObject jsonproperties=new JSONObject();
jsonproperties=new JSONObject(jsonStr.trim());
if (jsondata.length()>0 && jsondata.has("properties")) {
jsondata.put("additional_properties", jsonproperties);
}
else {
jsondata.put("properties", jsonproperties);
}
}
}
//get the citation string
Element citationid = doc.getElementById("invenio-csl");
//Elements citationclass = doc.getElementsByAttributeValue("class", "citation-select");
Element citationclass = doc.select("span.citation-select").first();
String citationStr="";
if (citationid!=null) {
Elements cits= citationid.getElementsByTag("invenio-csl");
Element cit=cits.first();
citationStr=cit.attr("ng-init");
}else {
if (citationclass!=null) {
citationStr= citationclass.text();
}
}
if (!citationStr.trim().isEmpty()) {
jsondata.put("citation string", citationStr.trim());
}
}
catch (Exception e) {
//e.printStackTrace();
e.printStackTrace(pw);
String sStackTrace = sw.toString(); // stack trace as a string
pw.flush();
if (sStackTrace.length()>1500)
System.out.println(sStackTrace.substring(0, 1499));
System.out.println ("("+pid+") not available");
}
return jsondata;
}
/**
 * Collects the link elements of the page at {@code pid} whose rel attribute is
 * "metadata" (case-insensitive) and stores each href under the rel value inside
 * "properties".
 *
 * @return the collected data; an empty object when nothing matched or the page
 *         could not be loaded
 */
public JSONObject getLinkMetadata(String pid) {
    JSONObject jsondata=new JSONObject();
    JSONObject jsonproperties=new JSONObject();
    try {
        Document doc = SSLHelper.getConnection(pid).timeout(15 * 1000).get();
        for (Element link: doc.getElementsByTag("link")) {//get metadata from <link>
            String rel = link.attr("rel");
            String href= link.attr("href");
            // An empty rel can never equal "metadata", so no separate emptiness test is needed.
            if (rel.trim().equalsIgnoreCase("metadata")){
                jsonproperties.put(rel, href);
            }
        }
        if (jsonproperties.length()>0)
            jsondata.put("properties", jsonproperties);
        System.out.println(jsondata);
    }
    catch (Exception e) {
        // A fresh buffer per failure; a shared one would print earlier traces again.
        StringWriter trace = new StringWriter();
        PrintWriter traceWriter = new PrintWriter(trace);
        e.printStackTrace(traceWriter);
        traceWriter.flush();
        String sStackTrace = trace.toString(); // stack trace as a string
        System.out.println(sStackTrace.length()>200 ? sStackTrace.substring(0, 199) : sStackTrace);
        System.out.println ("&&&&&&&&&&&&&&&&&&&&&&&&& ("+pid+") link metadata not available");
    }
    return jsondata;
}
/**
 * Extracts metadata from the meta tags of the page at {@code pid}.
 *
 * A document containing a cmd:Header element is handed to getTalarData instead.
 * Otherwise the result's "properties" holds: meta tags whose name starts with
 * "dc." (any case) or "citation_", meta tags carrying a property attribute, and
 * the span[property] elements found in the first div that has a vocab attribute.
 * The text of the first div with a data-style-name attribute is stored under
 * "citation string". When none of this yields anything, getZenodoData is tried.
 *
 * @return the collected data; whatever was gathered before a failure otherwise
 */
public JSONObject getMetaMetadata(String pid){
    JSONObject jsondata=new JSONObject();
    JSONObject jsonproperties=new JSONObject();
    try {
        Document doc = SSLHelper.getConnection(pid).timeout(15 * 1000).get();
        Elements cmdheader=doc.getElementsByTag("cmd:Header");
        if (cmdheader!=null && cmdheader.size()>0) {
            JSONObject talarproperties=getTalarData(doc);
            if (talarproperties!=null)
                jsondata.put("properties", talarproperties);
            return jsondata;
        }
        for (Element meta: doc.getElementsByTag("meta")) {//get metadata from <meta>
            String name = meta.attr("name");
            String property = meta.attr("property");
            String content = meta.attr("content");
            if (content.trim().isEmpty()) {
                continue; // both entries below require a non-empty content
            }
            if (!name.trim().isEmpty() &&
                    (name.toLowerCase().trim().startsWith("dc.") ||
                            name.trim().startsWith("citation_"))){
                jsonproperties.put(name, content);
            }
            if (!property.trim().isEmpty())
                jsonproperties.put(property.trim(), content.trim());
        }
        //get metadata from the <div vocab="http://schema.org/"> section
        Element record = doc.select("div[vocab]").first();
        if (record!=null) {
            for (Element item: record.select("span[property]")) { // span with property attribute
                String name = item.attr("property");
                String val=item.attr("value");
                if (!name.trim().isEmpty() && !val.trim().isEmpty()) {
                    jsonproperties.put(name, val);
                }
            }
        }
        //<div class="citation-popup" data-style-name="harvard" title="Harvard Citation" style="display:none;">
        Element citationstring= doc.select("div[data-style-name]").first();
        if (citationstring!=null) {
            jsondata.put("citation string", citationstring.text().trim());
        }
        if (jsonproperties.length()>0)
            jsondata.put("properties", jsonproperties);
        if (jsondata.length()==0) {
            jsondata=getZenodoData(doc);
        }
    }
    catch (Exception e) {
        // A fresh buffer per failure; a shared one would print earlier traces again.
        StringWriter trace = new StringWriter();
        PrintWriter traceWriter = new PrintWriter(trace);
        e.printStackTrace(traceWriter);
        traceWriter.flush();
        String sStackTrace = trace.toString(); // stack trace as a string
        System.out.println(sStackTrace.length()>200 ? sStackTrace.substring(0, 199) : sStackTrace);
        System.out.println ("^^^^^^^^^^^^^^^^^^^^^ ("+pid+") meta metadata not available");
    }
    return jsondata;
}
/**
 * Scrapes the page at {@code pid} for metadata.
 *
 * The result holds: the meta tags (site-verification and citation_author names
 * excluded), the author/organisation pairs found in the
 * div.container.record-detail section under "authors", the last alternate link
 * of the head under "alternate", and the ng-init value of the invenio-csl
 * element under "citation string". A section that is missing from the page is
 * skipped and does not prevent the remaining ones from being read.
 *
 * @return the collected data; whatever was gathered before a failure otherwise
 */
public JSONObject getZenodoMetadata(String pid){
    JSONObject jsondata=new JSONObject();
    JSONArray authors = new JSONArray();
    try {
        Document doc = Jsoup.connect(pid).get();
        for (Element meta: doc.getElementsByTag("meta")) {//get metadata from <meta>
            String name = meta.attr("name");
            String property = meta.attr("property");
            String content = meta.attr("content");
            if (content.trim().isEmpty()) {
                continue; // both entries below require a non-empty content
            }
            if (!name.trim().isEmpty() &&
                    !name.trim().contains("-site-verification") &&
                    !name.trim().contains("citation_author")) {
                jsondata.put(name, content);
            }
            if (!property.trim().isEmpty())
                jsondata.put(property.trim(), content.trim());
        }
        //get metadata about authors and affiliation from <div class="container record-detail"> section
        Element recordDetail = doc.select("div.container.record-detail").first();
        if (recordDetail!=null) {
            for (Element affiliation: recordDetail.select("span[title]")) { // span with title attribute
                String organisation = affiliation.attr("title");
                String auth=affiliation.text();
                if (!organisation.trim().isEmpty() && !auth.trim().isEmpty()) {
                    JSONObject org = new JSONObject();
                    org.put("organisation", organisation);
                    org.put("author_name", auth);
                    authors.put(org);
                }
            }
        }
        jsondata.put("authors", authors);
        //search for alternate link
        Element head = doc.select("head").first();
        if (head!=null) {
            for (Element link: head.select("link[type]")) {
                if (link.attr("rel").trim().equalsIgnoreCase("alternate")) {
                    JSONObject alt = new JSONObject();
                    alt.put("type", link.attr("type"));
                    alt.put("href", link.attr("href"));
                    jsondata.put("alternate", alt);
                }
            }
        }
        //get the citation string
        Element citationid = doc.getElementById("invenio-csl");
        // Both lookups return null when the page has no such element.
        Element cit = citationid==null ? null : citationid.getElementsByTag("invenio-csl").first();
        if (cit!=null) {
            String citationStr=cit.attr("ng-init");
            if (!citationStr.trim().isEmpty()) {
                jsondata.put("citation string", citationStr.trim());
            }
        }
    } catch (Exception e) {
        // A fresh buffer per failure; a shared one would print earlier traces again.
        StringWriter trace = new StringWriter();
        PrintWriter traceWriter = new PrintWriter(trace);
        e.printStackTrace(traceWriter);
        traceWriter.flush();
        String sStackTrace = trace.toString(); // stack trace as a string
        System.out.println(sStackTrace.length()>200 ? sStackTrace.substring(0, 199) : sStackTrace);
        System.out.println ("@@@@@@@@@@@@@@@("+pid+") zenodo metadata not available");
    }
    return jsondata;
}
/**
 * Converts a document carrying cmd:/cmdp: elements into a JSON object.
 *
 * The header fields, resource proxies (with the elements referring to them via
 * cmd:ref), location, tags, creators, descriptions, resource name and title,
 * legal owners and time coverage are copied under keys named after the source
 * elements. An element that is absent from the document is left out of the
 * result rather than aborting the conversion.
 *
 * @throws JSONException if a value cannot be stored in the result
 */
private JSONObject getTalarData (Document doc) throws JSONException {
    JSONObject taljsondata=new JSONObject();
    //header
    for (Element header: doc.getElementsByTag("cmd:Header")) {
        putTalarText(taljsondata, "cmd:MdCreator", header.getElementsByTag("cmd:MdCreator").first());
        putTalarText(taljsondata, "cmd:MdCreationDate", header.getElementsByTag("cmd:MdCreationDate").first());
        putTalarText(taljsondata, "cmd:MdSelfLink", header.getElementsByTag("cmd:MdSelfLink").first());
        putTalarText(taljsondata, "cmd:MdProfile", header.getElementsByTag("cmd:MdProfile").first());
        putTalarText(taljsondata, "cmd:MdCollectionDisplayName", header.getElementsByTag("cmd:MdCollectionDisplayName").first());
    }
    //cmd:ResourceProxy
    JSONArray proxyarray = new JSONArray();
    for (Element proxy: doc.getElementsByTag("cmd:ResourceProxy")) {
        JSONObject jsonproxy=new JSONObject();
        Element restype=proxy.getElementsByTag("cmd:ResourceType").first();
        if (restype!=null) {
            jsonproxy.put("resource", restype.text());
            jsonproxy.put("type", restype.attr("mimetype"));
        }
        putTalarText(jsonproxy, "reference", proxy.getElementsByTag("cmd:Resourceref").first());
        //search for cmdp:TypeSpecificSizeInfo and cmdp:ResourceProxyInfo
        JSONObject jsonproxyinfo=new JSONObject();
        String proxyid = proxy.attr("id");
        if (!proxyid.isEmpty()) { // a proxy without an id cannot be referenced through cmd:ref
            for (Element info: doc.getElementsByAttributeValue("cmd:ref", proxyid)) {
                JSONArray infochildren = new JSONArray();
                for (Element infochild: info.children()) {
                    JSONObject jsoninfochild=new JSONObject();
                    jsoninfochild.put(infochild.tagName(), infochild.text());
                    infochildren.put(jsoninfochild);
                }
                jsonproxyinfo.put(info.tagName(), infochildren);
            }
        }
        jsonproxy.put("info", jsonproxyinfo);
        proxyarray.put(jsonproxy);
    }
    taljsondata.put("cmd:ResourceProxyList", proxyarray);
    //cmdp:GeneralInfo
    for (Element ginfo: doc.getElementsByTag("cmdp:GeneralInfo")) {
        JSONObject jsoninfo=new JSONObject();
        Element location=ginfo.getElementsByTag("cmdp:Location").first();
        if (location!=null) {
            putTalarText(jsoninfo, "cmdp:Address", location.getElementsByTag("cmdp:Address").first());
            Element country= location.getElementsByTag("cmdp:Country").first();
            if (country!=null) {
                JSONObject jsoncountry=new JSONObject();
                putTalarText(jsoncountry, "cmdp:CountryCoding", country.getElementsByTag("cmdp:CountryCoding").first());
                Element countryname= country.getElementsByTag("cmdp:CountryName").first();
                if (countryname!=null) {
                    JSONObject jsoncountryname=new JSONObject();
                    jsoncountryname.put("xml:lang", countryname.attr("xml:lang"));
                    jsoncountryname.put("cmdp:CountryName", countryname.text());
                    jsoncountry.put("cmdp:Countryname", jsoncountryname);
                }
                jsoninfo.put("cmdp:Country", jsoncountry);
            }
        }
        taljsondata.put("cmdp:location", jsoninfo);
        //Tags
        Element tags=ginfo.getElementsByTag("cmdp:Tags").first();
        if (tags!=null) {
            JSONArray tagarray=new JSONArray();
            for (Element tag: tags.getElementsByTag("cmdp:tag")){
                JSONObject jsontag=new JSONObject();
                jsontag.put("xml:lang", tag.attr("xml:lang"));
                jsontag.put("cmdp:tag", tag.text());
                tagarray.put(jsontag);
            }
            taljsondata.put("cmdp:Tags", tagarray);
        }
    }
    //cmdp:Creators
    Element creators=doc.getElementsByTag("cmdp:Creators").first();
    if (creators!=null) {
        JSONArray persons=new JSONArray();
        for (Element person: creators.getElementsByTag("cmdp:Person")){
            JSONObject jsonperson=new JSONObject();
            putTalarText(jsonperson, "cmdp:firstName", person.getElementsByTag("cmdp:firstName").first());
            putTalarText(jsonperson, "cmdp:lastName", person.getElementsByTag("cmdp:lastName").first());
            putTalarText(jsonperson, "cmdp:role", person.getElementsByTag("cmdp:role").first());
            persons.put(jsonperson);
        }
        taljsondata.put("cmdp:Creators", persons);
    }
    //cmdp:Descriptions
    Element descriptions=doc.getElementsByTag("cmdp:Descriptions").first();
    if (descriptions!=null) {
        JSONArray desclist=new JSONArray();
        for (Element description: descriptions.getElementsByTag("cmdp:Description")){
            // getElementsByTag also matches the element it is called on, so the
            // first cmdp:Description under a description is the description itself.
            JSONObject jsondesc=new JSONObject();
            jsondesc.put("cmdp:Description", description.text());
            jsondesc.put("xml:lang", description.attr("xml:lang"));
            desclist.put(jsondesc);
        }
        taljsondata.put("cmdp:Descriptions", desclist);
    }
    //cmdp:ResourceName
    putTalarText(taljsondata, "cmdp:ResourceName", doc.getElementsByTag("cmdp:ResourceName").first());
    //cmdp:ResourceTitle
    Element resourcetitle=doc.getElementsByTag("cmdp:ResourceTitle").first();
    if (resourcetitle!=null) {
        JSONObject jsonrestitle=new JSONObject();
        // NOTE(review): the title text is stored under "cmdp:ResourceName"; this looks
        // like a copy of the block above - confirm with the consumers before renaming.
        jsonrestitle.put("cmdp:ResourceName", resourcetitle.text());
        jsonrestitle.put("xml:lang", resourcetitle.attr("xml:lang"));
        taljsondata.put("cmdp:ResourceTitle", jsonrestitle);
    }
    //cmdp:LegalOwner
    taljsondata.put("cmdp:LegalOwner", talarLocalisedTexts(doc, "cmdp:LegalOwner"));
    //cmdp:TimeCoverage
    taljsondata.put("cmdp:TimeCoverage", talarLocalisedTexts(doc, "cmdp:TimeCoverage"));
    return taljsondata;
}

/** Stores the text of {@code source} under {@code key}; does nothing when the element is absent. */
private static void putTalarText(JSONObject target, String key, Element source) throws JSONException {
    if (source!=null) {
        target.put(key, source.text());
    }
}

/** Builds one {tag: text, "xml:lang": language} entry for every element named {@code tag}. */
private static JSONArray talarLocalisedTexts(Document doc, String tag) throws JSONException {
    JSONArray entries=new JSONArray();
    for (Element element: doc.getElementsByTag(tag)) {
        JSONObject entry=new JSONObject();
        entry.put(tag, element.text());
        entry.put("xml:lang", element.attr("xml:lang"));
        entries.put(entry);
    }
    return entries;
}
/**
 * Extracts embedded JSON metadata and the citation string from a parsed HTML page.
 *
 * <p>Every {@code <script>} element whose {@code type} is {@code application/ld+json} or
 * {@code application/json} (case-insensitive) is parsed as a JSON object. The first one is
 * stored under {@code "properties"}; any later one is stored under
 * {@code "additional_properties"} (so with three or more matching scripts only the last
 * additional one is kept).
 *
 * <p>The citation string is taken from the {@code ng-init} attribute of the
 * {@code <invenio-csl>} element found at/under the element with id {@code invenio-csl};
 * when no such element exists, the text of the first {@code span.citation-select} is used.
 * A non-blank citation is stored, trimmed, under {@code "citation string"}.
 *
 * @param doc the parsed HTML document to inspect
 * @return a JSON object with the keys described above; empty when nothing was found
 * @throws JSONException if the content of a matching script is not a valid JSON object
 */
private JSONObject getZenodoData(Document doc) throws JSONException {
    JSONObject jsondata = new JSONObject();

    // Collect the metadata embedded in <script> elements.
    Elements scripts = doc.getElementsByTag("script");
    for (Element script : scripts) {
        String rawType = script.attr("type");
        String type = (rawType == null) ? "" : rawType.trim();
        boolean isJsonScript = type.equalsIgnoreCase("application/ld+json")
                || type.equalsIgnoreCase("application/json");
        if (!isJsonScript) {
            continue;
        }

        // Strip the opening <script ...> tag and the closing </script> from the outer HTML.
        String outerHtml = script.toString().trim();
        int headerLimit = outerHtml.indexOf(">");
        String payload = outerHtml.substring(headerLimit + 1).trim().replace("</script>", "");
        // Drop CRLF pairs and blank out any stray carriage return before parsing.
        String jsonStr = payload.replace("\r\n", "").replace('\r', ' ').trim();

        JSONObject jsonproperties = new JSONObject(jsonStr);
        if (jsondata.has("properties")) {
            jsondata.put("additional_properties", jsonproperties);
        } else {
            jsondata.put("properties", jsonproperties);
        }
    }

    // Get the citation string.
    Element citationid = doc.getElementById("invenio-csl");
    Element citationclass = doc.select("span.citation-select").first();

    // first() returns null when the id-matched element neither is nor contains an
    // <invenio-csl> tag; guard against that instead of dereferencing it.
    Element cit = null;
    if (citationid != null) {
        cit = citationid.getElementsByTag("invenio-csl").first();
    }

    String citationStr = "";
    if (cit != null) {
        citationStr = cit.attr("ng-init");
    } else if (citationclass != null) {
        citationStr = citationclass.text();
    }

    String trimmedCitation = citationStr.trim();
    if (!trimmedCitation.isEmpty()) {
        jsondata.put("citation string", trimmedCitation);
    }
    return jsondata;
}
/**
 * Converts a JSON text into a node/link graph and dumps it for inspection.
 *
 * <p>The JSON is read as a stream of parser events. Each array, named object, string and
 * number becomes a node ({@code id}, {@code name}, {@code group}, {@code value}); each node
 * is linked to the node currently on top of the {@code source} stack, starting from a
 * synthetic {@code "root"} node. Link {@code value} is 15 for arrays, 10 for objects and 20
 * for string/number leaves. {@code group} is a counter incremented when a container is
 * entered and decremented when it is left.
 *
 * <p>Side effects: trace lines and the resulting graph are printed to standard output, and
 * the graph is written to the file {@code mygraphtest.json} (relative path). Failures while
 * parsing or writing are printed via {@code printStackTrace()} and otherwise ignored.
 *
 * <p>NOTE(review): the method always returns {@code null}; the built graph is only printed
 * and written to disk. Confirm whether callers are expected to receive the graph.
 *
 * @param jsondata the JSON text to convert; must not be {@code null}
 * @return always {@code null}
 */
private JSONObject getGraph(String jsondata) {
    // Created outside the try so that a null argument fails exactly as before.
    StringReader reader = new StringReader(jsondata);
    JSONArray jo = new JSONArray();      // graph nodes
    JSONArray links = new JSONArray();   // graph edges
    JSONObject graph = new JSONObject();
    String id = "";                      // most recently seen key name
    Stack<String> source = new Stack<>();// ids of the enclosing containers
    String target = "";
    boolean isarray = false;             // set when an array was just opened
    int group = 0;
    int count = 0;                       // running suffix that makes node ids unique
    // JsonParser is Closeable: try-with-resources releases it even when parsing fails.
    try (JsonParser parser = Json.createParser(reader)) {
        source.push("root");
        while (parser.hasNext()) {
            JsonParser.Event event = parser.next();
            JSONObject joitem = new JSONObject();
            JSONObject link = new JSONObject();
            switch (event) {
                case START_ARRAY:
                    System.out.println("sa " + event.toString() + " - " + id + " " + group);
                    joitem.put("id", id + "_" + count);
                    joitem.put("name", id);
                    joitem.put("group", group);
                    joitem.put("value", id);
                    link.put("source", source.peek());
                    link.put("target", id + "_" + count);
                    link.put("value", 15);
                    links.put(link);
                    target = "";
                    source.push(id + "_" + count);
                    group = group + 1;
                    isarray = true;
                    break;
                case END_ARRAY:
                    group = group - 1;
                    break;
                case START_OBJECT:
                    System.out.println("so " + event.toString() + " - " + id + " " + group);
                    if (!isarray && !id.trim().isEmpty()) {
                        // Named object: becomes a node of its own.
                        joitem.put("id", id + "_" + count);
                        joitem.put("name", id);
                        joitem.put("group", group);
                        joitem.put("value", id);
                        link.put("source", source.peek());
                        link.put("target", id + "_" + count);
                        link.put("value", 10);
                        links.put(link);
                        target = "";
                        source.push(id + "_" + count);
                        group = group + 1;
                    }
                    else {
                        if (!isarray && id.trim().isEmpty())
                            group = group + 1;
                        else {
                            // Object opened right after an array start: no node is created;
                            // the current source is duplicated so END_OBJECT's pop is balanced.
                            isarray = false;
                            source.push(source.peek());
                        }
                    }
                    break;
                case END_OBJECT:
                    System.out.println("eo " + event.toString() + " - " + id + " " + group);
                    group = group - 1;
                    if (!source.empty())
                        source.pop();
                    target = "";
                    id = "";
                    break;
                case VALUE_FALSE:
                case VALUE_NULL:
                case VALUE_TRUE:
                    // Booleans and nulls are traced but produce no node.
                    System.out.println("true " + event.toString());
                    break;
                case KEY_NAME:
                    System.out.print("name " + event.toString() + " " +
                            parser.getString() + " - ");
                    id = parser.getString();
                    break;
                case VALUE_STRING:
                    System.out.println("string " + event.toString() + " " +
                            parser.getString() + " " + group);
                    joitem.put("id", id + "_" + count);
                    joitem.put("name", id);
                    joitem.put("group", group);
                    joitem.put("value", parser.getString());
                    target = id + "_" + count;
                    break;
                case VALUE_NUMBER:
                    System.out.println("number " + event.toString() + " " +
                            parser.getString());
                    joitem.put("id", id + "_" + count);
                    joitem.put("name", id);
                    joitem.put("group", group);
                    joitem.put("value", parser.getString());
                    target = id + "_" + count;
                    break;
            }
            if (joitem.length() > 0) {
                jo.put(joitem);
                count++;
                // Containers already added their own link and cleared target above;
                // only leaf values reach this point with a non-empty target.
                if (!target.trim().isEmpty()) {
                    link.put("source", source.peek());
                    link.put("target", target);
                    link.put("value", 20);
                    links.put(link);
                }
            }
        }
        // Synthetic root node that the top-level links point to.
        JSONObject temp = new JSONObject();
        temp.put("id", "root");
        temp.put("name", "root");
        temp.put("group", 0);
        temp.put("value", "root");
        jo.put(temp);
        graph.put("nodes", jo);
        graph.put("links", links);
    }
    catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println(graph.toString());
    try (FileWriter file = new FileWriter("mygraphtest.json")) {
        file.write(graph.toString());
        file.flush();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}
}

View File

@ -0,0 +1,45 @@
package eu.sshoc.citation.service.wfconfigurator.util;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.security.cert.X509Certificate;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
/**
 * Helper that builds jsoup connections which do NOT validate TLS certificates.
 *
 * <p><strong>Security warning:</strong> the socket factory created here installs a trust
 * manager that accepts every client and server certificate chain without any check, so
 * connections obtained from this class are open to man-in-the-middle attacks. Use it only
 * for endpoints whose certificates cannot be validated by the default trust store.
 */
public class SSLHelper {

    /**
     * Opens a jsoup connection to {@code url} that follows redirects, uses a timeout of
     * 15000 (milliseconds, per the jsoup {@code Connection.timeout} API) and accepts any
     * TLS certificate.
     *
     * @param url the address to connect to
     * @return a configured, not yet executed, jsoup connection
     * @throws RuntimeException if the permissive socket factory cannot be created
     */
    public static Connection getConnection(String url) {
        return Jsoup.connect(url)
                .followRedirects(true)
                .timeout(15000)
                .sslSocketFactory(SSLHelper.socketFactory());
    }

    /**
     * Creates a socket factory backed by a trust-all {@link X509TrustManager}.
     *
     * @return a socket factory that performs no certificate validation
     * @throws RuntimeException wrapping the cause when the context cannot be initialised
     */
    private static SSLSocketFactory socketFactory() {
        TrustManager[] trustAllCerts = new TrustManager[] {new X509TrustManager() {
            @Override
            public X509Certificate[] getAcceptedIssuers() {
                return new X509Certificate[0];
            }

            @Override
            public void checkClientTrusted(X509Certificate[] certs, String authType) {
                // Intentionally empty: every client certificate is accepted.
            }

            @Override
            public void checkServerTrusted(X509Certificate[] certs, String authType) {
                // Intentionally empty: every server certificate is accepted.
            }
        }};
        try {
            // "TLS" is the portable standard algorithm name; "SSL" is a legacy name that
            // is not guaranteed to be offered by every JSSE provider.
            SSLContext sslContext = SSLContext.getInstance("TLS");
            sslContext.init(null, trustAllCerts, new java.security.SecureRandom());
            return sslContext.getSocketFactory();
        } catch (NoSuchAlgorithmException | KeyManagementException e) {
            throw new RuntimeException("Failed to create a SSL socket factory", e);
        }
    }
}

View File

@ -21,7 +21,6 @@
<li> <a href="http://139.91.183.48:8181/EVREMetadataServices">Metadata Service</a></li>
<li> <a href="http://www.unity-idm.eu/documentation/unity-1.9.4/manual.html#_how_to_work_">AAAI</a></li>
</ul>
</p>
<p></p>
</div>
</div>

View File

@ -21,7 +21,7 @@
<div class="footer">
<hr/>
<p><i>Powered by</i> <img alt="SSHOC logo" src="logo.png" width="80" height="50" style="vertical-align:middle"> </img></p>
<p><i>Powered by</i> <img alt="SSHOC logo" src="logo.png" width="80" height="50" style="vertical-align:middle"></img> </p>
</div>