diff --git a/src/main/java/eu/sshoc/citation/service/SwaggerConfig.java b/src/main/java/eu/sshoc/citation/service/SwaggerConfig.java index 2df6bdb..860f3cf 100644 --- a/src/main/java/eu/sshoc/citation/service/SwaggerConfig.java +++ b/src/main/java/eu/sshoc/citation/service/SwaggerConfig.java @@ -36,7 +36,7 @@ public class SwaggerConfig { @Bean public Docket api() { return new Docket(DocumentationType.SWAGGER_2) - .groupName("citationservice-api") + .groupName("SSHOC-citationservice-api") .apiInfo(apiInfo()) .select() //.apis(!(RequestHandlerSelectors.withClassAnnotation(JsonIgnore.class)) diff --git a/src/main/java/eu/sshoc/citation/service/services/CitationHarvester.java b/src/main/java/eu/sshoc/citation/service/services/CitationHarvester.java index b468041..7042f64 100644 --- a/src/main/java/eu/sshoc/citation/service/services/CitationHarvester.java +++ b/src/main/java/eu/sshoc/citation/service/services/CitationHarvester.java @@ -47,7 +47,7 @@ public class CitationHarvester { }*/ } - @ApiOperation(value = "Returns a list of citations from specific citation source", + @ApiOperation(value = "Returns a list of citations from specific citation source (implementation in progress)", notes = "A client with a valid identifier can invoke this web service to obtain a list of citations from a specified source", response = String.class) @RequestMapping(value="/citharvester/getcitationlist", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE) @@ -66,7 +66,7 @@ public class CitationHarvester { return wfc.getCitation(sourceid, pid, token).toString(); } - @ApiOperation(value = "Returns a metadata of a citation via Content Negotiated requests", + @ApiOperation(value = "Returns a metadata record of a citation via Content Negotiated requests", notes = "A client with a valid identifier can invoke this web service to to retrieve the citation metadata using DOI content negotiated requests", response = String.class) @RequestMapping(value="/citharvester/getmetadatacn", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE) @@ -75,7 +75,7 @@ public class CitationHarvester { return wfc.getCitationMetadata(pid, token).toString(); } - @ApiOperation(value = "Returns formatted citation using content negotiated request", + @ApiOperation(value = "Returns formatted citation using content negotiated requests", notes = "A client with a valid identifier can invoke this web service to obtain a formatted citation, the text/bibliography content type is used ", response = String.class) @RequestMapping(value="/citharvester/getformcit", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE) @@ -85,8 +85,8 @@ public class CitationHarvester { } - @ApiOperation(value = "Returns a metadata of a citation parsing the HTML landing page", - notes = "A client with a valid identifier can invoke this web service to obtain metadata of a citation by parsing its HTML landing page", + @ApiOperation(value = "Returns a metadata record for a citation searching in the available metadata repositories", + notes = "A client with a valid identifier can invoke this web service to obtain metadata of a citation by searching in the available metadata repositories (DOI repositories, landing pages, etc...) ", response = String.class) @RequestMapping(value="/citharvester/getmetadatahtml", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE) public String getCitationMetadataFromHTML(@RequestParam(value="pid") String pid, @RequestParam(value="token") String token) { diff --git a/src/main/java/eu/sshoc/citation/service/services/CitationService.java b/src/main/java/eu/sshoc/citation/service/services/CitationService.java index 012ebe6..2d96226 100644 --- a/src/main/java/eu/sshoc/citation/service/services/CitationService.java +++ b/src/main/java/eu/sshoc/citation/service/services/CitationService.java @@ -64,7 +64,7 @@ public class CitationService { } @ApiOperation(value = "Returns a list of citations", - notes = "A client with a valid identifier can invoke this web service to obtain a list of citations stored in SSHOC citation repository", + notes = "A client with a valid identifier can invoke this web service to obtain a list of citations", response = String.class) @RequestMapping(value="/citservice/getcitation", method=RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE) public String getServiceDescriptions(@RequestParam(value="sshocid") String sshocid, @RequestParam(value="token") String token) { @@ -72,8 +72,8 @@ public class CitationService { return wfc.getServiceDescriptions(sshocid, token); } - @ApiOperation(value = "Search for citation", - notes = "A client with a valid identifier can invoke this web service to search the SSHOC repository for citations", + @ApiOperation(value = "Search for citation (implementation in progress)", + notes = "A client with a valid identifier can invoke this web service to search for citations", response = String.class) @RequestMapping(value="/citservice/searchcitation", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE) public String searchServiceDescriptions(@RequestParam(value="sshocid") String sshocid, @RequestParam(value="query") String query, @RequestParam(value="token") String token){ @@ -81,8 +81,8 @@ public class CitationService { return null; } - @ApiOperation(value = "Save a citation", - notes = "A client with a valid identifier can invoke this web service to save the citation in the SSHOC repository", + @ApiOperation(value = "Save a citation (implementation in progress)", + notes = "A client with a valid identifier can invoke this web service to save the citation", response = String.class) @RequestMapping(value="/citservice/savecitation", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE) public String saveCitations(@RequestParam(value="sshocid") String sshocid, @RequestParam(value="description") String description, @RequestParam(value="token") String token){ @@ -90,7 +90,7 @@ public class CitationService { return wfc.saveCitations(sshocid, description, token); } - @ApiOperation(value = "Search for citations ", + @ApiOperation(value = "Search for citations (implementation in progress)", notes = "A client with a valid identifier can invoke this web service to search the SSHOC repository for citations", response = String.class) @RequestMapping(value="/citservice/searchcite", method = RequestMethod.GET, produces = MediaType.APPLICATION_JSON_VALUE) diff --git a/src/main/java/eu/sshoc/citation/service/wfconfigurator/impl/CitationHarvesterImpl.java b/src/main/java/eu/sshoc/citation/service/wfconfigurator/impl/CitationHarvesterImpl.java index 9f12fb8..4add8cb 100644 --- a/src/main/java/eu/sshoc/citation/service/wfconfigurator/impl/CitationHarvesterImpl.java +++ b/src/main/java/eu/sshoc/citation/service/wfconfigurator/impl/CitationHarvesterImpl.java @@ -4,7 +4,9 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.io.PrintWriter; import java.io.Reader; +import java.io.StringWriter; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.ProtocolException; @@ -22,122 +24,212 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; +import eu.sshoc.citation.service.wfconfigurator.util.Consts; import eu.sshoc.citation.service.wfconfigurator.util.HeuristicParsers; public class CitationHarvesterImpl { - - + + HeuristicParsers heup= new HeuristicParsers(); + StringWriter sw = new StringWriter(); + PrintWriter pw = new PrintWriter(sw); + String sid="10.1007/s11082-018-1327-1"; //10.1126/science.169.3946.635 + URL myURL; + Boolean getCNCit=true; + public CitationHarvesterImpl(){ - + + try { + URL myURL = new URL("https://doi.org/"+sid); + } catch (MalformedURLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } } - - private static String readAll(Reader rd) throws IOException { - StringBuilder sb = new StringBuilder(); - int cp; - while ((cp = rd.read()) != -1) { - sb.append((char) cp); - } - return sb.toString(); - } - public static JSONObject readJsonFromUrl(String url) throws IOException, JSONException { - InputStream is = new URL(url).openStream(); - try { - BufferedReader rd = new BufferedReader(new InputStreamReader(is, Charset.forName("UTF-8"))); - String jsonText = readAll(rd); - JSONObject json = new JSONObject(jsonText); - return json; - } finally { - is.close(); - } - } - private static String getStandardCitation(String citurl) throws IOException{ - URL myURL = new URL(citurl); - HttpURLConnection myURLConnection = (HttpURLConnection)myURL.openConnection(); - myURLConnection.setRequestProperty("Accept", "text/x-bibliography; style=harvard3; locale=fr-FR"); - InputStream mis = myURLConnection.getInputStream(); - BufferedReader rd = new BufferedReader(new InputStreamReader(mis, Charset.forName("UTF-8"))); - return (readAll(rd)); - - } + private static String readAll(Reader rd) throws IOException { + StringBuilder sb = new StringBuilder(); + int cp; + while ((cp = rd.read()) != -1) { + sb.append((char) cp); + } + return sb.toString(); + } + + public static JSONObject readJsonFromUrl(String url) throws IOException, JSONException { + InputStream is = new URL(url).openStream(); + try { + BufferedReader rd = new BufferedReader(new InputStreamReader(is, Charset.forName("UTF-8"))); + String jsonText = readAll(rd); + JSONObject json = new JSONObject(jsonText); + return json; + } finally { + is.close(); + } + } + private static String getStandardCitation(String citurl) throws IOException{ + URL myURL = new URL(citurl); + HttpURLConnection myURLConnection = (HttpURLConnection)myURL.openConnection(); + myURLConnection.setRequestProperty("Accept", "text/x-bibliography; style=harvard3; locale=fr-FR"); + InputStream mis = myURLConnection.getInputStream(); + BufferedReader rd = new BufferedReader(new InputStreamReader(mis, Charset.forName("UTF-8"))); + return (readAll(rd)); + + } public String getCitationList(String id, String token){ return null; } - + public JSONObject getCitation(String id, String pid, String token){ JSONObject jsondata=null; try { String sid="10.1007/s11082-018-1327-1"; if (pid!=null && pid.trim()!="" && !pid.trim().equalsIgnoreCase("test")) sid=pid.trim(); - + jsondata = readJsonFromUrl("https://api.test.datacite.org/dois/"+sid); - System.out.println(jsondata.toString()); + System.out.println(jsondata.toString()); // System.out.println(json.get("id")); } catch (IOException | JSONException e) { - + e.printStackTrace(); } - + return jsondata; } public JSONObject getCitationMetadata(String pid, String token){ - JSONObject jsondata=null; + JSONObject jsondata=new JSONObject(); + JSONObject jsonproperties=null; + //Consts myC= new Consts(); + //String sid="10.1007/s11082-018-1327-1"; try { - String sid="10.1007/s11082-018-1327-1"; if (pid!=null && pid.trim()!="" && !pid.trim().equalsIgnoreCase("test")) sid=pid.trim(); - URL myURL = new URL("https://doi.org/"+sid); + //URL myURL = new URL("https://doi.org/"+sid); + if (sid.startsWith("http://") || sid.startsWith("https://")) + myURL = new URL(sid); + HttpURLConnection myURLConnection = (HttpURLConnection)myURL.openConnection(); myURLConnection.setRequestProperty("Accept", "application/rdf+xml;q=0.5, application/vnd.citationstyles.csl+json;q=1.0"); + myURLConnection.setConnectTimeout(18000); InputStream mis = myURLConnection.getInputStream(); BufferedReader rd = new BufferedReader(new InputStreamReader(mis, Charset.forName("UTF-8"))); - String jsonText = readAll(rd); - jsondata = new JSONObject(jsonText); + String jsonText = readAll(rd); + jsonproperties = new JSONObject(jsonText); + //clean object + for (String field : Consts.unusedFields) { + if (jsonproperties.has(field)) + jsonproperties.remove(field); + } + jsondata.put("properties", jsonproperties); + if (getCNCit) + jsondata.put("citation string", getCitationCSL(pid, token).get("citation string")); System.out.println(jsondata.toString()); //JSONObject jsoncit = new JSONObject(); - //jsondata.put("citation", getStandardCitation("https://doi.org/"+sid)); - + } catch (Exception e) { - - e.printStackTrace(); + + //e.printStackTrace(); + + e.printStackTrace(pw); + String sStackTrace = sw.toString(); // stack trace as a string + if (sStackTrace.length()>800) + System.out.println(sStackTrace.substring(0, 799)); + pw.flush(); + System.out.println ("***************** ("+sid+") getCitationMetadata, content negotiation not available, maybe later?"); + //return jsondata; } - + return jsondata; } - + public JSONObject getCitationCSL(String pid, String token){ String jsonText=""; JSONObject jsondata=null; + try { - String sid="10.1007/s11082-018-1327-1"; //10.1126/science.169.3946.635 + if (pid!=null && pid.trim()!="" && !pid.trim().equalsIgnoreCase("test")) sid=pid.trim(); - URL myURL = new URL("https://doi.org/"+sid); + + if (sid.startsWith("http://") || sid.startsWith("https://")) { + if (sid.startsWith("http://")) { + sid=sid.replace("http://", "https://"); + } + myURL = new URL(sid); + } //URL myURL = new URL("https://doi.org/10.1126/science.169.3946.635"); HttpURLConnection myURLConnection = (HttpURLConnection)myURL.openConnection(); - myURLConnection.setRequestProperty("Accept", "text/x-bibliography; style=harvard3; locale=en-EN"); + myURLConnection.setRequestProperty("Accept", "text/x-bibliography"); + myURLConnection.setConnectTimeout(18000); //set timeout to 18 seconds InputStream mis = myURLConnection.getInputStream(); BufferedReader rd = new BufferedReader(new InputStreamReader(mis, Charset.forName("UTF-8"))); - jsonText = readAll(rd); - - System.out.println(jsonText); + jsonText = readAll(rd); + + System.out.println("citation string "+ jsonText); jsondata = new JSONObject(); - jsondata.put("citation", jsonText); - + if (!jsonText.isEmpty()) + jsondata.put("citation string", jsonText); + } catch (Exception e) { - - e.printStackTrace(); + + e.printStackTrace(pw); + String sStackTrace = sw.toString(); // stack trace as a string + if (sStackTrace.length()>200) + System.out.println(sStackTrace.substring(0, 199)); + System.out.println ("####################### ("+pid+") getCitationCSL, citation by content negotiation not available"); } - + return jsondata; } public JSONObject getCitationMetadataFromHTML(String pid, String token){ - HeuristicParsers heup= new HeuristicParsers(); - return heup.getZenodoMetadata(pid); + + JSONObject jsondata=null; + + if (pid.startsWith("http://")) { + pid=pid.replace("http://", "https://"); + } + + //first landing pages + + System.out.println("getZenodoMetadataJSONLD ("+pid+")"); + jsondata=heup.getZenodoMetadataJSONLD(pid); + + if (jsondata==null || jsondata.length()==0) { + System.out.println("getMetaMetadata ("+pid+")"); + jsondata=heup.getMetaMetadata(pid); + } + + if (jsondata==null || jsondata.length()==0) { + System.out.println("getLinkMetadata ("+pid+")"); + jsondata= heup.getLinkMetadata(pid); + } + + //then DOI service providers... + + if (jsondata==null || jsondata.length()==0) { + getCNCit=true; + System.out.println("getCitationMetadata ("+pid+")"); + jsondata=getCitationMetadata(pid, token); + getCNCit=true; + } + if (jsondata==null || jsondata.length()==0) { + System.out.println("getCitationCSL ("+pid+")"); + jsondata=getCitationCSL(pid, token); + } + if (jsondata==null || jsondata.length()==0) { + try { + jsondata=new JSONObject(); + jsondata.put("citation string", "na"); + } catch (JSONException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + return jsondata; } } diff --git a/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/Consts.java b/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/Consts.java new file mode 100644 index 0000000..b2d9adc --- /dev/null +++ b/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/Consts.java @@ -0,0 +1,48 @@ +/******************************************************************************* + * Copyright (c) 2020 VRE4EIC Consortium + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ +package eu.sshoc.citation.service.wfconfigurator.util; + +import java.util.Vector; + +public class Consts { + + public static Vector unusedFields=new Vector(); + public static Vector dcterms=new Vector(); + + public Consts () { + unusedFields.add("published-print"); + unusedFields.add("published-online"); + unusedFields.add("is-referenced-by-count"); + + unusedFields.add("journal-issue"); + unusedFields.add("content-domain"); + unusedFields.add("alternative-id"); + unusedFields.add("relation"); + unusedFields.add("score"); + + unusedFields.add("member"); + unusedFields.add("inference_count"); + unusedFields.add("assertion"); + unusedFields.add("issue"); + unusedFields.add("indexed"); + //dcterms + dcterms.add("dcterms:title"); + dcterms.add("dcterms:language"); + dcterms.add("dcterms:accessRights"); + dcterms.add("dcterms:creator"); + dcterms.add("dcterms:service"); + } +} diff --git a/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/HeuristicParsers.java b/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/HeuristicParsers.java index 6b9e41c..b604116 100644 --- a/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/HeuristicParsers.java +++ b/src/main/java/eu/sshoc/citation/service/wfconfigurator/util/HeuristicParsers.java @@ -15,85 +15,703 @@ *******************************************************************************/ package eu.sshoc.citation.service.wfconfigurator.util; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringReader; +import java.io.StringWriter; import java.util.HashMap; +import java.util.Stack; +import java.util.Vector; import org.json.JSONArray; +import org.json.JSONException; import org.json.JSONObject; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; +import javax.json.stream.JsonParser; +import javax.json.Json; public class HeuristicParsers { - - public JSONObject getZenodoMetadata(String pid){ - JSONObject jsondata=new JSONObject(); - JSONArray authors = new JSONArray(); - - HashMap authors_affiliation = new HashMap(); - try { - Document doc = Jsoup.connect(pid).get(); - //String title = doc.title(); - Elements metas = doc.getElementsByTag("meta"); - for (Element meta: metas) {//get metadata from - String name = meta.attr("name"); - String property = meta.attr("property"); - String content = meta.attr("content"); - if (!name.trim().isEmpty() && - !content.trim().isEmpty() && - !name.trim().contains("-site-verification") && - !name.trim().contains("citation_author")) { - - jsondata.put(name, content); - } - if (name.trim().contains("citation_author")) {//to be used to double check authors - authors_affiliation.put(content, "na"); - } - if (!property.trim().isEmpty() && !content.trim().isEmpty()) - jsondata.put(property.trim(), content.trim()); - - } - - //get metadata about authors and affiliation from
section - - Element recordDetail = doc.select("div.container.record-detail").first(); - Elements affiliations = recordDetail.select("span[title]"); // span with title attribute - for (Element affiliation: affiliations) { - String organisation = affiliation.attr("title"); - String auth=affiliation.text(); - if (!organisation.trim().isEmpty() && !auth.trim().isEmpty()) { - JSONObject org = new JSONObject(); - org.put("organisation", organisation); - org.put("author_name", auth); - authors.put(org); - } - } - - jsondata.put("authors", authors); - //search for alternate link - - Element head = doc.select("head").first(); - Elements links= head.select("link[type]"); - for (Element link:links) { - String rel=link.attr("rel"); - String type= link.attr("type"); - String href= link.attr("href"); - if (rel.trim().equalsIgnoreCase("alternate")) { - JSONObject alt = new JSONObject(); - alt.put("type", type); - alt.put("href", href); - jsondata.put("alternate", alt); - } - - } - - } catch (Exception e) { - - e.printStackTrace(); + + StringWriter sw = new StringWriter(); + PrintWriter pw = new PrintWriter(sw); + + public JSONObject getZenodoMetadataJSONLD(String pid) { + JSONObject jsondata=new JSONObject(); + try { + //Document doc = Jsoup.connect(pid).get(); + + Document doc = SSLHelper.getConnection(pid).ignoreContentType(true) + .userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0").timeout(20 * 1000).get(); + Elements cmdheader=doc.getElementsByTag("cmd:Header"); + if (cmdheader!=null && cmdheader.size()>0) { + JSONObject talarproperties=new JSONObject(); + talarproperties=getTalarData(doc); + if (talarproperties!=null) + jsondata.put("properties", talarproperties); + return jsondata; } - - return jsondata; - - } + + //application/ld+json + Elements scripts = doc.getElementsByTag("script"); + for (Element script: scripts) {//get metadata from ", ""); + String jsonStrraw = jsonStrraw1.replace("\r\n", ""); + jsonStr = jsonStrraw.replace('\r', ' '); + //System.out.println(jsonStr); + //getGraph(jsonStr); + JSONObject jsonproperties=new JSONObject(); + jsonproperties=new JSONObject(jsonStr.trim()); + if (jsondata.length()>0 && jsondata.has("properties")) { + jsondata.put("additional_properties", jsonproperties); + } + else { + jsondata.put("properties", jsonproperties); + } + } + } + //get the citation string + + Element citationid = doc.getElementById("invenio-csl"); + //Elements citationclass = doc.getElementsByAttributeValue("class", "citation-select"); + Element citationclass = doc.select("span.citation-select").first(); + String citationStr=""; + if (citationid!=null) { + Elements cits= citationid.getElementsByTag("invenio-csl"); + + Element cit=cits.first(); + + citationStr=cit.attr("ng-init"); + }else { + if (citationclass!=null) { + citationStr= citationclass.text(); + } + } + if (!citationStr.trim().isEmpty()) { + jsondata.put("citation string", citationStr.trim()); + } + + } + catch (Exception e) { + + //e.printStackTrace(); + e.printStackTrace(pw); + String sStackTrace = sw.toString(); // stack trace as a string + pw.flush(); + if (sStackTrace.length()>1500) + System.out.println(sStackTrace.substring(0, 1499)); + System.out.println ("("+pid+") not available"); + } + + return jsondata; + + } + + public JSONObject getLinkMetadata(String pid) { + + JSONObject jsondata=new JSONObject(); + + JSONObject jsonproperties=new JSONObject(); + try { + //Document doc = Jsoup.connect(pid).timeout(15 * 1000).get(); + Document doc = SSLHelper.getConnection(pid).timeout(15 * 1000).get(); + Elements links = doc.getElementsByTag("link"); + for (Element link: links) {//get metadata from + String rel = link.attr("rel"); + String href= link.attr("href"); + if (!rel.trim().isEmpty() && + rel.trim().equalsIgnoreCase("metadata")){ + + jsonproperties.put(rel, href); + } + + } + if (jsonproperties.length()>0) + jsondata.put("properties", jsonproperties); + System.out.println(jsondata); + + } + catch (Exception e) { + + e.printStackTrace(pw); + String sStackTrace = sw.toString(); // stack trace as a string + if (sStackTrace.length()>200) + System.out.println(sStackTrace.substring(0, 199)); + pw.flush(); + System.out.println ("&&&&&&&&&&&&&&&&&&&&&&&&& ("+pid+") link metadata not available"); + } + + return jsondata; + + } + public JSONObject getMetaMetadata(String pid){ + JSONObject jsondata=new JSONObject(); + // JSONArray authors = new JSONArray(); + JSONObject jsonproperties=new JSONObject(); + + HashMap authors_affiliation = new HashMap(); + try { + //Document doc = Jsoup.connect(pid).timeout(10 * 1000).get(); + Document doc = SSLHelper.getConnection(pid).timeout(15 * 1000).get(); + //String title = doc.title(); + Elements cmdheader=doc.getElementsByTag("cmd:Header"); + if (cmdheader!=null && cmdheader.size()>0) { + JSONObject talarproperties=new JSONObject(); + talarproperties=getTalarData(doc); + if (talarproperties!=null) + jsondata.put("properties", talarproperties); + return jsondata; + } + + + Elements metas = doc.getElementsByTag("meta"); + for (Element meta: metas) {//get metadata from + String name = meta.attr("name"); + String property = meta.attr("property"); + String content = meta.attr("content"); + if (!name.trim().isEmpty() && + !content.trim().isEmpty() && + (name.toLowerCase().trim().startsWith("dc.") || + name.trim().startsWith("citation_"))){ + + jsonproperties.put(name, content); + } + if (name.trim().contains("citation_author")) {//to be used to double check authors + authors_affiliation.put(content, "na"); + } + if (!property.trim().isEmpty() && !content.trim().isEmpty()) + jsonproperties.put(property.trim(), content.trim()); + + } + //get metadata about authors and affiliation from
section + + Element record = doc.select("div[vocab]").first(); + //search for vocab + if (record!=null) { + Elements items = record.select("span[property]"); // span with property attribute + + for (Element item: items) { + String name = item.attr("property"); + String val=item.attr("value"); + if (!name.trim().isEmpty() && !val.trim().isEmpty()) { + jsonproperties.put(name, val); + } + } + + } + + // diff --git a/src/main/resources/templates/welcome.html b/src/main/resources/templates/welcome.html index 78daefa..2880be5 100644 --- a/src/main/resources/templates/welcome.html +++ b/src/main/resources/templates/welcome.html @@ -21,7 +21,7 @@