();
+ try {
+ Document doc = Jsoup.connect(pid).get();
+ //String title = doc.title();
+ Elements metas = doc.getElementsByTag("meta");
+ for (Element meta: metas) {//get metadata from
+ String name = meta.attr("name");
+ String property = meta.attr("property");
+ String content = meta.attr("content");
+ if (!name.trim().isEmpty() &&
+ !content.trim().isEmpty() &&
+ !name.trim().contains("-site-verification") &&
+ !name.trim().contains("citation_author")) {
+
+ jsondata.put(name, content);
+ }
+ if (name.trim().contains("citation_author")) {//to be used to double check authors
+ authors_affiliation.put(content, "na");
+ }
+ if (!property.trim().isEmpty() && !content.trim().isEmpty())
+ jsondata.put(property.trim(), content.trim());
+
+ }
+
+ //get metadata about authors and affiliation from section
+
+ Element recordDetail = doc.select("div.container.record-detail").first();
+ Elements affiliations = recordDetail.select("span[title]"); // span with title attribute
+ for (Element affiliation: affiliations) {
+ String organisation = affiliation.attr("title");
+ String auth=affiliation.text();
+ if (!organisation.trim().isEmpty() && !auth.trim().isEmpty()) {
+ JSONObject org = new JSONObject();
+ org.put("organisation", organisation);
+ org.put("author_name", auth);
+ authors.put(org);
+ }
+ }
+
+ jsondata.put("authors", authors);
+ //search for alternate link
+
+ Element head = doc.select("head").first();
+ Elements links= head.select("link[type]");
+ for (Element link:links) {
+ String rel=link.attr("rel");
+ String type= link.attr("type");
+ String href= link.attr("href");
+ if (rel.trim().equalsIgnoreCase("alternate")) {
+ JSONObject alt = new JSONObject();
+ alt.put("type", type);
+ alt.put("href", href);
+ jsondata.put("alternate", alt);
+ }
+
+ }
+
+ } catch (Exception e) {
+
+ e.printStackTrace();
+ }
+
+ return jsondata;
+
+ }
+
+}