code improved
This commit is contained in:
parent
992464e0a8
commit
e729dfc702
|
@ -216,8 +216,11 @@ public class CitationHarvesterImpl {
|
|||
pid=pid.replace("http://", "https://");
|
||||
}
|
||||
|
||||
if (!pid.startsWith("https"))
|
||||
pid="https://"+pid.trim();
|
||||
|
||||
//special cases
|
||||
if (pid.contains("ien.bg.ac.rs"))
|
||||
if (pid.contains("ien.bg.ac.rs") || pid.contains("eprints.rclis.org") || pid.contains("opengrey.eu"))
|
||||
pid=pid.replace("https://", "http://");
|
||||
|
||||
//first landing pages
|
||||
|
@ -228,38 +231,47 @@ public class CitationHarvesterImpl {
|
|||
|
||||
}
|
||||
|
||||
if (jsondata==null || jsondata.length()==0) {
|
||||
/*if (jsondata==null || jsondata.length()==0) {
|
||||
System.out.println("getMetaMetadata ("+pid+")");
|
||||
jsondata=heup.getMetaMetadata(pid);
|
||||
if (jsondata!=null && jsondata.has("properties")){
|
||||
try {
|
||||
JSONObject testp=(JSONObject) jsondata.get("properties");
|
||||
String citdoi="";
|
||||
if (testp.has("citation_doi") && !(pid.contains("doi.org/"))) {//check if there is metadata in DOI RA
|
||||
|
||||
citdoi = testp.getString("citation_doi");
|
||||
if (citdoi.trim()!="" && !citdoi.contains("doi.org/")) {
|
||||
citdoi="https://doi.org/"+citdoi.trim();
|
||||
}
|
||||
if (citdoi.trim()!="")
|
||||
pid=citdoi;
|
||||
|
||||
}
|
||||
|
||||
} catch (JSONException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
|
||||
|
||||
if (jsondata==null || jsondata.length()==0) {
|
||||
/*if (jsondata==null || jsondata.length()==0) {
|
||||
System.out.println("getLinkMetadata ("+pid+")");
|
||||
jsondata= heup.getLinkMetadata(pid);
|
||||
}
|
||||
}*/
|
||||
|
||||
if (jsondata!=null && jsondata.has("properties")){
|
||||
try {
|
||||
JSONObject testp=(JSONObject) jsondata.get("properties");
|
||||
String citdoi="";
|
||||
if (testp.has("citation_doi") && !(pid.contains("doi.org/"))) {//check if there is metadata in DOI RA
|
||||
|
||||
citdoi = testp.getString("citation_doi");
|
||||
if (citdoi.trim()!="" && !citdoi.contains("doi.org/")) {
|
||||
citdoi="https://doi.org/"+citdoi.trim();
|
||||
}
|
||||
|
||||
}else {
|
||||
if ((testp.has("og:url")) && (testp.getString("og:url").trim().startsWith("https://dl.acm.org/doi/abs/"))) {
|
||||
citdoi=testp.getString("og:url").trim().replace("https://dl.acm.org/doi/abs/", "https://doi.org/");
|
||||
}
|
||||
|
||||
}
|
||||
if (citdoi.trim()!="")
|
||||
pid=citdoi;
|
||||
|
||||
} catch (JSONException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
//https://link.springer.com/book/
|
||||
if (pid.startsWith("https://link.springer.com/book/"))
|
||||
pid=pid.trim().replace("https://link.springer.com/book/", "https://doi.org/");
|
||||
//then DOI service providers...
|
||||
try {
|
||||
//System.out.println("jsondata.length() "+((JSONObject) jsondata.get("properties")).length());
|
||||
|
@ -276,8 +288,9 @@ public class CitationHarvesterImpl {
|
|||
e1.printStackTrace();
|
||||
}
|
||||
|
||||
if (pid.contains("doi.org") && (jsondata==null || jsondata.length()==0 || jsondata.isNull("citation string"))) {
|
||||
System.out.println("getCitationCSL ("+pid+")");
|
||||
|
||||
if (pid.contains("doi.org") && (jsondata==null || jsondata.length()==0 || jsondata.isNull("citation string")) && !token.contains("testAPI")) {
|
||||
System.out.println("getCitationCSL ("+pid+") "+ token);
|
||||
JSONObject jsoncsldata=null;
|
||||
jsoncsldata=getCitationCSL(pid, token);
|
||||
if (jsondata==null)
|
||||
|
|
|
@ -54,8 +54,8 @@ public class HeuristicParsers {
|
|||
try {
|
||||
//Check if it is a handle
|
||||
String dnsPid="";
|
||||
|
||||
|
||||
|
||||
|
||||
if (pid.contains(".handle.net/")) {
|
||||
//get the actual repository URL
|
||||
String haid=pid.substring(22);
|
||||
|
@ -74,18 +74,18 @@ public class HeuristicParsers {
|
|||
if (tmp.getString("type").contains("URL")) {
|
||||
JSONObject urlob=(JSONObject) tmp.get("data");
|
||||
dnsPid=urlob.getString("value");
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (dnsPid.trim()=="")
|
||||
return (jsondata);
|
||||
|
||||
|
||||
System.out.println("pid "+dnsPid);
|
||||
String apiURL=repo.getRepoApi(dnsPid);
|
||||
|
||||
if (apiURL!="none") {
|
||||
|
||||
if (!apiURL.trim().equalsIgnoreCase("none")) {
|
||||
apiURL=apiURL.replace("viewerPid", pid.substring(23));
|
||||
System.out.println("apiURL "+apiURL);
|
||||
Document apiDoc = SSLHelper.getConnection(apiURL).ignoreContentType(true)
|
||||
|
@ -103,12 +103,12 @@ public class HeuristicParsers {
|
|||
}
|
||||
else
|
||||
metajsondata.put(child.tagName(), child.text());
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
if (metajsondata!=null && metajsondata.length()>0) {
|
||||
String rname=repo.getRepoName(dnsPid);
|
||||
String rws=repo.getRepoURL(dnsPid);
|
||||
|
@ -116,7 +116,7 @@ public class HeuristicParsers {
|
|||
jsonrepodata.put("name", rname);
|
||||
if (rws!="none")
|
||||
jsonrepodata.put("URL", rws);
|
||||
|
||||
|
||||
}
|
||||
} catch (Exception e) {
|
||||
// TODO Auto-generated catch block
|
||||
|
@ -125,7 +125,7 @@ public class HeuristicParsers {
|
|||
if (metajsondata!=null && metajsondata.length()>0) {
|
||||
try {
|
||||
jsondata.put("citation string", "na");
|
||||
|
||||
|
||||
jsondata.put("metadata source", jsonrepodata);
|
||||
jsondata.put("properties", metajsondata);
|
||||
} catch (JSONException e) {
|
||||
|
@ -141,7 +141,7 @@ public class HeuristicParsers {
|
|||
//Document doc = Jsoup.connect(pid).get();
|
||||
|
||||
Document doc = SSLHelper.getConnection(pid).ignoreContentType(true)
|
||||
.userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0").referrer("http://www.google.com").timeout(20 * 1000).get();
|
||||
.userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0").timeout(30 * 1000).get();
|
||||
Elements cmdheader=doc.getElementsByTag("cmd:Header");
|
||||
if (cmdheader!=null && cmdheader.size()>0) {
|
||||
JSONObject talarproperties=new JSONObject();
|
||||
|
@ -168,10 +168,12 @@ public class HeuristicParsers {
|
|||
String jsonStrraw1=tmpStr.trim().replace("</script>", "");
|
||||
String jsonStrraw = jsonStrraw1.replace("\r\n", "");
|
||||
jsonStr = jsonStrraw.replace('\r', ' ');
|
||||
jsonStr = jsonStr.replace("/*]]>*/", "");
|
||||
jsonStr = jsonStr.replace("/*<![CDATA[*/", "");
|
||||
//System.out.println(jsonStr);
|
||||
//getGraph(jsonStr);
|
||||
//JSONObject jsonproperties=new JSONObject();
|
||||
|
||||
|
||||
jsonproperties=new JSONObject(jsonStr.trim());
|
||||
if (jsonproperties.has("mainEntity")) {
|
||||
jsonproperties=jsonproperties.getJSONObject("mainEntity");
|
||||
|
@ -192,7 +194,7 @@ public class HeuristicParsers {
|
|||
int obend=tmpStr.indexOf("}");
|
||||
String jsstr=tmpStr.substring(0, obend);
|
||||
System.out.println("--------------------"+jsstr);
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
Elements metas = doc.getElementsByTag("meta");
|
||||
|
@ -200,30 +202,32 @@ public class HeuristicParsers {
|
|||
String name = meta.attr("name");
|
||||
String property = meta.attr("property");
|
||||
String content = meta.attr("content");
|
||||
|
||||
|
||||
if (!name.trim().isEmpty() &&
|
||||
!content.trim().isEmpty() &&
|
||||
(name.toLowerCase().trim().startsWith("dc.") ||
|
||||
name.trim().startsWith("citation_")) ||
|
||||
name.toLowerCase().trim().startsWith("eprints.")){
|
||||
name.toLowerCase().trim().startsWith("eprints.")){
|
||||
if (name.equalsIgnoreCase("eprints.citation")) {
|
||||
jsondata.put("citation string", content);
|
||||
}
|
||||
if (jsonproperties.has(name)) {
|
||||
String names= jsonproperties.get(name).toString();
|
||||
content=names+", "+content;
|
||||
}
|
||||
|
||||
if (name.equalsIgnoreCase("eprints.citation")) {
|
||||
jsondata.put("citation string", content);
|
||||
}
|
||||
else
|
||||
jsonproperties.put(name, content);
|
||||
}
|
||||
|
||||
|
||||
if (!property.trim().isEmpty() && !content.trim().isEmpty())
|
||||
jsonproperties.put(property.trim(), content.trim());
|
||||
|
||||
}
|
||||
|
||||
|
||||
if (jsonproperties!=null && jsonproperties.length()>0) {
|
||||
jsondata.put("properties", jsonproperties);
|
||||
}
|
||||
|
||||
|
||||
//get the citation string
|
||||
|
||||
Element citationid = doc.getElementById("invenio-csl");
|
||||
|
@ -261,6 +265,26 @@ public class HeuristicParsers {
|
|||
if (!jsonText.trim().isEmpty()) {
|
||||
jsondata.put("citation string", jsonText.trim());
|
||||
}
|
||||
}
|
||||
if (citationStr.trim().isEmpty() && jsonproperties!=null && jsonproperties.has("citation")){
|
||||
|
||||
jsondata.put("citation string", jsonproperties.getString("citation"));
|
||||
|
||||
}
|
||||
|
||||
if (jsondata==null || jsondata.length()==0) {
|
||||
System.out.println("private getMetaMetadata ("+pid+")");
|
||||
jsondata=getMetaMetadata(doc);
|
||||
}
|
||||
|
||||
if (jsondata==null || jsondata.length()==0) {
|
||||
System.out.println("private getLinkMetadata ("+pid+")");
|
||||
jsondata=getLinkMetadata(doc);
|
||||
}
|
||||
|
||||
if (jsondata==null || jsondata.length()==0) {
|
||||
System.out.println("private getAttributeMetadata ("+pid+")");
|
||||
jsondata=getAttributeMetadata(doc);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -275,12 +299,101 @@ public class HeuristicParsers {
|
|||
System.out.println ("("+pid+") not available");
|
||||
}
|
||||
|
||||
|
||||
return jsondata;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
private JSONObject getLinkMetadata(Document doc) {
|
||||
|
||||
|
||||
JSONObject jsondata=new JSONObject();
|
||||
|
||||
JSONObject jsonproperties=new JSONObject();
|
||||
try {
|
||||
|
||||
Elements links = doc.getElementsByTag("link");
|
||||
for (Element link: links) {//get metadata from <link>
|
||||
String rel = link.attr("rel");
|
||||
String href= link.attr("href");
|
||||
if (!rel.trim().isEmpty() &&
|
||||
rel.trim().equalsIgnoreCase("metadata")){
|
||||
|
||||
jsonproperties.put(rel, href);
|
||||
}
|
||||
|
||||
}
|
||||
if (jsonproperties.length()>0)
|
||||
jsondata.put("properties", jsonproperties);
|
||||
else {
|
||||
Elements xdts=doc.getElementsByAttributeValueContaining("xmlns:dcterms", "http://purl.org/dc/terms/");
|
||||
for (Element xdt : xdts) {
|
||||
Elements dd=xdt.getElementsByTag("dd");
|
||||
Elements dt=xdt.getElementsByTag("dt");
|
||||
for (int i=0; i<dd.size(); i++) {
|
||||
System.out.println(dt.get(i).text()+" - "+dd.get(i).text());
|
||||
String content=dd.get(i).text().trim();
|
||||
if (!dt.get(i).text().trim().isEmpty()){
|
||||
if (jsonproperties.has(dt.get(i).text().trim())) {
|
||||
String names= jsonproperties.get(dt.get(i).text().trim()).toString();
|
||||
content=names+", "+content;
|
||||
}
|
||||
|
||||
if (dt.get(i).text().trim().equalsIgnoreCase("eprints.citation")) {
|
||||
jsondata.put("citation string", content);
|
||||
}
|
||||
else
|
||||
jsonproperties.put(dt.get(i).text().trim(), content);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
if (jsonproperties.length()>0)
|
||||
jsondata.put("properties", jsonproperties);
|
||||
}
|
||||
if (jsonproperties.length()==0) {
|
||||
Elements ulxmlns=doc.getElementsByTag("ul");
|
||||
for (Element ulxmln: ulxmlns) {
|
||||
if (ulxmln.hasAttr("xmlns")) {
|
||||
Elements ctncs=ulxmln.getElementsByAttributeValue("class", "Component_tree_node_content");
|
||||
for (Element ctnc: ctncs) {
|
||||
Element mye= ctnc.parent();
|
||||
String ppr=mye.getElementsByTag("code").text().trim();
|
||||
String pco=mye.getElementsByTag("sample").text().trim();
|
||||
if (!ppr.isEmpty()){
|
||||
if (jsonproperties.has(ppr)) {
|
||||
String vals= jsonproperties.get(ppr).toString();
|
||||
pco=vals+", "+pco;
|
||||
}
|
||||
jsonproperties.put(ppr, pco);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
if (jsonproperties.length()>0)
|
||||
jsondata.put("properties", jsonproperties);
|
||||
}
|
||||
System.out.println(jsondata);
|
||||
|
||||
}
|
||||
catch (Exception e) {
|
||||
|
||||
e.printStackTrace(pw);
|
||||
String sStackTrace = sw.toString(); // stack trace as a string
|
||||
if (sStackTrace.length()>200)
|
||||
System.out.println(sStackTrace.substring(0, 199));
|
||||
pw.flush();
|
||||
System.out.println ("&&&&&&&&&&&&&&&&&&&&&&&&& private link metadata not available");
|
||||
}
|
||||
|
||||
return jsondata;
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
public JSONObject getLinkMetadata(String pid) {
|
||||
|
||||
|
@ -318,6 +431,179 @@ public class HeuristicParsers {
|
|||
|
||||
return jsondata;
|
||||
|
||||
}
|
||||
//
|
||||
private JSONObject getAttributeMetadata(Document doc) {
|
||||
|
||||
|
||||
JSONObject jsondata=new JSONObject();
|
||||
JSONObject jsonproperties=new JSONObject();
|
||||
|
||||
HashMap<String, String> authors_affiliation = new HashMap<String, String>();
|
||||
try {
|
||||
|
||||
Elements dcel=doc.getElementsByAttributeValueStarting("property", "dc");
|
||||
|
||||
for (Element meta: dcel) {//get metadata from <element property="dc...">
|
||||
String content = meta.text();
|
||||
String property = meta.attr("property");
|
||||
|
||||
if (!content.trim().isEmpty()){
|
||||
if (jsonproperties.has(property)) {
|
||||
String names= jsonproperties.get(property).toString();
|
||||
content=names+", "+content;
|
||||
}
|
||||
|
||||
if (property.equalsIgnoreCase("eprints.citation")) {
|
||||
jsondata.put("citation string", content);
|
||||
}
|
||||
else
|
||||
jsonproperties.put(property, content);
|
||||
|
||||
}
|
||||
if (property.trim().contains("citation_author")) {//to be used to double check authors
|
||||
authors_affiliation.put(content, "na");
|
||||
}
|
||||
|
||||
if (!property.trim().isEmpty() && !content.trim().isEmpty())
|
||||
jsonproperties.put(property.trim(), content.trim());
|
||||
|
||||
}
|
||||
//get metadata about authors and affiliation from <div vocab="http://schema.org/"> section
|
||||
|
||||
|
||||
|
||||
//<div class="citation-popup" data-style-name="harvard" title="Harvard Citation" style="display:none;">
|
||||
Element citationstring= doc.select("div[data-style-name]").first();
|
||||
if (citationstring!=null) {
|
||||
jsondata.put("citation string", citationstring.text().trim());
|
||||
}
|
||||
|
||||
//check if metadata is in the html elements europeana style
|
||||
|
||||
if (jsonproperties!=null && jsonproperties.length()>0)
|
||||
jsondata.put("properties", jsonproperties);
|
||||
|
||||
|
||||
}
|
||||
catch (Exception e) {
|
||||
|
||||
e.printStackTrace(pw);
|
||||
String sStackTrace = sw.toString(); // stack trace as a string
|
||||
if (sStackTrace.length()>200)
|
||||
System.out.println(sStackTrace.substring(0, 199));
|
||||
pw.flush();
|
||||
System.out.println ("^^^^^^^^^^^^^^^^^^^^^ private attribute metadata not available");
|
||||
}
|
||||
|
||||
return jsondata;
|
||||
|
||||
|
||||
}
|
||||
//
|
||||
private JSONObject getMetaMetadata(Document doc) {
|
||||
|
||||
JSONObject jsondata=new JSONObject();
|
||||
// JSONArray authors = new JSONArray();
|
||||
JSONObject jsonproperties=new JSONObject();
|
||||
|
||||
HashMap<String, String> authors_affiliation = new HashMap<String, String>();
|
||||
try {
|
||||
|
||||
Elements cmdheader=doc.getElementsByTag("cmd:Header");
|
||||
if (cmdheader!=null && cmdheader.size()>0) {
|
||||
JSONObject talarproperties=new JSONObject();
|
||||
talarproperties=getTalarData(doc);
|
||||
if (talarproperties!=null)
|
||||
jsondata.put("properties", talarproperties);
|
||||
return jsondata;
|
||||
}
|
||||
|
||||
|
||||
Elements metas = doc.getElementsByTag("meta");
|
||||
for (Element meta: metas) {//get metadata from <meta>
|
||||
String name = meta.attr("name");
|
||||
String property = meta.attr("property");
|
||||
String content = meta.attr("content");
|
||||
if (!name.trim().isEmpty() &&
|
||||
!content.trim().isEmpty() &&
|
||||
(name.toLowerCase().trim().startsWith("dc.") ||
|
||||
name.trim().startsWith("citation_")) ||
|
||||
name.toLowerCase().trim().startsWith("eprints.")){
|
||||
if (jsonproperties.has(name)) {
|
||||
String names= jsonproperties.get(name).toString();
|
||||
content=names+", "+content;
|
||||
}
|
||||
|
||||
if (name.equalsIgnoreCase("eprints.citation")) {
|
||||
jsondata.put("citation string", content);
|
||||
}
|
||||
else
|
||||
jsonproperties.put(name, content);
|
||||
|
||||
}
|
||||
if (name.trim().contains("citation_author")) {//to be used to double check authors
|
||||
authors_affiliation.put(content, "na");
|
||||
}
|
||||
|
||||
if (!property.trim().isEmpty() && !content.trim().isEmpty())
|
||||
jsonproperties.put(property.trim(), content.trim());
|
||||
|
||||
}
|
||||
//get metadata about authors and affiliation from <div vocab="http://schema.org/"> section
|
||||
|
||||
Element record = doc.select("div[vocab]").first();
|
||||
//search for vocab
|
||||
if (record!=null) {
|
||||
Elements items = record.select("span[property]"); // span with property attribute
|
||||
|
||||
for (Element item: items) {
|
||||
String name = item.attr("property");
|
||||
String val=item.attr("value");
|
||||
if (!name.trim().isEmpty() && !val.trim().isEmpty()) {
|
||||
jsonproperties.put(name, val);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//<div class="citation-popup" data-style-name="harvard" title="Harvard Citation" style="display:none;">
|
||||
Element citationstring= doc.select("div[data-style-name]").first();
|
||||
if (citationstring!=null) {
|
||||
jsondata.put("citation string", citationstring.text().trim());
|
||||
}
|
||||
|
||||
//check if metadata is in the html elements europeana style
|
||||
|
||||
Elements euRecord = doc.select("div[data-field-name]");
|
||||
for (Element divmeta: euRecord) {
|
||||
String mdname = divmeta.attr("data-field-name");
|
||||
if (mdname != "") {
|
||||
jsonproperties.put(mdname, divmeta.text().trim());
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
if (jsonproperties!=null && jsonproperties.length()>0)
|
||||
jsondata.put("properties", jsonproperties);
|
||||
|
||||
/*if (jsondata==null || jsondata.length()==0) {
|
||||
jsondata=getZenodoData(doc);
|
||||
}*/
|
||||
}
|
||||
catch (Exception e) {
|
||||
|
||||
e.printStackTrace(pw);
|
||||
String sStackTrace = sw.toString(); // stack trace as a string
|
||||
if (sStackTrace.length()>200)
|
||||
System.out.println(sStackTrace.substring(0, 199));
|
||||
pw.flush();
|
||||
System.out.println ("^^^^^^^^^^^^^^^^^^^^^ private meta metadata not available");
|
||||
}
|
||||
|
||||
return jsondata;
|
||||
|
||||
}
|
||||
public JSONObject getMetaMetadata(String pid){
|
||||
JSONObject jsondata=new JSONObject();
|
||||
|
@ -348,23 +634,23 @@ public class HeuristicParsers {
|
|||
!content.trim().isEmpty() &&
|
||||
(name.toLowerCase().trim().startsWith("dc.") ||
|
||||
name.trim().startsWith("citation_")) ||
|
||||
name.toLowerCase().trim().startsWith("eprints.")){
|
||||
name.toLowerCase().trim().startsWith("eprints.")){
|
||||
if (jsonproperties.has(name)) {
|
||||
String names= jsonproperties.get(name).toString();
|
||||
content=names+", "+content;
|
||||
}
|
||||
|
||||
|
||||
if (name.equalsIgnoreCase("eprints.citation")) {
|
||||
jsondata.put("citation string", content);
|
||||
}
|
||||
else
|
||||
jsonproperties.put(name, content);
|
||||
|
||||
|
||||
}
|
||||
if (name.trim().contains("citation_author")) {//to be used to double check authors
|
||||
authors_affiliation.put(content, "na");
|
||||
}
|
||||
|
||||
|
||||
if (!property.trim().isEmpty() && !content.trim().isEmpty())
|
||||
jsonproperties.put(property.trim(), content.trim());
|
||||
|
||||
|
@ -391,7 +677,7 @@ public class HeuristicParsers {
|
|||
if (citationstring!=null) {
|
||||
jsondata.put("citation string", citationstring.text().trim());
|
||||
}
|
||||
|
||||
|
||||
//check if metadata is in the html elements europeana style
|
||||
|
||||
Elements euRecord = doc.select("div[data-field-name]");
|
||||
|
@ -400,8 +686,8 @@ public class HeuristicParsers {
|
|||
if (mdname != "") {
|
||||
jsonproperties.put(mdname, divmeta.text().trim());
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
if (jsonproperties!=null && jsonproperties.length()>0)
|
||||
|
@ -581,7 +867,7 @@ public class HeuristicParsers {
|
|||
if (proxyarray!=null) {
|
||||
taljsondata.put("cmd:ResourceProxyList", proxyarray);
|
||||
}
|
||||
|
||||
|
||||
//cmdp:GeneralInfo
|
||||
Elements generalInfo=doc.getElementsByTag("cmdp:GeneralInfo");
|
||||
proxyarray=new JSONArray();
|
||||
|
@ -678,10 +964,10 @@ public class HeuristicParsers {
|
|||
jsonlo.put("xml:lang", legalown.attr("xml:lang"));
|
||||
proxyarray.put(jsonlo);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
taljsondata.put("cmdp:LegalOwner", proxyarray);
|
||||
|
||||
|
||||
//cmdp:TimeCoverage
|
||||
Elements timecoves=doc.getElementsByTag("cmdp:TimeCoverage");
|
||||
proxyarray=new JSONArray();
|
||||
|
@ -692,10 +978,10 @@ public class HeuristicParsers {
|
|||
jsonlo.put("xml:lang", timecov.attr("xml:lang"));
|
||||
proxyarray.put(jsonlo);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
taljsondata.put("cmdp:TimeCoverage", proxyarray);
|
||||
|
||||
|
||||
return taljsondata;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue