extracting pid from citation, in progress
This commit is contained in:
parent
6cf3981c6a
commit
108693a706
|
@ -13,7 +13,11 @@ import java.net.ProtocolException;
|
|||
import java.net.URL;
|
||||
import java.net.URLEncoder;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import javax.net.ssl.HttpsURLConnection;
|
||||
|
||||
|
@ -152,6 +156,7 @@ public class CitationHarvesterImpl {
|
|||
String jsonText="";
|
||||
JSONObject jsondata=null;
|
||||
|
||||
|
||||
try {
|
||||
|
||||
if (pid!=null && pid.trim()!="" && !pid.trim().equalsIgnoreCase("test"))
|
||||
|
@ -208,6 +213,9 @@ public class CitationHarvesterImpl {
|
|||
public JSONObject getCitationMetadataFromHTML(String pid, String token){
|
||||
|
||||
JSONObject jsondata=null;
|
||||
//System.out.println("################## mypid completo "+pid);
|
||||
pid=extractURL(pid)[0];
|
||||
System.out.println("################## mypid "+pid);
|
||||
|
||||
if (pid.contains(".handle.net/")) {
|
||||
jsondata=heup.getAPIMetadata(pid);
|
||||
|
@ -231,18 +239,8 @@ public class CitationHarvesterImpl {
|
|||
|
||||
}
|
||||
|
||||
/*if (jsondata==null || jsondata.length()==0) {
|
||||
System.out.println("getMetaMetadata ("+pid+")");
|
||||
jsondata=heup.getMetaMetadata(pid);
|
||||
}*/
|
||||
|
||||
|
||||
|
||||
/*if (jsondata==null || jsondata.length()==0) {
|
||||
System.out.println("getLinkMetadata ("+pid+")");
|
||||
jsondata= heup.getLinkMetadata(pid);
|
||||
}*/
|
||||
|
||||
if (jsondata!=null && jsondata.has("properties")){
|
||||
try {
|
||||
JSONObject testp=(JSONObject) jsondata.get("properties");
|
||||
|
@ -345,5 +343,17 @@ public class CitationHarvesterImpl {
|
|||
|
||||
return jsondata;
|
||||
}
|
||||
private String[] extractURL(String text) {
|
||||
List<String> list = new ArrayList<String>();
|
||||
Pattern pattern = Pattern
|
||||
.compile(
|
||||
"(http://|https://){1}[\\w\\.\\-/:\\#\\?\\=\\&\\;\\%\\~\\+]+",
|
||||
Pattern.CASE_INSENSITIVE);
|
||||
Matcher matcher = pattern.matcher(text);
|
||||
while (matcher.find()) {
|
||||
list.add(matcher.group());
|
||||
}
|
||||
return list.toArray(new String[list.size()]);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue