extracting pid from citation, in progress
This commit is contained in:
parent
6cf3981c6a
commit
108693a706
|
@ -13,7 +13,11 @@ import java.net.ProtocolException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.net.URLEncoder;
|
import java.net.URLEncoder;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import javax.net.ssl.HttpsURLConnection;
|
import javax.net.ssl.HttpsURLConnection;
|
||||||
|
|
||||||
|
@ -151,6 +155,7 @@ public class CitationHarvesterImpl {
|
||||||
public JSONObject getCitationCSL(String pid, String token){
|
public JSONObject getCitationCSL(String pid, String token){
|
||||||
String jsonText="";
|
String jsonText="";
|
||||||
JSONObject jsondata=null;
|
JSONObject jsondata=null;
|
||||||
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
|
@ -208,6 +213,9 @@ public class CitationHarvesterImpl {
|
||||||
public JSONObject getCitationMetadataFromHTML(String pid, String token){
|
public JSONObject getCitationMetadataFromHTML(String pid, String token){
|
||||||
|
|
||||||
JSONObject jsondata=null;
|
JSONObject jsondata=null;
|
||||||
|
//System.out.println("################## mypid completo "+pid);
|
||||||
|
pid=extractURL(pid)[0];
|
||||||
|
System.out.println("################## mypid "+pid);
|
||||||
|
|
||||||
if (pid.contains(".handle.net/")) {
|
if (pid.contains(".handle.net/")) {
|
||||||
jsondata=heup.getAPIMetadata(pid);
|
jsondata=heup.getAPIMetadata(pid);
|
||||||
|
@ -231,17 +239,7 @@ public class CitationHarvesterImpl {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*if (jsondata==null || jsondata.length()==0) {
|
|
||||||
System.out.println("getMetaMetadata ("+pid+")");
|
|
||||||
jsondata=heup.getMetaMetadata(pid);
|
|
||||||
}*/
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*if (jsondata==null || jsondata.length()==0) {
|
|
||||||
System.out.println("getLinkMetadata ("+pid+")");
|
|
||||||
jsondata= heup.getLinkMetadata(pid);
|
|
||||||
}*/
|
|
||||||
|
|
||||||
if (jsondata!=null && jsondata.has("properties")){
|
if (jsondata!=null && jsondata.has("properties")){
|
||||||
try {
|
try {
|
||||||
|
@ -345,5 +343,17 @@ public class CitationHarvesterImpl {
|
||||||
|
|
||||||
return jsondata;
|
return jsondata;
|
||||||
}
|
}
|
||||||
|
private String[] extractURL(String text) {
|
||||||
|
List<String> list = new ArrayList<String>();
|
||||||
|
Pattern pattern = Pattern
|
||||||
|
.compile(
|
||||||
|
"(http://|https://){1}[\\w\\.\\-/:\\#\\?\\=\\&\\;\\%\\~\\+]+",
|
||||||
|
Pattern.CASE_INSENSITIVE);
|
||||||
|
Matcher matcher = pattern.matcher(text);
|
||||||
|
while (matcher.find()) {
|
||||||
|
list.add(matcher.group());
|
||||||
|
}
|
||||||
|
return list.toArray(new String[list.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue