Mercurial > hg > eSciDocImport
view src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java @ 3:58b52df9763c
added update functionality if index.meta has changed
author | dwinter |
---|---|
date | Wed, 12 Jan 2011 11:00:14 +0100 |
parents | fab8e78184fa |
children | cb5668b07bfc |
line wrap: on
line source
package de.mpiwg.itgroup.eSciDoc.Tools; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URL; import java.net.URLEncoder; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.StringTokenizer; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.swing.text.html.HTMLDocument.HTMLReader.IsindexAction; import org.apache.commons.codec.EncoderException; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpDelete; import org.apache.http.client.methods.HttpEntityEnclosingRequestBase; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpPut; import org.apache.http.client.methods.HttpRequestBase; import org.apache.http.client.params.ClientPNames; import org.apache.http.client.params.CookiePolicy; import org.apache.http.entity.InputStreamEntity; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.impl.conn.SingleClientConnManager; import org.apache.http.protocol.BasicHttpContext; import org.apache.http.protocol.HttpContext; import org.apache.log4j.Logger; import org.jdom.Attribute; import org.jdom.Document; import org.jdom.Element; import org.jdom.JDOMException; import org.jdom.Text; import org.jdom.input.SAXBuilder; import org.jdom.xpath.XPath; import org.w3c.dom.Node; import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource; import de.mpiwg.itgroup.eSciDoc.exceptions.ConnectorException; import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; import de.mpiwg.itgroup.eSciDoc.exceptions.ObjectNotUniqueError; import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; /** * Handler to create a connection with an eScidoc server * @author dwinter * */ public class EScidocBasicHandler { private Logger logger; private String eScidocServer; private int eScidocPort; private String user; private String password; public String eScidocUrl; private HttpClient httpclient=null; /** * @param eScidocServer * @param eScidocPort * @param user * @param password */ public EScidocBasicHandler(String eScidocServer, int eScidocPort,String user, String password){ this.eScidocServer=eScidocServer; this.eScidocPort=eScidocPort; this.user=user; this.password=password; this.eScidocUrl="http://"+eScidocServer+":"+String.valueOf(eScidocPort); logger = Logger.getRootLogger(); } /** * Logs you into escidoc and sets the httpclient field to the current client for this session * @return gives you the httpclient for further usage. * @throws HttpException * @throws ClientProtocolException * @throws IOException */ public HttpClient login() throws IOException { httpclient = new DefaultHttpClient(); HttpContext localContext = new BasicHttpContext(); httpclient.getParams().setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BROWSER_COMPATIBILITY); HttpPost httppost = new HttpPost(eScidocUrl+"/aa/login?target=/"); logger.debug("executing request"); HttpResponse response = httpclient.execute(httppost); //HttpEntity entity = httppost.getRes System.out.println("----------------------------------------"); System.out.println(response.getStatusLine()); HttpEntity entity = response.getEntity(); if (entity != null) { entity.consumeContent(); } HttpGet httpget = new HttpGet(eScidocUrl+"/aa/j_spring_security_check?j_username="+user+"&j_password="+password); response = httpclient.execute(httpget); //entity = response.getEntity(); System.out.println("----------------------------------------"); System.out.println(response.getStatusLine()); entity = response.getEntity(); if (entity != null) { entity.consumeContent(); } //entity.consumeContent(); return httpclient; } /** * Sends a PUT request to the escidoc client. performs a login if not done before. * @param command * @param body * @return * @throws ClientProtocolException * @throws IOException */ public HttpResponse eScidocPut(String command, InputStream body) throws IOException { HttpPut httpput = new HttpPut(eScidocUrl+command); return eScidocRequestBase(httpput,command,body); } /** * Sends a PUT request to the escidoc client. performs a login if not done before. * @param command * @param url * @return * @throws ClientProtocolException * @throws IOException */ public HttpResponse eScidocPut(String command, URL url) throws IOException{ HttpPut httpput = new HttpPut(eScidocUrl+command); return eScidocRequestBase(httpput,command,url.openStream()); } /** * Sends a POST request to the escidoc client. performs a login if not done before. * @param command * @param body * @return * @throws ClientProtocolException * @throws IOException */ public HttpResponse eScidocPost(String command, InputStream body) throws IOException{ HttpPost httppost = new HttpPost(eScidocUrl+command); return eScidocRequestBase(httppost,command,body); } /** Sends a POST request to the escidoc client. performs a login if not done before. * @param command * @param url * @return * @throws ClientProtocolException * @throws IOException */ public HttpResponse eScidocPost(String command, URL url) throws IOException{ HttpPost httppost = new HttpPost(eScidocUrl+command); return eScidocRequestBase(httppost,command,url.openStream()); } /** * Sends a GET request to the escidoc client. performs a login if not done before. * @param command * @return * @throws ClientProtocolException * @throws IOException */ public HttpResponse eScidocGet(String command) throws IOException{ HttpGet httpget = new HttpGet(eScidocUrl+command); return eScidocRequestBase(httpget,command,null); } /** Send a delete command to the escidoc client. performs a login if necessara * @param command * @return * @throws ClientProtocolException * @throws IOException */ public HttpResponse eScidocDelete(String command) throws IOException{ HttpDelete httpdelete = new HttpDelete(eScidocUrl+command); return eScidocRequestBase(httpdelete,command,null); } /** * Generates and sends a general request to escidoc, used by escidocPUT, POST and GET. * @param httpBase * @param command * @param body * @return * @throws ClientProtocolException * @throws IOException */ private HttpResponse eScidocRequestBase(HttpRequestBase httpBase, String command, InputStream body) throws IOException { if (httpclient==null) login(); if (HttpEntityEnclosingRequestBase.class.isInstance(httpBase)){ if (body!=null){ HttpEntity entity = new InputStreamEntity(body, -1); ((HttpEntityEnclosingRequestBase)httpBase).setEntity(entity); } } //logger.debug("executing request:"+httpBase.getRequestLine()); HttpResponse status = httpclient.execute(httpBase); //HttpEntity responseEntity = response.getEntity(); logger.debug("----------------------------------------"); logger.debug(status); return status; } /** * * To convert the InputStream to String we use the BufferedReader.readLine() * method. We iterate until the BufferedReader return null which means * there's no more data to read. Each line will appended to a StringBuilder * and returned as String. * @param is * @return */ public static String convertStreamToString(InputStream is) { BufferedReader reader = new BufferedReader(new InputStreamReader(is)); StringBuilder sb = new StringBuilder(); String line = null; try { while ((line = reader.readLine()) != null) { sb.append(line + "\n"); } } catch (IOException e) { e.printStackTrace(); } finally { try { is.close(); } catch (IOException e) { e.printStackTrace(); } } return sb.toString(); } /** converts a stream to a string * @param string * @return * @throws UnsupportedEncodingException */ public static InputStream convertStringToStream(String string) throws UnsupportedEncodingException{ return new ByteArrayInputStream(string.getBytes("utf-8")); } /** * Find the last-modification-date from an escidoc xml-file (item/container/context) * @param ret * @return */ public static String getDateStamp(String ret) { Pattern p = Pattern.compile("last-modification-date=\"([^\"]*)\""); Matcher m = p.matcher(ret); m.find(); String txt; try { txt = m.group(1); } catch (IllegalStateException e) { e.printStackTrace(); System.out.println(ret); throw new IllegalStateException(); } return txt; } /** * Find the content-modell (item) * @param ret * @return * @throws IOException * @throws JDOMException * @throws UnsupportedEncodingException */ public static String getContentModel(InputStream escidocstream) throws UnsupportedEncodingException, JDOMException, IOException { SAXBuilder builder = new SAXBuilder(); Document doc = builder.build(escidocstream); Element root= doc.getRootElement(); XPath xpath = EScidocTools.getESciDocXpath("escidocItem:properties/srel:content-model/@xlink:href"); Attribute node = (Attribute) xpath.selectSingleNode(root); return node.getValue(); } /** * Find the escidoc:id from an escidoc xml (item/container/context) * @param ret * @return */ public static String getId(String ret) { Pattern p = Pattern.compile("xlink:href=\"([^\"]*)\""); Matcher m = p.matcher(ret); m.find(); String txt = m.group(1); String[] splitted = txt.split("/"); String id = splitted[splitted.length-1]; return id; } /** * Get the pid from an escidoc xml-file (item/container/context) * @param retTxt * @return * @throws JDOMException * @throws IOException */ public static String getPID(String retTxt) throws JDOMException, IOException { SAXBuilder builder = new SAXBuilder(); Document doc = builder.build(EScidocBasicHandler.convertStringToStream(retTxt)); XPath xpath = XPath.newInstance("//prop:pid"); xpath.addNamespace("prop", EScidocTools.prop); Element node = (Element) xpath.selectSingleNode(doc); return node.getTextTrim(); } /** * get the number of the last version from an escidoc xml-file (item/container/context) * @param retTxt * @return * @throws JDOMException * @throws IOException */ public static String getLastVersionNumber(String retTxt) throws JDOMException, IOException { SAXBuilder builder = new SAXBuilder(); Document doc = builder.build(EScidocBasicHandler.convertStringToStream(retTxt)); XPath xpath = XPath.newInstance("//prop:version[@xlink:title='This Version']/version:number"); xpath.addNamespace("prop", EScidocTools.prop); xpath.addNamespace("xlink", EScidocTools.xlink); xpath.addNamespace("version", EScidocTools.version); Element node = (Element) xpath.selectSingleNode(doc); return node.getText(); } public static Object getXPath(Element node, String string,boolean single) throws JDOMException { XPath xpath= XPath.newInstance(string); xpath.addNamespace("dc",EScidocTools.DC); xpath.addNamespace("escidocComponents",EScidocTools.escidocComponents); xpath.addNamespace("prop",EScidocTools.prop); xpath.addNamespace("xlink",EScidocTools.xlink); xpath.addNamespace("mpiwg",EScidocTools.MPIWG); xpath.addNamespace("version",EScidocTools.version); if (single) return xpath.selectSingleNode(node); else return xpath.selectNodes(node); } public Long writeItem(eSciDocXmlObject escidocItem) { // TODO Auto-generated method stub return null; } public Boolean createItem(eSciDocXmlObject escidocItem) { String cmd="/ir/item"; try { String retStr = escidocItem.printXML(); String newObj = createObject(cmd, retStr); return escidocItem.upDateFromXML(newObj); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); return false; } } public String createObject (String command,String xml) throws Exception { InputStream stream = new ByteArrayInputStream(xml.getBytes("utf-8")); //DefaultHttpClient httpclient = login(); HttpResponse eScidocPut = eScidocPut(command, stream); //HttpEntity responseEntity = eScidocPut.getEntity(); System.out.println("----------------------------------------"); System.out.println(eScidocPut.getStatusLine()); int code = eScidocPut.getStatusLine().getStatusCode(); InputStream st = eScidocPut.getEntity().getContent(); //System.out.println(EScidocBasicHandler.convertStreamToString(st)); String xmlret = EScidocBasicHandler.convertStreamToString(st); if (code !=200){ logger.error(xmlret); throw (new Exception("CAN not DO error:"+code)); } return xmlret; } public String getIDfromPID(String pid, String context) throws ConnectorException { String filter = "<param><filter name=\"http://escidoc.de/core/01/properties/pid\">"; filter += pid; filter += "</filter></param>"; String command = context + "/resources/members/filter"; HttpResponse result; try { result = eScidocPost(command, new ByteArrayInputStream(filter.getBytes())); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); throw new ConnectorException(); } try { Document dom = new SAXBuilder().build(result.getEntity().getContent()); XPath xp = EScidocTools.getESciDocXpath("//escidocItem:item/@xlink:href"); Attribute attr = (Attribute)xp.selectSingleNode(dom); if (attr!=null){ return attr.getValue(); } return null; //return convertStreamToString(result.getEntity().getContent()); } catch (IllegalStateException e) { // TODO Auto-generated catch block e.printStackTrace(); throw new ConnectorException(); } catch (JDOMException e) { // TODO Auto-generated catch block e.printStackTrace(); throw new ConnectorException(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); throw new ConnectorException(); } } public HttpResponse submitAnObject(eSciDocXmlObject obj, String comment) throws ClientProtocolException, IOException, JDOMException { try { //addVersionPid(obj); } catch (Exception e) { // TODO: handle exception } String retTxt = obj.printXML(); String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); String param = "<param last-modification-date=\""+dateStamp+"\">"; param+="<comment>"+comment+"</comment>"; param+="</param>"; String command=obj.getESciDocId()+"/submit"; HttpResponse result = eScidocPost(command, EScidocBasicHandler.convertStringToStream(param)); return result; } public List<eSciDocXmlObject> getObjectListFromFilterResult(String command, String objectXpath) throws IOException, IllegalStateException, JDOMException,ESciDocXmlObjectException { //String filter = "<param><filter></filter></param>"; // //String command = context // + "/resources/members/filter"; //HttpResponse result =eScidocPost(command, //new ByteArrayInputStream(filter.getBytes())); //String command = context+"/resources/members"; HttpResponse result =eScidocGet(command); //InputStream text=result.getEntity().getContent(); //String tmtxt = convertStreamToString(text); //System.out.println(tmtxt); Document dom = new SAXBuilder().build(result.getEntity().getContent()); XPath xp = EScidocTools.getESciDocXpath(objectXpath); List<Element> attr = (List<Element>)xp.selectNodes(dom); ArrayList<eSciDocXmlObject> ret = new ArrayList<eSciDocXmlObject>(); for (Element el: attr){ ret.add(new eSciDocXmlObject(el)); } return ret; } public Integer getNumberOfHitsFromFilterResult(String command, String objectXPath, int mode) throws IOException, IllegalStateException, JDOMException { String query=""; if(mode==0 | mode==2){ query="query=%22/properties/version/status%22=pending"; } else { query="query=%22/properties/version/status%22=submitted"; } HttpResponse result =eScidocGet(command+"?maximumRecords=1&"+query); //InputStream text=result.getEntity().getContent(); //String tmtxt = convertStreamToString(text); //System.out.println(tmtxt); Document dom = new SAXBuilder().build(result.getEntity().getContent()); XPath xp = EScidocTools.getESciDocXpath("//srw:numberOfRecords"); Element attr = (Element)xp.selectSingleNode(dom); Integer tmpInt = Integer.valueOf(attr.getText()); return tmpInt; } public boolean addVersionPid(eSciDocXmlObject obj) throws ClientProtocolException, IOException, JDOMException{ //HttpResponse ret = eScidocGet(href); //String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); String retTxt = obj.printXML(); String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); String pid=EScidocBasicHandler.getPID(retTxt); String versionNumber = EScidocBasicHandler.getLastVersionNumber(retTxt); String vpid=pid+":"+versionNumber; String param = "<param last-modification-date=\""+dateStamp+"\">"; param+="<pid>"+vpid+"</pid>"; param+="</param>"; String command=obj.getESciDocId()+"/assign-version-pid"; HttpResponse result = eScidocPost(command, EScidocBasicHandler.convertStringToStream(param)); int code = result.getStatusLine().getStatusCode(); result.getEntity().consumeContent(); if (code!=200){ if(code==450) return false; return false; } return true; } public HttpResponse releaseAnObject(eSciDocXmlObject obj, String comment) throws IOException, JDOMException { //HttpResponse ret = getEsciDocHandler().eScidocGet(href); //String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); String retTxt = obj.printXML(); String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); String param = "<param last-modification-date=\""+dateStamp+"\">"; param+="<comment>"+comment+"</comment>"; param+="</param>"; String command=obj.getESciDocId()+"/release"; HttpResponse result = eScidocPost(command, EScidocBasicHandler.convertStringToStream(param)); return result; } public boolean upDateObject(eSciDocXmlObject obj) throws Exception { HttpResponse result = eScidocGet(obj.getESciDocId()); if (result.getStatusLine().getStatusCode()!=200){ logger.debug(result.getEntity().getContent()); return false; } String xml = convertStreamToString(result.getEntity().getContent()); obj.upDateFromXML(xml); return true; } public ECHOObject alreadyExists(String indexField, String testString, String context) throws ConnectorException, ObjectNotUniqueError { String[] ct = context.split("/"); // gebraucht wird hier nur die id, dh ohne /ir/... String contextId=ct[ct.length-1]; String searchString = String.format("\"%s\"=\"%s\"",indexField,testString); searchString += " and "+String.format("\"%s\"=\"%s\"","/properties/context/id",contextId); HttpResponse ret; try{ searchString = URLEncoder.encode(searchString,"utf-8"); ret = eScidocGet("/ir/items?operation=searchRetrieve&version=1.1&query="+searchString); } catch (UnsupportedEncodingException e) { throw new ConnectorException(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); throw new ConnectorException(); } if (ret.getStatusLine().getStatusCode()!=200) { logger.debug("alreadyExists: error searchstring:"+searchString); HttpEntity ent = ret.getEntity(); if (ent!=null) { try { ent.consumeContent(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); throw new ConnectorException(); } throw new ConnectorException(); } } try{ Document doc = new SAXBuilder().build(ret.getEntity().getContent()); XPath xp = EScidocTools.getESciDocXpath("//zs:numberOfRecords/text()"); String hitsStr = ((Text)xp.selectSingleNode(doc)).getText(); Integer hits = Integer.valueOf(hitsStr); if (hits>0){ if (hits>1) throw new ObjectNotUniqueError(); return getOldObjectFromESciDoc(doc); } return null; } catch (IOException e) { e.printStackTrace(); throw new ConnectorException(); } catch (IllegalStateException e) { // TODO Auto-generated catch block e.printStackTrace(); throw new ConnectorException(); } catch (JDOMException e) { // TODO Auto-generated catch block e.printStackTrace(); throw new ConnectorException(); } } private ECHOObject getOldObjectFromESciDoc(Document doc) { Map<String,String>retMap = new HashMap<String,String>(); XPath md5Nodes; XPath itemId; XPath lastModificationDate; try { md5Nodes= EScidocTools.getESciDocXpath(".//escidocComponents:component/escidocComponents:properties[prop:content-category[text()='index_meta']]/prop:checksum"); itemId= EScidocTools.getESciDocXpath(".//escidocItem:item/@xlink:href"); lastModificationDate = EScidocTools.getESciDocXpath(".//escidocItem:item/@last-modification-date"); } catch (JDOMException e) { // TODO Auto-generated catch block e.printStackTrace(); return null; } Element node; Attribute idNode; Attribute lastModificationDateNode; try { node = (Element)md5Nodes.selectSingleNode(doc); idNode = (Attribute)itemId.selectSingleNode(doc); lastModificationDateNode =(Attribute)lastModificationDate.selectSingleNode(doc); } catch (JDOMException e) { // TODO Auto-generated catch block e.printStackTrace(); return null; } String md5= node.getTextTrim(); String escidocId= idNode.getValue(); ECHORessource er; try { er = new ECHORessource(); er.eScidocId=escidocId; er.indexMetaMD5stored=md5; er.lastModificationDate= lastModificationDateNode.getValue(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); return null; } return er; } public ArrayList<String> getAllLinksOfContext(String string, String context) throws IOException, IllegalStateException, JDOMException { HttpResponse result = eScidocGet(context+"/resources/members"); Document doc = new SAXBuilder().build(result.getEntity().getContent()); XPath xp = EScidocTools.getESciDocXpath("//escidocItem:item"); XPath id = EScidocTools.getESciDocXpath("./@xlink:href"); XPath url= EScidocTools.getESciDocXpath(".//escidocComponents:component[escidocComponents:properties/prop:content-category[text()='"+string+"']]/escidocComponents:content/@xlink:href"); ArrayList<String> ret = new ArrayList<String>(); List<Element> items = xp.selectNodes(doc); File fl = new File("/tmp/linksofcontext.out"); FileWriter fw = new FileWriter(fl); for (Element item: items){ Attribute idAttribute = (Attribute)id.selectSingleNode(item); String idStr = idAttribute.getValue(); Attribute urlAttribute = (Attribute)url.selectSingleNode(item); String urlStr = urlAttribute.getValue(); ret.add(idStr+","+urlStr); logger.debug("getALLLinksOfContex:"+idStr+","+urlStr); fw.write(idStr+","+urlStr+"\n"); } fw.close(); return ret; } }