Mercurial > hg > eSciDocImport
diff src/de/mpiwg/itgroup/eSciDoc/Tools/Ingestor.java @ 0:c6929e63b0b8
first import
author | dwinter |
---|---|
date | Wed, 24 Nov 2010 16:52:07 +0100 |
parents | |
children |
line wrap: on
line diff
package de.mpiwg.itgroup.eSciDoc.Tools;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.rpc.ServiceException;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.xmlrpc.XmlRpcException;
import org.apache.xmlrpc.client.XmlRpcClient;
import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;

/**
 * Creates, groups, publishes and deletes objects on an eSciDoc server.
 *
 * <p>All HTTP traffic goes through a lazily created {@link EScidocBasicHandler};
 * new identifiers are requested from an XML-RPC service located below the
 * configured Zope provider URL.</p>
 */
public class Ingestor {

    /** Charset name used for every request body built in this class. */
    private static final String ENCODING = "UTF-8";

    /** Filter document that places no restriction on the returned members. */
    private static final String EMPTY_FILTER = "<param><filter/></param>";

    protected EScidocBasicHandler eSciDocHandler = null;
    protected String ESCIDOC_SERVER_URL;
    protected String ZOPEPROVIDER;
    private final String USER;
    private final String PASSWORD;
    private final int PORT;

    /**
     * Stores the connection settings; no connection is opened here.
     *
     * @param SERVER_URL eSciDoc server address handed to the handler
     * @param ServerPort eSciDoc server port handed to the handler
     * @param ZOPE       base URL of the provider hosting {@code /idGenerator}
     * @param User       login name handed to the handler
     * @param Password   password handed to the handler
     */
    public Ingestor(String SERVER_URL, int ServerPort, String ZOPE, String User, String Password) {
        ESCIDOC_SERVER_URL = SERVER_URL;
        ZOPEPROVIDER = ZOPE;
        USER = User;
        PASSWORD = Password;
        PORT = ServerPort;
    }

    /**
     * Requests a new identifier by calling {@code generateId} on the XML-RPC
     * endpoint {@code <ZOPEPROVIDER>/idGenerator}.
     *
     * @return the value returned by the remote call, cast to {@code String}
     * @throws MalformedURLException if the endpoint URL cannot be built
     * @throws XmlRpcException       if the remote call fails
     */
    public String getID() throws MalformedURLException, XmlRpcException {
        XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
        config.setServerURL(new URL(ZOPEPROVIDER + "/idGenerator"));

        XmlRpcClient client = new XmlRpcClient();
        client.setConfig(config);

        return (String) client.execute("generateId", new Object[] {});
    }

    /**
     * Returns the handler used for all eSciDoc requests, creating it from the
     * stored connection settings on first use.
     */
    protected EScidocBasicHandler getEsciDocHandler() {
        if (eSciDocHandler == null) {
            eSciDocHandler = new EScidocBasicHandler(ESCIDOC_SERVER_URL, PORT, USER, PASSWORD);
        }
        return eSciDocHandler;
    }

    /**
     * Sends {@code xml} (UTF-8 encoded) with an HTTP PUT to {@code command}
     * and prints the response status line.
     *
     * @param command server path to PUT to, e.g. {@code /ir/container}
     * @param xml     document to send
     * @return the response body as text
     */
    public String ingest(String command, String xml) throws ServiceException, IOException {
        InputStream stream = new ByteArrayInputStream(xml.getBytes("utf-8"));

        HttpResponse eScidocPut = getEsciDocHandler().eScidocPut(command, stream);
        HttpEntity responseEntity = eScidocPut.getEntity();

        System.out.println("----------------------------------------");
        System.out.println(eScidocPut.getStatusLine());

        return EScidocBasicHandler.convertStreamToString(responseEntity.getContent());
    }

    /**
     * Adds {@code newMember} to the container {@code collection_url}.
     *
     * <p>The container is fetched first because the add request has to carry
     * its current last-modification date.</p>
     *
     * @param collection_url id of the container, appended to {@code /ir/container/}
     * @param newMember      id of the object to add
     */
    public void addToCollection(String collection_url, String newMember)
            throws ClientProtocolException, IOException {
        String containerPath = "/ir/container/" + collection_url;
        String dateStamp = fetchDateStamp(containerPath);

        String param = "<param last-modification-date=\"" + dateStamp + "\">"
                + "<id>" + escapeXml(newMember) + "</id>"
                + "</param>";

        HttpResponse result = getEsciDocHandler().eScidocPost(
                containerPath + "/members/add", toUtf8Stream(param));

        System.out.println("----->addResult:" + result.getStatusLine());
        System.out.println("added " + newMember + " to " + collection_url);
    }

    /**
     * Lists the {@code xlink:href} values of all containers and items returned
     * by an unrestricted member filter on {@code href}.
     *
     * @param href server path of the container whose members are wanted
     * @return hrefs of the containers followed by hrefs of the items; members
     *         without an {@code xlink:href} attribute are left out
     */
    public List<String> getAllMembers(String href)
            throws ClientProtocolException, IOException, JDOMException {
        System.out.println(EMPTY_FILTER);
        Document doc = fetchMembers(href);

        // Collect both kinds into a fresh list so neither query result
        // replaces the other.
        List<Element> members = new ArrayList<Element>();
        members.addAll(selectElements(doc, "//container:container",
                "container", EScidocNameSpaceContext.container));
        members.addAll(selectElements(doc, "//item:item",
                "item", EScidocNameSpaceContext.item));

        List<String> ret = new ArrayList<String>();
        for (Element member : members) {
            String memberHref = hrefOf(member);
            if (memberHref != null) {
                ret.add(memberHref);
            }
        }
        return ret;
    }

    /**
     * Maps the PID of every container found below {@code collectionContainer}
     * to the last path segment of that container's {@code xlink:href}.
     *
     * @param collectionContainer id of the parent container, appended to
     *                            {@code /ir/container/}
     * @return PID to id map; containers lacking a PID element or an href are
     *         skipped and reported on standard output
     */
    public HashMap<String, String> getPIDsAndEscidocIdsOfCollections(String collectionContainer)
            throws ClientProtocolException, IOException, JDOMException {
        HashMap<String, String> ret = new HashMap<String, String>();

        System.out.println(EMPTY_FILTER);
        Document doc = fetchMembers("/ir/container/" + collectionContainer);

        // The PID expression is evaluated relative to each container, so it
        // only has to be compiled once.
        XPath pidPath = XPath.newInstance(".//container:properties/prop:pid");
        pidPath.addNamespace("prop", EScidocNameSpaceContext.prop);
        pidPath.addNamespace("container", EScidocNameSpaceContext.container);

        List<Element> containers = selectElements(doc, "//container:container",
                "container", EScidocNameSpaceContext.container);
        for (Element container : containers) {
            Element pidEl = (Element) pidPath.selectSingleNode(container);
            String href = hrefOf(container);
            if (pidEl == null || href == null) {
                System.out.println("skipping container without pid or xlink:href");
                continue;
            }
            ret.put(pidEl.getText(), lastPathSegment(href));
        }
        return ret;
    }

    /**
     * Posts a filter on the {@code properties/pid} attribute to the member
     * list of {@code context} and returns the raw response stream.
     *
     * @param pid     PID to filter for
     * @param context context id, inserted into
     *                {@code /ir/context/<context>/resources/members/filter}
     * @return the response body; the caller is responsible for closing it
     */
    protected InputStream getXMLfromPID(String pid, String context)
            throws ClientProtocolException, IOException {
        String filter = "<param><filter name=\"http://escidoc.de/core/01/properties/pid\">"
                + escapeXml(pid)
                + "</filter></param>";

        String command = "/ir/context/" + context + "/resources/members/filter";
        HttpResponse result = getEsciDocHandler().eScidocPost(command, toUtf8Stream(filter));

        return result.getEntity().getContent();
    }

    /**
     * Looks up the containers carrying {@code pid} and deletes each of them
     * together with its members.
     *
     * @param collection_Id passed as the context id to
     *                      {@link #getXMLfromPID(String, String)}
     * @param pid           PID of the container(s) to delete
     */
    public void deleteAllContentOfCollectionWithPID(String collection_Id, String pid)
            throws ClientProtocolException, IOException, JDOMException {
        Document lookup = parseAndClose(getXMLfromPID(pid, collection_Id));

        XPath hrefPath = XPath.newInstance(
                "//member-list:member-list/container:container/@xlink:href");
        hrefPath.addNamespace("member-list", "http://www.escidoc.de/schemas/memberlist/0.7");
        hrefPath.addNamespace("container", EScidocNameSpaceContext.container);
        hrefPath.addNamespace("xlink", EScidocNameSpaceContext.xlink);

        @SuppressWarnings("unchecked") // the expression selects attributes only
        List<Attribute> containerHrefs = (List<Attribute>) hrefPath.selectNodes(lookup);

        for (Attribute containerHref : containerHrefs) {
            String id = containerHref.getValue();
            deleteMembersOf(id);
            delete(id);
        }
    }

    /**
     * Deletes the members of every container listed below
     * {@code collection_Id}; the listed containers themselves are kept.
     *
     * @param collection_Id id of the parent container
     */
    public void deleteAllContentOfCollection(String collection_Id)
            throws ClientProtocolException, IOException, JDOMException {
        HashMap<String, String> pids = getPIDsAndEscidocIdsOfCollections(collection_Id);
        for (String id : pids.values()) {
            deleteMembersOf("/ir/container/" + id);
        }
    }

    /**
     * Deletes all items of the container at {@code containerPath}, then
     * empties and deletes each of its sub-containers.
     */
    private void deleteMembersOf(String containerPath) throws IOException, JDOMException {
        Document doc = fetchMembers(containerPath);

        for (Element item : selectElements(doc, "//item:item",
                "item", EScidocNameSpaceContext.item)) {
            String href = hrefOf(item);
            if (href != null) {
                delete(href);
            }
        }

        for (Element container : selectElements(doc, "//container:container",
                "container", EScidocNameSpaceContext.container)) {
            String href = hrefOf(container);
            if (href != null) {
                // Members are detached before the container is deleted.
                removeMembers(href);
                delete(href);
            }
        }
    }

    /**
     * Posts a {@code members/remove} request naming every member of the
     * container at {@code href}, and prints the request and the response.
     */
    private void removeMembers(String href)
            throws ClientProtocolException, IOException, JDOMException {
        List<String> cols = getAllMembers(href);
        String ds = fetchDateStamp(href);

        StringBuilder delString = new StringBuilder();
        delString.append("<param last-modification-date=\"").append(ds).append("\">");
        for (String colid : cols) {
            delString.append("<id>").append(lastPathSegment(colid)).append("</id>");
        }
        delString.append("</param>");
        System.out.println(delString);

        HttpResponse result = getEsciDocHandler().eScidocPost(
                href + "/members/remove", toUtf8Stream(delString.toString()));
        System.out.println("stat:" + result.getStatusLine());
        printResponseBody(result);
    }

    /** Sends an HTTP DELETE for {@code href} and prints the outcome. */
    private void delete(String href) throws ClientProtocolException, IOException {
        System.out.println(href);
        HttpResponse result = getEsciDocHandler().eScidocDelete(href);
        System.out.println("stat:" + result.getStatusLine());
        printResponseBody(result);
    }

    /**
     * Creates a new container from {@code MPIWGContainerTemplate.xml}, fills
     * in the given Dublin Core values and ingests it.
     *
     * @param title       value stored under {@code title}
     * @param creator     value stored under {@code creator}
     * @param description value stored under {@code description}
     * @param parentID    container to add the new one to, or {@code null} to
     *                    leave it unattached
     * @return the id extracted from the ingest response
     */
    public String createMPIWGCollection(String title, String creator, String description,
            String parentID) throws Exception {
        // get a PID for the collection
        String pid = getID();
        eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:" + pid, "MPIWGContainerTemplate.xml");

        HashMap<String, String> dcs = new HashMap<String, String>(); // store for the metadata
        dcs.put("description", description);
        dcs.put("title", title);
        dcs.put("creator", creator);
        obj.insertDC(dcs);

        String ret = ingest("/ir/container", obj.printXML());
        String contid = EScidocBasicHandler.getId(ret);

        if (parentID != null) {
            addToCollection(parentID, contid);
        }
        return contid;
    }

    /**
     * Posts the file {@code MPWGContentModelTemplate.xml} (resolved against
     * the working directory) to {@code /cmm/content-model}.
     *
     * @return the id extracted from the response
     */
    public String createContentModell() throws ClientProtocolException, IOException {
        File cmf = new File("MPWGContentModelTemplate.xml");
        FileInputStream stream = new FileInputStream(cmf);
        try {
            HttpResponse ret = getEsciDocHandler().eScidocPost("/cmm/content-model", stream);
            String retString = EScidocBasicHandler.convertStreamToString(
                    ret.getEntity().getContent());
            return EScidocBasicHandler.getId(retString);
        } finally {
            stream.close();
        }
    }

    /**
     * Posts a {@code submit} request for the object at {@code href}.
     *
     * @param href    server path of the object
     * @param comment free text sent as the {@code comment} element
     * @return the server response
     */
    public HttpResponse submitAnObject(String href, String comment)
            throws ClientProtocolException, IOException {
        return postWithComment(href, "submit", comment);
    }

    /**
     * Assigns a version PID of the form {@code <pid>:<last version number>}
     * to the object at {@code href}.
     *
     * @param href server path of the object
     * @return the server response
     */
    public HttpResponse addVersionPid(String href)
            throws ClientProtocolException, IOException, JDOMException {
        HttpResponse ret = getEsciDocHandler().eScidocGet(href);
        String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent());

        String dateStamp = EScidocBasicHandler.getDateStamp(retTxt);
        String pid = EScidocBasicHandler.getPID(retTxt);
        String versionNumber = EScidocBasicHandler.getLastVersionNumber(retTxt);
        String vpid = pid + ":" + versionNumber;

        String param = "<param last-modification-date=\"" + dateStamp + "\">"
                + "<pid>" + vpid + "</pid>"
                + "</param>";

        return getEsciDocHandler().eScidocPost(
                href + "/assign-version-pid", EScidocBasicHandler.convertStringToStream(param));
    }

    /**
     * Posts a {@code release} request for the object at {@code href}.
     *
     * @param href    server path of the object
     * @param comment free text sent as the {@code comment} element
     * @return the server response
     */
    public HttpResponse releaseAnObject(String href, String comment)
            throws ClientProtocolException, IOException {
        return postWithComment(href, "release", comment);
    }

    /**
     * Posts a {@code withdraw} request for the object at {@code href}.
     *
     * @param href    server path of the object
     * @param comment free text sent as the {@code comment} element
     * @return the server response
     */
    public HttpResponse withdrawAnObject(String href, String comment)
            throws ClientProtocolException, IOException {
        return postWithComment(href, "withdraw", comment);
    }

    /**
     * Posts a {@code revise} request for the object at {@code href}.
     *
     * @param href    server path of the object
     * @param comment free text sent as the {@code comment} element
     * @return the server response
     */
    public HttpResponse reviseAnObject(String href, String comment)
            throws ClientProtocolException, IOException {
        return postWithComment(href, "revise", comment);
    }

    /**
     * Fetches the object at {@code href} and PUTs its XML unchanged to
     * {@code /ir/item}, printing the document before and the response after.
     *
     * @param href    server path of the object
     * @param comment currently not used by this method
     * @return the response body of the PUT
     */
    public String resubmitAnObject(String href, String comment)
            throws ClientProtocolException, IOException, ServiceException {
        HttpResponse ret = getEsciDocHandler().eScidocGet(href);
        String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent());
        System.out.println(retTxt);

        retTxt = ingest("/ir/item", retTxt);
        System.out.println(retTxt);
        return retTxt;
    }

    /**
     * Fetches the object at {@code href} and posts
     * {@code <param last-modification-date=...><comment>...</comment></param>}
     * to {@code href/<action>}.
     */
    private HttpResponse postWithComment(String href, String action, String comment)
            throws IOException {
        String dateStamp = fetchDateStamp(href);

        String param = "<param last-modification-date=\"" + dateStamp + "\">"
                + "<comment>" + escapeXml(comment) + "</comment>"
                + "</param>";

        return getEsciDocHandler().eScidocPost(
                href + "/" + action, EScidocBasicHandler.convertStringToStream(param));
    }

    /** Fetches {@code href} and extracts the date stamp from the returned document. */
    private String fetchDateStamp(String href) throws IOException {
        HttpResponse response = getEsciDocHandler().eScidocGet(href);
        String body = EScidocBasicHandler.convertStreamToString(
                response.getEntity().getContent());
        return EScidocBasicHandler.getDateStamp(body);
    }

    /** Posts the unrestricted member filter to {@code containerPath} and parses the reply. */
    private Document fetchMembers(String containerPath) throws IOException, JDOMException {
        HttpResponse result = getEsciDocHandler().eScidocPost(
                containerPath + "/members/filter", toUtf8Stream(EMPTY_FILTER));
        return parseAndClose(result.getEntity().getContent());
    }

    /** Parses {@code in} as XML and closes it whether or not parsing succeeds. */
    private static Document parseAndClose(InputStream in) throws IOException, JDOMException {
        try {
            return new SAXBuilder().build(in);
        } finally {
            if (in != null) {
                in.close();
            }
        }
    }

    /** Evaluates {@code expression} with one namespace binding and returns the matches. */
    @SuppressWarnings("unchecked") // callers only pass expressions that select elements
    private static List<Element> selectElements(Object context, String expression,
            String prefix, String uri) throws JDOMException {
        XPath xpath = XPath.newInstance(expression);
        xpath.addNamespace(prefix, uri);
        return (List<Element>) xpath.selectNodes(context);
    }

    /** Returns the {@code xlink:href} attribute value of {@code element}, or {@code null}. */
    private static String hrefOf(Element element) {
        Namespace xlink = Namespace.getNamespace("xlink", EScidocNameSpaceContext.xlink);
        return element.getAttributeValue("href", xlink);
    }

    /** Returns the text after the last {@code /} that is followed by something. */
    private static String lastPathSegment(String path) {
        String[] parts = path.split("/");
        // split() yields an empty array for input made of slashes only
        return parts.length == 0 ? path : parts[parts.length - 1];
    }

    /** Wraps the UTF-8 bytes of {@code text} in a stream. */
    private static InputStream toUtf8Stream(String text) throws IOException {
        return new ByteArrayInputStream(text.getBytes(ENCODING));
    }

    /**
     * Replaces the five XML special characters by their predefined entities.
     * {@code null} is rendered as the text {@code null}, matching plain string
     * concatenation.
     */
    private static String escapeXml(String text) {
        String value = String.valueOf(text);
        StringBuilder out = new StringBuilder(value.length() + 16);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
            case '&':
                out.append("&amp;");
                break;
            case '<':
                out.append("&lt;");
                break;
            case '>':
                out.append("&gt;");
                break;
            case '"':
                out.append("&quot;");
                break;
            case '\'':
                out.append("&apos;");
                break;
            default:
                out.append(c);
            }
        }
        return out.toString();
    }

    /**
     * Prints the body of {@code response}, if it has one. Printing is
     * best-effort: a failure to read the body is reported but never aborts
     * the surrounding operation.
     */
    private static void printResponseBody(HttpResponse response) {
        HttpEntity entity = response.getEntity();
        if (entity == null) {
            return; // nothing to print
        }
        try {
            System.out.println(EScidocBasicHandler.convertStreamToString(entity.getContent()));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}