Mercurial > hg > eSciDocImport
view src/de/mpiwg/itgroup/eSciDoc/Tools/Ingestor.java @ 0:c6929e63b0b8
first import
author | dwinter |
---|---|
date | Wed, 24 Nov 2010 16:52:07 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpiwg.itgroup.eSciDoc.Tools; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.rpc.ServiceException; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.ClientProtocolException; import org.apache.xmlrpc.XmlRpcException; import org.apache.xmlrpc.client.XmlRpcClient; import org.apache.xmlrpc.client.XmlRpcClientConfigImpl; import org.jdom.Attribute; import org.jdom.Document; import org.jdom.Element; import org.jdom.JDOMException; import org.jdom.Namespace; import org.jdom.input.SAXBuilder; import org.jdom.output.XMLOutputter; import org.jdom.xpath.XPath; //import fedora.client.FedoraClient; //import fedora.server.access.FedoraAPIA; //import fedora.server.management.FedoraAPIM; public class Ingestor { protected EScidocBasicHandler eSciDocHandler = null; protected String ESCIDOC_SERVER_URL; protected String ZOPEPROVIDER; private String USER; private String PASSWORD; private int PORT; public Ingestor(String SERVER_URL,int ServerPort, String ZOPE, String User, String Password) { ESCIDOC_SERVER_URL = SERVER_URL; ZOPEPROVIDER= ZOPE; USER = User; PASSWORD = Password; PORT = ServerPort; } public String getID() throws MalformedURLException, XmlRpcException { XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); XmlRpcClient client = new XmlRpcClient(); config.setServerURL(new URL(ZOPEPROVIDER+"/idGenerator")); client.setConfig(config); Object[] params = new Object[]{}; return (String) client.execute("generateId", params); } protected EScidocBasicHandler getEsciDocHandler(){ if (eSciDocHandler==null) { eSciDocHandler = new EScidocBasicHandler(ESCIDOC_SERVER_URL,PORT,USER,PASSWORD); } return eSciDocHandler; } public String ingest (String command,String xml) throws ServiceException, IOException{ //File ff = new File ("/tmp/test3.xml"); //FileInputStream stream = new FileInputStream(ff); InputStream stream = new ByteArrayInputStream(xml.getBytes("utf-8")); //DefaultHttpClient httpclient = login(); HttpResponse eScidocPut = getEsciDocHandler().eScidocPut(command, stream); HttpEntity responseEntity = eScidocPut.getEntity(); System.out.println("----------------------------------------"); System.out.println(eScidocPut.getStatusLine()); InputStream st = responseEntity.getContent(); //System.out.println(EScidocBasicHandler.convertStreamToString(st)); String xmlret = EScidocBasicHandler.convertStreamToString(st); return xmlret; } public void addToCollection(String collection_url,String newMember) throws ClientProtocolException, IOException{ HttpResponse ret = getEsciDocHandler().eScidocGet("/ir/container/"+collection_url); String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); String param = "<param last-modification-date=\""+dateStamp+"\">"; param+="<id>"+newMember+"</id>"; param+="</param>"; ByteArrayInputStream stream = new ByteArrayInputStream(param.getBytes()); HttpResponse result = getEsciDocHandler().eScidocPost("/ir/container/"+collection_url+"/members/add",stream); System.out.println("----->addResult:"+result.getStatusLine()); System.out.println("added "+newMember+" to "+collection_url); } public List<String> getAllMembers(String href) throws ClientProtocolException, IOException, JDOMException { String command=href+"/members/filter"; //String filter="<param><filter name=\"http://escidoc.de/core/01/structural-relations/context\">escidoc:7017</filter></param>"; String filter="<param><filter/></param>"; System.out.println(filter); HttpResponse result = getEsciDocHandler().eScidocPost(command, new ByteArrayInputStream(filter.getBytes())); InputStream stream = result.getEntity().getContent(); //System.out.println(EScidocBasicHandler.convertStreamToString(stream)); //stream.reset(); SAXBuilder builder = new SAXBuilder(); Document doc = builder.build(stream); String pid = null; XPath xpath = XPath.newInstance("//container:container"); xpath.addNamespace("container", EScidocNameSpaceContext.container); List<Element> nodes = xpath.selectNodes(doc); xpath = XPath.newInstance("//item:item"); xpath.addNamespace("item", EScidocNameSpaceContext.item); nodes.addAll(nodes = xpath.selectNodes(doc)); List<String> ret = new ArrayList<String>(); for (Element el : nodes){ //XMLOutputter output = new XMLOutputter(); //output.output(el, System.out); xpath =XPath.newInstance("@xlink:href"); xpath.addNamespace("xlink", EScidocNameSpaceContext.xlink); Attribute idAtrr = (Attribute) xpath.selectSingleNode(el); String id = idAtrr.getValue(); ret.add(id); } return ret; } public HashMap<String, String> getPIDsAndEscidocIdsOfCollections(String collectionContainer) throws ClientProtocolException, IOException, JDOMException{ HashMap<String, String> ret = new HashMap<String, String>(); String command="/ir/container/"+collectionContainer+"/members/filter"; //String filter="<param><filter name=\"http://escidoc.de/core/01/structural-relations/context\">escidoc:7017</filter></param>"; String filter="<param><filter/></param>"; System.out.println(filter); HttpResponse result = getEsciDocHandler().eScidocPost(command, new ByteArrayInputStream(filter.getBytes())); InputStream stream = result.getEntity().getContent(); //System.out.println(EScidocBasicHandler.convertStreamToString(stream)); //stream.reset(); SAXBuilder builder = new SAXBuilder(); Document doc = builder.build(stream); String pid = null; XPath xpath = XPath.newInstance("//container:container"); xpath.addNamespace("container", EScidocNameSpaceContext.container); List<Element> nodes = xpath.selectNodes(doc); for (Element el : nodes){ //XMLOutputter output = new XMLOutputter(); //output.output(el, System.out); xpath =XPath.newInstance(".//container:properties/prop:pid"); xpath.addNamespace("prop", EScidocNameSpaceContext.prop); xpath.addNamespace("container", EScidocNameSpaceContext.container); Element pidEl =(Element) xpath.selectSingleNode(el); pid =pidEl.getText(); Namespace ns = Namespace.getNamespace("xlink",EScidocNameSpaceContext.xlink); xpath =XPath.newInstance("@xlink:href"); xpath.addNamespace(ns); Attribute idAtrr = (Attribute) xpath.selectSingleNode(el); List attrs = el.getAttributes(); String id = idAtrr.getValue(); String[] splitted = id.split("/"); ret.put(pid, splitted[splitted.length-1]); } return ret; } protected InputStream getXMLfromPID(String pid,String context) throws ClientProtocolException, IOException { String filter = "<param><filter name=\"http://escidoc.de/core/01/properties/pid\">"; filter += pid; filter += "</filter></param>"; String command = "/ir/context/" + context + "/resources/members/filter"; HttpResponse result = getEsciDocHandler().eScidocPost(command, new ByteArrayInputStream(filter.getBytes())); return result.getEntity().getContent(); } public void deleteAllContentOfCollectionWithPID(String collection_Id, String pid) throws ClientProtocolException, IOException, JDOMException { InputStream xmlStream = getXMLfromPID(pid, collection_Id); SAXBuilder builderA = new SAXBuilder(); Document docA = builderA.build(xmlStream); XPath xpathA = XPath.newInstance("//member-list:member-list/container:container/@xlink:href"); xpathA.addNamespace("member-list", "http://www.escidoc.de/schemas/memberlist/0.7"); xpathA.addNamespace("container", EScidocNameSpaceContext.container); xpathA.addNamespace("xlink", EScidocNameSpaceContext.xlink); List <Attribute> nodesA = xpathA.selectNodes(docA); for (Attribute nodeA: nodesA){ String id = nodeA.getValue(); String filter="<param><filter/></param>"; String command=id+"/members/filter"; HttpResponse result = getEsciDocHandler().eScidocPost(command, new ByteArrayInputStream(filter.getBytes())); xmlStream = result.getEntity().getContent(); XMLOutputter xmlout = new XMLOutputter(); SAXBuilder builder = new SAXBuilder(); Document doc = builder.build(xmlStream); XPath xpath = XPath.newInstance("//item:item"); xpath.addNamespace("item", EScidocNameSpaceContext.item); List <Element> nodes = xpath.selectNodes(doc); for (Element node: nodes) { xpath = XPath.newInstance("@xlink:href"); xpath.addNamespace("xlink", EScidocNameSpaceContext.xlink); Attribute attribute = (Attribute) xpath.selectSingleNode(node); String href = attribute.getValue(); delete(href); } xpath = XPath.newInstance("//container:container"); xpath.addNamespace("container", EScidocNameSpaceContext.container); nodes = xpath.selectNodes(doc); for (Element node: nodes) { xpath = XPath.newInstance("@xlink:href"); xpath.addNamespace("xlink", EScidocNameSpaceContext.xlink); Attribute attribute = (Attribute) xpath.selectSingleNode(node); String href = attribute.getValue(); removeMembers(href); delete(href); } delete(id); } } public void deleteAllContentOfCollection(String collection_Id) throws ClientProtocolException, IOException, JDOMException { HashMap<String, String> pids = getPIDsAndEscidocIdsOfCollections(collection_Id); for (String id: pids.values()){ String filter="<param><filter/></param>"; String command="/ir/container/"+id+"/members/filter"; HttpResponse result = getEsciDocHandler().eScidocPost(command, new ByteArrayInputStream(filter.getBytes())); InputStream xmlStream = result.getEntity().getContent(); XMLOutputter xmlout = new XMLOutputter(); SAXBuilder builder = new SAXBuilder(); Document doc = builder.build(xmlStream); XPath xpath = XPath.newInstance("//item:item"); xpath.addNamespace("item", EScidocNameSpaceContext.item); List <Element> nodes = xpath.selectNodes(doc); for (Element node: nodes) { xpath = XPath.newInstance("@xlink:href"); xpath.addNamespace("xlink", EScidocNameSpaceContext.xlink); Attribute attribute = (Attribute) xpath.selectSingleNode(node); String href = attribute.getValue(); delete(href); } xpath = XPath.newInstance("//container:container"); xpath.addNamespace("container", EScidocNameSpaceContext.container); nodes = xpath.selectNodes(doc); for (Element node: nodes) { xpath = XPath.newInstance("@xlink:href"); xpath.addNamespace("xlink", EScidocNameSpaceContext.xlink); Attribute attribute = (Attribute) xpath.selectSingleNode(node); String href = attribute.getValue(); removeMembers(href); delete(href); } } } private void removeMembers(String href) throws ClientProtocolException, IOException, JDOMException { List<String> cols = getAllMembers(href); HttpResponse result = getEsciDocHandler().eScidocGet(href); String ret= EScidocBasicHandler.convertStreamToString(result.getEntity().getContent()); String ds = getEsciDocHandler().getDateStamp(ret); String delString="<param last-modification-date=\""+ds+"\">"; for (String colid: cols){ String[] colids = colid.split("/"); delString+="<id>"+colids[colids.length-1]+"</id>"; } delString += "</param>"; System.out.println(delString); InputStream body = new ByteArrayInputStream(delString.getBytes()); result = getEsciDocHandler().eScidocPost(href+"/members/remove", body); System.out.println("stat:"+result.getStatusLine()); try { System.out.println(EScidocBasicHandler.convertStreamToString(result.getEntity().getContent())); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } private void delete(String href) throws ClientProtocolException, IOException { System.out.println(href); HttpResponse result = getEsciDocHandler().eScidocDelete(href); System.out.println("stat:"+result.getStatusLine()); try { System.out.println(EScidocBasicHandler.convertStreamToString(result.getEntity().getContent())); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } public String createMPIWGCollection(String title, String creator,String description,String parentID) throws Exception{ // get a PID for the Collection String pid = getID(); eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:"+pid,"MPIWGContainerTemplate.xml"); HashMap<String,String> dcs = new HashMap<String,String>(); // Store for the metadata //Verbinde dich mit der Collection Ÿber XML-rpc dcs.put("description", description); dcs.put("title", title); dcs.put("creator", creator); obj.insertDC(dcs); //obj.setRelationship("info:fedora/echo:col1"); String xml = obj.printXML(); //System.out.println(xml); String ret = ingest("/ir/container",xml); String contid =EScidocBasicHandler.getId(ret); if (parentID!=null) addToCollection(parentID,contid); return contid; } public String createContentModell() throws ClientProtocolException, IOException{ File cmf = new File("MPWGContentModelTemplate.xml"); FileInputStream stream = new FileInputStream(cmf); HttpResponse ret = getEsciDocHandler().eScidocPost("/cmm/content-model", stream); String retString = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); return EScidocBasicHandler.getId(retString); } public HttpResponse submitAnObject(String href,String comment) throws ClientProtocolException, IOException{ HttpResponse ret = getEsciDocHandler().eScidocGet(href); String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); String param = "<param last-modification-date=\""+dateStamp+"\">"; param+="<comment>"+comment+"</comment>"; param+="</param>"; String command=href+"/submit"; HttpResponse result = getEsciDocHandler().eScidocPost(command, EScidocBasicHandler.convertStringToStream(param)); return result; } public HttpResponse addVersionPid(String href) throws ClientProtocolException, IOException, JDOMException{ HttpResponse ret = getEsciDocHandler().eScidocGet(href); String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); String pid=EScidocBasicHandler.getPID(retTxt); String versionNumber = EScidocBasicHandler.getLastVersionNumber(retTxt); String vpid=pid+":"+versionNumber; String param = "<param last-modification-date=\""+dateStamp+"\">"; param+="<pid>"+vpid+"</pid>"; param+="</param>"; String command=href+"/assign-version-pid"; HttpResponse result = getEsciDocHandler().eScidocPost(command, EScidocBasicHandler.convertStringToStream(param)); return result; } public HttpResponse releaseAnObject(String href,String comment) throws ClientProtocolException, IOException{ HttpResponse ret = getEsciDocHandler().eScidocGet(href); String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); String param = "<param last-modification-date=\""+dateStamp+"\">"; param+="<comment>"+comment+"</comment>"; param+="</param>"; String command=href+"/release"; HttpResponse result = getEsciDocHandler().eScidocPost(command, EScidocBasicHandler.convertStringToStream(param)); return result; } public HttpResponse withdrawAnObject(String href, String comment) throws ClientProtocolException, IOException{ HttpResponse ret = getEsciDocHandler().eScidocGet(href); String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); String param = "<param last-modification-date=\""+dateStamp+"\">"; param+="<comment>"+comment+"</comment>"; param+="</param>"; String command=href+"/withdraw"; HttpResponse result = getEsciDocHandler().eScidocPost(command, EScidocBasicHandler.convertStringToStream(param)); return result; } public HttpResponse reviseAnObject(String href, String comment) throws ClientProtocolException, IOException{ HttpResponse ret = getEsciDocHandler().eScidocGet(href); String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); String param = "<param last-modification-date=\""+dateStamp+"\">"; param+="<comment>"+comment+"</comment>"; param+="</param>"; String command=href+"/revise"; HttpResponse result = getEsciDocHandler().eScidocPost(command, EScidocBasicHandler.convertStringToStream(param)); return result; } public String resubmitAnObject(String href, String comment) throws ClientProtocolException, IOException, ServiceException { HttpResponse ret = getEsciDocHandler().eScidocGet(href); String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); System.out.println(retTxt); retTxt=ingest("/ir/item", retTxt); System.out.println(retTxt); return retTxt; } }