Mercurial > hg > eSciDocImport
changeset 2:fab8e78184fa
minor
author | dwinter |
---|---|
date | Mon, 10 Jan 2011 12:42:27 +0100 |
parents | 6b0267cb40ed |
children | 58b52df9763c |
files | src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarverster.java src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java |
diffstat | 5 files changed, 223 insertions(+), 112 deletions(-) [+] |
line wrap: on
line diff
--- a/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java Fri Nov 26 09:09:25 2010 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java Mon Jan 10 12:42:27 2011 +0100 @@ -236,7 +236,7 @@ } } - logger.debug("executing request:"+httpBase.getRequestLine()); + //logger.debug("executing request:"+httpBase.getRequestLine()); HttpResponse status = httpclient.execute(httpBase); @@ -510,7 +510,7 @@ public HttpResponse submitAnObject(eSciDocXmlObject obj, String comment) throws ClientProtocolException, IOException, JDOMException { try { - addVersionPid(obj); + //addVersionPid(obj); } catch (Exception e) { // TODO: handle exception } @@ -540,9 +540,13 @@ //String command = context+"/resources/members"; HttpResponse result =eScidocGet(command); + //InputStream text=result.getEntity().getContent(); + //String tmtxt = convertStreamToString(text); + //System.out.println(tmtxt); Document dom = new SAXBuilder().build(result.getEntity().getContent()); XPath xp = EScidocTools.getESciDocXpath(objectXpath); + List<Element> attr = (List<Element>)xp.selectNodes(dom); ArrayList<eSciDocXmlObject> ret = new ArrayList<eSciDocXmlObject>(); for (Element el: attr){ @@ -551,6 +555,28 @@ return ret; } + public Integer getNumberOfHitsFromFilterResult(String command, + String objectXPath, int mode) throws IOException, IllegalStateException, JDOMException { + String query=""; + if(mode==0 | mode==2){ + query="query=%22/properties/version/status%22=pending"; + } else { + query="query=%22/properties/version/status%22=submitted"; + } + + HttpResponse result =eScidocGet(command+"?maximumRecords=1&"+query); + //InputStream text=result.getEntity().getContent(); + //String tmtxt = convertStreamToString(text); + //System.out.println(tmtxt); + Document dom = new SAXBuilder().build(result.getEntity().getContent()); + XPath xp = EScidocTools.getESciDocXpath("//srw:numberOfRecords"); + + Element attr = (Element)xp.selectSingleNode(dom); + + Integer tmpInt = Integer.valueOf(attr.getText()); + return tmpInt; + } + public boolean addVersionPid(eSciDocXmlObject obj) throws ClientProtocolException, IOException, JDOMException{ //HttpResponse ret = eScidocGet(href); //String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); @@ -571,8 +597,11 @@ int code = result.getStatusLine().getStatusCode(); result.getEntity().consumeContent(); - if (code!=200) + if (code!=200){ + if(code==450) + return false; return false; + } return true; @@ -679,4 +708,8 @@ fw.close(); return ret; } + + + + }
--- a/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java Fri Nov 26 09:09:25 2010 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java Mon Jan 10 12:42:27 2011 +0100 @@ -45,7 +45,6 @@ public static XPath getESciDocXpath(String xpath) throws JDOMException{ XPath xpathResources = XPath.newInstance(xpath); - xpathResources.addNamespace("prop",prop); xpathResources.addNamespace("mpiwg",MPIWG);
--- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java Fri Nov 26 09:09:25 2010 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java Mon Jan 10 12:42:27 2011 +0100 @@ -3,11 +3,12 @@ import java.io.File; import java.io.FileWriter; import java.io.IOException; +import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; - +import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.log4j.BasicConfigurator; import org.apache.log4j.Level; @@ -26,7 +27,8 @@ import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; public class ESciDocDataHarvester { - + static int MAX_REC=1000; + //static int MAX_REC=5; protected Logger logger = Logger.getRootLogger(); protected Importer importer; protected EScidocBasicHandler connector; @@ -35,134 +37,197 @@ private String echoContext; private Logger addedFile = Logger.getLogger("addedFilesLogger"); private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger"); - - - public ESciDocDataHarvester(Importer importer, Transformer transformer, EScidocBasicHandler connector, String context) throws IOException{ - this.importer=importer; - this.transformer=transformer; - this.connector=connector; - this.tools=new EScidocTools(connector); - this.echoContext= context; - - + + public ESciDocDataHarvester(Importer importer, Transformer transformer, + EScidocBasicHandler connector, String context) throws IOException { + this.importer = importer; + this.transformer = transformer; + this.connector = connector; + this.tools = new EScidocTools(connector); + this.echoContext = context; + } - public Boolean readObjectsFromInstance(String type) throws Exception{ + + public Boolean readObjectsFromInstance(String type) throws Exception { ArrayList<String> addedObjects = new ArrayList<String>(); ArrayList<String> notAddedObjects = new ArrayList<String>(); - for (ECHOObject obj: importer.getObjectList(type)){ - - - if (ECHORessource.class.isInstance(obj)){ - try { - if (connector.alreadyExists("/md-records/md-record/admin/archivePath",((ECHORessource)obj).archivePath,echoContext)){ - logger.debug("already exist:"+((ECHORessource)obj).archivePath); + for (ECHOObject obj : importer.getObjectList(type)) { + + if (ECHORessource.class.isInstance(obj)) { + try { + if (connector.alreadyExists( + "/md-records/md-record/admin/archivePath", + ((ECHORessource) obj).archivePath, echoContext)) { + logger.debug("already exist:" + + ((ECHORessource) obj).archivePath); + continue; + } + } catch (Exception e) { + logger.debug("already exist error"); + e.printStackTrace(); continue; } - } catch (Exception e) { - logger.debug("already exist error"); - e.printStackTrace(); - continue; } - } - - obj.context=echoContext; - - String contid=connector.getIDfromPID(obj.pid,echoContext); - if (contid!=null){ - System.out.println("------- belongsTo:"+contid); + + obj.context = echoContext; + + String contid = connector.getIDfromPID(obj.pid, echoContext); + if (contid != null) { + System.out.println("------- belongsTo:" + contid); } else { - - eSciDocXmlObject escidocItem = transformer.transform(obj); - logger.info(escidocItem.printXML()); - // TODO write PID to back to echo-obj - Boolean result = connector.createItem(escidocItem); - if (result){ - addedObjects.add(escidocItem.getESciDocId()); - addedFile.debug(escidocItem.getESciDocId()+"\n"); - //addedFile.write(escidocItem.getESciDocId()+"\n"); - //addedFile.flush(); - - }else { - notAddedObjects.add(obj.echoUrl); - notAddedFile.debug(obj.echoUrl); - //notAddedFile.write(obj.echoUrl+"\n"); - //notAddedFile.flush(); - } - //if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){ - // logger.info("PID already exists:"+obj); - //} else if (result == ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){ - // logger.info("Object with reference to the same digital object already exists:"+obj); - //} - + + eSciDocXmlObject escidocItem = transformer.transform(obj); + logger.info(escidocItem.printXML()); + // TODO write PID to back to echo-obj + Boolean result = connector.createItem(escidocItem); + if (result) { + addedObjects.add(escidocItem.getESciDocId()); + addedFile.debug(escidocItem.getESciDocId() + "\n"); + // addedFile.write(escidocItem.getESciDocId()+"\n"); + // addedFile.flush(); + + } else { + notAddedObjects.add(obj.echoUrl); + notAddedFile.debug(obj.echoUrl); + // notAddedFile.write(obj.echoUrl+"\n"); + // notAddedFile.flush(); + } + // if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){ + // logger.info("PID already exists:"+obj); + // } else if (result == + // ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){ + // logger.info("Object with reference to the same digital object already exists:"+obj); + // } + } } - if(logger.getLevel()==Level.DEBUG){ - for (String addedObject:addedObjects){ + if (logger.getLevel() == Level.DEBUG) { + for (String addedObject : addedObjects) { logger.debug(addedObject); } } - -// File outFile = new File("/tmp/import.out"); -// FileWriter fw = new FileWriter(outFile); -// for (String addedObject:addedObjects){ -// fw.write(addedObject+"\n"); -// } -// for (String addedObject:notAddedObjects){ -// fw.write(addedObject+"\n"); -// } -// fw.close(); + + // File outFile = new File("/tmp/import.out"); + // FileWriter fw = new FileWriter(outFile); + // for (String addedObject:addedObjects){ + // fw.write(addedObject+"\n"); + // } + // for (String addedObject:notAddedObjects){ + // fw.write(addedObject+"\n"); + // } + // fw.close(); return true; } - public void releaseAndSubmitObjects(String command,String objectXPath) throws Exception{ - for (eSciDocXmlObject obj: connector.getObjectListFromFilterResult(command,objectXPath)){ - HttpResponse res = connector.submitAnObject(obj,"first release"); - logger.debug(res.getStatusLine()); - if (res.getStatusLine().getStatusCode()!=200){ - logger.debug("Can not submit:"+obj.getESciDocId()); - //res.getEntity().consumeContent(); // necessary to release the conneciton - - } - res.getEntity().consumeContent(); // necessary to release the conneciton - - if (!connector.upDateObject(obj)){ - logger.debug("Can not update:"+obj.getESciDocId()); - //continue; + /** + * @param command + * @param objectXPath + * @param mode 0 : only submit, 1:only release, 2:release and submit + * @throws Exception + */ + public void releaseAndSubmitObjects(String command, String objectXPath,int mode) + throws Exception { + + Integer numberOfHits = connector.getNumberOfHitsFromFilterResult( + command, objectXPath,mode); + + + int tausend = ((numberOfHits-1) / MAX_REC); + + String queryRestrict=""; + if(mode==0 | mode==2){ + queryRestrict="query=%22/properties/version/status%22=pending"; + } else { + queryRestrict="query=%22/properties/version/status%22=submitted"; + } + + for (int t = 0; t <= tausend; t++) { + int start = t * MAX_REC+1; + // int max=Math.min((t+1)*1000, numberOfHits); + String query = "?maximumRecords="+String.valueOf(MAX_REC)+"&startRecord=" + + String.valueOf(start)+"&"+queryRestrict; + for (eSciDocXmlObject obj : connector + .getObjectListFromFilterResult(command+query, objectXPath)) { + //TODO is the following really necessary, currently the obj in the list is sometimes not the current one. + try{ + HttpResponse resObj = connector.eScidocGet(obj.getESciDocId()); + HttpEntity ent = resObj.getEntity(); + if (ent!=null){ + obj= new eSciDocXmlObject(ent.getContent()); + } else { + logger.debug("Can not retrieve:" + obj.getESciDocId()); + continue; + } + } catch (Exception e){ + logger.debug("Can not retrieve:" + obj.getESciDocId()); + continue; + } + if (mode==0 | mode==2){ + HttpResponse res = connector.submitAnObject(obj, + "first release"); + logger.debug(res.getStatusLine()); + + if (res.getStatusLine().getStatusCode() != 200) { + logger.debug("Can not submit:" + obj.getESciDocId()); + // res.getEntity().consumeContent(); // necessary to release + // the conneciton + + } + InputStream restream = res.getEntity().getContent(); + logger.debug(EScidocBasicHandler.convertStreamToString(restream)); + //res.getEntity().consumeContent(); // necessary to release the + // conneciton + + if (!connector.upDateObject(obj)) { + logger.debug("Can not update:" + obj.getESciDocId()); + // continue; + + } + } + + if (mode==1 | mode==2){ + HttpResponse res = connector.releaseAnObject(obj, "first release"); + logger.debug(res.getStatusLine()); + if (res.getStatusLine().getStatusCode() != 200) { + logger.debug("Can not release:" + obj.getESciDocId()); + res.getEntity().consumeContent(); // necessary to release + // the conneciton + continue; + } + addedFile.debug("RELEASED:" + obj.getESciDocId()); + res.getEntity().consumeContent(); // necessary to release the + // connecito + } } - - - res = connector.releaseAnObject(obj, "first release"); - logger.debug(res.getStatusLine()); - if (res.getStatusLine().getStatusCode()!=200){ - logger.debug("Can not release:"+obj.getESciDocId()); - res.getEntity().consumeContent(); // necessary to release the conneciton - continue; - } - addedFile.debug("RELEASED:"+obj.getESciDocId()); - res.getEntity().consumeContent(); // necessary to release the conneciton } - + } - public static void main(String[] args) throws Exception{ - + + public static void main(String[] args) throws Exception { + Logger rl = Logger.getRootLogger(); DOMConfigurator.configure("log4uconf.xml"); - rl.setLevel(Level.DEBUG); + rl.setLevel(Level.DEBUG); - - EScidocBasicHandler connector = new EScidocBasicHandler("192.168.56.2",8080,"dwinter","weikiki7"); - ECHOImporter newimporter = new ECHOImporter(new URL("file:///Users/dwinter/libcoll.rdf")); - ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter, - new ECHOTransformer(),connector,"/ir/context/escidoc:40001"); - - //hv.readObjectsFromInstance("ECHO_collection"); - //hv.readObjectsFromInstance("ECHO_resource"); - - hv.releaseAndSubmitObjects("/ir/context/escidoc:40001/resources/members","//escidocItem:item"); + + EScidocBasicHandler connector = new EScidocBasicHandler("escidoc-test.mpiwg-berlin.mpg.de",8080,"dwinter","weikiki7"); -// newimporter.organizeRessourcesInCollections(connector, "/ir/context/escidoc:1001"); - //hv.releaseAndSubmitObjects("/ir/containers","//container:container"); + ECHOImporter newimporter = new ECHOImporter(new URL( + "file:///Users/dwinter/libcoll.rdf")); + ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter, + new ECHOTransformer(), connector, "/ir/context/escidoc:1001"); + + // hv.readObjectsFromInstance("ECHO_collection"); + // hv.readObjectsFromInstance("ECHO_resource"); + + hv.releaseAndSubmitObjects( + "/ir/context/escidoc:1001/resources/members", + "//escidocItem:item",1); + + // newimporter.organizeRessourcesInCollections(connector, + // "/ir/context/escidoc:1001"); + // hv.releaseAndSubmitObjects("/ir/containers","//container:container"); } }
--- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarverster.java Fri Nov 26 09:09:25 2010 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarverster.java Mon Jan 10 12:42:27 2011 +0100 @@ -41,9 +41,10 @@ new ECHOTransformer(),connector,"/ir/context/escidoc:12001"); //hv.readObjectsFromInstance("ECHO_collection"); - hv.readObjectsFromInstance("ECHO_resource"); + //hv.readObjectsFromInstance("ECHO_resource"); //hv.releaseAndSubmitObjects("/ir/context/escidoc:12001"); + hv.releaseAndSubmitObjects("/ir/context/escidoc:12001/resources/members","//escidocItem:item",1); }
--- a/src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java Fri Nov 26 09:09:25 2010 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java Mon Jan 10 12:42:27 2011 +0100 @@ -85,6 +85,19 @@ this.pid= test.getTextTrim(); } + public eSciDocXmlObject(InputStream content) throws JDOMException, IOException { + SAXBuilder builder = new SAXBuilder(); + + dom = builder.build(content); + + xpath = EScidocTools.getESciDocXpath("//prop:pid"); + + Element test = (Element) xpath.selectSingleNode(dom); + + this.pid= test.getTextTrim(); + + } + public void insertDC(Element mdDc) throws Exception{ //NodeList dcList = mdDc.getFirstChild().getChildNodes(); XPath dcXPath = EScidocTools.getESciDocXpath("//dc-md"); //todo check path to dc in escddoc templates