Mercurial > hg > eSciDocImport
diff src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java @ 2:fab8e78184fa
minor
author | dwinter |
---|---|
date | Mon, 10 Jan 2011 12:42:27 +0100 |
parents | c6929e63b0b8 |
children | 58b52df9763c |
line wrap: on
line diff
--- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java Fri Nov 26 09:09:25 2010 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java Mon Jan 10 12:42:27 2011 +0100 @@ -3,11 +3,12 @@ import java.io.File; import java.io.FileWriter; import java.io.IOException; +import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; - +import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.log4j.BasicConfigurator; import org.apache.log4j.Level; @@ -26,7 +27,8 @@ import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; public class ESciDocDataHarvester { - + static int MAX_REC=1000; + //static int MAX_REC=5; protected Logger logger = Logger.getRootLogger(); protected Importer importer; protected EScidocBasicHandler connector; @@ -35,134 +37,197 @@ private String echoContext; private Logger addedFile = Logger.getLogger("addedFilesLogger"); private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger"); - - - public ESciDocDataHarvester(Importer importer, Transformer transformer, EScidocBasicHandler connector, String context) throws IOException{ - this.importer=importer; - this.transformer=transformer; - this.connector=connector; - this.tools=new EScidocTools(connector); - this.echoContext= context; - - + + public ESciDocDataHarvester(Importer importer, Transformer transformer, + EScidocBasicHandler connector, String context) throws IOException { + this.importer = importer; + this.transformer = transformer; + this.connector = connector; + this.tools = new EScidocTools(connector); + this.echoContext = context; + } - public Boolean readObjectsFromInstance(String type) throws Exception{ + + public Boolean readObjectsFromInstance(String type) throws Exception { ArrayList<String> addedObjects = new ArrayList<String>(); ArrayList<String> notAddedObjects = new ArrayList<String>(); - for (ECHOObject obj: importer.getObjectList(type)){ - - - if (ECHORessource.class.isInstance(obj)){ - try { - if (connector.alreadyExists("/md-records/md-record/admin/archivePath",((ECHORessource)obj).archivePath,echoContext)){ - logger.debug("already exist:"+((ECHORessource)obj).archivePath); + for (ECHOObject obj : importer.getObjectList(type)) { + + if (ECHORessource.class.isInstance(obj)) { + try { + if (connector.alreadyExists( + "/md-records/md-record/admin/archivePath", + ((ECHORessource) obj).archivePath, echoContext)) { + logger.debug("already exist:" + + ((ECHORessource) obj).archivePath); + continue; + } + } catch (Exception e) { + logger.debug("already exist error"); + e.printStackTrace(); continue; } - } catch (Exception e) { - logger.debug("already exist error"); - e.printStackTrace(); - continue; } - } - - obj.context=echoContext; - - String contid=connector.getIDfromPID(obj.pid,echoContext); - if (contid!=null){ - System.out.println("------- belongsTo:"+contid); + + obj.context = echoContext; + + String contid = connector.getIDfromPID(obj.pid, echoContext); + if (contid != null) { + System.out.println("------- belongsTo:" + contid); } else { - - eSciDocXmlObject escidocItem = transformer.transform(obj); - logger.info(escidocItem.printXML()); - // TODO write PID to back to echo-obj - Boolean result = connector.createItem(escidocItem); - if (result){ - addedObjects.add(escidocItem.getESciDocId()); - addedFile.debug(escidocItem.getESciDocId()+"\n"); - //addedFile.write(escidocItem.getESciDocId()+"\n"); - //addedFile.flush(); - - }else { - notAddedObjects.add(obj.echoUrl); - notAddedFile.debug(obj.echoUrl); - //notAddedFile.write(obj.echoUrl+"\n"); - //notAddedFile.flush(); - } - //if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){ - // logger.info("PID already exists:"+obj); - //} else if (result == ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){ - // logger.info("Object with reference to the same digital object already exists:"+obj); - //} - + + eSciDocXmlObject escidocItem = transformer.transform(obj); + logger.info(escidocItem.printXML()); + // TODO write PID to back to echo-obj + Boolean result = connector.createItem(escidocItem); + if (result) { + addedObjects.add(escidocItem.getESciDocId()); + addedFile.debug(escidocItem.getESciDocId() + "\n"); + // addedFile.write(escidocItem.getESciDocId()+"\n"); + // addedFile.flush(); + + } else { + notAddedObjects.add(obj.echoUrl); + notAddedFile.debug(obj.echoUrl); + // notAddedFile.write(obj.echoUrl+"\n"); + // notAddedFile.flush(); + } + // if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){ + // logger.info("PID already exists:"+obj); + // } else if (result == + // ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){ + // logger.info("Object with reference to the same digital object already exists:"+obj); + // } + } } - if(logger.getLevel()==Level.DEBUG){ - for (String addedObject:addedObjects){ + if (logger.getLevel() == Level.DEBUG) { + for (String addedObject : addedObjects) { logger.debug(addedObject); } } - -// File outFile = new File("/tmp/import.out"); -// FileWriter fw = new FileWriter(outFile); -// for (String addedObject:addedObjects){ -// fw.write(addedObject+"\n"); -// } -// for (String addedObject:notAddedObjects){ -// fw.write(addedObject+"\n"); -// } -// fw.close(); + + // File outFile = new File("/tmp/import.out"); + // FileWriter fw = new FileWriter(outFile); + // for (String addedObject:addedObjects){ + // fw.write(addedObject+"\n"); + // } + // for (String addedObject:notAddedObjects){ + // fw.write(addedObject+"\n"); + // } + // fw.close(); return true; } - public void releaseAndSubmitObjects(String command,String objectXPath) throws Exception{ - for (eSciDocXmlObject obj: connector.getObjectListFromFilterResult(command,objectXPath)){ - HttpResponse res = connector.submitAnObject(obj,"first release"); - logger.debug(res.getStatusLine()); - if (res.getStatusLine().getStatusCode()!=200){ - logger.debug("Can not submit:"+obj.getESciDocId()); - //res.getEntity().consumeContent(); // necessary to release the conneciton - - } - res.getEntity().consumeContent(); // necessary to release the conneciton - - if (!connector.upDateObject(obj)){ - logger.debug("Can not update:"+obj.getESciDocId()); - //continue; + /** + * @param command + * @param objectXPath + * @param mode 0 : only submit, 1:only release, 2:release and submit + * @throws Exception + */ + public void releaseAndSubmitObjects(String command, String objectXPath,int mode) + throws Exception { + + Integer numberOfHits = connector.getNumberOfHitsFromFilterResult( + command, objectXPath,mode); + + + int tausend = ((numberOfHits-1) / MAX_REC); + + String queryRestrict=""; + if(mode==0 | mode==2){ + queryRestrict="query=%22/properties/version/status%22=pending"; + } else { + queryRestrict="query=%22/properties/version/status%22=submitted"; + } + + for (int t = 0; t <= tausend; t++) { + int start = t * MAX_REC+1; + // int max=Math.min((t+1)*1000, numberOfHits); + String query = "?maximumRecords="+String.valueOf(MAX_REC)+"&startRecord=" + + String.valueOf(start)+"&"+queryRestrict; + for (eSciDocXmlObject obj : connector + .getObjectListFromFilterResult(command+query, objectXPath)) { + //TODO is the following really necessary, currently the obj in the list is sometimes not the current one. + try{ + HttpResponse resObj = connector.eScidocGet(obj.getESciDocId()); + HttpEntity ent = resObj.getEntity(); + if (ent!=null){ + obj= new eSciDocXmlObject(ent.getContent()); + } else { + logger.debug("Can not retrieve:" + obj.getESciDocId()); + continue; + } + } catch (Exception e){ + logger.debug("Can not retrieve:" + obj.getESciDocId()); + continue; + } + if (mode==0 | mode==2){ + HttpResponse res = connector.submitAnObject(obj, + "first release"); + logger.debug(res.getStatusLine()); + + if (res.getStatusLine().getStatusCode() != 200) { + logger.debug("Can not submit:" + obj.getESciDocId()); + // res.getEntity().consumeContent(); // necessary to release + // the conneciton + + } + InputStream restream = res.getEntity().getContent(); + logger.debug(EScidocBasicHandler.convertStreamToString(restream)); + //res.getEntity().consumeContent(); // necessary to release the + // conneciton + + if (!connector.upDateObject(obj)) { + logger.debug("Can not update:" + obj.getESciDocId()); + // continue; + + } + } + + if (mode==1 | mode==2){ + HttpResponse res = connector.releaseAnObject(obj, "first release"); + logger.debug(res.getStatusLine()); + if (res.getStatusLine().getStatusCode() != 200) { + logger.debug("Can not release:" + obj.getESciDocId()); + res.getEntity().consumeContent(); // necessary to release + // the conneciton + continue; + } + addedFile.debug("RELEASED:" + obj.getESciDocId()); + res.getEntity().consumeContent(); // necessary to release the + // connecito + } } - - - res = connector.releaseAnObject(obj, "first release"); - logger.debug(res.getStatusLine()); - if (res.getStatusLine().getStatusCode()!=200){ - logger.debug("Can not release:"+obj.getESciDocId()); - res.getEntity().consumeContent(); // necessary to release the conneciton - continue; - } - addedFile.debug("RELEASED:"+obj.getESciDocId()); - res.getEntity().consumeContent(); // necessary to release the conneciton } - + } - public static void main(String[] args) throws Exception{ - + + public static void main(String[] args) throws Exception { + Logger rl = Logger.getRootLogger(); DOMConfigurator.configure("log4uconf.xml"); - rl.setLevel(Level.DEBUG); + rl.setLevel(Level.DEBUG); - - EScidocBasicHandler connector = new EScidocBasicHandler("192.168.56.2",8080,"dwinter","weikiki7"); - ECHOImporter newimporter = new ECHOImporter(new URL("file:///Users/dwinter/libcoll.rdf")); - ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter, - new ECHOTransformer(),connector,"/ir/context/escidoc:40001"); - - //hv.readObjectsFromInstance("ECHO_collection"); - //hv.readObjectsFromInstance("ECHO_resource"); - - hv.releaseAndSubmitObjects("/ir/context/escidoc:40001/resources/members","//escidocItem:item"); + + EScidocBasicHandler connector = new EScidocBasicHandler("escidoc-test.mpiwg-berlin.mpg.de",8080,"dwinter","weikiki7"); -// newimporter.organizeRessourcesInCollections(connector, "/ir/context/escidoc:1001"); - //hv.releaseAndSubmitObjects("/ir/containers","//container:container"); + ECHOImporter newimporter = new ECHOImporter(new URL( + "file:///Users/dwinter/libcoll.rdf")); + ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter, + new ECHOTransformer(), connector, "/ir/context/escidoc:1001"); + + // hv.readObjectsFromInstance("ECHO_collection"); + // hv.readObjectsFromInstance("ECHO_resource"); + + hv.releaseAndSubmitObjects( + "/ir/context/escidoc:1001/resources/members", + "//escidocItem:item",1); + + // newimporter.organizeRessourcesInCollections(connector, + // "/ir/context/escidoc:1001"); + // hv.releaseAndSubmitObjects("/ir/containers","//container:container"); } }