Mercurial > hg > eSciDocImport
view src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java @ 2:fab8e78184fa
minor
author | dwinter |
---|---|
date | Mon, 10 Jan 2011 12:42:27 +0100 |
parents | c6929e63b0b8 |
children | 58b52df9763c |
line wrap: on
line source
package de.mpiwg.itgroup.eSciDoc.harvesting; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.log4j.BasicConfigurator; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.log4j.xml.DOMConfigurator; import org.jdom.JDOMException; import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource; import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter; import de.mpiwg.itgroup.eSciDoc.importer.Importer; import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer; import de.mpiwg.itgroup.eSciDoc.transformer.Transformer; import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; public class ESciDocDataHarvester { static int MAX_REC=1000; //static int MAX_REC=5; protected Logger logger = Logger.getRootLogger(); protected Importer importer; protected EScidocBasicHandler connector; protected Transformer transformer; private EScidocTools tools; private String echoContext; private Logger addedFile = Logger.getLogger("addedFilesLogger"); private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger"); public ESciDocDataHarvester(Importer importer, Transformer transformer, EScidocBasicHandler connector, String context) throws IOException { this.importer = importer; this.transformer = transformer; this.connector = connector; this.tools = new EScidocTools(connector); this.echoContext = context; } public Boolean readObjectsFromInstance(String type) throws Exception { ArrayList<String> addedObjects = new ArrayList<String>(); ArrayList<String> notAddedObjects = new ArrayList<String>(); for (ECHOObject obj : importer.getObjectList(type)) { if (ECHORessource.class.isInstance(obj)) { try { if (connector.alreadyExists( "/md-records/md-record/admin/archivePath", ((ECHORessource) obj).archivePath, echoContext)) { logger.debug("already exist:" + ((ECHORessource) obj).archivePath); continue; } } catch (Exception e) { logger.debug("already exist error"); e.printStackTrace(); continue; } } obj.context = echoContext; String contid = connector.getIDfromPID(obj.pid, echoContext); if (contid != null) { System.out.println("------- belongsTo:" + contid); } else { eSciDocXmlObject escidocItem = transformer.transform(obj); logger.info(escidocItem.printXML()); // TODO write PID to back to echo-obj Boolean result = connector.createItem(escidocItem); if (result) { addedObjects.add(escidocItem.getESciDocId()); addedFile.debug(escidocItem.getESciDocId() + "\n"); // addedFile.write(escidocItem.getESciDocId()+"\n"); // addedFile.flush(); } else { notAddedObjects.add(obj.echoUrl); notAddedFile.debug(obj.echoUrl); // notAddedFile.write(obj.echoUrl+"\n"); // notAddedFile.flush(); } // if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){ // logger.info("PID already exists:"+obj); // } else if (result == // ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){ // logger.info("Object with reference to the same digital object already exists:"+obj); // } } } if (logger.getLevel() == Level.DEBUG) { for (String addedObject : addedObjects) { logger.debug(addedObject); } } // File outFile = new File("/tmp/import.out"); // FileWriter fw = new FileWriter(outFile); // for (String addedObject:addedObjects){ // fw.write(addedObject+"\n"); // } // for (String addedObject:notAddedObjects){ // fw.write(addedObject+"\n"); // } // fw.close(); return true; } /** * @param command * @param objectXPath * @param mode 0 : only submit, 1:only release, 2:release and submit * @throws Exception */ public void releaseAndSubmitObjects(String command, String objectXPath,int mode) throws Exception { Integer numberOfHits = connector.getNumberOfHitsFromFilterResult( command, objectXPath,mode); int tausend = ((numberOfHits-1) / MAX_REC); String queryRestrict=""; if(mode==0 | mode==2){ queryRestrict="query=%22/properties/version/status%22=pending"; } else { queryRestrict="query=%22/properties/version/status%22=submitted"; } for (int t = 0; t <= tausend; t++) { int start = t * MAX_REC+1; // int max=Math.min((t+1)*1000, numberOfHits); String query = "?maximumRecords="+String.valueOf(MAX_REC)+"&startRecord=" + String.valueOf(start)+"&"+queryRestrict; for (eSciDocXmlObject obj : connector .getObjectListFromFilterResult(command+query, objectXPath)) { //TODO is the following really necessary, currently the obj in the list is sometimes not the current one. try{ HttpResponse resObj = connector.eScidocGet(obj.getESciDocId()); HttpEntity ent = resObj.getEntity(); if (ent!=null){ obj= new eSciDocXmlObject(ent.getContent()); } else { logger.debug("Can not retrieve:" + obj.getESciDocId()); continue; } } catch (Exception e){ logger.debug("Can not retrieve:" + obj.getESciDocId()); continue; } if (mode==0 | mode==2){ HttpResponse res = connector.submitAnObject(obj, "first release"); logger.debug(res.getStatusLine()); if (res.getStatusLine().getStatusCode() != 200) { logger.debug("Can not submit:" + obj.getESciDocId()); // res.getEntity().consumeContent(); // necessary to release // the conneciton } InputStream restream = res.getEntity().getContent(); logger.debug(EScidocBasicHandler.convertStreamToString(restream)); //res.getEntity().consumeContent(); // necessary to release the // conneciton if (!connector.upDateObject(obj)) { logger.debug("Can not update:" + obj.getESciDocId()); // continue; } } if (mode==1 | mode==2){ HttpResponse res = connector.releaseAnObject(obj, "first release"); logger.debug(res.getStatusLine()); if (res.getStatusLine().getStatusCode() != 200) { logger.debug("Can not release:" + obj.getESciDocId()); res.getEntity().consumeContent(); // necessary to release // the conneciton continue; } addedFile.debug("RELEASED:" + obj.getESciDocId()); res.getEntity().consumeContent(); // necessary to release the // connecito } } } } public static void main(String[] args) throws Exception { Logger rl = Logger.getRootLogger(); DOMConfigurator.configure("log4uconf.xml"); rl.setLevel(Level.DEBUG); EScidocBasicHandler connector = new EScidocBasicHandler("escidoc-test.mpiwg-berlin.mpg.de",8080,"dwinter","weikiki7"); ECHOImporter newimporter = new ECHOImporter(new URL( "file:///Users/dwinter/libcoll.rdf")); ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter, new ECHOTransformer(), connector, "/ir/context/escidoc:1001"); // hv.readObjectsFromInstance("ECHO_collection"); // hv.readObjectsFromInstance("ECHO_resource"); hv.releaseAndSubmitObjects( "/ir/context/escidoc:1001/resources/members", "//escidocItem:item",1); // newimporter.organizeRessourcesInCollections(connector, // "/ir/context/escidoc:1001"); // hv.releaseAndSubmitObjects("/ir/containers","//container:container"); } }