Mercurial > hg > eSciDocImport
view src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java @ 0:c6929e63b0b8
first import
author | dwinter |
---|---|
date | Wed, 24 Nov 2010 16:52:07 +0100 |
parents | |
children | fab8e78184fa |
line wrap: on
line source
package de.mpiwg.itgroup.eSciDoc.harvesting; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import org.apache.http.HttpResponse; import org.apache.log4j.BasicConfigurator; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.log4j.xml.DOMConfigurator; import org.jdom.JDOMException; import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource; import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter; import de.mpiwg.itgroup.eSciDoc.importer.Importer; import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer; import de.mpiwg.itgroup.eSciDoc.transformer.Transformer; import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; public class ESciDocDataHarvester { protected Logger logger = Logger.getRootLogger(); protected Importer importer; protected EScidocBasicHandler connector; protected Transformer transformer; private EScidocTools tools; private String echoContext; private Logger addedFile = Logger.getLogger("addedFilesLogger"); private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger"); public ESciDocDataHarvester(Importer importer, Transformer transformer, EScidocBasicHandler connector, String context) throws IOException{ this.importer=importer; this.transformer=transformer; this.connector=connector; this.tools=new EScidocTools(connector); this.echoContext= context; } public Boolean readObjectsFromInstance(String type) throws Exception{ ArrayList<String> addedObjects = new ArrayList<String>(); ArrayList<String> notAddedObjects = new ArrayList<String>(); for (ECHOObject obj: importer.getObjectList(type)){ if (ECHORessource.class.isInstance(obj)){ try { if (connector.alreadyExists("/md-records/md-record/admin/archivePath",((ECHORessource)obj).archivePath,echoContext)){ logger.debug("already exist:"+((ECHORessource)obj).archivePath); continue; } } catch (Exception e) { logger.debug("already exist error"); e.printStackTrace(); continue; } } obj.context=echoContext; String contid=connector.getIDfromPID(obj.pid,echoContext); if (contid!=null){ System.out.println("------- belongsTo:"+contid); } else { eSciDocXmlObject escidocItem = transformer.transform(obj); logger.info(escidocItem.printXML()); // TODO write PID to back to echo-obj Boolean result = connector.createItem(escidocItem); if (result){ addedObjects.add(escidocItem.getESciDocId()); addedFile.debug(escidocItem.getESciDocId()+"\n"); //addedFile.write(escidocItem.getESciDocId()+"\n"); //addedFile.flush(); }else { notAddedObjects.add(obj.echoUrl); notAddedFile.debug(obj.echoUrl); //notAddedFile.write(obj.echoUrl+"\n"); //notAddedFile.flush(); } //if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){ // logger.info("PID already exists:"+obj); //} else if (result == ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){ // logger.info("Object with reference to the same digital object already exists:"+obj); //} } } if(logger.getLevel()==Level.DEBUG){ for (String addedObject:addedObjects){ logger.debug(addedObject); } } // File outFile = new File("/tmp/import.out"); // FileWriter fw = new FileWriter(outFile); // for (String addedObject:addedObjects){ // fw.write(addedObject+"\n"); // } // for (String addedObject:notAddedObjects){ // fw.write(addedObject+"\n"); // } // fw.close(); return true; } public void releaseAndSubmitObjects(String command,String objectXPath) throws Exception{ for (eSciDocXmlObject obj: connector.getObjectListFromFilterResult(command,objectXPath)){ HttpResponse res = connector.submitAnObject(obj,"first release"); logger.debug(res.getStatusLine()); if (res.getStatusLine().getStatusCode()!=200){ logger.debug("Can not submit:"+obj.getESciDocId()); //res.getEntity().consumeContent(); // necessary to release the conneciton } res.getEntity().consumeContent(); // necessary to release the conneciton if (!connector.upDateObject(obj)){ logger.debug("Can not update:"+obj.getESciDocId()); //continue; } res = connector.releaseAnObject(obj, "first release"); logger.debug(res.getStatusLine()); if (res.getStatusLine().getStatusCode()!=200){ logger.debug("Can not release:"+obj.getESciDocId()); res.getEntity().consumeContent(); // necessary to release the conneciton continue; } addedFile.debug("RELEASED:"+obj.getESciDocId()); res.getEntity().consumeContent(); // necessary to release the conneciton } } public static void main(String[] args) throws Exception{ Logger rl = Logger.getRootLogger(); DOMConfigurator.configure("log4uconf.xml"); rl.setLevel(Level.DEBUG); EScidocBasicHandler connector = new EScidocBasicHandler("192.168.56.2",8080,"dwinter","weikiki7"); ECHOImporter newimporter = new ECHOImporter(new URL("file:///Users/dwinter/libcoll.rdf")); ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter, new ECHOTransformer(),connector,"/ir/context/escidoc:40001"); //hv.readObjectsFromInstance("ECHO_collection"); //hv.readObjectsFromInstance("ECHO_resource"); hv.releaseAndSubmitObjects("/ir/context/escidoc:40001/resources/members","//escidocItem:item"); // newimporter.organizeRessourcesInCollections(connector, "/ir/context/escidoc:1001"); //hv.releaseAndSubmitObjects("/ir/containers","//container:container"); } }