Mercurial > hg > eSciDocImport
diff src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java @ 3:58b52df9763c
added update functionality if index.meta has changed
author | dwinter |
---|---|
date | Wed, 12 Jan 2011 11:00:14 +0100 |
parents | fab8e78184fa |
children | cb5668b07bfc |
line wrap: on
line diff
--- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java Mon Jan 10 12:42:27 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java Wed Jan 12 11:00:14 2011 +0100 @@ -1,16 +1,14 @@ package de.mpiwg.itgroup.eSciDoc.harvesting; -import java.io.File; -import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; -import java.net.MalformedURLException; +import java.io.UnsupportedEncodingException; import java.net.URL; import java.util.ArrayList; +import java.util.Map; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; -import org.apache.log4j.BasicConfigurator; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.log4j.xml.DOMConfigurator; @@ -20,50 +18,81 @@ import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource; +import de.mpiwg.itgroup.eSciDoc.exceptions.ConnectorException; +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import de.mpiwg.itgroup.eSciDoc.exceptions.ObjectNotUniqueError; +import de.mpiwg.itgroup.eSciDoc.exceptions.TransformerException; import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter; import de.mpiwg.itgroup.eSciDoc.importer.Importer; import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer; import de.mpiwg.itgroup.eSciDoc.transformer.Transformer; import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; +/** + * @author dwinter + * Main class for data harveting from index.meta files into eScidoc + */ public class ESciDocDataHarvester { - static int MAX_REC=1000; + static int MAX_REC=1000; // maximale Anzahl von Records die in einem Stueck eingelesen bzw. bearbeitet werden. //static int MAX_REC=5; protected Logger logger = Logger.getRootLogger(); protected Importer importer; protected EScidocBasicHandler connector; protected Transformer transformer; - private EScidocTools tools; + private String echoContext; private Logger addedFile = Logger.getLogger("addedFilesLogger"); private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger"); + /** + * @param importer Importer for dataObjects, describes how to access the objects + * @param transformer Transformer, generates the eScidocMetaDatasets + * @param connector connects to the eScidocRepository + * @param context Escidoc context path z.b. /ir/context/escidoc:12001 + */ public ESciDocDataHarvester(Importer importer, Transformer transformer, - EScidocBasicHandler connector, String context) throws IOException { + EScidocBasicHandler connector, String context) { this.importer = importer; this.transformer = transformer; this.connector = connector; - this.tools = new EScidocTools(connector); + //this.tools = new EScidocTools(connector); this.echoContext = context; } - public Boolean readObjectsFromInstance(String type) throws Exception { + /** Read objects into eScidoc or updates the objects if indexMeta has changed. + * @param type restrict the imported objects to a specific type, possible types should be defined in + * the given importer @see {@link #importer} + * @return + * @throws ConnectorException + * @throws TransformerException + * @throws ESciDocXmlObjectException + */ + public Boolean readObjectsFromInstance(String type) throws ConnectorException, TransformerException, ESciDocXmlObjectException { ArrayList<String> addedObjects = new ArrayList<String>(); ArrayList<String> notAddedObjects = new ArrayList<String>(); for (ECHOObject obj : importer.getObjectList(type)) { if (ECHORessource.class.isInstance(obj)) { try { - if (connector.alreadyExists( - "/md-records/md-record/admin/archivePath", - ((ECHORessource) obj).archivePath, echoContext)) { + ECHOObject old; + try { + old = connector.alreadyExists( + "/md-records/md-record/admin/archivePath", + ((ECHORessource) obj).archivePath, echoContext); + } catch (ObjectNotUniqueError e) { + // TODO Auto-generated catch block + e.printStackTrace(); + continue; + } + if (old!=null) { logger.debug("already exist:" + ((ECHORessource) obj).archivePath); + handleExistingObject(obj,old); continue; } - } catch (Exception e) { - logger.debug("already exist error"); + } catch (ConnectorException e) { + logger.debug("already exist error:"); e.printStackTrace(); continue; } @@ -77,28 +106,32 @@ } else { eSciDocXmlObject escidocItem = transformer.transform(obj); - logger.info(escidocItem.printXML()); - // TODO write PID to back to echo-obj - Boolean result = connector.createItem(escidocItem); - if (result) { - addedObjects.add(escidocItem.getESciDocId()); - addedFile.debug(escidocItem.getESciDocId() + "\n"); - // addedFile.write(escidocItem.getESciDocId()+"\n"); - // addedFile.flush(); + + + try { + logger.info(escidocItem.printXML()); + // TODO write PID to back to echo-obj + Boolean result = connector.createItem(escidocItem); + if (result) { + addedObjects.add(escidocItem.getESciDocId()); + addedFile.debug(escidocItem.getESciDocId() + "\n"); - } else { - notAddedObjects.add(obj.echoUrl); - notAddedFile.debug(obj.echoUrl); - // notAddedFile.write(obj.echoUrl+"\n"); - // notAddedFile.flush(); - } - // if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){ - // logger.info("PID already exists:"+obj); - // } else if (result == - // ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){ - // logger.info("Object with reference to the same digital object already exists:"+obj); - // } - + } else { + notAddedObjects.add(obj.echoUrl); + notAddedFile.debug(obj.echoUrl); + + } + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ESciDocXmlObjectException(); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ESciDocXmlObjectException(); + } + } } if (logger.getLevel() == Level.DEBUG) { @@ -107,18 +140,61 @@ } } - // File outFile = new File("/tmp/import.out"); - // FileWriter fw = new FileWriter(outFile); - // for (String addedObject:addedObjects){ - // fw.write(addedObject+"\n"); - // } - // for (String addedObject:notAddedObjects){ - // fw.write(addedObject+"\n"); - // } - // fw.close(); return true; } + + + + + /** + * Deal with existing objects, do nothing if md5 of stored metadata and metadata on the server is the same otherwise call {@link #updateObject(ECHOObject)}. + * @param objNew + * @param old + * @throws TransformerException + * @throws ESciDocXmlObjectException + */ + private void handleExistingObject(ECHOObject objNew, ECHOObject old) throws TransformerException, ESciDocXmlObjectException { + ECHORessource objNewRes = (ECHORessource)objNew; + ECHORessource objOldRes = (ECHORessource)old; + String md5onServer = objNewRes.getIndexMetaMD5onServer(); + String md5=objOldRes.getIndexMetaMD5stored(); + if (md5onServer.equals(md5)) + return; + else { + updateObject(objNew, old); + } + + } + + private void updateObject(ECHOObject objNew, ECHOObject objOld) throws TransformerException, ESciDocXmlObjectException { + objNew.context = echoContext; + eSciDocXmlObject escidocItem = transformer.transform(objNew); + String lastModificationDateOld = objOld.lastModificationDate; + escidocItem.setLastModificationDate(lastModificationDateOld); + try { + HttpResponse ret = connector.eScidocPut(objOld.eScidocId, EScidocBasicHandler.convertStringToStream(escidocItem.printXML())); + HttpEntity ent = ret.getEntity(); + if (ret.getStatusLine().getStatusCode() != 200) { + logger.debug("Can not update:" + objOld.eScidocId); + // res.getEntity().consumeContent(); // necessary to release + // the conneciton + ent.consumeContent(); + + } + InputStream restream = ret.getEntity().getContent(); + logger.debug(EScidocBasicHandler.convertStreamToString(restream)); + } catch (UnsupportedEncodingException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + + } + /** * @param command * @param objectXPath