changeset 3:58b52df9763c

added update functionality if index.meta has changed
author dwinter
date Wed, 12 Jan 2011 11:00:14 +0100
parents fab8e78184fa
children cb5668b07bfc
files .classpath src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java src/de/mpiwg/itgroup/eSciDoc/config/echo.properties src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHOObject.java src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHORessource.java src/de/mpiwg/itgroup/eSciDoc/exceptions/ConnectorException.java src/de/mpiwg/itgroup/eSciDoc/exceptions/ESciDocXmlObjectException.java src/de/mpiwg/itgroup/eSciDoc/exceptions/ObjectNotUniqueError.java src/de/mpiwg/itgroup/eSciDoc/exceptions/TransformerException.java src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarverster.java src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarvester.java src/de/mpiwg/itgroup/eSciDoc/transformer/ECHOTransformer.java src/de/mpiwg/itgroup/eSciDoc/transformer/Transformer.java src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java
diffstat 16 files changed, 493 insertions(+), 154 deletions(-) [+]
line wrap: on
line diff
--- a/.classpath	Mon Jan 10 12:42:27 2011 +0100
+++ b/.classpath	Wed Jan 12 11:00:14 2011 +0100
@@ -13,7 +13,7 @@
 	<classpathentry kind="lib" path="libs/ws-commons-util-1.0.2.jar"/>
 	<classpathentry kind="lib" path="libs/httpclient-4.0.1.jar"/>
 	<classpathentry kind="lib" path="libs/httpcore-4.0.1.jar"/>
+	<classpathentry kind="con" path="org.eclipse.jst.j2ee.internal.module.container"/>
 	<classpathentry combineaccessrules="false" kind="src" path="/MetaDataManager"/>
-	<classpathentry kind="con" path="org.eclipse.jst.j2ee.internal.module.container"/>
 	<classpathentry kind="output" path="bin"/>
 </classpath>
--- a/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java	Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java	Wed Jan 12 11:00:14 2011 +0100
@@ -11,7 +11,9 @@
 import java.net.URL;
 import java.net.URLEncoder;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.StringTokenizer;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -19,6 +21,7 @@
 import javax.swing.text.html.HTMLDocument.HTMLReader.IsindexAction;
 
 
+import org.apache.commons.codec.EncoderException;
 import org.apache.http.HttpEntity;
 import org.apache.http.HttpResponse;
 import org.apache.http.client.ClientProtocolException;
@@ -44,7 +47,13 @@
 import org.jdom.Text;
 import org.jdom.input.SAXBuilder;
 import org.jdom.xpath.XPath;
+import org.w3c.dom.Node;
 
+import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject;
+import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ConnectorException;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ObjectNotUniqueError;
 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
 
 
@@ -477,7 +486,7 @@
 
 
 
-	public String getIDfromPID(String pid, String context) throws ClientProtocolException, IOException, IllegalStateException, JDOMException {
+	public String getIDfromPID(String pid, String context) throws ConnectorException {
 			
 			
 			String filter = "<param><filter name=\"http://escidoc.de/core/01/properties/pid\">";
@@ -487,21 +496,42 @@
 
 			String command = context
 				+ "/resources/members/filter";
-			HttpResponse result =eScidocPost(command,
-			new ByteArrayInputStream(filter.getBytes()));
+			HttpResponse result;
+			try {
+				result = eScidocPost(command,
+				new ByteArrayInputStream(filter.getBytes()));
+			} catch (IOException e) {
+				// TODO Auto-generated catch block
+				e.printStackTrace();
+				throw new ConnectorException();
+			}
 
-			Document dom = new SAXBuilder().build(result.getEntity().getContent());
-			
-			XPath xp = EScidocTools.getESciDocXpath("//escidocItem:item/@xlink:href");
-			
-			Attribute attr = (Attribute)xp.selectSingleNode(dom);
-			
-			if (attr!=null){
-				return attr.getValue();
+			try {
+				Document dom = new SAXBuilder().build(result.getEntity().getContent());
+				
+				XPath xp = EScidocTools.getESciDocXpath("//escidocItem:item/@xlink:href");
+				
+				Attribute attr = (Attribute)xp.selectSingleNode(dom);
+				
+				if (attr!=null){
+					return attr.getValue();
+				}
+				
+				return null;
+				//return convertStreamToString(result.getEntity().getContent());
+			} catch (IllegalStateException e) {
+				// TODO Auto-generated catch block
+				e.printStackTrace();
+				throw new ConnectorException();
+			} catch (JDOMException e) {
+				// TODO Auto-generated catch block
+				e.printStackTrace();
+				throw new ConnectorException();
+			} catch (IOException e) {
+				// TODO Auto-generated catch block
+				e.printStackTrace();
+				throw new ConnectorException();
 			}
-			
-			return null;
-			//return convertStreamToString(result.getEntity().getContent());
 
 	
 	}
@@ -529,7 +559,7 @@
 	
 	}
 	
-	public List<eSciDocXmlObject> getObjectListFromFilterResult(String command, String objectXpath) throws IOException, IllegalStateException, JDOMException {
+	public List<eSciDocXmlObject> getObjectListFromFilterResult(String command, String objectXpath) throws IOException, IllegalStateException, JDOMException,ESciDocXmlObjectException {
 		//String filter = "<param><filter></filter></param>";
 		//
 		//String command = context
@@ -646,7 +676,7 @@
 
 
 
-	public boolean alreadyExists(String indexField, String testString, String context) throws Exception {
+	public ECHOObject alreadyExists(String indexField, String testString, String context) throws ConnectorException, ObjectNotUniqueError {
 		
 		String[] ct = context.split("/"); // gebraucht wird hier nur die id, dh ohne /ir/...
 		
@@ -656,25 +686,103 @@
 		String searchString = String.format("\"%s\"=\"%s\"",indexField,testString);
 		searchString += " and "+String.format("\"%s\"=\"%s\"","/properties/context/id",contextId);
 		
+		HttpResponse ret;
+		try{
 		searchString = URLEncoder.encode(searchString,"utf-8");
-		HttpResponse ret = eScidocGet("/ir/items?operation=searchRetrieve&version=1.1&query="+searchString);
+		ret = eScidocGet("/ir/items?operation=searchRetrieve&version=1.1&query="+searchString);
+		} catch (UnsupportedEncodingException e) {
+			throw new ConnectorException();
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+			throw new ConnectorException();
+		}
 		
 		if (ret.getStatusLine().getStatusCode()!=200)
 		{
 			logger.debug("alreadyExists: error searchstring:"+searchString);
 			HttpEntity ent = ret.getEntity();
-			if (ent!=null)
-				ent.consumeContent();
-			throw new Exception();
+			if (ent!=null) {
+				try {
+					ent.consumeContent();
+				} catch (IOException e) {
+					// TODO Auto-generated catch block
+					e.printStackTrace();
+					throw new ConnectorException();
+				}
+			throw new ConnectorException();
+			}
 		}
+		
+		try{
 		Document doc = new SAXBuilder().build(ret.getEntity().getContent());
 		
 		XPath xp = EScidocTools.getESciDocXpath("//zs:numberOfRecords/text()");
 		String hitsStr = ((Text)xp.selectSingleNode(doc)).getText();
 		Integer hits = Integer.valueOf(hitsStr);
-		if (hits>0)
-			return true;
-		return false;
+		if (hits>0){
+			if (hits>1)
+				throw new ObjectNotUniqueError();
+			return getOldObjectFromESciDoc(doc);
+		}
+		return null;
+		} catch (IOException e) {
+			e.printStackTrace();
+			throw new ConnectorException();
+		} catch (IllegalStateException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+			throw new ConnectorException();
+		} catch (JDOMException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+			throw new ConnectorException();
+		}
+	}
+
+
+
+	private ECHOObject getOldObjectFromESciDoc(Document doc) {
+		Map<String,String>retMap = new HashMap<String,String>();
+		XPath md5Nodes;
+		XPath itemId;
+		XPath lastModificationDate;
+		try {
+			md5Nodes= EScidocTools.getESciDocXpath(".//escidocComponents:component/escidocComponents:properties[prop:content-category[text()='index_meta']]/prop:checksum");
+			itemId= EScidocTools.getESciDocXpath(".//escidocItem:item/@xlink:href");
+			lastModificationDate = EScidocTools.getESciDocXpath(".//escidocItem:item/@last-modification-date");
+		} catch (JDOMException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+			return null;
+		}
+		Element node;
+		Attribute idNode;
+		Attribute lastModificationDateNode;
+		try {
+			node = (Element)md5Nodes.selectSingleNode(doc);
+			idNode = (Attribute)itemId.selectSingleNode(doc);
+			lastModificationDateNode =(Attribute)lastModificationDate.selectSingleNode(doc);
+			
+		} catch (JDOMException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+			return null;
+		}
+		String md5= node.getTextTrim();
+		String escidocId= idNode.getValue();
+		ECHORessource er;
+		try {
+			er = new ECHORessource();
+			er.eScidocId=escidocId;
+			er.indexMetaMD5stored=md5;
+			er.lastModificationDate= lastModificationDateNode.getValue();
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+			return null;
+		}
+		return er;
 	}
 
 
@@ -711,5 +819,8 @@
 
 
 
+	
+
+
 
 }
--- a/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java	Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java	Wed Jan 12 11:00:14 2011 +0100
@@ -63,6 +63,7 @@
 		xpathResources.addNamespace("escidocComponents",escidocComponents);
 		xpathResources.addNamespace("xlink",xlink);
 		xpathResources.addNamespace("metadata-records",metadataRecords);
+		xpathResources.addNamespace("metadataRecords",metadataRecords);
 		xpathResources.addNamespace("container",container);
 		xpathResources.addNamespace("item",item);
 		xpathResources.addNamespace("srw",srw);
--- a/src/de/mpiwg/itgroup/eSciDoc/config/echo.properties	Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/config/echo.properties	Wed Jan 12 11:00:14 2011 +0100
@@ -1,3 +1,3 @@
 textServletUrl=http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=
 dirInfoUrl=http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/dirInfo-xml.jsp?fn=
-metaDataManager=http://localhost:48080/MetaDataManagerRestlet
\ No newline at end of file
+metaDataManager=http://localhost:8180/MetaDataManagerRestlet
\ No newline at end of file
--- a/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHOObject.java	Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHOObject.java	Wed Jan 12 11:00:14 2011 +0100
@@ -28,6 +28,8 @@
 	public String metadataMananagerUrl;
 	public String context;
 	public String description ="";
+	public String eScidocId;
+	public String lastModificationDate;
 	
 	public ECHOObject() throws IOException{
 		Properties echoProperties = new Properties();
@@ -92,4 +94,6 @@
 		}
 		return pid;
 	}
+
+	
 }
--- a/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHORessource.java	Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHORessource.java	Wed Jan 12 11:00:14 2011 +0100
@@ -1,12 +1,18 @@
 package de.mpiwg.itgroup.eSciDoc.echoObjects;
 
 import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
 import java.net.URI;
+import java.net.URL;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
 import java.util.Properties;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 
+import org.apache.commons.codec.binary.Hex;
 import org.apache.http.Header;
 import org.apache.http.HttpException;
 import org.apache.http.HttpHost;
@@ -34,6 +40,8 @@
 	private String dirInfoUrl;
 	public String link;
 	
+	public String indexMetaMD5stored;
+	
 
 	public ECHORessource() throws IOException{
 		super();
@@ -190,4 +198,50 @@
 		}	
 		return false;
 	}
-}
+
+
+	public String getIndexMetaMD5onServer() {
+		String digest="";
+		try {
+			URL url = new URL(metaData);
+			InputStream is = url.openStream();
+			MessageDigest md = MessageDigest.getInstance("MD5");
+			digest = getDigest(is, md, 2048);
+		} catch (MalformedURLException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		} catch (NoSuchAlgorithmException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+		return digest;
+	}
+
+	public static String getDigest(InputStream is, MessageDigest md, int byteArraySize)
+	throws NoSuchAlgorithmException, IOException {
+
+		md.reset();
+		byte[] bytes = new byte[byteArraySize];
+		int numBytes;
+		while ((numBytes = is.read(bytes)) != -1) {
+			md.update(bytes, 0, numBytes);
+		}
+		byte[] digest = md.digest();
+		String result = new String(Hex.encodeHex(digest));
+		return result;
+		}
+
+
+	public String getIndexMetaMD5stored() {
+		return indexMetaMD5stored;
+	}
+	
+	public String getEScidocId() {
+		return eScidocId;
+	}
+
+
+	}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/eSciDoc/exceptions/ConnectorException.java	Wed Jan 12 11:00:14 2011 +0100
@@ -0,0 +1,5 @@
+package de.mpiwg.itgroup.eSciDoc.exceptions;
+
+public class ConnectorException extends Exception {
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/eSciDoc/exceptions/ESciDocXmlObjectException.java	Wed Jan 12 11:00:14 2011 +0100
@@ -0,0 +1,5 @@
+package de.mpiwg.itgroup.eSciDoc.exceptions;
+
+public class ESciDocXmlObjectException extends Exception{
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/eSciDoc/exceptions/ObjectNotUniqueError.java	Wed Jan 12 11:00:14 2011 +0100
@@ -0,0 +1,5 @@
+package de.mpiwg.itgroup.eSciDoc.exceptions;
+
+public class ObjectNotUniqueError extends Exception {
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/eSciDoc/exceptions/TransformerException.java	Wed Jan 12 11:00:14 2011 +0100
@@ -0,0 +1,5 @@
+package de.mpiwg.itgroup.eSciDoc.exceptions;
+
+public class TransformerException extends Exception {
+
+}
--- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java	Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java	Wed Jan 12 11:00:14 2011 +0100
@@ -1,16 +1,14 @@
 package de.mpiwg.itgroup.eSciDoc.harvesting;
 
-import java.io.File;
-import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStream;
-import java.net.MalformedURLException;
+import java.io.UnsupportedEncodingException;
 import java.net.URL;
 import java.util.ArrayList;
+import java.util.Map;
 
 import org.apache.http.HttpEntity;
 import org.apache.http.HttpResponse;
-import org.apache.log4j.BasicConfigurator;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.apache.log4j.xml.DOMConfigurator;
@@ -20,50 +18,81 @@
 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject;
 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ConnectorException;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ObjectNotUniqueError;
+import de.mpiwg.itgroup.eSciDoc.exceptions.TransformerException;
 import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter;
 import de.mpiwg.itgroup.eSciDoc.importer.Importer;
 import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer;
 import de.mpiwg.itgroup.eSciDoc.transformer.Transformer;
 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
 
+/**
+ * @author dwinter
+ * Main class for data harvesting from index.meta files into eScidoc
+ */
 public class ESciDocDataHarvester {
-	static int MAX_REC=1000;
+	static int MAX_REC=1000; // maximale Anzahl von Records die in einem Stueck eingelesen bzw. bearbeitet werden.
 	//static int MAX_REC=5;
 	protected Logger logger = Logger.getRootLogger();
 	protected Importer importer;
 	protected EScidocBasicHandler connector;
 	protected Transformer transformer;
-	private EScidocTools tools;
+	
 	private String echoContext;
 	private Logger addedFile = Logger.getLogger("addedFilesLogger");
 	private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger");
 
+	/**
+	 * @param importer Importer for dataObjects, describes how to access the objects
+	 * @param transformer Transformer, generates the eScidocMetaDatasets
+	 * @param connector connects to the eScidocRepository	
+	 * @param context Escidoc context path, e.g. /ir/context/escidoc:12001
+	 */
 	public ESciDocDataHarvester(Importer importer, Transformer transformer,
-			EScidocBasicHandler connector, String context) throws IOException {
+			EScidocBasicHandler connector, String context) {
 		this.importer = importer;
 		this.transformer = transformer;
 		this.connector = connector;
-		this.tools = new EScidocTools(connector);
+		//this.tools = new EScidocTools(connector);
 		this.echoContext = context;
 
 	}
 
-	public Boolean readObjectsFromInstance(String type) throws Exception {
+	/** Reads objects into eScidoc, or updates existing objects if their index.meta has changed.
+	 * @param type restricts the imported objects to a specific type; possible types should be defined in 
+	 * the given importer (see {@link #importer})
+	 * @return
+	 * @throws ConnectorException
+	 * @throws TransformerException
+	 * @throws ESciDocXmlObjectException
+	 */
+	public Boolean readObjectsFromInstance(String type) throws ConnectorException, TransformerException, ESciDocXmlObjectException {
 		ArrayList<String> addedObjects = new ArrayList<String>();
 		ArrayList<String> notAddedObjects = new ArrayList<String>();
 		for (ECHOObject obj : importer.getObjectList(type)) {
 
 			if (ECHORessource.class.isInstance(obj)) {
 				try {
-					if (connector.alreadyExists(
-							"/md-records/md-record/admin/archivePath",
-							((ECHORessource) obj).archivePath, echoContext)) {
+					ECHOObject old;
+					try {
+						old = connector.alreadyExists(
+								"/md-records/md-record/admin/archivePath",
+								((ECHORessource) obj).archivePath, echoContext);
+					} catch (ObjectNotUniqueError e) {
+						// TODO Auto-generated catch block
+						e.printStackTrace();
+						continue;
+					} 
+					if (old!=null) {
 						logger.debug("already exist:"
 								+ ((ECHORessource) obj).archivePath);
+						handleExistingObject(obj,old);
 						continue;
 					}
-				} catch (Exception e) {
-					logger.debug("already exist error");
+				} catch (ConnectorException e) {
+					logger.debug("already exist error:");
 					e.printStackTrace();
 					continue;
 				}
@@ -77,28 +106,32 @@
 			} else {
 
 				eSciDocXmlObject escidocItem = transformer.transform(obj);
-				logger.info(escidocItem.printXML());
-				// TODO write PID to back to echo-obj
-				Boolean result = connector.createItem(escidocItem);
-				if (result) {
-					addedObjects.add(escidocItem.getESciDocId());
-					addedFile.debug(escidocItem.getESciDocId() + "\n");
-					// addedFile.write(escidocItem.getESciDocId()+"\n");
-					// addedFile.flush();
+				
+		
+					try {
+						logger.info(escidocItem.printXML());
+						// TODO write PID back to echo-obj
+						Boolean result = connector.createItem(escidocItem);
+						if (result) {
+							addedObjects.add(escidocItem.getESciDocId());
+							addedFile.debug(escidocItem.getESciDocId() + "\n");
 
-				} else {
-					notAddedObjects.add(obj.echoUrl);
-					notAddedFile.debug(obj.echoUrl);
-					// notAddedFile.write(obj.echoUrl+"\n");
-					// notAddedFile.flush();
-				}
-				// if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){
-				// logger.info("PID already exists:"+obj);
-				// } else if (result ==
-				// ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){
-				// logger.info("Object with reference to the same digital object already exists:"+obj);
-				// }
-
+						} else {
+							notAddedObjects.add(obj.echoUrl);
+							notAddedFile.debug(obj.echoUrl);
+				
+						}
+					
+					} catch (IOException e) {
+						// TODO Auto-generated catch block
+						e.printStackTrace();
+						throw new ESciDocXmlObjectException();
+					} catch (JDOMException e) {
+						// TODO Auto-generated catch block
+						e.printStackTrace();
+						throw new ESciDocXmlObjectException();
+					}
+				
 			}
 		}
 		if (logger.getLevel() == Level.DEBUG) {
@@ -107,18 +140,61 @@
 			}
 		}
 
-		// File outFile = new File("/tmp/import.out");
-		// FileWriter fw = new FileWriter(outFile);
-		// for (String addedObject:addedObjects){
-		// fw.write(addedObject+"\n");
-		// }
-		// for (String addedObject:notAddedObjects){
-		// fw.write(addedObject+"\n");
-		// }
-		// fw.close();
 		return true;
 	}
 
+	
+		
+	
+
+	/**
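+	 * Deals with existing objects: does nothing if the MD5 of the stored metadata matches the MD5 of the metadata on the server; otherwise calls {@link #updateObject(ECHOObject, ECHOObject)}.
+	 * @param objNew the freshly imported object
+	 * @param old the object as currently stored in eSciDoc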
+	 * @throws TransformerException 
+	 * @throws ESciDocXmlObjectException 
+	 */
+	private void handleExistingObject(ECHOObject objNew, ECHOObject old) throws TransformerException, ESciDocXmlObjectException {
+		ECHORessource objNewRes = (ECHORessource)objNew;
+		ECHORessource objOldRes = (ECHORessource)old;
+		String md5onServer = objNewRes.getIndexMetaMD5onServer();
+		String md5=objOldRes.getIndexMetaMD5stored();
+		if (md5onServer.equals(md5))
+			return;
+		else {
+			updateObject(objNew, old);
+		}
+		
+	}
+
+	private void updateObject(ECHOObject objNew, ECHOObject objOld) throws TransformerException, ESciDocXmlObjectException {
+		objNew.context = echoContext;
+		eSciDocXmlObject escidocItem = transformer.transform(objNew);
+		String lastModificationDateOld = objOld.lastModificationDate;
+		escidocItem.setLastModificationDate(lastModificationDateOld);
+		try {
+			HttpResponse ret = connector.eScidocPut(objOld.eScidocId, EScidocBasicHandler.convertStringToStream(escidocItem.printXML()));
+			HttpEntity ent = ret.getEntity();
+			if (ret.getStatusLine().getStatusCode() != 200) {
+				logger.debug("Can not update:" + objOld.eScidocId);
+				// res.getEntity().consumeContent(); // necessary to release
+				// the connection
+				ent.consumeContent();
+
+			}
+			InputStream restream = ret.getEntity().getContent();
+			logger.debug(EScidocBasicHandler.convertStreamToString(restream));
+		} catch (UnsupportedEncodingException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+		
+		
+	}
+
 	/**
 	 * @param command
 	 * @param objectXPath
--- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarverster.java	Mon Jan 10 12:42:27 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-package de.mpiwg.itgroup.eSciDoc.harvesting;
-
-import java.io.File;
-import java.net.MalformedURLException;
-import java.net.URL;
-
-import org.apache.log4j.BasicConfigurator;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-import org.apache.log4j.xml.DOMConfigurator;
-
-import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
-import de.mpiwg.itgroup.eSciDoc.foxridge.IndexMetaIterator;
-import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter;
-import de.mpiwg.itgroup.eSciDoc.importer.FoxridgeImporter;
-import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer;
-
-public class FoxridgeHarverster {
-
-	/**
-	 * @param args
-	 * @throws Exception 
-	 */
-	public static void main(String[] args) throws Exception {
-		
-		DOMConfigurator.configure("log4uconf.xml");
-		Logger rl = Logger.getRootLogger();
-		Logger.getLogger("transformerLogger").setLevel(Level.DEBUG);
-		Logger.getLogger("addedFilesLogger").setLevel(Level.DEBUG);
-		Logger.getLogger("notAddedFilesLogger").setLevel(Level.DEBUG);
-		
-        rl.setLevel(Level.DEBUG);
-
-        //IndexMetaIterator sd = new IndexMetaIterator(new File("/Volumes/online_permanent/echo"));
-        //while (sd.hasNext()){
-        //	System.out.println(sd.next());
-        //}
-		EScidocBasicHandler connector = new EScidocBasicHandler("escidoc-test.mpiwg-berlin.mpg.de",8080,"dwinter","weikiki7");
-		
-		ESciDocDataHarvester hv = new ESciDocDataHarvester(new FoxridgeImporter(new File("/Volumes/online_permanent/")),
-				new ECHOTransformer(),connector,"/ir/context/escidoc:12001"); 
-		
-		//hv.readObjectsFromInstance("ECHO_collection");
-		//hv.readObjectsFromInstance("ECHO_resource");
-		
-		//hv.releaseAndSubmitObjects("/ir/context/escidoc:12001");
-		hv.releaseAndSubmitObjects("/ir/context/escidoc:12001/resources/members","//escidocItem:item",1);
-		
-
-	}
-
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarvester.java	Wed Jan 12 11:00:14 2011 +0100
@@ -0,0 +1,52 @@
+package de.mpiwg.itgroup.eSciDoc.harvesting;
+
+import java.io.File;
+import java.net.MalformedURLException;
+import java.net.URL;
+
+import org.apache.log4j.BasicConfigurator;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.apache.log4j.xml.DOMConfigurator;
+
+import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
+import de.mpiwg.itgroup.eSciDoc.foxridge.IndexMetaIterator;
+import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter;
+import de.mpiwg.itgroup.eSciDoc.importer.FoxridgeImporter;
+import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer;
+
+public class FoxridgeHarvester {
+
+	/**
+	 * @param args
+	 * @throws Exception 
+	 */
+	public static void main(String[] args) throws Exception {
+		
+		DOMConfigurator.configure("log4uconf.xml");
+		Logger rl = Logger.getRootLogger();
+		Logger.getLogger("transformerLogger").setLevel(Level.DEBUG);
+		Logger.getLogger("addedFilesLogger").setLevel(Level.DEBUG);
+		Logger.getLogger("notAddedFilesLogger").setLevel(Level.DEBUG);
+		
+        rl.setLevel(Level.DEBUG);
+
+        //IndexMetaIterator sd = new IndexMetaIterator(new File("/Volumes/online_permanent/echo"));
+        //while (sd.hasNext()){
+        //	System.out.println(sd.next());
+        //}
+		EScidocBasicHandler connector = new EScidocBasicHandler("escidoc-test.mpiwg-berlin.mpg.de",8080,"dwinter","weikiki7");
+		
+		ESciDocDataHarvester hv = new ESciDocDataHarvester(new FoxridgeImporter(new File("/Volumes/online_permanent/shipbuilding/")),
+				new ECHOTransformer(),connector,"/ir/context/escidoc:12001"); 
+		
+		//hv.readObjectsFromInstance("ECHO_collection");
+		hv.readObjectsFromInstance("ECHO_resource");
+		
+		//hv.releaseAndSubmitObjects("/ir/context/escidoc:12001");
+		//hv.releaseAndSubmitObjects("/ir/context/escidoc:12001/resources/members","//escidocItem:item",1);
+		
+
+	}
+
+}
--- a/src/de/mpiwg/itgroup/eSciDoc/transformer/ECHOTransformer.java	Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/transformer/ECHOTransformer.java	Wed Jan 12 11:00:14 2011 +0100
@@ -19,6 +19,7 @@
 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOCollection;
 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject;
 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource;
+import de.mpiwg.itgroup.eSciDoc.exceptions.TransformerException;
 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
 import de.mpiwg.itgroup.metadataManager.pid.DCTransformer;
 
@@ -34,8 +35,9 @@
 		
 	}
 	
-	public eSciDocXmlObject transform(ECHOObject obj) throws Exception {
+	public eSciDocXmlObject transform(ECHOObject obj) throws TransformerException {
 		
+		try{
 		if (obj.pid==null)
 			return null;
 		
@@ -113,6 +115,10 @@
 			eSciDocObj.insertDC(dc);
 		}
 		return eSciDocObj;
+		} catch (Exception e){
+			throw new TransformerException();
+		}
 	}
+	
 
 }
--- a/src/de/mpiwg/itgroup/eSciDoc/transformer/Transformer.java	Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/transformer/Transformer.java	Wed Jan 12 11:00:14 2011 +0100
@@ -1,11 +1,12 @@
 package de.mpiwg.itgroup.eSciDoc.transformer;
 
 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject;
+import de.mpiwg.itgroup.eSciDoc.exceptions.TransformerException;
 
 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
 
 public interface Transformer {
 
-	eSciDocXmlObject transform(ECHOObject obj) throws Exception;
+	eSciDocXmlObject transform(ECHOObject obj) throws TransformerException;
 
 }
--- a/src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java	Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java	Wed Jan 12 11:00:14 2011 +0100
@@ -36,6 +36,7 @@
 
 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
 import de.mpiwg.itgroup.eSciDoc.Tools.MyFileNameFilter;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
 
 public class eSciDocXmlObject {
 
@@ -45,56 +46,92 @@
 	private String pid;
 	private DOMBuilder parser;
 	
-	public eSciDocXmlObject(String pid, InputStream template) throws Exception {
+	public eSciDocXmlObject(String pid, InputStream template) throws ESciDocXmlObjectException {
 		
 		SAXBuilder  builder = new SAXBuilder();
 		
-		dom = builder.build(template);
-		
-		xpath = EScidocTools.getESciDocXpath("//prop:pid");
-		
-		Element test = (Element) xpath.selectSingleNode(dom);
+		try {
+			dom = builder.build(template);
+			
+			xpath = EScidocTools.getESciDocXpath("//prop:pid");
 			
-		test.setText(pid);
-		this.pid=pid;
+			Element test = (Element) xpath.selectSingleNode(dom);
+				
+			test.setText(pid);
+			this.pid=pid;
+		} catch (JDOMException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+			throw new ESciDocXmlObjectException();
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+			throw new ESciDocXmlObjectException();
+		}
 	
 	}
 	
-public eSciDocXmlObject(URL url) throws Exception {
+public eSciDocXmlObject(URL url) throws ESciDocXmlObjectException {
 		
 		SAXBuilder  builder = new SAXBuilder();
 		
-		dom = builder.build(url);
-		
-		xpath = EScidocTools.getESciDocXpath("//prop:pid");
-		
-		Element test = (Element) xpath.selectSingleNode(dom);
+		try {
+			dom = builder.build(url);
+			
+			xpath = EScidocTools.getESciDocXpath("//prop:pid");
 			
-		this.pid= test.getTextTrim();
+			Element test = (Element) xpath.selectSingleNode(dom);
+				
+			this.pid= test.getTextTrim();
+		} catch (JDOMException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+			throw new ESciDocXmlObjectException();
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+			throw new ESciDocXmlObjectException();
+		}
 	
 	}
 	
-	public eSciDocXmlObject(Element el) throws JDOMException {
-		
-		dom = new Document((Element)el.clone());
+	public eSciDocXmlObject(Element el) throws ESciDocXmlObjectException {
 		
-		xpath = EScidocTools.getESciDocXpath("//prop:pid");
-		
-		Element test = (Element) xpath.selectSingleNode(dom);
-		if (test!=null)	
-			this.pid= test.getTextTrim();
+		try {
+			dom = new Document((Element)el.clone());
+			
+			xpath = EScidocTools.getESciDocXpath("//prop:pid");
+			
+			Element test = (Element) xpath.selectSingleNode(dom);
+			if (test!=null)	
+				this.pid= test.getTextTrim();
+		} catch (JDOMException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+			throw new ESciDocXmlObjectException();
+		}
 }
 
-	public eSciDocXmlObject(InputStream content) throws JDOMException, IOException {
+	public eSciDocXmlObject(InputStream content) throws  ESciDocXmlObjectException{
 		SAXBuilder  builder = new SAXBuilder();
 		
-		dom = builder.build(content);
-		
-		xpath = EScidocTools.getESciDocXpath("//prop:pid");
-		
-		Element test = (Element) xpath.selectSingleNode(dom);
+		try {
+			dom = builder.build(content);
+			
+			xpath = EScidocTools.getESciDocXpath("//prop:pid");
 			
-		this.pid= test.getTextTrim();
+			Element test = (Element) xpath.selectSingleNode(dom);
+				
+			this.pid= test.getTextTrim();
+		} catch (JDOMException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+			throw new ESciDocXmlObjectException();
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+			throw new ESciDocXmlObjectException();
+		}
 	
 	}
 
@@ -318,6 +355,10 @@
 
 		
 	
+	/**
+	 * @return Object in eScidoc XML formatting
+	 * @throws IOException
+	 */
 	public String printXML() throws IOException{
 		 	XMLOutputter out = new XMLOutputter();
 		 	
@@ -466,5 +507,30 @@
 		return dom;
 	}
 
+	public void setLastModificationDate(String lastModificationDateOld) throws ESciDocXmlObjectException {
+		Attribute item;
+		try {
+			XPath xp = EScidocTools.getESciDocXpath(".//escidocItem:item/@last-modification-date");
+			item = (Attribute)xp.selectSingleNode(dom);
+			
+		
+		if (item==null) {//existiert noch nicht}
+			//Namespace namespace = Namespace.getNamespace("item",EScidocTools.item);
+			xp = EScidocTools.getESciDocXpath(".//escidocItem:item");
+			Element itemElement = (Element)xp.selectSingleNode(dom);
+			itemElement.setAttribute("last-modification-date", lastModificationDateOld);
+			
+			
+		} else {
+		item.setValue(lastModificationDateOld);
+		}
+		} catch (JDOMException e) {
+			e.printStackTrace();
+			throw new ESciDocXmlObjectException();
+		}
+
+		
+	}
+
 
 }