# HG changeset patch
# User dwinter
# Date 1294826414 -3600
# Node ID 58b52df9763c1811e09f6610534a8d4b09a5f187
# Parent fab8e78184fa0737acdc75b2eaa8bc90d379e289
added update functionality if index.meta has changed
diff -r fab8e78184fa -r 58b52df9763c .classpath
--- a/.classpath Mon Jan 10 12:42:27 2011 +0100
+++ b/.classpath Wed Jan 12 11:00:14 2011 +0100
@@ -13,7 +13,7 @@
+
-
diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java
--- a/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java Wed Jan 12 11:00:14 2011 +0100
@@ -11,7 +11,9 @@
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -19,6 +21,7 @@
import javax.swing.text.html.HTMLDocument.HTMLReader.IsindexAction;
+import org.apache.commons.codec.EncoderException;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
@@ -44,7 +47,13 @@
import org.jdom.Text;
import org.jdom.input.SAXBuilder;
import org.jdom.xpath.XPath;
+import org.w3c.dom.Node;
+import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject;
+import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ConnectorException;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ObjectNotUniqueError;
import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
@@ -477,7 +486,7 @@
- public String getIDfromPID(String pid, String context) throws ClientProtocolException, IOException, IllegalStateException, JDOMException {
+ public String getIDfromPID(String pid, String context) throws ConnectorException {
String filter = "";
@@ -487,21 +496,42 @@
String command = context
+ "/resources/members/filter";
- HttpResponse result =eScidocPost(command,
- new ByteArrayInputStream(filter.getBytes()));
+ HttpResponse result;
+ try {
+ result = eScidocPost(command,
+ new ByteArrayInputStream(filter.getBytes()));
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ConnectorException();
+ }
- Document dom = new SAXBuilder().build(result.getEntity().getContent());
-
- XPath xp = EScidocTools.getESciDocXpath("//escidocItem:item/@xlink:href");
-
- Attribute attr = (Attribute)xp.selectSingleNode(dom);
-
- if (attr!=null){
- return attr.getValue();
+ try {
+ Document dom = new SAXBuilder().build(result.getEntity().getContent());
+
+ XPath xp = EScidocTools.getESciDocXpath("//escidocItem:item/@xlink:href");
+
+ Attribute attr = (Attribute)xp.selectSingleNode(dom);
+
+ if (attr!=null){
+ return attr.getValue();
+ }
+
+ return null;
+ //return convertStreamToString(result.getEntity().getContent());
+ } catch (IllegalStateException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ConnectorException();
+ } catch (JDOMException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ConnectorException();
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ConnectorException();
}
-
- return null;
- //return convertStreamToString(result.getEntity().getContent());
}
@@ -529,7 +559,7 @@
}
- public List getObjectListFromFilterResult(String command, String objectXpath) throws IOException, IllegalStateException, JDOMException {
+ public List getObjectListFromFilterResult(String command, String objectXpath) throws IOException, IllegalStateException, JDOMException,ESciDocXmlObjectException {
//String filter = "";
//
//String command = context
@@ -646,7 +676,7 @@
- public boolean alreadyExists(String indexField, String testString, String context) throws Exception {
+ public ECHOObject alreadyExists(String indexField, String testString, String context) throws ConnectorException, ObjectNotUniqueError {
String[] ct = context.split("/"); // gebraucht wird hier nur die id, dh ohne /ir/...
@@ -656,25 +686,103 @@
String searchString = String.format("\"%s\"=\"%s\"",indexField,testString);
searchString += " and "+String.format("\"%s\"=\"%s\"","/properties/context/id",contextId);
+ HttpResponse ret;
+ try{
searchString = URLEncoder.encode(searchString,"utf-8");
- HttpResponse ret = eScidocGet("/ir/items?operation=searchRetrieve&version=1.1&query="+searchString);
+ ret = eScidocGet("/ir/items?operation=searchRetrieve&version=1.1&query="+searchString);
+ } catch (UnsupportedEncodingException e) {
+ throw new ConnectorException();
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ConnectorException();
+ }
if (ret.getStatusLine().getStatusCode()!=200)
{
logger.debug("alreadyExists: error searchstring:"+searchString);
HttpEntity ent = ret.getEntity();
- if (ent!=null)
- ent.consumeContent();
- throw new Exception();
+ if (ent!=null) {
+ try {
+ ent.consumeContent();
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ConnectorException();
+ }
+ throw new ConnectorException();
+ }
}
+
+ try{
Document doc = new SAXBuilder().build(ret.getEntity().getContent());
XPath xp = EScidocTools.getESciDocXpath("//zs:numberOfRecords/text()");
String hitsStr = ((Text)xp.selectSingleNode(doc)).getText();
Integer hits = Integer.valueOf(hitsStr);
- if (hits>0)
- return true;
- return false;
+ if (hits>0){
+ if (hits>1)
+ throw new ObjectNotUniqueError();
+ return getOldObjectFromESciDoc(doc);
+ }
+ return null;
+ } catch (IOException e) {
+ e.printStackTrace();
+ throw new ConnectorException();
+ } catch (IllegalStateException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ConnectorException();
+ } catch (JDOMException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ConnectorException();
+ }
+ }
+
+
+
+ /**
+  * Builds a minimal {@link ECHORessource} describing the item found in an eSciDoc
+  * search response: the item's xlink:href, the checksum of the component whose
+  * content-category is "index_meta", and the item's last-modification-date.
+  *
+  * NOTE(review): a null return is indistinguishable from "no such object" for the
+  * caller of alreadyExists - confirm that this is intended for the error cases below.
+  *
+  * @param doc parsed search response
+  * @return the stored object, or null if the XPath expressions cannot be compiled or
+  *         evaluated, or if the ECHORessource cannot be instantiated
+  * @throws IllegalStateException if the response lacks the item id, the index_meta
+  *         checksum or the last-modification-date (alreadyExists turns this into a
+  *         ConnectorException)
+  */
+ private ECHOObject getOldObjectFromESciDoc(Document doc) {
+     Element checksumNode;
+     Attribute idNode;
+     Attribute lastModificationDateNode;
+     try {
+         XPath md5Nodes = EScidocTools.getESciDocXpath(".//escidocComponents:component/escidocComponents:properties[prop:content-category[text()='index_meta']]/prop:checksum");
+         XPath itemId = EScidocTools.getESciDocXpath(".//escidocItem:item/@xlink:href");
+         XPath lastModificationDate = EScidocTools.getESciDocXpath(".//escidocItem:item/@last-modification-date");
+
+         checksumNode = (Element) md5Nodes.selectSingleNode(doc);
+         idNode = (Attribute) itemId.selectSingleNode(doc);
+         lastModificationDateNode = (Attribute) lastModificationDate.selectSingleNode(doc);
+     } catch (JDOMException e) {
+         e.printStackTrace();
+         return null;
+     }
+
+     // selectSingleNode yields null when nothing matches; fail with a clear message
+     // instead of a NullPointerException further down.
+     if (checksumNode == null || idNode == null || lastModificationDateNode == null) {
+         throw new IllegalStateException(
+                 "search result lacks item id, index_meta checksum or last-modification-date");
+     }
+
+     try {
+         ECHORessource er = new ECHORessource();
+         er.eScidocId = idNode.getValue();
+         er.indexMetaMD5stored = checksumNode.getTextTrim();
+         er.lastModificationDate = lastModificationDateNode.getValue();
+         return er;
+     } catch (IOException e) {
+         // thrown by the ECHORessource constructor
+         e.printStackTrace();
+         return null;
+     }
+ }
@@ -711,5 +819,8 @@
+
+
+
}
diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java
--- a/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java Wed Jan 12 11:00:14 2011 +0100
@@ -63,6 +63,7 @@
xpathResources.addNamespace("escidocComponents",escidocComponents);
xpathResources.addNamespace("xlink",xlink);
xpathResources.addNamespace("metadata-records",metadataRecords);
+ xpathResources.addNamespace("metadataRecords",metadataRecords);
xpathResources.addNamespace("container",container);
xpathResources.addNamespace("item",item);
xpathResources.addNamespace("srw",srw);
diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/config/echo.properties
--- a/src/de/mpiwg/itgroup/eSciDoc/config/echo.properties Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/config/echo.properties Wed Jan 12 11:00:14 2011 +0100
@@ -1,3 +1,3 @@
textServletUrl=http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=
dirInfoUrl=http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/dirInfo-xml.jsp?fn=
-metaDataManager=http://localhost:48080/MetaDataManagerRestlet
\ No newline at end of file
+metaDataManager=http://localhost:8180/MetaDataManagerRestlet
\ No newline at end of file
diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHOObject.java
--- a/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHOObject.java Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHOObject.java Wed Jan 12 11:00:14 2011 +0100
@@ -28,6 +28,8 @@
public String metadataMananagerUrl;
public String context;
public String description ="";
+ public String eScidocId;
+ public String lastModificationDate;
public ECHOObject() throws IOException{
Properties echoProperties = new Properties();
@@ -92,4 +94,6 @@
}
return pid;
}
+
+
}
diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHORessource.java
--- a/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHORessource.java Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHORessource.java Wed Jan 12 11:00:14 2011 +0100
@@ -1,12 +1,18 @@
package de.mpiwg.itgroup.eSciDoc.echoObjects;
import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
import java.net.URI;
+import java.net.URL;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.apache.commons.codec.binary.Hex;
import org.apache.http.Header;
import org.apache.http.HttpException;
import org.apache.http.HttpHost;
@@ -34,6 +40,8 @@
private String dirInfoUrl;
public String link;
+ public String indexMetaMD5stored;
+
public ECHORessource() throws IOException{
super();
@@ -190,4 +198,50 @@
}
return false;
}
-}
+
+
+ /**
+  * Reads the resource addressed by {@code metaData} and computes its MD5 digest.
+  *
+  * @return the hex encoded MD5 digest, or the empty string if the URL is malformed,
+  *         MD5 is not available or the resource cannot be read
+  */
+ public String getIndexMetaMD5onServer() {
+     String digest = "";
+     InputStream is = null;
+     try {
+         is = new URL(metaData).openStream();
+         MessageDigest md = MessageDigest.getInstance("MD5");
+         digest = getDigest(is, md, 2048);
+     } catch (MalformedURLException e) {
+         e.printStackTrace();
+     } catch (NoSuchAlgorithmException e) {
+         e.printStackTrace();
+     } catch (IOException e) {
+         e.printStackTrace();
+     } finally {
+         // always release the connection opened by openStream()
+         if (is != null) {
+             try {
+                 is.close();
+             } catch (IOException ignored) {
+                 // the digest (or the failure) is already determined; nothing to recover
+             }
+         }
+     }
+     return digest;
+ }
+
+ /**
+  * Feeds the complete stream into the given digest and returns the result as hex string.
+  *
+  * @param is stream that is read until end-of-stream; it is not closed by this method
+  * @param md digest to use; it is reset before any data is added
+  * @param byteArraySize size of the read buffer in bytes, must be greater than zero
+  * @return the digest, encoded with {@link Hex#encodeHex(byte[])}
+  * @throws IllegalArgumentException if byteArraySize is not positive
+  * @throws NoSuchAlgorithmException not thrown by this implementation; kept so that
+  *         existing callers still compile
+  * @throws IOException if reading from the stream fails
+  */
+ public static String getDigest(InputStream is, MessageDigest md, int byteArraySize)
+         throws NoSuchAlgorithmException, IOException {
+
+     // A zero-length buffer makes read() return 0 forever, so the loop would never end.
+     if (byteArraySize <= 0) {
+         throw new IllegalArgumentException("byteArraySize must be > 0 but was " + byteArraySize);
+     }
+
+     md.reset();
+     byte[] buffer = new byte[byteArraySize];
+     int numBytes;
+     while ((numBytes = is.read(buffer)) != -1) {
+         md.update(buffer, 0, numBytes);
+     }
+     return new String(Hex.encodeHex(md.digest()));
+ }
+
+
+ /**
+  * @return the value of {@link #indexMetaMD5stored}; null if it was never assigned
+  */
+ public String getIndexMetaMD5stored() {
+ return indexMetaMD5stored;
+ }
+
+ /**
+  * @return the value of the eScidocId field; null if it was never assigned
+  */
+ public String getEScidocId() {
+ return eScidocId;
+ }
+
+
+ }
diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/exceptions/ConnectorException.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/eSciDoc/exceptions/ConnectorException.java Wed Jan 12 11:00:14 2011 +0100
@@ -0,0 +1,5 @@
+package de.mpiwg.itgroup.eSciDoc.exceptions;
+
+/**
+ * Signals that a request to the eSciDoc repository failed or that its response
+ * could not be processed.
+ */
+public class ConnectorException extends Exception {
+
+    private static final long serialVersionUID = 1L;
+
+    /** Creates an exception without message and cause. */
+    public ConnectorException() {
+        super();
+    }
+
+    /** @param message description of the failure */
+    public ConnectorException(String message) {
+        super(message);
+    }
+
+    /**
+     * @param message description of the failure
+     * @param cause the exception that triggered this one
+     */
+    public ConnectorException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /** @param cause the exception that triggered this one */
+    public ConnectorException(Throwable cause) {
+        super(cause);
+    }
+}
diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/exceptions/ESciDocXmlObjectException.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/eSciDoc/exceptions/ESciDocXmlObjectException.java Wed Jan 12 11:00:14 2011 +0100
@@ -0,0 +1,5 @@
+package de.mpiwg.itgroup.eSciDoc.exceptions;
+
+/**
+ * Signals that an eSciDoc XML object could not be built, read or modified.
+ */
+public class ESciDocXmlObjectException extends Exception {
+
+    private static final long serialVersionUID = 1L;
+
+    /** Creates an exception without message and cause. */
+    public ESciDocXmlObjectException() {
+        super();
+    }
+
+    /** @param message description of the failure */
+    public ESciDocXmlObjectException(String message) {
+        super(message);
+    }
+
+    /**
+     * @param message description of the failure
+     * @param cause the exception that triggered this one
+     */
+    public ESciDocXmlObjectException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /** @param cause the exception that triggered this one */
+    public ESciDocXmlObjectException(Throwable cause) {
+        super(cause);
+    }
+}
diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/exceptions/ObjectNotUniqueError.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/eSciDoc/exceptions/ObjectNotUniqueError.java Wed Jan 12 11:00:14 2011 +0100
@@ -0,0 +1,5 @@
+package de.mpiwg.itgroup.eSciDoc.exceptions;
+
+/**
+ * Signals that a lookup which should identify at most one object matched several.
+ */
+public class ObjectNotUniqueError extends Exception {
+
+    private static final long serialVersionUID = 1L;
+
+    /** Creates an exception without message and cause. */
+    public ObjectNotUniqueError() {
+        super();
+    }
+
+    /** @param message description of the ambiguous lookup */
+    public ObjectNotUniqueError(String message) {
+        super(message);
+    }
+
+    /**
+     * @param message description of the ambiguous lookup
+     * @param cause the exception that triggered this one
+     */
+    public ObjectNotUniqueError(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /** @param cause the exception that triggered this one */
+    public ObjectNotUniqueError(Throwable cause) {
+        super(cause);
+    }
+}
diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/exceptions/TransformerException.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/eSciDoc/exceptions/TransformerException.java Wed Jan 12 11:00:14 2011 +0100
@@ -0,0 +1,5 @@
+package de.mpiwg.itgroup.eSciDoc.exceptions;
+
+/**
+ * Signals that an object could not be transformed into its eSciDoc representation.
+ */
+public class TransformerException extends Exception {
+
+    private static final long serialVersionUID = 1L;
+
+    /** Creates an exception without message and cause. */
+    public TransformerException() {
+        super();
+    }
+
+    /** @param message description of the failure */
+    public TransformerException(String message) {
+        super(message);
+    }
+
+    /**
+     * @param message description of the failure
+     * @param cause the exception that triggered this one
+     */
+    public TransformerException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /** @param cause the exception that triggered this one */
+    public TransformerException(Throwable cause) {
+        super(cause);
+    }
+}
diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java
--- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java Wed Jan 12 11:00:14 2011 +0100
@@ -1,16 +1,14 @@
package de.mpiwg.itgroup.eSciDoc.harvesting;
-import java.io.File;
-import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
-import java.net.MalformedURLException;
+import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.util.ArrayList;
+import java.util.Map;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
-import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.xml.DOMConfigurator;
@@ -20,50 +18,81 @@
import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject;
import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ConnectorException;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ObjectNotUniqueError;
+import de.mpiwg.itgroup.eSciDoc.exceptions.TransformerException;
import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter;
import de.mpiwg.itgroup.eSciDoc.importer.Importer;
import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer;
import de.mpiwg.itgroup.eSciDoc.transformer.Transformer;
import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
+/**
+ * @author dwinter
+ * Main class for data harvesting from index.meta files into eScidoc
+ */
public class ESciDocDataHarvester {
- static int MAX_REC=1000;
+ static int MAX_REC=1000; // maximale Anzahl von Records die in einem Stueck eingelesen bzw. bearbeitet werden.
//static int MAX_REC=5;
protected Logger logger = Logger.getRootLogger();
protected Importer importer;
protected EScidocBasicHandler connector;
protected Transformer transformer;
- private EScidocTools tools;
+
private String echoContext;
private Logger addedFile = Logger.getLogger("addedFilesLogger");
private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger");
+ /**
+ * @param importer Importer for dataObjects, describes how to access the objects
+ * @param transformer Transformer, generates the eScidocMetaDatasets
+ * @param connector connects to the eScidocRepository
+ * @param context eSciDoc context path, e.g. /ir/context/escidoc:12001
+ */
public ESciDocDataHarvester(Importer importer, Transformer transformer,
- EScidocBasicHandler connector, String context) throws IOException {
+ EScidocBasicHandler connector, String context) {
this.importer = importer;
this.transformer = transformer;
this.connector = connector;
- this.tools = new EScidocTools(connector);
+ //this.tools = new EScidocTools(connector);
this.echoContext = context;
}
- public Boolean readObjectsFromInstance(String type) throws Exception {
+ /** Reads objects into eScidoc, or updates them if their index.meta has changed.
+ * @param type restrict the imported objects to a specific type, possible types should be defined in
+ * the given importer @see {@link #importer}
+ * @return
+ * @throws ConnectorException
+ * @throws TransformerException
+ * @throws ESciDocXmlObjectException
+ */
+ public Boolean readObjectsFromInstance(String type) throws ConnectorException, TransformerException, ESciDocXmlObjectException {
ArrayList addedObjects = new ArrayList();
ArrayList notAddedObjects = new ArrayList();
for (ECHOObject obj : importer.getObjectList(type)) {
if (ECHORessource.class.isInstance(obj)) {
try {
- if (connector.alreadyExists(
- "/md-records/md-record/admin/archivePath",
- ((ECHORessource) obj).archivePath, echoContext)) {
+ ECHOObject old;
+ try {
+ old = connector.alreadyExists(
+ "/md-records/md-record/admin/archivePath",
+ ((ECHORessource) obj).archivePath, echoContext);
+ } catch (ObjectNotUniqueError e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ continue;
+ }
+ if (old!=null) {
logger.debug("already exist:"
+ ((ECHORessource) obj).archivePath);
+ handleExistingObject(obj,old);
continue;
}
- } catch (Exception e) {
- logger.debug("already exist error");
+ } catch (ConnectorException e) {
+ logger.debug("already exist error:");
e.printStackTrace();
continue;
}
@@ -77,28 +106,32 @@
} else {
eSciDocXmlObject escidocItem = transformer.transform(obj);
- logger.info(escidocItem.printXML());
- // TODO write PID to back to echo-obj
- Boolean result = connector.createItem(escidocItem);
- if (result) {
- addedObjects.add(escidocItem.getESciDocId());
- addedFile.debug(escidocItem.getESciDocId() + "\n");
- // addedFile.write(escidocItem.getESciDocId()+"\n");
- // addedFile.flush();
+
+
+ try {
+ logger.info(escidocItem.printXML());
+ // TODO write PID to back to echo-obj
+ Boolean result = connector.createItem(escidocItem);
+ if (result) {
+ addedObjects.add(escidocItem.getESciDocId());
+ addedFile.debug(escidocItem.getESciDocId() + "\n");
- } else {
- notAddedObjects.add(obj.echoUrl);
- notAddedFile.debug(obj.echoUrl);
- // notAddedFile.write(obj.echoUrl+"\n");
- // notAddedFile.flush();
- }
- // if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){
- // logger.info("PID already exists:"+obj);
- // } else if (result ==
- // ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){
- // logger.info("Object with reference to the same digital object already exists:"+obj);
- // }
-
+ } else {
+ notAddedObjects.add(obj.echoUrl);
+ notAddedFile.debug(obj.echoUrl);
+
+ }
+
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ESciDocXmlObjectException();
+ } catch (JDOMException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ESciDocXmlObjectException();
+ }
+
}
}
if (logger.getLevel() == Level.DEBUG) {
@@ -107,18 +140,61 @@
}
}
- // File outFile = new File("/tmp/import.out");
- // FileWriter fw = new FileWriter(outFile);
- // for (String addedObject:addedObjects){
- // fw.write(addedObject+"\n");
- // }
- // for (String addedObject:notAddedObjects){
- // fw.write(addedObject+"\n");
- // }
- // fw.close();
return true;
}
+
+
+
+
+ /**
+  * Deals with an object that already exists in eSciDoc. Nothing is done if the MD5 of
+  * the stored metadata equals the MD5 of the metadata on the server; otherwise
+  * {@link #updateObject(ECHOObject, ECHOObject)} is called. The update is also skipped
+  * when the MD5 on the server cannot be determined.
+  *
+  * @param objNew the object as delivered by the importer; must be an ECHORessource
+  * @param old the object as currently stored in eSciDoc; must be an ECHORessource
+  * @throws TransformerException
+  * @throws ESciDocXmlObjectException
+  */
+ private void handleExistingObject(ECHOObject objNew, ECHOObject old) throws TransformerException, ESciDocXmlObjectException {
+     String md5onServer = ((ECHORessource) objNew).getIndexMetaMD5onServer();
+     String md5stored = ((ECHORessource) old).getIndexMetaMD5stored();
+
+     // getIndexMetaMD5onServer() reports a failed download or digest as "". That says
+     // nothing about whether the metadata changed, so do not overwrite the stored item.
+     if (md5onServer == null || md5onServer.length() == 0) {
+         logger.debug("cannot determine md5 of metadata on server, no update of:" + old.eScidocId);
+         return;
+     }
+
+     if (!md5onServer.equals(md5stored)) {
+         updateObject(objNew, old);
+     }
+ }
+
+ /**
+  * Replaces the item stored in eSciDoc by a freshly transformed version of objNew.
+  * The last-modification-date of the stored item is copied into the new XML before
+  * it is sent with a PUT to objOld.eScidocId. I/O problems are logged, not rethrown.
+  *
+  * @param objNew the object as delivered by the importer
+  * @param objOld the stored object; supplies eScidocId and lastModificationDate
+  * @throws TransformerException if objNew cannot be transformed
+  * @throws ESciDocXmlObjectException if the last-modification-date cannot be set
+  */
+ private void updateObject(ECHOObject objNew, ECHOObject objOld) throws TransformerException, ESciDocXmlObjectException {
+     objNew.context = echoContext;
+     eSciDocXmlObject escidocItem = transformer.transform(objNew);
+     if (escidocItem == null) {
+         // a transformer may return null (ECHOTransformer does so for objects without pid)
+         logger.debug("Can not update:" + objOld.eScidocId);
+         return;
+     }
+     escidocItem.setLastModificationDate(objOld.lastModificationDate);
+
+     try {
+         HttpResponse ret = connector.eScidocPut(objOld.eScidocId, EScidocBasicHandler.convertStringToStream(escidocItem.printXML()));
+         HttpEntity ent = ret.getEntity();
+         if (ret.getStatusLine().getStatusCode() != 200) {
+             logger.debug("Can not update:" + objOld.eScidocId);
+             if (ent != null) {
+                 ent.consumeContent(); // necessary to release the connection
+             }
+             // the entity is consumed, it must not be read again
+             return;
+         }
+         if (ent != null) {
+             InputStream restream = ent.getContent();
+             logger.debug(EScidocBasicHandler.convertStreamToString(restream));
+         }
+     } catch (UnsupportedEncodingException e) {
+         logger.error("update failed for:" + objOld.eScidocId, e);
+     } catch (IOException e) {
+         logger.error("update failed for:" + objOld.eScidocId, e);
+     }
+ }
+
/**
* @param command
* @param objectXPath
diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarverster.java
--- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarverster.java Mon Jan 10 12:42:27 2011 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-package de.mpiwg.itgroup.eSciDoc.harvesting;
-
-import java.io.File;
-import java.net.MalformedURLException;
-import java.net.URL;
-
-import org.apache.log4j.BasicConfigurator;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-import org.apache.log4j.xml.DOMConfigurator;
-
-import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
-import de.mpiwg.itgroup.eSciDoc.foxridge.IndexMetaIterator;
-import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter;
-import de.mpiwg.itgroup.eSciDoc.importer.FoxridgeImporter;
-import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer;
-
-public class FoxridgeHarverster {
-
- /**
- * @param args
- * @throws Exception
- */
- public static void main(String[] args) throws Exception {
-
- DOMConfigurator.configure("log4uconf.xml");
- Logger rl = Logger.getRootLogger();
- Logger.getLogger("transformerLogger").setLevel(Level.DEBUG);
- Logger.getLogger("addedFilesLogger").setLevel(Level.DEBUG);
- Logger.getLogger("notAddedFilesLogger").setLevel(Level.DEBUG);
-
- rl.setLevel(Level.DEBUG);
-
- //IndexMetaIterator sd = new IndexMetaIterator(new File("/Volumes/online_permanent/echo"));
- //while (sd.hasNext()){
- // System.out.println(sd.next());
- //}
- EScidocBasicHandler connector = new EScidocBasicHandler("escidoc-test.mpiwg-berlin.mpg.de",8080,"dwinter","weikiki7");
-
- ESciDocDataHarvester hv = new ESciDocDataHarvester(new FoxridgeImporter(new File("/Volumes/online_permanent/")),
- new ECHOTransformer(),connector,"/ir/context/escidoc:12001");
-
- //hv.readObjectsFromInstance("ECHO_collection");
- //hv.readObjectsFromInstance("ECHO_resource");
-
- //hv.releaseAndSubmitObjects("/ir/context/escidoc:12001");
- hv.releaseAndSubmitObjects("/ir/context/escidoc:12001/resources/members","//escidocItem:item",1);
-
-
- }
-
-}
diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarvester.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarvester.java Wed Jan 12 11:00:14 2011 +0100
@@ -0,0 +1,52 @@
+package de.mpiwg.itgroup.eSciDoc.harvesting;
+
+import java.io.File;
+import java.net.MalformedURLException;
+import java.net.URL;
+
+import org.apache.log4j.BasicConfigurator;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.apache.log4j.xml.DOMConfigurator;
+
+import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
+import de.mpiwg.itgroup.eSciDoc.foxridge.IndexMetaIterator;
+import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter;
+import de.mpiwg.itgroup.eSciDoc.importer.FoxridgeImporter;
+import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer;
+
+/**
+ * Command line entry point that harvests ECHO resources from a Foxridge directory
+ * tree into eSciDoc.
+ *
+ * The connection can be configured with the system properties escidoc.host,
+ * escidoc.port, escidoc.user and escidoc.password; values that are not set fall
+ * back to the former hard-coded ones.
+ */
+public class FoxridgeHarvester {
+
+    /**
+     * @param args not used
+     * @throws Exception if the connection cannot be set up or harvesting fails
+     */
+    public static void main(String[] args) throws Exception {
+
+        DOMConfigurator.configure("log4uconf.xml");
+        Logger rl = Logger.getRootLogger();
+        Logger.getLogger("transformerLogger").setLevel(Level.DEBUG);
+        Logger.getLogger("addedFilesLogger").setLevel(Level.DEBUG);
+        Logger.getLogger("notAddedFilesLogger").setLevel(Level.DEBUG);
+
+        rl.setLevel(Level.DEBUG);
+
+        // NOTE(review): the fallback credentials are committed in clear text; they
+        // should be rotated and the defaults for user/password removed.
+        String host = System.getProperty("escidoc.host", "escidoc-test.mpiwg-berlin.mpg.de");
+        int port = Integer.getInteger("escidoc.port", 8080);
+        String user = System.getProperty("escidoc.user", "dwinter");
+        String password = System.getProperty("escidoc.password", "weikiki7");
+
+        EScidocBasicHandler connector = new EScidocBasicHandler(host, port, user, password);
+
+        ESciDocDataHarvester hv = new ESciDocDataHarvester(new FoxridgeImporter(new File("/Volumes/online_permanent/shipbuilding/")),
+                new ECHOTransformer(), connector, "/ir/context/escidoc:12001");
+
+        // Alternative steps, enable as needed:
+        //hv.readObjectsFromInstance("ECHO_collection");
+        hv.readObjectsFromInstance("ECHO_resource");
+
+        //hv.releaseAndSubmitObjects("/ir/context/escidoc:12001");
+        //hv.releaseAndSubmitObjects("/ir/context/escidoc:12001/resources/members","//escidocItem:item",1);
+    }
+
+}
diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/transformer/ECHOTransformer.java
--- a/src/de/mpiwg/itgroup/eSciDoc/transformer/ECHOTransformer.java Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/transformer/ECHOTransformer.java Wed Jan 12 11:00:14 2011 +0100
@@ -19,6 +19,7 @@
import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOCollection;
import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject;
import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource;
+import de.mpiwg.itgroup.eSciDoc.exceptions.TransformerException;
import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
import de.mpiwg.itgroup.metadataManager.pid.DCTransformer;
@@ -34,8 +35,9 @@
}
- public eSciDocXmlObject transform(ECHOObject obj) throws Exception {
+ public eSciDocXmlObject transform(ECHOObject obj) throws TransformerException {
+ try{
if (obj.pid==null)
return null;
@@ -113,6 +115,10 @@
eSciDocObj.insertDC(dc);
}
return eSciDocObj;
+ } catch (Exception e){
+ throw new TransformerException();
+ }
}
+
}
diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/transformer/Transformer.java
--- a/src/de/mpiwg/itgroup/eSciDoc/transformer/Transformer.java Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/transformer/Transformer.java Wed Jan 12 11:00:14 2011 +0100
@@ -1,11 +1,12 @@
package de.mpiwg.itgroup.eSciDoc.transformer;
import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject;
+import de.mpiwg.itgroup.eSciDoc.exceptions.TransformerException;
import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
public interface Transformer {
- eSciDocXmlObject transform(ECHOObject obj) throws Exception;
+ eSciDocXmlObject transform(ECHOObject obj) throws TransformerException;
}
diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java
--- a/src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java Mon Jan 10 12:42:27 2011 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java Wed Jan 12 11:00:14 2011 +0100
@@ -36,6 +36,7 @@
import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
import de.mpiwg.itgroup.eSciDoc.Tools.MyFileNameFilter;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
public class eSciDocXmlObject {
@@ -45,56 +46,92 @@
private String pid;
private DOMBuilder parser;
- public eSciDocXmlObject(String pid, InputStream template) throws Exception {
+ public eSciDocXmlObject(String pid, InputStream template) throws ESciDocXmlObjectException {
SAXBuilder builder = new SAXBuilder();
- dom = builder.build(template);
-
- xpath = EScidocTools.getESciDocXpath("//prop:pid");
-
- Element test = (Element) xpath.selectSingleNode(dom);
+ try {
+ dom = builder.build(template);
+
+ xpath = EScidocTools.getESciDocXpath("//prop:pid");
- test.setText(pid);
- this.pid=pid;
+ Element test = (Element) xpath.selectSingleNode(dom);
+
+ test.setText(pid);
+ this.pid=pid;
+ } catch (JDOMException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ESciDocXmlObjectException();
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ESciDocXmlObjectException();
+ }
}
-public eSciDocXmlObject(URL url) throws Exception {
+public eSciDocXmlObject(URL url) throws ESciDocXmlObjectException {
SAXBuilder builder = new SAXBuilder();
- dom = builder.build(url);
-
- xpath = EScidocTools.getESciDocXpath("//prop:pid");
-
- Element test = (Element) xpath.selectSingleNode(dom);
+ try {
+ dom = builder.build(url);
+
+ xpath = EScidocTools.getESciDocXpath("//prop:pid");
- this.pid= test.getTextTrim();
+ Element test = (Element) xpath.selectSingleNode(dom);
+
+ this.pid= test.getTextTrim();
+ } catch (JDOMException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ESciDocXmlObjectException();
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ESciDocXmlObjectException();
+ }
}
- public eSciDocXmlObject(Element el) throws JDOMException {
-
- dom = new Document((Element)el.clone());
+ public eSciDocXmlObject(Element el) throws ESciDocXmlObjectException {
- xpath = EScidocTools.getESciDocXpath("//prop:pid");
-
- Element test = (Element) xpath.selectSingleNode(dom);
- if (test!=null)
- this.pid= test.getTextTrim();
+ try {
+ dom = new Document((Element)el.clone());
+
+ xpath = EScidocTools.getESciDocXpath("//prop:pid");
+
+ Element test = (Element) xpath.selectSingleNode(dom);
+ if (test!=null)
+ this.pid= test.getTextTrim();
+ } catch (JDOMException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ESciDocXmlObjectException();
+ }
}
- public eSciDocXmlObject(InputStream content) throws JDOMException, IOException {
+ public eSciDocXmlObject(InputStream content) throws ESciDocXmlObjectException{
SAXBuilder builder = new SAXBuilder();
- dom = builder.build(content);
-
- xpath = EScidocTools.getESciDocXpath("//prop:pid");
-
- Element test = (Element) xpath.selectSingleNode(dom);
+ try {
+ dom = builder.build(content);
+
+ xpath = EScidocTools.getESciDocXpath("//prop:pid");
- this.pid= test.getTextTrim();
+ Element test = (Element) xpath.selectSingleNode(dom);
+
+ this.pid= test.getTextTrim();
+ } catch (JDOMException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ESciDocXmlObjectException();
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ throw new ESciDocXmlObjectException();
+ }
}
@@ -318,6 +355,10 @@
+ /**
+ * @return Object in eScidoc XML formatting
+ * @throws IOException
+ */
public String printXML() throws IOException{
XMLOutputter out = new XMLOutputter();
@@ -466,5 +507,30 @@
return dom;
}
+ /**
+  * Sets the last-modification-date attribute of the escidocItem:item element of this
+  * document. An existing attribute is overwritten, otherwise it is created.
+  *
+  * @param lastModificationDateOld value to store in the attribute
+  * @throws ESciDocXmlObjectException if the XPath expressions cannot be compiled or
+  *         evaluated, or if the document contains no escidocItem:item element
+  */
+ public void setLastModificationDate(String lastModificationDateOld) throws ESciDocXmlObjectException {
+     try {
+         XPath attributePath = EScidocTools.getESciDocXpath(".//escidocItem:item/@last-modification-date");
+         Attribute existing = (Attribute) attributePath.selectSingleNode(dom);
+         if (existing != null) {
+             existing.setValue(lastModificationDateOld);
+             return;
+         }
+
+         // the attribute does not exist yet: create it on the item element
+         XPath itemPath = EScidocTools.getESciDocXpath(".//escidocItem:item");
+         Element itemElement = (Element) itemPath.selectSingleNode(dom);
+         if (itemElement == null) {
+             // without an item element there is nothing the date could be attached to
+             throw new ESciDocXmlObjectException();
+         }
+         itemElement.setAttribute("last-modification-date", lastModificationDateOld);
+     } catch (JDOMException e) {
+         e.printStackTrace();
+         throw new ESciDocXmlObjectException();
+     }
+ }
+
}