# HG changeset patch # User dwinter # Date 1294826414 -3600 # Node ID 58b52df9763c1811e09f6610534a8d4b09a5f187 # Parent fab8e78184fa0737acdc75b2eaa8bc90d379e289 added update functionality if index.meta has changed diff -r fab8e78184fa -r 58b52df9763c .classpath --- a/.classpath Mon Jan 10 12:42:27 2011 +0100 +++ b/.classpath Wed Jan 12 11:00:14 2011 +0100 @@ -13,7 +13,7 @@ + - diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java --- a/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java Mon Jan 10 12:42:27 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java Wed Jan 12 11:00:14 2011 +0100 @@ -11,7 +11,9 @@ import java.net.URL; import java.net.URLEncoder; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.StringTokenizer; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -19,6 +21,7 @@ import javax.swing.text.html.HTMLDocument.HTMLReader.IsindexAction; +import org.apache.commons.codec.EncoderException; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.ClientProtocolException; @@ -44,7 +47,13 @@ import org.jdom.Text; import org.jdom.input.SAXBuilder; import org.jdom.xpath.XPath; +import org.w3c.dom.Node; +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource; +import de.mpiwg.itgroup.eSciDoc.exceptions.ConnectorException; +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import de.mpiwg.itgroup.eSciDoc.exceptions.ObjectNotUniqueError; import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; @@ -477,7 +486,7 @@ - public String getIDfromPID(String pid, String context) throws ClientProtocolException, IOException, IllegalStateException, JDOMException { + public String getIDfromPID(String pid, String context) throws ConnectorException { String filter = ""; @@ -487,21 +496,42 @@ String command = context + "/resources/members/filter"; - HttpResponse result =eScidocPost(command, - new ByteArrayInputStream(filter.getBytes())); + HttpResponse result; + try { + result = eScidocPost(command, + new ByteArrayInputStream(filter.getBytes())); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ConnectorException(); + } - Document dom = new SAXBuilder().build(result.getEntity().getContent()); - - XPath xp = EScidocTools.getESciDocXpath("//escidocItem:item/@xlink:href"); - - Attribute attr = (Attribute)xp.selectSingleNode(dom); - - if (attr!=null){ - return attr.getValue(); + try { + Document dom = new SAXBuilder().build(result.getEntity().getContent()); + + XPath xp = EScidocTools.getESciDocXpath("//escidocItem:item/@xlink:href"); + + Attribute attr = (Attribute)xp.selectSingleNode(dom); + + if (attr!=null){ + return attr.getValue(); + } + + return null; + //return convertStreamToString(result.getEntity().getContent()); + } catch (IllegalStateException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ConnectorException(); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ConnectorException(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ConnectorException(); } - - return null; - //return convertStreamToString(result.getEntity().getContent()); } @@ -529,7 +559,7 @@ } - public List getObjectListFromFilterResult(String command, String objectXpath) throws IOException, IllegalStateException, JDOMException { + public List getObjectListFromFilterResult(String command, String objectXpath) throws IOException, IllegalStateException, JDOMException,ESciDocXmlObjectException { //String filter = ""; // //String command = context @@ -646,7 +676,7 @@ - public boolean alreadyExists(String indexField, String testString, String context) throws Exception { + public ECHOObject alreadyExists(String indexField, String testString, String context) throws ConnectorException, ObjectNotUniqueError { String[] ct = context.split("/"); // gebraucht wird hier nur die id, dh ohne /ir/... @@ -656,25 +686,103 @@ String searchString = String.format("\"%s\"=\"%s\"",indexField,testString); searchString += " and "+String.format("\"%s\"=\"%s\"","/properties/context/id",contextId); + HttpResponse ret; + try{ searchString = URLEncoder.encode(searchString,"utf-8"); - HttpResponse ret = eScidocGet("/ir/items?operation=searchRetrieve&version=1.1&query="+searchString); + ret = eScidocGet("/ir/items?operation=searchRetrieve&version=1.1&query="+searchString); + } catch (UnsupportedEncodingException e) { + throw new ConnectorException(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ConnectorException(); + } if (ret.getStatusLine().getStatusCode()!=200) { logger.debug("alreadyExists: error searchstring:"+searchString); HttpEntity ent = ret.getEntity(); - if (ent!=null) - ent.consumeContent(); - throw new Exception(); + if (ent!=null) { + try { + ent.consumeContent(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ConnectorException(); + } + throw new ConnectorException(); + } } + + try{ Document doc = new SAXBuilder().build(ret.getEntity().getContent()); XPath xp = EScidocTools.getESciDocXpath("//zs:numberOfRecords/text()"); String hitsStr = ((Text)xp.selectSingleNode(doc)).getText(); Integer hits = Integer.valueOf(hitsStr); - if (hits>0) - return true; - return false; + if (hits>0){ + if (hits>1) + throw new ObjectNotUniqueError(); + return getOldObjectFromESciDoc(doc); + } + return null; + } catch (IOException e) { + e.printStackTrace(); + throw new ConnectorException(); + } catch (IllegalStateException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ConnectorException(); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ConnectorException(); + } + } + + + + private ECHOObject getOldObjectFromESciDoc(Document doc) { + MapretMap = new HashMap(); + XPath md5Nodes; + XPath itemId; + XPath lastModificationDate; + try { + md5Nodes= EScidocTools.getESciDocXpath(".//escidocComponents:component/escidocComponents:properties[prop:content-category[text()='index_meta']]/prop:checksum"); + itemId= EScidocTools.getESciDocXpath(".//escidocItem:item/@xlink:href"); + lastModificationDate = EScidocTools.getESciDocXpath(".//escidocItem:item/@last-modification-date"); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return null; + } + Element node; + Attribute idNode; + Attribute lastModificationDateNode; + try { + node = (Element)md5Nodes.selectSingleNode(doc); + idNode = (Attribute)itemId.selectSingleNode(doc); + lastModificationDateNode =(Attribute)lastModificationDate.selectSingleNode(doc); + + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return null; + } + String md5= node.getTextTrim(); + String escidocId= idNode.getValue(); + ECHORessource er; + try { + er = new ECHORessource(); + er.eScidocId=escidocId; + er.indexMetaMD5stored=md5; + er.lastModificationDate= lastModificationDateNode.getValue(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return null; + } + return er; } @@ -711,5 +819,8 @@ + + + } diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java --- a/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java Mon Jan 10 12:42:27 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java Wed Jan 12 11:00:14 2011 +0100 @@ -63,6 +63,7 @@ xpathResources.addNamespace("escidocComponents",escidocComponents); xpathResources.addNamespace("xlink",xlink); xpathResources.addNamespace("metadata-records",metadataRecords); + xpathResources.addNamespace("metadataRecords",metadataRecords); xpathResources.addNamespace("container",container); xpathResources.addNamespace("item",item); xpathResources.addNamespace("srw",srw); diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/config/echo.properties --- a/src/de/mpiwg/itgroup/eSciDoc/config/echo.properties Mon Jan 10 12:42:27 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/config/echo.properties Wed Jan 12 11:00:14 2011 +0100 @@ -1,3 +1,3 @@ textServletUrl=http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn= dirInfoUrl=http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/dirInfo-xml.jsp?fn= -metaDataManager=http://localhost:48080/MetaDataManagerRestlet \ No newline at end of file +metaDataManager=http://localhost:8180/MetaDataManagerRestlet \ No newline at end of file diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHOObject.java --- a/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHOObject.java Mon Jan 10 12:42:27 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHOObject.java Wed Jan 12 11:00:14 2011 +0100 @@ -28,6 +28,8 @@ public String metadataMananagerUrl; public String context; public String description =""; + public String eScidocId; + public String lastModificationDate; public ECHOObject() throws IOException{ Properties echoProperties = new Properties(); @@ -92,4 +94,6 @@ } return pid; } + + } diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHORessource.java --- a/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHORessource.java Mon Jan 10 12:42:27 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHORessource.java Wed Jan 12 11:00:14 2011 +0100 @@ -1,12 +1,18 @@ package de.mpiwg.itgroup.eSciDoc.echoObjects; import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; import java.net.URI; +import java.net.URL; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.Properties; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.codec.binary.Hex; import org.apache.http.Header; import org.apache.http.HttpException; import org.apache.http.HttpHost; @@ -34,6 +40,8 @@ private String dirInfoUrl; public String link; + public String indexMetaMD5stored; + public ECHORessource() throws IOException{ super(); @@ -190,4 +198,50 @@ } return false; } -} + + + public String getIndexMetaMD5onServer() { + String digest=""; + try { + URL url = new URL(metaData); + InputStream is = url.openStream(); + MessageDigest md = MessageDigest.getInstance("MD5"); + digest = getDigest(is, md, 2048); + } catch (MalformedURLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (NoSuchAlgorithmException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return digest; + } + + public static String getDigest(InputStream is, MessageDigest md, int byteArraySize) + throws NoSuchAlgorithmException, IOException { + + md.reset(); + byte[] bytes = new byte[byteArraySize]; + int numBytes; + while ((numBytes = is.read(bytes)) != -1) { + md.update(bytes, 0, numBytes); + } + byte[] digest = md.digest(); + String result = new String(Hex.encodeHex(digest)); + return result; + } + + + public String getIndexMetaMD5stored() { + return indexMetaMD5stored; + } + + public String getEScidocId() { + return eScidocId; + } + + + } diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/exceptions/ConnectorException.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/exceptions/ConnectorException.java Wed Jan 12 11:00:14 2011 +0100 @@ -0,0 +1,5 @@ +package de.mpiwg.itgroup.eSciDoc.exceptions; + +public class ConnectorException extends Exception { + +} diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/exceptions/ESciDocXmlObjectException.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/exceptions/ESciDocXmlObjectException.java Wed Jan 12 11:00:14 2011 +0100 @@ -0,0 +1,5 @@ +package de.mpiwg.itgroup.eSciDoc.exceptions; + +public class ESciDocXmlObjectException extends Exception{ + +} diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/exceptions/ObjectNotUniqueError.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/exceptions/ObjectNotUniqueError.java Wed Jan 12 11:00:14 2011 +0100 @@ -0,0 +1,5 @@ +package de.mpiwg.itgroup.eSciDoc.exceptions; + +public class ObjectNotUniqueError extends Exception { + +} diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/exceptions/TransformerException.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/exceptions/TransformerException.java Wed Jan 12 11:00:14 2011 +0100 @@ -0,0 +1,5 @@ +package de.mpiwg.itgroup.eSciDoc.exceptions; + +public class TransformerException extends Exception { + +} diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java --- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java Mon Jan 10 12:42:27 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java Wed Jan 12 11:00:14 2011 +0100 @@ -1,16 +1,14 @@ package de.mpiwg.itgroup.eSciDoc.harvesting; -import java.io.File; -import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; -import java.net.MalformedURLException; +import java.io.UnsupportedEncodingException; import java.net.URL; import java.util.ArrayList; +import java.util.Map; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; -import org.apache.log4j.BasicConfigurator; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.log4j.xml.DOMConfigurator; @@ -20,50 +18,81 @@ import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource; +import de.mpiwg.itgroup.eSciDoc.exceptions.ConnectorException; +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import de.mpiwg.itgroup.eSciDoc.exceptions.ObjectNotUniqueError; +import de.mpiwg.itgroup.eSciDoc.exceptions.TransformerException; import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter; import de.mpiwg.itgroup.eSciDoc.importer.Importer; import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer; import de.mpiwg.itgroup.eSciDoc.transformer.Transformer; import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; +/** + * @author dwinter + * Main class for data harveting from index.meta files into eScidoc + */ public class ESciDocDataHarvester { - static int MAX_REC=1000; + static int MAX_REC=1000; // maximale Anzahl von Records die in einem Stueck eingelesen bzw. bearbeitet werden. //static int MAX_REC=5; protected Logger logger = Logger.getRootLogger(); protected Importer importer; protected EScidocBasicHandler connector; protected Transformer transformer; - private EScidocTools tools; + private String echoContext; private Logger addedFile = Logger.getLogger("addedFilesLogger"); private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger"); + /** + * @param importer Importer for dataObjects, describes how to access the objects + * @param transformer Transformer, generates the eScidocMetaDatasets + * @param connector connects to the eScidocRepository + * @param context Escidoc context path z.b. /ir/context/escidoc:12001 + */ public ESciDocDataHarvester(Importer importer, Transformer transformer, - EScidocBasicHandler connector, String context) throws IOException { + EScidocBasicHandler connector, String context) { this.importer = importer; this.transformer = transformer; this.connector = connector; - this.tools = new EScidocTools(connector); + //this.tools = new EScidocTools(connector); this.echoContext = context; } - public Boolean readObjectsFromInstance(String type) throws Exception { + /** Read objects into eScidoc or updates the objects if indexMeta has changed. + * @param type restrict the imported objects to a specific type, possible types should be defined in + * the given importer @see {@link #importer} + * @return + * @throws ConnectorException + * @throws TransformerException + * @throws ESciDocXmlObjectException + */ + public Boolean readObjectsFromInstance(String type) throws ConnectorException, TransformerException, ESciDocXmlObjectException { ArrayList addedObjects = new ArrayList(); ArrayList notAddedObjects = new ArrayList(); for (ECHOObject obj : importer.getObjectList(type)) { if (ECHORessource.class.isInstance(obj)) { try { - if (connector.alreadyExists( - "/md-records/md-record/admin/archivePath", - ((ECHORessource) obj).archivePath, echoContext)) { + ECHOObject old; + try { + old = connector.alreadyExists( + "/md-records/md-record/admin/archivePath", + ((ECHORessource) obj).archivePath, echoContext); + } catch (ObjectNotUniqueError e) { + // TODO Auto-generated catch block + e.printStackTrace(); + continue; + } + if (old!=null) { logger.debug("already exist:" + ((ECHORessource) obj).archivePath); + handleExistingObject(obj,old); continue; } - } catch (Exception e) { - logger.debug("already exist error"); + } catch (ConnectorException e) { + logger.debug("already exist error:"); e.printStackTrace(); continue; } @@ -77,28 +106,32 @@ } else { eSciDocXmlObject escidocItem = transformer.transform(obj); - logger.info(escidocItem.printXML()); - // TODO write PID to back to echo-obj - Boolean result = connector.createItem(escidocItem); - if (result) { - addedObjects.add(escidocItem.getESciDocId()); - addedFile.debug(escidocItem.getESciDocId() + "\n"); - // addedFile.write(escidocItem.getESciDocId()+"\n"); - // addedFile.flush(); + + + try { + logger.info(escidocItem.printXML()); + // TODO write PID to back to echo-obj + Boolean result = connector.createItem(escidocItem); + if (result) { + addedObjects.add(escidocItem.getESciDocId()); + addedFile.debug(escidocItem.getESciDocId() + "\n"); - } else { - notAddedObjects.add(obj.echoUrl); - notAddedFile.debug(obj.echoUrl); - // notAddedFile.write(obj.echoUrl+"\n"); - // notAddedFile.flush(); - } - // if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){ - // logger.info("PID already exists:"+obj); - // } else if (result == - // ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){ - // logger.info("Object with reference to the same digital object already exists:"+obj); - // } - + } else { + notAddedObjects.add(obj.echoUrl); + notAddedFile.debug(obj.echoUrl); + + } + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ESciDocXmlObjectException(); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ESciDocXmlObjectException(); + } + } } if (logger.getLevel() == Level.DEBUG) { @@ -107,18 +140,61 @@ } } - // File outFile = new File("/tmp/import.out"); - // FileWriter fw = new FileWriter(outFile); - // for (String addedObject:addedObjects){ - // fw.write(addedObject+"\n"); - // } - // for (String addedObject:notAddedObjects){ - // fw.write(addedObject+"\n"); - // } - // fw.close(); return true; } + + + + + /** + * Deal with existing objects, do nothing if md5 of stored metadata and metadata on the server is the same otherwise call {@link #updateObject(ECHOObject)}. + * @param objNew + * @param old + * @throws TransformerException + * @throws ESciDocXmlObjectException + */ + private void handleExistingObject(ECHOObject objNew, ECHOObject old) throws TransformerException, ESciDocXmlObjectException { + ECHORessource objNewRes = (ECHORessource)objNew; + ECHORessource objOldRes = (ECHORessource)old; + String md5onServer = objNewRes.getIndexMetaMD5onServer(); + String md5=objOldRes.getIndexMetaMD5stored(); + if (md5onServer.equals(md5)) + return; + else { + updateObject(objNew, old); + } + + } + + private void updateObject(ECHOObject objNew, ECHOObject objOld) throws TransformerException, ESciDocXmlObjectException { + objNew.context = echoContext; + eSciDocXmlObject escidocItem = transformer.transform(objNew); + String lastModificationDateOld = objOld.lastModificationDate; + escidocItem.setLastModificationDate(lastModificationDateOld); + try { + HttpResponse ret = connector.eScidocPut(objOld.eScidocId, EScidocBasicHandler.convertStringToStream(escidocItem.printXML())); + HttpEntity ent = ret.getEntity(); + if (ret.getStatusLine().getStatusCode() != 200) { + logger.debug("Can not update:" + objOld.eScidocId); + // res.getEntity().consumeContent(); // necessary to release + // the conneciton + ent.consumeContent(); + + } + InputStream restream = ret.getEntity().getContent(); + logger.debug(EScidocBasicHandler.convertStreamToString(restream)); + } catch (UnsupportedEncodingException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + + } + /** * @param command * @param objectXPath diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarverster.java --- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarverster.java Mon Jan 10 12:42:27 2011 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ -package de.mpiwg.itgroup.eSciDoc.harvesting; - -import java.io.File; -import java.net.MalformedURLException; -import java.net.URL; - -import org.apache.log4j.BasicConfigurator; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; -import org.apache.log4j.xml.DOMConfigurator; - -import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; -import de.mpiwg.itgroup.eSciDoc.foxridge.IndexMetaIterator; -import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter; -import de.mpiwg.itgroup.eSciDoc.importer.FoxridgeImporter; -import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer; - -public class FoxridgeHarverster { - - /** - * @param args - * @throws Exception - */ - public static void main(String[] args) throws Exception { - - DOMConfigurator.configure("log4uconf.xml"); - Logger rl = Logger.getRootLogger(); - Logger.getLogger("transformerLogger").setLevel(Level.DEBUG); - Logger.getLogger("addedFilesLogger").setLevel(Level.DEBUG); - Logger.getLogger("notAddedFilesLogger").setLevel(Level.DEBUG); - - rl.setLevel(Level.DEBUG); - - //IndexMetaIterator sd = new IndexMetaIterator(new File("/Volumes/online_permanent/echo")); - //while (sd.hasNext()){ - // System.out.println(sd.next()); - //} - EScidocBasicHandler connector = new EScidocBasicHandler("escidoc-test.mpiwg-berlin.mpg.de",8080,"dwinter","weikiki7"); - - ESciDocDataHarvester hv = new ESciDocDataHarvester(new FoxridgeImporter(new File("/Volumes/online_permanent/")), - new ECHOTransformer(),connector,"/ir/context/escidoc:12001"); - - //hv.readObjectsFromInstance("ECHO_collection"); - //hv.readObjectsFromInstance("ECHO_resource"); - - //hv.releaseAndSubmitObjects("/ir/context/escidoc:12001"); - hv.releaseAndSubmitObjects("/ir/context/escidoc:12001/resources/members","//escidocItem:item",1); - - - } - -} diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarvester.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarvester.java Wed Jan 12 11:00:14 2011 +0100 @@ -0,0 +1,52 @@ +package de.mpiwg.itgroup.eSciDoc.harvesting; + +import java.io.File; +import java.net.MalformedURLException; +import java.net.URL; + +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.xml.DOMConfigurator; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.foxridge.IndexMetaIterator; +import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter; +import de.mpiwg.itgroup.eSciDoc.importer.FoxridgeImporter; +import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer; + +public class FoxridgeHarvester { + + /** + * @param args + * @throws Exception + */ + public static void main(String[] args) throws Exception { + + DOMConfigurator.configure("log4uconf.xml"); + Logger rl = Logger.getRootLogger(); + Logger.getLogger("transformerLogger").setLevel(Level.DEBUG); + Logger.getLogger("addedFilesLogger").setLevel(Level.DEBUG); + Logger.getLogger("notAddedFilesLogger").setLevel(Level.DEBUG); + + rl.setLevel(Level.DEBUG); + + //IndexMetaIterator sd = new IndexMetaIterator(new File("/Volumes/online_permanent/echo")); + //while (sd.hasNext()){ + // System.out.println(sd.next()); + //} + EScidocBasicHandler connector = new EScidocBasicHandler("escidoc-test.mpiwg-berlin.mpg.de",8080,"dwinter","weikiki7"); + + ESciDocDataHarvester hv = new ESciDocDataHarvester(new FoxridgeImporter(new File("/Volumes/online_permanent/shipbuilding/")), + new ECHOTransformer(),connector,"/ir/context/escidoc:12001"); + + //hv.readObjectsFromInstance("ECHO_collection"); + hv.readObjectsFromInstance("ECHO_resource"); + + //hv.releaseAndSubmitObjects("/ir/context/escidoc:12001"); + //hv.releaseAndSubmitObjects("/ir/context/escidoc:12001/resources/members","//escidocItem:item",1); + + + } + +} diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/transformer/ECHOTransformer.java --- a/src/de/mpiwg/itgroup/eSciDoc/transformer/ECHOTransformer.java Mon Jan 10 12:42:27 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/transformer/ECHOTransformer.java Wed Jan 12 11:00:14 2011 +0100 @@ -19,6 +19,7 @@ import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOCollection; import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource; +import de.mpiwg.itgroup.eSciDoc.exceptions.TransformerException; import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; import de.mpiwg.itgroup.metadataManager.pid.DCTransformer; @@ -34,8 +35,9 @@ } - public eSciDocXmlObject transform(ECHOObject obj) throws Exception { + public eSciDocXmlObject transform(ECHOObject obj) throws TransformerException { + try{ if (obj.pid==null) return null; @@ -113,6 +115,10 @@ eSciDocObj.insertDC(dc); } return eSciDocObj; + } catch (Exception e){ + throw new TransformerException(); + } } + } diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/transformer/Transformer.java --- a/src/de/mpiwg/itgroup/eSciDoc/transformer/Transformer.java Mon Jan 10 12:42:27 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/transformer/Transformer.java Wed Jan 12 11:00:14 2011 +0100 @@ -1,11 +1,12 @@ package de.mpiwg.itgroup.eSciDoc.transformer; import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; +import de.mpiwg.itgroup.eSciDoc.exceptions.TransformerException; import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; public interface Transformer { - eSciDocXmlObject transform(ECHOObject obj) throws Exception; + eSciDocXmlObject transform(ECHOObject obj) throws TransformerException; } diff -r fab8e78184fa -r 58b52df9763c src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java --- a/src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java Mon Jan 10 12:42:27 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java Wed Jan 12 11:00:14 2011 +0100 @@ -36,6 +36,7 @@ import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; import de.mpiwg.itgroup.eSciDoc.Tools.MyFileNameFilter; +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; public class eSciDocXmlObject { @@ -45,56 +46,92 @@ private String pid; private DOMBuilder parser; - public eSciDocXmlObject(String pid, InputStream template) throws Exception { + public eSciDocXmlObject(String pid, InputStream template) throws ESciDocXmlObjectException { SAXBuilder builder = new SAXBuilder(); - dom = builder.build(template); - - xpath = EScidocTools.getESciDocXpath("//prop:pid"); - - Element test = (Element) xpath.selectSingleNode(dom); + try { + dom = builder.build(template); + + xpath = EScidocTools.getESciDocXpath("//prop:pid"); - test.setText(pid); - this.pid=pid; + Element test = (Element) xpath.selectSingleNode(dom); + + test.setText(pid); + this.pid=pid; + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ESciDocXmlObjectException(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ESciDocXmlObjectException(); + } } -public eSciDocXmlObject(URL url) throws Exception { +public eSciDocXmlObject(URL url) throws ESciDocXmlObjectException { SAXBuilder builder = new SAXBuilder(); - dom = builder.build(url); - - xpath = EScidocTools.getESciDocXpath("//prop:pid"); - - Element test = (Element) xpath.selectSingleNode(dom); + try { + dom = builder.build(url); + + xpath = EScidocTools.getESciDocXpath("//prop:pid"); - this.pid= test.getTextTrim(); + Element test = (Element) xpath.selectSingleNode(dom); + + this.pid= test.getTextTrim(); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ESciDocXmlObjectException(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ESciDocXmlObjectException(); + } } - public eSciDocXmlObject(Element el) throws JDOMException { - - dom = new Document((Element)el.clone()); + public eSciDocXmlObject(Element el) throws ESciDocXmlObjectException { - xpath = EScidocTools.getESciDocXpath("//prop:pid"); - - Element test = (Element) xpath.selectSingleNode(dom); - if (test!=null) - this.pid= test.getTextTrim(); + try { + dom = new Document((Element)el.clone()); + + xpath = EScidocTools.getESciDocXpath("//prop:pid"); + + Element test = (Element) xpath.selectSingleNode(dom); + if (test!=null) + this.pid= test.getTextTrim(); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ESciDocXmlObjectException(); + } } - public eSciDocXmlObject(InputStream content) throws JDOMException, IOException { + public eSciDocXmlObject(InputStream content) throws ESciDocXmlObjectException{ SAXBuilder builder = new SAXBuilder(); - dom = builder.build(content); - - xpath = EScidocTools.getESciDocXpath("//prop:pid"); - - Element test = (Element) xpath.selectSingleNode(dom); + try { + dom = builder.build(content); + + xpath = EScidocTools.getESciDocXpath("//prop:pid"); - this.pid= test.getTextTrim(); + Element test = (Element) xpath.selectSingleNode(dom); + + this.pid= test.getTextTrim(); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ESciDocXmlObjectException(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ESciDocXmlObjectException(); + } } @@ -318,6 +355,10 @@ + /** + * @return Object in eScidoc XML formatting + * @throws IOException + */ public String printXML() throws IOException{ XMLOutputter out = new XMLOutputter(); @@ -466,5 +507,30 @@ return dom; } + public void setLastModificationDate(String lastModificationDateOld) throws ESciDocXmlObjectException { + Attribute item; + try { + XPath xp = EScidocTools.getESciDocXpath(".//escidocItem:item/@last-modification-date"); + item = (Attribute)xp.selectSingleNode(dom); + + + if (item==null) {//existiert noch nicht} + //Namespace namespace = Namespace.getNamespace("item",EScidocTools.item); + xp = EScidocTools.getESciDocXpath(".//escidocItem:item"); + Element itemElement = (Element)xp.selectSingleNode(dom); + itemElement.setAttribute("last-modification-date", lastModificationDateOld); + + + } else { + item.setValue(lastModificationDateOld); + } + } catch (JDOMException e) { + e.printStackTrace(); + throw new ESciDocXmlObjectException(); + } + + + } + }