comparison src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java @ 3:58b52df9763c

added update functionality if index.meta has changed
author dwinter
date Wed, 12 Jan 2011 11:00:14 +0100
parents fab8e78184fa
children cb5668b07bfc
comparison
equal deleted inserted replaced
2:fab8e78184fa 3:58b52df9763c
1 package de.mpiwg.itgroup.eSciDoc.harvesting; 1 package de.mpiwg.itgroup.eSciDoc.harvesting;
2 2
3 import java.io.File;
4 import java.io.FileWriter;
5 import java.io.IOException; 3 import java.io.IOException;
6 import java.io.InputStream; 4 import java.io.InputStream;
7 import java.net.MalformedURLException; 5 import java.io.UnsupportedEncodingException;
8 import java.net.URL; 6 import java.net.URL;
9 import java.util.ArrayList; 7 import java.util.ArrayList;
8 import java.util.Map;
10 9
11 import org.apache.http.HttpEntity; 10 import org.apache.http.HttpEntity;
12 import org.apache.http.HttpResponse; 11 import org.apache.http.HttpResponse;
13 import org.apache.log4j.BasicConfigurator;
14 import org.apache.log4j.Level; 12 import org.apache.log4j.Level;
15 import org.apache.log4j.Logger; 13 import org.apache.log4j.Logger;
16 import org.apache.log4j.xml.DOMConfigurator; 14 import org.apache.log4j.xml.DOMConfigurator;
17 import org.jdom.JDOMException; 15 import org.jdom.JDOMException;
18 16
19 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; 17 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
20 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; 18 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
21 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; 19 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject;
22 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource; 20 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource;
21 import de.mpiwg.itgroup.eSciDoc.exceptions.ConnectorException;
22 import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
23 import de.mpiwg.itgroup.eSciDoc.exceptions.ObjectNotUniqueError;
24 import de.mpiwg.itgroup.eSciDoc.exceptions.TransformerException;
23 import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter; 25 import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter;
24 import de.mpiwg.itgroup.eSciDoc.importer.Importer; 26 import de.mpiwg.itgroup.eSciDoc.importer.Importer;
25 import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer; 27 import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer;
26 import de.mpiwg.itgroup.eSciDoc.transformer.Transformer; 28 import de.mpiwg.itgroup.eSciDoc.transformer.Transformer;
27 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; 29 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
28 30
31 /**
32 * @author dwinter
33 * Main class for data harvesting from index.meta files into eScidoc
34 */
29 public class ESciDocDataHarvester { 35 public class ESciDocDataHarvester {
30 static int MAX_REC=1000; 36 static int MAX_REC=1000; // maximale Anzahl von Records die in einem Stueck eingelesen bzw. bearbeitet werden.
31 //static int MAX_REC=5; 37 //static int MAX_REC=5;
32 protected Logger logger = Logger.getRootLogger(); 38 protected Logger logger = Logger.getRootLogger();
33 protected Importer importer; 39 protected Importer importer;
34 protected EScidocBasicHandler connector; 40 protected EScidocBasicHandler connector;
35 protected Transformer transformer; 41 protected Transformer transformer;
36 private EScidocTools tools; 42
37 private String echoContext; 43 private String echoContext;
38 private Logger addedFile = Logger.getLogger("addedFilesLogger"); 44 private Logger addedFile = Logger.getLogger("addedFilesLogger");
39 private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger"); 45 private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger");
40 46
47 /**
48 * @param importer Importer for dataObjects, describes how to access the objects
49 * @param transformer Transformer, generates the eScidocMetaDatasets
50 * @param connector connects to the eScidocRepository
51 * @param context Escidoc context path e.g. /ir/context/escidoc:12001
52 */
41 public ESciDocDataHarvester(Importer importer, Transformer transformer, 53 public ESciDocDataHarvester(Importer importer, Transformer transformer,
42 EScidocBasicHandler connector, String context) throws IOException { 54 EScidocBasicHandler connector, String context) {
43 this.importer = importer; 55 this.importer = importer;
44 this.transformer = transformer; 56 this.transformer = transformer;
45 this.connector = connector; 57 this.connector = connector;
46 this.tools = new EScidocTools(connector); 58 //this.tools = new EScidocTools(connector);
47 this.echoContext = context; 59 this.echoContext = context;
48 60
49 } 61 }
50 62
51 public Boolean readObjectsFromInstance(String type) throws Exception { 63 /** Read objects into eScidoc or updates the objects if indexMeta has changed.
64 * @param type restrict the imported objects to a specific type, possible types should be defined in
65 * the given importer @see {@link #importer}
66 * @return
67 * @throws ConnectorException
68 * @throws TransformerException
69 * @throws ESciDocXmlObjectException
70 */
71 public Boolean readObjectsFromInstance(String type) throws ConnectorException, TransformerException, ESciDocXmlObjectException {
52 ArrayList<String> addedObjects = new ArrayList<String>(); 72 ArrayList<String> addedObjects = new ArrayList<String>();
53 ArrayList<String> notAddedObjects = new ArrayList<String>(); 73 ArrayList<String> notAddedObjects = new ArrayList<String>();
54 for (ECHOObject obj : importer.getObjectList(type)) { 74 for (ECHOObject obj : importer.getObjectList(type)) {
55 75
56 if (ECHORessource.class.isInstance(obj)) { 76 if (ECHORessource.class.isInstance(obj)) {
57 try { 77 try {
58 if (connector.alreadyExists( 78 ECHOObject old;
59 "/md-records/md-record/admin/archivePath", 79 try {
60 ((ECHORessource) obj).archivePath, echoContext)) { 80 old = connector.alreadyExists(
81 "/md-records/md-record/admin/archivePath",
82 ((ECHORessource) obj).archivePath, echoContext);
83 } catch (ObjectNotUniqueError e) {
84 // TODO Auto-generated catch block
85 e.printStackTrace();
86 continue;
87 }
88 if (old!=null) {
61 logger.debug("already exist:" 89 logger.debug("already exist:"
62 + ((ECHORessource) obj).archivePath); 90 + ((ECHORessource) obj).archivePath);
91 handleExistingObject(obj,old);
63 continue; 92 continue;
64 } 93 }
65 } catch (Exception e) { 94 } catch (ConnectorException e) {
66 logger.debug("already exist error"); 95 logger.debug("already exist error:");
67 e.printStackTrace(); 96 e.printStackTrace();
68 continue; 97 continue;
69 } 98 }
70 } 99 }
71 100
75 if (contid != null) { 104 if (contid != null) {
76 System.out.println("------- belongsTo:" + contid); 105 System.out.println("------- belongsTo:" + contid);
77 } else { 106 } else {
78 107
79 eSciDocXmlObject escidocItem = transformer.transform(obj); 108 eSciDocXmlObject escidocItem = transformer.transform(obj);
80 logger.info(escidocItem.printXML()); 109
81 // TODO write PID to back to echo-obj 110
82 Boolean result = connector.createItem(escidocItem); 111 try {
83 if (result) { 112 logger.info(escidocItem.printXML());
84 addedObjects.add(escidocItem.getESciDocId()); 113 // TODO write PID to back to echo-obj
85 addedFile.debug(escidocItem.getESciDocId() + "\n"); 114 Boolean result = connector.createItem(escidocItem);
86 // addedFile.write(escidocItem.getESciDocId()+"\n"); 115 if (result) {
87 // addedFile.flush(); 116 addedObjects.add(escidocItem.getESciDocId());
88 117 addedFile.debug(escidocItem.getESciDocId() + "\n");
89 } else { 118
90 notAddedObjects.add(obj.echoUrl); 119 } else {
91 notAddedFile.debug(obj.echoUrl); 120 notAddedObjects.add(obj.echoUrl);
92 // notAddedFile.write(obj.echoUrl+"\n"); 121 notAddedFile.debug(obj.echoUrl);
93 // notAddedFile.flush(); 122
94 } 123 }
95 // if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){ 124
96 // logger.info("PID already exists:"+obj); 125 } catch (IOException e) {
97 // } else if (result == 126 // TODO Auto-generated catch block
98 // ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){ 127 e.printStackTrace();
99 // logger.info("Object with reference to the same digital object already exists:"+obj); 128 throw new ESciDocXmlObjectException();
100 // } 129 } catch (JDOMException e) {
101 130 // TODO Auto-generated catch block
131 e.printStackTrace();
132 throw new ESciDocXmlObjectException();
133 }
134
102 } 135 }
103 } 136 }
104 if (logger.getLevel() == Level.DEBUG) { 137 if (logger.getLevel() == Level.DEBUG) {
105 for (String addedObject : addedObjects) { 138 for (String addedObject : addedObjects) {
106 logger.debug(addedObject); 139 logger.debug(addedObject);
107 } 140 }
108 } 141 }
109 142
110 // File outFile = new File("/tmp/import.out");
111 // FileWriter fw = new FileWriter(outFile);
112 // for (String addedObject:addedObjects){
113 // fw.write(addedObject+"\n");
114 // }
115 // for (String addedObject:notAddedObjects){
116 // fw.write(addedObject+"\n");
117 // }
118 // fw.close();
119 return true; 143 return true;
144 }
145
146
147
148
149
150 /**
151 * Deal with existing objects, do nothing if md5 of stored metadata and metadata on the server is the same otherwise call {@link #updateObject(ECHOObject)}.
152 * @param objNew
153 * @param old
154 * @throws TransformerException
155 * @throws ESciDocXmlObjectException
156 */
157 private void handleExistingObject(ECHOObject objNew, ECHOObject old) throws TransformerException, ESciDocXmlObjectException {
158 ECHORessource objNewRes = (ECHORessource)objNew;
159 ECHORessource objOldRes = (ECHORessource)old;
160 String md5onServer = objNewRes.getIndexMetaMD5onServer();
161 String md5=objOldRes.getIndexMetaMD5stored();
162 if (md5onServer.equals(md5))
163 return;
164 else {
165 updateObject(objNew, old);
166 }
167
168 }
169
170 private void updateObject(ECHOObject objNew, ECHOObject objOld) throws TransformerException, ESciDocXmlObjectException {
171 objNew.context = echoContext;
172 eSciDocXmlObject escidocItem = transformer.transform(objNew);
173 String lastModificationDateOld = objOld.lastModificationDate;
174 escidocItem.setLastModificationDate(lastModificationDateOld);
175 try {
176 HttpResponse ret = connector.eScidocPut(objOld.eScidocId, EScidocBasicHandler.convertStringToStream(escidocItem.printXML()));
177 HttpEntity ent = ret.getEntity();
178 if (ret.getStatusLine().getStatusCode() != 200) {
179 logger.debug("Can not update:" + objOld.eScidocId);
180 // res.getEntity().consumeContent(); // necessary to release
181 // the conneciton
182 ent.consumeContent();
183
184 }
185 InputStream restream = ret.getEntity().getContent();
186 logger.debug(EScidocBasicHandler.convertStreamToString(restream));
187 } catch (UnsupportedEncodingException e) {
188 // TODO Auto-generated catch block
189 e.printStackTrace();
190 } catch (IOException e) {
191 // TODO Auto-generated catch block
192 e.printStackTrace();
193 }
194
195
120 } 196 }
121 197
122 /** 198 /**
123 * @param command 199 * @param command
124 * @param objectXPath 200 * @param objectXPath