comparison src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java @ 8:a844f6948dd8

Änderungen im Walker tools für pubman
author dwinter
date Mon, 14 May 2012 09:58:45 +0200
parents 4b1ae52418c1
children b6cf6462d709
comparison
equal deleted inserted replaced
7:df8c62d84f8f 8:a844f6948dd8
38 //static int MAX_REC=5; 38 //static int MAX_REC=5;
39 protected Logger logger = Logger.getRootLogger(); 39 protected Logger logger = Logger.getRootLogger();
40 protected Importer importer; 40 protected Importer importer;
41 protected EScidocBasicHandler connector; 41 protected EScidocBasicHandler connector;
42 protected Transformer transformer; 42 protected Transformer transformer;
43 43
44 private String context; 44 private String context;
45 private Logger addedFile = Logger.getLogger("addedFilesLogger"); 45 private Logger addedFile = Logger.getLogger("addedFilesLogger");
46 private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger"); 46 private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger");
47 47
48 /** 48 /**
77 77
78 if (ECHORessource.class.isInstance(obj)) { 78 if (ECHORessource.class.isInstance(obj)) {
79 try { 79 try {
80 ECHOObject old; 80 ECHOObject old;
81 try { 81 try {
82 old = connector.alreadyExists( 82 old = connector.alreadyExists(
83 "/md-records/md-record/admin/archivePath", 83 "admin.archivePath",
84 ((ECHORessource) obj).archivePath, context); 84 ((ECHORessource) obj).archivePath, context,"=");
85 if (old==null){ //FIXME Problem bei der Erfassungder Metadaten sollte eigentlich nicht vorkommen!
86 old = connector.alreadyExists(
87 "admin.archivePath",
88 ((ECHORessource) obj).archivePath.replace("/mpiwg/online/permanent", "/Volumes/online/permanent"), context,"=");
89 }
90 if (old==null){
91 old = connector.alreadyExists(
92 "admin.archivePath",
93 ((ECHORessource) obj).archivePath.replace("/mpiwg/online/experimental", "/Volumes/online/experimental"), context,"=");
94 }
85 } catch (ObjectNotUniqueError e) { 95 } catch (ObjectNotUniqueError e) {
86 // TODO Auto-generated catch block 96 // TODO Auto-generated catch block
87 e.printStackTrace(); 97 e.printStackTrace();
88 continue; 98 continue;
89 } 99 }
106 if (contid != null) { 116 if (contid != null) {
107 System.out.println("------- belongsTo:" + contid); 117 System.out.println("------- belongsTo:" + contid);
108 } else { 118 } else {
109 119
110 eSciDocXmlObject escidocItem = transformer.transform(obj); 120 eSciDocXmlObject escidocItem = transformer.transform(obj);
111 121
112 122
113 try { 123 try {
114 logger.info(escidocItem.printXML()); 124 logger.info(escidocItem.printXML());
115 // TODO write PID to back to echo-obj 125 // TODO write PID to back to echo-obj
116 Boolean result = connector.createItem(escidocItem); 126 Boolean result = connector.createItem(escidocItem);
117 if (result) { 127 if (result) {
118 addedObjects.add(escidocItem.getESciDocId()); 128 addedObjects.add(escidocItem.getESciDocId());
119 addedFile.debug(escidocItem.getESciDocId() + "\n"); 129 addedFile.debug(escidocItem.getESciDocId() + "\n");
120 130
121 } else { 131 } else {
122 notAddedObjects.add(obj.echoUrl); 132 notAddedObjects.add(obj.echoUrl);
123 notAddedFile.debug(obj.echoUrl); 133 notAddedFile.debug(obj.echoUrl);
124 134
125 } 135 }
126 136
127 } catch (IOException e) { 137 } catch (IOException e) {
128 // TODO Auto-generated catch block 138 // TODO Auto-generated catch block
129 e.printStackTrace(); 139 e.printStackTrace();
130 throw new ESciDocXmlObjectException(); 140 throw new ESciDocXmlObjectException();
131 } catch (JDOMException e) { 141 } catch (JDOMException e) {
132 // TODO Auto-generated catch block 142 // TODO Auto-generated catch block
133 e.printStackTrace(); 143 e.printStackTrace();
134 throw new ESciDocXmlObjectException(); 144 throw new ESciDocXmlObjectException();
135 } 145 }
136 146
137 } 147 }
138 } 148 }
139 if (logger.getLevel() == Level.DEBUG) { 149 if (logger.getLevel() == Level.DEBUG) {
140 for (String addedObject : addedObjects) { 150 for (String addedObject : addedObjects) {
141 logger.debug(addedObject); 151 logger.debug(addedObject);
143 } 153 }
144 154
145 return true; 155 return true;
146 } 156 }
147 157
148 158
149 /** Read objects into eScidoc or updates the objects if indexMeta has changed. 159 /** Read objects into eScidoc or updates the objects if indexMeta has changed.
150 * @param type restrict the imported objects to a specific type, possible types should be defined in 160 * @param type restrict the imported objects to a specific type, possible types should be defined in
151 * the given importer @see {@link #importer} 161 * the given importer @see {@link #importer}
152 * @return 162 * @return
153 * @throws ConnectorException 163 * @throws ConnectorException
156 */ 166 */
157 public Boolean readObjectsFromInstance(String type) throws ConnectorException, TransformerException, ESciDocXmlObjectException { 167 public Boolean readObjectsFromInstance(String type) throws ConnectorException, TransformerException, ESciDocXmlObjectException {
158 ArrayList<String> addedObjects = new ArrayList<String>(); 168 ArrayList<String> addedObjects = new ArrayList<String>();
159 ArrayList<String> notAddedObjects = new ArrayList<String>(); 169 ArrayList<String> notAddedObjects = new ArrayList<String>();
160 for (ECHOObject obj : importer.getObjectList(type)) { 170 for (ECHOObject obj : importer.getObjectList(type)) {
161 171 if (obj==null)
172 continue;
162 if (ECHORessource.class.isInstance(obj)) { 173 if (ECHORessource.class.isInstance(obj)) {
163 try { 174 try {
164 175
165 // checke zuerst, ob die MD5 schon im publiziert Teil der Metadaten ist, dann tue nichts 176 // checke zuerst, ob die MD5 schon im publiziert Teil der Metadaten ist, dann tue nichts
166 String md5 = ((ECHORessource) obj).getIndexMetaMD5onServer(); 177 String md5 = ((ECHORessource) obj).getIndexMetaMD5onServer();
167 //List<eSciDocXmlObject> results = connector.getObjectsFromSearch("escidoc.component.checksum",md5); 178 //List<eSciDocXmlObject> results = connector.getObjectsFromSearch("escidoc.component.checksum",md5);
168 List<eSciDocXmlObject> results = connector.getObjectsFromSearch("escidoc.component.checksum",md5,context); 179 List<eSciDocXmlObject> results = connector.getObjectsFromSearch("escidoc.component.checksum",md5,context);
169 if (results.size()>0){ //index.meta schon abgespeichert 180 if (results.size()>0){ //index.meta schon abgespeichert
170 notAddedFile.debug("alredy exist:"+obj.echoUrl); 181 notAddedFile.debug("alredy exist:"+obj.echoUrl);
171 continue; 182
172 } 183 ((ECHORessource) obj).writeEsciDocIDToIndexMeta(results.get(0));
184
185 continue;
186 }
187
173 188
174 189
175 190
176 ECHOObject old; 191 ECHOObject old;
177 try { 192 try {
178 193
179 old = connector.alreadyExists( 194 old = connector.alreadyExists(
180 "/md-records/md-record/admin/archivePath", 195 "admin.archivePath",
181 ((ECHORessource) obj).archivePath, context); 196 ((ECHORessource) obj).archivePath, context,"=");
197 if (old==null){ //FIXME Problem bei der Erfassungder Metadaten sollte eigentlich nicht vorkommen!
198 old = connector.alreadyExists(
199 "admin.archivePath",
200 ((ECHORessource) obj).archivePath.replace("/mpiwg/online/permanent", "/Volumes/online_permanent"), context,"=");
201 }
202 if (old==null){
203 old = connector.alreadyExists(
204 "admin.archivePath",
205 ((ECHORessource) obj).archivePath.replace("/mpiwg/online/experimental", "/Volumes/online_experimental"), context,"=");
206 }
182 } catch (ObjectNotUniqueError e) { 207 } catch (ObjectNotUniqueError e) {
183 // TODO Auto-generated catch block 208 // TODO Auto-generated catch block
184 e.printStackTrace(); 209 e.printStackTrace();
185 continue; 210 continue;
186 } 211 }
196 continue; 221 continue;
197 } 222 }
198 } 223 }
199 224
200 obj.context = context; 225 obj.context = context;
201 226
202 String contid = connector.getIDfromPID(obj.pid, context); 227 String contid=null;
228
229 if (obj.pid!=null)
230 contid = connector.getIDfromPID(obj.pid, context);
203 if (contid != null) { 231 if (contid != null) {
204 System.out.println("------- belongsTo:" + contid); 232 System.out.println("------- belongsTo:" + contid);
205 } else { 233 } else {
206 234
207 eSciDocXmlObject escidocItem = transformer.transform(obj); 235 eSciDocXmlObject escidocItem = transformer.transform(obj);
208 236
209 237
210 try { 238 try {
211 logger.info(escidocItem.printXML()); 239 logger.info(escidocItem.printXML());
212 // TODO write PID to back to echo-obj 240 // TODO write PID to back to echo-obj
213 Boolean result = connector.createItem(escidocItem); 241 Boolean result = connector.createItem(escidocItem);
214 if (result) { 242 if (result) {
215 addedObjects.add(escidocItem.getESciDocId()); 243 addedObjects.add(escidocItem.getESciDocId());
216 addedFile.debug(escidocItem.getESciDocId() + "\n"); 244 addedFile.debug(escidocItem.getESciDocId() + "\n");
217 245
218 } else { 246 } else {
219 notAddedObjects.add(obj.echoUrl); 247 notAddedObjects.add(obj.echoUrl);
220 notAddedFile.debug(obj.echoUrl); 248 notAddedFile.debug(obj.echoUrl);
221 249
222 } 250 }
223 251
224 } catch (IOException e) { 252 } catch (IOException e) {
225 // TODO Auto-generated catch block 253 // TODO Auto-generated catch block
226 e.printStackTrace(); 254 e.printStackTrace();
227 throw new ESciDocXmlObjectException(); 255 throw new ESciDocXmlObjectException();
228 } catch (JDOMException e) { 256 } catch (JDOMException e) {
229 // TODO Auto-generated catch block 257 // TODO Auto-generated catch block
230 e.printStackTrace(); 258 e.printStackTrace();
231 throw new ESciDocXmlObjectException(); 259 throw new ESciDocXmlObjectException();
232 } 260 }
233 261
234 } 262 }
235 } 263 }
236 if (logger.getLevel() == Level.DEBUG) { 264 if (logger.getLevel() == Level.DEBUG) {
237 for (String addedObject : addedObjects) { 265 for (String addedObject : addedObjects) {
238 logger.debug(addedObject); 266 logger.debug(addedObject);
240 } 268 }
241 269
242 return true; 270 return true;
243 } 271 }
244 272
245 273
246 274
247 275
248 /** 276 /**
249 * Deal with existing objects, do nothing if md5 of stored metadata and metadata on the server is the same otherwise call {@link #updateObject(ECHOObject)}. 277 * Deal with existing objects, do nothing if md5 of stored metadata and metadata on the server is the same otherwise call {@link #updateObject(ECHOObject)}.
250 * @param objNew 278 * @param objNew
251 * @param old 279 * @param old
255 private void handleExistingObject(ECHOObject objNew, ECHOObject old) throws TransformerException, ESciDocXmlObjectException { 283 private void handleExistingObject(ECHOObject objNew, ECHOObject old) throws TransformerException, ESciDocXmlObjectException {
256 ECHORessource objNewRes = (ECHORessource)objNew; 284 ECHORessource objNewRes = (ECHORessource)objNew;
257 ECHORessource objOldRes = (ECHORessource)old; 285 ECHORessource objOldRes = (ECHORessource)old;
258 String md5onServer = objNewRes.getIndexMetaMD5onServer(); 286 String md5onServer = objNewRes.getIndexMetaMD5onServer();
259 String md5=objOldRes.getIndexMetaMD5stored(); 287 String md5=objOldRes.getIndexMetaMD5stored();
288
289 objNewRes.writeEsciDocIDToIndexMeta(objOldRes.eScidocId);
290
260 if (md5onServer.equals(md5)) 291 if (md5onServer.equals(md5))
261 return; 292 return;
262 else { 293 else {
294
263 updateObject(objNew, old); 295 updateObject(objNew, old);
264 } 296
265 297
298 }
299
266 } 300 }
267 301
268 private void updateObject(ECHOObject objNew, ECHOObject objOld) throws TransformerException, ESciDocXmlObjectException { 302 private void updateObject(ECHOObject objNew, ECHOObject objOld) throws TransformerException, ESciDocXmlObjectException {
269 objNew.context = context; 303 objNew.context = context;
304
305 //erzeuge erst einmal ein neues XML Object aus den neuen Daten.
270 eSciDocXmlObject escidocItem = transformer.transform(objNew); 306 eSciDocXmlObject escidocItem = transformer.transform(objNew);
271 String lastModificationDateOld = objOld.lastModificationDate; 307 String lastModificationDateOld = objOld.lastModificationDate;
308
309 //jetzt das alte ModeificationDate dort rein (wegen, optimitistic locking)
272 escidocItem.setLastModificationDate(lastModificationDateOld); 310 escidocItem.setLastModificationDate(lastModificationDateOld);
273 try { 311 try {
274 HttpResponse ret = connector.eScidocPut(objOld.eScidocId, EScidocBasicHandler.convertStringToStream(escidocItem.printXML())); 312 HttpResponse ret = connector.eScidocPut("/ir/item/"+objOld.eScidocId, EScidocBasicHandler.convertStringToStream(escidocItem.printXML()));
275 HttpEntity ent = ret.getEntity(); 313 HttpEntity ent = ret.getEntity();
276 if (ret.getStatusLine().getStatusCode() != 200) { 314 if (ret.getStatusLine().getStatusCode() != 200) {
277 logger.debug("Can not update:" + objOld.eScidocId); 315 logger.debug("Can not update:" + objOld.eScidocId);
278 // res.getEntity().consumeContent(); // necessary to release 316 // res.getEntity().consumeContent(); // necessary to release
279 // the conneciton 317 // the conneciton
287 e.printStackTrace(); 325 e.printStackTrace();
288 } catch (IOException e) { 326 } catch (IOException e) {
289 // TODO Auto-generated catch block 327 // TODO Auto-generated catch block
290 e.printStackTrace(); 328 e.printStackTrace();
291 } 329 }
292 330
293 addedFile.debug("updated:"+objOld.eScidocId); 331 addedFile.debug("updated:"+objOld.eScidocId);
294 } 332 }
295 333
296 /** 334 /**
297 * @param command 335 * @param command
298 * @param objectXPath 336 * @param objectXPath
337 * @param comment der in escidoc bei der Operation abgespeichert wird.
299 * @param mode 0 : only submit, 1:only release, 2:release and submit 338 * @param mode 0 : only submit, 1:only release, 2:release and submit
300 * @throws Exception 339 * @throws Exception
301 */ 340 */
302 public void releaseAndSubmitObjects(String command, String objectXPath,int mode) 341 public void releaseAndSubmitObjects(String command, String objectXPath,String comment,int mode)
303 throws Exception { 342 throws Exception {
304 343
305 Integer numberOfHits = connector.getNumberOfHitsFromFilterResult( 344 Integer numberOfHits = connector.getNumberOfHitsFromFilterResult(
306 command, objectXPath,mode); 345 command, objectXPath,mode);
307 346
308 347
309 int tausend = ((numberOfHits-1) / MAX_REC); 348 int tausend = ((numberOfHits-1) / MAX_REC);
310 349
311 String queryRestrict=""; 350 String queryRestrict="";
312 if(mode==0 | mode==2){ 351 if(mode==0 | mode==2){
313 queryRestrict="query=%22/properties/version/status%22=pending"; 352 queryRestrict="query=%22/properties/version/status%22=pending";
314 } else { 353 } else {
315 queryRestrict="query=%22/properties/version/status%22=submitted"; 354 queryRestrict="query=%22/properties/version/status%22=submitted";
316 } 355 }
317 356
318 for (int t = 0; t <= tausend; t++) { 357 for (int t = 0; t <= tausend; t++) {
319 int start = t * MAX_REC+1; 358 int start = t * MAX_REC+1;
320 // int max=Math.min((t+1)*1000, numberOfHits); 359 // int max=Math.min((t+1)*1000, numberOfHits);
321 String query = "?maximumRecords="+String.valueOf(MAX_REC)+"&startRecord=" 360 String query = "?maximumRecords="+String.valueOf(MAX_REC)+"&startRecord="
322 + String.valueOf(start)+"&"+queryRestrict; 361 + String.valueOf(start)+"&"+queryRestrict;
323 for (eSciDocXmlObject obj : connector 362 for (eSciDocXmlObject obj : connector
324 .getObjectsFromFilterResult(command+query, objectXPath)) { 363 .getObjectsFromFilterResult(command+query, objectXPath)) {
325 364
326 //TODO is the following really necessary, currently the obj in the list is sometimes not the current one. 365 //TODO is the following really necessary, currently the obj in the list is sometimes not the current one.
327 try{ 366 try{
328 HttpResponse resObj = connector.eScidocGet(obj.getESciDocId()); 367 HttpResponse resObj = connector.eScidocGet(obj.getESciDocId());
329 HttpEntity ent = resObj.getEntity(); 368 HttpEntity ent = resObj.getEntity();
330 if (ent!=null){ 369 if (ent!=null){
331 obj= new eSciDocXmlObject(ent.getContent()); 370 obj= new eSciDocXmlObject(ent.getContent());
332 } else { 371 } else {
333 logger.debug("Can not retrieve:" + obj.getESciDocId()); 372 logger.debug("Can not retrieve:" + obj.getESciDocId());
334 continue; 373 continue;
335 } 374 }
336 } catch (Exception e){ 375 } catch (Exception e){
337 logger.debug("Can not retrieve:" + obj.getESciDocId()); 376 logger.debug("Can not retrieve:" + obj.getESciDocId());
338 continue; 377 continue;
339 } 378 }
340 if (mode==0 | mode==2){ 379 if (mode==0 | mode==2){
341 HttpResponse res = connector.submitAnObject(obj, 380 HttpResponse res = connector.submitAnObject(obj,
342 "first release"); 381 comment);
343 logger.debug(res.getStatusLine()); 382 logger.debug(res.getStatusLine());
344 383
345 if (res.getStatusLine().getStatusCode() != 200) { 384 if (res.getStatusLine().getStatusCode() != 200) {
346 logger.debug("Can not submit:" + obj.getESciDocId()); 385 logger.debug("Can not submit:" + obj.getESciDocId());
347 // res.getEntity().consumeContent(); // necessary to release 386 // res.getEntity().consumeContent(); // necessary to release
348 // the conneciton 387 // the conneciton
349 388
350 } 389 }
351 InputStream restream = res.getEntity().getContent(); 390 InputStream restream = res.getEntity().getContent();
352 logger.debug(EScidocBasicHandler.convertStreamToString(restream)); 391 logger.debug(EScidocBasicHandler.convertStreamToString(restream));
353 //res.getEntity().consumeContent(); // necessary to release the 392 //res.getEntity().consumeContent(); // necessary to release the
354 // conneciton 393 // conneciton
355 394
356 if (!connector.upDateObject(obj)) { 395 if (!connector.upDateObject(obj)) {
357 logger.debug("Can not update:" + obj.getESciDocId()); 396 logger.debug("Can not update:" + obj.getESciDocId());
358 // continue; 397 // continue;
359 398
360 } 399 }
361 } 400 }
362 401
363 if (mode==1 | mode==2){ 402 if (mode==1 | mode==2){
364 HttpResponse res = connector.releaseAnObject(obj, "first release"); 403 HttpResponse res = connector.releaseAnObject(obj, comment);
365 logger.debug(res.getStatusLine()); 404 logger.debug(res.getStatusLine());
366 if (res.getStatusLine().getStatusCode() != 200) { 405 if (res.getStatusLine().getStatusCode() != 200) {
367 logger.debug("Can not release:" + obj.getESciDocId()); 406 logger.debug("Can not release:" + obj.getESciDocId());
368 res.getEntity().consumeContent(); // necessary to release 407 res.getEntity().consumeContent(); // necessary to release
369 // the conneciton 408 // the conneciton
370 continue; 409 continue;
371 } 410 }
372 addedFile.debug("RELEASED:" + obj.getESciDocId()); 411 addedFile.debug("RELEASED:" + obj.getESciDocId());
373 res.getEntity().consumeContent(); // necessary to release the 412 res.getEntity().consumeContent(); // necessary to release the
374 // connecito 413 // connecito
375 } 414 }
376 } 415 }
377 } 416 }
378 417
379 } 418 }
380 419
381 public static void main(String[] args) throws Exception { 420 public static void main(String[] args) throws Exception {
382 421
383 Logger rl = Logger.getRootLogger(); 422 Logger rl = Logger.getRootLogger();
384 DOMConfigurator.configure("log4uconf.xml"); 423 DOMConfigurator.configure("/etc/escidocImportConfig.xml");
385 rl.setLevel(Level.DEBUG); 424 rl.setLevel(Level.DEBUG);
386 425
387 426
388 EScidocBasicHandler connector = new EScidocBasicHandler("escidoc-test.mpiwg-berlin.mpg.de",8080,"dwinter","weikiki7"); 427 EScidocBasicHandler connector = new EScidocBasicHandler("escidoc-test.mpiwg-berlin.mpg.de",8080,"dwinter","weikiki7");
389 428
390 429
430 //ECHOImporter newimporter = new ECHOImporter(new URL(
431 // "file:///Users/dwinter/libcoll.rdf"));
432
391 ECHOImporter newimporter = new ECHOImporter(new URL( 433 ECHOImporter newimporter = new ECHOImporter(new URL(
392 "file:///Users/dwinter/libcoll.rdf")); 434 "http://xserve09.mpiwg-berlin.mpg.de:19280/echo_nav/echo_pages/content/showRDF"));
393 ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter, 435 ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter,
394 new ECHOTransformer(), connector, "/ir/context/escidoc:1001"); 436 new ECHOTransformer(), connector, "/ir/context/escidoc:1001");
395 437
396 // hv.readObjectsFromInstance("ECHO_collection"); 438 // hv.readObjectsFromInstance("ECHO_collection");
397 // hv.readObjectsFromInstance("ECHO_resource"); 439 hv.readObjectsFromInstance("ECHO_resource");
398 440
399 hv.releaseAndSubmitObjects( 441 hv.releaseAndSubmitObjects(
400 "/ir/context/escidoc:1001/resources/members", 442 "/ir/context/escidoc:1001/resources/members",
401 "//escidocItem:item",0); 443 "//escidocItem:item","first release",0);
444 hv.releaseAndSubmitObjects(
445 "/ir/context/escidoc:1001/resources/members",
446 "//escidocItem:item","first release",1);
447
402 448
403 // newimporter.organizeRessourcesInCollections(connector, 449 // newimporter.organizeRessourcesInCollections(connector,
404 // "/ir/context/escidoc:1001"); 450 // "/ir/context/escidoc:1001");
405 // hv.releaseAndSubmitObjects("/ir/containers","//container:container"); 451 // hv.releaseAndSubmitObjects("/ir/containers","//container:container");
406 } 452 }