Mercurial > hg > eSciDocImport
comparison src/de/mpiwg/itgroup/eSciDoc/Tools/IngestECHO.java @ 0:c6929e63b0b8
first import
author | dwinter |
---|---|
date | Wed, 24 Nov 2010 16:52:07 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c6929e63b0b8 |
---|---|
1 package de.mpiwg.itgroup.eSciDoc.Tools; | |
2 | |
3 // TODO: create a context and a content model for ECHO | |
4 import java.io.BufferedReader; | |
5 import java.io.ByteArrayInputStream; | |
6 import java.io.ByteArrayOutputStream; | |
7 import java.io.IOException; | |
8 import java.io.InputStream; | |
9 import java.io.InputStreamReader; | |
10 import java.io.PrintStream; | |
11 import java.io.StringReader; | |
12 import java.net.MalformedURLException; | |
13 import java.net.URL; | |
14 import java.util.ArrayList; | |
15 import java.util.HashMap; | |
16 import java.util.List; | |
17 import java.util.regex.Matcher; | |
18 import java.util.regex.Pattern; | |
19 | |
20 import javax.xml.parsers.DocumentBuilder; | |
21 import javax.xml.parsers.DocumentBuilderFactory; | |
22 import javax.xml.xpath.XPath; | |
23 import javax.xml.xpath.XPathConstants; | |
24 import javax.xml.xpath.XPathFactory; | |
25 | |
26 import org.apache.http.HttpResponse; | |
27 import org.apache.http.client.ClientProtocolException; | |
28 import org.apache.xmlrpc.XmlRpcException; | |
29 import org.apache.xmlrpc.client.XmlRpcClient; | |
30 import org.apache.xmlrpc.client.XmlRpcClientConfigImpl; | |
31 import org.jdom.JDOMException; | |
32 import org.w3c.dom.Document; | |
33 import org.w3c.dom.NodeList; | |
34 import org.xml.sax.InputSource; | |
35 import org.xml.sax.SAXParseException; | |
36 | |
37 import sun.misc.Regexp; | |
38 | |
39 //import fedora.client.FedoraClient; | |
40 //import fedora.server.access.FedoraAPIA; | |
41 //import fedora.server.management.FedoraAPIM; | |
42 //import fedora.server.types.gen.ComparisonOperator; | |
43 //import fedora.server.types.gen.Condition; | |
44 //import fedora.server.types.gen.FieldSearchQuery; | |
45 //import fedora.server.types.gen.FieldSearchResult; | |
46 //import fedora.server.types.gen.ListSession; | |
47 //import fedora.server.types.gen.MIMETypedStream; | |
48 //import fedora.server.types.gen.ObjectFields; | |
49 | |
50 public class IngestECHO extends Ingestor { | |
51 | |
52 protected String ECHORESOURCE_TEMPLATE_XML; | |
53 protected String ECHOCONTAINER_TEMPLATE_XML; | |
54 private String SERVLETURL; | |
55 protected String ECHOURL; | |
56 protected String ECHO_CONTAINER_ID; | |
57 protected String ECHO_ROOT_ID; | |
58 protected String MAIN_CONTEXT; | |
59 private HashMap<String, String> pids; | |
60 | |
61 protected static String ESCIDOC_SERVER_URL = "euler.mpiwg-berlin.mpg.de"; | |
62 protected static String ZOPEPROVIDER = "http://127.0.0.1:18080"; | |
63 | |
64 private static int PORT = 8080; | |
65 | |
/**
 * Creates an ingestor that talks to the default eSciDoc server, port and
 * Zope provider configured in the static fields of this class.
 *
 * @param user     user name forwarded to the superclass constructor
 * @param password password forwarded to the superclass constructor
 */
IngestECHO(String user, String password) {
    super(ESCIDOC_SERVER_URL, PORT, ZOPEPROVIDER, user, password);

    ECHORESOURCE_TEMPLATE_XML = "ECHOResourceTemplate.xml";
    // NOTE(review): ECHOCONTAINER_TEMPLATE_XML is not assigned here although
    // ingestECHOCollection passes it to eSciDocXmlObject - confirm where it is set.

    SERVLETURL = "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=";
    ECHOURL = "http://echo.mpiwg-berlin.mpg.de";

    // container holding all ECHO objects
    ECHO_CONTAINER_ID = "escidoc:3006";
    // container holding all objects that belong to no ECHO collection
    ECHO_ROOT_ID = "escidoc:3005";

    MAIN_CONTEXT = "escidoc:3002";

    // Reset the PID cache field itself; it is filled lazily on first use.
    // (Previously a local variable of the same name shadowed the field here.)
    pids = null;
}
89 | |
90 void ingestECHOCollections() throws XmlRpcException, IOException { | |
91 ArrayList<String> urls = getAllCollections(); | |
92 HashMap<String, String> success = new HashMap<String, String>(); | |
93 HashMap<String, String> nosuccess = new HashMap<String, String>(); | |
94 | |
95 for (String url : urls) { | |
96 | |
97 try { | |
98 String id = ingestECHOCollection(url); | |
99 success.put(id, url); | |
100 } catch (Exception e) { | |
101 | |
102 ByteArrayOutputStream out = new ByteArrayOutputStream(); | |
103 PrintStream s = new PrintStream(out); | |
104 e.printStackTrace(s); | |
105 | |
106 nosuccess.put(url, out.toString()); | |
107 | |
108 e.printStackTrace(); | |
109 } | |
110 } | |
111 System.out.println("SUCCESSFULL INGEST"); | |
112 for (String id : success.keySet()) | |
113 System.out.println("ID:" + id + " URL:" + success.get(id)); | |
114 | |
115 System.out.println("ERRORS:"); | |
116 for (String id : nosuccess.keySet()) { | |
117 System.out.println("URL:" + id); | |
118 System.out.println("Message:" + nosuccess.get(id)); | |
119 } | |
120 | |
121 } | |
122 | |
123 void organizeECHOCollections() throws XmlRpcException, IOException, | |
124 JDOMException { | |
125 ArrayList<String> urls = getAllCollections(); | |
126 HashMap<String, String> success = new HashMap<String, String>(); | |
127 HashMap<String, String> nosuccess = new HashMap<String, String>(); | |
128 | |
129 for (String url : urls) { | |
130 | |
131 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); | |
132 XmlRpcClient client = new XmlRpcClient(); | |
133 config.setServerURL(new URL(url)); | |
134 client.setConfig(config); | |
135 | |
136 Object[] params = new Object[] {}; | |
137 | |
138 if (pids == null) { | |
139 pids = getPIDsAndEscidocIdsOfCollections(ECHO_CONTAINER_ID); | |
140 } | |
141 | |
142 try { | |
143 String parentPid; | |
144 String pid = (String) client.execute("getPID", params); | |
145 String contid = pids.get("mpiwg:" + pid); | |
146 addECHOObjectToCollection(client, contid); | |
147 success.put(pid, url); | |
148 } catch (Exception e) { | |
149 | |
150 ByteArrayOutputStream out = new ByteArrayOutputStream(); | |
151 PrintStream s = new PrintStream(out); | |
152 e.printStackTrace(s); | |
153 | |
154 nosuccess.put(url, out.toString()); | |
155 | |
156 e.printStackTrace(); | |
157 } | |
158 } | |
159 System.out.println("SUCCESSFULL ORGANIZED"); | |
160 for (String id : success.keySet()) | |
161 System.out.println("ID:" + id + " URL:" + success.get(id)); | |
162 | |
163 System.out.println("ERRORS:"); | |
164 for (String id : nosuccess.keySet()) { | |
165 System.out.println("URL:" + id); | |
166 System.out.println("Message:" + nosuccess.get(id)); | |
167 } | |
168 | |
169 } | |
170 | |
171 void organizeECHORessources() throws XmlRpcException, IOException, | |
172 JDOMException { | |
173 ArrayList<String> urls = getAllResources(); | |
174 HashMap<String, String> success = new HashMap<String, String>(); | |
175 HashMap<String, String> nosuccess = new HashMap<String, String>(); | |
176 | |
177 for (String url : urls) { | |
178 | |
179 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); | |
180 XmlRpcClient client = new XmlRpcClient(); | |
181 config.setServerURL(new URL(url)); | |
182 client.setConfig(config); | |
183 | |
184 Object[] params = new Object[] {}; | |
185 | |
186 if (pids == null) { | |
187 pids = getPIDsAndEscidocIdsOfCollections(ECHO_CONTAINER_ID); | |
188 } | |
189 | |
190 try { | |
191 String parentPid; | |
192 String pid = (String) client.execute("getPID", params); | |
193 String contid = getIDfromPID("mpiwg:" + pid); | |
194 addECHOObjectToCollection(client, contid); | |
195 success.put(pid, url); | |
196 } catch (Exception e) { | |
197 | |
198 ByteArrayOutputStream out = new ByteArrayOutputStream(); | |
199 PrintStream s = new PrintStream(out); | |
200 e.printStackTrace(s); | |
201 | |
202 nosuccess.put(url, out.toString()); | |
203 | |
204 e.printStackTrace(); | |
205 } | |
206 } | |
207 System.out.println("SUCCESSFULL ORGANIZED"); | |
208 for (String id : success.keySet()) | |
209 System.out.println("ID:" + id + " URL:" + success.get(id)); | |
210 | |
211 System.out.println("ERRORS:"); | |
212 for (String id : nosuccess.keySet()) { | |
213 System.out.println("URL:" + id); | |
214 System.out.println("Message:" + nosuccess.get(id)); | |
215 } | |
216 | |
217 } | |
218 | |
219 | |
220 | |
221 private String getIDfromPID(String pid) throws ClientProtocolException, | |
222 IOException { | |
223 InputStream res = getXMLfromPID(pid,MAIN_CONTEXT); | |
224 return EScidocBasicHandler.getId(EScidocBasicHandler | |
225 .convertStreamToString(res)); | |
226 } | |
227 | |
228 /** | |
229 * FŸgt die ECHO Collection unter der URL in eScidoc ein. Der Link auf die | |
230 * Web-Seite wird in einem eigenen item hinterlegt, dass in Collection | |
231 * eingefŸgt wird. | |
232 * | |
233 * @param url | |
234 * @throws Exception | |
235 */ | |
236 private String ingestECHOCollection(String url) throws Exception { | |
237 | |
238 // get a PID for the Collection | |
239 System.out.println("Processing:" + url); | |
240 | |
241 HashMap<String, String> dcs = new HashMap<String, String>(); // Store | |
242 // for | |
243 // the | |
244 // metadata | |
245 | |
246 // Verbinde dich mit der Collection Ÿber XML-rpc | |
247 | |
248 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); | |
249 XmlRpcClient client = new XmlRpcClient(); | |
250 config.setServerURL(new URL(url)); | |
251 client.setConfig(config); | |
252 | |
253 String pid = getOrCreatePID(client); | |
254 | |
255 if (pidAlreadyExists("mpiwg:"+pid)) | |
256 { | |
257 System.out.println("PID:"+pid); | |
258 String contid=getIDfromPID("mpiwg:"+pid); | |
259 System.out.println("------- belongsTo:"+contid); | |
260 return contid; | |
261 } | |
262 Object[] params = new Object[] {}; | |
263 | |
264 eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:" + pid, | |
265 ECHOCONTAINER_TEMPLATE_XML); | |
266 | |
267 String result = (String) client.execute("getDescription", params); | |
268 | |
269 String x = new String(result.getBytes("UTF-8"), ("UTF-8")); | |
270 // System.out.println("DESCR"+x); | |
271 dcs.put("description", x); | |
272 String title = (String) client.execute("getTitle", params); | |
273 dcs.put("title", title); | |
274 obj.insertDC(dcs); | |
275 obj.addOrigUrlToMPIWGMetaData(url); | |
276 | |
277 // obj.setRelationship("info:fedora/echo:col1"); | |
278 String xml = obj.printXML(); | |
279 // System.out.println(xml); | |
280 String ret = ingest("/ir/container", xml); | |
281 String xr = ingestCollectionWebSite(title, url); | |
282 // System.out.println(xr); | |
283 String objid = EScidocBasicHandler.getId(xr); | |
284 String dateStamp = EScidocBasicHandler.getDateStamp(ret); | |
285 String addTxt = "<param last-modification-date=\"" + dateStamp + "\">"; | |
286 addTxt += "<id>" + objid + "</id>"; | |
287 addTxt += "</param>"; | |
288 | |
289 String contid = EScidocBasicHandler.getId(ret); | |
290 | |
291 ByteArrayInputStream stream = new ByteArrayInputStream(addTxt | |
292 .getBytes("utf-8")); | |
293 | |
294 eSciDocHandler.eScidocPost("/ir/container/" + contid + "/members/add", | |
295 stream); | |
296 // System.out.println(response.getStatusLine()); | |
297 // System.out.println(EScidocBasicHandler.convertStreamToString(response.getEntity().getContent())); | |
298 System.out.println("Processed:" + url + "------>" + contid); | |
299 | |
300 addToCollection(ECHO_CONTAINER_ID, contid); | |
301 | |
302 params = new Object[] { pid }; | |
303 client.execute("setPID", params); | |
304 | |
305 System.out.println(ret); | |
306 | |
307 addECHOObjectToCollection(client, contid); | |
308 return contid; | |
309 | |
310 } | |
311 | |
312 public ArrayList<String> findMissingItems() throws XmlRpcException, IOException{ | |
313 return findMissingItemsFromECHOUrls(getAllResources()); | |
314 } | |
315 | |
316 public ArrayList<String> findMissingCollections() throws XmlRpcException, IOException{ | |
317 return findMissingItemsFromECHOUrls(getAllCollections()); | |
318 } | |
319 | |
320 public ArrayList<String> findMissingItemsFromECHOUrls(List<String> urls) throws XmlRpcException, IOException{ | |
321 //ArrayList<String> urls = getAllCollections(); | |
322 System.out.println("GOT the collections"); | |
323 ArrayList<String> ret = new ArrayList<String>(); | |
324 for (String url : urls) { | |
325 System.out.println("checking:"+url); | |
326 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); | |
327 XmlRpcClient client = new XmlRpcClient(); | |
328 | |
329 config.setServerURL(new URL(url)); | |
330 client.setConfig(config); | |
331 String pid; | |
332 try { | |
333 Object[] parameters = new Object[] {}; | |
334 | |
335 pid = (String) client.execute("getPID", parameters); | |
336 } catch (Exception e) { | |
337 pid = null; | |
338 } | |
339 | |
340 if (pid == null){ | |
341 ret.add(url); | |
342 System.out.println(" -- no pid"); | |
343 } else { | |
344 String id; | |
345 try { | |
346 id = getIDfromPID("mpiwg:"+pid); | |
347 } catch (Exception e) { | |
348 id = "NO"; | |
349 ret.add(url); | |
350 } | |
351 | |
352 | |
353 System.out.println(" -- id:"+id); | |
354 } | |
355 | |
356 | |
357 | |
358 } | |
359 return ret; | |
360 } | |
361 private String getOrCreatePID(XmlRpcClient client) throws XmlRpcException, | |
362 MalformedURLException { | |
363 Object[] parameters = new Object[] {}; | |
364 | |
365 String pid = null; | |
366 | |
367 // Hole pid aus ECHO | |
368 try { | |
369 pid = (String) client.execute("getPID", parameters); | |
370 } catch (Exception e) { | |
371 pid = null; | |
372 } | |
373 // Falls dort noch keine ist, erzeuge ein neue | |
374 if (pid == null) | |
375 pid = getID(); | |
376 else | |
377 System.out.println("PID from ECHO:" + pid); | |
378 | |
379 return pid; | |
380 } | |
381 | |
382 private void addECHOObjectToCollection(XmlRpcClient client, String contid) | |
383 throws ClientProtocolException, IOException, JDOMException { | |
384 Object[] params; | |
385 params = new Object[] {}; | |
386 | |
387 if (pids == null) { | |
388 pids = getPIDsAndEscidocIdsOfCollections(ECHO_CONTAINER_ID); | |
389 } | |
390 | |
391 String parentId; | |
392 String parentPid; | |
393 try { | |
394 parentPid = (String) client.execute("getParentPID", params); | |
395 parentId = pids.get("mpiwg:" + parentPid); | |
396 } catch (Exception e) { | |
397 parentId = ECHO_ROOT_ID; | |
398 } | |
399 addToCollection(parentId, contid); | |
400 | |
401 } | |
402 | |
403 private String ingestCollectionWebSite(String title, String url) | |
404 throws Exception { | |
405 String pid = getID(); | |
406 eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:" + pid, | |
407 "ECHOCollectionWebRepresentationTemplate.xml"); | |
408 | |
409 HashMap<String, String> dcs = new HashMap<String, String>(); | |
410 | |
411 obj.addWebUrl(url); | |
412 // obj.setRelationship("info:fedora/echo:col1"); | |
413 | |
414 dcs.put("title", title); // ersatzweise den titel aus der echo | |
415 // collection | |
416 obj.insertDC(dcs); | |
417 | |
418 String xml = obj.printXML(); | |
419 // System.out.println(xml); | |
420 String res = ingest("/ir/item", xml); | |
421 return res; | |
422 } | |
423 | |
424 public void ingestECHOResources() throws IOException { | |
425 ingestECHOResources(null); | |
426 | |
427 } | |
428 public void ingestECHOResources(Pattern match) throws IOException { | |
429 ArrayList<String> urls = getAllResources(); | |
430 HashMap<String, String> success = new HashMap<String, String>(); | |
431 HashMap<String, String> nosuccess = new HashMap<String, String>(); | |
432 | |
433 for (String url : urls) { | |
434 | |
435 try { | |
436 Boolean ingest=false; | |
437 | |
438 if (match == null) | |
439 ingest=true; | |
440 else { | |
441 Matcher m = match.matcher(url); | |
442 if (m.matches()) | |
443 ingest=true; | |
444 | |
445 } | |
446 if (ingest){ | |
447 String id = ingestECHOResource(url); | |
448 success.put(id, url); | |
449 } | |
450 } catch (Exception e) { | |
451 | |
452 ByteArrayOutputStream out = new ByteArrayOutputStream(); | |
453 PrintStream s = new PrintStream(out); | |
454 e.printStackTrace(s); | |
455 | |
456 nosuccess.put(url, out.toString()); | |
457 | |
458 e.printStackTrace(); | |
459 } | |
460 } | |
461 System.out.println("SUCCESSFULL INGEST"); | |
462 for (String id : success.keySet()) | |
463 System.out.println("ID:" + id + " URL:" + success.get(id)); | |
464 | |
465 System.out.println("ERRORS:"); | |
466 for (String id : nosuccess.keySet()) { | |
467 System.out.println("URL:" + id); | |
468 System.out.println("Message:" + nosuccess.get(id)); | |
469 } | |
470 | |
471 } | |
472 | |
473 protected ArrayList<String> getAllResources() throws IOException { | |
474 URL echoUrl = new URL(ECHOURL + "/getResourcesXML"); | |
475 Pattern p = Pattern.compile("echoLink=\"([^\"]*)\""); | |
476 BufferedReader in = new BufferedReader(new InputStreamReader(echoUrl | |
477 .openStream())); | |
478 | |
479 ArrayList<String> ret = new ArrayList<String>(); | |
480 String inputLine; | |
481 Matcher m; | |
482 while ((inputLine = in.readLine()) != null) { | |
483 m = p.matcher(inputLine); | |
484 String lit; | |
485 if (m.find()) { | |
486 | |
487 lit = m.group(1); | |
488 ret.add(lit); | |
489 } | |
490 } | |
491 | |
492 in.close(); | |
493 return ret; | |
494 | |
495 } | |
496 | |
497 protected String ingestECHOResource(String url) throws Exception { | |
498 return ingestECHOResource(url, false); | |
499 } | |
500 | |
501 protected String ingestECHOResource(String url,boolean withfullText) throws Exception { | |
502 | |
503 System.out.println("Starting:" + url); | |
504 | |
505 HashMap<String, String> dcs = new HashMap<String, String>(); | |
506 | |
507 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); | |
508 XmlRpcClient client = new XmlRpcClient(); | |
509 config.setServerURL(new URL(url)); | |
510 client.setConfig(config); | |
511 | |
512 String pid = getOrCreatePID(client); | |
513 | |
514 if (pidAlreadyExists("mpiwg:"+pid)) | |
515 { | |
516 System.out.println("PID:"+pid); | |
517 String contid=getIDfromPID("mpiwg:"+pid); | |
518 System.out.println("------- belongsTo:"+contid); | |
519 return contid; | |
520 } | |
521 eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:" + pid, | |
522 ECHORESOURCE_TEMPLATE_XML); | |
523 Object[] params = new Object[] {}; | |
524 | |
525 String title = (String) client.execute("getTitle", params); | |
526 String ml = (String) client.execute("getMetaDataLink", params); | |
527 if (withfullText){ | |
528 String fulltextURL = url+"/getFullTextXML"; | |
529 obj.addFullText(fulltextURL); | |
530 | |
531 } | |
532 ml = correctML(ml); | |
533 | |
534 obj.addWebUrl(url); | |
535 obj.addOrigUrlToMPIWGMetaData(url); | |
536 | |
537 // obj.setRelationship("info:fedora/echo:col1"); | |
538 | |
539 config.setServerURL(new URL(ZOPEPROVIDER + "/metadataMain")); | |
540 client.setConfig(config); | |
541 params = new Object[] { ml }; | |
542 | |
543 try { | |
544 String result = (String) client.execute("getDCFormatted", params); | |
545 System.out.println("dC:"+result); | |
546 DocumentBuilderFactory factory = DocumentBuilderFactory | |
547 .newInstance(); | |
548 factory.setNamespaceAware(true); | |
549 DocumentBuilder db = factory.newDocumentBuilder(); | |
550 | |
551 InputSource resultStream = new InputSource(new StringReader(result)); | |
552 Document dc = db.parse(resultStream); | |
553 obj.insertDC(dc); | |
554 | |
555 Document indexmeta = db.parse(ml); | |
556 | |
557 XPath xpath = XPathFactory.newInstance().newXPath(); | |
558 xpath.setNamespaceContext(new EScidocNameSpaceContext()); | |
559 | |
560 NodeList test = (NodeList) xpath.evaluate("//meta", indexmeta, | |
561 XPathConstants.NODESET); | |
562 if (test.getLength() != 1) | |
563 { | |
564 test = (NodeList) xpath.evaluate("//mpiwg:meta", indexmeta, | |
565 XPathConstants.NODESET); | |
566 | |
567 if (test.getLength() !=1) | |
568 throw new Exception(); | |
569 } | |
570 obj.insertMeta(test.item(0)); | |
571 | |
572 obj.addIndexMetaUrl(ml); | |
573 | |
574 } catch (XmlRpcException e) { | |
575 System.err.println("Ressource:" + url); | |
576 System.err.println("METADATA CANNOT BE PARSED:" + ml); | |
577 HashMap<String, String> dc = new HashMap<String, String>(); | |
578 dc.put("title", title); // ersatzweise den titel aus der echo | |
579 // collection | |
580 obj.insertDC(dc); | |
581 } catch (SAXParseException e) { | |
582 System.err.println("METADATA RESULT CANNOT BE PARSED:"); | |
583 HashMap<String, String> dc = new HashMap<String, String>(); | |
584 dc.put("title", title); // ersatzweise den titel aus der echo | |
585 // collection | |
586 obj.insertDC(dc); | |
587 } | |
588 | |
589 String xml = obj.printXML(); | |
590 System.out.println(xml); | |
591 return "XXX"; | |
592 String result = ingest("/ir/item", xml); | |
593 // String contid = EScidocBasicHandler.getId(result); | |
594 // //String contid="NNNN"; | |
595 // System.out.println("------->" + contid); | |
596 // | |
597 // params = new Object[] { pid }; | |
598 // config.setServerURL(new URL(url)); | |
599 // client.setConfig(config); | |
600 // | |
601 // client.execute("setPID", params); | |
602 // addToCollection(ECHO_CONTAINER_ID, contid); | |
603 // | |
604 // addECHOObjectToCollection(client, contid); | |
605 // return contid; | |
606 | |
607 } | |
608 | |
609 private boolean pidAlreadyExists(String pid) { | |
610 String id; | |
611 try{ | |
612 id = getIDfromPID(pid); | |
613 } catch (Exception e){ | |
614 return false; | |
615 } | |
616 if (!id.equals("")) | |
617 return true; | |
618 return false; | |
619 } | |
620 | |
621 private String correctML(String ml) { | |
622 Pattern p = Pattern.compile("experimental/(.*)"); | |
623 Matcher m = p.matcher(ml); | |
624 String pf; | |
625 if (m.find()) | |
626 pf = "experimental/" + m.group(1); | |
627 else { | |
628 p = Pattern.compile("permanent/(.*)"); | |
629 m = p.matcher(ml); | |
630 if (m.find()) | |
631 pf = "permanent/" + m.group(1); | |
632 else | |
633 return ml; | |
634 } | |
635 return SERVLETURL + pf; | |
636 } | |
637 | |
638 protected ArrayList<String> getAllCollections() throws XmlRpcException, | |
639 IOException { | |
640 System.out.println("ECHO:"+ECHOURL); | |
641 URL echoUrl = new URL(ECHOURL + "/getCollectionsXML"); | |
642 Pattern p = Pattern.compile("echoLink=\"(.*)\""); | |
643 BufferedReader in = new BufferedReader(new InputStreamReader(echoUrl | |
644 .openStream())); | |
645 | |
646 ArrayList<String> ret = new ArrayList<String>(); | |
647 String inputLine; | |
648 Matcher m; | |
649 while ((inputLine = in.readLine()) != null) { | |
650 m = p.matcher(inputLine); | |
651 String lit; | |
652 if (m.find()) { | |
653 | |
654 lit = m.group(1); | |
655 ret.add(lit); | |
656 } | |
657 } | |
658 | |
659 in.close(); | |
660 return ret; | |
661 } | |
662 | |
663 private void submitAndReleaseAnObject(String href) throws ClientProtocolException, | |
664 IOException, JDOMException { | |
665 | |
666 addVersionPid(href); | |
667 HttpResponse res = submitAnObject(href, "submit"); | |
668 System.out.println(EScidocBasicHandler.convertStreamToString(res | |
669 .getEntity().getContent())); | |
670 res = releaseAnObject(href, "first release"); | |
671 System.out.println(EScidocBasicHandler.convertStreamToString(res | |
672 .getEntity().getContent())); | |
673 | |
674 } | |
675 | |
676 void releaseECHORessources() throws XmlRpcException, IOException, | |
677 JDOMException { | |
678 ArrayList<String> urls = getAllResources(); | |
679 HashMap<String, String> success = new HashMap<String, String>(); | |
680 HashMap<String, String> nosuccess = new HashMap<String, String>(); | |
681 int numOfUrl= urls.size(); | |
682 int count = 0; | |
683 for (String url : urls) { | |
684 | |
685 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); | |
686 XmlRpcClient client = new XmlRpcClient(); | |
687 config.setServerURL(new URL(url)); | |
688 client.setConfig(config); | |
689 | |
690 Object[] params = new Object[] {}; | |
691 | |
692 if (pids == null) { | |
693 pids = getPIDsAndEscidocIdsOfCollections(ECHO_CONTAINER_ID); | |
694 } | |
695 | |
696 try { | |
697 String parentPid; | |
698 String pid = (String) client.execute("getPID", params); | |
699 String contid = getIDfromPID("mpiwg:" + pid); | |
700 submitAndReleaseAnObject("/ir/item/"+contid); | |
701 success.put(pid, url); | |
702 } catch (Exception e) { | |
703 | |
704 ByteArrayOutputStream out = new ByteArrayOutputStream(); | |
705 PrintStream s = new PrintStream(out); | |
706 e.printStackTrace(s); | |
707 | |
708 nosuccess.put(url, out.toString()); | |
709 | |
710 e.printStackTrace(); | |
711 } | |
712 count+=1; | |
713 System.out.println("DONE:"+count+" of "+numOfUrl); | |
714 } | |
715 System.out.println("SUCCESSFULL ORGANIZED"); | |
716 for (String id : success.keySet()) | |
717 System.out.println("ID:" + id + " URL:" + success.get(id)); | |
718 | |
719 System.out.println("ERRORS:"); | |
720 for (String id : nosuccess.keySet()) { | |
721 System.out.println("URL:" + id); | |
722 System.out.println("Message:" + nosuccess.get(id)); | |
723 } | |
724 | |
725 } | |
726 } |