0
|
1 package de.mpiwg.itgroup.eSciDoc.Tools;
|
|
2
|
|
// TODO: create a context for ECHO and a content model
|
|
4 import java.io.BufferedReader;
|
|
5 import java.io.ByteArrayInputStream;
|
|
6 import java.io.ByteArrayOutputStream;
|
|
7 import java.io.IOException;
|
|
8 import java.io.InputStream;
|
|
9 import java.io.InputStreamReader;
|
|
10 import java.io.PrintStream;
|
|
11 import java.io.StringReader;
|
|
12 import java.net.MalformedURLException;
|
|
13 import java.net.URL;
|
|
14 import java.util.ArrayList;
|
|
15 import java.util.HashMap;
|
|
16 import java.util.List;
|
|
17 import java.util.regex.Matcher;
|
|
18 import java.util.regex.Pattern;
|
|
19
|
|
20 import javax.xml.parsers.DocumentBuilder;
|
|
21 import javax.xml.parsers.DocumentBuilderFactory;
|
|
22 import javax.xml.xpath.XPath;
|
|
23 import javax.xml.xpath.XPathConstants;
|
|
24 import javax.xml.xpath.XPathFactory;
|
|
25
|
|
26 import org.apache.http.HttpResponse;
|
|
27 import org.apache.http.client.ClientProtocolException;
|
|
28 import org.apache.xmlrpc.XmlRpcException;
|
|
29 import org.apache.xmlrpc.client.XmlRpcClient;
|
|
30 import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;
|
|
31 import org.jdom.JDOMException;
|
|
32 import org.w3c.dom.Document;
|
|
33 import org.w3c.dom.NodeList;
|
|
34 import org.xml.sax.InputSource;
|
|
35 import org.xml.sax.SAXParseException;
|
|
36
|
|
37 import sun.misc.Regexp;
|
|
38
|
|
39 //import fedora.client.FedoraClient;
|
|
40 //import fedora.server.access.FedoraAPIA;
|
|
41 //import fedora.server.management.FedoraAPIM;
|
|
42 //import fedora.server.types.gen.ComparisonOperator;
|
|
43 //import fedora.server.types.gen.Condition;
|
|
44 //import fedora.server.types.gen.FieldSearchQuery;
|
|
45 //import fedora.server.types.gen.FieldSearchResult;
|
|
46 //import fedora.server.types.gen.ListSession;
|
|
47 //import fedora.server.types.gen.MIMETypedStream;
|
|
48 //import fedora.server.types.gen.ObjectFields;
|
|
49
|
|
50 public class IngestECHO extends Ingestor {
|
|
51
|
|
52 protected String ECHORESOURCE_TEMPLATE_XML;
|
|
53 protected String ECHOCONTAINER_TEMPLATE_XML;
|
|
54 private String SERVLETURL;
|
|
55 protected String ECHOURL;
|
|
56 protected String ECHO_CONTAINER_ID;
|
|
57 protected String ECHO_ROOT_ID;
|
|
58 protected String MAIN_CONTEXT;
|
|
59 private HashMap<String, String> pids;
|
|
60
|
|
61 protected static String ESCIDOC_SERVER_URL = "euler.mpiwg-berlin.mpg.de";
|
|
62 protected static String ZOPEPROVIDER = "http://127.0.0.1:18080";
|
|
63
|
|
64 private static int PORT = 8080;
|
|
65
|
|
66 IngestECHO(String user, String password){
|
|
67
|
|
68 super(ESCIDOC_SERVER_URL, PORT, ZOPEPROVIDER, user, password);
|
|
69 ECHORESOURCE_TEMPLATE_XML = "ECHOResourceTemplate.xml";
|
|
70
|
|
71 SERVLETURL= "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=";
|
|
72
|
|
73 ECHOURL = "http://echo.mpiwg-berlin.mpg.de";
|
|
74
|
|
75 //ZOPEPROVIDER = "http://127.0.0.1:18080";
|
|
76
|
|
77 ECHO_CONTAINER_ID = "escidoc:3006"; // enthaelt alle ECHO
|
|
78 // objecte
|
|
79 ECHO_ROOT_ID = "escidoc:3005"; // enthaelt alle Objekte die
|
|
80 // keiner ECHO collection
|
|
81 // angehoeren
|
|
82
|
|
83 MAIN_CONTEXT = "escidoc:3002";
|
|
84
|
|
85 HashMap<String, String> pids = null;
|
|
86
|
|
87
|
|
88 }
|
|
89
|
|
90 void ingestECHOCollections() throws XmlRpcException, IOException {
|
|
91 ArrayList<String> urls = getAllCollections();
|
|
92 HashMap<String, String> success = new HashMap<String, String>();
|
|
93 HashMap<String, String> nosuccess = new HashMap<String, String>();
|
|
94
|
|
95 for (String url : urls) {
|
|
96
|
|
97 try {
|
|
98 String id = ingestECHOCollection(url);
|
|
99 success.put(id, url);
|
|
100 } catch (Exception e) {
|
|
101
|
|
102 ByteArrayOutputStream out = new ByteArrayOutputStream();
|
|
103 PrintStream s = new PrintStream(out);
|
|
104 e.printStackTrace(s);
|
|
105
|
|
106 nosuccess.put(url, out.toString());
|
|
107
|
|
108 e.printStackTrace();
|
|
109 }
|
|
110 }
|
|
111 System.out.println("SUCCESSFULL INGEST");
|
|
112 for (String id : success.keySet())
|
|
113 System.out.println("ID:" + id + " URL:" + success.get(id));
|
|
114
|
|
115 System.out.println("ERRORS:");
|
|
116 for (String id : nosuccess.keySet()) {
|
|
117 System.out.println("URL:" + id);
|
|
118 System.out.println("Message:" + nosuccess.get(id));
|
|
119 }
|
|
120
|
|
121 }
|
|
122
|
|
123 void organizeECHOCollections() throws XmlRpcException, IOException,
|
|
124 JDOMException {
|
|
125 ArrayList<String> urls = getAllCollections();
|
|
126 HashMap<String, String> success = new HashMap<String, String>();
|
|
127 HashMap<String, String> nosuccess = new HashMap<String, String>();
|
|
128
|
|
129 for (String url : urls) {
|
|
130
|
|
131 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
|
|
132 XmlRpcClient client = new XmlRpcClient();
|
|
133 config.setServerURL(new URL(url));
|
|
134 client.setConfig(config);
|
|
135
|
|
136 Object[] params = new Object[] {};
|
|
137
|
|
138 if (pids == null) {
|
|
139 pids = getPIDsAndEscidocIdsOfCollections(ECHO_CONTAINER_ID);
|
|
140 }
|
|
141
|
|
142 try {
|
|
143 String parentPid;
|
|
144 String pid = (String) client.execute("getPID", params);
|
|
145 String contid = pids.get("mpiwg:" + pid);
|
|
146 addECHOObjectToCollection(client, contid);
|
|
147 success.put(pid, url);
|
|
148 } catch (Exception e) {
|
|
149
|
|
150 ByteArrayOutputStream out = new ByteArrayOutputStream();
|
|
151 PrintStream s = new PrintStream(out);
|
|
152 e.printStackTrace(s);
|
|
153
|
|
154 nosuccess.put(url, out.toString());
|
|
155
|
|
156 e.printStackTrace();
|
|
157 }
|
|
158 }
|
|
159 System.out.println("SUCCESSFULL ORGANIZED");
|
|
160 for (String id : success.keySet())
|
|
161 System.out.println("ID:" + id + " URL:" + success.get(id));
|
|
162
|
|
163 System.out.println("ERRORS:");
|
|
164 for (String id : nosuccess.keySet()) {
|
|
165 System.out.println("URL:" + id);
|
|
166 System.out.println("Message:" + nosuccess.get(id));
|
|
167 }
|
|
168
|
|
169 }
|
|
170
|
|
171 void organizeECHORessources() throws XmlRpcException, IOException,
|
|
172 JDOMException {
|
|
173 ArrayList<String> urls = getAllResources();
|
|
174 HashMap<String, String> success = new HashMap<String, String>();
|
|
175 HashMap<String, String> nosuccess = new HashMap<String, String>();
|
|
176
|
|
177 for (String url : urls) {
|
|
178
|
|
179 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
|
|
180 XmlRpcClient client = new XmlRpcClient();
|
|
181 config.setServerURL(new URL(url));
|
|
182 client.setConfig(config);
|
|
183
|
|
184 Object[] params = new Object[] {};
|
|
185
|
|
186 if (pids == null) {
|
|
187 pids = getPIDsAndEscidocIdsOfCollections(ECHO_CONTAINER_ID);
|
|
188 }
|
|
189
|
|
190 try {
|
|
191 String parentPid;
|
|
192 String pid = (String) client.execute("getPID", params);
|
|
193 String contid = getIDfromPID("mpiwg:" + pid);
|
|
194 addECHOObjectToCollection(client, contid);
|
|
195 success.put(pid, url);
|
|
196 } catch (Exception e) {
|
|
197
|
|
198 ByteArrayOutputStream out = new ByteArrayOutputStream();
|
|
199 PrintStream s = new PrintStream(out);
|
|
200 e.printStackTrace(s);
|
|
201
|
|
202 nosuccess.put(url, out.toString());
|
|
203
|
|
204 e.printStackTrace();
|
|
205 }
|
|
206 }
|
|
207 System.out.println("SUCCESSFULL ORGANIZED");
|
|
208 for (String id : success.keySet())
|
|
209 System.out.println("ID:" + id + " URL:" + success.get(id));
|
|
210
|
|
211 System.out.println("ERRORS:");
|
|
212 for (String id : nosuccess.keySet()) {
|
|
213 System.out.println("URL:" + id);
|
|
214 System.out.println("Message:" + nosuccess.get(id));
|
|
215 }
|
|
216
|
|
217 }
|
|
218
|
|
219
|
|
220
|
|
221 private String getIDfromPID(String pid) throws ClientProtocolException,
|
|
222 IOException {
|
|
223 InputStream res = getXMLfromPID(pid,MAIN_CONTEXT);
|
|
224 return EScidocBasicHandler.getId(EScidocBasicHandler
|
|
225 .convertStreamToString(res));
|
|
226 }
|
|
227
|
|
228 /**
|
|
229 * FŸgt die ECHO Collection unter der URL in eScidoc ein. Der Link auf die
|
|
230 * Web-Seite wird in einem eigenen item hinterlegt, dass in Collection
|
|
231 * eingefŸgt wird.
|
|
232 *
|
|
233 * @param url
|
|
234 * @throws Exception
|
|
235 */
|
|
236 private String ingestECHOCollection(String url) throws Exception {
|
|
237
|
|
238 // get a PID for the Collection
|
|
239 System.out.println("Processing:" + url);
|
|
240
|
|
241 HashMap<String, String> dcs = new HashMap<String, String>(); // Store
|
|
242 // for
|
|
243 // the
|
|
244 // metadata
|
|
245
|
|
246 // Verbinde dich mit der Collection Ÿber XML-rpc
|
|
247
|
|
248 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
|
|
249 XmlRpcClient client = new XmlRpcClient();
|
|
250 config.setServerURL(new URL(url));
|
|
251 client.setConfig(config);
|
|
252
|
|
253 String pid = getOrCreatePID(client);
|
|
254
|
|
255 if (pidAlreadyExists("mpiwg:"+pid))
|
|
256 {
|
|
257 System.out.println("PID:"+pid);
|
|
258 String contid=getIDfromPID("mpiwg:"+pid);
|
|
259 System.out.println("------- belongsTo:"+contid);
|
|
260 return contid;
|
|
261 }
|
|
262 Object[] params = new Object[] {};
|
|
263
|
|
264 eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:" + pid,
|
|
265 ECHOCONTAINER_TEMPLATE_XML);
|
|
266
|
|
267 String result = (String) client.execute("getDescription", params);
|
|
268
|
|
269 String x = new String(result.getBytes("UTF-8"), ("UTF-8"));
|
|
270 // System.out.println("DESCR"+x);
|
|
271 dcs.put("description", x);
|
|
272 String title = (String) client.execute("getTitle", params);
|
|
273 dcs.put("title", title);
|
|
274 obj.insertDC(dcs);
|
|
275 obj.addOrigUrlToMPIWGMetaData(url);
|
|
276
|
|
277 // obj.setRelationship("info:fedora/echo:col1");
|
|
278 String xml = obj.printXML();
|
|
279 // System.out.println(xml);
|
|
280 String ret = ingest("/ir/container", xml);
|
|
281 String xr = ingestCollectionWebSite(title, url);
|
|
282 // System.out.println(xr);
|
|
283 String objid = EScidocBasicHandler.getId(xr);
|
|
284 String dateStamp = EScidocBasicHandler.getDateStamp(ret);
|
|
285 String addTxt = "<param last-modification-date=\"" + dateStamp + "\">";
|
|
286 addTxt += "<id>" + objid + "</id>";
|
|
287 addTxt += "</param>";
|
|
288
|
|
289 String contid = EScidocBasicHandler.getId(ret);
|
|
290
|
|
291 ByteArrayInputStream stream = new ByteArrayInputStream(addTxt
|
|
292 .getBytes("utf-8"));
|
|
293
|
|
294 eSciDocHandler.eScidocPost("/ir/container/" + contid + "/members/add",
|
|
295 stream);
|
|
296 // System.out.println(response.getStatusLine());
|
|
297 // System.out.println(EScidocBasicHandler.convertStreamToString(response.getEntity().getContent()));
|
|
298 System.out.println("Processed:" + url + "------>" + contid);
|
|
299
|
|
300 addToCollection(ECHO_CONTAINER_ID, contid);
|
|
301
|
|
302 params = new Object[] { pid };
|
|
303 client.execute("setPID", params);
|
|
304
|
|
305 System.out.println(ret);
|
|
306
|
|
307 addECHOObjectToCollection(client, contid);
|
|
308 return contid;
|
|
309
|
|
310 }
|
|
311
|
|
312 public ArrayList<String> findMissingItems() throws XmlRpcException, IOException{
|
|
313 return findMissingItemsFromECHOUrls(getAllResources());
|
|
314 }
|
|
315
|
|
316 public ArrayList<String> findMissingCollections() throws XmlRpcException, IOException{
|
|
317 return findMissingItemsFromECHOUrls(getAllCollections());
|
|
318 }
|
|
319
|
|
320 public ArrayList<String> findMissingItemsFromECHOUrls(List<String> urls) throws XmlRpcException, IOException{
|
|
321 //ArrayList<String> urls = getAllCollections();
|
|
322 System.out.println("GOT the collections");
|
|
323 ArrayList<String> ret = new ArrayList<String>();
|
|
324 for (String url : urls) {
|
|
325 System.out.println("checking:"+url);
|
|
326 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
|
|
327 XmlRpcClient client = new XmlRpcClient();
|
|
328
|
|
329 config.setServerURL(new URL(url));
|
|
330 client.setConfig(config);
|
|
331 String pid;
|
|
332 try {
|
|
333 Object[] parameters = new Object[] {};
|
|
334
|
|
335 pid = (String) client.execute("getPID", parameters);
|
|
336 } catch (Exception e) {
|
|
337 pid = null;
|
|
338 }
|
|
339
|
|
340 if (pid == null){
|
|
341 ret.add(url);
|
|
342 System.out.println(" -- no pid");
|
|
343 } else {
|
|
344 String id;
|
|
345 try {
|
|
346 id = getIDfromPID("mpiwg:"+pid);
|
|
347 } catch (Exception e) {
|
|
348 id = "NO";
|
|
349 ret.add(url);
|
|
350 }
|
|
351
|
|
352
|
|
353 System.out.println(" -- id:"+id);
|
|
354 }
|
|
355
|
|
356
|
|
357
|
|
358 }
|
|
359 return ret;
|
|
360 }
|
|
361 private String getOrCreatePID(XmlRpcClient client) throws XmlRpcException,
|
|
362 MalformedURLException {
|
|
363 Object[] parameters = new Object[] {};
|
|
364
|
|
365 String pid = null;
|
|
366
|
|
367 // Hole pid aus ECHO
|
|
368 try {
|
|
369 pid = (String) client.execute("getPID", parameters);
|
|
370 } catch (Exception e) {
|
|
371 pid = null;
|
|
372 }
|
|
373 // Falls dort noch keine ist, erzeuge ein neue
|
|
374 if (pid == null)
|
|
375 pid = getID();
|
|
376 else
|
|
377 System.out.println("PID from ECHO:" + pid);
|
|
378
|
|
379 return pid;
|
|
380 }
|
|
381
|
|
382 private void addECHOObjectToCollection(XmlRpcClient client, String contid)
|
|
383 throws ClientProtocolException, IOException, JDOMException {
|
|
384 Object[] params;
|
|
385 params = new Object[] {};
|
|
386
|
|
387 if (pids == null) {
|
|
388 pids = getPIDsAndEscidocIdsOfCollections(ECHO_CONTAINER_ID);
|
|
389 }
|
|
390
|
|
391 String parentId;
|
|
392 String parentPid;
|
|
393 try {
|
|
394 parentPid = (String) client.execute("getParentPID", params);
|
|
395 parentId = pids.get("mpiwg:" + parentPid);
|
|
396 } catch (Exception e) {
|
|
397 parentId = ECHO_ROOT_ID;
|
|
398 }
|
|
399 addToCollection(parentId, contid);
|
|
400
|
|
401 }
|
|
402
|
|
403 private String ingestCollectionWebSite(String title, String url)
|
|
404 throws Exception {
|
|
405 String pid = getID();
|
|
406 eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:" + pid,
|
|
407 "ECHOCollectionWebRepresentationTemplate.xml");
|
|
408
|
|
409 HashMap<String, String> dcs = new HashMap<String, String>();
|
|
410
|
|
411 obj.addWebUrl(url);
|
|
412 // obj.setRelationship("info:fedora/echo:col1");
|
|
413
|
|
414 dcs.put("title", title); // ersatzweise den titel aus der echo
|
|
415 // collection
|
|
416 obj.insertDC(dcs);
|
|
417
|
|
418 String xml = obj.printXML();
|
|
419 // System.out.println(xml);
|
|
420 String res = ingest("/ir/item", xml);
|
|
421 return res;
|
|
422 }
|
|
423
|
|
424 public void ingestECHOResources() throws IOException {
|
|
425 ingestECHOResources(null);
|
|
426
|
|
427 }
|
|
428 public void ingestECHOResources(Pattern match) throws IOException {
|
|
429 ArrayList<String> urls = getAllResources();
|
|
430 HashMap<String, String> success = new HashMap<String, String>();
|
|
431 HashMap<String, String> nosuccess = new HashMap<String, String>();
|
|
432
|
|
433 for (String url : urls) {
|
|
434
|
|
435 try {
|
|
436 Boolean ingest=false;
|
|
437
|
|
438 if (match == null)
|
|
439 ingest=true;
|
|
440 else {
|
|
441 Matcher m = match.matcher(url);
|
|
442 if (m.matches())
|
|
443 ingest=true;
|
|
444
|
|
445 }
|
|
446 if (ingest){
|
|
447 String id = ingestECHOResource(url);
|
|
448 success.put(id, url);
|
|
449 }
|
|
450 } catch (Exception e) {
|
|
451
|
|
452 ByteArrayOutputStream out = new ByteArrayOutputStream();
|
|
453 PrintStream s = new PrintStream(out);
|
|
454 e.printStackTrace(s);
|
|
455
|
|
456 nosuccess.put(url, out.toString());
|
|
457
|
|
458 e.printStackTrace();
|
|
459 }
|
|
460 }
|
|
461 System.out.println("SUCCESSFULL INGEST");
|
|
462 for (String id : success.keySet())
|
|
463 System.out.println("ID:" + id + " URL:" + success.get(id));
|
|
464
|
|
465 System.out.println("ERRORS:");
|
|
466 for (String id : nosuccess.keySet()) {
|
|
467 System.out.println("URL:" + id);
|
|
468 System.out.println("Message:" + nosuccess.get(id));
|
|
469 }
|
|
470
|
|
471 }
|
|
472
|
|
473 protected ArrayList<String> getAllResources() throws IOException {
|
|
474 URL echoUrl = new URL(ECHOURL + "/getResourcesXML");
|
|
475 Pattern p = Pattern.compile("echoLink=\"([^\"]*)\"");
|
|
476 BufferedReader in = new BufferedReader(new InputStreamReader(echoUrl
|
|
477 .openStream()));
|
|
478
|
|
479 ArrayList<String> ret = new ArrayList<String>();
|
|
480 String inputLine;
|
|
481 Matcher m;
|
|
482 while ((inputLine = in.readLine()) != null) {
|
|
483 m = p.matcher(inputLine);
|
|
484 String lit;
|
|
485 if (m.find()) {
|
|
486
|
|
487 lit = m.group(1);
|
|
488 ret.add(lit);
|
|
489 }
|
|
490 }
|
|
491
|
|
492 in.close();
|
|
493 return ret;
|
|
494
|
|
495 }
|
|
496
|
|
497 protected String ingestECHOResource(String url) throws Exception {
|
|
498 return ingestECHOResource(url, false);
|
|
499 }
|
|
500
|
|
501 protected String ingestECHOResource(String url,boolean withfullText) throws Exception {
|
|
502
|
|
503 System.out.println("Starting:" + url);
|
|
504
|
|
505 HashMap<String, String> dcs = new HashMap<String, String>();
|
|
506
|
|
507 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
|
|
508 XmlRpcClient client = new XmlRpcClient();
|
|
509 config.setServerURL(new URL(url));
|
|
510 client.setConfig(config);
|
|
511
|
|
512 String pid = getOrCreatePID(client);
|
|
513
|
|
514 if (pidAlreadyExists("mpiwg:"+pid))
|
|
515 {
|
|
516 System.out.println("PID:"+pid);
|
|
517 String contid=getIDfromPID("mpiwg:"+pid);
|
|
518 System.out.println("------- belongsTo:"+contid);
|
|
519 return contid;
|
|
520 }
|
|
521 eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:" + pid,
|
|
522 ECHORESOURCE_TEMPLATE_XML);
|
|
523 Object[] params = new Object[] {};
|
|
524
|
|
525 String title = (String) client.execute("getTitle", params);
|
|
526 String ml = (String) client.execute("getMetaDataLink", params);
|
|
527 if (withfullText){
|
|
528 String fulltextURL = url+"/getFullTextXML";
|
|
529 obj.addFullText(fulltextURL);
|
|
530
|
|
531 }
|
|
532 ml = correctML(ml);
|
|
533
|
|
534 obj.addWebUrl(url);
|
|
535 obj.addOrigUrlToMPIWGMetaData(url);
|
|
536
|
|
537 // obj.setRelationship("info:fedora/echo:col1");
|
|
538
|
|
539 config.setServerURL(new URL(ZOPEPROVIDER + "/metadataMain"));
|
|
540 client.setConfig(config);
|
|
541 params = new Object[] { ml };
|
|
542
|
|
543 try {
|
|
544 String result = (String) client.execute("getDCFormatted", params);
|
|
545 System.out.println("dC:"+result);
|
|
546 DocumentBuilderFactory factory = DocumentBuilderFactory
|
|
547 .newInstance();
|
|
548 factory.setNamespaceAware(true);
|
|
549 DocumentBuilder db = factory.newDocumentBuilder();
|
|
550
|
|
551 InputSource resultStream = new InputSource(new StringReader(result));
|
|
552 Document dc = db.parse(resultStream);
|
|
553 obj.insertDC(dc);
|
|
554
|
|
555 Document indexmeta = db.parse(ml);
|
|
556
|
|
557 XPath xpath = XPathFactory.newInstance().newXPath();
|
|
558 xpath.setNamespaceContext(new EScidocNameSpaceContext());
|
|
559
|
|
560 NodeList test = (NodeList) xpath.evaluate("//meta", indexmeta,
|
|
561 XPathConstants.NODESET);
|
|
562 if (test.getLength() != 1)
|
|
563 {
|
|
564 test = (NodeList) xpath.evaluate("//mpiwg:meta", indexmeta,
|
|
565 XPathConstants.NODESET);
|
|
566
|
|
567 if (test.getLength() !=1)
|
|
568 throw new Exception();
|
|
569 }
|
|
570 obj.insertMeta(test.item(0));
|
|
571
|
|
572 obj.addIndexMetaUrl(ml);
|
|
573
|
|
574 } catch (XmlRpcException e) {
|
|
575 System.err.println("Ressource:" + url);
|
|
576 System.err.println("METADATA CANNOT BE PARSED:" + ml);
|
|
577 HashMap<String, String> dc = new HashMap<String, String>();
|
|
578 dc.put("title", title); // ersatzweise den titel aus der echo
|
|
579 // collection
|
|
580 obj.insertDC(dc);
|
|
581 } catch (SAXParseException e) {
|
|
582 System.err.println("METADATA RESULT CANNOT BE PARSED:");
|
|
583 HashMap<String, String> dc = new HashMap<String, String>();
|
|
584 dc.put("title", title); // ersatzweise den titel aus der echo
|
|
585 // collection
|
|
586 obj.insertDC(dc);
|
|
587 }
|
|
588
|
|
589 String xml = obj.printXML();
|
|
590 System.out.println(xml);
|
|
591 return "XXX";
|
|
592 String result = ingest("/ir/item", xml);
|
|
593 // String contid = EScidocBasicHandler.getId(result);
|
|
594 // //String contid="NNNN";
|
|
595 // System.out.println("------->" + contid);
|
|
596 //
|
|
597 // params = new Object[] { pid };
|
|
598 // config.setServerURL(new URL(url));
|
|
599 // client.setConfig(config);
|
|
600 //
|
|
601 // client.execute("setPID", params);
|
|
602 // addToCollection(ECHO_CONTAINER_ID, contid);
|
|
603 //
|
|
604 // addECHOObjectToCollection(client, contid);
|
|
605 // return contid;
|
|
606
|
|
607 }
|
|
608
|
|
609 private boolean pidAlreadyExists(String pid) {
|
|
610 String id;
|
|
611 try{
|
|
612 id = getIDfromPID(pid);
|
|
613 } catch (Exception e){
|
|
614 return false;
|
|
615 }
|
|
616 if (!id.equals(""))
|
|
617 return true;
|
|
618 return false;
|
|
619 }
|
|
620
|
|
621 private String correctML(String ml) {
|
|
622 Pattern p = Pattern.compile("experimental/(.*)");
|
|
623 Matcher m = p.matcher(ml);
|
|
624 String pf;
|
|
625 if (m.find())
|
|
626 pf = "experimental/" + m.group(1);
|
|
627 else {
|
|
628 p = Pattern.compile("permanent/(.*)");
|
|
629 m = p.matcher(ml);
|
|
630 if (m.find())
|
|
631 pf = "permanent/" + m.group(1);
|
|
632 else
|
|
633 return ml;
|
|
634 }
|
|
635 return SERVLETURL + pf;
|
|
636 }
|
|
637
|
|
638 protected ArrayList<String> getAllCollections() throws XmlRpcException,
|
|
639 IOException {
|
|
640 System.out.println("ECHO:"+ECHOURL);
|
|
641 URL echoUrl = new URL(ECHOURL + "/getCollectionsXML");
|
|
642 Pattern p = Pattern.compile("echoLink=\"(.*)\"");
|
|
643 BufferedReader in = new BufferedReader(new InputStreamReader(echoUrl
|
|
644 .openStream()));
|
|
645
|
|
646 ArrayList<String> ret = new ArrayList<String>();
|
|
647 String inputLine;
|
|
648 Matcher m;
|
|
649 while ((inputLine = in.readLine()) != null) {
|
|
650 m = p.matcher(inputLine);
|
|
651 String lit;
|
|
652 if (m.find()) {
|
|
653
|
|
654 lit = m.group(1);
|
|
655 ret.add(lit);
|
|
656 }
|
|
657 }
|
|
658
|
|
659 in.close();
|
|
660 return ret;
|
|
661 }
|
|
662
|
|
663 private void submitAndReleaseAnObject(String href) throws ClientProtocolException,
|
|
664 IOException, JDOMException {
|
|
665
|
|
666 addVersionPid(href);
|
|
667 HttpResponse res = submitAnObject(href, "submit");
|
|
668 System.out.println(EScidocBasicHandler.convertStreamToString(res
|
|
669 .getEntity().getContent()));
|
|
670 res = releaseAnObject(href, "first release");
|
|
671 System.out.println(EScidocBasicHandler.convertStreamToString(res
|
|
672 .getEntity().getContent()));
|
|
673
|
|
674 }
|
|
675
|
|
676 void releaseECHORessources() throws XmlRpcException, IOException,
|
|
677 JDOMException {
|
|
678 ArrayList<String> urls = getAllResources();
|
|
679 HashMap<String, String> success = new HashMap<String, String>();
|
|
680 HashMap<String, String> nosuccess = new HashMap<String, String>();
|
|
681 int numOfUrl= urls.size();
|
|
682 int count = 0;
|
|
683 for (String url : urls) {
|
|
684
|
|
685 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
|
|
686 XmlRpcClient client = new XmlRpcClient();
|
|
687 config.setServerURL(new URL(url));
|
|
688 client.setConfig(config);
|
|
689
|
|
690 Object[] params = new Object[] {};
|
|
691
|
|
692 if (pids == null) {
|
|
693 pids = getPIDsAndEscidocIdsOfCollections(ECHO_CONTAINER_ID);
|
|
694 }
|
|
695
|
|
696 try {
|
|
697 String parentPid;
|
|
698 String pid = (String) client.execute("getPID", params);
|
|
699 String contid = getIDfromPID("mpiwg:" + pid);
|
|
700 submitAndReleaseAnObject("/ir/item/"+contid);
|
|
701 success.put(pid, url);
|
|
702 } catch (Exception e) {
|
|
703
|
|
704 ByteArrayOutputStream out = new ByteArrayOutputStream();
|
|
705 PrintStream s = new PrintStream(out);
|
|
706 e.printStackTrace(s);
|
|
707
|
|
708 nosuccess.put(url, out.toString());
|
|
709
|
|
710 e.printStackTrace();
|
|
711 }
|
|
712 count+=1;
|
|
713 System.out.println("DONE:"+count+" of "+numOfUrl);
|
|
714 }
|
|
715 System.out.println("SUCCESSFULL ORGANIZED");
|
|
716 for (String id : success.keySet())
|
|
717 System.out.println("ID:" + id + " URL:" + success.get(id));
|
|
718
|
|
719 System.out.println("ERRORS:");
|
|
720 for (String id : nosuccess.keySet()) {
|
|
721 System.out.println("URL:" + id);
|
|
722 System.out.println("Message:" + nosuccess.get(id));
|
|
723 }
|
|
724
|
|
725 }
|
|
726 }
|