annotate src/de/mpiwg/itgroup/eSciDoc/Tools/IngestECHO.java @ 0:c6929e63b0b8

first import
author dwinter
date Wed, 24 Nov 2010 16:52:07 +0100
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c6929e63b0b8 first import
dwinter
parents:
diff changeset
1 package de.mpiwg.itgroup.eSciDoc.Tools;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
2
c6929e63b0b8 first import
dwinter
parents:
diff changeset
3 //todo: create context for echo and contentmodell
c6929e63b0b8 first import
dwinter
parents:
diff changeset
4 import java.io.BufferedReader;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
5 import java.io.ByteArrayInputStream;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
6 import java.io.ByteArrayOutputStream;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
7 import java.io.IOException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
8 import java.io.InputStream;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
9 import java.io.InputStreamReader;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
10 import java.io.PrintStream;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
11 import java.io.StringReader;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
12 import java.net.MalformedURLException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
13 import java.net.URL;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
14 import java.util.ArrayList;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
15 import java.util.HashMap;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
16 import java.util.List;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
17 import java.util.regex.Matcher;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
18 import java.util.regex.Pattern;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
19
c6929e63b0b8 first import
dwinter
parents:
diff changeset
20 import javax.xml.parsers.DocumentBuilder;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
21 import javax.xml.parsers.DocumentBuilderFactory;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
22 import javax.xml.xpath.XPath;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
23 import javax.xml.xpath.XPathConstants;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
24 import javax.xml.xpath.XPathFactory;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
25
c6929e63b0b8 first import
dwinter
parents:
diff changeset
26 import org.apache.http.HttpResponse;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
27 import org.apache.http.client.ClientProtocolException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
28 import org.apache.xmlrpc.XmlRpcException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
29 import org.apache.xmlrpc.client.XmlRpcClient;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
30 import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
31 import org.jdom.JDOMException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
32 import org.w3c.dom.Document;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
33 import org.w3c.dom.NodeList;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
34 import org.xml.sax.InputSource;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
35 import org.xml.sax.SAXParseException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
36
c6929e63b0b8 first import
dwinter
parents:
diff changeset
37 import sun.misc.Regexp;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
38
c6929e63b0b8 first import
dwinter
parents:
diff changeset
39 //import fedora.client.FedoraClient;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
40 //import fedora.server.access.FedoraAPIA;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
41 //import fedora.server.management.FedoraAPIM;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
42 //import fedora.server.types.gen.ComparisonOperator;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
43 //import fedora.server.types.gen.Condition;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
44 //import fedora.server.types.gen.FieldSearchQuery;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
45 //import fedora.server.types.gen.FieldSearchResult;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
46 //import fedora.server.types.gen.ListSession;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
47 //import fedora.server.types.gen.MIMETypedStream;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
48 //import fedora.server.types.gen.ObjectFields;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
49
c6929e63b0b8 first import
dwinter
parents:
diff changeset
50 public class IngestECHO extends Ingestor {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
51
c6929e63b0b8 first import
dwinter
parents:
diff changeset
52 protected String ECHORESOURCE_TEMPLATE_XML;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
53 protected String ECHOCONTAINER_TEMPLATE_XML;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
54 private String SERVLETURL;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
55 protected String ECHOURL;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
56 protected String ECHO_CONTAINER_ID;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
57 protected String ECHO_ROOT_ID;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
58 protected String MAIN_CONTEXT;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
59 private HashMap<String, String> pids;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
60
c6929e63b0b8 first import
dwinter
parents:
diff changeset
61 protected static String ESCIDOC_SERVER_URL = "euler.mpiwg-berlin.mpg.de";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
62 protected static String ZOPEPROVIDER = "http://127.0.0.1:18080";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
63
c6929e63b0b8 first import
dwinter
parents:
diff changeset
64 private static int PORT = 8080;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
65
c6929e63b0b8 first import
dwinter
parents:
diff changeset
/**
 * Creates an ingestor for ECHO that talks to the default eSciDoc server
 * ({@code ESCIDOC_SERVER_URL}:{@code PORT}) and Zope provider
 * ({@code ZOPEPROVIDER}) and sets the ECHO specific configuration values.
 *
 * NOTE(review): ECHOCONTAINER_TEMPLATE_XML is not assigned here although
 * ingestECHOCollection reads it - confirm where it is expected to be set.
 *
 * @param user     user name handed to the super constructor
 * @param password password handed to the super constructor
 */
IngestECHO(String user, String password) {
    super(ESCIDOC_SERVER_URL, PORT, ZOPEPROVIDER, user, password);

    ECHORESOURCE_TEMPLATE_XML = "ECHOResourceTemplate.xml";
    SERVLETURL = "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=";
    ECHOURL = "http://echo.mpiwg-berlin.mpg.de";

    // Container that holds all ECHO objects.
    ECHO_CONTAINER_ID = "escidoc:3006";
    // Container for all objects that do not belong to any ECHO collection.
    ECHO_ROOT_ID = "escidoc:3005";

    MAIN_CONTEXT = "escidoc:3002";

    // Start with an empty PID cache; it is filled on first use by the
    // organize* methods. (The previous code declared a local variable named
    // "pids" here, which shadowed the field and had no effect.)
    pids = null;
}
c6929e63b0b8 first import
dwinter
parents:
diff changeset
89
c6929e63b0b8 first import
dwinter
parents:
diff changeset
90 void ingestECHOCollections() throws XmlRpcException, IOException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
91 ArrayList<String> urls = getAllCollections();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
92 HashMap<String, String> success = new HashMap<String, String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
93 HashMap<String, String> nosuccess = new HashMap<String, String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
94
c6929e63b0b8 first import
dwinter
parents:
diff changeset
95 for (String url : urls) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
96
c6929e63b0b8 first import
dwinter
parents:
diff changeset
97 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
98 String id = ingestECHOCollection(url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
99 success.put(id, url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
100 } catch (Exception e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
101
c6929e63b0b8 first import
dwinter
parents:
diff changeset
102 ByteArrayOutputStream out = new ByteArrayOutputStream();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
103 PrintStream s = new PrintStream(out);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
104 e.printStackTrace(s);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
105
c6929e63b0b8 first import
dwinter
parents:
diff changeset
106 nosuccess.put(url, out.toString());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
107
c6929e63b0b8 first import
dwinter
parents:
diff changeset
108 e.printStackTrace();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
109 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
110 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
111 System.out.println("SUCCESSFULL INGEST");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
112 for (String id : success.keySet())
c6929e63b0b8 first import
dwinter
parents:
diff changeset
113 System.out.println("ID:" + id + " URL:" + success.get(id));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
114
c6929e63b0b8 first import
dwinter
parents:
diff changeset
115 System.out.println("ERRORS:");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
116 for (String id : nosuccess.keySet()) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
117 System.out.println("URL:" + id);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
118 System.out.println("Message:" + nosuccess.get(id));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
119 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
120
c6929e63b0b8 first import
dwinter
parents:
diff changeset
121 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
122
c6929e63b0b8 first import
dwinter
parents:
diff changeset
123 void organizeECHOCollections() throws XmlRpcException, IOException,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
124 JDOMException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
125 ArrayList<String> urls = getAllCollections();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
126 HashMap<String, String> success = new HashMap<String, String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
127 HashMap<String, String> nosuccess = new HashMap<String, String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
128
c6929e63b0b8 first import
dwinter
parents:
diff changeset
129 for (String url : urls) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
130
c6929e63b0b8 first import
dwinter
parents:
diff changeset
131 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
132 XmlRpcClient client = new XmlRpcClient();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
133 config.setServerURL(new URL(url));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
134 client.setConfig(config);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
135
c6929e63b0b8 first import
dwinter
parents:
diff changeset
136 Object[] params = new Object[] {};
c6929e63b0b8 first import
dwinter
parents:
diff changeset
137
c6929e63b0b8 first import
dwinter
parents:
diff changeset
138 if (pids == null) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
139 pids = getPIDsAndEscidocIdsOfCollections(ECHO_CONTAINER_ID);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
140 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
141
c6929e63b0b8 first import
dwinter
parents:
diff changeset
142 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
143 String parentPid;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
144 String pid = (String) client.execute("getPID", params);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
145 String contid = pids.get("mpiwg:" + pid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
146 addECHOObjectToCollection(client, contid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
147 success.put(pid, url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
148 } catch (Exception e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
149
c6929e63b0b8 first import
dwinter
parents:
diff changeset
150 ByteArrayOutputStream out = new ByteArrayOutputStream();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
151 PrintStream s = new PrintStream(out);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
152 e.printStackTrace(s);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
153
c6929e63b0b8 first import
dwinter
parents:
diff changeset
154 nosuccess.put(url, out.toString());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
155
c6929e63b0b8 first import
dwinter
parents:
diff changeset
156 e.printStackTrace();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
157 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
158 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
159 System.out.println("SUCCESSFULL ORGANIZED");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
160 for (String id : success.keySet())
c6929e63b0b8 first import
dwinter
parents:
diff changeset
161 System.out.println("ID:" + id + " URL:" + success.get(id));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
162
c6929e63b0b8 first import
dwinter
parents:
diff changeset
163 System.out.println("ERRORS:");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
164 for (String id : nosuccess.keySet()) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
165 System.out.println("URL:" + id);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
166 System.out.println("Message:" + nosuccess.get(id));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
167 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
168
c6929e63b0b8 first import
dwinter
parents:
diff changeset
169 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
170
c6929e63b0b8 first import
dwinter
parents:
diff changeset
171 void organizeECHORessources() throws XmlRpcException, IOException,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
172 JDOMException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
173 ArrayList<String> urls = getAllResources();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
174 HashMap<String, String> success = new HashMap<String, String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
175 HashMap<String, String> nosuccess = new HashMap<String, String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
176
c6929e63b0b8 first import
dwinter
parents:
diff changeset
177 for (String url : urls) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
178
c6929e63b0b8 first import
dwinter
parents:
diff changeset
179 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
180 XmlRpcClient client = new XmlRpcClient();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
181 config.setServerURL(new URL(url));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
182 client.setConfig(config);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
183
c6929e63b0b8 first import
dwinter
parents:
diff changeset
184 Object[] params = new Object[] {};
c6929e63b0b8 first import
dwinter
parents:
diff changeset
185
c6929e63b0b8 first import
dwinter
parents:
diff changeset
186 if (pids == null) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
187 pids = getPIDsAndEscidocIdsOfCollections(ECHO_CONTAINER_ID);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
188 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
189
c6929e63b0b8 first import
dwinter
parents:
diff changeset
190 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
191 String parentPid;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
192 String pid = (String) client.execute("getPID", params);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
193 String contid = getIDfromPID("mpiwg:" + pid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
194 addECHOObjectToCollection(client, contid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
195 success.put(pid, url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
196 } catch (Exception e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
197
c6929e63b0b8 first import
dwinter
parents:
diff changeset
198 ByteArrayOutputStream out = new ByteArrayOutputStream();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
199 PrintStream s = new PrintStream(out);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
200 e.printStackTrace(s);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
201
c6929e63b0b8 first import
dwinter
parents:
diff changeset
202 nosuccess.put(url, out.toString());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
203
c6929e63b0b8 first import
dwinter
parents:
diff changeset
204 e.printStackTrace();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
205 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
206 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
207 System.out.println("SUCCESSFULL ORGANIZED");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
208 for (String id : success.keySet())
c6929e63b0b8 first import
dwinter
parents:
diff changeset
209 System.out.println("ID:" + id + " URL:" + success.get(id));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
210
c6929e63b0b8 first import
dwinter
parents:
diff changeset
211 System.out.println("ERRORS:");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
212 for (String id : nosuccess.keySet()) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
213 System.out.println("URL:" + id);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
214 System.out.println("Message:" + nosuccess.get(id));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
215 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
216
c6929e63b0b8 first import
dwinter
parents:
diff changeset
217 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
218
c6929e63b0b8 first import
dwinter
parents:
diff changeset
219
c6929e63b0b8 first import
dwinter
parents:
diff changeset
220
c6929e63b0b8 first import
dwinter
parents:
diff changeset
221 private String getIDfromPID(String pid) throws ClientProtocolException,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
222 IOException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
223 InputStream res = getXMLfromPID(pid,MAIN_CONTEXT);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
224 return EScidocBasicHandler.getId(EScidocBasicHandler
c6929e63b0b8 first import
dwinter
parents:
diff changeset
225 .convertStreamToString(res));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
226 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
227
c6929e63b0b8 first import
dwinter
parents:
diff changeset
228 /**
c6929e63b0b8 first import
dwinter
parents:
diff changeset
229 * FŸgt die ECHO Collection unter der URL in eScidoc ein. Der Link auf die
c6929e63b0b8 first import
dwinter
parents:
diff changeset
230 * Web-Seite wird in einem eigenen item hinterlegt, dass in Collection
c6929e63b0b8 first import
dwinter
parents:
diff changeset
231 * eingefŸgt wird.
c6929e63b0b8 first import
dwinter
parents:
diff changeset
232 *
c6929e63b0b8 first import
dwinter
parents:
diff changeset
233 * @param url
c6929e63b0b8 first import
dwinter
parents:
diff changeset
234 * @throws Exception
c6929e63b0b8 first import
dwinter
parents:
diff changeset
235 */
c6929e63b0b8 first import
dwinter
parents:
diff changeset
236 private String ingestECHOCollection(String url) throws Exception {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
237
c6929e63b0b8 first import
dwinter
parents:
diff changeset
238 // get a PID for the Collection
c6929e63b0b8 first import
dwinter
parents:
diff changeset
239 System.out.println("Processing:" + url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
240
c6929e63b0b8 first import
dwinter
parents:
diff changeset
241 HashMap<String, String> dcs = new HashMap<String, String>(); // Store
c6929e63b0b8 first import
dwinter
parents:
diff changeset
242 // for
c6929e63b0b8 first import
dwinter
parents:
diff changeset
243 // the
c6929e63b0b8 first import
dwinter
parents:
diff changeset
244 // metadata
c6929e63b0b8 first import
dwinter
parents:
diff changeset
245
c6929e63b0b8 first import
dwinter
parents:
diff changeset
246 // Verbinde dich mit der Collection Ÿber XML-rpc
c6929e63b0b8 first import
dwinter
parents:
diff changeset
247
c6929e63b0b8 first import
dwinter
parents:
diff changeset
248 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
249 XmlRpcClient client = new XmlRpcClient();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
250 config.setServerURL(new URL(url));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
251 client.setConfig(config);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
252
c6929e63b0b8 first import
dwinter
parents:
diff changeset
253 String pid = getOrCreatePID(client);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
254
c6929e63b0b8 first import
dwinter
parents:
diff changeset
255 if (pidAlreadyExists("mpiwg:"+pid))
c6929e63b0b8 first import
dwinter
parents:
diff changeset
256 {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
257 System.out.println("PID:"+pid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
258 String contid=getIDfromPID("mpiwg:"+pid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
259 System.out.println("------- belongsTo:"+contid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
260 return contid;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
261 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
262 Object[] params = new Object[] {};
c6929e63b0b8 first import
dwinter
parents:
diff changeset
263
c6929e63b0b8 first import
dwinter
parents:
diff changeset
264 eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:" + pid,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
265 ECHOCONTAINER_TEMPLATE_XML);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
266
c6929e63b0b8 first import
dwinter
parents:
diff changeset
267 String result = (String) client.execute("getDescription", params);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
268
c6929e63b0b8 first import
dwinter
parents:
diff changeset
269 String x = new String(result.getBytes("UTF-8"), ("UTF-8"));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
270 // System.out.println("DESCR"+x);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
271 dcs.put("description", x);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
272 String title = (String) client.execute("getTitle", params);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
273 dcs.put("title", title);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
274 obj.insertDC(dcs);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
275 obj.addOrigUrlToMPIWGMetaData(url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
276
c6929e63b0b8 first import
dwinter
parents:
diff changeset
277 // obj.setRelationship("info:fedora/echo:col1");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
278 String xml = obj.printXML();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
279 // System.out.println(xml);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
280 String ret = ingest("/ir/container", xml);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
281 String xr = ingestCollectionWebSite(title, url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
282 // System.out.println(xr);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
283 String objid = EScidocBasicHandler.getId(xr);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
284 String dateStamp = EScidocBasicHandler.getDateStamp(ret);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
285 String addTxt = "<param last-modification-date=\"" + dateStamp + "\">";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
286 addTxt += "<id>" + objid + "</id>";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
287 addTxt += "</param>";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
288
c6929e63b0b8 first import
dwinter
parents:
diff changeset
289 String contid = EScidocBasicHandler.getId(ret);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
290
c6929e63b0b8 first import
dwinter
parents:
diff changeset
291 ByteArrayInputStream stream = new ByteArrayInputStream(addTxt
c6929e63b0b8 first import
dwinter
parents:
diff changeset
292 .getBytes("utf-8"));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
293
c6929e63b0b8 first import
dwinter
parents:
diff changeset
294 eSciDocHandler.eScidocPost("/ir/container/" + contid + "/members/add",
c6929e63b0b8 first import
dwinter
parents:
diff changeset
295 stream);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
296 // System.out.println(response.getStatusLine());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
297 // System.out.println(EScidocBasicHandler.convertStreamToString(response.getEntity().getContent()));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
298 System.out.println("Processed:" + url + "------>" + contid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
299
c6929e63b0b8 first import
dwinter
parents:
diff changeset
300 addToCollection(ECHO_CONTAINER_ID, contid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
301
c6929e63b0b8 first import
dwinter
parents:
diff changeset
302 params = new Object[] { pid };
c6929e63b0b8 first import
dwinter
parents:
diff changeset
303 client.execute("setPID", params);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
304
c6929e63b0b8 first import
dwinter
parents:
diff changeset
305 System.out.println(ret);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
306
c6929e63b0b8 first import
dwinter
parents:
diff changeset
307 addECHOObjectToCollection(client, contid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
308 return contid;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
309
c6929e63b0b8 first import
dwinter
parents:
diff changeset
310 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
311
c6929e63b0b8 first import
dwinter
parents:
diff changeset
312 public ArrayList<String> findMissingItems() throws XmlRpcException, IOException{
c6929e63b0b8 first import
dwinter
parents:
diff changeset
313 return findMissingItemsFromECHOUrls(getAllResources());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
314 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
315
c6929e63b0b8 first import
dwinter
parents:
diff changeset
316 public ArrayList<String> findMissingCollections() throws XmlRpcException, IOException{
c6929e63b0b8 first import
dwinter
parents:
diff changeset
317 return findMissingItemsFromECHOUrls(getAllCollections());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
318 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
319
c6929e63b0b8 first import
dwinter
parents:
diff changeset
320 public ArrayList<String> findMissingItemsFromECHOUrls(List<String> urls) throws XmlRpcException, IOException{
c6929e63b0b8 first import
dwinter
parents:
diff changeset
321 //ArrayList<String> urls = getAllCollections();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
322 System.out.println("GOT the collections");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
323 ArrayList<String> ret = new ArrayList<String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
324 for (String url : urls) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
325 System.out.println("checking:"+url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
326 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
327 XmlRpcClient client = new XmlRpcClient();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
328
c6929e63b0b8 first import
dwinter
parents:
diff changeset
329 config.setServerURL(new URL(url));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
330 client.setConfig(config);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
331 String pid;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
332 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
333 Object[] parameters = new Object[] {};
c6929e63b0b8 first import
dwinter
parents:
diff changeset
334
c6929e63b0b8 first import
dwinter
parents:
diff changeset
335 pid = (String) client.execute("getPID", parameters);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
336 } catch (Exception e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
337 pid = null;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
338 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
339
c6929e63b0b8 first import
dwinter
parents:
diff changeset
340 if (pid == null){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
341 ret.add(url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
342 System.out.println(" -- no pid");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
343 } else {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
344 String id;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
345 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
346 id = getIDfromPID("mpiwg:"+pid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
347 } catch (Exception e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
348 id = "NO";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
349 ret.add(url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
350 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
351
c6929e63b0b8 first import
dwinter
parents:
diff changeset
352
c6929e63b0b8 first import
dwinter
parents:
diff changeset
353 System.out.println(" -- id:"+id);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
354 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
355
c6929e63b0b8 first import
dwinter
parents:
diff changeset
356
c6929e63b0b8 first import
dwinter
parents:
diff changeset
357
c6929e63b0b8 first import
dwinter
parents:
diff changeset
358 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
359 return ret;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
360 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
361 private String getOrCreatePID(XmlRpcClient client) throws XmlRpcException,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
362 MalformedURLException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
363 Object[] parameters = new Object[] {};
c6929e63b0b8 first import
dwinter
parents:
diff changeset
364
c6929e63b0b8 first import
dwinter
parents:
diff changeset
365 String pid = null;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
366
c6929e63b0b8 first import
dwinter
parents:
diff changeset
367 // Hole pid aus ECHO
c6929e63b0b8 first import
dwinter
parents:
diff changeset
368 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
369 pid = (String) client.execute("getPID", parameters);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
370 } catch (Exception e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
371 pid = null;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
372 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
373 // Falls dort noch keine ist, erzeuge ein neue
c6929e63b0b8 first import
dwinter
parents:
diff changeset
374 if (pid == null)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
375 pid = getID();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
376 else
c6929e63b0b8 first import
dwinter
parents:
diff changeset
377 System.out.println("PID from ECHO:" + pid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
378
c6929e63b0b8 first import
dwinter
parents:
diff changeset
379 return pid;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
380 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
381
c6929e63b0b8 first import
dwinter
parents:
diff changeset
382 private void addECHOObjectToCollection(XmlRpcClient client, String contid)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
383 throws ClientProtocolException, IOException, JDOMException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
384 Object[] params;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
385 params = new Object[] {};
c6929e63b0b8 first import
dwinter
parents:
diff changeset
386
c6929e63b0b8 first import
dwinter
parents:
diff changeset
387 if (pids == null) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
388 pids = getPIDsAndEscidocIdsOfCollections(ECHO_CONTAINER_ID);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
389 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
390
c6929e63b0b8 first import
dwinter
parents:
diff changeset
391 String parentId;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
392 String parentPid;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
393 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
394 parentPid = (String) client.execute("getParentPID", params);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
395 parentId = pids.get("mpiwg:" + parentPid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
396 } catch (Exception e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
397 parentId = ECHO_ROOT_ID;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
398 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
399 addToCollection(parentId, contid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
400
c6929e63b0b8 first import
dwinter
parents:
diff changeset
401 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
402
c6929e63b0b8 first import
dwinter
parents:
diff changeset
403 private String ingestCollectionWebSite(String title, String url)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
404 throws Exception {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
405 String pid = getID();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
406 eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:" + pid,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
407 "ECHOCollectionWebRepresentationTemplate.xml");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
408
c6929e63b0b8 first import
dwinter
parents:
diff changeset
409 HashMap<String, String> dcs = new HashMap<String, String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
410
c6929e63b0b8 first import
dwinter
parents:
diff changeset
411 obj.addWebUrl(url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
412 // obj.setRelationship("info:fedora/echo:col1");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
413
c6929e63b0b8 first import
dwinter
parents:
diff changeset
414 dcs.put("title", title); // ersatzweise den titel aus der echo
c6929e63b0b8 first import
dwinter
parents:
diff changeset
415 // collection
c6929e63b0b8 first import
dwinter
parents:
diff changeset
416 obj.insertDC(dcs);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
417
c6929e63b0b8 first import
dwinter
parents:
diff changeset
418 String xml = obj.printXML();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
419 // System.out.println(xml);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
420 String res = ingest("/ir/item", xml);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
421 return res;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
422 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
423
c6929e63b0b8 first import
dwinter
parents:
diff changeset
424 public void ingestECHOResources() throws IOException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
425 ingestECHOResources(null);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
426
c6929e63b0b8 first import
dwinter
parents:
diff changeset
427 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
428 public void ingestECHOResources(Pattern match) throws IOException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
429 ArrayList<String> urls = getAllResources();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
430 HashMap<String, String> success = new HashMap<String, String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
431 HashMap<String, String> nosuccess = new HashMap<String, String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
432
c6929e63b0b8 first import
dwinter
parents:
diff changeset
433 for (String url : urls) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
434
c6929e63b0b8 first import
dwinter
parents:
diff changeset
435 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
436 Boolean ingest=false;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
437
c6929e63b0b8 first import
dwinter
parents:
diff changeset
438 if (match == null)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
439 ingest=true;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
440 else {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
441 Matcher m = match.matcher(url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
442 if (m.matches())
c6929e63b0b8 first import
dwinter
parents:
diff changeset
443 ingest=true;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
444
c6929e63b0b8 first import
dwinter
parents:
diff changeset
445 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
446 if (ingest){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
447 String id = ingestECHOResource(url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
448 success.put(id, url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
449 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
450 } catch (Exception e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
451
c6929e63b0b8 first import
dwinter
parents:
diff changeset
452 ByteArrayOutputStream out = new ByteArrayOutputStream();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
453 PrintStream s = new PrintStream(out);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
454 e.printStackTrace(s);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
455
c6929e63b0b8 first import
dwinter
parents:
diff changeset
456 nosuccess.put(url, out.toString());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
457
c6929e63b0b8 first import
dwinter
parents:
diff changeset
458 e.printStackTrace();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
459 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
460 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
461 System.out.println("SUCCESSFULL INGEST");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
462 for (String id : success.keySet())
c6929e63b0b8 first import
dwinter
parents:
diff changeset
463 System.out.println("ID:" + id + " URL:" + success.get(id));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
464
c6929e63b0b8 first import
dwinter
parents:
diff changeset
465 System.out.println("ERRORS:");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
466 for (String id : nosuccess.keySet()) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
467 System.out.println("URL:" + id);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
468 System.out.println("Message:" + nosuccess.get(id));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
469 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
470
c6929e63b0b8 first import
dwinter
parents:
diff changeset
471 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
472
c6929e63b0b8 first import
dwinter
parents:
diff changeset
473 protected ArrayList<String> getAllResources() throws IOException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
474 URL echoUrl = new URL(ECHOURL + "/getResourcesXML");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
475 Pattern p = Pattern.compile("echoLink=\"([^\"]*)\"");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
476 BufferedReader in = new BufferedReader(new InputStreamReader(echoUrl
c6929e63b0b8 first import
dwinter
parents:
diff changeset
477 .openStream()));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
478
c6929e63b0b8 first import
dwinter
parents:
diff changeset
479 ArrayList<String> ret = new ArrayList<String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
480 String inputLine;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
481 Matcher m;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
482 while ((inputLine = in.readLine()) != null) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
483 m = p.matcher(inputLine);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
484 String lit;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
485 if (m.find()) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
486
c6929e63b0b8 first import
dwinter
parents:
diff changeset
487 lit = m.group(1);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
488 ret.add(lit);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
489 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
490 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
491
c6929e63b0b8 first import
dwinter
parents:
diff changeset
492 in.close();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
493 return ret;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
494
c6929e63b0b8 first import
dwinter
parents:
diff changeset
495 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
496
c6929e63b0b8 first import
dwinter
parents:
diff changeset
497 protected String ingestECHOResource(String url) throws Exception {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
498 return ingestECHOResource(url, false);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
499 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
500
c6929e63b0b8 first import
dwinter
parents:
diff changeset
501 protected String ingestECHOResource(String url,boolean withfullText) throws Exception {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
502
c6929e63b0b8 first import
dwinter
parents:
diff changeset
503 System.out.println("Starting:" + url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
504
c6929e63b0b8 first import
dwinter
parents:
diff changeset
505 HashMap<String, String> dcs = new HashMap<String, String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
506
c6929e63b0b8 first import
dwinter
parents:
diff changeset
507 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
508 XmlRpcClient client = new XmlRpcClient();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
509 config.setServerURL(new URL(url));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
510 client.setConfig(config);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
511
c6929e63b0b8 first import
dwinter
parents:
diff changeset
512 String pid = getOrCreatePID(client);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
513
c6929e63b0b8 first import
dwinter
parents:
diff changeset
514 if (pidAlreadyExists("mpiwg:"+pid))
c6929e63b0b8 first import
dwinter
parents:
diff changeset
515 {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
516 System.out.println("PID:"+pid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
517 String contid=getIDfromPID("mpiwg:"+pid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
518 System.out.println("------- belongsTo:"+contid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
519 return contid;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
520 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
521 eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:" + pid,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
522 ECHORESOURCE_TEMPLATE_XML);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
523 Object[] params = new Object[] {};
c6929e63b0b8 first import
dwinter
parents:
diff changeset
524
c6929e63b0b8 first import
dwinter
parents:
diff changeset
525 String title = (String) client.execute("getTitle", params);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
526 String ml = (String) client.execute("getMetaDataLink", params);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
527 if (withfullText){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
528 String fulltextURL = url+"/getFullTextXML";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
529 obj.addFullText(fulltextURL);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
530
c6929e63b0b8 first import
dwinter
parents:
diff changeset
531 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
532 ml = correctML(ml);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
533
c6929e63b0b8 first import
dwinter
parents:
diff changeset
534 obj.addWebUrl(url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
535 obj.addOrigUrlToMPIWGMetaData(url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
536
c6929e63b0b8 first import
dwinter
parents:
diff changeset
537 // obj.setRelationship("info:fedora/echo:col1");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
538
c6929e63b0b8 first import
dwinter
parents:
diff changeset
539 config.setServerURL(new URL(ZOPEPROVIDER + "/metadataMain"));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
540 client.setConfig(config);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
541 params = new Object[] { ml };
c6929e63b0b8 first import
dwinter
parents:
diff changeset
542
c6929e63b0b8 first import
dwinter
parents:
diff changeset
543 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
544 String result = (String) client.execute("getDCFormatted", params);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
545 System.out.println("dC:"+result);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
546 DocumentBuilderFactory factory = DocumentBuilderFactory
c6929e63b0b8 first import
dwinter
parents:
diff changeset
547 .newInstance();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
548 factory.setNamespaceAware(true);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
549 DocumentBuilder db = factory.newDocumentBuilder();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
550
c6929e63b0b8 first import
dwinter
parents:
diff changeset
551 InputSource resultStream = new InputSource(new StringReader(result));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
552 Document dc = db.parse(resultStream);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
553 obj.insertDC(dc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
554
c6929e63b0b8 first import
dwinter
parents:
diff changeset
555 Document indexmeta = db.parse(ml);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
556
c6929e63b0b8 first import
dwinter
parents:
diff changeset
557 XPath xpath = XPathFactory.newInstance().newXPath();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
558 xpath.setNamespaceContext(new EScidocNameSpaceContext());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
559
c6929e63b0b8 first import
dwinter
parents:
diff changeset
560 NodeList test = (NodeList) xpath.evaluate("//meta", indexmeta,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
561 XPathConstants.NODESET);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
562 if (test.getLength() != 1)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
563 {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
564 test = (NodeList) xpath.evaluate("//mpiwg:meta", indexmeta,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
565 XPathConstants.NODESET);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
566
c6929e63b0b8 first import
dwinter
parents:
diff changeset
567 if (test.getLength() !=1)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
568 throw new Exception();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
569 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
570 obj.insertMeta(test.item(0));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
571
c6929e63b0b8 first import
dwinter
parents:
diff changeset
572 obj.addIndexMetaUrl(ml);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
573
c6929e63b0b8 first import
dwinter
parents:
diff changeset
574 } catch (XmlRpcException e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
575 System.err.println("Ressource:" + url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
576 System.err.println("METADATA CANNOT BE PARSED:" + ml);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
577 HashMap<String, String> dc = new HashMap<String, String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
578 dc.put("title", title); // ersatzweise den titel aus der echo
c6929e63b0b8 first import
dwinter
parents:
diff changeset
579 // collection
c6929e63b0b8 first import
dwinter
parents:
diff changeset
580 obj.insertDC(dc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
581 } catch (SAXParseException e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
582 System.err.println("METADATA RESULT CANNOT BE PARSED:");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
583 HashMap<String, String> dc = new HashMap<String, String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
584 dc.put("title", title); // ersatzweise den titel aus der echo
c6929e63b0b8 first import
dwinter
parents:
diff changeset
585 // collection
c6929e63b0b8 first import
dwinter
parents:
diff changeset
586 obj.insertDC(dc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
587 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
588
c6929e63b0b8 first import
dwinter
parents:
diff changeset
589 String xml = obj.printXML();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
590 System.out.println(xml);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
591 return "XXX";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
592 String result = ingest("/ir/item", xml);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
593 // String contid = EScidocBasicHandler.getId(result);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
594 // //String contid="NNNN";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
595 // System.out.println("------->" + contid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
596 //
c6929e63b0b8 first import
dwinter
parents:
diff changeset
597 // params = new Object[] { pid };
c6929e63b0b8 first import
dwinter
parents:
diff changeset
598 // config.setServerURL(new URL(url));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
599 // client.setConfig(config);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
600 //
c6929e63b0b8 first import
dwinter
parents:
diff changeset
601 // client.execute("setPID", params);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
602 // addToCollection(ECHO_CONTAINER_ID, contid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
603 //
c6929e63b0b8 first import
dwinter
parents:
diff changeset
604 // addECHOObjectToCollection(client, contid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
605 // return contid;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
606
c6929e63b0b8 first import
dwinter
parents:
diff changeset
607 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
608
c6929e63b0b8 first import
dwinter
parents:
diff changeset
609 private boolean pidAlreadyExists(String pid) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
610 String id;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
611 try{
c6929e63b0b8 first import
dwinter
parents:
diff changeset
612 id = getIDfromPID(pid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
613 } catch (Exception e){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
614 return false;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
615 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
616 if (!id.equals(""))
c6929e63b0b8 first import
dwinter
parents:
diff changeset
617 return true;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
618 return false;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
619 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
620
c6929e63b0b8 first import
dwinter
parents:
diff changeset
621 private String correctML(String ml) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
622 Pattern p = Pattern.compile("experimental/(.*)");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
623 Matcher m = p.matcher(ml);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
624 String pf;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
625 if (m.find())
c6929e63b0b8 first import
dwinter
parents:
diff changeset
626 pf = "experimental/" + m.group(1);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
627 else {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
628 p = Pattern.compile("permanent/(.*)");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
629 m = p.matcher(ml);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
630 if (m.find())
c6929e63b0b8 first import
dwinter
parents:
diff changeset
631 pf = "permanent/" + m.group(1);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
632 else
c6929e63b0b8 first import
dwinter
parents:
diff changeset
633 return ml;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
634 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
635 return SERVLETURL + pf;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
636 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
637
c6929e63b0b8 first import
dwinter
parents:
diff changeset
638 protected ArrayList<String> getAllCollections() throws XmlRpcException,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
639 IOException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
640 System.out.println("ECHO:"+ECHOURL);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
641 URL echoUrl = new URL(ECHOURL + "/getCollectionsXML");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
642 Pattern p = Pattern.compile("echoLink=\"(.*)\"");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
643 BufferedReader in = new BufferedReader(new InputStreamReader(echoUrl
c6929e63b0b8 first import
dwinter
parents:
diff changeset
644 .openStream()));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
645
c6929e63b0b8 first import
dwinter
parents:
diff changeset
646 ArrayList<String> ret = new ArrayList<String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
647 String inputLine;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
648 Matcher m;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
649 while ((inputLine = in.readLine()) != null) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
650 m = p.matcher(inputLine);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
651 String lit;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
652 if (m.find()) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
653
c6929e63b0b8 first import
dwinter
parents:
diff changeset
654 lit = m.group(1);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
655 ret.add(lit);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
656 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
657 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
658
c6929e63b0b8 first import
dwinter
parents:
diff changeset
659 in.close();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
660 return ret;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
661 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
662
c6929e63b0b8 first import
dwinter
parents:
diff changeset
663 private void submitAndReleaseAnObject(String href) throws ClientProtocolException,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
664 IOException, JDOMException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
665
c6929e63b0b8 first import
dwinter
parents:
diff changeset
666 addVersionPid(href);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
667 HttpResponse res = submitAnObject(href, "submit");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
668 System.out.println(EScidocBasicHandler.convertStreamToString(res
c6929e63b0b8 first import
dwinter
parents:
diff changeset
669 .getEntity().getContent()));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
670 res = releaseAnObject(href, "first release");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
671 System.out.println(EScidocBasicHandler.convertStreamToString(res
c6929e63b0b8 first import
dwinter
parents:
diff changeset
672 .getEntity().getContent()));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
673
c6929e63b0b8 first import
dwinter
parents:
diff changeset
674 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
675
c6929e63b0b8 first import
dwinter
parents:
diff changeset
676 void releaseECHORessources() throws XmlRpcException, IOException,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
677 JDOMException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
678 ArrayList<String> urls = getAllResources();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
679 HashMap<String, String> success = new HashMap<String, String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
680 HashMap<String, String> nosuccess = new HashMap<String, String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
681 int numOfUrl= urls.size();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
682 int count = 0;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
683 for (String url : urls) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
684
c6929e63b0b8 first import
dwinter
parents:
diff changeset
685 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
686 XmlRpcClient client = new XmlRpcClient();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
687 config.setServerURL(new URL(url));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
688 client.setConfig(config);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
689
c6929e63b0b8 first import
dwinter
parents:
diff changeset
690 Object[] params = new Object[] {};
c6929e63b0b8 first import
dwinter
parents:
diff changeset
691
c6929e63b0b8 first import
dwinter
parents:
diff changeset
692 if (pids == null) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
693 pids = getPIDsAndEscidocIdsOfCollections(ECHO_CONTAINER_ID);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
694 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
695
c6929e63b0b8 first import
dwinter
parents:
diff changeset
696 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
697 String parentPid;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
698 String pid = (String) client.execute("getPID", params);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
699 String contid = getIDfromPID("mpiwg:" + pid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
700 submitAndReleaseAnObject("/ir/item/"+contid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
701 success.put(pid, url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
702 } catch (Exception e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
703
c6929e63b0b8 first import
dwinter
parents:
diff changeset
704 ByteArrayOutputStream out = new ByteArrayOutputStream();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
705 PrintStream s = new PrintStream(out);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
706 e.printStackTrace(s);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
707
c6929e63b0b8 first import
dwinter
parents:
diff changeset
708 nosuccess.put(url, out.toString());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
709
c6929e63b0b8 first import
dwinter
parents:
diff changeset
710 e.printStackTrace();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
711 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
712 count+=1;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
713 System.out.println("DONE:"+count+" of "+numOfUrl);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
714 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
715 System.out.println("SUCCESSFULL ORGANIZED");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
716 for (String id : success.keySet())
c6929e63b0b8 first import
dwinter
parents:
diff changeset
717 System.out.println("ID:" + id + " URL:" + success.get(id));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
718
c6929e63b0b8 first import
dwinter
parents:
diff changeset
719 System.out.println("ERRORS:");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
720 for (String id : nosuccess.keySet()) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
721 System.out.println("URL:" + id);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
722 System.out.println("Message:" + nosuccess.get(id));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
723 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
724
c6929e63b0b8 first import
dwinter
parents:
diff changeset
725 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
726 }