annotate src/de/mpiwg/itgroup/eSciDoc/Tools/ingestLib.java @ 0:c6929e63b0b8

first import
author dwinter
date Wed, 24 Nov 2010 16:52:07 +0100
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c6929e63b0b8 first import
dwinter
parents:
diff changeset
1 package de.mpiwg.itgroup.eSciDoc.Tools;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
2 import java.io.IOException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
3 import java.io.StringReader;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
4 import java.net.MalformedURLException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
5 import java.net.URL;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
6 import java.rmi.RemoteException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
7 import java.text.SimpleDateFormat;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
8 import java.util.Date;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
9 import java.util.regex.Matcher;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
10 import java.util.regex.Pattern;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
11
c6929e63b0b8 first import
dwinter
parents:
diff changeset
12 import javax.xml.parsers.DocumentBuilder;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
13 import javax.xml.parsers.DocumentBuilderFactory;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
14 import javax.xml.rpc.ServiceException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
15
c6929e63b0b8 first import
dwinter
parents:
diff changeset
16 import org.apache.axis.types.NonNegativeInteger;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
17 import org.apache.xmlrpc.XmlRpcException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
18 import org.apache.xmlrpc.client.XmlRpcClient;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
19 import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
20 import org.w3c.dom.Document;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
21 import org.xml.sax.InputSource;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
22
c6929e63b0b8 first import
dwinter
parents:
diff changeset
23
c6929e63b0b8 first import
dwinter
parents:
diff changeset
24 public class ingestLib extends IngestECHO{
c6929e63b0b8 first import
dwinter
parents:
diff changeset
25
c6929e63b0b8 first import
dwinter
parents:
diff changeset
26 /**
c6929e63b0b8 first import
dwinter
parents:
diff changeset
27 * @param args
c6929e63b0b8 first import
dwinter
parents:
diff changeset
28 * @throws Exception
c6929e63b0b8 first import
dwinter
parents:
diff changeset
29 * @throws Exception
c6929e63b0b8 first import
dwinter
parents:
diff changeset
30 */
c6929e63b0b8 first import
dwinter
parents:
diff changeset
31
c6929e63b0b8 first import
dwinter
parents:
diff changeset
32 private static String ZOPEPROVIDER = "http://127.0.0.1:18080";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
/**
 * Builds the ingest XML for one VLP literature resource.
 *
 * Creates an eSciDocXmlObject from the template "vlpRessourceTemplate.xml", fetches the
 * resource's DC metadata through the XML-RPC method "getDCFormatted" of
 * ZOPEPROVIDER + "/metadataMain", parses the returned string into a DOM document and inserts
 * it into the object, then sets the reference folder, the web URL, the index_meta URL, the
 * relationship and the VLP id.
 *
 * @param litid VLP literature id; used in the index_meta URL, the reference folder and the web URL
 * @return the value returned by obj.printXML()
 * @throws Exception declared broadly; URL construction, the XML-RPC call and XML parsing can fail
 */
33 private static String createFoxml(String litid) throws Exception{
c6929e63b0b8 first import
dwinter
parents:
diff changeset
34
c6929e63b0b8 first import
dwinter
parents:
diff changeset
// NOTE(review): the statement below has no right-hand side in this listing, so the method does
// not compile as shown. The expression that produced the pid must be restored from the real file.
35 String pid =
c6929e63b0b8 first import
dwinter
parents:
diff changeset
36 eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:"+pid,"vlpRessourceTemplate.xml");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
37 //obj.setTitle("lit11111 - title");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
38
c6929e63b0b8 first import
dwinter
parents:
diff changeset
39
c6929e63b0b8 first import
dwinter
parents:
diff changeset
40 //get DC Metadata from the Ressource
c6929e63b0b8 first import
dwinter
parents:
diff changeset
41 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
42 XmlRpcClient client = new XmlRpcClient();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
// This empty array is replaced with the real argument list before execute() is called.
43 Object[] params = new Object[]{};
c6929e63b0b8 first import
dwinter
parents:
diff changeset
44 config.setServerURL(new URL(ZOPEPROVIDER+"/metadataMain"));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
45 client.setConfig(config);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
46 params = new Object[]{"http://vlp.mpiwg-berlin.mpg.de/library/data/"+litid+"/index_meta"};
c6929e63b0b8 first import
dwinter
parents:
diff changeset
47 String result = (String) client.execute("getDCFormatted", params);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
48 System.out.println("dC:"+result);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
49
c6929e63b0b8 first import
dwinter
parents:
diff changeset
50
c6929e63b0b8 first import
dwinter
parents:
diff changeset
// Parse the returned DC string with a namespace-aware DOM parser and hand the document to the object.
51 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
52 factory.setNamespaceAware(true);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
53 DocumentBuilder db =factory.newDocumentBuilder();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
54 InputSource resultStream = new InputSource(new StringReader(result));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
55 Document dc = db.parse(resultStream);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
56 obj.insertDC(dc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
57
c6929e63b0b8 first import
dwinter
parents:
diff changeset
58 obj.setReferenceFolder("/mpiwg/online/permanent/vlp/"+litid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
59 //obj.generateXMLIndex(new File("/mpiwg/online/permanent/vlp/"+litid));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
60 //long date = new Date().getTime();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
// NOTE(review): in SimpleDateFormat "hh" is the 12-hour clock field (1-12) and a single "S"
// prints milliseconds without zero padding; 'Z' is a quoted literal while the formatter uses
// the default time zone. Looks like "HH", "SSS" and an explicit UTC zone were intended - confirm.
61 SimpleDateFormat dateformat = new SimpleDateFormat("yyyy-MM-dd'T'hh:mm:ss.S'Z'");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
62 String dateStr = dateformat.format(new Date());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
// The same timestamp string is passed to both URL entries.
63 obj.addWebUrl("http://vlp.mpiwg-berlin.mpg.de/references?id="+litid, litid, dateStr);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
64 obj.addIndexMetaUrl("http://vlp.mpiwg-berlin.mpg.de/library/data/"+litid+"/index_meta", litid, dateStr);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
65 obj.setRelationship("info:fedora/vlp:col1");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
66 obj.setVLPId(litid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
67 return obj.printXML();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
68
c6929e63b0b8 first import
dwinter
parents:
diff changeset
69 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
70
c6929e63b0b8 first import
dwinter
parents:
diff changeset
71
c6929e63b0b8 first import
dwinter
parents:
diff changeset
72
c6929e63b0b8 first import
dwinter
parents:
diff changeset
73
c6929e63b0b8 first import
dwinter
parents:
diff changeset
74
c6929e63b0b8 first import
dwinter
parents:
diff changeset
75
c6929e63b0b8 first import
dwinter
parents:
diff changeset
76 private static void ingestAllVLPObjects() throws MalformedURLException,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
77 XmlRpcException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
78 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
79 XmlRpcClient client = new XmlRpcClient();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
80 config.setServerURL(new URL("http://127.0.0.1:18080/vlp/vlp_coll/library/data"));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
81 client.setConfig(config);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
82 Object[] params = new Object[]{};
c6929e63b0b8 first import
dwinter
parents:
diff changeset
83 Object[] res = (Object[]) client.execute("getAllRessources", params);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
84
c6929e63b0b8 first import
dwinter
parents:
diff changeset
85 String string = "";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
86 for (Object re: res){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
87 Object[] rA = (Object [])re;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
88 String[] splitted = ((String) rA[0]).split("/");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
89 String id = splitted[splitted.length-1];
c6929e63b0b8 first import
dwinter
parents:
diff changeset
90
c6929e63b0b8 first import
dwinter
parents:
diff changeset
91 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
92 String xml = createFoxml(id);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
93
c6929e63b0b8 first import
dwinter
parents:
diff changeset
94 String ret = ingest(xml);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
95
c6929e63b0b8 first import
dwinter
parents:
diff changeset
96 config.setServerURL(new URL((String)rA[1]));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
97 client.setConfig(config);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
98 params = new Object[]{ret};
c6929e63b0b8 first import
dwinter
parents:
diff changeset
99 client.execute("setPID", params);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
100
c6929e63b0b8 first import
dwinter
parents:
diff changeset
101 System.out.println(ret);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
102
c6929e63b0b8 first import
dwinter
parents:
diff changeset
103
c6929e63b0b8 first import
dwinter
parents:
diff changeset
104 } catch (Exception e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
105 System.err.println("cannot get:"+(String) rA[0]);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
106
c6929e63b0b8 first import
dwinter
parents:
diff changeset
107 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
108 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
109 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
110
c6929e63b0b8 first import
dwinter
parents:
diff changeset
111 private static void modifyDCSet(String litid, String PID) throws XmlRpcException, ServiceException, IOException
c6929e63b0b8 first import
dwinter
parents:
diff changeset
112 {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
113 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
114 XmlRpcClient client = new XmlRpcClient();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
115 config.setServerURL(new URL(ZOPEPROVIDER+"/metadataMain"));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
116
c6929e63b0b8 first import
dwinter
parents:
diff changeset
117
c6929e63b0b8 first import
dwinter
parents:
diff changeset
118 client.setConfig(config);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
119 Object[] params = new Object[]{"http://vlp.mpiwg-berlin.mpg.de/library/data/"+litid+"/index_meta"};
c6929e63b0b8 first import
dwinter
parents:
diff changeset
120 String result = (String) client.execute("getDCFormatted", params);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
121 //System.out.println("dC:"+result);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
122
c6929e63b0b8 first import
dwinter
parents:
diff changeset
123 FedoraAPIM APIM;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
124 FedoraAPIA APIA;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
125
c6929e63b0b8 first import
dwinter
parents:
diff changeset
126 System.setProperty("javax.net.ssl.trustStore", "/usr/local/fedora/tomcat/conf/keystore");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
127
c6929e63b0b8 first import
dwinter
parents:
diff changeset
128
c6929e63b0b8 first import
dwinter
parents:
diff changeset
129 String baseURL = "https://127.0.0.1:8443/fedora";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
130 FedoraClient fc = new FedoraClient(baseURL, "fedoraAdmin", "XXXX");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
131 APIA=fc.getAPIA();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
132 APIM=fc.getAPIM();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
133 String content = "<oai_dc:dc xmlns:oai_dc='http://www.openarchives.org/OAI/2.0/oai_dc/' xmlns:dc='http://purl.org/dc/elements/1.1/'><dc:description>VLP Literature Object</dc:description><dc:publisher>MPIWG</dc:publisher>";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
134 content += result;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
135 content += "</oai_dc:dc>";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
136 //content="";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
137
c6929e63b0b8 first import
dwinter
parents:
diff changeset
138 //System.out.println(content);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
139
c6929e63b0b8 first import
dwinter
parents:
diff changeset
140
c6929e63b0b8 first import
dwinter
parents:
diff changeset
141 String chksum = MD5.asHex(new MD5(content).Final());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
142
c6929e63b0b8 first import
dwinter
parents:
diff changeset
143 //System.out.println(chksum);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
144 String[] em= new String[]{};
c6929e63b0b8 first import
dwinter
parents:
diff changeset
145 String pid = APIM.modifyDatastreamByValue(PID, "DC",em , "Dublin Core Record for this object", "text/xml", "",content.getBytes("utf-8"), null, null,"metadata changed",false);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
146 System.out.println(pid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
147
c6929e63b0b8 first import
dwinter
parents:
diff changeset
148 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
149
c6929e63b0b8 first import
dwinter
parents:
diff changeset
150
c6929e63b0b8 first import
dwinter
parents:
diff changeset
151 public static void main(String[] args) throws Exception {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
152
c6929e63b0b8 first import
dwinter
parents:
diff changeset
153 //ingestAllVLPObjects();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
154 changeDCMetadata();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
155 //modifyDCSet("lit14191","mpiwg:PR9MPM4E");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
156
c6929e63b0b8 first import
dwinter
parents:
diff changeset
157 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
158
c6929e63b0b8 first import
dwinter
parents:
diff changeset
159 private static void changeDCMetadata() throws MalformedURLException,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
160 ServiceException, IOException, RemoteException, XmlRpcException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
161 System.setProperty("javax.net.ssl.trustStore", "/usr/local/fedora/tomcat/conf/keystore");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
162
c6929e63b0b8 first import
dwinter
parents:
diff changeset
163 String baseURL = "https://127.0.0.1:8443/fedora";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
164 FedoraClient fc = new FedoraClient(baseURL, "fedoraAdmin", "XXX");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
165 FedoraAPIA APIA;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
166 APIA=fc.getAPIA();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
167 Condition[] condition =
c6929e63b0b8 first import
dwinter
parents:
diff changeset
168 {new Condition("pid", ComparisonOperator.has, "mpiwg:*")};
c6929e63b0b8 first import
dwinter
parents:
diff changeset
169 FieldSearchQuery query = new FieldSearchQuery(condition, null);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
170
c6929e63b0b8 first import
dwinter
parents:
diff changeset
171 NonNegativeInteger maxResults = new NonNegativeInteger("" + 200);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
172
c6929e63b0b8 first import
dwinter
parents:
diff changeset
173 FieldSearchResult res = APIA.findObjects(new String[]{"pid"}, maxResults, query);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
174 processResults(APIA, res);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
175 while (true)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
176 {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
177 ListSession ses = res.getListSession();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
178 if (ses == null)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
179 break;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
180 String tok = ses.getToken();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
181 if (tok == null)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
182 break;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
183
c6929e63b0b8 first import
dwinter
parents:
diff changeset
184 res = APIA.resumeFindObjects(res.getListSession().getToken());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
185 if (res.getResultList().length>0)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
186 processResults(APIA, res);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
187 else
c6929e63b0b8 first import
dwinter
parents:
diff changeset
188 break;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
189 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
190 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
191
c6929e63b0b8 first import
dwinter
parents:
diff changeset
192 private static void processResults(FedoraAPIA APIA, FieldSearchResult res)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
193 throws XmlRpcException, ServiceException, IOException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
194 ObjectFields[] fields = res.getResultList();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
195
c6929e63b0b8 first import
dwinter
parents:
diff changeset
196 System.out.println("found:"+fields.length);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
197 for (ObjectFields field: fields){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
198 String pid = field.getPid();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
199 MIMETypedStream ds = APIA.getDatastreamDissemination(pid, "vlp-admin", null);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
200 byte[] x = ds.getStream();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
201 String s = new String(x);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
202 //System.err.println(s);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
203
c6929e63b0b8 first import
dwinter
parents:
diff changeset
204 Pattern p = Pattern.compile("<vlp:identifier>(lit.*)</vlp:identifier>");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
205 Matcher m = p.matcher(s);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
206 m.find();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
207 String lit = m.group(1);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
208 System.out.println(lit);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
209 modifyDCSet(lit, pid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
210 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
211 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
212 }