annotate src/de/mpiwg/itgroup/metadataManager/pid/DCTransformer.java @ 0:90a19cbda471

first ingest
author dwinter
date Wed, 24 Nov 2010 16:54:52 +0100
parents
children 38d823b66aff
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
90a19cbda471 first ingest
dwinter
parents:
diff changeset
1 package de.mpiwg.itgroup.metadataManager.pid;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
2
90a19cbda471 first ingest
dwinter
parents:
diff changeset
3 import java.io.ByteArrayInputStream;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
4 import java.io.IOException;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
5 import java.io.InputStream;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
6 import java.io.StringReader;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
7 import java.io.UnsupportedEncodingException;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
8 import java.net.MalformedURLException;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
9 import java.net.URL;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
10
90a19cbda471 first ingest
dwinter
parents:
diff changeset
11 import javax.xml.parsers.DocumentBuilder;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
12 import javax.xml.parsers.DocumentBuilderFactory;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
13
90a19cbda471 first ingest
dwinter
parents:
diff changeset
14 import org.apache.log4j.Logger;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
15 import org.apache.xmlrpc.XmlRpcException;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
16 import org.apache.xmlrpc.client.XmlRpcClient;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
17 import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
18 import org.jdom.Document;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
19 import org.jdom.Element;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
20 import org.jdom.JDOMException;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
21 import org.jdom.input.SAXBuilder;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
22 import org.jdom.xpath.XPath;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
23
90a19cbda471 first ingest
dwinter
parents:
diff changeset
24 import org.xml.sax.InputSource;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
25
90a19cbda471 first ingest
dwinter
parents:
diff changeset
26
90a19cbda471 first ingest
dwinter
parents:
diff changeset
27
90a19cbda471 first ingest
dwinter
parents:
diff changeset
28 public class DCTransformer {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
29 static public String MPIWG = "http://www.mpiwg-berlin.mpg.de/ns/mpiwg";
90a19cbda471 first ingest
dwinter
parents:
diff changeset
30
90a19cbda471 first ingest
dwinter
parents:
diff changeset
31 public static String converterProvider = "http://127.0.0.1:28080/metadataMain";
90a19cbda471 first ingest
dwinter
parents:
diff changeset
32 private String metaDataLink;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
33 private Logger logger = Logger.getLogger("transformerLogger");
90a19cbda471 first ingest
dwinter
parents:
diff changeset
34
90a19cbda471 first ingest
dwinter
parents:
diff changeset
35 public DCTransformer(String metaDataLink) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
36
90a19cbda471 first ingest
dwinter
parents:
diff changeset
37 this.metaDataLink=metaDataLink;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
38 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
39
90a19cbda471 first ingest
dwinter
parents:
diff changeset
40 public Element getDCFromIndexMeta() {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
41 //Verbinde mit metadataprovider
90a19cbda471 first ingest
dwinter
parents:
diff changeset
42 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
43
90a19cbda471 first ingest
dwinter
parents:
diff changeset
44
90a19cbda471 first ingest
dwinter
parents:
diff changeset
45 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
46 config.setServerURL(new URL(converterProvider));
90a19cbda471 first ingest
dwinter
parents:
diff changeset
47 } catch (MalformedURLException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
48 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
49 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
50 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
51 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
52
90a19cbda471 first ingest
dwinter
parents:
diff changeset
53 XmlRpcClient client = new XmlRpcClient();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
54 client.setConfig(config);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
55 Object[] params = new Object[] { metaDataLink };
90a19cbda471 first ingest
dwinter
parents:
diff changeset
56
90a19cbda471 first ingest
dwinter
parents:
diff changeset
57 String result;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
58 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
59 result = (String) client.execute("getDCFormatted", params);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
60 } catch (XmlRpcException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
61 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
62 logger.error("CANNOT connector to:"+converterProvider);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
63 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
64 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
65 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
66 logger.debug("dC:"+result);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
67
90a19cbda471 first ingest
dwinter
parents:
diff changeset
68 SAXBuilder db = new SAXBuilder();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
69 InputStream resultStream;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
70 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
71 resultStream = new ByteArrayInputStream(result.getBytes("utf-8"));
90a19cbda471 first ingest
dwinter
parents:
diff changeset
72 } catch (UnsupportedEncodingException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
73 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
74 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
75 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
76 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
77 Document dc;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
78 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
79 dc = db.build(resultStream);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
80 } catch (JDOMException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
81 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
82 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
83 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
84 } catch (IOException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
85 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
86 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
87 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
88 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
89
90a19cbda471 first ingest
dwinter
parents:
diff changeset
90 return dc.getRootElement();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
91 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
92
90a19cbda471 first ingest
dwinter
parents:
diff changeset
93 public Element getContentOfMetaTag() {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
94 SAXBuilder db = new SAXBuilder();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
95
90a19cbda471 first ingest
dwinter
parents:
diff changeset
96 Document dc;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
97 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
98 dc = db.build(metaDataLink);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
99 } catch (JDOMException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
100 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
101 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
102 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
103 } catch (IOException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
104 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
105 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
106 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
107 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
108
90a19cbda471 first ingest
dwinter
parents:
diff changeset
109 Element retElement = null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
110 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
111 XPath xp = getXPath("//meta");
90a19cbda471 first ingest
dwinter
parents:
diff changeset
112 retElement = (Element) xp.selectSingleNode(dc);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
113
90a19cbda471 first ingest
dwinter
parents:
diff changeset
114 } catch (JDOMException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
115 //nothing to be done
90a19cbda471 first ingest
dwinter
parents:
diff changeset
116 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
117 if (retElement==null) { // versuche noch mal mit namespace
90a19cbda471 first ingest
dwinter
parents:
diff changeset
118 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
119
90a19cbda471 first ingest
dwinter
parents:
diff changeset
120 XPath xp = getXPath("//mpiwg:meta");
90a19cbda471 first ingest
dwinter
parents:
diff changeset
121 retElement = (Element) xp.selectSingleNode(dc);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
122
90a19cbda471 first ingest
dwinter
parents:
diff changeset
123 } catch (JDOMException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
124 //nothing to be done
90a19cbda471 first ingest
dwinter
parents:
diff changeset
125 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
126
90a19cbda471 first ingest
dwinter
parents:
diff changeset
127 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
128 return retElement;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
129 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
130
90a19cbda471 first ingest
dwinter
parents:
diff changeset
131
90a19cbda471 first ingest
dwinter
parents:
diff changeset
132 private XPath getXPath(String xpath) throws JDOMException{
90a19cbda471 first ingest
dwinter
parents:
diff changeset
133 XPath xpathResources = XPath.newInstance(xpath);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
134
90a19cbda471 first ingest
dwinter
parents:
diff changeset
135 xpathResources.addNamespace("mpiwg",MPIWG);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
136
90a19cbda471 first ingest
dwinter
parents:
diff changeset
137 return xpathResources;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
138 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
139
90a19cbda471 first ingest
dwinter
parents:
diff changeset
140 public String getImagePathFromIndexMeta() {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
141 SAXBuilder db = new SAXBuilder();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
142
90a19cbda471 first ingest
dwinter
parents:
diff changeset
143 Document dc;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
144 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
145 dc = db.build(metaDataLink);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
146 } catch (JDOMException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
147 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
148 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
149 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
150 } catch (IOException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
151 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
152 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
153 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
154 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
155
90a19cbda471 first ingest
dwinter
parents:
diff changeset
156 Element retElement = null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
157 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
158 XPath xp = getXPath("//texttool/image");
90a19cbda471 first ingest
dwinter
parents:
diff changeset
159 retElement = (Element) xp.selectSingleNode(dc);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
160
90a19cbda471 first ingest
dwinter
parents:
diff changeset
161 } catch (JDOMException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
162 //nothing to be done
90a19cbda471 first ingest
dwinter
parents:
diff changeset
163 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
164 if (retElement==null) { // versuche noch mal mit namespace
90a19cbda471 first ingest
dwinter
parents:
diff changeset
165 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
166 XPath xp = getXPath("//mpiwg:texttool/mpiwg:image");
90a19cbda471 first ingest
dwinter
parents:
diff changeset
167 retElement = (Element) xp.selectSingleNode(dc);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
168
90a19cbda471 first ingest
dwinter
parents:
diff changeset
169 } catch (JDOMException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
170 //nothing to be done
90a19cbda471 first ingest
dwinter
parents:
diff changeset
171 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
172
90a19cbda471 first ingest
dwinter
parents:
diff changeset
173 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
174 if (retElement==null)
90a19cbda471 first ingest
dwinter
parents:
diff changeset
175 return "";
90a19cbda471 first ingest
dwinter
parents:
diff changeset
176 return retElement.getTextTrim();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
177 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
178
90a19cbda471 first ingest
dwinter
parents:
diff changeset
179 }