annotate src/de/mpiwg/itgroup/metadataManager/pid/DCTransformer.java @ 2:38d823b66aff default tip

minor
author dwinter
date Mon, 17 Sep 2012 16:42:53 +0200
parents 90a19cbda471
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
90a19cbda471 first ingest
dwinter
parents:
diff changeset
1 package de.mpiwg.itgroup.metadataManager.pid;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
2
90a19cbda471 first ingest
dwinter
parents:
diff changeset
3 import java.io.ByteArrayInputStream;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
4 import java.io.IOException;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
5 import java.io.InputStream;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
6 import java.io.StringReader;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
7 import java.io.UnsupportedEncodingException;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
8 import java.net.MalformedURLException;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
9 import java.net.URL;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
10
90a19cbda471 first ingest
dwinter
parents:
diff changeset
11 import javax.xml.parsers.DocumentBuilder;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
12 import javax.xml.parsers.DocumentBuilderFactory;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
13
90a19cbda471 first ingest
dwinter
parents:
diff changeset
14 import org.apache.log4j.Logger;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
15 import org.apache.xmlrpc.XmlRpcException;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
16 import org.apache.xmlrpc.client.XmlRpcClient;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
17 import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
18 import org.jdom.Document;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
19 import org.jdom.Element;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
20 import org.jdom.JDOMException;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
21 import org.jdom.input.SAXBuilder;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
22 import org.jdom.xpath.XPath;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
23
90a19cbda471 first ingest
dwinter
parents:
diff changeset
24 import org.xml.sax.InputSource;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
25
90a19cbda471 first ingest
dwinter
parents:
diff changeset
26
90a19cbda471 first ingest
dwinter
parents:
diff changeset
27
90a19cbda471 first ingest
dwinter
parents:
diff changeset
28 public class DCTransformer {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
29 static public String MPIWG = "http://www.mpiwg-berlin.mpg.de/ns/mpiwg";
90a19cbda471 first ingest
dwinter
parents:
diff changeset
30
2
dwinter
parents: 0
diff changeset
31 //public static String converterProvider = "http://echoneu.mpiwg-berlin.mpg.de/metadata";
dwinter
parents: 0
diff changeset
32 public static String converterProvider = "http://dw.mpiwg-berlin.mpg.de:8080/echo_nav/echo_pages/metadata";
0
90a19cbda471 first ingest
dwinter
parents:
diff changeset
33 private String metaDataLink;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
34 private Logger logger = Logger.getLogger("transformerLogger");
90a19cbda471 first ingest
dwinter
parents:
diff changeset
35
90a19cbda471 first ingest
dwinter
parents:
diff changeset
36 public DCTransformer(String metaDataLink) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
37
90a19cbda471 first ingest
dwinter
parents:
diff changeset
38 this.metaDataLink=metaDataLink;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
39 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
40
90a19cbda471 first ingest
dwinter
parents:
diff changeset
41 public Element getDCFromIndexMeta() {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
42 //Verbinde mit metadataprovider
90a19cbda471 first ingest
dwinter
parents:
diff changeset
43 XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
44
90a19cbda471 first ingest
dwinter
parents:
diff changeset
45
90a19cbda471 first ingest
dwinter
parents:
diff changeset
46 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
47 config.setServerURL(new URL(converterProvider));
90a19cbda471 first ingest
dwinter
parents:
diff changeset
48 } catch (MalformedURLException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
49 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
50 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
51 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
52 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
53
90a19cbda471 first ingest
dwinter
parents:
diff changeset
54 XmlRpcClient client = new XmlRpcClient();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
55 client.setConfig(config);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
56 Object[] params = new Object[] { metaDataLink };
90a19cbda471 first ingest
dwinter
parents:
diff changeset
57
90a19cbda471 first ingest
dwinter
parents:
diff changeset
58 String result;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
59 try {
2
dwinter
parents: 0
diff changeset
60 Object res = client.execute("getDCDataFromPath", params);
dwinter
parents: 0
diff changeset
61 result = (String) res;
dwinter
parents: 0
diff changeset
62
0
90a19cbda471 first ingest
dwinter
parents:
diff changeset
63 } catch (XmlRpcException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
64 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
65 logger.error("CANNOT connector to:"+converterProvider);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
66 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
67 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
68 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
69 logger.debug("dC:"+result);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
70
90a19cbda471 first ingest
dwinter
parents:
diff changeset
71 SAXBuilder db = new SAXBuilder();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
72 InputStream resultStream;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
73 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
74 resultStream = new ByteArrayInputStream(result.getBytes("utf-8"));
90a19cbda471 first ingest
dwinter
parents:
diff changeset
75 } catch (UnsupportedEncodingException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
76 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
77 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
78 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
79 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
80 Document dc;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
81 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
82 dc = db.build(resultStream);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
83 } catch (JDOMException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
84 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
85 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
86 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
87 } catch (IOException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
88 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
89 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
90 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
91 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
92
90a19cbda471 first ingest
dwinter
parents:
diff changeset
93 return dc.getRootElement();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
94 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
95
90a19cbda471 first ingest
dwinter
parents:
diff changeset
96 public Element getContentOfMetaTag() {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
97 SAXBuilder db = new SAXBuilder();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
98
90a19cbda471 first ingest
dwinter
parents:
diff changeset
99 Document dc;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
100 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
101 dc = db.build(metaDataLink);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
102 } catch (JDOMException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
103 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
104 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
105 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
106 } catch (IOException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
107 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
108 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
109 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
110 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
111
90a19cbda471 first ingest
dwinter
parents:
diff changeset
112 Element retElement = null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
113 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
114 XPath xp = getXPath("//meta");
90a19cbda471 first ingest
dwinter
parents:
diff changeset
115 retElement = (Element) xp.selectSingleNode(dc);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
116
90a19cbda471 first ingest
dwinter
parents:
diff changeset
117 } catch (JDOMException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
118 //nothing to be done
90a19cbda471 first ingest
dwinter
parents:
diff changeset
119 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
120 if (retElement==null) { // versuche noch mal mit namespace
90a19cbda471 first ingest
dwinter
parents:
diff changeset
121 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
122
90a19cbda471 first ingest
dwinter
parents:
diff changeset
123 XPath xp = getXPath("//mpiwg:meta");
90a19cbda471 first ingest
dwinter
parents:
diff changeset
124 retElement = (Element) xp.selectSingleNode(dc);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
125
90a19cbda471 first ingest
dwinter
parents:
diff changeset
126 } catch (JDOMException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
127 //nothing to be done
90a19cbda471 first ingest
dwinter
parents:
diff changeset
128 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
129
90a19cbda471 first ingest
dwinter
parents:
diff changeset
130 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
131 return retElement;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
132 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
133
90a19cbda471 first ingest
dwinter
parents:
diff changeset
134
90a19cbda471 first ingest
dwinter
parents:
diff changeset
135 private XPath getXPath(String xpath) throws JDOMException{
90a19cbda471 first ingest
dwinter
parents:
diff changeset
136 XPath xpathResources = XPath.newInstance(xpath);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
137
90a19cbda471 first ingest
dwinter
parents:
diff changeset
138 xpathResources.addNamespace("mpiwg",MPIWG);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
139
90a19cbda471 first ingest
dwinter
parents:
diff changeset
140 return xpathResources;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
141 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
142
90a19cbda471 first ingest
dwinter
parents:
diff changeset
143 public String getImagePathFromIndexMeta() {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
144 SAXBuilder db = new SAXBuilder();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
145
90a19cbda471 first ingest
dwinter
parents:
diff changeset
146 Document dc;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
147 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
148 dc = db.build(metaDataLink);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
149 } catch (JDOMException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
150 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
151 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
152 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
153 } catch (IOException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
154 // TODO Auto-generated catch block
90a19cbda471 first ingest
dwinter
parents:
diff changeset
155 e.printStackTrace();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
156 return null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
157 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
158
90a19cbda471 first ingest
dwinter
parents:
diff changeset
159 Element retElement = null;
90a19cbda471 first ingest
dwinter
parents:
diff changeset
160 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
161 XPath xp = getXPath("//texttool/image");
90a19cbda471 first ingest
dwinter
parents:
diff changeset
162 retElement = (Element) xp.selectSingleNode(dc);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
163
90a19cbda471 first ingest
dwinter
parents:
diff changeset
164 } catch (JDOMException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
165 //nothing to be done
90a19cbda471 first ingest
dwinter
parents:
diff changeset
166 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
167 if (retElement==null) { // versuche noch mal mit namespace
90a19cbda471 first ingest
dwinter
parents:
diff changeset
168 try {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
169 XPath xp = getXPath("//mpiwg:texttool/mpiwg:image");
90a19cbda471 first ingest
dwinter
parents:
diff changeset
170 retElement = (Element) xp.selectSingleNode(dc);
90a19cbda471 first ingest
dwinter
parents:
diff changeset
171
90a19cbda471 first ingest
dwinter
parents:
diff changeset
172 } catch (JDOMException e) {
90a19cbda471 first ingest
dwinter
parents:
diff changeset
173 //nothing to be done
90a19cbda471 first ingest
dwinter
parents:
diff changeset
174 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
175
90a19cbda471 first ingest
dwinter
parents:
diff changeset
176 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
177 if (retElement==null)
90a19cbda471 first ingest
dwinter
parents:
diff changeset
178 return "";
90a19cbda471 first ingest
dwinter
parents:
diff changeset
179 return retElement.getTextTrim();
90a19cbda471 first ingest
dwinter
parents:
diff changeset
180 }
90a19cbda471 first ingest
dwinter
parents:
diff changeset
181
90a19cbda471 first ingest
dwinter
parents:
diff changeset
182 }