130
|
/* XMLMetaLoader -- Load an XML format metadata into a Hashtable

  Digital Image Library servlet components

  Copyright (C) 2003 Robert Casties (robcast@mail.berlios.de)

  This program is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  Free Software Foundation; either version 2 of the License, or (at your
  option) any later version.

  Please read license.txt for the full details. A copy of the GPL
  may be found at http://www.gnu.org/copyleft/lgpl.html

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

*/
|
|
20
|
|
21 package digilib.io;
|
|
22
|
|
23 import java.io.IOException;
|
|
24 import java.util.HashMap;
|
|
25 import java.util.LinkedList;
|
273
|
26 import java.util.Map;
|
130
|
27
|
|
28 import javax.xml.parsers.ParserConfigurationException;
|
|
29 import javax.xml.parsers.SAXParser;
|
|
30 import javax.xml.parsers.SAXParserFactory;
|
|
31
|
181
|
32 import org.apache.log4j.Logger;
|
130
|
33 import org.xml.sax.Attributes;
|
|
34 import org.xml.sax.SAXException;
|
|
35 import org.xml.sax.helpers.DefaultHandler;
|
|
36
|
|
37 public class XMLMetaLoader {
|
|
38
|
181
|
39 private Logger logger = Logger.getLogger(this.getClass());
|
130
|
40 private String metaTag = "meta";
|
|
41 private String fileTag = "file";
|
|
42 private String fileNameTag = "name";
|
|
43 private String filePathTag = "path";
|
233
|
44 private String imgTag = "img";
|
|
45 private String collectTag = "context";
|
130
|
46
|
|
47 public XMLMetaLoader() {
|
|
48 }
|
|
49
|
|
50 /**
|
|
51 * inner class XMLMetaParser to be called by the parser
|
|
52 */
|
|
53 private class XMLMetaParser extends DefaultHandler {
|
|
54
|
531
|
55 private LinkedList<String> tags;
|
|
56 private Map<String, MetadataMap> files;
|
|
57 private MetadataMap meta;
|
130
|
58 private StringBuffer content;
|
233
|
59 private boolean collecting;
|
|
60 private StringBuffer collectedContent;
|
130
|
61 private String fileName;
|
|
62 private String filePath;
|
|
63
|
139
|
64 /**
|
|
65 * extracts the elements name from either localName ln or qName qn.
|
|
66 *
|
|
67 * @param ln localName
|
|
68 * @param qn qName
|
|
69 * @return element name
|
|
70 */
|
|
71 private String getName(String ln, String qn) {
|
|
72 if (ln != null) {
|
|
73 if (ln.length() > 0) {
|
|
74 return ln;
|
|
75 }
|
|
76 }
|
|
77 // else it's qName (or nothing)
|
|
78 return qn;
|
|
79 }
|
130
|
80
|
233
|
81 /**
|
|
82 * returns all attributes as a String
|
|
83 *
|
|
84 * @param attrs
|
|
85 * @return
|
|
86 */
|
|
87 private String getAttrString(Attributes attrs) {
|
|
88 StringBuffer s = new StringBuffer();
|
|
89 for (int i = 0; i < attrs.getLength(); i++) {
|
|
90 String key = getName(attrs.getLocalName(i), attrs.getQName(i));
|
246
|
91 s.append(" "+key+"=\""+attrs.getValue(i)+"\"");
|
233
|
92 }
|
|
93 return s.toString();
|
|
94 }
|
|
95
|
|
96
|
130
|
97 // Parser calls this once at the beginning of a document
|
|
98 public void startDocument() throws SAXException {
|
531
|
99 tags = new LinkedList<String>();
|
|
100 files = new HashMap<String, MetadataMap>();
|
233
|
101 collecting = false;
|
|
102 collectedContent = null;
|
130
|
103 }
|
|
104
|
|
105 // Parser calls this for each element in a document
|
|
106 public void startElement(
|
|
107 String namespaceURI,
|
|
108 String localName,
|
|
109 String qName,
|
|
110 Attributes atts)
|
|
111 throws SAXException {
|
|
112
|
139
|
113 String name = getName(localName, qName);
|
130
|
114 // open a new tag
|
|
115 tags.addLast(name);
|
|
116 // start new content (no nesting of tags and content)
|
|
117 content = new StringBuffer();
|
|
118
|
|
119 if (name.equals(metaTag)) {
|
|
120 // new meta tag
|
531
|
121 meta = new MetadataMap();
|
233
|
122 collectedContent = new StringBuffer();
|
130
|
123 } else if (name.equals(fileTag)) {
|
|
124 // new file tag
|
|
125 fileName = null;
|
|
126 filePath = null;
|
531
|
127 meta = new MetadataMap();
|
233
|
128 collectedContent = new StringBuffer();
|
|
129 } else if (name.equals(collectTag)) {
|
|
130 // start collecting
|
|
131 collecting = true;
|
|
132 if (collectedContent == null) {
|
|
133 collectedContent = new StringBuffer();
|
|
134 }
|
|
135 }
|
|
136
|
|
137 // record mode
|
|
138 if (collecting) {
|
|
139 collectedContent.append("<"+name);
|
|
140 collectedContent.append(getAttrString(atts));
|
|
141 collectedContent.append(">");
|
130
|
142 }
|
|
143 }
|
|
144
|
|
145 // parser calls this for all tag content (possibly more than once)
|
|
146 public void characters(char[] ch, int start, int length)
|
|
147 throws SAXException {
|
|
148 // append data to current string buffer
|
233
|
149 if (content == null) {
|
|
150 content = new StringBuffer();
|
|
151 }
|
130
|
152 content.append(ch, start, length);
|
|
153 }
|
|
154
|
|
155 // parser calls this at the end of each element
|
|
156 public void endElement(
|
|
157 String namespaceURI,
|
|
158 String localName,
|
|
159 String qName)
|
|
160 throws SAXException {
|
151
|
161
|
139
|
162 String name = getName(localName, qName);
|
130
|
163 // exit the tag
|
|
164 tags.removeLast();
|
531
|
165 String lastTag = (tags.isEmpty()) ? "" : tags.getLast();
|
151
|
166
|
|
167 // was it a file/name tag?
|
233
|
168 if (name.equals(fileNameTag) && lastTag.equals(fileTag)) {
|
130
|
169 // save name as filename
|
151
|
170 if ((content != null) && (content.length() > 0)) {
|
|
171 fileName = content.toString().trim();
|
130
|
172 }
|
233
|
173 content = null;
|
130
|
174 return;
|
|
175 }
|
|
176
|
151
|
177 // was it a file/path tag?
|
233
|
178 if (name.equals(filePathTag) && lastTag.equals(fileTag)) {
|
130
|
179 // save path as filepath
|
151
|
180 if ((content != null) && (content.length() > 0)) {
|
|
181 filePath = content.toString().trim();
|
130
|
182 }
|
233
|
183 content = null;
|
130
|
184 return;
|
|
185 }
|
|
186
|
|
187 // was it a file tag?
|
|
188 if (name.equals(fileTag)) {
|
|
189 // is there meta to save?
|
151
|
190 if ((meta != null) && (meta.size() > 0)) {
|
|
191 // file name is (optional file/path) / file/name
|
130
|
192 String fn = null;
|
151
|
193
|
|
194 if (fileName != null) {
|
|
195 if (filePath != null) {
|
|
196 fn = filePath + "/" + fileName;
|
|
197 } else {
|
|
198 fn = fileName;
|
|
199 }
|
130
|
200 } else {
|
|
201 // no file name, no file
|
233
|
202 content = null;
|
130
|
203 return;
|
|
204 }
|
|
205 // save meta in file list
|
|
206 files.put(fn, meta);
|
|
207 }
|
233
|
208 content = null;
|
130
|
209 return;
|
|
210 }
|
|
211
|
|
212 // was it a meta tag outside a file tag?
|
|
213 if (name.equals(metaTag) && !tags.contains(fileTag)) {
|
|
214 // save meta as dir meta
|
151
|
215 if ((meta != null) && (meta.size() > 0)) {
|
130
|
216 files.put("", meta);
|
|
217 }
|
233
|
218 content = null;
|
130
|
219 return;
|
|
220 }
|
|
221
|
233
|
222 // is this inside an digilib info (=img) tag?
|
|
223 if (lastTag.equals(imgTag)) {
|
130
|
224 // then add whatever this is
|
151
|
225 if ((content != null) && (content.length() > 0)) {
|
|
226 meta.put(name, content.toString().trim());
|
130
|
227 }
|
233
|
228 content = null;
|
|
229 return;
|
130
|
230 }
|
151
|
231
|
233
|
232 // is this the end of collectTag?
|
|
233 if (name.equals(collectTag)) {
|
|
234 collecting = false;
|
|
235 collectedContent.append("</"+collectTag+">\n");
|
|
236 // store collected stuff
|
|
237 meta.put(collectTag, collectedContent.toString());
|
|
238 //logger.debug("collected: '"+collectedContent+"'");
|
|
239 content = null;
|
|
240 return;
|
|
241 }
|
|
242
|
|
243 // write collected content
|
|
244 if (collecting) {
|
|
245 String s = "";
|
|
246 if ((content != null) && (content.length() > 0)) {
|
|
247 s = content.toString().trim();
|
|
248 }
|
|
249 //logger.debug("collect:"+name+" = "+s);
|
|
250 collectedContent.append(s);
|
|
251 collectedContent.append("</"+name+">\n");
|
|
252 content = null;
|
|
253 return;
|
|
254 }
|
130
|
255 }
|
|
256
|
|
257 }
|
|
258
|
|
259 /**
|
|
260 * load and parse a file (as URL)
|
|
261 * returns HashMap with list data
|
|
262 */
|
531
|
263 public Map<String, MetadataMap> loadURL(String path) throws SAXException, IOException {
|
195
|
264 logger.debug("loading meta: "+path);
|
130
|
265 // Create a JAXP SAXParserFactory and configure it
|
|
266 SAXParserFactory spf = SAXParserFactory.newInstance();
|
|
267 spf.setNamespaceAware(true);
|
|
268
|
|
269 SAXParser parser = null;
|
|
270 try {
|
|
271 // Create a JAXP SAXParser
|
|
272 parser = spf.newSAXParser();
|
|
273
|
|
274 } catch (ParserConfigurationException e) {
|
|
275 throw new SAXException(e);
|
|
276 }
|
|
277
|
|
278 // create a list parser (keeps the data!)
|
|
279 XMLMetaParser listParser = new XMLMetaParser();
|
|
280
|
|
281 // Tell the SAXParser to parse the XML document
|
|
282 parser.parse(path, listParser);
|
|
283
|
|
284 return listParser.files;
|
|
285 }
|
|
286
|
|
287 }
|