Mercurial > hg > digilib-old
annotate common/src/main/java/digilib/meta/XMLMetaLoader.java @ 948:cd8c1fe97607
small cleanups.
author | robcast |
---|---|
date | Mon, 02 Jan 2012 22:30:50 +0100 |
parents | 7779b37d1d05 |
children |
rev | line source |
---|---|
130 | 1 /* XMLMetaLoader -- Load an XML format metadata into a Hashtable |
2 | |
3 Digital Image Library servlet components | |
4 | |
5 Copyright (C) 2003 Robert Casties (robcast@mail.berlios.de) | |
6 | |
7 This program is free software; you can redistribute it and/or modify it | |
8 under the terms of the GNU General Public License as published by the | |
9 Free Software Foundation; either version 2 of the License, or (at your | |
10 option) any later version. | |
11 | |
12 Please read license.txt for the full details. A copy of the GPL | |
13 may be found at http://www.gnu.org/copyleft/lgpl.html | |
14 | |
15 You should have received a copy of the GNU General Public License | |
16 along with this program; if not, write to the Free Software | |
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
18 | |
19 */ | |
20 | |
590 | 21 package digilib.meta; |
130 | 22 |
23 import java.io.IOException; | |
24 import java.util.HashMap; | |
25 import java.util.LinkedList; | |
273 | 26 import java.util.Map; |
130 | 27 |
28 import javax.xml.parsers.ParserConfigurationException; | |
29 import javax.xml.parsers.SAXParser; | |
30 import javax.xml.parsers.SAXParserFactory; | |
31 | |
181 | 32 import org.apache.log4j.Logger; |
130 | 33 import org.xml.sax.Attributes; |
34 import org.xml.sax.SAXException; | |
35 import org.xml.sax.helpers.DefaultHandler; | |
36 | |
37 public class XMLMetaLoader { | |
38 | |
181 | 39 private Logger logger = Logger.getLogger(this.getClass()); |
130 | 40 private String metaTag = "meta"; |
41 private String fileTag = "file"; | |
42 private String fileNameTag = "name"; | |
43 private String filePathTag = "path"; | |
233 | 44 private String imgTag = "img"; |
45 private String collectTag = "context"; | |
130 | 46 |
47 public XMLMetaLoader() { | |
48 } | |
49 | |
50 /** | |
51 * inner class XMLMetaParser to be called by the parser | |
52 */ | |
53 private class XMLMetaParser extends DefaultHandler { | |
54 | |
531 | 55 private LinkedList<String> tags; |
56 private Map<String, MetadataMap> files; | |
57 private MetadataMap meta; | |
130 | 58 private StringBuffer content; |
233 | 59 private boolean collecting; |
60 private StringBuffer collectedContent; | |
130 | 61 private String fileName; |
62 private String filePath; | |
63 | |
139
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
64 /** |
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
65 * extracts the elements name from either localName ln or qName qn. |
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
66 * |
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
67 * @param ln localName |
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
68 * @param qn qName |
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
69 * @return element name |
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
70 */ |
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
71 private String getName(String ln, String qn) { |
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
72 if (ln != null) { |
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
73 if (ln.length() > 0) { |
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
74 return ln; |
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
75 } |
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
76 } |
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
77 // else it's qName (or nothing) |
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
78 return qn; |
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
79 } |
130 | 80 |
233 | 81 /** |
82 * returns all attributes as a String | |
83 * | |
84 * @param attrs | |
85 * @return | |
86 */ | |
87 private String getAttrString(Attributes attrs) { | |
88 StringBuffer s = new StringBuffer(); | |
89 for (int i = 0; i < attrs.getLength(); i++) { | |
90 String key = getName(attrs.getLocalName(i), attrs.getQName(i)); | |
246 | 91 s.append(" "+key+"=\""+attrs.getValue(i)+"\""); |
233 | 92 } |
93 return s.toString(); | |
94 } | |
95 | |
96 | |
130 | 97 // Parser calls this once at the beginning of a document |
98 public void startDocument() throws SAXException { | |
531 | 99 tags = new LinkedList<String>(); |
100 files = new HashMap<String, MetadataMap>(); | |
233 | 101 collecting = false; |
102 collectedContent = null; | |
130 | 103 } |
104 | |
105 // Parser calls this for each element in a document | |
106 public void startElement( | |
107 String namespaceURI, | |
108 String localName, | |
109 String qName, | |
110 Attributes atts) | |
111 throws SAXException { | |
112 | |
139
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
113 String name = getName(localName, qName); |
130 | 114 // open a new tag |
115 tags.addLast(name); | |
116 // start new content (no nesting of tags and content) | |
117 content = new StringBuffer(); | |
118 | |
119 if (name.equals(metaTag)) { | |
120 // new meta tag | |
531 | 121 meta = new MetadataMap(); |
233 | 122 collectedContent = new StringBuffer(); |
130 | 123 } else if (name.equals(fileTag)) { |
124 // new file tag | |
125 fileName = null; | |
126 filePath = null; | |
531 | 127 meta = new MetadataMap(); |
233 | 128 collectedContent = new StringBuffer(); |
129 } else if (name.equals(collectTag)) { | |
130 // start collecting | |
131 collecting = true; | |
132 if (collectedContent == null) { | |
133 collectedContent = new StringBuffer(); | |
134 } | |
135 } | |
136 | |
137 // record mode | |
138 if (collecting) { | |
139 collectedContent.append("<"+name); | |
140 collectedContent.append(getAttrString(atts)); | |
141 collectedContent.append(">"); | |
130 | 142 } |
143 } | |
144 | |
145 // parser calls this for all tag content (possibly more than once) | |
146 public void characters(char[] ch, int start, int length) | |
147 throws SAXException { | |
148 // append data to current string buffer | |
233 | 149 if (content == null) { |
150 content = new StringBuffer(); | |
151 } | |
130 | 152 content.append(ch, start, length); |
153 } | |
154 | |
155 // parser calls this at the end of each element | |
156 public void endElement( | |
157 String namespaceURI, | |
158 String localName, | |
159 String qName) | |
160 throws SAXException { | |
151 | 161 |
139
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
162 String name = getName(localName, qName); |
130 | 163 // exit the tag |
164 tags.removeLast(); | |
531 | 165 String lastTag = (tags.isEmpty()) ? "" : tags.getLast(); |
151 | 166 |
167 // was it a file/name tag? | |
233 | 168 if (name.equals(fileNameTag) && lastTag.equals(fileTag)) { |
130 | 169 // save name as filename |
151 | 170 if ((content != null) && (content.length() > 0)) { |
171 fileName = content.toString().trim(); | |
130 | 172 } |
233 | 173 content = null; |
130 | 174 return; |
175 } | |
176 | |
151 | 177 // was it a file/path tag? |
233 | 178 if (name.equals(filePathTag) && lastTag.equals(fileTag)) { |
130 | 179 // save path as filepath |
151 | 180 if ((content != null) && (content.length() > 0)) { |
181 filePath = content.toString().trim(); | |
130 | 182 } |
233 | 183 content = null; |
130 | 184 return; |
185 } | |
186 | |
187 // was it a file tag? | |
188 if (name.equals(fileTag)) { | |
189 // is there meta to save? | |
151 | 190 if ((meta != null) && (meta.size() > 0)) { |
191 // file name is (optional file/path) / file/name | |
130 | 192 String fn = null; |
151 | 193 |
194 if (fileName != null) { | |
195 if (filePath != null) { | |
196 fn = filePath + "/" + fileName; | |
197 } else { | |
198 fn = fileName; | |
199 } | |
130 | 200 } else { |
201 // no file name, no file | |
233 | 202 content = null; |
130 | 203 return; |
204 } | |
205 // save meta in file list | |
206 files.put(fn, meta); | |
207 } | |
233 | 208 content = null; |
130 | 209 return; |
210 } | |
211 | |
212 // was it a meta tag outside a file tag? | |
213 if (name.equals(metaTag) && !tags.contains(fileTag)) { | |
214 // save meta as dir meta | |
151 | 215 if ((meta != null) && (meta.size() > 0)) { |
130 | 216 files.put("", meta); |
217 } | |
233 | 218 content = null; |
130 | 219 return; |
220 } | |
221 | |
233 | 222 // is this inside an digilib info (=img) tag? |
223 if (lastTag.equals(imgTag)) { | |
130 | 224 // then add whatever this is |
151 | 225 if ((content != null) && (content.length() > 0)) { |
226 meta.put(name, content.toString().trim()); | |
130 | 227 } |
233 | 228 content = null; |
229 return; | |
130 | 230 } |
151 | 231 |
233 | 232 // is this the end of collectTag? |
233 if (name.equals(collectTag)) { | |
234 collecting = false; | |
235 collectedContent.append("</"+collectTag+">\n"); | |
236 // store collected stuff | |
237 meta.put(collectTag, collectedContent.toString()); | |
238 //logger.debug("collected: '"+collectedContent+"'"); | |
239 content = null; | |
240 return; | |
241 } | |
242 | |
243 // write collected content | |
244 if (collecting) { | |
245 String s = ""; | |
246 if ((content != null) && (content.length() > 0)) { | |
247 s = content.toString().trim(); | |
248 } | |
249 //logger.debug("collect:"+name+" = "+s); | |
250 collectedContent.append(s); | |
251 collectedContent.append("</"+name+">\n"); | |
252 content = null; | |
253 return; | |
254 } | |
130 | 255 } |
256 | |
257 } | |
258 | |
259 /** | |
260 * load and parse a file (as URL) | |
261 * returns HashMap with list data | |
262 */ | |
531 | 263 public Map<String, MetadataMap> loadURL(String path) throws SAXException, IOException { |
195 | 264 logger.debug("loading meta: "+path); |
130 | 265 // Create a JAXP SAXParserFactory and configure it |
266 SAXParserFactory spf = SAXParserFactory.newInstance(); | |
267 spf.setNamespaceAware(true); | |
268 | |
269 SAXParser parser = null; | |
270 try { | |
271 // Create a JAXP SAXParser | |
272 parser = spf.newSAXParser(); | |
273 | |
274 } catch (ParserConfigurationException e) { | |
275 throw new SAXException(e); | |
276 } | |
277 | |
278 // create a list parser (keeps the data!) | |
279 XMLMetaParser listParser = new XMLMetaParser(); | |
280 | |
281 // Tell the SAXParser to parse the XML document | |
282 parser.parse(path, listParser); | |
283 | |
284 return listParser.files; | |
285 } | |
286 | |
287 } |