Mercurial > hg > digilib-old
annotate common/src/main/java/digilib/meta/XMLMetaLoader.java @ 1158:2ee261676828 default tip
better out-of-the-box experience:
* digilib works without config files using sensible defaults
* new sample images folder used by default
* config files moved to templates
| author | robcast |
|---|---|
| date | Tue, 19 Feb 2013 17:32:25 +0100 |
| parents | 7779b37d1d05 |
| children |
| rev | line source |
|---|---|
| 130 | 1 /* XMLMetaLoader -- Load an XML format metadata into a Hashtable |
| 2 | |
| 3 Digital Image Library servlet components | |
| 4 | |
| 5 Copyright (C) 2003 Robert Casties (robcast@mail.berlios.de) | |
| 6 | |
| 7 This program is free software; you can redistribute it and/or modify it | |
| 8 under the terms of the GNU General Public License as published by the | |
| 9 Free Software Foundation; either version 2 of the License, or (at your | |
| 10 option) any later version. | |
| 11 | |
| 12 Please read license.txt for the full details. A copy of the GPL | |
| 13 may be found at http://www.gnu.org/copyleft/lgpl.html | |
| 14 | |
| 15 You should have received a copy of the GNU General Public License | |
| 16 along with this program; if not, write to the Free Software | |
| 17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
| 18 | |
| 19 */ | |
| 20 | |
| 590 | 21 package digilib.meta; |
| 130 | 22 |
| 23 import java.io.IOException; | |
| 24 import java.util.HashMap; | |
| 25 import java.util.LinkedList; | |
| 273 | 26 import java.util.Map; |
| 130 | 27 |
| 28 import javax.xml.parsers.ParserConfigurationException; | |
| 29 import javax.xml.parsers.SAXParser; | |
| 30 import javax.xml.parsers.SAXParserFactory; | |
| 31 | |
| 181 | 32 import org.apache.log4j.Logger; |
| 130 | 33 import org.xml.sax.Attributes; |
| 34 import org.xml.sax.SAXException; | |
| 35 import org.xml.sax.helpers.DefaultHandler; | |
| 36 | |
| 37 public class XMLMetaLoader { | |
| 38 | |
| 181 | 39 private Logger logger = Logger.getLogger(this.getClass()); |
| 130 | 40 private String metaTag = "meta"; |
| 41 private String fileTag = "file"; | |
| 42 private String fileNameTag = "name"; | |
| 43 private String filePathTag = "path"; | |
| 233 | 44 private String imgTag = "img"; |
| 45 private String collectTag = "context"; | |
| 130 | 46 |
| 47 public XMLMetaLoader() { | |
| 48 } | |
| 49 | |
| 50 /** | |
| 51 * inner class XMLMetaParser to be called by the parser | |
| 52 */ | |
| 53 private class XMLMetaParser extends DefaultHandler { | |
| 54 | |
| 531 | 55 private LinkedList<String> tags; |
| 56 private Map<String, MetadataMap> files; | |
| 57 private MetadataMap meta; | |
| 130 | 58 private StringBuffer content; |
| 233 | 59 private boolean collecting; |
| 60 private StringBuffer collectedContent; | |
| 130 | 61 private String fileName; |
| 62 private String filePath; | |
| 63 | |
|
139
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
64 /** |
|
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
65 * extracts the elements name from either localName ln or qName qn. |
|
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
66 * |
|
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
67 * @param ln localName |
|
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
68 * @param qn qName |
|
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
69 * @return element name |
|
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
70 */ |
|
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
71 private String getName(String ln, String qn) { |
|
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
72 if (ln != null) { |
|
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
73 if (ln.length() > 0) { |
|
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
74 return ln; |
|
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
75 } |
|
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
76 } |
|
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
77 // else it's qName (or nothing) |
|
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
78 return qn; |
|
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
79 } |
| 130 | 80 |
| 233 | 81 /** |
| 82 * returns all attributes as a String | |
| 83 * | |
| 84 * @param attrs | |
| 85 * @return | |
| 86 */ | |
| 87 private String getAttrString(Attributes attrs) { | |
| 88 StringBuffer s = new StringBuffer(); | |
| 89 for (int i = 0; i < attrs.getLength(); i++) { | |
| 90 String key = getName(attrs.getLocalName(i), attrs.getQName(i)); | |
| 246 | 91 s.append(" "+key+"=\""+attrs.getValue(i)+"\""); |
| 233 | 92 } |
| 93 return s.toString(); | |
| 94 } | |
| 95 | |
| 96 | |
| 130 | 97 // Parser calls this once at the beginning of a document |
| 98 public void startDocument() throws SAXException { | |
| 531 | 99 tags = new LinkedList<String>(); |
| 100 files = new HashMap<String, MetadataMap>(); | |
| 233 | 101 collecting = false; |
| 102 collectedContent = null; | |
| 130 | 103 } |
| 104 | |
| 105 // Parser calls this for each element in a document | |
| 106 public void startElement( | |
| 107 String namespaceURI, | |
| 108 String localName, | |
| 109 String qName, | |
| 110 Attributes atts) | |
| 111 throws SAXException { | |
| 112 | |
|
139
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
113 String name = getName(localName, qName); |
| 130 | 114 // open a new tag |
| 115 tags.addLast(name); | |
| 116 // start new content (no nesting of tags and content) | |
| 117 content = new StringBuffer(); | |
| 118 | |
| 119 if (name.equals(metaTag)) { | |
| 120 // new meta tag | |
| 531 | 121 meta = new MetadataMap(); |
| 233 | 122 collectedContent = new StringBuffer(); |
| 130 | 123 } else if (name.equals(fileTag)) { |
| 124 // new file tag | |
| 125 fileName = null; | |
| 126 filePath = null; | |
| 531 | 127 meta = new MetadataMap(); |
| 233 | 128 collectedContent = new StringBuffer(); |
| 129 } else if (name.equals(collectTag)) { | |
| 130 // start collecting | |
| 131 collecting = true; | |
| 132 if (collectedContent == null) { | |
| 133 collectedContent = new StringBuffer(); | |
| 134 } | |
| 135 } | |
| 136 | |
| 137 // record mode | |
| 138 if (collecting) { | |
| 139 collectedContent.append("<"+name); | |
| 140 collectedContent.append(getAttrString(atts)); | |
| 141 collectedContent.append(">"); | |
| 130 | 142 } |
| 143 } | |
| 144 | |
| 145 // parser calls this for all tag content (possibly more than once) | |
| 146 public void characters(char[] ch, int start, int length) | |
| 147 throws SAXException { | |
| 148 // append data to current string buffer | |
| 233 | 149 if (content == null) { |
| 150 content = new StringBuffer(); | |
| 151 } | |
| 130 | 152 content.append(ch, start, length); |
| 153 } | |
| 154 | |
| 155 // parser calls this at the end of each element | |
| 156 public void endElement( | |
| 157 String namespaceURI, | |
| 158 String localName, | |
| 159 String qName) | |
| 160 throws SAXException { | |
| 151 | 161 |
|
139
11cfe4c89fdc
Servlet version 1.11b1 with improved original-size.
robcast
parents:
130
diff
changeset
|
162 String name = getName(localName, qName); |
| 130 | 163 // exit the tag |
| 164 tags.removeLast(); | |
| 531 | 165 String lastTag = (tags.isEmpty()) ? "" : tags.getLast(); |
| 151 | 166 |
| 167 // was it a file/name tag? | |
| 233 | 168 if (name.equals(fileNameTag) && lastTag.equals(fileTag)) { |
| 130 | 169 // save name as filename |
| 151 | 170 if ((content != null) && (content.length() > 0)) { |
| 171 fileName = content.toString().trim(); | |
| 130 | 172 } |
| 233 | 173 content = null; |
| 130 | 174 return; |
| 175 } | |
| 176 | |
| 151 | 177 // was it a file/path tag? |
| 233 | 178 if (name.equals(filePathTag) && lastTag.equals(fileTag)) { |
| 130 | 179 // save path as filepath |
| 151 | 180 if ((content != null) && (content.length() > 0)) { |
| 181 filePath = content.toString().trim(); | |
| 130 | 182 } |
| 233 | 183 content = null; |
| 130 | 184 return; |
| 185 } | |
| 186 | |
| 187 // was it a file tag? | |
| 188 if (name.equals(fileTag)) { | |
| 189 // is there meta to save? | |
| 151 | 190 if ((meta != null) && (meta.size() > 0)) { |
| 191 // file name is (optional file/path) / file/name | |
| 130 | 192 String fn = null; |
| 151 | 193 |
| 194 if (fileName != null) { | |
| 195 if (filePath != null) { | |
| 196 fn = filePath + "/" + fileName; | |
| 197 } else { | |
| 198 fn = fileName; | |
| 199 } | |
| 130 | 200 } else { |
| 201 // no file name, no file | |
| 233 | 202 content = null; |
| 130 | 203 return; |
| 204 } | |
| 205 // save meta in file list | |
| 206 files.put(fn, meta); | |
| 207 } | |
| 233 | 208 content = null; |
| 130 | 209 return; |
| 210 } | |
| 211 | |
| 212 // was it a meta tag outside a file tag? | |
| 213 if (name.equals(metaTag) && !tags.contains(fileTag)) { | |
| 214 // save meta as dir meta | |
| 151 | 215 if ((meta != null) && (meta.size() > 0)) { |
| 130 | 216 files.put("", meta); |
| 217 } | |
| 233 | 218 content = null; |
| 130 | 219 return; |
| 220 } | |
| 221 | |
| 233 | 222 // is this inside an digilib info (=img) tag? |
| 223 if (lastTag.equals(imgTag)) { | |
| 130 | 224 // then add whatever this is |
| 151 | 225 if ((content != null) && (content.length() > 0)) { |
| 226 meta.put(name, content.toString().trim()); | |
| 130 | 227 } |
| 233 | 228 content = null; |
| 229 return; | |
| 130 | 230 } |
| 151 | 231 |
| 233 | 232 // is this the end of collectTag? |
| 233 if (name.equals(collectTag)) { | |
| 234 collecting = false; | |
| 235 collectedContent.append("</"+collectTag+">\n"); | |
| 236 // store collected stuff | |
| 237 meta.put(collectTag, collectedContent.toString()); | |
| 238 //logger.debug("collected: '"+collectedContent+"'"); | |
| 239 content = null; | |
| 240 return; | |
| 241 } | |
| 242 | |
| 243 // write collected content | |
| 244 if (collecting) { | |
| 245 String s = ""; | |
| 246 if ((content != null) && (content.length() > 0)) { | |
| 247 s = content.toString().trim(); | |
| 248 } | |
| 249 //logger.debug("collect:"+name+" = "+s); | |
| 250 collectedContent.append(s); | |
| 251 collectedContent.append("</"+name+">\n"); | |
| 252 content = null; | |
| 253 return; | |
| 254 } | |
| 130 | 255 } |
| 256 | |
| 257 } | |
| 258 | |
| 259 /** | |
| 260 * load and parse a file (as URL) | |
| 261 * returns HashMap with list data | |
| 262 */ | |
| 531 | 263 public Map<String, MetadataMap> loadURL(String path) throws SAXException, IOException { |
| 195 | 264 logger.debug("loading meta: "+path); |
| 130 | 265 // Create a JAXP SAXParserFactory and configure it |
| 266 SAXParserFactory spf = SAXParserFactory.newInstance(); | |
| 267 spf.setNamespaceAware(true); | |
| 268 | |
| 269 SAXParser parser = null; | |
| 270 try { | |
| 271 // Create a JAXP SAXParser | |
| 272 parser = spf.newSAXParser(); | |
| 273 | |
| 274 } catch (ParserConfigurationException e) { | |
| 275 throw new SAXException(e); | |
| 276 } | |
| 277 | |
| 278 // create a list parser (keeps the data!) | |
| 279 XMLMetaParser listParser = new XMLMetaParser(); | |
| 280 | |
| 281 // Tell the SAXParser to parse the XML document | |
| 282 parser.parse(path, listParser); | |
| 283 | |
| 284 return listParser.files; | |
| 285 } | |
| 286 | |
| 287 } |
