Mercurial > hg > mpdl-group
view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/ArchimedesDocContentHandler.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.lt.doc; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import org.xml.sax.*; import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; import de.mpg.mpiwg.berlin.mpdl.lt.general.Transcoder; import de.mpg.mpiwg.berlin.mpdl.util.StringUtilEscapeChars; public class ArchimedesDocContentHandler implements ContentHandler { private String xmlnsString = ""; private File outputFile; private String language; private String fromEncoding; private String toEncoding; private OutputStream out; private Element currentElement; public ArchimedesDocContentHandler(String language, String fromEncoding, String toEncoding, File outputFile) throws ApplicationException { this.language = language; this.outputFile = outputFile; this.fromEncoding = fromEncoding; this.toEncoding = toEncoding; } public void startDocument() throws SAXException { try { out = new BufferedOutputStream(new FileOutputStream(outputFile)); write("<?xml version=\"1.0\"?>\n"); } catch (FileNotFoundException e) { throw new SAXException(e); } } public void endDocument() throws SAXException { try { if (out != null) out.close(); } catch (Exception e) { // nothing: always close the stream at the end of the method } } public void characters(char[] c, int start, int length) throws SAXException { char[] cCopy = new char[length]; System.arraycopy(c, start, cCopy, 0, length); String charactersStr = String.valueOf(cCopy); if (charactersStr != null) { String elemName = null; if (currentElement != null) elemName = currentElement.name; if ((! isArchMetadata(elemName)) && (currentElement == null || currentElement.isGreek() || currentElement.isArabic())) { try { if (fromEncoding.equals("betacode") && toEncoding.equals("unicode")) { charactersStr = transcodeFromBetaCode2Unicode(charactersStr); } else if (fromEncoding.equals("buckwalter") && toEncoding.equals("unicode")) { charactersStr = transcodeFromBuckwalter2Unicode(charactersStr); } } catch (ApplicationException e) { throw new SAXException(e); } } charactersStr = StringUtilEscapeChars.forXML(charactersStr); if (currentElement != null) currentElement.value = charactersStr; write(charactersStr); } } public void ignorableWhitespace(char[] c, int start, int length) throws SAXException { } public void processingInstruction(String target, String data) throws SAXException { } public void setDocumentLocator(Locator locator) { } public void startPrefixMapping(String prefix, String uri) throws SAXException { xmlnsString += "xmlns:" + prefix + "=\"" + uri + "\" "; } public void endPrefixMapping(String prefix) throws SAXException { } public void skippedEntity(String name) throws SAXException { } public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException { currentElement = new Element(language, name); int attrSize = attrs.getLength(); String attrString = ""; for (int i=0; i<attrSize; i++) { String attrQName = attrs.getQName(i); String attrValue = attrs.getValue(i); attrValue = StringUtilEscapeChars.forXML(attrValue); attrString = attrString + " " + attrQName + "=\"" + attrValue + "\""; if (attrQName != null && attrQName.equals("lang") && attrValue != null) { currentElement.language = attrValue; } } currentElement.attrString = attrString; if (xmlnsString.equals("")) { write("<" + name + attrString + ">"); } else { currentElement.xmlnsString = xmlnsString; write("<" + name + " " + xmlnsString + attrString + ">"); } xmlnsString = ""; } public void endElement(String uri, String localName, String name) throws SAXException { currentElement = null; write("</" + name + ">"); } private void write(String outStr) throws SAXException { try { byte[] bytes = outStr.getBytes("utf-8"); out.write(bytes, 0, bytes.length); out.flush(); } catch (IOException e) { throw new SAXException(e); } } private String transcodeFromBetaCode2Unicode(String inputStr) throws ApplicationException { Transcoder transcoder = Transcoder.getInstance(); String encodedUnicodeStr = transcoder.transcodeFromBetaCode2Unicode(inputStr); return encodedUnicodeStr; } private String transcodeFromBuckwalter2Unicode(String inputStr) throws ApplicationException { Transcoder transcoder = Transcoder.getInstance(); String encodedUnicodeStr = transcoder.transcodeFromBuckwalter2Unicode(inputStr); return encodedUnicodeStr; } private boolean isArchMetadata(String elemName) { boolean isArchMetadata = false; if (elemName == null) return false; String elName = elemName.toLowerCase().trim(); if (elName.equals("info") || elName.equals("author") || elName.equals("title") || elName.equals("date") || elName.equals("place") || elName.equals("translator") || elName.equals("lang") || elName.equals("cvs_file") || elName.equals("cvs_version") || elName.equals("comments") || elName.equals("locator")) { isArchMetadata = true; } return isArchMetadata; } private class Element { String name; String language; String xmlnsString; String attrString; String value; Element(String language, String name) { this.language = language; this.name = name; } boolean isGreek() { boolean isGreek = false; if (language != null && language.equals("el")) isGreek = true; return isGreek; } boolean isArabic() { boolean isArabic = false; if (language != null && language.equals("ar")) isArabic = true; return isArabic; } } }