diff software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/ArchimedesDocForeignLangContentHandler.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/ArchimedesDocForeignLangContentHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,176 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.doc;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.xml.sax.*;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.general.Transcoder;
+import de.mpg.mpiwg.berlin.mpdl.util.StringUtilEscapeChars;
+
+/**
+ * SAX {@link ContentHandler} that re-serializes the parsed document to an output file
+ * (UTF-8 encoded) while rewriting Greek {@code foreign} elements:
+ * <ul>
+ *   <li>the attribute {@code lang="greek"} of a {@code foreign} element is written as
+ *       {@code lang="grc"};</li>
+ *   <li>character data inside such an element (and its descendants) is passed through
+ *       {@link Transcoder#transcodeFromBetaCode2Unicode(String)} before it is written,
+ *       unless the directly enclosing element is one of the Archimedes metadata
+ *       elements (see {@link #isArchMetadata(String)}).</li>
+ * </ul>
+ * All character data and attribute values are escaped with
+ * {@link StringUtilEscapeChars#forXML(String)} before being written.
+ * Ignorable whitespace, processing instructions and skipped entities are not copied
+ * to the output.
+ */
+public class ArchimedesDocForeignLangContentHandler implements ContentHandler {
+  /** Lower-case names of the elements treated as Archimedes metadata. */
+  private static final String[] ARCH_METADATA_ELEMENT_NAMES = {
+    "info", "author", "title", "date", "place", "translator",
+    "lang", "cvs_file", "cvs_version", "comments", "locator"
+  };
+
+  /** Namespace declarations collected since the last start tag; emitted with the next start tag. */
+  private String xmlnsString = "";
+  private File outputFile;
+  private OutputStream out;
+  /** Innermost element that is currently open, or null outside of the root element. */
+  private Element currentElement;
+
+  /**
+   * Creates a handler that writes its result to the given file.
+   *
+   * @param outputFile file that is (re)created in {@link #startDocument()}
+   * @throws ApplicationException declared for callers; not thrown by this constructor
+   */
+  public ArchimedesDocForeignLangContentHandler(File outputFile) throws ApplicationException {
+    this.outputFile = outputFile;
+  }
+
+  /**
+   * Opens the output file and writes the XML declaration.
+   *
+   * @throws SAXException if the output file cannot be opened or written
+   */
+  public void startDocument() throws SAXException {
+    // start every document with a clean state so that a handler instance can be reused
+    xmlnsString = "";
+    currentElement = null;
+    try {
+      out = new BufferedOutputStream(new FileOutputStream(outputFile));
+    } catch (FileNotFoundException e) {
+      throw new SAXException(e);
+    }
+    write("<?xml version=\"1.0\"?>\n");
+  }
+
+  /**
+   * Closes the output stream (best effort).
+   */
+  public void endDocument() throws SAXException {
+    if (out == null)
+      return;
+    try {
+      out.close();
+    } catch (IOException ignored) {
+      // deliberately ignored: write() flushes after every chunk, so no buffered data is pending here
+    } finally {
+      out = null;
+    }
+  }
+
+  /**
+   * Writes a chunk of character data, transcoding it first when it belongs to a Greek
+   * {@code foreign} element and the enclosing element is not a metadata element.
+   * NOTE(review): each SAX chunk is transcoded on its own; confirm that the transcoder
+   * tolerates input that a parser has split in the middle of a word.
+   *
+   * @throws SAXException if transcoding or writing fails
+   */
+  public void characters(char[] c, int start, int length) throws SAXException {
+    String text = new String(c, start, length);
+    Element element = currentElement;
+    boolean transcode = element != null && element.inGreekForeign && ! isArchMetadata(element.name);
+    if (transcode) {
+      try {
+        text = transcodeFromBetaCode2Unicode(text);
+      } catch (ApplicationException e) {
+        throw new SAXException(e);
+      }
+    }
+    text = StringUtilEscapeChars.forXML(text);
+    if (element != null)
+      element.value = text;
+    write(text);
+  }
+
+  /** Ignorable whitespace is not copied to the output. */
+  public void ignorableWhitespace(char[] c, int start, int length) throws SAXException {
+  }
+
+  /** Processing instructions are not copied to the output. */
+  public void processingInstruction(String target, String data) throws SAXException {
+  }
+
+  /** The document locator is not used. */
+  public void setDocumentLocator(Locator locator) {
+  }
+
+  /**
+   * Remembers a namespace declaration so that it is written with the next start tag.
+   * An empty prefix denotes the default namespace and is written as {@code xmlns="..."}
+   * (writing {@code xmlns:="..."} would not be well-formed).
+   */
+  public void startPrefixMapping(String prefix, String uri) throws SAXException {
+    String declName = (prefix == null || prefix.length() == 0) ? "xmlns" : "xmlns:" + prefix;
+    // the URI ends up inside an attribute value, so it is escaped like every other attribute value
+    xmlnsString += declName + "=\"" + StringUtilEscapeChars.forXML(uri) + "\" ";
+  }
+
+  /** Nothing to do: declarations are emitted with the start tag of their element. */
+  public void endPrefixMapping(String prefix) throws SAXException {
+  }
+
+  /** Skipped entities are not copied to the output. */
+  public void skippedEntity(String name) throws SAXException {
+  }
+
+  /**
+   * Writes the start tag of an element, including pending namespace declarations and all
+   * attributes. For a {@code foreign} element the attribute {@code lang="greek"} is written
+   * as {@code lang="grc"} and the element is marked so that its character data is transcoded.
+   *
+   * @param name qualified element name; used for the written tag
+   * @param attrs attributes of the element; written with their qualified names
+   * @throws SAXException if writing fails
+   */
+  public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException {
+    Element newElement = new Element(null, name);
+    newElement.parent = currentElement;
+    if (currentElement != null) {
+      // language and "inside a Greek foreign element" are inherited by children
+      newElement.language = currentElement.language;
+      newElement.inGreekForeign = currentElement.inGreekForeign;
+    }
+    currentElement = newElement;
+    boolean isForeignElement = "foreign".equals(name);
+    StringBuilder attrBuffer = new StringBuilder();
+    int attrSize = attrs.getLength();
+    for (int i=0; i<attrSize; i++) {
+      String attrQName = attrs.getQName(i);
+      String attrValue = StringUtilEscapeChars.forXML(attrs.getValue(i));
+      boolean isLangAttr = "lang".equals(attrQName);
+      if (isLangAttr && attrValue != null) {
+        // an explicit lang attribute is set on the new element and overrides the inherited value
+        newElement.language = attrValue;
+      }
+      // replace "lang=greek" by "lang=grc"
+      if (isForeignElement && isLangAttr && "greek".equals(attrValue)) {
+        newElement.inGreekForeign = true;
+        attrValue = "grc";
+      }
+      attrBuffer.append(' ').append(attrQName).append("=\"").append(attrValue).append('"');
+    }
+    String attrString = attrBuffer.toString();
+    newElement.attrString = attrString;
+    if (xmlnsString.equals("")) {
+      write("<" + name + attrString + ">");
+    } else {
+      newElement.xmlnsString = xmlnsString;
+      write("<" + name + " " + xmlnsString + attrString + ">");
+    }
+    xmlnsString = "";
+  }
+
+  /**
+   * Writes the end tag and makes the parent element the current element again, so that
+   * text following a child element is handled in the context of its real parent.
+   *
+   * @throws SAXException if writing fails
+   */
+  public void endElement(String uri, String localName, String name) throws SAXException {
+    if (currentElement != null)
+      currentElement = currentElement.parent;
+    write("</" + name + ">");
+  }
+
+  /**
+   * Writes the string UTF-8 encoded to the output stream and flushes it, so that
+   * everything handled so far is passed on even if parsing is aborted later.
+   *
+   * @throws SAXException if the stream is not open or writing fails
+   */
+  private void write(String outStr) throws SAXException {
+    if (out == null)
+      throw new SAXException("Output stream is not open: startDocument() has not been called");
+    try {
+      byte[] bytes = outStr.getBytes("utf-8");
+      out.write(bytes, 0, bytes.length);
+      out.flush();
+    } catch (IOException e) {
+      throw new SAXException(e);
+    }
+  }
+
+  /** Delegates to the shared {@link Transcoder} instance. */
+  private String transcodeFromBetaCode2Unicode(String inputStr) throws ApplicationException {
+    Transcoder transcoder = Transcoder.getInstance();
+    return transcoder.transcodeFromBetaCode2Unicode(inputStr);
+  }
+
+  /**
+   * Tells whether the element name (compared case-insensitively, surrounding whitespace
+   * ignored) is one of the Archimedes metadata element names.
+   *
+   * @param elemName element name, may be null
+   * @return true if the name is a metadata element name, false otherwise (also for null)
+   */
+  private boolean isArchMetadata(String elemName) {
+    if (elemName == null)
+      return false;
+    // fixed locale: the default locale must not influence case folding (e.g. Turkish dotless i)
+    String elName = elemName.toLowerCase(java.util.Locale.ENGLISH).trim();
+    for (int i=0; i<ARCH_METADATA_ELEMENT_NAMES.length; i++) {
+      if (ARCH_METADATA_ELEMENT_NAMES[i].equals(elName))
+        return true;
+    }
+    return false;
+  }
+
+  /** State of one open element; open elements are linked to their parent. */
+  private static class Element {
+    String name;
+    String language;
+    String xmlnsString;
+    String attrString;
+    String value;
+    /** Enclosing element, or null for the root element. */
+    Element parent;
+    /** True if this element or one of its ancestors is a {@code foreign} element with {@code lang="greek"}. */
+    boolean inGreekForeign;
+
+    Element(String language, String name) {
+      this.language = language;
+      this.name = name;
+    }
+
+    /** Returns true if the language of this element is one of "el", "greek" or "grc". */
+    boolean isGreek() {
+      return language != null && (language.equals("el") || language.equals("greek") || language.equals("grc"));
+    }
+
+  }
+}