# HG changeset patch # User Josef Willenborg # Date 1303224686 -7200 # Node ID fba5577e49d96d4c9c786b40f88b1fdb75d9146d # Parent d6f528ad5d968918547c4ee97f2b4517f8b089de diverse Fehlerbehebungen diff -r d6f528ad5d96 -r fba5577e49d9 software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/externalObjects/app/ExtElement.java --- a/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/externalObjects/app/ExtElement.java Fri Mar 11 13:34:02 2011 +0100 +++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/externalObjects/app/ExtElement.java Tue Apr 19 16:51:26 2011 +0200 @@ -7,10 +7,8 @@ public class ExtElement extends ExtObject { private String pageNumber; - private String xmlNodeId; - private String before = "false"; - private String charPos; - private String xpath; + private String xpath; // path to element + private String point; // ".0", ".1" or a positive integer public ExtElement() { this.type = "element"; @@ -22,11 +20,19 @@ String uid = xmlUtil.evaluateToString(xmlStr, "/object/@uid", null); String dateStr = xmlUtil.evaluateToString(xmlStr, "/object/@modificationDate", null); String docId = xmlUtil.evaluateToString(xmlStr, "/object/@documentId", null); - String pageNumber = xmlUtil.evaluateToString(xmlStr, "/object/@pageNumber", null); - String xmlNodeId = xmlUtil.evaluateToString(xmlStr, "/object/@xmlNodeId", null); - String before = xmlUtil.evaluateToString(xmlStr, "/object/@before", null); - String charPos = xmlUtil.evaluateToString(xmlStr, "/object/@charPos", null); - String xpath = xmlUtil.evaluateToString(xmlStr, "/object/@xpath", null); + String xpointer = xmlUtil.evaluateToString(xmlStr, "/object/@xpointer", null); + String pageNumber = null; + String xpath = null; + String point = null; + if (xpointer != null) { + pageNumber = xpointer.replaceAll("#xpointer\\(id\\('page(.+)?'\\).*", "$1"); + if (xpointer.contains("point(")) { + xpath = xpointer.replaceAll("#xpointer\\(id\\('page.+?'\\)(.*)?/point\\(.+?\\)\\)", "$1"); + point = xpointer.replaceAll("#xpointer\\(id\\('page.+?'\\).*?/point\\((.+)?\\)\\)", "$1"); + } else { + xpath = xpointer.replaceAll("#xpointer\\(id\\('page.+?'\\)(.*)?.*?\\)", "$1"); + } + } String content = xmlUtil.evaluateToXmlString(xmlStr, "/object/content/*", null); Date modDate = xmlUtil.toDate(dateStr); ExtElement e = new ExtElement(); @@ -34,10 +40,8 @@ e.setModificationDate(modDate); e.setDocumentId(docId); e.setPageNumber(pageNumber); - e.setXmlNodeId(xmlNodeId); e.setXpath(xpath); - e.setBefore(before); - e.setCharPos(charPos); + e.setPoint(point); e.setContent(content); return e; } @@ -58,18 +62,17 @@ xmlString = xmlString + " uid=\"" + uid + "\""; if (documentId != null) xmlString = xmlString + " documentId=\"" + documentId + "\""; + if (xpath != null) + xmlString = xmlString + " xmlNodeId=\"" + xpath + "\""; if (pageNumber != null) - xmlString = xmlString + " pageNumber=\"" + pageNumber + "\""; - if (xmlNodeId != null) - xmlString = xmlString + " xmlNodeId=\"" + xmlNodeId + "\""; - if (before != null) - xmlString = xmlString + " before=\"" + before + "\""; - if (charPos != null) - xmlString = xmlString + " charPos=\"" + charPos + "\""; + xmlString = xmlString + " xpointer=\"#xpointer(id('page" + pageNumber + "')"; if (xpath != null) - xmlString = xmlString + " xpath=\"" + xpath + "\""; - xmlString = xmlString + ">"; + xmlString = xmlString + xpath; + if (point != null) + xmlString = xmlString + "/point(" + point + ")"; + xmlString = xmlString + ")\">"; if (content != null) { + // TODO wieder ausbauen // write the uid and modificationDate into the content node if (! content.contains("uid")) { int firstClose = content.indexOf(">"); @@ -90,20 +93,12 @@ this.xpath = xpath; } - public String getXmlNodeId() { - return xmlNodeId; + public String getPoint() { + return point; } - public void setXmlNodeId(String xmlNodeId) { - this.xmlNodeId = xmlNodeId; - } - - public String getCharPos() { - return charPos; - } - - public void setCharPos(String charPos) { - this.charPos = charPos; + public void setPoint(String point) { + this.point = point; } public String getPageNumber() { @@ -114,19 +109,4 @@ this.pageNumber = pageNumber; } - public boolean isBefore() { - if (before != null && before.equals("true")) - return true; - else - return false; - } - - public String getBefore() { - return before; - } - - public void setBefore(String before) { - this.before = before; - } - } diff -r d6f528ad5d96 -r fba5577e49d9 software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/externalObjects/app/ExternalObjectsHandler.java --- a/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/externalObjects/app/ExternalObjectsHandler.java Fri Mar 11 13:34:02 2011 +0100 +++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/externalObjects/app/ExternalObjectsHandler.java Tue Apr 19 16:51:26 2011 +0200 @@ -102,7 +102,7 @@ String docId = element.getDocumentId(); String pageNumber = element.getPageNumber(); String uid = element.getUid(); - String xmlNodeId = element.getXmlNodeId(); + String xpath = element.getXpath(); String hashKey = docId + "###" + pageNumber; boolean updated = false; try { @@ -118,8 +118,8 @@ String foundValueStr = new String(foundValueBytes, "utf-8"); ExtElement elem = ExtElement.parseXmlStr(foundValueStr); String elemUid = elem.getUid(); - String elemXmlNodeId = elem.getXmlNodeId(); - if (uid.equals(elemUid) && xmlNodeId.equals(elemXmlNodeId)) { + String elemXPath = elem.getXpath(); + if (uid.equals(elemUid) && xpath.equals(elemXPath)) { cursor.delete(); byte[] elementXmlStrBytes = elementXmlStr.getBytes("utf-8"); DatabaseEntry dbEntryValue = new DatabaseEntry(elementXmlStrBytes); @@ -143,7 +143,7 @@ String docId = element.getDocumentId(); String pageNumber = element.getPageNumber(); String uid = element.getUid(); - String xmlNodeId = element.getXmlNodeId(); + String xpath = element.getXpath(); String hashKey = docId + "###" + pageNumber; try { Database elementDB = dbEnvExternalObjects.getElementDB(); @@ -158,8 +158,8 @@ String foundValueStr = new String(foundValueBytes, "utf-8"); ExtElement elem = ExtElement.parseXmlStr(foundValueStr); String elemUid = elem.getUid(); - String elemXmlNodeId = elem.getXmlNodeId(); - if (uid.equals(elemUid) && xmlNodeId.equals(elemXmlNodeId)) { + String elemXPath = elem.getXpath(); + if (uid.equals(elemUid) && xpath.equals(elemXPath)) { cursor.delete(); } operationStatus = cursor.getNextDup(dbEntryKey, foundValue, LockMode.DEFAULT); @@ -204,15 +204,15 @@ private void test(ExtElement element) throws ApplicationException { String uid = element.getUid(); String docId = element.getDocumentId(); - String xmlNodeId = element.getXmlNodeId(); + String xpath = element.getXpath(); String pageNumber = element.getPageNumber(); String xmlStr = element.getXmlString(); if (uid == null) throw new ApplicationException("External object: no attribute \"uid\" specified in: " + xmlStr); if (docId == null) throw new ApplicationException("External object: no attribute \"documentId\" specified in: " + xmlStr); - if (xmlNodeId == null) - throw new ApplicationException("External object: no attribute \"xmlNodeId\" specified in: " + xmlStr); + if (xpath == null) + throw new ApplicationException("External object: no xpath in attribute \"xpointer\" specified in: " + xmlStr); if (pageNumber == null) throw new ApplicationException("External object: no attribute \"pageNumber\" specified in: " + xmlStr); } @@ -396,8 +396,7 @@ "" + ""; ExtElement e1 = ExtElement.parseXmlStr(objectXmlStr1); @@ -406,8 +405,7 @@ "" + ""; ExtElement e2 = ExtElement.parseXmlStr(objectXmlStr2); @@ -442,7 +440,8 @@ e.setModificationDate(now); e.setDocumentId("/archimedes/it/l223.xml"); e.setPageNumber("17"); - e.setXmlNodeId(sId); + e.setXpath(sId); + e.setPoint(".1"); e.setContent("This is a test note to element " + sId + " with this external link" + ""); createExternalElement(e); @@ -452,8 +451,8 @@ e2.setModificationDate(now); e2.setDocumentId("/archimedes/it/l223.xml"); e2.setPageNumber("17"); - e2.setXmlNodeId(sId2); - e2.setCharPos("18"); + e2.setXpath(sId2); + e2.setPoint("18"); e2.setContent("This is a test note to element " + sId2 + ""); createExternalElement(e2); @@ -482,8 +481,7 @@ "" + "" + "This is a test note to element " + xmlNodeId + " with this external link" + "" + "" + ""; diff -r d6f528ad5d96 -r fba5577e49d9 software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/DictionarizerContentHandler.java --- a/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/DictionarizerContentHandler.java Fri Mar 11 13:34:02 2011 +0100 +++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/DictionarizerContentHandler.java Tue Apr 19 16:51:26 2011 +0200 @@ -45,8 +45,6 @@ System.arraycopy(c, start, cCopy, 0, length); String charactersStr = String.valueOf(cCopy); if (charactersStr != null && ! charactersStr.equals("")) { - // cause there are problems during xsl transformations with ideographic characters without zwsp characters we put always a zwsp between ideographic characters - charactersStr = zwsp(charactersStr); if (currentElement != null) { Element charElement = new Element("characters", ELEMENT_TYPE_CHARACTERS); charElement.value = StringUtilEscapeChars.deresolveXmlEntities(charactersStr); @@ -68,6 +66,8 @@ public void startPrefixMapping(String prefix, String uri) throws SAXException { xmlnsString += "xmlns:" + prefix + "=\"" + uri + "\" "; + if (prefix != null && prefix.equals("")) + xmlnsString = "xmlns" + prefix + "=\"" + uri + "\" "; } public void endPrefixMapping(String prefix) throws SAXException { @@ -153,20 +153,6 @@ outputXmlFragment += outStr; } - /** - * Puts a zwsp between two ideographic characters (e.g. in CJK Unified Ideographs) - * @param str - * @return - */ - private String zwsp(String str) { - // based on Unicode 3.2 - String ideographic = "[\u3300-\u33ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff]"; - String regex = "(" + ideographic + ")(" + ideographic + ")"; - String retStr = str.replaceAll(regex, "$1\u200b$2"); - retStr = retStr.replaceAll(regex, "$1\u200b$2"); - return retStr; - } - private class Element { private int type; private String name; diff -r d6f528ad5d96 -r fba5577e49d9 software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/NormalizeCharsContentHandler.java --- a/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/NormalizeCharsContentHandler.java Fri Mar 11 13:34:02 2011 +0100 +++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/NormalizeCharsContentHandler.java Tue Apr 19 16:51:26 2011 +0200 @@ -58,6 +58,8 @@ public void startPrefixMapping(String prefix, String uri) throws SAXException { xmlnsString += "xmlns:" + prefix + "=\"" + uri + "\" "; + if (prefix != null && prefix.equals("")) + xmlnsString = "xmlns" + prefix + "=\"" + uri + "\" "; } public void endPrefixMapping(String prefix) throws SAXException { diff -r d6f528ad5d96 -r fba5577e49d9 software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Transcoder.java --- a/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Transcoder.java Fri Mar 11 13:34:02 2011 +0100 +++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Transcoder.java Tue Apr 19 16:51:26 2011 +0200 @@ -46,6 +46,11 @@ throw new ApplicationException(e); } } + // replace "small letter sigma" at the end of a word by the "small letter end sigma" + if (retStr != null && retStr.contains("σ")) { + retStr = retStr.replaceAll("(.*)σ(\\s)", "$1ς$2"); + retStr = retStr.replaceAll("(.*)σ($)", "$1ς$2"); + } return retStr; /* // alternative to JFlex diff -r d6f528ad5d96 -r fba5577e49d9 software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/converter/Converter.java --- a/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/converter/Converter.java Fri Mar 11 13:34:02 2011 +0100 +++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/converter/Converter.java Tue Apr 19 16:51:26 2011 +0200 @@ -110,12 +110,10 @@ String outputFileNameDonatusFrenchSup = OUT_DATA_DIR + "/" + "donatus-sup-fr-forms.xml"; instance.donatusSupplementsConvert("donatus-sup", "fr", inputFileNameDonatusFrenchSup, outputFileNameDonatusFrenchSup); instance.forms = new Hashtable>(); - */ // Italian String inputFileNameItalian = ORIG_ITALIAN_DATA_DIR + "/" + "ital.hash"; String outputFileNameItalian = OUT_DATA_DIR + "/" + "donatus-italian-forms.xml"; instance.donatusItalianConvert("donatus", "it", inputFileNameItalian, outputFileNameItalian); - /* String inputFileNameDonatusItalianSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-it-forms.csv"; String outputFileNameDonatusItalianSup = OUT_DATA_DIR + "/" + "donatus-sup-it-forms.xml"; instance.donatusSupplementsConvert("donatus-sup", "it", inputFileNameDonatusItalianSup, outputFileNameDonatusItalianSup); @@ -461,6 +459,15 @@ Transcoder transcoder = Transcoder.getInstance(); String encodedUnicodeForm = transcoder.transcodeFromBetaCode2Unicode(formName); String encodedUnicodeLemma = transcoder.transcodeFromBetaCode2Unicode(lemmaName); + // replace "small letter sigma" at the end of a word by the "small letter end sigma" + if (encodedUnicodeForm != null && encodedUnicodeForm.endsWith("σ")) { + int length = encodedUnicodeForm.length(); + encodedUnicodeForm = encodedUnicodeForm.substring(0, length - 1) + "ς"; + } + if (encodedUnicodeLemma != null && encodedUnicodeLemma.endsWith("σ")) { + int length = encodedUnicodeLemma.length(); + encodedUnicodeLemma = encodedUnicodeLemma.substring(0, length - 1) + "ς"; + } form.setFormName(encodedUnicodeForm); form.setLemmaName(encodedUnicodeLemma); return form; diff -r d6f528ad5d96 -r fba5577e49d9 software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/converter/PerseusContentHandler.java --- a/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/converter/PerseusContentHandler.java Fri Mar 11 13:34:02 2011 +0100 +++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/converter/PerseusContentHandler.java Tue Apr 19 16:51:26 2011 +0200 @@ -188,6 +188,15 @@ Transcoder transcoder = Transcoder.getInstance(); String encodedUnicodeForm = transcoder.transcodeFromBetaCode2Unicode(formName); String encodedUnicodeLemma = transcoder.transcodeFromBetaCode2Unicode(lemmaName); + // replace "small letter sigma" at the end of a word by the "small letter end sigma" + if (encodedUnicodeForm != null && encodedUnicodeForm.endsWith("σ")) { + int length = encodedUnicodeForm.length(); + encodedUnicodeForm = encodedUnicodeForm.substring(0, length - 1) + "ς"; + } + if (encodedUnicodeLemma != null && encodedUnicodeLemma.endsWith("σ")) { + int length = encodedUnicodeLemma.length(); + encodedUnicodeLemma = encodedUnicodeLemma.substring(0, length - 1) + "ς"; + } form.setFormName(encodedUnicodeForm); form.setLemmaName(encodedUnicodeLemma); return form; diff -r d6f528ad5d96 -r fba5577e49d9 software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/StringUtilEscapeChars.java --- a/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/StringUtilEscapeChars.java Fri Mar 11 13:34:02 2011 +0100 +++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/StringUtilEscapeChars.java Tue Apr 19 16:51:26 2011 +0200 @@ -8,6 +8,22 @@ import java.util.regex.Pattern; public class StringUtilEscapeChars { + + /** + * Puts a zwsp between two ideographic characters (e.g. in CJK Unified Ideographs) + * @param str + * @return + */ + public static String zwsp(String str) { + // based on Unicode 3.2 + String ideographic = "[\u3300-\u33ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff]"; + String regex = "(" + ideographic + ")(" + ideographic + ")"; + String retStr = str.replaceAll(regex, "$1\u200b$2"); + retStr = retStr.replaceAll(regex, "$1\u200b$2"); + return retStr; + } + + public static String deleteSpecialXmlEntities(String inputStr) { inputStr = inputStr.replaceAll("<", ""); inputStr = inputStr.replaceAll(">", "");