Mercurial > hg > documentViewer

--- a/MpiwgXmlTextServer.py	Tue Oct 16 17:34:40 2012 +0200
+++ b/MpiwgXmlTextServer.py	Tue Oct 16 19:46:53 2012 +0200
@@ -204,14 +204,11 @@
         textParams = {'docId': docpath,
                       'page': pn}

-        if 'characterNormalization' in pageinfo:
-            cn = pageinfo['characterNormalization']
-            # TODO: change values in form
-            if cn == 'regPlusNorm':
-                cn = 'norm'
-
-            textParams['normalization'] = cn
-
+        normMode = pageinfo.get('characterNormalization', 'reg')
+        # TODO: change values in form
+        if normMode == 'regPlusNorm':
+            normMode = 'norm'
+
         if not mode:
             # default is dict
             mode = 'text'
@@ -249,13 +246,13 @@
             textmode = 'xml'
             textParams['mode'] = 'untokenized'
             textParams['outputFormat'] = 'xmlDisplay'
-            textParams['normalization'] = 'orig'
+            textParams['normMode'] = 'orig'
         elif 'gis' in modes:
             #FIXME!
             textmode = 'gis'
         else:
             # text is default mode
-            textmode = 'text'
+            textmode = 'plain'
             textParams['mode'] = 'untokenized'
             textParams['outputFormat'] = 'html'

@@ -268,11 +265,14 @@
             return None

         # plain text or text-with-links mode
-        if textmode == "text" or textmode == "dict":
+        if textmode == "plain" or textmode == "dict":
             # the text is in div@class=text
             pagediv = dom.find(".//div[@class='text']")
             logging.debug("pagediv: %s"%repr(pagediv))
             if pagediv is not None:
+                # add textmode and normMode classes
+                pagediv.set('class', 'text %s %s'%(textmode, normMode))
+                #self._processWTags(textmode, normMode, pagediv)
                 #self._processPbTag(pagediv, pageinfo)
                 self._processFigures(pagediv, docinfo)
                 #self._fixEmptyDivs(pagediv)
@@ -342,6 +342,46 @@
         logging.error("getTextPage: error in text mode %s or text!"%(textmode))
         return None

+    def _processWTags(self, textMode, normMode, pagediv):
+        """Reduce each span.w in pagediv to the single word form selected by textMode/normMode.
+
+        In 'dict' mode the span is turned into the dictionary link: it takes the attributes
+        of its a-tag and the text of a/span[@class=normMode]. In any other mode it keeps only
+        the text of span[@class='nodictionary <normMode>']. Spans without a matching,
+        non-empty form are left untouched. Returns pagediv, which is modified in place.
+        """
+        logging.debug("processWTags(textMode=%s,norm=%s,pagediv"%(repr(textMode),repr(normMode)))
+        for wtag in pagediv.findall(".//span[@class='w']"):
+            attr = None
+            if textMode == 'dict':
+                # take a-tag and matching child
+                atag = wtag.find('a')
+                attr = atag.items() if atag is not None else None
+                form = wtag.find("a/span[@class='%s']"%normMode)
+            else:
+                # take matching child
+                form = wtag.find("span[@class='nodictionary %s']"%normMode)
+
+            # a missing a-tag or form span must not abort the whole page
+            text = form.text if form is not None else None
+            if text:
+                # replace wtag by new content
+                logging.debug("new w-tag attr=%s text=%s"%(attr,text))
+                # clear() also resets the tail (the text following the word), so keep it
+                tail = wtag.tail
+                wtag.clear()
+                wtag.tail = tail
+
+                if attr:
+                    # make dictionary link
+                    wtag.tag = 'a'
+                    wtag.attrib.update(dict(attr))
+
+                # text content
+                wtag.text = text
+
+        return pagediv
+
     def _processPbTag(self, pagediv, pageinfo):
         """extracts information from pb-tag and removes it from pagediv"""
         pbdiv = pagediv.find(".//span[@class='pb']")
--- a/css/docuviewer.css	Tue Oct 16 17:34:40 2012 +0200
+++ b/css/docuviewer.css	Tue Oct 16 19:46:53 2012 +0200
@@ -268,6 +268,26 @@
     margin-top: 0.5em;
     margin-bottom: 0.25em;
 }
+/* normalization forms: div.text has class orig, reg or norm; in each span.w the spans of the other two forms are hidden */
+div.col.main div.content.text div.text.orig span.w span.reg,
+div.col.main div.content.text div.text.orig span.w span.norm {
+    display: none;
+}
+div.col.main div.content.text div.text.reg span.w span.orig,
+div.col.main div.content.text div.text.reg span.w span.norm {
+    display: none;
+}
+div.col.main div.content.text div.text.norm span.w span.orig,
+div.col.main div.content.text div.text.norm span.w span.reg {
+    display: none;
+}
+/* dictionary forms: div.text.plain hides the a.dictionary links, div.text.dict hides the span.nodictionary forms */
+div.col.main div.content.text div.text.plain span.w a.dictionary {
+    display: none;
+}
+div.col.main div.content.text div.text.dict span.w span.nodictionary {
+    display: none;
+}
 /* page break */
 div.col.main div.content.text span.pb span.n,
 div.col.main div.content.text span.pb span.o {