changeset 583:ca0274423382

follow changes in html format of new text-backend.
author casties
date Mon, 12 Nov 2012 18:12:33 +0100
parents bf0f514b6f92
children 011905457a5f
files MpiwgXmlTextServer.py css/docuviewer.css
diffstat 2 files changed, 57 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/MpiwgXmlTextServer.py	Mon Nov 12 15:27:45 2012 +0100
+++ b/MpiwgXmlTextServer.py	Mon Nov 12 18:12:33 2012 +0100
@@ -295,11 +295,6 @@
                 if punditMode:
                     self._addPunditAttributes(pagediv, pageinfo, docinfo)
                     
-                # TODO: move empty page text
-                ep = dom.find(".//div[@class='emptyPage']")
-                if ep is not None:
-                    pagediv.append(ep)
-                 
                 s = serialize(pagediv)
                 logging.debug("getTextPage done in %s"%(datetime.now()-startTime))    
                 return s
@@ -462,6 +457,55 @@
             except:
                 logging.warn("processFigures: strange figure!")
                 
+
+    def _cleanSearchResult(self, pagediv):
+        """fixes search result html (simplifies figures, line breaks (lb) and page breaks (pb))"""
+        # replace content of figure-tag with its figureNumText (tail text is kept)
+        for fig in pagediv.findall(".//span[@class='figure']"):
+            txt = fig.findtext(".//span[@class='figureNumText']")
+            tail = fig.tail
+            fig.clear()
+            fig.set('class', 'figure')
+            fig.text = txt
+            fig.tail = tail
+                
+        # replace lb-tag with "//"
+        for lb in pagediv.findall(".//br[@class='lb']"):
+            lb.tag = 'span'
+            lb.text = '//'
+        
+        # replace pb-tag with "///"
+        for pb in pagediv.findall(".//span[@class='pb']"):
+            tail = pb.tail
+            pb.clear()
+            pb.set('class', 'pb')
+            pb.text = '///'
+            pb.tail = tail
+        
+        return pagediv
+    
+    def _cleanSearchResult2(self, pagediv):
+        """fixes search result html (change pbs and figures)"""
+        # etree's XPath subset cannot select class.startswith('figure'), so filter all span[@class] here
+        divs = pagediv.findall(".//span[@class]")
+        for d in divs:
+            cls = d.get('class')
+            if cls.startswith('figure'):
+                # replace figure-tag with figureNumText
+                txt = d.findtext(".//span[@class='figureNumText']")
+                d.clear()
+                d.set('class', 'figure')
+                d.text = txt
+                
+            elif cls.startswith('pb'):
+                # replace pb-tag with "//"
+                d.clear()
+                d.set('class', 'pb')
+                d.text = '//'
+        
+        return pagediv
+    
+
     
     def _fixEmptyDivs(self, pagediv):
         """fixes empty div-tags by inserting a space"""
@@ -476,8 +520,8 @@
 
     def getSearchResults(self, mode, query=None, pageinfo=None, docinfo=None):
         """loads list of search results and stores XML in docinfo"""
-        
-        logging.debug("getSearchResults mode=%s query=%s"%(mode, query))
+        normMode = pageinfo.get('characterNormalization', 'reg')
+        logging.debug("getSearchResults mode=%s query=%s norm=%s"%(mode, query, normMode))
         if mode == "none":
             return docinfo
               
@@ -486,7 +530,7 @@
         cachedQuery = docinfo.get('cachedQuery', None)
         if cachedQuery is not None:
             # cached search result
-            if cachedQuery == '%s_%s'%(mode,query):
+            if cachedQuery == '%s_%s_%s'%(mode,query,normMode):
                 # same query
                 return docinfo
             
@@ -496,7 +540,7 @@
                 del docinfo['results']
         
         # cache query
-        docinfo['cachedQuery'] = '%s_%s'%(mode,query)
+        docinfo['cachedQuery'] = '%s_%s_%s'%(mode,query,normMode)
         
         # fetch full results
         docpath = docinfo['textURLPath']
@@ -509,6 +553,9 @@
         results = []
         try:
             dom = ET.fromstring(pagexml)
+            # clean html output
+            self._processWTags('plain', normMode, dom)
+            self._cleanSearchResult(dom)
             # page content is currently in multiple <td align=left>
             alldivs = dom.findall(".//tr[@class='hit']")
             for div in alldivs:
@@ -532,7 +579,7 @@
     
 
     def getResultsPage(self, mode="text", query=None, pn=None, start=None, size=None, pageinfo=None, docinfo=None):
-        """returns single page from the table of contents"""
+        """returns single page from the list of search results"""
         logging.debug("getResultsPage mode=%s, pn=%s"%(mode,pn))
         # get (cached) result
         self.getSearchResults(mode=mode, query=query, pageinfo=pageinfo, docinfo=docinfo)
--- a/css/docuviewer.css	Mon Nov 12 15:27:45 2012 +0100
+++ b/css/docuviewer.css	Mon Nov 12 18:12:33 2012 +0100
@@ -293,20 +293,6 @@
     /* font-size: 70%;
 	vertical-align: super; */
 }
-/* handwritten */
-div.col.main div.content.text span.handwritten {
-	display: block;
-	/* float: left; */
-    margin-top: 0.5em;
-    margin-bottom: 0.5em;
-    padding: 5px;
-    border: 1px dashed silver;	
-}
-div.col.main div.content.text span.handwritten span.figureNum {
-    display: none;
-    /* font-size: 70%;
-	vertical-align: super; */
-}
 /* figure */
 div.col.main div.content.text span.figure {
     display: block;