Context Navigation

← Previous Changeset
Next Changeset →

Changeset 583:ca0274423382 in documentViewer

Timestamp:

Nov 12, 2012, 5:12:33 PM (11 years ago)

Author:

casties

Branch:

default

Message:

Follow changes in the HTML format of the new text backend.

Files:

2 edited

MpiwgXmlTextServer.py (modified) (7 diffs)
css/docuviewer.css (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

MpiwgXmlTextServer.py

-                      r579
+                      r583
                     self._addPunditAttributes(pagediv, pageinfo, docinfo)
-                # TODO: move empty page text
-                ep = dom.find(".//div[@class='emptyPage']")
-                if ep is not None:
-                    pagediv.append(ep)
                 s = serialize(pagediv)
                 logging.debug("getTextPage done in %s"%(datetime.now()-startTime))
 …
                 logging.warn("processFigures: strange figure!")
+    def _cleanSearchResult(self, pagediv):
+        """fixes search result html (change pbs and figures)"""
+        # replace figure-tag with figureNumText
+        for fig in pagediv.findall(".//span[@class='figure']"):
+            txt = fig.findtext(".//span[@class='figureNumText']")
+            tail = fig.tail
+            fig.clear()
+            fig.set('class', 'figure')
+            fig.text = txt
+            fig.tail = tail
+        # replace lb-tag with "//"
+        for lb in pagediv.findall(".//br[@class='lb']"):
+            lb.tag = 'span'
+            lb.text = '//'
+        # replace pb-tag with "///"
+        for pb in pagediv.findall(".//span[@class='pb']"):
+            tail = pb.tail
+            pb.clear()
+            pb.set('class', 'pb')
+            pb.text = '///'
+            pb.tail = tail
+        return pagediv
+    def _cleanSearchResult2(self, pagediv):
+        """fixes search result html (change pbs and figures)"""
+        # unfortunately etree can not select class.startswith('figure')
+        divs = pagediv.findall(".//span[@class]")
+        for d in divs:
+            cls = d.get('class')
+            if cls.startswith('figure'):
+                # replace figure-tag with figureNumText
+                txt = d.findtext(".//span[@class='figureNumText']")
+                d.clear()
+                d.set('class', 'figure')
+                d.text = txt
+            elif cls.startswith('pb'):
+                # replace pb-tag with "//"
+                d.clear()
+                d.set('class', 'pb')
+                d.text = '//'
+        return pagediv
     def _fixEmptyDivs(self, pagediv):
 …
     def getSearchResults(self, mode, query=None, pageinfo=None, docinfo=None):
         """loads list of search results and stores XML in docinfo"""
         logging.debug("getSearchResults mode=%s query=%s"%(mode, query))
+        normMode = pageinfo.get('characterNormalization', 'reg')
+        logging.debug("getSearchResults mode=%s query=%s norm=%s"%(mode, query, normMode))
         if mode == "none":
             return docinfo
 …
         if cachedQuery is not None:
             # cached search result
             if cachedQuery == '%s_%s'%(mode,query):
+            if cachedQuery == '%s_%s_%s'%(mode,query,normMode):
                 # same query
                 return docinfo
 …
         # cache query
         docinfo['cachedQuery'] = '%s_%s'%(mode,query)
+        docinfo['cachedQuery'] = '%s_%s_%s'%(mode,query,normMode)
         # fetch full results
 …
         try:
             dom = ET.fromstring(pagexml)
+            # clean html output
+            self._processWTags('plain', normMode, dom)
+            self._cleanSearchResult(dom)
             # page content is currently in multiple <td align=left>
             alldivs = dom.findall(".//tr[@class='hit']")
 …
     def getResultsPage(self, mode="text", query=None, pn=None, start=None, size=None, pageinfo=None, docinfo=None):
         """returns single page from the table of contents"""
+        """returns single page from the list of search results"""
         logging.debug("getResultsPage mode=%s, pn=%s"%(mode,pn))
         # get (cached) result

css/docuviewer.css

-                      r577
+                      r583
         vertical-align: super; */
+}
-/* handwritten */
-div.col.main div.content.text span.handwritten {
-        display: block;
-        /* float: left; */
-    margin-top: 0.5em;
-    margin-bottom: 0.5em;
-    padding: 5px;
-    border: 1px dashed silver;
+}
-div.col.main div.content.text span.handwritten span.figureNum {
-    display: none;
-    /* font-size: 70%;
-        vertical-align: super; */
+}
 /* figure */
 div.col.main div.content.text span.figure {

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 583:ca0274423382 in documentViewer

Legend:

MpiwgXmlTextServer.py

css/docuviewer.css

Download in other formats: