changeset 554:c56bc63436de

Fixed problem with empty div tags in fulltext: div elements with no children and no text now get a single space as their text.
author casties
date Tue, 25 Sep 2012 13:05:48 +0200
parents 2fe04b61ed95
children 841c2094af63
files MpdlXmlTextServer.py version.txt
diffstat 2 files changed, 24 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/MpdlXmlTextServer.py	Mon Sep 24 16:24:22 2012 +0200
+++ b/MpdlXmlTextServer.py	Tue Sep 25 13:05:48 2012 +0200
@@ -282,7 +282,7 @@
         pagediv = None
         # ElementTree 1.2 in Python 2.6 can't do div[@class='pageContent']
         # so we look at the second level divs
-        alldivs = dom.findall("div")
+        alldivs = dom.findall('div')
         for div in alldivs:
             dc = div.get('class')
             # page content div
@@ -298,8 +298,15 @@
                 if punditMode:
                     pagediv = self.addPunditAttributes(pagediv, pageinfo, docinfo)
                     
+                # fix empty div tags
+                divs = pagediv.findall('.//div')
+                for d in divs:
+                    if len(d) == 0 and not d.text:
+                        # make empty divs non-empty
+                        d.text = ' '
+                    
                 # check all a-tags
-                links = pagediv.findall(".//a")
+                links = pagediv.findall('.//a')
                 for l in links:
                     href = l.get('href')
                     if href and href.startswith('#note-'):
@@ -316,6 +323,13 @@
                 if punditMode:
                     pagediv = self.addPunditAttributes(pagediv, pageinfo, docinfo)
                     
+                # fix empty div tags
+                divs = pagediv.findall('.//div')
+                for d in divs:
+                    if len(d) == 0 and not d.text:
+                        # make empty divs non-empty
+                        d.text = ' '
+                    
                 # check all a-tags
                 links = pagediv.findall(".//a")
                 for l in links:
@@ -332,14 +346,6 @@
                             # add target to open new page
                             l.set('target', '_blank')
                                                           
-                        # TODO: is this needed?
-#                        if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'):
-#                            selfurl = self.absolute_url()
-#                            l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl))
-#                            l.set('target', '_blank')
-#                            l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
-#                            l.set('ondblclick', 'popupWin.focus();')   
-                    
                         if href.startswith('#note-'):
                             # note link
                             l.set('href', href.replace('#note-',"%s#note-"%selfurl))
@@ -359,6 +365,13 @@
         # gis mode
         elif textmode == "gis":
             if pagediv is not None:
+                # fix empty div tags
+                divs = pagediv.findall('.//div')
+                for d in divs:
+                    if len(d) == 0 and not d.text:
+                        # make empty divs non-empty
+                        d.text = ' '
+                    
                 # check all a-tags
                 links = pagediv.findall(".//a")
                 # add our URL as backlink
--- a/version.txt	Mon Sep 24 16:24:22 2012 +0200
+++ b/version.txt	Tue Sep 25 13:05:48 2012 +0200
@@ -1,1 +1,1 @@
-DocumentViewer 2.2.3
\ No newline at end of file
+DocumentViewer 2.2.4
\ No newline at end of file