# HG changeset patch # User casties # Date 1348571148 -7200 # Node ID c56bc63436de7da8673eb95b05037eaf59b00ed9 # Parent 2fe04b61ed95a28f5419c92945be1dad22c4621e fixed problem with empty div tags in fulltext. diff -r 2fe04b61ed95 -r c56bc63436de MpdlXmlTextServer.py --- a/MpdlXmlTextServer.py Mon Sep 24 16:24:22 2012 +0200 +++ b/MpdlXmlTextServer.py Tue Sep 25 13:05:48 2012 +0200 @@ -282,7 +282,7 @@ pagediv = None # ElementTree 1.2 in Python 2.6 can't do div[@class='pageContent'] # so we look at the second level divs - alldivs = dom.findall("div") + alldivs = dom.findall('div') for div in alldivs: dc = div.get('class') # page content div @@ -298,8 +298,15 @@ if punditMode: pagediv = self.addPunditAttributes(pagediv, pageinfo, docinfo) + # fix empty div tags + divs = pagediv.findall('.//div') + for d in divs: + if len(d) == 0 and not d.text: + # make empty divs non-empty + d.text = ' ' + # check all a-tags - links = pagediv.findall(".//a") + links = pagediv.findall('.//a') for l in links: href = l.get('href') if href and href.startswith('#note-'): @@ -316,6 +323,13 @@ if punditMode: pagediv = self.addPunditAttributes(pagediv, pageinfo, docinfo) + # fix empty div tags + divs = pagediv.findall('.//div') + for d in divs: + if len(d) == 0 and not d.text: + # make empty divs non-empty + d.text = ' ' + # check all a-tags links = pagediv.findall(".//a") for l in links: @@ -332,14 +346,6 @@ # add target to open new page l.set('target', '_blank') - # TODO: is this needed? -# if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'): -# selfurl = self.absolute_url() -# l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl)) -# l.set('target', '_blank') -# l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;") -# l.set('ondblclick', 'popupWin.focus();') - if href.startswith('#note-'): # note link l.set('href', href.replace('#note-',"%s#note-"%selfurl)) @@ -359,6 +365,13 @@ # gis mode elif textmode == "gis": if pagediv is not None: + # fix empty div tags + divs = pagediv.findall('.//div') + for d in divs: + if len(d) == 0 and not d.text: + # make empty divs non-empty + d.text = ' ' + # check all a-tags links = pagediv.findall(".//a") # add our URL as backlink diff -r 2fe04b61ed95 -r c56bc63436de version.txt --- a/version.txt Mon Sep 24 16:24:22 2012 +0200 +++ b/version.txt Tue Sep 25 13:05:48 2012 +0200 @@ -1,1 +1,1 @@ -DocumentViewer 2.2.3 \ No newline at end of file +DocumentViewer 2.2.4 \ No newline at end of file