Mercurial > hg > documentViewer
changeset 554:c56bc63436de
fixed problem with empty div tags in fulltext.
author | casties |
---|---|
date | Tue, 25 Sep 2012 13:05:48 +0200 |
parents | 2fe04b61ed95 |
children | 841c2094af63 |
files | MpdlXmlTextServer.py version.txt |
diffstat | 2 files changed, 24 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/MpdlXmlTextServer.py Mon Sep 24 16:24:22 2012 +0200
+++ b/MpdlXmlTextServer.py Tue Sep 25 13:05:48 2012 +0200
@@ -282,7 +282,7 @@
         pagediv = None
         # ElementTree 1.2 in Python 2.6 can't do div[@class='pageContent']
         # so we look at the second level divs
-        alldivs = dom.findall("div")
+        alldivs = dom.findall('div')
         for div in alldivs:
             dc = div.get('class')
             # page content div
@@ -298,8 +298,15 @@
                 if punditMode:
                     pagediv = self.addPunditAttributes(pagediv, pageinfo, docinfo)
 
+                # fix empty div tags
+                divs = pagediv.findall('.//div')
+                for d in divs:
+                    if len(d) == 0 and not d.text:
+                        # make empty divs non-empty
+                        d.text = ' '
+
                 # check all a-tags
-                links = pagediv.findall(".//a")
+                links = pagediv.findall('.//a')
                 for l in links:
                     href = l.get('href')
                     if href and href.startswith('#note-'):
@@ -316,6 +323,13 @@
                 if punditMode:
                     pagediv = self.addPunditAttributes(pagediv, pageinfo, docinfo)
 
+                # fix empty div tags
+                divs = pagediv.findall('.//div')
+                for d in divs:
+                    if len(d) == 0 and not d.text:
+                        # make empty divs non-empty
+                        d.text = ' '
+
                 # check all a-tags
                 links = pagediv.findall(".//a")
                 for l in links:
@@ -332,14 +346,6 @@
                             # add target to open new page
                             l.set('target', '_blank')
 
-                        # TODO: is this needed?
-#                        if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'):
-#                            selfurl = self.absolute_url()
-#                            l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl))
-#                            l.set('target', '_blank')
-#                            l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
-#                            l.set('ondblclick', 'popupWin.focus();')
-
                         if href.startswith('#note-'):
                             # note link
                             l.set('href', href.replace('#note-',"%s#note-"%selfurl))
@@ -359,6 +365,13 @@
         # gis mode
         elif textmode == "gis":
             if pagediv is not None:
+                # fix empty div tags
+                divs = pagediv.findall('.//div')
+                for d in divs:
+                    if len(d) == 0 and not d.text:
+                        # make empty divs non-empty
+                        d.text = ' '
+
                 # check all a-tags
                 links = pagediv.findall(".//a")
                 # add our URL as backlink