Changeset 519:9a3cc3732194 in documentViewer for MpdlXmlTextServer.py


Ignore:
Timestamp:
Mar 12, 2012, 6:02:57 PM (12 years ago)
Author:
casties
Branch:
default
Children:
520:8c5aae9bdbbb, 523:acdbd82114bb
Message:

Uses XML from doc-info.xql for the table of contents now.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • MpdlXmlTextServer.py

    r518 r519  
    500500            tp += '</div>\n'
    501501           
    502         tp += '</div>'
     502        tp += '</div>\n'
    503503       
    504504        return tp
    505        
    506 
    507     def getToc_old(self, mode="text", docinfo=None):
    508         """loads table of contents and stores XML in docinfo"""
    509         logging.debug("getToc mode=%s"%mode)
    510         if mode == "none":
    511             return docinfo
    512              
    513         if 'tocSize_%s'%mode in docinfo:
    514             # cached toc
    515             return docinfo
    516        
    517         docpath = docinfo['textURLPath']
    518         # we need to set a result set size
    519         pagesize = 1000
    520         pn = 1
    521         if mode == "text":
    522             queryType = "toc"
    523         else:
    524             queryType = mode
    525         # number of entries in toc
    526         tocSize = 0
    527         tocDiv = None
    528         # fetch full toc
    529         pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
    530         dom = ET.fromstring(pagexml)
    531         # page content is in <div class="queryResultPage">
    532         pagediv = None
    533         # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage']
    534         alldivs = dom.findall("div")
    535         for div in alldivs:
    536             dc = div.get('class')
    537             # page content div
    538             if dc == 'queryResultPage':
    539                 pagediv = div
    540                
    541             elif dc == 'queryResultHits':
    542                 docinfo['tocSize_%s'%mode] = getInt(div.text)
    543 
    544         if pagediv is not None:
    545             # store XML in docinfo
    546             docinfo['tocXML_%s'%mode] = ET.tostring(pagediv, 'UTF-8')
    547 
    548         return docinfo
    549    
    550     def getTocPage_old(self, mode="text", pn=None, start=None, size=None, pageinfo=None, docinfo=None):
    551         """returns single page from the table of contents"""
    552         logging.debug("getTocPage mode=%s, pn=%s"%(mode,pn))
    553         if mode == "text":
    554             queryType = "toc"
    555         else:
    556             queryType = mode
    557            
    558         # check for cached TOC
    559         if not docinfo.has_key('tocXML_%s'%mode):
    560             self.getToc(mode=mode, docinfo=docinfo)
    561            
    562         tocxml = docinfo.get('tocXML_%s'%mode, None)
    563         if not tocxml:
    564             logging.error("getTocPage: unable to find tocXML")
    565             return "Error: no table of contents!"
    566        
    567         if size is None:
    568             size = pageinfo.get('tocPageSize', 30)
    569            
    570         if start is None:
    571             start = (pn - 1) * size
    572 
    573         fulltoc = ET.fromstring(tocxml)
    574        
    575         if fulltoc is not None:
    576             # paginate
    577             first = (start - 1) * 2
    578             len = size * 2
    579             del fulltoc[:first]
    580             del fulltoc[len:]
    581             tocdivs = fulltoc
    582            
    583             # check all a-tags
    584             links = tocdivs.findall(".//a")
    585             for l in links:
    586                 href = l.get('href')
    587                 if href:
    588                     # take pn from href
    589                     m = re.match(r'page-fragment\.xql.*pn=(\d+)', href)
    590                     if m is not None:
    591                         # and create new url (assuming parent is documentViewer)
    592                         url = self.getLink('pn', m.group(1))
    593                         l.set('href', url)
    594                     else:
    595                         logging.warning("getTocPage: Problem with link=%s"%href)
    596                        
    597             # fix two-divs-per-row with containing div
    598             newtoc = ET.Element('div', {'class':'queryResultPage'})
    599             for (d1,d2) in zip(tocdivs[::2],tocdivs[1::2]):
    600                 e = ET.Element('div',{'class':'tocline'})
    601                 e.append(d1)
    602                 e.append(d2)
    603                 newtoc.append(e)
    604                
    605             return serialize(newtoc)
    606        
    607         return "ERROR: no table of contents!"
    608    
     505           
    609506   
    610507    def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
Note: See TracChangeset for help on using the changeset viewer.