Changeset 458:48b135b089c8 in documentViewer for MpdlXmlTextServer.py


Ignore:
Timestamp:
Jul 19, 2011, 6:46:35 PM (13 years ago)
Author:
casties
Branch:
elementtree
Message:

more renovation

File:
1 edited

Legend:

Unmodified
Added
Removed
  • MpdlXmlTextServer.py

    r456 r458  
    13 13 import logging
    14 14 import urllib
    15 import documentViewer
    16 #from documentViewer import getTextFromNode, serializeNode
    17 
    18 def intOr0(s, default=0):
    19     """convert s to int or return default"""
    20     try:
    21         return int(s)
    22     except:
    23         return default
    24 
    25 def getText(node):
    26     """get the cdata content of a node"""
    27     if node is None:
    28         return ""
    29     # ET:
    30     text = node.text or ""
    31     for e in node:
    32         text += gettext(e)
    33         if e.tail:
    34             text += e.tail
    35 
    36     return text
     15
     16 from SrvTxtUtils import getInt, getText, getHttpData
    37 17
    38 18 def serialize(node):
     
    91 71       
    92 72    def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40):
    93        
    94 73        """constructor"""
    95 74        self.id=id
     
    103 82    def getHttpData(self, url, data=None):
    104 83        """returns result from url+data HTTP request"""
    105         return documentViewer.getHttpData(url,data,timeout=self.timeout)
     84        return getHttpData(url,data,timeout=self.timeout)
    106 85   
    107 86    def getServerData(self, method, data=None):
    108 87        """returns result from text server for method+data"""
    109 88        url = self.serverUrl+method
    110         return documentViewer.getHttpData(url,data,timeout=self.timeout)
     89        return getHttpData(url,data,timeout=self.timeout)
    111 90
    112 91    # WTF: what does this really do? can it be integrated in getPage?
     
    269 248            # pageNumberOrigNorm
    270 249            elif dc == 'countFigureEntries':
    271                 docinfo['countFigureEntries'] = intOr0(div.text)
     250                docinfo['countFigureEntries'] = getInt(div.text)
    272 251               
    273 252            # pageNumberOrigNorm
    274 253            elif dc == 'countTocEntries':
    275 254                # WTF: s1 = int(s)/30+1
    276                 docinfo['countTocEntries'] = intOr0(div.text)
     255                docinfo['countTocEntries'] = getInt(div.text)
    277 256               
    278 257            # numTextPages
    279 258            elif dc == 'countPages':
    280                 np = intOr0(div.text)                   
     259                np = getInt(div.text)                   
    281 260                if np > 0:
    282 261                    docinfo['numTextPages'] = np
     
    505 484               
    506 485            elif dc == 'queryResultHits':
    507                 docinfo['tocSize_%s'%mode] = intOr0(div.text)
     486                docinfo['tocSize_%s'%mode] = getInt(div.text)
    508 487
    509 488        if pagediv:
    510 #            # split xml in chunks
    511 #            tocs = []
    512 #            tocdivs = pagediv.findall('div')
    513 #            for p in zip(tocdivs[::2], tocdivs[1::2]):
    514 #                toc = serialize(p[0])
    515 #                toc += serialize(p[1])
    516 #                tocs.append(toc)
    517 #                logging.debug("pair: %s"%(toc))
    518 489            # store XML in docinfo
    519 490            docinfo['tocXML_%s'%mode] = ET.tostring(pagediv, 'UTF-8')
Note: See TracChangeset for help on using the changeset viewer.