# HG changeset patch # User casties # Date 1330971119 -3600 # Node ID aaacdf551f6f0a8888bc320102bc908c89a87aca # Parent 7d7b639d7be7285b4ee978c14ff42e62051b71b1 remove global info from processPageInfo. diff -r 7d7b639d7be7 -r aaacdf551f6f MpdlXmlTextServer.py --- a/MpdlXmlTextServer.py Mon Mar 05 18:04:49 2012 +0100 +++ b/MpdlXmlTextServer.py Mon Mar 05 19:11:59 2012 +0100 @@ -71,19 +71,20 @@ return places - def getTextInfo(self, docinfo=None): + def getTextInfo(self, mode='', docinfo=None): """reads document info, including page concordance, from text server""" logging.debug("getDocInfo") + #TODO: check cached info docpath = docinfo.get('textURLPath', None) if docpath is None: logging.error("getTextInfo: no textURLPath!") return docinfo - + # we need to set a result set size pagesize = 10000 pn = 1 # fetch docinfo - pagexml = self.getServerData("doc-info.xql","document=%s&pageSize=%s&pn=%s"%(docpath,pagesize,pn)) + pagexml = self.getServerData("doc-info.xql","document=%s&info=%s&pageSize=%s&pn=%s"%(docpath,mode,pagesize,pn)) dom = ET.fromstring(pagexml) # all info in tag doc = dom.find("document") @@ -135,8 +136,14 @@ pages[n] = page docinfo['pageNumbers'] = pages - logging.debug("got pageNumbers=%s"%repr(pages)) + #logging.debug("got pageNumbers=%s"%repr(pages)) + # toc + elif name == 'toc': + # contains tags with table of contents + # TODO: implement + pass + return docinfo @@ -163,33 +170,7 @@ # pageHeaderTitle elif dc == 'pageHeaderTitle': pageinfo['pageHeaderTitle'] = div.text - - # numFigureEntries - elif dc == 'countFigureEntries': - docinfo['numFigureEntries'] = getInt(div.text) - - # numTocEntries - elif dc == 'countTocEntries': - # WTF: s1 = int(s)/30+1 - docinfo['numTocEntries'] = getInt(div.text) - - # numPlaces - elif dc == 'countPlaces': - docinfo['numPlaces'] = getInt(div.text) - - # numTextPages - elif dc == 'countPages': - np = getInt(div.text) - if np > 0: - docinfo['numTextPages'] = np - if docinfo.get('numPages', 0) == 0: - # seems to be text-only - update page count - docinfo['numPages'] = np - #pageinfo['end'] = min(pageinfo['end'], np) - pageinfo['numgroups'] = int(np / pageinfo['groupsize']) - if np % pageinfo['groupsize'] > 0: - pageinfo['numgroups'] += 1 - + #logging.debug("processPageInfo: pageinfo=%s"%repr(pageinfo)) return