changeset 517:aaacdf551f6f

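Remove global (document-level) info handling from processPageInfo; add a mode parameter to getTextInfo that is passed to doc-info.xql as "info".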
author casties
date Mon, 05 Mar 2012 19:11:59 +0100
parents 7d7b639d7be7
children 91051b36b9cc
files MpdlXmlTextServer.py
diffstat 1 files changed, 12 insertions(+), 31 deletions(-) [+]
line wrap: on
line diff
--- a/MpdlXmlTextServer.py	Mon Mar 05 18:04:49 2012 +0100
+++ b/MpdlXmlTextServer.py	Mon Mar 05 19:11:59 2012 +0100
@@ -71,19 +71,20 @@
         return places
     
           
-    def getTextInfo(self, docinfo=None):
+    def getTextInfo(self, mode='', docinfo=None):
         """reads document info, including page concordance, from text server"""
         logging.debug("getDocInfo")
+        #TODO: check cached info
         docpath = docinfo.get('textURLPath', None)
         if docpath is None:
             logging.error("getTextInfo: no textURLPath!")
             return docinfo
-        
+                
         # we need to set a result set size
         pagesize = 10000
         pn = 1
         # fetch docinfo
-        pagexml = self.getServerData("doc-info.xql","document=%s&pageSize=%s&pn=%s"%(docpath,pagesize,pn))
+        pagexml = self.getServerData("doc-info.xql","document=%s&info=%s&pageSize=%s&pn=%s"%(docpath,mode,pagesize,pn))
         dom = ET.fromstring(pagexml)
         # all info in tag <document>
         doc = dom.find("document")
@@ -135,8 +136,14 @@
                             pages[n] = page
                         
                     docinfo['pageNumbers'] = pages
-                    logging.debug("got pageNumbers=%s"%repr(pages))
+                    #logging.debug("got pageNumbers=%s"%repr(pages))
                                 
+                # toc
+                elif name == 'toc':
+                    # contains tags with table of contents
+                    # TODO: implement
+                    pass
+
         return docinfo
         
           
@@ -163,33 +170,7 @@
             # pageHeaderTitle
             elif dc == 'pageHeaderTitle':
                 pageinfo['pageHeaderTitle'] = div.text
-                
-            # numFigureEntries
-            elif dc == 'countFigureEntries':
-                docinfo['numFigureEntries'] = getInt(div.text)
-                
-            # numTocEntries
-            elif dc == 'countTocEntries':
-                # WTF: s1 = int(s)/30+1
-                docinfo['numTocEntries'] = getInt(div.text)
-                
-            # numPlaces
-            elif dc == 'countPlaces':
-                docinfo['numPlaces'] = getInt(div.text)
-                
-            # numTextPages
-            elif dc == 'countPages':
-                np = getInt(div.text)                    
-                if np > 0:
-                    docinfo['numTextPages'] = np
-                    if docinfo.get('numPages', 0) == 0:
-                        # seems to be text-only - update page count
-                        docinfo['numPages'] = np
-                        #pageinfo['end'] = min(pageinfo['end'], np)
-                        pageinfo['numgroups'] = int(np / pageinfo['groupsize'])
-                        if np % pageinfo['groupsize'] > 0:
-                            pageinfo['numgroups'] += 1
-        
+                        
         #logging.debug("processPageInfo: pageinfo=%s"%repr(pageinfo))
         return