comparison MpdlXmlTextServer.py @ 517:aaacdf551f6f

remove global info from processPageInfo.
author casties
date Mon, 05 Mar 2012 19:11:59 +0100
parents 7d7b639d7be7
children 91051b36b9cc
comparison
equal deleted inserted replaced
516:7d7b639d7be7 517:aaacdf551f6f
69 places.append(place) 69 places.append(place)
70 70
71 return places 71 return places
72 72
73 73
74 def getTextInfo(self, docinfo=None): 74 def getTextInfo(self, mode='', docinfo=None):
75 """reads document info, including page concordance, from text server""" 75 """reads document info, including page concordance, from text server"""
76 logging.debug("getDocInfo") 76 logging.debug("getDocInfo")
77 #TODO: check cached info
77 docpath = docinfo.get('textURLPath', None) 78 docpath = docinfo.get('textURLPath', None)
78 if docpath is None: 79 if docpath is None:
79 logging.error("getTextInfo: no textURLPath!") 80 logging.error("getTextInfo: no textURLPath!")
80 return docinfo 81 return docinfo
81 82
82 # we need to set a result set size 83 # we need to set a result set size
83 pagesize = 10000 84 pagesize = 10000
84 pn = 1 85 pn = 1
85 # fetch docinfo 86 # fetch docinfo
86 pagexml = self.getServerData("doc-info.xql","document=%s&pageSize=%s&pn=%s"%(docpath,pagesize,pn)) 87 pagexml = self.getServerData("doc-info.xql","document=%s&info=%s&pageSize=%s&pn=%s"%(docpath,mode,pagesize,pn))
87 dom = ET.fromstring(pagexml) 88 dom = ET.fromstring(pagexml)
88 # all info in tag <document> 89 # all info in tag <document>
89 doc = dom.find("document") 90 doc = dom.find("document")
90 if doc is None: 91 if doc is None:
91 logging.error("getTextInfo: unable to find document-tag!") 92 logging.error("getTextInfo: unable to find document-tag!")
133 134
134 if n > 0: 135 if n > 0:
135 pages[n] = page 136 pages[n] = page
136 137
137 docinfo['pageNumbers'] = pages 138 docinfo['pageNumbers'] = pages
138 logging.debug("got pageNumbers=%s"%repr(pages)) 139 #logging.debug("got pageNumbers=%s"%repr(pages))
139 140
141 # toc
142 elif name == 'toc':
143 # contains tags with table of contents
144 # TODO: implement
145 pass
146
140 return docinfo 147 return docinfo
141 148
142 149
143 def processPageInfo(self, dom, docinfo, pageinfo): 150 def processPageInfo(self, dom, docinfo, pageinfo):
144 """processes page info divs from dom and stores in docinfo and pageinfo""" 151 """processes page info divs from dom and stores in docinfo and pageinfo"""
161 pageinfo['pageNumberOrigNorm'] = div.text 168 pageinfo['pageNumberOrigNorm'] = div.text
162 169
163 # pageHeaderTitle 170 # pageHeaderTitle
164 elif dc == 'pageHeaderTitle': 171 elif dc == 'pageHeaderTitle':
165 pageinfo['pageHeaderTitle'] = div.text 172 pageinfo['pageHeaderTitle'] = div.text
166 173
167 # numFigureEntries
168 elif dc == 'countFigureEntries':
169 docinfo['numFigureEntries'] = getInt(div.text)
170
171 # numTocEntries
172 elif dc == 'countTocEntries':
173 # WTF: s1 = int(s)/30+1
174 docinfo['numTocEntries'] = getInt(div.text)
175
176 # numPlaces
177 elif dc == 'countPlaces':
178 docinfo['numPlaces'] = getInt(div.text)
179
180 # numTextPages
181 elif dc == 'countPages':
182 np = getInt(div.text)
183 if np > 0:
184 docinfo['numTextPages'] = np
185 if docinfo.get('numPages', 0) == 0:
186 # seems to be text-only - update page count
187 docinfo['numPages'] = np
188 #pageinfo['end'] = min(pageinfo['end'], np)
189 pageinfo['numgroups'] = int(np / pageinfo['groupsize'])
190 if np % pageinfo['groupsize'] > 0:
191 pageinfo['numgroups'] += 1
192
193 #logging.debug("processPageInfo: pageinfo=%s"%repr(pageinfo)) 174 #logging.debug("processPageInfo: pageinfo=%s"%repr(pageinfo))
194 return 175 return
195 176
196 177
197 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None): 178 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None):