Mercurial > hg > documentViewer
comparison MpdlXmlTextServer.py @ 434:44ccb7bd0938
*** empty log message ***
author | abukhman |
---|---|
date | Thu, 24 Feb 2011 14:55:28 +0100 |
parents | 8d8f2cbd6213 |
children | 94baa8dbc7e8 |
comparison
equal
deleted
inserted
replaced
433:24fc850a35e6 | 434:44ccb7bd0938 |
---|---|
181 docpath = docinfo['textURLPath'] | 181 docpath = docinfo['textURLPath'] |
182 path = docinfo['textURLPath'] | 182 path = docinfo['textURLPath'] |
183 url = docinfo.get('url',None) | 183 url = docinfo.get('url',None) |
184 name = docinfo.get('name',None) | 184 name = docinfo.get('name',None) |
185 pn =pageinfo['current'] | 185 pn =pageinfo['current'] |
186 #viewMode= pageinfo['viewMode'] | |
187 | |
188 sn = pageinfo['sn'] | 186 sn = pageinfo['sn'] |
189 highlightQuery = pageinfo['highlightQuery'] | 187 highlightQuery = pageinfo['highlightQuery'] |
190 #mode = pageinfo ['viewMode'] | 188 #mode = pageinfo ['viewMode'] |
191 tocMode = pageinfo['tocMode'] | 189 tocMode = pageinfo['tocMode'] |
192 characterNormalization=pageinfo['characterNormalization'] | 190 characterNormalization=pageinfo['characterNormalization'] |
207 #original Pages | 205 #original Pages |
208 pagedivs = dom.xpath("//div[@class='pageNumberOrig']") | 206 pagedivs = dom.xpath("//div[@class='pageNumberOrig']") |
209 if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"): | 207 if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"): |
210 if len(pagedivs)>0: | 208 if len(pagedivs)>0: |
211 docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0]) | 209 docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0]) |
212 logging.debug("ORIGINAL PAGES: %s"%(docinfo['pageNumberOrig'])) | 210 #logging.debug("ORIGINAL PAGES: %s"%(docinfo['pageNumberOrig'])) |
213 | 211 |
214 #original Pages Norm | 212 #original Pages Norm |
215 pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']") | 213 pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']") |
216 if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"): | 214 if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"): |
217 if len(pagedivs)>0: | 215 if len(pagedivs)>0: |
218 docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0]) | 216 docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0]) |
219 logging.debug("ORIGINAL PAGES: %s"%(docinfo['pageNumberOrigNorm'])) | 217 #logging.debug("ORIGINAL PAGES: %s"%(docinfo['pageNumberOrigNorm'])) |
220 | 218 |
221 | 219 |
222 #figureEntries | 220 #figureEntries |
223 pagedivs = dom.xpath("//div[@class='countFigureEntries']") | 221 pagedivs = dom.xpath("//div[@class='countFigureEntries']") |
224 if pagedivs == dom.xpath("//div[@class='countFigureEntries']"): | 222 if pagedivs == dom.xpath("//div[@class='countFigureEntries']"): |
226 docinfo['countFigureEntries'] = getTextFromNode(pagedivs[0]) | 224 docinfo['countFigureEntries'] = getTextFromNode(pagedivs[0]) |
227 s = getTextFromNode(pagedivs[0]) | 225 s = getTextFromNode(pagedivs[0]) |
228 if s=='0': | 226 if s=='0': |
229 try: | 227 try: |
230 docinfo['countFigureEntries'] = int(s) | 228 docinfo['countFigureEntries'] = int(s) |
231 logging.debug("FIGURE ENTRIES: %s"%(s)) | 229 #logging.debug("FIGURE ENTRIES: %s"%(s)) |
232 except: | 230 except: |
233 docinfo['countFigureEntries'] = 0 | 231 docinfo['countFigureEntries'] = 0 |
234 else: | 232 else: |
235 s1 = int(s)/30+1 | 233 s1 = int(s)/30+1 |
236 try: | 234 try: |
237 docinfo['countFigureEntries'] = int(s1) | 235 docinfo['countFigureEntries'] = int(s1) |
238 logging.debug("FIGURE ENTRIES: %s"%(s1)) | 236 #logging.debug("FIGURE ENTRIES: %s"%(s1)) |
239 except: | 237 except: |
240 docinfo['countFigureEntries'] = 0 | 238 docinfo['countFigureEntries'] = 0 |
241 | 239 |
242 #allPlaces | 240 #allPlaces |
243 pagedivs = dom.xpath("//div[@class='countPlaces']") | 241 pagedivs = dom.xpath("//div[@class='countPlaces']") |
245 if len(pagedivs)>0: | 243 if len(pagedivs)>0: |
246 docinfo['countPlaces']= getTextFromNode(pagedivs[0]) | 244 docinfo['countPlaces']= getTextFromNode(pagedivs[0]) |
247 s = getTextFromNode(pagedivs[0]) | 245 s = getTextFromNode(pagedivs[0]) |
248 try: | 246 try: |
249 docinfo['countPlaces'] = int(s) | 247 docinfo['countPlaces'] = int(s) |
250 logging.debug("PLACES HERE: %s"%(s)) | 248 #logging.debug("PLACES HERE: %s"%(s)) |
251 except: | 249 except: |
252 docinfo['countPlaces'] = 0 | 250 docinfo['countPlaces'] = 0 |
253 | 251 |
254 #tocEntries | 252 #tocEntries |
255 pagedivs = dom.xpath("//div[@class='countTocEntries']") | 253 pagedivs = dom.xpath("//div[@class='countTocEntries']") |
258 docinfo['countTocEntries'] = int(getTextFromNode(pagedivs[0])) | 256 docinfo['countTocEntries'] = int(getTextFromNode(pagedivs[0])) |
259 s = getTextFromNode(pagedivs[0]) | 257 s = getTextFromNode(pagedivs[0]) |
260 if s=='0': | 258 if s=='0': |
261 try: | 259 try: |
262 docinfo['countTocEntries'] = int(s) | 260 docinfo['countTocEntries'] = int(s) |
263 logging.debug("TEXT ENTRIES: %s"%(s)) | 261 #logging.debug("TEXT ENTRIES: %s"%(s)) |
264 except: | 262 except: |
265 docinfo['countTocEntries'] = 0 | 263 docinfo['countTocEntries'] = 0 |
266 else: | 264 else: |
267 s1 = int(s)/30+1 | 265 s1 = int(s)/30+1 |
268 try: | 266 try: |
269 docinfo['countTocEntries'] = int(s1) | 267 docinfo['countTocEntries'] = int(s1) |
270 logging.debug("TEXT ENTRIES: %s"%(s1)) | 268 #logging.debug("TEXT ENTRIES: %s"%(s1)) |
271 except: | 269 except: |
272 docinfo['countTocEntries'] = 0 | 270 docinfo['countTocEntries'] = 0 |
273 | 271 |
274 #numTextPages | 272 #numTextPages |
275 pagedivs = dom.xpath("//div[@class='countPages']") | 273 pagedivs = dom.xpath("//div[@class='countPages']") |
278 docinfo['numPages'] = getTextFromNode(pagedivs[0]) | 276 docinfo['numPages'] = getTextFromNode(pagedivs[0]) |
279 s = getTextFromNode(pagedivs[0]) | 277 s = getTextFromNode(pagedivs[0]) |
280 | 278 |
281 try: | 279 try: |
282 docinfo['numPages'] = int(s) | 280 docinfo['numPages'] = int(s) |
283 logging.debug("PAGE NUMBER: %s"%(s)) | 281 #logging.debug("PAGE NUMBER: %s"%(s)) |
284 | 282 |
285 np = docinfo['numPages'] | 283 np = docinfo['numPages'] |
286 pageinfo['end'] = min(pageinfo['end'], np) | 284 pageinfo['end'] = min(pageinfo['end'], np) |
287 pageinfo['numgroups'] = int(np / pageinfo['groupsize']) | 285 pageinfo['numgroups'] = int(np / pageinfo['groupsize']) |
288 if np % pageinfo['groupsize'] > 0: | 286 if np % pageinfo['groupsize'] > 0: |
406 | 404 |
407 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)) | 405 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)) |
408 pagedom = Parse(pagexml) | 406 pagedom = Parse(pagexml) |
409 numdivs = pagedom.xpath("//div[@class='queryResultHits']") | 407 numdivs = pagedom.xpath("//div[@class='queryResultHits']") |
410 tocSearch = int(getTextFromNode(numdivs[0])) | 408 tocSearch = int(getTextFromNode(numdivs[0])) |
411 logging.debug("documentViewer (gettoc) tocSearch: %s"%(tocSearch)) | 409 #logging.debug("documentViewer (gettoc) tocSearch: %s"%(tocSearch)) |
412 tc=int((tocSearch/10)+1) | 410 tc=int((tocSearch/10)+1) |
413 logging.debug("documentViewer (gettoc) tc: %s"%(tc)) | 411 #logging.debug("documentViewer (gettoc) tc: %s"%(tc)) |
414 return tc | 412 return tc |
415 | 413 |
416 def getToc(self, mode="text", docinfo=None): | 414 def getToc(self, mode="text", docinfo=None): |
417 """loads table of contents and stores in docinfo""" | 415 """loads table of contents and stores in docinfo""" |
418 logging.debug("documentViewer (gettoc) mode: %s"%(mode)) | 416 #logging.debug("documentViewer (gettoc) mode: %s"%(mode)) |
419 if mode == "none": | 417 if mode == "none": |
420 return docinfo | 418 return docinfo |
421 if 'tocSize_%s'%mode in docinfo: | 419 if 'tocSize_%s'%mode in docinfo: |
422 # cached toc | 420 # cached toc |
423 return docinfo | 421 return docinfo |
466 | 464 |
467 data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm&optionToggle=1"%(docpath,queryType, pagesize, pn)) | 465 data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm&optionToggle=1"%(docpath,queryType, pagesize, pn)) |
468 page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&optionToggle=1'%(selfurl,url, viewMode, tocMode, tocPN)) | 466 page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&optionToggle=1'%(selfurl,url, viewMode, tocMode, tocPN)) |
469 text = page.replace('mode=image','mode=texttool') | 467 text = page.replace('mode=image','mode=texttool') |
470 #logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) | 468 #logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) |
471 logging.debug("TEXT %s"%(text)) | 469 #logging.debug("TEXT %s"%(text)) |
472 return text | 470 return text |
473 | 471 |
474 def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): | 472 def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): |
475 """change settings""" | 473 """change settings""" |
476 self.title=title | 474 self.title=title |