comparison MpdlXmlTextServer.py @ 478:cd37d6f8d5e8 elementtree

more cleanup
author casties
date Fri, 12 Aug 2011 16:41:39 +0200
parents 17f0290b2327
children 7ca8ac7db06e
comparison
equal deleted inserted replaced
477:17f0290b2327 478:cd37d6f8d5e8
87 logging.debug("getSearch()") 87 logging.debug("getSearch()")
88 docpath = docinfo['textURLPath'] 88 docpath = docinfo['textURLPath']
89 url = docinfo['url'] 89 url = docinfo['url']
90 pagesize = pageinfo['queryPageSize'] 90 pagesize = pageinfo['queryPageSize']
91 pn = pageinfo.get('searchPN',1) 91 pn = pageinfo.get('searchPN',1)
92 sn = pageinfo['sn'] 92 sn = pageinfo.get('sn',None) #TODO: is this s now?
93 highlightQuery = pageinfo['highlightQuery'] 93 highlightQuery = pageinfo['highlightQuery']
94 query =pageinfo['query'] 94 query =pageinfo['query']
95 queryType =pageinfo['queryType'] 95 queryType =pageinfo['queryType']
96 viewMode= pageinfo['viewMode'] 96 viewMode= pageinfo['viewMode']
97 tocMode = pageinfo['tocMode'] 97 tocMode = pageinfo['tocMode']
267 267
268 268
269 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None): 269 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None):
270 """returns single page from fulltext""" 270 """returns single page from fulltext"""
271 logging.debug("getTextPage mode=%s, pn=%s"%(mode,pn)) 271 logging.debug("getTextPage mode=%s, pn=%s"%(mode,pn))
272 # check for cached text -- but this shouldn't be called twice 272 # check for cached text -- but ideally this shouldn't be called twice
273 if pageinfo.has_key('textPage'): 273 if pageinfo.has_key('textPage'):
274 logging.debug("getTextPage: using cached text") 274 logging.debug("getTextPage: using cached text")
275 return pageinfo['textPage'] 275 return pageinfo['textPage']
276 276
277 docpath = docinfo['textURLPath'] 277 docpath = docinfo['textURLPath']
285 sn = pageinfo.get('sn', None) 285 sn = pageinfo.get('sn', None)
286 highlightQuery = pageinfo.get('highlightQuery', None) 286 highlightQuery = pageinfo.get('highlightQuery', None)
287 tocMode = pageinfo.get('tocMode', None) 287 tocMode = pageinfo.get('tocMode', None)
288 tocPN = pageinfo.get('tocPN',None) 288 tocPN = pageinfo.get('tocPN',None)
289 characterNormalization = pageinfo.get('characterNormalization', None) 289 characterNormalization = pageinfo.get('characterNormalization', None)
290 selfurl = docinfo['viewerUrl'] 290
291 selfurl = docinfo['viewerUrl']
291 292
292 if mode == "dict" or mode == "text_dict": 293 if mode == "dict" or mode == "text_dict":
293 # dict is called textPollux in the backend 294 # dict is called textPollux in the backend
294 textmode = "textPollux" 295 textmode = "textPollux"
295 elif not mode: 296 elif not mode:
320 pagediv = div 321 pagediv = div
321 break 322 break
322 323
323 # plain text mode 324 # plain text mode
324 if mode == "text": 325 if mode == "text":
326 # get full url assuming documentViewer is parent
327 selfurl = self.getLink()
325 if pagediv is not None: 328 if pagediv is not None:
326 links = pagediv.findall(".//a") 329 links = pagediv.findall(".//a")
327 for l in links: 330 for l in links:
328 href = l.get('href') 331 href = l.get('href')
329 if href and href.startswith('#note-'): 332 if href and href.startswith('#note-'):
330 href = href.replace('#note-',"?mode=%s&url=%s&viewMode=text&tocMode=%s&tocPN=%s&pn=%s#note-"%(urlmode,url,tocMode,tocPN,pn)) 333 href = href.replace('#note-',"%s#note-"%selfurl)
331 l.set('href', href) 334 l.set('href', href)
332 335
333 return serialize(pagediv) 336 return serialize(pagediv)
334 337
335 # text-with-links mode 338 # text-with-links mode
336 elif mode == "dict": 339 elif mode == "dict":
337 if pagediv is not None: 340 if pagediv is not None:
341 viewerurl = docinfo['viewerUrl']
342 selfurl = self.getLink()
338 # check all a-tags 343 # check all a-tags
339 links = pagediv.findall(".//a") 344 links = pagediv.findall(".//a")
340 for l in links: 345 for l in links:
341 href = l.get('href') 346 href = l.get('href')
342 347
343 if href: 348 if href:
344 # is link with href 349 # is link with href
345 if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'): 350 if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'):
346 # is pollux link 351 # is dictionary link - change href (keeping parameters)
347 selfurl = self.absolute_url() 352 l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/template/viewer_wordinfo'%viewerurl))
348 # change href 353 # add target to open new page
349 l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/head_main_voc'%selfurl))
350 # add target
351 l.set('target', '_blank') 354 l.set('target', '_blank')
352 355
353 if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'): 356 # TODO: is this needed?
357 if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'):
354 selfurl = self.absolute_url() 358 selfurl = self.absolute_url()
355 l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl)) 359 l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl))
356 l.set('target', '_blank') 360 l.set('target', '_blank')
357 l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;") 361 l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
358 l.set('ondblclick', 'popupWin.focus();') 362 l.set('ondblclick', 'popupWin.focus();')
359 363
360 if href.startswith('#note-'): 364 if href.startswith('#note-'):
361 l.set('href', href.replace('#note-',"?mode=%s&url=%s&viewMode=text_dict&tocMode=%s&tocPN=%s&pn=%s#note-"%(urlmode,url,tocMode,tocPN,pn))) 365 # note link
366 l.set('href', href.replace('#note-',"%s#note-"%selfurl))
362 367
363 return serialize(pagediv) 368 return serialize(pagediv)
364 369
365 # xml mode 370 # xml mode
366 elif mode == "xml": 371 elif mode == "xml":
387 392
388 return serialize(pagediv) 393 return serialize(pagediv)
389 394
390 return "no text here" 395 return "no text here"
391 396
392 # WTF: is this needed?
393 def getOrigPages(self, docinfo=None, pageinfo=None):
394 logging.debug("CALLED: getOrigPages!")
395 if not pageinfo.has_key('pageNumberOrig'):
396 logging.warning("getOrigPages: not in pageinfo!")
397 return None
398
399 return pageinfo['pageNumberOrig']
400
401 # WTF: is this needed?
402 def getOrigPagesNorm(self, docinfo=None, pageinfo=None):
403 logging.debug("CALLED: getOrigPagesNorm!")
404 if not pageinfo.has_key('pageNumberOrigNorm'):
405 logging.warning("getOrigPagesNorm: not in pageinfo!")
406 return None
407
408 return pageinfo['pageNumberOrigNorm']
409
410 # TODO: should be getWordInfo 397 # TODO: should be getWordInfo
411 def getTranslate(self, word=None, language=None): 398 def getWordInfo(self, word='', language='', display=''):
412 """translate into another languages""" 399 """show information (like dictionaries) about word"""
413 data = self.getServerData("lt/wordInfo.xql","language="+str(language)+"&word="+urllib.quote(word)+"&output=html") 400 data = self.getServerData("lt/wordInfo.xql","language=%s&word=%s&display=%s&output=html"%(language,urllib.quote(word),urllib.quote(display)))
414 return data 401 return data
415 402
416 # WTF: what does this do? 403 # WTF: what does this do?
417 def getLemma(self, lemma=None, language=None): 404 def getLemma(self, lemma=None, language=None):
418 """similar words lemma """ 405 """similar words lemma """