Changeset 455:0a53fea83df7 in documentViewer
- Timestamp:
- Jul 15, 2011, 7:34:41 PM (13 years ago)
- Branch:
- elementtree
- Files:
-
- 1 added
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
MpdlXmlTextServer.py
r453 r455 2 2 from OFS.SimpleItem import SimpleItem 3 3 from Products.PageTemplates.PageTemplateFile import PageTemplateFile 4 4 5 from Ft.Xml import EMPTY_NAMESPACE, Parse 5 6 from Ft.Xml.Domlette import NonvalidatingReader … … 9 10 import xml.etree.ElementTree as ET 10 11 11 import md5 12 import sys 12 import re 13 13 import logging 14 14 import urllib 15 15 import documentViewer 16 16 #from documentViewer import getTextFromNode, serializeNode 17 18 def intOr0(s, default=0): 19 """convert s to int or return default""" 20 try: 21 return int(s) 22 except: 23 return default 17 24 18 25 def getText(node): … … 45 52 return "" 46 53 # ET: 47 #text = node.text or ""48 #for e in node:49 #text += gettext(e)50 #if e.tail:51 #text += e.tail54 # text = node.text or "" 55 # for e in node: 56 # text += gettext(e) 57 # if e.tail: 58 # text += e.tail 52 59 53 60 # 4Suite: … … 83 90 manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals()) 84 91 85 def __init__(self,id,title="",serverUrl="http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40): 86 #def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/", serverName=None, timeout=40): 92 def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40): 87 93 88 94 """constructor""" … … 104 110 return documentViewer.getHttpData(url,data,timeout=self.timeout) 105 111 112 # WTF: what does this really do? can it be integrated in getPage? 106 113 def getSearch(self, pageinfo=None, docinfo=None): 107 114 """get search list""" 115 logging.debug("getSearch()") 108 116 docpath = docinfo['textURLPath'] 109 117 url = docinfo['url'] … … 208 216 myList= "" 209 217 text=self.getServerData("xpath.xql", "document=%s&xpath=%s&pn=%s"%(docinfo['textURLPath'],xpath,pn)) 210 dom = Parse(text)211 result = dom. xpath("//result/resultPage/place")218 dom = ET.fromstring(text) 219 result = dom.findall(".//result/resultPage/place") 212 220 for l in result: 213 hrefNode= l.getAttributeNodeNS(None, u"id") 214 href= hrefNode.nodeValue 221 href = l.get("id") 215 222 hrefList.append(href) 223 # WTF: what does this do? 216 224 myList = ",".join(hrefList) 217 225 #logging.debug("getGisPlaces :%s"%(myList)) … … 228 236 myList="" 229 237 text=self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docinfo['textURLPath'],xpath)) 230 dom = Parse(text)231 result = dom. xpath("//result/resultPage/place")238 dom = ET.fromstring(text) 239 result = dom.findall(".//result/resultPage/place") 232 240 233 241 for l in result: 234 hrefNode = l.getAttributeNodeNS(None, u"id") 235 href= hrefNode.nodeValue 242 href = l.get("id") 236 243 hrefList.append(href) 244 # WTF: what does this do? 237 245 myList = ",".join(hrefList) 238 246 #logging.debug("getALLGisPlaces :%s"%(myList)) 239 247 return myList 240 248 241 242 def getTextPage(self, mode="text_dict", pn=1, docinfo=None, pageinfo=None): 243 """returns single page from fulltext""" 244 docpath = docinfo['textURLPath'] 245 path = docinfo['textURLPath'] 246 url = docinfo.get('url',None) 247 name = docinfo.get('name',None) 248 pn =pageinfo['current'] 249 sn = pageinfo['sn'] 250 #optionToggle =pageinfo ['optionToggle'] 251 highlightQuery = pageinfo['highlightQuery'] 252 #mode = pageinfo ['viewMode'] 253 tocMode = pageinfo['tocMode'] 254 characterNormalization=pageinfo['characterNormalization'] 255 tocPN = pageinfo['tocPN'] 256 selfurl = self.absolute_url() 257 if mode == "text_dict": 258 textmode = "textPollux" 259 else: 260 textmode = mode 261 262 textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization) 263 if highlightQuery is not None: 264 textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn) 265 266 pagexml = self.getServerData("page-fragment.xql",textParam) 267 dom = ET.fromstring(pagexml) 268 #dom = NonvalidatingReader.parseStream(pagexml) 269 270 #original Pages 271 #pagedivs = dom.xpath("//div[@class='pageNumberOrig']") 272 273 """if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"): 274 if len(pagedivs)>0: 275 docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0]) 276 logging.debug("ORIGINAL PAGE: %s"%(docinfo['pageNumberOrig'])) 277 278 #original Pages Norm 279 pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']") 280 if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"): 281 if len(pagedivs)>0: 282 docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0]) 283 logging.debug("ORIGINAL PAGE NORM: %s"%(docinfo['pageNumberOrigNorm'])) 284 """ 285 #figureEntries 286 # pagedivs = dom.xpath("//div[@class='countFigureEntries']") 287 # if pagedivs == dom.xpath("//div[@class='countFigureEntries']"): 288 # if len(pagedivs)>0: 289 # docinfo['countFigureEntries'] = getTextFromNode(pagedivs[0]) 290 # s = getTextFromNode(pagedivs[0]) 291 # if s=='0': 292 # try: 293 # docinfo['countFigureEntries'] = int(s) 294 # except: 295 # docinfo['countFigureEntries'] = 0 296 # else: 297 # s1 = int(s)/30+1 298 # try: 299 # docinfo['countFigureEntries'] = int(s1) 300 # except: 301 # docinfo['countFigureEntries'] = 0 302 # 303 # #allPlaces 304 # pagedivs = dom.xpath("//div[@class='countPlaces']") 305 # if pagedivs == dom.xpath("//div[@class='countPlaces']"): 306 # if len(pagedivs)>0: 307 # docinfo['countPlaces']= getTextFromNode(pagedivs[0]) 308 # s = getTextFromNode(pagedivs[0]) 309 # try: 310 # docinfo['countPlaces'] = int(s) 311 # except: 312 # docinfo['countPlaces'] = 0 313 # 314 # #tocEntries 315 # pagedivs = dom.xpath("//div[@class='countTocEntries']") 316 # if pagedivs == dom.xpath("//div[@class='countTocEntries']"): 317 # if len(pagedivs)>0: 318 # docinfo['countTocEntries'] = int(getTextFromNode(pagedivs[0])) 319 # s = getTextFromNode(pagedivs[0]) 320 # if s=='0': 321 # try: 322 # docinfo['countTocEntries'] = int(s) 323 # except: 324 # docinfo['countTocEntries'] = 0 325 # else: 326 # s1 = int(s)/30+1 327 # try: 328 # docinfo['countTocEntries'] = int(s1) 329 # except: 330 # docinfo['countTocEntries'] = 0 331 332 #numTextPages 333 #pagedivs = dom.xpath("//div[@class='countPages']") 249 def processPageInfo(self, dom, docinfo, pageinfo): 250 """processes page info divs from dom and stores in docinfo and pageinfo""" 251 # process all toplevel divs 334 252 alldivs = dom.findall(".//div") 335 253 pagediv = None 336 254 for div in alldivs: 337 255 dc = div.get('class') 256 257 # page content div 338 258 if dc == 'pageContent': 339 259 pagediv = div 260 261 # pageNumberOrig 262 elif dc == 'pageNumberOrig': 263 pageinfo['pageNumberOrig'] = div.text 340 264 341 if dc == 'countPages': 342 try: 343 np = int(div.text) 344 docinfo['numPages'] = np 345 pageinfo['end'] = min(pageinfo['end'], np) 346 pageinfo['numgroups'] = int(np / pageinfo['groupsize']) 347 if np % pageinfo['groupsize'] > 0: 348 pageinfo['numgroups'] += 1 349 350 except: 351 docinfo['numPages'] = 0 352 265 # pageNumberOrigNorm 266 elif dc == 'pageNumberOrigNorm': 267 pageinfo['pageNumberOrigNorm'] = div.text 268 269 # pageNumberOrigNorm 270 elif dc == 'countFigureEntries': 271 docinfo['countFigureEntries'] = intOr0(div.text) 272 273 # pageNumberOrigNorm 274 elif dc == 'countTocEntries': 275 # WTF: s1 = int(s)/30+1 276 docinfo['countTocEntries'] = intOr0(div.text) 277 278 # numTextPages 279 elif dc == 'countPages': 280 np = intOr0(div.text) 281 if np > 0: 282 docinfo['numTextPages'] = np 283 if docinfo.get('numPages', 0) == 0: 284 # seems to be text-only 285 docinfo['numTextPages'] = np 286 pageinfo['end'] = min(pageinfo['end'], np) 287 pageinfo['numgroups'] = int(np / pageinfo['groupsize']) 288 if np % pageinfo['groupsize'] > 0: 289 pageinfo['numgroups'] += 1 290 291 return 292 293 294 def getTextPage(self, mode="text_dict", pn=1, docinfo=None, pageinfo=None): 295 """returns single page from fulltext""" 296 logging.debug("getTextPage mode=%s, pn=%s"%(mode,pn)) 297 # check for cached text -- but this shouldn't be called twice 298 if pageinfo.has_key('textPage'): 299 logging.debug("getTextPage: using cached text") 300 return pageinfo['textPage'] 301 302 docpath = docinfo['textURLPath'] 303 # just checking 304 if pageinfo['current'] != pn: 305 logging.warning("getTextPage: current!=pn!") 306 307 # stuff for constructing full urls 308 url = docinfo['url'] 309 urlmode = docinfo['mode'] 310 sn = pageinfo.get('sn', None) 311 highlightQuery = pageinfo.get('highlightQuery', None) 312 tocMode = pageinfo.get('tocMode', None) 313 tocPN = pageinfo.get('tocPN',None) 314 characterNormalization = pageinfo.get('characterNormalization', None) 315 selfurl = docinfo['viewerUrl'] 316 317 if mode == "text_dict": 318 # text_dict is called textPollux in the backend 319 textmode = "textPollux" 320 else: 321 textmode = mode 322 323 textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization) 324 if highlightQuery: 325 textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn) 326 327 # fetch the page 328 pagexml = self.getServerData("page-fragment.xql",textParam) 329 dom = ET.fromstring(pagexml) 330 # extract additional info 331 self.processPageInfo(dom, docinfo, pageinfo) 332 # page content is in <div class="pageContent"> 333 pagediv = None 334 # ElementTree 1.2 in Python 2.6 can't do div[@class='pageContent'] 335 alldivs = dom.findall(".//div") 336 for div in alldivs: 337 dc = div.get('class') 338 # page content div 339 if dc == 'pageContent': 340 pagediv = div 353 341 break 354 355 # ROC: why?356 # else:357 # #no full text -- init to 0358 # docinfo['pageNumberOrig'] = 0359 # docinfo['countFigureEntries'] = 0360 # docinfo['countPlaces'] = 0361 # docinfo['countTocEntries'] = 0362 # docinfo['numPages'] = 0363 # docinfo['pageNumberOrigNorm'] = 0364 # #return docinfo365 342 366 343 # plain text mode 367 344 if mode == "text": 368 #pagedivs = dom.xpath("/div")369 345 if pagediv: 370 346 links = pagediv.findall(".//a") … … 372 348 href = l.get('href') 373 349 if href and href.startswith('#note-'): 374 href = href.replace('#note-',"? url=%s&viewMode=text&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,tocMode,tocPN,pn))350 href = href.replace('#note-',"?mode=%s&url=%s&viewMode=text&tocMode=%s&tocPN=%s&pn=%s#note-"%(urlmode,url,tocMode,tocPN,pn)) 375 351 l.set('href', href) 376 logging.debug("page=%s"%ET.tostring(pagediv, 'UTF-8')) 352 377 353 return serialize(pagediv) 378 354 379 if mode == "xml":380 if pagediv:381 return serialize(pagediv)382 383 if mode == "pureXml":384 if pagediv:385 return serialize(pagediv)386 387 if mode == "gis":388 if pagediv:389 # check all a-tags390 links = pagediv.findall(".//a")391 for l in links:392 href = l.get('href')393 if href:394 if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'):395 l.set('href', href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name))396 l.set('target', '_blank')397 398 return serialize(pagenode)399 400 355 # text-with-links mode 401 if mode == "text_dict":356 elif mode == "text_dict": 402 357 if pagediv: 403 358 # check all a-tags … … 424 379 425 380 if href.startswith('#note-'): 426 l.set('href', href.replace('#note-',"? url=%s&viewMode=text_dict&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,tocMode,tocPN,pn)))381 l.set('href', href.replace('#note-',"?mode=%s&url=%s&viewMode=text_dict&tocMode=%s&tocPN=%s&pn=%s#note-"%(urlmode,url,tocMode,tocPN,pn))) 427 382 428 383 return serialize(pagediv) 429 384 385 # xml mode 386 elif mode == "xml": 387 if pagediv: 388 return serialize(pagediv) 389 390 # pureXml mode 391 elif mode == "pureXml": 392 if pagediv: 393 return serialize(pagediv) 394 395 # gis mode 396 elif mode == "gis": 397 name = docinfo['name'] 398 if pagediv: 399 # check all a-tags 400 links = pagediv.findall(".//a") 401 for l in links: 402 href = l.get('href') 403 if href: 404 if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'): 405 l.set('href', href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name)) 406 l.set('target', '_blank') 407 408 return serialize(pagediv) 409 430 410 return "no text here" 431 411 412 # WTF: is this needed? 432 413 def getOrigPages(self, docinfo=None, pageinfo=None): 433 docpath = docinfo['textURLPath'] 434 pn =pageinfo['current'] 435 selfurl = self.absolute_url() 436 pagexml = self.getServerData("page-fragment.xql","document=%s&pn=%s"%(docpath, pn)) 437 dom = Parse(pagexml) 438 pagedivs = dom.xpath("//div[@class='pageNumberOrig']") 439 if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"): 440 if len(pagedivs)>0: 441 docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0]) 442 return docinfo['pageNumberOrig'] 443 414 logging.debug("CALLED: getOrigPages!") 415 if not pageinfo.has_key('pageNumberOrig'): 416 logging.warning("getOrigPages: not in pageinfo!") 417 return None 418 419 return pageinfo['pageNumberOrig'] 420 421 # WTF: is this needed? 444 422 def getOrigPagesNorm(self, docinfo=None, pageinfo=None): 445 docpath = docinfo['textURLPath'] 446 pn =pageinfo['current'] 447 selfurl = self.absolute_url() 448 pagexml = self.getServerData("page-fragment.xql","document=%s&pn=%s"%(docpath, pn)) 449 dom = Parse(pagexml) 450 pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']") 451 if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"): 452 if len(pagedivs)>0: 453 docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0]) 454 return docinfo['pageNumberOrigNorm'] 455 423 logging.debug("CALLED: getOrigPagesNorm!") 424 if not pageinfo.has_key('pageNumberOrigNorm'): 425 logging.warning("getOrigPagesNorm: not in pageinfo!") 426 return None 427 428 return pageinfo['pageNumberOrigNorm'] 456 429 430 # TODO: should be getWordInfo 457 431 def getTranslate(self, word=None, language=None): 458 432 """translate into another languages""" 459 433 data = self.getServerData("lt/wordInfo.xql","language="+str(language)+"&word="+urllib.quote(word)+"&output=html") 460 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query)))461 434 return data 462 435 436 # WTF: what does this do? 463 437 def getLemma(self, lemma=None, language=None): 464 438 """simular words lemma """ … … 466 440 return data 467 441 442 # WTF: what does this do? 468 443 def getLemmaQuery(self, query=None, language=None): 469 444 """simular words lemma """ … … 471 446 return data 472 447 448 # WTF: what does this do? 473 449 def getLex(self, query=None, language=None): 474 450 #simular words lemma 475 451 data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+urllib.quote(query)) 476 452 return data 477 453 454 # WTF: what does this do? 478 455 def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): 479 456 #number of … … 494 471 495 472 def getToc(self, mode="text", docinfo=None): 496 """loads table of contents and stores in docinfo""" 473 """loads table of contents and stores XML in docinfo""" 474 logging.debug("getToc mode=%s"%mode) 497 475 if mode == "none": 498 return docinfo 476 return docinfo 477 499 478 if 'tocSize_%s'%mode in docinfo: 500 479 # cached toc … … 512 491 tocSize = 0 513 492 tocDiv = None 514 493 # fetch full toc 515 494 pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) 516 517 # post-processing downloaded xml 518 pagedom = Parse(pagexml) 519 # get number of entries 520 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 521 if len(numdivs) > 0: 522 tocSize = int(getTextFromNode(numdivs[0])) 523 docinfo['tocSize_%s'%mode] = tocSize 495 dom = ET.fromstring(pagexml) 496 # page content is in <div class="queryResultPage"> 497 pagediv = None 498 # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage'] 499 alldivs = dom.findall("div") 500 for div in alldivs: 501 dc = div.get('class') 502 # page content div 503 if dc == 'queryResultPage': 504 pagediv = div 505 506 elif dc == 'queryResultHits': 507 docinfo['tocSize_%s'%mode] = intOr0(div.text) 508 509 if pagediv: 510 # # split xml in chunks 511 # tocs = [] 512 # tocdivs = pagediv.findall('div') 513 # for p in zip(tocdivs[::2], tocdivs[1::2]): 514 # toc = serialize(p[0]) 515 # toc += serialize(p[1]) 516 # tocs.append(toc) 517 # logging.debug("pair: %s"%(toc)) 518 # store XML in docinfo 519 docinfo['tocXML_%s'%mode] = ET.tostring(pagediv, 'UTF-8') 520 524 521 return docinfo 525 522 526 523 def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None): 527 524 """returns single page from the table of contents""" 528 # TODO: this should use the cached TOC525 logging.debug("getTocPage mode=%s, pn=%s"%(mode,pn)) 529 526 if mode == "text": 530 527 queryType = "toc" 531 528 else: 532 529 queryType = mode 533 docpath = docinfo['textURLPath'] 534 path = docinfo['textURLPath'] 535 pagesize = pageinfo['tocPageSize'] 536 pn = pageinfo['tocPN'] 530 531 # check for cached TOC 532 if not docinfo.has_key('tocXML_%s'%mode): 533 self.getToc(mode=mode, docinfo=docinfo) 534 535 tocxml = docinfo.get('tocXML_%s'%mode, None) 536 if not tocxml: 537 logging.error("getTocPage: unable to find tocXML") 538 return "No ToC" 539 540 pagesize = int(pageinfo['tocPageSize']) 537 541 url = docinfo['url'] 538 selfurl = self.absolute_url() 542 urlmode = docinfo['mode'] 543 selfurl = docinfo['viewerUrl'] 539 544 viewMode= pageinfo['viewMode'] 540 characterNormalization = pageinfo ['characterNormalization']541 #optionToggle =pageinfo ['optionToggle']542 545 tocMode = pageinfo['tocMode'] 543 tocPN = pageinfo['tocPN'] 544 545 data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm"%(docpath,queryType, pagesize, pn)) 546 page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) 547 text = page.replace('mode=image','mode=texttool') 548 return text 546 tocPN = int(pageinfo['tocPN']) 547 548 fulltoc = ET.fromstring(tocxml) 549 550 if fulltoc: 551 # paginate 552 #start = (pn - 1) * pagesize * 2 553 #end = start + pagesize * 2 554 #tocdivs = fulltoc[start:end] 555 tocdivs = fulltoc 556 557 # check all a-tags 558 links = tocdivs.findall(".//a") 559 for l in links: 560 href = l.get('href') 561 if href: 562 # take pn from href 563 m = re.match(r'page-fragment\.xql.*pn=(\d+)', href) 564 if m is not None: 565 # and create new url 566 l.set('href', '%s?mode=%s&url=%s&viewMode=%s&pn=%s&tocMode=%s&tocPN=%s'%(selfurl, urlmode, url, viewMode, m.group(1), tocMode, tocPN)) 567 else: 568 logging.warning("getTocPage: Problem with link=%s"%href) 569 570 return serialize(tocdivs) 571 549 572 550 573 def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): 551 #def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/",timeout=40,RESPONSE=None):552 574 """change settings""" 553 575 self.title=title … … 570 592 if RESPONSE is not None: 571 593 RESPONSE.redirect('manage_main') 594 595 -
documentViewer.py
r454 r455 355 355 # get table of contents 356 356 docinfo = self.getToc(mode=tocMode, docinfo=docinfo) 357 358 if viewMode=="auto": # automodus gewaehlt 359 if docinfo.has_key('textURL') or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert 357 358 # auto viewMode: text_dict if text else images 359 if viewMode=="auto": 360 if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 361 #texturl gesetzt und textViewer konfiguriert 360 362 viewMode="text_dict" 361 363 else: 362 364 viewMode="images" 363 365 364 pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) 365 366 if (docinfo.get('textURLPath',None)): 367 page = self.getTextPage(mode=viewMode, docinfo=docinfo, pageinfo=pageinfo) 366 pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode) 367 368 if viewMode != 'images' and docinfo.get('textURLPath', None): 369 # get full text page 370 page = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo) 368 371 pageinfo['textPage'] = page 369 tt = getattr(self, 'template') 370 pt = getattr(tt, 'viewer_main') 371 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) 372 373 # get template /template/viewer_main 374 pt = getattr(self.template, 'viewer_main') 375 # and execute with parameters 376 return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=self.generateMarks(mk)) 372 377 373 378 def generateMarks(self,mk): … … 867 872 # check if its still current 868 873 if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url: 869 logging.debug("documentViewer (getdocinfo) docinfo in session : %s"%docinfo)874 logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys()) 870 875 return docinfo 876 871 877 # new docinfo 872 878 docinfo = {'mode': mode, 'url': url} 873 if mode=="texttool": #index.meta with texttool information 879 # add self url 880 docinfo['viewerUrl'] = self.getDocumentViewerURL() 881 if mode=="texttool": 882 # index.meta with texttool information 874 883 docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo) 875 884 elif mode=="imagepath": 885 # folder with images, index.meta optional 876 886 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) 877 887 elif mode=="filepath": 888 # filename 878 889 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) 879 890 else: -
documentViewer_old.py
r453 r455 10 10 from Ft.Xml import EMPTY_NAMESPACE, Parse 11 11 import Ft.Xml.Domlette 12 13 import xml.etree.ElementTree as ET14 15 12 import os.path 16 13 import sys … … 36 33 return int(default) 37 34 38 def getTextFromNode(node ):35 def getTextFromNode(nodename): 39 36 """get the cdata content of a node""" 40 if node is None:37 if nodename is None: 41 38 return "" 42 # ET: 43 text = node.text or "" 44 for e in node: 45 text += gettext(e) 46 if e.tail: 47 text += e.tail 48 49 # 4Suite: 50 #nodelist=node.childNodes 51 #text = "" 52 #for n in nodelist: 53 # if n.nodeType == node.TEXT_NODE: 54 # text = text + n.data 55 56 return text 39 nodelist=nodename.childNodes 40 rc = "" 41 for node in nodelist: 42 if node.nodeType == node.TEXT_NODE: 43 rc = rc + node.data 44 return rc 57 45 58 46 def serializeNode(node, encoding="utf-8"): 59 47 """returns a string containing node as XML""" 60 s = ET.tostring(node) 61 62 # 4Suite: 63 # stream = cStringIO.StringIO() 64 # Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) 65 # s = stream.getvalue() 66 # stream.close() 48 stream = cStringIO.StringIO() 49 #logging.debug("BUF: %s"%(stream)) 50 Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) 51 s = stream.getvalue() 52 #logging.debug("BUF: %s"%(s)) 53 stream.close() 67 54 return s 68 55 … … 510 497 raise IOError("Unable to get dir-info from %s"%(infoUrl)) 511 498 512 dom = ET.fromstring(txt).getroot() 513 #dom = Parse(txt) 514 sizes=dom.find("//dir/size") 515 #sizes=dom.xpath("//dir/size") 499 dom = Parse(txt) 500 sizes=dom.xpath("//dir/size") 516 501 logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes) 517 502 … … 562 547 raise IOError("Unable to read index meta from %s"%(url)) 563 548 564 dom = ET.fromstring(txt).getroot() 565 #dom = Parse(txt) 549 dom = Parse(txt) 566 550 return dom 567 551 … … 582 566 raise IOError("Unable to read infoXMLfrom %s"%(url)) 583 567 584 dom = ET.fromstring(txt).getroot() 585 #dom = Parse(txt) 568 dom = Parse(txt) 586 569 return dom 587 570 … … 601 584 dom = self.getDomFromIndexMeta(path) 602 585 603 acctype = dom.find("//access-conditions/access/@type") 604 #acctype = dom.xpath("//access-conditions/access/@type") 586 acctype = dom.xpath("//access-conditions/access/@type") 605 587 if acctype and (len(acctype)>0): 606 588 access=acctype[0].value … … 628 610 logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) 629 611 # put in all raw bib fields as dict "bib" 630 bib = dom.find("//bib/*") 631 #bib = dom.xpath("//bib/*") 612 bib = dom.xpath("//bib/*") 632 613 if bib and len(bib)>0: 633 614 bibinfo = {} … … 638 619 # extract some fields (author, title, year) according to their mapping 639 620 metaData=self.metadata.main.meta.bib 640 bibtype=dom.find("//bib/@type") 641 #bibtype=dom.xpath("//bib/@type") 621 bibtype=dom.xpath("//bib/@type") 642 622 if bibtype and (len(bibtype)>0): 643 623 bibtype=bibtype[0].value
Note: See TracChangeset
for help on using the changeset viewer.