Changeset 453:beb7ccb92564 in documentViewer for documentViewer.py
- Timestamp:
- Jul 14, 2011, 5:43:56 PM (13 years ago)
- Branch:
- elementtree
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
documentViewer.py
r405 r453 8 8 from Products.zogiLib.zogiLib import browserCheck 9 9 10 from Ft.Xml import EMPTY_NAMESPACE, Parse 11 import Ft.Xml.Domlette 10 #from Ft.Xml import EMPTY_NAMESPACE, Parse 11 #import Ft.Xml.Domlette 12 13 import xml.etree.ElementTree as ET 14 12 15 import os.path 13 16 import sys … … 17 20 import math 18 21 import urlparse 19 import cStringIO20 22 import re 21 23 import string … … 33 35 return int(default) 34 36 35 def getText FromNode(nodename):37 def getText(node): 36 38 """get the cdata content of a node""" 37 if node nameis None:39 if node is None: 38 40 return "" 39 nodelist=nodename.childNodes 40 rc = "" 41 for node in nodelist: 42 if node.nodeType == node.TEXT_NODE: 43 rc = rc + node.data 44 return rc 45 46 def serializeNode(node, encoding='utf-8'): 41 # ET: 42 text = node.text or "" 43 for e in node: 44 text += gettext(e) 45 if e.tail: 46 text += e.tail 47 48 # 4Suite: 49 #nodelist=node.childNodes 50 #text = "" 51 #for n in nodelist: 52 # if n.nodeType == node.TEXT_NODE: 53 # text = text + n.data 54 55 return text 56 57 getTextFromNode = getText 58 59 def serializeNode(node, encoding="utf-8"): 47 60 """returns a string containing node as XML""" 48 buf = cStringIO.StringIO() 49 Ft.Xml.Domlette.Print(node, stream=buf, encoding=encoding) 50 s = buf.getvalue() 51 buf.close() 61 s = ET.tostring(node) 62 63 # 4Suite: 64 # stream = cStringIO.StringIO() 65 # Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) 66 # s = stream.getvalue() 67 # stream.close() 52 68 return s 53 69 … … 59 75 bt['isIE'] = False 60 76 bt['isN4'] = False 77 bt['versFirefox']="" 78 bt['versIE']="" 79 bt['versSafariChrome']="" 80 bt['versOpera']="" 81 61 82 if string.find(ua, 'MSIE') > -1: 62 83 bt['isIE'] = True 63 84 else: 64 85 bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1) 65 86 # Safari oder Chrome identification 87 try: 88 nav = ua[string.find(ua, '('):] 89 nav1=ua[string.find(ua,')'):] 90 nav2=nav1[string.find(nav1,'('):] 91 nav3=nav2[string.find(nav2,')'):] 92 ie = string.split(nav, "; ")[1] 93 ie1 =string.split(nav1, " ")[2] 94 ie2 =string.split(nav3, " ")[1] 95 ie3 =string.split(nav3, " ")[2] 96 if string.find(ie3, "Safari") >-1: 97 bt['versSafariChrome']=string.split(ie2, "/")[1] 98 except: pass 99 # IE identification 66 100 try: 67 101 nav = ua[string.find(ua, '('):] … … 69 103 if string.find(ie, "MSIE") > -1: 70 104 bt['versIE'] = string.split(ie, " ")[1] 71 except: pass 105 except:pass 106 # Firefox identification 107 try: 108 nav = ua[string.find(ua, '('):] 109 nav1=ua[string.find(ua,')'):] 110 if string.find(ie1, "Firefox") >-1: 111 nav5= string.split(ie1, "/")[1] 112 logging.debug("FIREFOX: %s"%(nav5)) 113 bt['versFirefox']=nav5[0:3] 114 except:pass 115 #Opera identification 116 try: 117 if string.find(ua,"Opera") >-1: 118 nav = ua[string.find(ua, '('):] 119 nav1=nav[string.find(nav,')'):] 120 bt['versOpera']=string.split(nav1,"/")[2] 121 except:pass 72 122 73 123 bt['isMac'] = string.find(ua, 'Macintosh') > -1 … … 128 178 #return None 129 179 130 131 132 180 ## 133 181 ## documentViewer class … … 148 196 toc_figures = PageTemplateFile('zpt/toc_figures', globals()) 149 197 page_main_images = PageTemplateFile('zpt/page_main_images', globals()) 198 page_main_double = PageTemplateFile('zpt/page_main_double', globals()) 150 199 page_main_text = PageTemplateFile('zpt/page_main_text', globals()) 151 200 page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) … … 197 246 return self.template.fulltextclient.getTextPage(**args) 198 247 248 def getOrigPages(self, **args): 249 """get page""" 250 return self.template.fulltextclient.getOrigPages(**args) 251 252 def getOrigPagesNorm(self, **args): 253 """get page""" 254 return self.template.fulltextclient.getOrigPagesNorm(**args) 255 199 256 def getQuery(self, **args): 200 """get query """257 """get query in search""" 201 258 return self.template.fulltextclient.getQuery(**args) 202 203 def getQueryResultHits(self, **args): 204 """get query""" 205 return self.template.fulltextclient.getQueryResultHits(**args) 206 207 def getQueryResultHitsText(self, **args): 208 """get query""" 209 return self.template.fulltextclient.getQueryResultHitsText(**args) 210 211 def getQueryResultHitsFigures(self, **args): 212 """get query""" 213 return self.template.fulltextclient.getQueryResultHitsFigures(**args) 214 215 def getPDF(self, **args): 216 """get query""" 217 return self.template.fulltextclient.getPDF(**args) 218 259 219 260 def getSearch(self, **args): 220 261 """get search""" … … 228 269 """get all gis places """ 229 270 return self.template.fulltextclient.getAllGisPlaces(**args) 230 231 def getOrigPages(self, **args): 232 """get original page number """ 233 return self.template.fulltextclient.getOrigPages(**args) 234 235 def getNumPages(self, docinfo): 236 """get numpages""" 237 return self.template.fulltextclient.getNumPages(docinfo) 238 239 def getNumTextPages(self, docinfo): 240 """get numpages text""" 241 return self.template.fulltextclient.getNumTextPages(docinfo) 242 271 243 272 def getTranslate(self, **args): 244 273 """get translate""" … … 248 277 """get lemma""" 249 278 return self.template.fulltextclient.getLemma(**args) 279 280 def getLemmaQuery(self, **args): 281 """get query""" 282 return self.template.fulltextclient.getLemmaQuery(**args) 283 284 def getLex(self, **args): 285 """get lex""" 286 return self.template.fulltextclient.getLex(**args) 250 287 251 288 def getToc(self, **args): … … 284 321 285 322 if viewMode=="auto": # automodus gewaehlt 286 if docinfo.has_key("textURL") or docinfo. has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert323 if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert 287 324 viewMode="text" 288 325 else: … … 320 357 321 358 if viewMode=="auto": # automodus gewaehlt 322 if docinfo.has_key('textURL') or docinfo. has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert359 if docinfo.has_key('textURL') or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert 323 360 viewMode="text_dict" 324 361 else: … … 327 364 pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) 328 365 329 pt = getattr(self.template, 'viewer_main') 366 if (docinfo.get('textURLPath',None)): 367 page = self.getTextPage(mode=viewMode, docinfo=docinfo, pageinfo=pageinfo) 368 pageinfo['textPage'] = page 369 tt = getattr(self, 'template') 370 pt = getattr(tt, 'viewer_main') 330 371 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) 331 372 … … 343 384 def getBrowser(self): 344 385 """getBrowser the version of browser """ 345 names="" 346 names = browserCheck(self) 347 #logging.debug("XXXXXXXXXXXXXXXX: %s"%names) 348 return names 386 bt = browserCheck(self) 387 logging.debug("BROWSER VERSION: %s"%(bt)) 388 return bt 349 389 350 390 def findDigilibUrl(self): … … 365 405 return style 366 406 367 def getLink(self,param=None,val=None): 368 """link to documentviewer with parameter param set to val""" 369 params=self.REQUEST.form.copy() 407 def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'): 408 """returns URL to documentviewer with parameter param set to val or from dict params""" 409 # copy existing request params 410 urlParams=self.REQUEST.form.copy() 411 # change single param 370 412 if param is not None: 371 413 if val is None: 372 if params.has_key(param):373 del params[param]414 if urlParams.has_key(param): 415 del urlParams[param] 374 416 else: 375 params[param] = str(val)417 urlParams[param] = str(val) 376 418 377 if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath 378 params["mode"] = "imagepath" 379 params["url"] = getParentDir(params["url"]) 419 # change more params 420 if params is not None: 421 for k in params.keys(): 422 v = params[k] 423 if v is None: 424 # val=None removes param 425 if urlParams.has_key(k): 426 del urlParams[k] 427 428 else: 429 urlParams[k] = v 430 431 # FIXME: does this belong here? 432 if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath 433 urlParams["mode"] = "imagepath" 434 urlParams["url"] = getParentDir(urlParams["url"]) 380 435 381 # quote values and assemble into query string 382 #ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) 383 ps = urllib.urlencode(params) 384 url=self.REQUEST['URL1']+"?"+ps 436 # quote values and assemble into query string (not escaping '/') 437 ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()]) 438 #ps = urllib.urlencode(urlParams) 439 if baseUrl is None: 440 baseUrl = self.REQUEST['URL1'] 441 442 url = "%s?%s"%(baseUrl, ps) 385 443 return url 386 444 387 def getLinkAmp(self,param=None,val=None): 445 446 def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None): 388 447 """link to documentviewer with parameter param set to val""" 389 params=self.REQUEST.form.copy() 390 if param is not None: 391 if val is None: 392 if params.has_key(param): 393 del params[param] 394 else: 395 params[param] = str(val) 396 397 # quote values and assemble into query string 398 logging.debug("XYXXXXX: %s"%repr(params.items())) 399 ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) 400 url=self.REQUEST['URL1']+"?"+ps 401 return url 448 return self.getLink(param, val, params, baseUrl, '&') 402 449 403 450 def getInfo_xml(self,url,mode): … … 464 511 raise IOError("Unable to get dir-info from %s"%(infoUrl)) 465 512 466 dom = Parse(txt) 467 sizes=dom.xpath("//dir/size") 468 logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes) 469 470 if sizes: 471 docinfo['numPages'] = int(getTextFromNode(sizes[0])) 513 dom = ET.fromstring(txt) 514 #dom = Parse(txt) 515 size=getText(dom.find("size")) 516 #sizes=dom.xpath("//dir/size") 517 logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size) 518 519 if size: 520 docinfo['numPages'] = int(size) 472 521 else: 473 522 docinfo['numPages'] = 0 … … 514 563 raise IOError("Unable to read index meta from %s"%(url)) 515 564 516 dom = Parse(txt) 565 dom = ET.fromstring(txt) 566 #dom = Parse(txt) 517 567 return dom 518 568 … … 533 583 raise IOError("Unable to read infoXMLfrom %s"%(url)) 534 584 535 dom = Parse(txt) 585 dom = ET.fromstring(txt) 586 #dom = Parse(txt) 536 587 return dom 537 588 … … 551 602 dom = self.getDomFromIndexMeta(path) 552 603 553 acctype = dom.xpath("//access-conditions/access/@type") 554 if acctype and (len(acctype)>0): 555 access=acctype[0].value 556 if access in ['group', 'institution']: 557 access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower() 604 acc = dom.find(".//access-conditions/access") 605 if acc is not None: 606 acctype = acc.get('type') 607 #acctype = dom.xpath("//access-conditions/access/@type") 608 if acctype: 609 access=acctype 610 if access in ['group', 'institution']: 611 access = dom.find(".//access-conditions/access/name").text.lower() 558 612 559 613 docinfo['accessType'] = access … … 577 631 logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) 578 632 # put in all raw bib fields as dict "bib" 579 bib = dom.xpath("//bib/*") 633 bib = dom.find(".//bib/*") 634 #bib = dom.xpath("//bib/*") 580 635 if bib and len(bib)>0: 581 636 bibinfo = {} … … 586 641 # extract some fields (author, title, year) according to their mapping 587 642 metaData=self.metadata.main.meta.bib 588 bib type=dom.xpath("//bib/@type")589 if bibtype and (len(bibtype)>0):590 bibtype=bibtype[0].value591 else:643 bib = dom.find(".//bib") 644 bibtype=bib.get("type") 645 #bibtype=dom.xpath("//bib/@type") 646 if not bibtype: 592 647 bibtype="generic" 593 648 … … 598 653 logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype)) 599 654 # if there is no mapping bibmap is empty (mapping sometimes has empty fields) 600 if len(bibmap) > 0 and len(bibmap['author'][0]) > 0 :655 if len(bibmap) > 0 and len(bibmap['author'][0]) > 0 or len(bibmap['title'][0]) > 0: 601 656 try: 602 docinfo['author']=getText FromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])657 docinfo['author']=getText(bib.find(bibmap['author'][0])) 603 658 except: pass 604 659 try: 605 docinfo['title']=getText FromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])660 docinfo['title']=getText(bib.find(bibmap['title'][0])) 606 661 except: pass 607 662 try: 608 docinfo['year']=getText FromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])663 docinfo['year']=getText(bib.find(bibmap['year'][0])) 609 664 except: pass 610 logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype) 611 try: 612 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0]) 613 except: 614 docinfo['lang']='' 615 665 666 # ROC: why is this here? 667 # logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype) 668 # try: 669 # docinfo['lang']=getTextFromNode(dom.find(".//bib/lang")[0]) 670 # except: 671 # docinfo['lang']='' 672 # try: 673 # docinfo['city']=getTextFromNode(dom.find(".//bib/city")[0]) 674 # except: 675 # docinfo['city']='' 676 # try: 677 # docinfo['number_of_pages']=getTextFromNode(dom.find(".//bib/number_of_pages")[0]) 678 # except: 679 # docinfo['number_of_pages']='' 680 # try: 681 # docinfo['series_volume']=getTextFromNode(dom.find(".//bib/series_volume")[0]) 682 # except: 683 # docinfo['series_volume']='' 684 # try: 685 # docinfo['number_of_volumes']=getTextFromNode(dom.find(".//bib/number_of_volumes")[0]) 686 # except: 687 # docinfo['number_of_volumes']='' 688 # try: 689 # docinfo['translator']=getTextFromNode(dom.find(".//bib/translator")[0]) 690 # except: 691 # docinfo['translator']='' 692 # try: 693 # docinfo['edition']=getTextFromNode(dom.find(".//bib/edition")[0]) 694 # except: 695 # docinfo['edition']='' 696 # try: 697 # docinfo['series_author']=getTextFromNode(dom.find(".//bib/series_author")[0]) 698 # except: 699 # docinfo['series_author']='' 700 # try: 701 # docinfo['publisher']=getTextFromNode(dom.find(".//bib/publisher")[0]) 702 # except: 703 # docinfo['publisher']='' 704 # try: 705 # docinfo['series_title']=getTextFromNode(dom.find(".//bib/series_title")[0]) 706 # except: 707 # docinfo['series_title']='' 708 # try: 709 # docinfo['isbn_issn']=getTextFromNode(dom.find(".//bib/isbn_issn")[0]) 710 # except: 711 # docinfo['isbn_issn']='' 616 712 return docinfo 617 713 … … 627 723 dom = self.getDomFromIndexMeta(path) 628 724 629 docinfo['name']=getText FromNode(dom.xpath("/resource/name")[0])725 docinfo['name']=getText(dom.find("name")) 630 726 logging.debug("documentViewer docinfo[name] %s"%docinfo['name']) 631 727 return docinfo … … 644 740 archiveName = None 645 741 646 archiveNames = dom.xpath("//resource/name") 647 if archiveNames and (len(archiveNames) > 0): 648 archiveName = getTextFromNode(archiveNames[0]) 649 else: 742 archiveName = getTextFromNode(dom.find("name")) 743 if not archiveName: 650 744 logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url)) 651 745 652 archivePaths = dom.xpath("//resource/archive-path") 653 if archivePaths and (len(archivePaths) > 0): 654 archivePath = getTextFromNode(archivePaths[0]) 746 archivePath = getTextFromNode(dom.find("archive-path")) 747 if archivePath: 655 748 # clean up archive path 656 749 if archivePath[0] != '/': … … 668 761 raise IOError("Missing archive-path (for text-tool) in %s" % (url)) 669 762 670 imageDirs = dom.xpath("//texttool/image") 671 if imageDirs and (len(imageDirs) > 0): 672 imageDir = getTextFromNode(imageDirs[0]) 673 674 else: 763 imageDir = getText(dom.find(".//texttool/image")) 764 765 if not imageDir: 675 766 # we balk with no image tag / not necessary anymore because textmode is now standard 676 767 #raise IOError("No text-tool info in %s"%(url)) … … 689 780 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir 690 781 691 viewerUrls = dom.xpath("//texttool/digiliburlprefix") 692 if viewerUrls and (len(viewerUrls) > 0): 693 viewerUrl = getTextFromNode(viewerUrls[0]) 782 viewerUrl = getText(dom.find(".//texttool/digiliburlprefix")) 783 if viewerUrl: 694 784 docinfo['viewerURL'] = viewerUrl 695 785 696 786 # old style text URL 697 textUrls = dom.xpath("//texttool/text") 698 if textUrls and (len(textUrls) > 0): 699 textUrl = getTextFromNode(textUrls[0]) 787 textUrl = getText(dom.find(".//texttool/text")) 788 if textUrl: 700 789 if urlparse.urlparse(textUrl)[0] == "": #keine url 701 790 textUrl = os.path.join(archivePath, textUrl) … … 707 796 708 797 # new style text-url-path 709 textUrls = dom.xpath("//texttool/text-url-path") 710 if textUrls and (len(textUrls) > 0): 711 textUrl = getTextFromNode(textUrls[0]) 798 textUrl = getText(dom.find(".//texttool/text-url-path")) 799 if textUrl: 712 800 docinfo['textURLPath'] = textUrl 713 if not docinfo['imagePath']: 801 textUrlkurz = string.split(textUrl, ".")[0] 802 docinfo['textURLPathkurz'] = textUrlkurz 803 #if not docinfo['imagePath']: 714 804 # text-only, no page images 715 docinfo = self.getNumTextPages(docinfo) 805 #docinfo = self.getNumTextPages(docinfo) 806 716 807 717 presentationUrl s = dom.xpath("//texttool/presentation")808 presentationUrl = getText(dom.find(".//texttool/presentation")) 718 809 docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag 719 810 docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom) 720 811 721 812 722 if presentationUrl s and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen813 if presentationUrl: # ueberschreibe diese durch presentation informationen 723 814 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten 724 815 # durch den relativen Pfad auf die presentation infos 725 presentationPath = getTextFromNode(presentationUrls[0])816 presentationPath = presentationUrl 726 817 if url.endswith("index.meta"): 727 818 presentationUrl = url.replace('index.meta', presentationPath) … … 741 832 dom=self.getPresentationInfoXML(url) 742 833 try: 743 docinfo['author']=getText FromNode(dom.xpath("//author")[0])834 docinfo['author']=getText(dom.find(".//author")) 744 835 except: 745 836 pass 746 837 try: 747 docinfo['title']=getText FromNode(dom.xpath("//title")[0])838 docinfo['title']=getText(dom.find(".//title")) 748 839 except: 749 840 pass 750 841 try: 751 docinfo['year']=getText FromNode(dom.xpath("//date")[0])842 docinfo['year']=getText(dom.find(".//date")) 752 843 except: 753 844 pass … … 797 888 logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) 798 889 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) 799 890 891 # FIXME: fake texturlpath 892 if not docinfo.has_key('textURLPath'): 893 docinfo['textURLPath'] = None 894 800 895 logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) 801 896 #logging.debug("documentViewer (getdocinfo) docinfo: %s"%) … … 827 922 pageinfo['viewMode'] = viewMode 828 923 pageinfo['tocMode'] = tocMode 829 pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg PlusNorm')830 pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','')924 pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg') 925 #pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','1') 831 926 pageinfo['query'] = self.REQUEST.get('query','') 832 927 pageinfo['queryType'] = self.REQUEST.get('queryType','') … … 836 931 pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30') 837 932 pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10') 838 pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') 933 pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') 839 934 toc = int (pageinfo['tocPN']) 840 935 pageinfo['textPages'] =int (toc) 841 842 843 936 844 937 if 'tocSize_%s'%tocMode in docinfo:
Note: See TracChangeset
for help on using the changeset viewer.