documentViewer/documentViewer.py - view

File: [Repository] / documentViewer / documentViewer.py
Revision 1.44: download - view: text, annotated - select for diffs - revision graph
Thu Apr 8 11:04:51 2010 UTC (14 years, 2 months ago) by casties
Branches: MAIN
CVS tags: HEAD

new version with new full-text infrastructure and some more changed templates

1: 2: from OFS.Folder import Folder 3: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate 4: from Products.PageTemplates.PageTemplateFile import PageTemplateFile 5: from AccessControl import ClassSecurityInfo 6: from AccessControl import getSecurityManager 7: from Globals import package_home 8: 9: from Ft.Xml.Domlette import NonvalidatingReader 10: from Ft.Xml.Domlette import PrettyPrint, Print 11: from Ft.Xml import EMPTY_NAMESPACE, Parse 12: 13: 14: import Ft.Xml.XPath 15: import cStringIO 16: import xmlrpclib 17: import os.path 18: import sys 19: import cgi 20: import urllib 21: import logging 22: import math 23: 24: import urlparse 25: from types import * 26: 27: def logger(txt,method,txt2): 28: """logging""" 29: logging.info(txt+ txt2) 30: 31: 32: def getInt(number, default=0): 33: """returns always an int (0 in case of problems)""" 34: try: 35: return int(number) 36: except: 37: return int(default) 38: 39: def getTextFromNode(nodename): 40: """get the cdata content of a node""" 41: if nodename is None: 42: return "" 43: nodelist=nodename.childNodes 44: rc = "" 45: for node in nodelist: 46: if node.nodeType == node.TEXT_NODE: 47: rc = rc + node.data 48: return rc 49: 50: def serializeNode(node, encoding='utf-8'): 51: """returns a string containing node as XML""" 52: buf = cStringIO.StringIO() 53: Print(node, stream=buf, encoding=encoding) 54: s = buf.getvalue() 55: buf.close() 56: return s 57: 58: 59: def getParentDir(path): 60: """returns pathname shortened by one""" 61: return '/'.join(path.split('/')[0:-1]) 62: 63: 64: import socket 65: 66: def urlopen(url,timeout=2): 67: """urlopen mit timeout""" 68: socket.setdefaulttimeout(timeout) 69: ret=urllib.urlopen(url) 70: socket.setdefaulttimeout(5) 71: return ret 72: 73: 74: ## 75: ## documentViewer class 76: ## 77: class documentViewer(Folder): 78: """document viewer""" 79: #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?" 80: 81: meta_type="Document viewer" 82: 83: security=ClassSecurityInfo() 84: manage_options=Folder.manage_options+( 85: {'label':'main config','action':'changeDocumentViewerForm'}, 86: ) 87: 88: # templates and forms 89: viewer_main = PageTemplateFile('zpt/viewer_main', globals()) 90: toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals()) 91: toc_text = PageTemplateFile('zpt/toc_text', globals()) 92: toc_figures = PageTemplateFile('zpt/toc_figures', globals()) 93: page_main_images = PageTemplateFile('zpt/page_main_images', globals()) 94: page_main_text = PageTemplateFile('zpt/page_main_text', globals()) 95: page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) 96: head_main = PageTemplateFile('zpt/head_main', globals()) 97: docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) 98: info_xml = PageTemplateFile('zpt/info_xml', globals()) 99: 100: thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) 101: security.declareProtected('View management screens','changeDocumentViewerForm') 102: changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) 103: 104: 105: def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"): 106: """init document viewer""" 107: self.id=id 108: self.title=title 109: self.thumbcols = thumbcols 110: self.thumbrows = thumbrows 111: # authgroups is list of authorized groups (delimited by ,) 112: self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 113: # create template folder so we can always use template.something 114: 115: templateFolder = Folder('template') 116: #self['template'] = templateFolder # Zope-2.12 style 117: self._setObject('template',templateFolder) # old style 118: try: 119: from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy 120: xmlRpcClient = XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False) 121: #templateFolder['fulltextclient'] = xmlRpcClient 122: templateFolder._setObject('fulltextclient',xmlRpcClient) 123: except Exception, e: 124: logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(e)) 125: try: 126: from Products.zogiLib.zogiLib import zogiLib 127: zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book") 128: #templateFolder['zogilib'] = zogilib 129: templateFolder._setObject('zogilib',zogilib) 130: except Exception, e: 131: logging.error("Unable to create zogiLib for zogilib: "+str(e)) 132: 133: 134: security.declareProtected('View','thumbs_rss') 135: def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): 136: ''' 137: view it 138: @param mode: defines how to access the document behind url 139: @param url: url which contains display information 140: @param viewMode: if images display images, if text display text, default is images (text,images or auto) 141: 142: ''' 143: logging.debug("HHHHHHHHHHHHHH:load the rss") 144: logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 145: 146: if not hasattr(self, 'template'): 147: # create template folder if it doesn't exist 148: self.manage_addFolder('template') 149: 150: if not self.digilibBaseUrl: 151: self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 152: 153: docinfo = self.getDocinfo(mode=mode,url=url) 154: pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo) 155: pt = getattr(self.template, 'thumbs_main_rss') 156: 157: if viewMode=="auto": # automodus gewaehlt 158: if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert 159: viewMode="text" 160: else: 161: viewMode="images" 162: 163: return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) 164: 165: security.declareProtected('View','index_html') 166: def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): 167: ''' 168: view it 169: @param mode: defines how to access the document behind url 170: @param url: url which contains display information 171: @param viewMode: if images display images, if text display text, default is auto (text,images or auto) 172: @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures) 173: ''' 174: 175: logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 176: 177: if not hasattr(self, 'template'): 178: # this won't work 179: logging.error("template folder missing!") 180: return "ERROR: template folder missing!" 181: 182: if not getattr(self, 'digilibBaseUrl', None): 183: self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 184: 185: docinfo = self.getDocinfo(mode=mode,url=url) 186: pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) 187: if tocMode != "thumbs": 188: # get table of contents 189: docinfo = self.getToc(mode=tocMode, docinfo=docinfo) 190: 191: if viewMode=="auto": # automodus gewaehlt 192: if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert 193: viewMode="text" 194: else: 195: viewMode="images" 196: 197: pt = getattr(self.template, 'viewer_main') 198: return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) 199: 200: def generateMarks(self,mk): 201: ret="" 202: if mk is None: 203: return "" 204: if type(mk) is not ListType: 205: mk=[mk] 206: for m in mk: 207: ret+="mk=%s"%m 208: return ret 209: 210: 211: def findDigilibUrl(self): 212: """try to get the digilib URL from zogilib""" 213: url = self.template.zogilib.getDLBaseUrl() 214: return url 215: 216: def getStyle(self, idx, selected, style=""): 217: """returns a string with the given style and append 'sel' if path == selected.""" 218: #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style)) 219: if idx == selected: 220: return style + 'sel' 221: else: 222: return style 223: 224: def getLink(self,param=None,val=None): 225: """link to documentviewer with parameter param set to val""" 226: params=self.REQUEST.form.copy() 227: if param is not None: 228: if val is None: 229: if params.has_key(param): 230: del params[param] 231: else: 232: params[param] = str(val) 233: 234: if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath 235: params["mode"] = "imagepath" 236: params["url"] = getParentDir(params["url"]) 237: 238: # quote values and assemble into query string 239: ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) 240: url=self.REQUEST['URL1']+"?"+ps 241: return url 242: 243: def getLinkAmp(self,param=None,val=None): 244: """link to documentviewer with parameter param set to val""" 245: params=self.REQUEST.form.copy() 246: if param is not None: 247: if val is None: 248: if params.has_key(param): 249: del params[param] 250: else: 251: params[param] = str(val) 252: 253: # quote values and assemble into query string 254: logging.info("XYXXXXX: %s"%repr(params.items())) 255: ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) 256: url=self.REQUEST['URL1']+"?"+ps 257: return url 258: 259: def getInfo_xml(self,url,mode): 260: """returns info about the document as XML""" 261: 262: if not self.digilibBaseUrl: 263: self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 264: 265: docinfo = self.getDocinfo(mode=mode,url=url) 266: pt = getattr(self.template, 'info_xml') 267: return pt(docinfo=docinfo) 268: 269: 270: def isAccessible(self, docinfo): 271: """returns if access to the resource is granted""" 272: access = docinfo.get('accessType', None) 273: logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access) 274: if access is not None and access == 'free': 275: logger("documentViewer (accessOK)", logging.INFO, "access is free") 276: return True 277: elif access is None or access in self.authgroups: 278: # only local access -- only logged in users 279: user = getSecurityManager().getUser() 280: if user is not None: 281: #print "user: ", user 282: return (user.getUserName() != "Anonymous User") 283: else: 284: return False 285: 286: logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access) 287: return False 288: 289: 290: def getDirinfoFromDigilib(self,path,docinfo=None,cut=0): 291: """gibt param von dlInfo aus""" 292: num_retries = 3 293: if docinfo is None: 294: docinfo = {} 295: 296: for x in range(cut): 297: 298: path=getParentDir(path) 299: 300: infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path 301: 302: logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl)) 303: 304: for cnt in range(num_retries): 305: try: 306: # dom = NonvalidatingReader.parseUri(imageUrl) 307: txt=urllib.urlopen(infoUrl).read() 308: dom = Parse(txt) 309: break 310: except: 311: logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt)) 312: else: 313: raise IOError("Unable to get dir-info from %s"%(infoUrl)) 314: 315: sizes=dom.xpath("//dir/size") 316: logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size"%sizes) 317: 318: if sizes: 319: docinfo['numPages'] = int(getTextFromNode(sizes[0])) 320: else: 321: docinfo['numPages'] = 0 322: 323: # TODO: produce and keep list of image names and numbers 324: 325: return docinfo 326: 327: 328: def getIndexMeta(self, url): 329: """returns dom of index.meta document at url""" 330: num_retries = 3 331: dom = None 332: metaUrl = None 333: if url.startswith("http://"): 334: # real URL 335: metaUrl = url 336: else: 337: # online path 338: server=self.digilibBaseUrl+"/servlet/Texter?fn=" 339: metaUrl=server+url.replace("/mpiwg/online","") 340: if not metaUrl.endswith("index.meta"): 341: metaUrl += "/index.meta" 342: logging.debug("METAURL: %s"%metaUrl) 343: for cnt in range(num_retries): 344: try: 345: # patch dirk encoding fehler treten dann nicht mehr auf 346: # dom = NonvalidatingReader.parseUri(metaUrl) 347: txt=urllib.urlopen(metaUrl).read() 348: dom = Parse(txt) 349: break 350: except: 351: logger("ERROR documentViewer (getIndexMeta)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2]) 352: 353: if dom is None: 354: raise IOError("Unable to read index meta from %s"%(url)) 355: 356: return dom 357: 358: def getPresentationInfoXML(self, url): 359: """returns dom of info.xml document at url""" 360: num_retries = 3 361: dom = None 362: metaUrl = None 363: if url.startswith("http://"): 364: # real URL 365: metaUrl = url 366: else: 367: # online path 368: server=self.digilibBaseUrl+"/servlet/Texter?fn=" 369: metaUrl=server+url.replace("/mpiwg/online","") 370: 371: for cnt in range(num_retries): 372: try: 373: # patch dirk encoding fehler treten dann nicht mehr auf 374: # dom = NonvalidatingReader.parseUri(metaUrl) 375: txt=urllib.urlopen(metaUrl).read() 376: dom = Parse(txt) 377: break 378: except: 379: logger("ERROR documentViewer (getPresentationInfoXML)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2]) 380: 381: if dom is None: 382: raise IOError("Unable to read infoXMLfrom %s"%(url)) 383: 384: return dom 385: 386: 387: def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): 388: """gets authorization info from the index.meta file at path or given by dom""" 389: logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path)) 390: 391: access = None 392: 393: if docinfo is None: 394: docinfo = {} 395: 396: if dom is None: 397: for x in range(cut): 398: path=getParentDir(path) 399: dom = self.getIndexMeta(path) 400: 401: acctype = dom.xpath("//access-conditions/access/@type") 402: if acctype and (len(acctype)>0): 403: access=acctype[0].value 404: if access in ['group', 'institution']: 405: access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower() 406: 407: docinfo['accessType'] = access 408: return docinfo 409: 410: 411: def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): 412: """gets bibliographical info from the index.meta file at path or given by dom""" 413: logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path)) 414: 415: if docinfo is None: 416: docinfo = {} 417: 418: if dom is None: 419: for x in range(cut): 420: path=getParentDir(path) 421: dom = self.getIndexMeta(path) 422: 423: logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) 424: # put in all raw bib fields as dict "bib" 425: bib = dom.xpath("//bib/*") 426: if bib and len(bib)>0: 427: bibinfo = {} 428: for e in bib: 429: bibinfo[e.localName] = getTextFromNode(e) 430: docinfo['bib'] = bibinfo 431: 432: # extract some fields (author, title, year) according to their mapping 433: metaData=self.metadata.main.meta.bib 434: bibtype=dom.xpath("//bib/@type") 435: if bibtype and (len(bibtype)>0): 436: bibtype=bibtype[0].value 437: else: 438: bibtype="generic" 439: 440: bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC) 441: docinfo['bib_type'] = bibtype 442: bibmap=metaData.generateMappingForType(bibtype) 443: # if there is no mapping bibmap is empty (mapping sometimes has empty fields) 444: if len(bibmap) > 0 and len(bibmap['author'][0]) > 0: 445: try: 446: docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0]) 447: except: pass 448: try: 449: docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0]) 450: except: pass 451: try: 452: docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0]) 453: except: pass 454: logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype) 455: try: 456: docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0]) 457: except: 458: docinfo['lang']='' 459: 460: return docinfo 461: 462: 463: def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): 464: """parse texttool tag in index meta""" 465: logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url)) 466: if docinfo is None: 467: docinfo = {} 468: if docinfo.get('lang', None) is None: 469: docinfo['lang'] = '' # default keine Sprache gesetzt 470: if dom is None: 471: dom = self.getIndexMeta(url) 472: 473: archivePath = None 474: archiveName = None 475: 476: archiveNames = dom.xpath("//resource/name") 477: if archiveNames and (len(archiveNames) > 0): 478: archiveName = getTextFromNode(archiveNames[0]) 479: else: 480: logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url)) 481: 482: archivePaths = dom.xpath("//resource/archive-path") 483: if archivePaths and (len(archivePaths) > 0): 484: archivePath = getTextFromNode(archivePaths[0]) 485: # clean up archive path 486: if archivePath[0] != '/': 487: archivePath = '/' + archivePath 488: if archiveName and (not archivePath.endswith(archiveName)): 489: archivePath += "/" + archiveName 490: else: 491: # try to get archive-path from url 492: logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url)) 493: if (not url.startswith('http')): 494: archivePath = url.replace('index.meta', '') 495: 496: if archivePath is None: 497: # we balk without archive-path 498: raise IOError("Missing archive-path (for text-tool) in %s" % (url)) 499: 500: imageDirs = dom.xpath("//texttool/image") 501: if imageDirs and (len(imageDirs) > 0): 502: imageDir = getTextFromNode(imageDirs[0]) 503: 504: else: 505: # we balk with no image tag / not necessary anymore because textmode is now standard 506: #raise IOError("No text-tool info in %s"%(url)) 507: imageDir = "" 508: #xquery="//pb" 509: docinfo['imagePath'] = "" # keine Bilder 510: docinfo['imageURL'] = "" 511: 512: if imageDir and archivePath: 513: #print "image: ", imageDir, " archivepath: ", archivePath 514: imageDir = os.path.join(archivePath, imageDir) 515: imageDir = imageDir.replace("/mpiwg/online", '') 516: docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo) 517: docinfo['imagePath'] = imageDir 518: 519: docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir 520: 521: viewerUrls = dom.xpath("//texttool/digiliburlprefix") 522: if viewerUrls and (len(viewerUrls) > 0): 523: viewerUrl = getTextFromNode(viewerUrls[0]) 524: docinfo['viewerURL'] = viewerUrl 525: 526: textUrls = dom.xpath("//texttool/text") 527: if textUrls and (len(textUrls) > 0): 528: textUrl = getTextFromNode(textUrls[0]) 529: if urlparse.urlparse(textUrl)[0] == "": #keine url 530: textUrl = os.path.join(archivePath, textUrl) 531: # fix URLs starting with /mpiwg/online 532: if textUrl.startswith("/mpiwg/online"): 533: textUrl = textUrl.replace("/mpiwg/online", '', 1) 534: 535: docinfo['textURL'] = textUrl 536: 537: textUrls = dom.xpath("//texttool/text-url-path") 538: if textUrls and (len(textUrls) > 0): 539: textUrl = getTextFromNode(textUrls[0]) 540: docinfo['textURLPath'] = textUrl 541: 542: presentationUrls = dom.xpath("//texttool/presentation") 543: docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag 544: 545: if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen 546: # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten 547: # durch den relativen Pfad auf die presentation infos 548: presentationPath = getTextFromNode(presentationUrls[0]) 549: if url.endswith("index.meta"): 550: presentationUrl = url.replace('index.meta', presentationPath) 551: else: 552: presentationUrl = url + "/" + presentationPath 553: docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht 554: docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) 555: 556: docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info 557: 558: return docinfo 559: 560: 561: def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): 562: """gets the bibliographical information from the preseantion entry in texttools 563: """ 564: dom=self.getPresentationInfoXML(url) 565: try: 566: docinfo['author']=getTextFromNode(dom.xpath("//author")[0]) 567: except: 568: pass 569: try: 570: docinfo['title']=getTextFromNode(dom.xpath("//title")[0]) 571: except: 572: pass 573: try: 574: docinfo['year']=getTextFromNode(dom.xpath("//date")[0]) 575: except: 576: pass 577: return docinfo 578: 579: def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): 580: """path ist the path to the images it assumes that the index.meta file is one level higher.""" 581: logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path)) 582: if docinfo is None: 583: docinfo = {} 584: path=path.replace("/mpiwg/online","") 585: docinfo['imagePath'] = path 586: docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut) 587: 588: pathorig=path 589: for x in range(cut): 590: path=getParentDir(path) 591: logging.error("PATH:"+path) 592: imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path 593: docinfo['imageURL'] = imageUrl 594: 595: #path ist the path to the images it assumes that the index.meta file is one level higher. 596: docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) 597: docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) 598: return docinfo 599: 600: 601: def getDocinfo(self, mode, url): 602: """returns docinfo depending on mode""" 603: logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url)) 604: # look for cached docinfo in session 605: if self.REQUEST.SESSION.has_key('docinfo'): 606: docinfo = self.REQUEST.SESSION['docinfo'] 607: # check if its still current 608: if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url: 609: logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%docinfo) 610: return docinfo 611: # new docinfo 612: docinfo = {'mode': mode, 'url': url} 613: if mode=="texttool": #index.meta with texttool information 614: docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo) 615: elif mode=="imagepath": 616: docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) 617: elif mode=="filepath": 618: docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) 619: else: 620: logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!") 621: raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) 622: 623: logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo) 624: self.REQUEST.SESSION['docinfo'] = docinfo 625: return docinfo 626: 627: 628: def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): 629: """returns pageinfo with the given parameters""" 630: pageinfo = {} 631: current = getInt(current) 632: pageinfo['current'] = current 633: rows = int(rows or self.thumbrows) 634: pageinfo['rows'] = rows 635: cols = int(cols or self.thumbcols) 636: pageinfo['cols'] = cols 637: grpsize = cols * rows 638: pageinfo['groupsize'] = grpsize 639: start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) 640: # int(current / grpsize) * grpsize +1)) 641: pageinfo['start'] = start 642: pageinfo['end'] = start + grpsize 643: if (docinfo is not None) and ('numPages' in docinfo): 644: np = int(docinfo['numPages']) 645: pageinfo['end'] = min(pageinfo['end'], np) 646: pageinfo['numgroups'] = int(np / grpsize) 647: if np % grpsize > 0: 648: pageinfo['numgroups'] += 1 649: 650: pageinfo['viewMode'] = viewMode 651: pageinfo['tocMode'] = tocMode 652: pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '10') 653: pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') 654: 655: return pageinfo 656: 657: 658: 659: def getNumPages(self,docinfo=None): 660: """get list of pages from fulltext and put in docinfo""" 661: xquery = '//pb' 662: text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) 663: # TODO: better processing of the page list. do we need the info somewhere else also? 664: docinfo['numPages'] = text.count("<pb ") 665: return docinfo 666: 667: def getTextPage(self, mode="text", pn=1, docinfo=None): 668: """returns single page from fulltext""" 669: docpath = docinfo['textURLPath'] 670: if mode == "text_dict": 671: textmode = "textPollux" 672: else: 673: textmode = mode 674: 675: pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn), outputUnicode=False) 676: # post-processing downloaded xml 677: pagedom = Parse(pagexml) 678: # plain text mode 679: if mode == "text": 680: # first div contains text 681: pagedivs = pagedom.xpath("/div") 682: if len(pagedivs) > 0: 683: pagenode = pagedivs[0] 684: return serializeNode(pagenode) 685: 686: # text-with-links mode 687: if mode == "text_dict": 688: # first div contains text 689: pagedivs = pagedom.xpath("/div") 690: if len(pagedivs) > 0: 691: pagenode = pagedivs[0] 692: # check all a-tags 693: links = pagenode.xpath("//a") 694: for l in links: 695: hrefNode = l.getAttributeNodeNS(None, u"href") 696: if hrefNode: 697: # is link with href 698: href = hrefNode.nodeValue 699: if href.startswith('lt/lex.xql'): 700: # is pollux link 701: selfurl = self.absolute_url() 702: # change href 703: hrefNode.nodeValue = href.replace('lt/lex.xql','%s/head_main_voc'%selfurl) 704: # add target 705: l.setAttributeNS(None, 'target', '_blank') 706: return serializeNode(pagenode) 707: 708: return "no text here" 709: 710: def getToc(self, mode="text", docinfo=None): 711: """loads table of contents and stores in docinfo""" 712: logging.debug("documentViewer (gettoc) mode: %s"%(mode)) 713: if 'tocSize_%s'%mode in docinfo: 714: # cached toc 715: return docinfo 716: 717: docpath = docinfo['textURLPath'] 718: # we need to set a result set size 719: pagesize = 1000 720: pn = 1 721: if mode == "text": 722: queryType = "toc" 723: else: 724: queryType = mode 725: # number of entries in toc 726: tocSize = 0 727: tocDiv = None 728: pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) 729: # post-processing downloaded xml 730: pagedom = Parse(pagexml) 731: # get number of entries 732: numdivs = pagedom.xpath("//div[@class='queryResultHits']") 733: if len(numdivs) > 0: 734: tocSize = int(getTextFromNode(numdivs[0])) 735: # div contains text 736: #pagedivs = pagedom.xpath("//div[@class='queryResultPage']") 737: #if len(pagedivs) > 0: 738: # tocDiv = pagedivs[0] 739: 740: docinfo['tocSize_%s'%mode] = tocSize 741: #docinfo['tocDiv_%s'%mode] = tocDiv 742: return docinfo 743: 744: def getTocPage(self, mode="toc", pn=1, pageinfo=None, docinfo=None): 745: """returns single page from the table of contents""" 746: # TODO: this should use the cached TOC 747: if mode == "text": 748: queryType = "toc" 749: else: 750: queryType = mode 751: docpath = docinfo['textURLPath'] 752: pagesize = pageinfo['tocPageSize'] 753: pn = pageinfo['tocPN'] 754: pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) 755: # post-processing downloaded xml 756: pagedom = Parse(pagexml) 757: # div contains text 758: pagedivs = pagedom.xpath("//div[@class='queryResultPage']") 759: if len(pagedivs) > 0: 760: pagenode = pagedivs[0] 761: return serializeNode(pagenode) 762: else: 763: return "No TOC!" 764: 765: 766: def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None): 767: """init document viewer""" 768: self.title=title 769: self.digilibBaseUrl = digilibBaseUrl 770: self.thumbrows = thumbrows 771: self.thumbcols = thumbcols 772: self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 773: if RESPONSE is not None: 774: RESPONSE.redirect('manage_main') 775: 776: 777: 778: def manage_AddDocumentViewerForm(self): 779: """add the viewer form""" 780: pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self) 781: return pt() 782: 783: def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None): 784: """add the viewer""" 785: newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName) 786: self._setObject(id,newObj) 787: 788: if RESPONSE is not None: 789: RESPONSE.redirect('manage_main') 790: 791: 792: ## 793: ## DocumentViewerTemplate class 794: ## 795: class DocumentViewerTemplate(ZopePageTemplate): 796: """Template for document viewer""" 797: meta_type="DocumentViewer Template" 798: 799: 800: def manage_addDocumentViewerTemplateForm(self): 801: """Form for adding""" 802: pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self) 803: return pt() 804: 805: def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None, 806: REQUEST=None, submit=None): 807: "Add a Page Template with optional file content." 808: 809: self._setObject(id, DocumentViewerTemplate(id)) 810: ob = getattr(self, id) 811: txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read() 812: logging.info("txt %s:"%txt) 813: ob.pt_edit(txt,"text/html") 814: if title: 815: ob.pt_setTitle(title) 816: try: 817: u = self.DestinationURL() 818: except AttributeError: 819: u = REQUEST['URL1'] 820: 821: u = "%s/%s" % (u, urllib.quote(id)) 822: REQUEST.RESPONSE.redirect(u+'/manage_main') 823: return '' 824: 825: 826: