documentViewer/documentViewer.py - view

File: [Repository] / documentViewer / documentViewer.py
Revision 1.21: download - view: text, annotated - select for diffs - revision graph
Thu Jan 11 20:27:17 2007 UTC (18 years, 6 months ago) by dwinter
Branches: MAIN
CVS tags: HEAD

Text viewer now integrated; new mode "auto" introduced as the default for viewing.

from OFS.Folder import Folder
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from AccessControl import ClassSecurityInfo
from AccessControl import getSecurityManager
from Globals import package_home

from Ft.Xml.Domlette import NonvalidatingReader
from Ft.Xml.Domlette import PrettyPrint, Print
from Ft.Xml import EMPTY_NAMESPACE, Parse

import Ft.Xml.XPath

import os.path
import sys
import cgi
import urllib
import logging
import zLOG
import urlparse


def getInt(number, default=0):
    """Return number converted to an int, or default if conversion fails."""
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return default


def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    Returns "" when nodename is None.
    """
    if nodename is None:
        return ""
    return "".join([node.data for node in nodename.childNodes
                    if node.nodeType == node.TEXT_NODE])


def getParentDir(path):
    """Return path with its last '/'-separated component removed."""
    return '/'.join(path.split('/')[0:-1])


import socket


def urlopen(url, timeout=2):
    """Open url with urllib while the global socket timeout is set to timeout.

    The process-wide default socket timeout is set to `timeout` seconds for
    the call and is reset to 5 seconds afterwards, also when the open fails.
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        # the module has always left the global default at 5 seconds; other
        # urllib calls in this file rely on that value
        socket.setdefaulttimeout(5)


def _parseXmlFromUrl(url, logname, num_retries=3):
    """Fetch url and parse it as XML, retrying up to num_retries times.

    Returns the parsed document, or None if every attempt failed. Each
    failure is logged under "documentViewer (<logname>)".
    """
    for cnt in range(num_retries):
        try:
            # read the data first and parse the string; parsing directly from
            # the URL (NonvalidatingReader.parseUri) caused encoding errors
            conn = urllib.urlopen(url)
            try:
                txt = conn.read()
            finally:
                conn.close()
            return Parse(txt)
        except Exception:
            zLOG.LOG("documentViewer (%s)" % logname, zLOG.ERROR,
                     "error reading %s (try %d): %s (%s)"
                     % ((url, cnt) + sys.exc_info()[0:2]))
    return None


def _firstNodeText(dom, xpath):
    """Return the text of the first node matching xpath in dom, or ""."""
    nodes = dom.xpath(xpath)
    if nodes:
        return getTextFromNode(nodes[0])
    return ""


##
## documentViewer class
##
class documentViewer(Folder):
    """document viewer"""
    #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"

    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'main config', 'action': 'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())
    image_main = PageTemplateFile('zpt/image_main', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())

    security.declareProtected('View management screens', 'changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())

    def __init__(self, id, imageViewerUrl, textViewerUrl=None, title="", digilibBaseUrl=None, thumbcols=2, thumbrows=10, authgroups="mpiwg"):
        """init document viewer"""
        self.id = id
        self.title = title
        self.imageViewerUrl = imageViewerUrl
        self.textViewerUrl = textViewerUrl

        if not digilibBaseUrl:
            # no explicit base URL: try to ask the image viewer for it
            self.digilibBaseUrl = self.findDigilibUrl()
        else:
            self.digilibBaseUrl = digilibBaseUrl
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        # add template folder so we can always use template.something
        self.manage_addFolder('template')

    security.declareProtected('View', 'index_html')
    def index_html(self, mode, url, viewMode="auto", start=None, pn=1):
        '''
        view it
        @param mode: defines how url is interpreted ("texttool" or "imagepath", see getDocinfo)
        @param url: url which contains display information
        @param viewMode: "images", "text" or "auto" (default); "auto" selects
            "text" if the document has a textURL and a text viewer is
            configured, otherwise "images"
        @param start: first page of the current thumbnail group (see getPageinfo)
        @param pn: current page number
        '''

        zLOG.LOG("documentViewer (index)", zLOG.INFO, "mode: %s url:%s start:%s pn:%s" % (mode, url, start, pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        if not self.digilibBaseUrl:
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode, url=url)
        pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo)
        pt = getattr(self.template, 'viewer_main')

        if viewMode == "auto":
            # auto mode selected: use text only if a text URL is set and a
            # text viewer is configured
            if docinfo.get("textURL", '') and self.textViewerUrl:
                viewMode = "text"
            else:
                viewMode = "images"

        return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode)

    def getLink(self, param=None, val=None):
        """link to documentviewer with parameter param set to val"""
        params = self.REQUEST.form.copy()
        if param is not None:
            if val is None:
                # val None means: remove the parameter from the link
                if param in params:
                    del params[param]
            else:
                params[param] = str(val)

        # quote values and assemble into query string
        ps = "&".join(["%s=%s" % (k, urllib.quote(v)) for (k, v) in params.items()])
        url = self.REQUEST['URL1'] + "?" + ps
        return url

    def getStyle(self, idx, selected, style=""):
        """returns a string with the given style and append 'sel' if path == selected."""
        #zLOG.LOG("documentViewer (getstyle)", zLOG.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
        if idx == selected:
            return style + 'sel'
        return style

    def isAccessible(self, docinfo):
        """returns if access to the resource is granted"""
        access = docinfo.get('accessType', None)
        zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "access type %s" % access)
        if access == 'free':
            zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "access is free")
            return True

        if access is None or access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            if user is None:
                return False
            return (user.getUserName() != "Anonymous User")

        zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "unknown access type %s" % access)
        return False

    def getDirinfoFromDigilib(self, path, docinfo=None):
        """reads the digilib dirInfo for path and stores the page count in docinfo['numPages']

        Raises IOError if the dirInfo document cannot be read or parsed.
        """
        if docinfo is None:
            docinfo = {}

        infoUrl = self.digilibBaseUrl + "/dirInfo-xml.jsp?mo=dir&fn=" + path

        zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo from %s" % (infoUrl))

        dom = _parseXmlFromUrl(infoUrl, "getdirinfofromdigilib")
        if dom is None:
            raise IOError("Unable to get dir-info from %s" % (infoUrl))

        sizes = dom.xpath("//dir/size")
        zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:size=%s" % (sizes,))

        if sizes:
            docinfo['numPages'] = int(getTextFromNode(sizes[0]))
        else:
            docinfo['numPages'] = 0

        return docinfo

    def getIndexMeta(self, url):
        """returns dom of index.meta document at url

        Raises IOError if the document cannot be read or parsed.
        """
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path: fetch through the digilib Texter servlet
            server = self.digilibBaseUrl + "/servlet/Texter?fn="
            metaUrl = server + url.replace("/mpiwg/online", "")
            if not metaUrl.endswith("index.meta"):
                metaUrl += "/index.meta"
        zLOG.LOG("documentViewer (getIndexMeta)", zLOG.INFO, "metaUrl: %s" % (metaUrl))

        dom = _parseXmlFromUrl(metaUrl, "getIndexMeta")
        if dom is None:
            raise IOError("Unable to read index meta from %s" % (url))

        return dom

    def getPresentationInfoXML(self, url):
        """returns dom of info.xml document at url

        Raises IOError if the document cannot be read or parsed.
        """
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path: fetch through the digilib Texter servlet
            server = self.digilibBaseUrl + "/servlet/Texter?fn="
            metaUrl = server + url.replace("/mpiwg/online", "")

        dom = _parseXmlFromUrl(metaUrl, "getPresentationInfoXML")
        if dom is None:
            raise IOError("Unable to read infoXMLfrom %s" % (url))

        return dom

    def getAuthinfoFromIndexMeta(self, path, docinfo=None, dom=None):
        """gets authorization info from the index.meta file at path or given by dom"""
        zLOG.LOG("documentViewer (getauthinfofromindexmeta)", zLOG.INFO, "path: %s" % (path))

        access = None

        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(getParentDir(path))

        acctype = dom.xpath("//access-conditions/access/@type")
        if acctype:
            access = acctype[0].value
            if access in ['group', 'institution']:
                # for these types the lower-cased name element is the access
                # type; without a name element the bare type is kept
                names = dom.xpath("//access-conditions/access/name")
                if names:
                    access = getTextFromNode(names[0]).lower()

        docinfo['accessType'] = access
        return docinfo

    def getBibinfoFromIndexMeta(self, path, docinfo=None, dom=None):
        """gets bibliographical info from the index.meta file at path or given by dom"""
        zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO, "path: %s" % (path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(getParentDir(path))

        metaData = self.metadata.main.meta.bib
        bibtype = dom.xpath("//bib/@type")
        if bibtype:
            bibtype = bibtype[0].value
        else:
            bibtype = "generic"
        # index.meta types are written with "-" where the mapping uses " "
        bibtype = bibtype.replace("-", " ")
        bibmap = metaData.generateMappingForType(bibtype)
        #print "bibmap: ", bibmap, " for: ", bibtype
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
            # fields missing from the document yield "" instead of an IndexError
            docinfo['author'] = _firstNodeText(dom, "//bib/%s" % bibmap['author'][0])
            docinfo['title'] = _firstNodeText(dom, "//bib/%s" % bibmap['title'][0])
            docinfo['year'] = _firstNodeText(dom, "//bib/%s" % bibmap['year'][0])

        return docinfo

    def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
        """parse texttool tag in index meta

        Raises IOError if no archive path can be determined or if the
        texttool/image element is missing.
        """
        zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.INFO, "url: %s" % (url))
        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(url)

        archivePath = None
        archiveName = None

        archiveNames = dom.xpath("//resource/name")
        if archiveNames:
            archiveName = getTextFromNode(archiveNames[0])
        else:
            zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.WARNING, "resource/name missing in: %s" % (url))

        archivePaths = dom.xpath("//resource/archive-path")
        if archivePaths:
            archivePath = getTextFromNode(archivePaths[0])
            # clean up archive path (startswith also copes with an empty element)
            if not archivePath.startswith('/'):
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.WARNING, "resource/archive-path missing in: %s" % (url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')

        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s" % (url))

        imageDirs = dom.xpath("//texttool/image")
        if imageDirs:
            imageDir = getTextFromNode(imageDirs[0])
        else:
            # we balk with no image tag
            raise IOError("No text-tool info in %s" % (url))

        if imageDir and archivePath:
            #print "image: ", imageDir, " archivepath: ", archivePath
            imageDir = os.path.join(archivePath, imageDir)
            imageDir = imageDir.replace("/mpiwg/online", '')
            docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
            docinfo['imagePath'] = imageDir
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

        viewerUrls = dom.xpath("//texttool/digiliburlprefix")
        if viewerUrls:
            docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

        textUrls = dom.xpath("//texttool/text")
        if textUrls:
            textUrl = getTextFromNode(textUrls[0])
            if urlparse.urlparse(textUrl)[0] == "":
                # no URL scheme: treat it as a path relative to the archive path
                textUrl = os.path.join(archivePath, textUrl)

            docinfo['textURL'] = textUrl

        presentationUrls = dom.xpath("//texttool/presentation")
        if presentationUrls:
            # the presentation URL is the metadata URL with "index.meta"
            # replaced by the relative path of the presentation info
            presentationUrl = url.replace('index.meta', getTextFromNode(presentationUrls[0]))
            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
        else:
            docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)
        docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)
        return docinfo

    def getBibinfoFromTextToolPresentation(self, url, docinfo=None, dom=None):
        """gets the bibliographical information from the presentation entry in texttools

        The dom argument is accepted for signature compatibility but not
        used; the presentation document is always read from url.
        """
        if docinfo is None:
            docinfo = {}
        dom = self.getPresentationInfoXML(url)
        # elements missing from the document yield "" instead of an IndexError
        docinfo['author'] = _firstNodeText(dom, "//author")
        docinfo['title'] = _firstNodeText(dom, "//title")
        docinfo['year'] = _firstNodeText(dom, "//date")
        return docinfo

    def getDocinfoFromImagePath(self, path, docinfo=None):
        """path is the path to the images; it assumes that the index.meta file is one level higher."""
        zLOG.LOG("documentViewer (getdocinfofromimagepath)", zLOG.INFO, "path: %s" % (path))
        if docinfo is None:
            docinfo = {}
        path = path.replace("/mpiwg/online", "")
        docinfo['imagePath'] = path
        docinfo = self.getDirinfoFromDigilib(path, docinfo=docinfo)
        imageUrl = self.digilibBaseUrl + "/servlet/Scaler?fn=" + path
        docinfo['imageURL'] = imageUrl

        docinfo = self.getBibinfoFromIndexMeta(path, docinfo=docinfo)
        docinfo = self.getAuthinfoFromIndexMeta(path, docinfo=docinfo)
        return docinfo

    def getDocinfo(self, mode, url):
        """returns docinfo depending on mode

        mode must be "texttool" or "imagepath"; any other value raises
        ValueError. The result is cached in the session under 'docinfo'.
        """
        zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO, "mode: %s, url: %s" % (mode, url))
        # look for cached docinfo in session
        if self.REQUEST.SESSION.has_key('docinfo'):
            docinfo = self.REQUEST.SESSION['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO, "docinfo in session: %s" % docinfo)
                return docinfo
        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        if mode == "texttool":
            # index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode == "imagepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        else:
            zLOG.LOG("documentViewer (getdocinfo)", zLOG.ERROR, "unknown mode!")
            raise ValueError("Unknown mode %s" % (mode))

        zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO, "docinfo: %s" % docinfo)
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo

    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):
        """returns pageinfo with the given parameters

        The result has the keys 'current', 'rows', 'cols', 'groupsize',
        'start' and 'end'; 'numgroups' is added (and 'end' is limited to the
        number of pages) only when docinfo is given.
        """
        pageinfo = {}
        current = getInt(current)
        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        # never let the group size drop below 1, it is used as a divisor
        grpsize = max(cols * rows, 1)
        pageinfo['groupsize'] = grpsize
        # first page of the group containing `current`; pages are 1-based, so
        # the last page of a group (e.g. 20 of 1..20) must map to that group's
        # start and not to the start of the next group
        groupstart = (max(current - 1, 0) // grpsize) * grpsize + 1
        start = getInt(start, default=groupstart)
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if docinfo is not None:
            np = int(docinfo['numPages'])
            pageinfo['end'] = min(pageinfo['end'], np)
            # number of groups, rounded up
            pageinfo['numgroups'] = (np + grpsize - 1) // grpsize

        return pageinfo

    def text(self, mode, url, pn):
        """give text"""
        # NOTE(review): this method looks unfinished. It collects nodes
        # between two page breaks but never returns them (the remaining code
        # is commented out), and parseUrlTextTool is not defined in the part
        # of the module visible here -- confirm before relying on it.
        if mode == "texttool":
            # index.meta with texttool information
            (viewerUrl, imagepath, textpath) = parseUrlTextTool(url)
        else:
            # no text path is known for any other mode
            return None

        #print textpath
        try:
            dom = NonvalidatingReader.parseUri(textpath)
        except Exception:
            return None

        collected = []
        nodes = dom.xpath("//pb")

        # page break that starts the requested page
        node = nodes[int(pn) - 1]

        p = node
        while p.tagName != "p":
            p = p.parentNode

        # page break that starts the following page
        endNode = nodes[int(pn)]

        e = endNode
        while e.tagName != "p":
            e = e.parentNode

        sibling = node.parentNode

        # collect the siblings from the start parent up to the end parent
        while sibling and (sibling != endNode.parentNode):
            collected.append(sibling)
            sibling = sibling.nextSibling
        collected.append(endNode.parentNode)

        if p == e:
            # both page breaks are in the same paragraph
            pass
#        else:
#            next=p
#            while next!=e:
#                print next,e
#                list.append(next)
#                next=next.nextSibling
#
#        for x in list:
#            PrettyPrint(x)
#
#        return list
#

    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib

        Returns None if the URL cannot be determined.
        """
        # drop the last character of the viewer URL before appending the method name
        url = self.imageViewerUrl[:-1] + "/getScalerUrl"
        #print urlparse.urlparse(url)[0]
        #print urlparse.urljoin(self.absolute_url(),url)
        try:
            # logging happens inside the try so that a failing absolute_url()
            # cannot break this best-effort lookup
            logging.info("finddigiliburl: %s" % urlparse.urlparse(url)[0])
            logging.info("finddigiliburl: %s" % urlparse.urljoin(self.absolute_url(), url))

            if urlparse.urlparse(url)[0] == '':
                # relative path: resolve against this object's URL
                url = urlparse.urljoin(self.absolute_url() + "/", url)

            scaler = urlopen(url).read()
            return scaler.replace("/servlet/Scaler?", "")
        except Exception:
            return None

    # NOTE(review): the defaults of thumbrows and thumbcols are swapped
    # compared to __init__ (thumbcols=2, thumbrows=10) -- confirm which is intended.
    def changeDocumentViewer(self, imageViewerUrl, textViewerUrl, title="", digilibBaseUrl=None, thumbrows=2, thumbcols=10, authgroups='mpiwg', RESPONSE=None):
        """change the configuration of the document viewer"""
        self.title = title
        self.imageViewerUrl = imageViewerUrl
        self.textViewerUrl = textViewerUrl
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


#    security.declareProtected('View management screens','renameImageForm')

def manage_AddDocumentViewerForm(self):
    """add the viewer form"""
    pt = PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
    return pt()


def manage_AddDocumentViewer(self, id, imageViewerUrl="", textViewerUrl="", title="", RESPONSE=None):
    """add the viewer"""
    newObj = documentViewer(id, imageViewerUrl, title=title, textViewerUrl=textViewerUrl)
    self._setObject(id, newObj)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')


##
## DocumentViewerTemplate class
##
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer"""
    meta_type = "DocumentViewer Template"


def manage_addDocumentViewerTemplateForm(self):
    """Form for adding"""
    pt = PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self)
    return pt()


def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    "Add a Page Template with optional file content."

    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    # the template is always initialised from the packaged viewer_main.zpt;
    # the text argument is not used
    templateFile = open(os.path.join(package_home(globals()), 'zpt/viewer_main.zpt'))
    try:
        ob.pt_edit(templateFile.read(), None)
    finally:
        templateFile.close()
    if title:
        ob.pt_setTitle(title)

    if REQUEST is not None:
        # redirect to the management screen of the new template
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u + '/manage_main')
    return ''