documentViewer/documentViewer.py - view

File: [Repository] / documentViewer / documentViewer.py
Revision 1.23: download - view: text, annotated - select for diffs - revision graph
Fri Apr 20 14:46:05 2007 UTC (17 years, 2 months ago) by dwinter
Branches: MAIN
CVS tags: HEAD

NEW - # 44: ECHO - vollständige bibliographische Angabe
https://itgroup.mpiwg-berlin.mpg.de:8080/tracs/vision/ticket/44

minor bug

from OFS.Folder import Folder
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from AccessControl import ClassSecurityInfo
from AccessControl import getSecurityManager
from Globals import package_home

from Ft.Xml.Domlette import NonvalidatingReader
from Ft.Xml.Domlette import PrettyPrint, Print
from Ft.Xml import EMPTY_NAMESPACE, Parse

import Ft.Xml.XPath

import os.path
import sys
import cgi
import urllib
import logging
import socket

import urlparse

# Process-wide socket timeout (seconds) that urlopen() leaves behind after
# every call.
DEFAULT_SOCKET_TIMEOUT = 5


def logger(txt, method, txt2):
    """Log txt immediately followed by txt2 at the level given in method.

    method is expected to be a numeric logging level (logging.INFO,
    logging.WARNING, logging.ERROR, ...); any other value is logged at
    logging.INFO.
    """
    if isinstance(method, int):
        level = method
    else:
        level = logging.INFO
    logging.log(level, txt + txt2)


def getInt(number, default=0):
    """Return int(number), or default if number cannot be converted."""
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return default


def getTextFromNode(nodename):
    """Return the concatenated text of the direct text children of a node.

    Returns "" if nodename is None.
    """
    if nodename is None:
        return ""
    return "".join([node.data for node in nodename.childNodes
                    if node.nodeType == node.TEXT_NODE])


def getParentDir(path):
    """Return path with its last '/'-separated component removed."""
    return '/'.join(path.split('/')[0:-1])


def urlopen(url, timeout=2):
    """Open url with urllib while the global socket timeout is timeout.

    The timeout is process-wide (socket.setdefaulttimeout); afterwards it is
    set to DEFAULT_SOCKET_TIMEOUT, also when opening the URL fails.
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        socket.setdefaulttimeout(DEFAULT_SOCKET_TIMEOUT)


##
## documentViewer class
##
class documentViewer(Folder):
    """document viewer"""
    # example: textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"

    meta_type="Document viewer"

    security=ClassSecurityInfo()
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())
    image_main = PageTemplateFile('zpt/image_main', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())

    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())


    def __init__(self,id,imageViewerUrl,textViewerUrl=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):
        """init document viewer"""
        self.id=id
        self.title=title
        self.imageViewerUrl=imageViewerUrl
        self.textViewerUrl=textViewerUrl

        if not digilibBaseUrl:
            # findDigilibUrl() returns None on any failure
            self.digilibBaseUrl = self.findDigilibUrl()
        else:
            self.digilibBaseUrl = digilibBaseUrl
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        # add template folder so we can always use template.something
        self.manage_addFolder('template')


    security.declareProtected('View','index_html')
    def index_html(self,mode,url,viewMode="auto",start=None,pn=1):
        '''
        view it
        @param mode: defines which type of document is behind url ("texttool" or "imagepath", see getDocinfo)
        @param url: url which contains display information
        @param viewMode: "images", "text" or "auto"; auto selects text if the document has a textURL and a text viewer is configured, images otherwise
        @param start: first page of the thumbnail group (default: the group containing pn)
        @param pn: current page number

        '''

        logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        if not self.digilibBaseUrl:
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
        pt = getattr(self.template, 'viewer_main')

        if viewMode=="auto":
            # auto mode: text needs both a text URL and a configured text viewer
            if docinfo.get("textURL",'') and self.textViewerUrl:
                viewMode="text"
            else:
                viewMode="images"

        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)


    def getLink(self,param=None,val=None):
        """link to documentviewer with parameter param set to val"""
        params=self.REQUEST.form.copy()
        if param is not None:
            if val is None:
                # val=None removes the parameter from the link
                params.pop(param, None)
            else:
                params[param] = str(val)

        # quote values and assemble into query string
        ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
        return self.REQUEST['URL1']+"?"+ps


    def getStyle(self, idx, selected, style=""):
        """returns a string with the given style and append 'sel' if path == selected."""
        if idx == selected:
            return style + 'sel'
        return style


    def isAccessible(self, docinfo):
        """returns if access to the resource is granted"""
        access = docinfo.get('accessType', None)
        logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access)
        if access == 'free':
            logger("documentViewer (accessOK)", logging.INFO, "access is free")
            return True

        if access is None or access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            if user is None:
                return False
            return (user.getUserName() != "Anonymous User")

        logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access)
        return False


    def _getTexterUrl(self, url):
        # URL of the digilib Texter servlet for the online path url
        return self.digilibBaseUrl+"/servlet/Texter?fn="+url.replace("/mpiwg/online","")


    def _parseXmlFromUrl(self, xmlUrl, context, num_retries=3):
        # Fetch xmlUrl and parse it into a DOM, trying up to num_retries
        # times.  Returns None if every attempt failed; each failure is
        # logged under the method name given in context.
        for cnt in range(num_retries):
            try:
                # read the text first and parse it afterwards: the original
                # author notes that encoding errors no longer occur this way
                # (compared to NonvalidatingReader.parseUri)
                conn = urllib.urlopen(xmlUrl)
                try:
                    txt = conn.read()
                finally:
                    conn.close()
                return Parse(txt)
            except Exception:
                logger("documentViewer (%s)"%context, logging.ERROR,
                       "error reading %s (try %d): %s (%s)"%((xmlUrl,cnt)+sys.exc_info()[0:2]))
        return None


    def getDirinfoFromDigilib(self,path,docinfo=None):
        """sets docinfo['numPages'] from the digilib dirInfo of path

        Raises IOError if the dirInfo cannot be read.
        """
        if docinfo is None:
            docinfo = {}

        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

        logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl))

        dom = self._parseXmlFromUrl(infoUrl, "getdirinfofromdigilib")
        if dom is None:
            raise IOError("Unable to get dir-info from %s"%(infoUrl))

        sizes=dom.xpath("//dir/size")
        logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size %s"%(sizes,))

        if sizes:
            docinfo['numPages'] = int(getTextFromNode(sizes[0]))
        else:
            docinfo['numPages'] = 0

        return docinfo


    def getIndexMeta(self, url):
        """returns dom of index.meta document at url

        Raises IOError if the document cannot be read.
        """
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            metaUrl = self._getTexterUrl(url)
            # NOTE(review): the source indentation was lost; the suffix is
            # assumed to be added for online paths only -- confirm.
            if not metaUrl.endswith("index.meta"):
                metaUrl += "/index.meta"
        logger("documentViewer (getindexmeta)", logging.DEBUG, "metaUrl: %s"%metaUrl)

        dom = self._parseXmlFromUrl(metaUrl, "getIndexMeta")
        if dom is None:
            raise IOError("Unable to read index meta from %s"%(url))

        return dom

    def getPresentationInfoXML(self, url):
        """returns dom of info.xml document at url

        Raises IOError if the document cannot be read.
        """
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            metaUrl = self._getTexterUrl(url)

        dom = self._parseXmlFromUrl(metaUrl, "getPresentationInfoXML")
        if dom is None:
            raise IOError("Unable to read infoXMLfrom %s"%(url))

        return dom


    def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None):
        """gets authorization info from the index.meta file at path or given by dom"""
        logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path))

        access = None

        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(getParentDir(path))

        acctype = dom.xpath("//access-conditions/access/@type")
        if acctype:
            access=acctype[0].value
            if access in ['group', 'institution']:
                # for these types the access name is used instead of the type
                names = dom.xpath("//access-conditions/access/name")
                if names:
                    access = getTextFromNode(names[0]).lower()
                else:
                    logger("documentViewer (getauthinfofromindexmeta)", logging.WARNING,
                           "access name missing for type %s in: %s"%(access,path))

        docinfo['accessType'] = access
        return docinfo


    def _firstNodeText(self, dom, expr):
        # text of the first node matching the XPath expr, "" if none matches
        nodes = dom.xpath(expr)
        if nodes:
            return getTextFromNode(nodes[0])
        return ""


    def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None):
        """gets bibliographical info from the index.meta file at path or given by dom"""
        logger("documentViewer (getbibinfofromindexmeta)", logging.INFO,"path: %s"%(path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(getParentDir(path))

        # the metadata mapping object is not defined in this class; it is
        # presumably acquired from the Zope context -- verify
        metaData=self.metadata.main.meta.bib
        bibtype=dom.xpath("//bib/@type")
        if bibtype:
            bibtype=bibtype[0].value
        else:
            bibtype="generic"
        # index.meta types use "-" where the mapping uses " "
        bibtype=bibtype.replace("-"," ")
        bibmap=metaData.generateMappingForType(bibtype)
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
            docinfo['author']=self._firstNodeText(dom, "//bib/%s"%bibmap['author'][0])
            docinfo['title']=self._firstNodeText(dom, "//bib/%s"%bibmap['title'][0])
            docinfo['year']=self._firstNodeText(dom, "//bib/%s"%bibmap['year'][0])
            # NOTE(review): the source indentation was lost; lang is assumed
            # to be set only when a bib mapping exists -- confirm.
            docinfo['lang']=self._firstNodeText(dom, "//bib/lang")
        return docinfo


    def getDocinfoFromTextTool(self,url,dom=None,docinfo=None):
        """parse texttool tag in index meta

        Raises IOError if no archive path can be determined.
        """
        logger("documentViewer (getdocinfofromtexttool)", logging.INFO,"url: %s"%(url))
        if docinfo is None:
            docinfo = {}

        if docinfo.get('lang',None) is None:
            docinfo['lang']='' # default: no language set
        if dom is None:
            dom = self.getIndexMeta(url)

        archivePath = None
        archiveName = None

        archiveNames=dom.xpath("//resource/name")
        if archiveNames:
            archiveName=getTextFromNode(archiveNames[0])
        else:
            logger("documentViewer (getdocinfofromtexttool)", logging.WARNING,"resource/name missing in: %s"%(url))

        archivePaths=dom.xpath("//resource/archive-path")
        if archivePaths:
            archivePath=getTextFromNode(archivePaths[0])
            # clean up archive path
            if not archivePath.startswith('/'):
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            logger("documentViewer (getdocinfofromtexttool)", logging.WARNING,"resource/archive-path missing in: %s"%(url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')

        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s"%(url))

        # defaults for documents without images
        docinfo['imagePath'] = ""
        docinfo['imageURL'] = ""

        imageDirs=dom.xpath("//texttool/image")
        if imageDirs:
            imageDir=getTextFromNode(imageDirs[0])
        else:
            # a missing image tag is no longer an error because text mode is now standard
            imageDir=""
            # set to one page for now; navigation via thumbnails does not work in this case
            docinfo['numPages']=1

        if imageDir and archivePath:
            imageDir=os.path.join(archivePath,imageDir)
            imageDir=imageDir.replace("/mpiwg/online",'')
            docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo)
            docinfo['imagePath'] = imageDir
            docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir

        viewerUrls=dom.xpath("//texttool/digiliburlprefix")
        if viewerUrls:
            docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

        textUrls=dom.xpath("//texttool/text")
        if textUrls:
            textUrl=getTextFromNode(textUrls[0])
            if urlparse.urlparse(textUrl)[0]=="":
                # no URL scheme: path relative to the archive path
                textUrl=os.path.join(archivePath,textUrl)
            docinfo['textURL'] = textUrl

        presentationUrls=dom.xpath("//texttool/presentation")
        # get info from bib tag
        docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)

        if presentationUrls:
            # the presentation info overrides the bib info; its URL is the
            # metadata URL with index.meta replaced by the relative path of
            # the presentation info
            presentationUrl=url.replace('index.meta',getTextFromNode(presentationUrls[0]))
            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl,docinfo=docinfo,dom=dom)

        return docinfo


    def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
        """gets the bibliographical information from the presentation entry in texttools

        The dom argument is ignored; the presentation info is always read from url.
        """
        if docinfo is None:
            docinfo = {}
        dom=self.getPresentationInfoXML(url)
        docinfo['author']=getTextFromNode(dom.xpath("//author")[0])
        docinfo['title']=getTextFromNode(dom.xpath("//title")[0])
        docinfo['year']=getTextFromNode(dom.xpath("//date")[0])
        return docinfo

    def getDocinfoFromImagePath(self,path,docinfo=None):
        """path ist the path to the images it assumes that the index.meta file is one level higher."""
        logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path))
        if docinfo is None:
            docinfo = {}
        path=path.replace("/mpiwg/online","")
        docinfo['imagePath'] = path
        docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo)
        docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+path

        docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo)
        docinfo = self.getAuthinfoFromIndexMeta(path,docinfo=docinfo)
        return docinfo


    def getDocinfo(self, mode, url):
        """returns docinfo depending on mode

        The docinfo of the last mode/url pair is cached in the session.
        Raises ValueError for an unknown mode.
        """
        logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url))
        # look for cached docinfo in session
        session = self.REQUEST.SESSION
        if session.has_key('docinfo'):
            docinfo = session['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%docinfo)
                return docinfo
        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        if mode=="texttool":
            # index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode=="imagepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        else:
            logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")
            raise ValueError("Unknown mode %s"%(mode))

        logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo)
        session['docinfo'] = docinfo
        return docinfo


    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):
        """returns pageinfo with the given parameters

        pageinfo has the keys current, rows, cols, groupsize, start and end,
        plus numgroups if docinfo is given.
        """
        pageinfo = {}
        current = getInt(current)
        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # Pages are numbered from 1, so the group of page n starts at
        # ((n-1) // grpsize) * grpsize + 1; without the "-1" the last page of
        # every group would be assigned to the following group.
        groupstart = ((max(current, 1) - 1) // grpsize) * grpsize + 1
        start = getInt(start, default=groupstart)
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if docinfo is not None:
            np = int(docinfo['numPages'])
            pageinfo['end'] = min(pageinfo['end'], np)
            # number of groups, rounded up
            pageinfo['numgroups'] = (np + grpsize - 1) // grpsize

        return pageinfo

    def text(self,mode,url,pn):
        """give text"""
        # NOTE(review): this method is unfinished; it collects nodes but
        # never returns them, so the result is always None.
        if mode != "texttool":
            # without texttool information there is no text path to read
            return None

        # NOTE(review): parseUrlTextTool is neither defined nor imported in
        # this module, so this call raises NameError -- confirm where it
        # should come from.
        (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)

        try:
            dom = NonvalidatingReader.parseUri(textpath)
        except Exception:
            return None

        collected=[]
        pagebreaks=dom.xpath("//pb")

        # page break starting page pn, and the enclosing p element
        node=pagebreaks[int(pn)-1]
        startPara=node
        while startPara.tagName!="p":
            startPara=startPara.parentNode

        # page break ending page pn, and the enclosing p element
        endNode=pagebreaks[int(pn)]
        endPara=endNode
        while endPara.tagName!="p":
            endPara=endPara.parentNode

        # collect the siblings from the start parent up to the end parent
        sibling=node.parentNode
        while sibling and (sibling!=endNode.parentNode):
            collected.append(sibling)
            sibling=sibling.nextSibling
        collected.append(endNode.parentNode)

        return None

    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib"""
        # drops the last character of imageViewerUrl (presumably a trailing
        # separator) -- TODO confirm
        url = self.imageViewerUrl[:-1] + "/getScalerUrl"

        try:
            if urlparse.urlparse(url)[0]=='':
                # relative path
                url=urlparse.urljoin(self.absolute_url()+"/",url)
            logging.info("finddigiliburl: %s"%url)

            scaler = urlopen(url).read()
            return scaler.replace("/servlet/Scaler?", "")
        except Exception:
            # best effort: callers treat None as "not found"
            return None

    # NOTE(review): no security declaration is made for this method, and its
    # thumbrows/thumbcols defaults (2/10) are swapped relative to __init__
    # (thumbcols=2, thumbrows=10) -- confirm which is intended.
    def changeDocumentViewer(self,imageViewerUrl,textViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):
        """init document viewer"""
        self.title=title
        self.imageViewerUrl=imageViewerUrl
        self.textViewerUrl=textViewerUrl
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_AddDocumentViewerForm(self):
    """add the viewer form"""
    pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
    return pt()

def manage_AddDocumentViewer(self,id,imageViewerUrl="",textViewerUrl="",title="",RESPONSE=None):
    """add the viewer"""
    newObj=documentViewer(id,imageViewerUrl,title=title,textViewerUrl=textViewerUrl)
    self._setObject(id,newObj)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')


##
## DocumentViewerTemplate class
##
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer"""
    meta_type="DocumentViewer Template"


def manage_addDocumentViewerTemplateForm(self):
    """Form for adding"""
    pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self)
    return pt()

def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    "Add a Page Template with optional file content."
    # The template content is always read from zpt/viewer_main.zpt in the
    # package directory; the text and submit arguments are not used.
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()
    logging.debug("template read from %s"%templatePath)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code: nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''