documentViewer/documentViewer.py - view

File: [Repository] / documentViewer / documentViewer.py
Revision 1.9: download - view: text, annotated - select for diffs - revision graph
Wed Apr 12 17:47:53 2006 UTC (18 years, 1 month ago) by casties
Branches: MAIN
CVS tags: HEAD

version 0.2.4
* minor improvements
* tries more than one time to read info from digilib

from OFS.Folder import Folder
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from AccessControl import ClassSecurityInfo
from AccessControl import getSecurityManager
from Globals import package_home

from Ft.Xml.Domlette import NonvalidatingReader
from Ft.Xml.Domlette import PrettyPrint, Print
from Ft.Xml import EMPTY_NAMESPACE

import Ft.Xml.XPath

import os.path
import sys
import cgi
import urllib
import socket
import zLOG


def getInt(number, default=0):
    """returns always an int (default in case of problems)"""
    try:
        return int(number)
    except Exception:
        # anything that cannot be converted (None, "", lists, ...) maps to default
        return default


def getTextFromNode(nodename):
    """returns the concatenated text of all direct text children of the node
    ("" if the node is None)"""
    if nodename is None:
        return ""
    return "".join([node.data for node in nodename.childNodes
                    if node.nodeType == node.TEXT_NODE])


def _firstNode(dom, expr):
    """returns the first node matching the XPath expression expr in dom
    or None if nothing matches"""
    nodes = dom.xpath(expr)
    if nodes:
        return nodes[0]
    return None


def getParentDir(path):
    """returns pathname shortened by one"""
    return '/'.join(path.split('/')[0:-1])


def urlopen(url, timeout=2):
    """urlopen with timeout

    Sets the global socket default timeout to timeout (seconds) while the
    connection is opened and sets it to 5 seconds afterwards, also when
    opening the URL fails.
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        socket.setdefaulttimeout(5)


##
## documentViewer class
##
class documentViewer(Folder):
    """document viewer"""

    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'main config', 'action': 'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())
    image_main = PageTemplateFile('zpt/image_main', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())

    security.declareProtected('View management screens', 'changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())

    def __init__(self, id, imageViewerUrl, title="", digilibBaseUrl=None, thumbcols=2, thumbrows=10, authgroups="mpiwg"):
        """init document viewer"""
        self.id = id
        self.title = title
        self.imageViewerUrl = imageViewerUrl
        if not digilibBaseUrl:
            # no base URL configured: ask the image viewer (may yield None)
            self.digilibBaseUrl = self.findDigilibUrl()
        else:
            self.digilibBaseUrl = digilibBaseUrl
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        # add template folder so we can always use template.something
        self.manage_addFolder('template')

    security.declareProtected('View', 'index_html')
    def index_html(self, mode, url, start=None, pn=1):
        '''
        view it
        @param mode: defines which type of document is behind url
        @param url: url which contains display information
        @param start: number of the first page of the thumbnail group
        @param pn: number of the current page
        '''
        zLOG.LOG("documentViewer (index)", zLOG.INFO, "mode: %s url:%s start:%s pn:%s" % (mode, url, start, pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        if not self.digilibBaseUrl:
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode, url=url)
        pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo)
        pt = getattr(self.template, 'viewer_main')
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    def getLink(self, param=None, val=None):
        """link to documentviewer with parameter param set to val
        (param is removed from the link if val is None)"""
        params = self.REQUEST.form.copy()
        if param is not None:
            if val is None:
                if param in params:
                    del params[param]
            else:
                params[param] = str(val)

        # quote values and assemble into query string
        ps = "&".join(["%s=%s" % (k, urllib.quote(v)) for (k, v) in params.items()])
        return self.REQUEST['URL1'] + "?" + ps

    def getStyle(self, idx, selected, style=""):
        """returns a string with the given style and append 'sel' if idx == selected."""
        if idx == selected:
            return style + 'sel'
        return style

    def isAccessible(self, docinfo):
        """returns if access to the resource is granted"""
        access = docinfo.get('accessType', None)
        if access is None:
            # no access information - access is currently granted
            #TODO: check
            return True
        if access == 'free':
            return True
        if access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            if user is None:
                return False
            return (user.getUserName() != "Anonymous User")

        zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "unknown access type %s" % access)
        return False

    def getDirinfoFromDigilib(self, path, docinfo=None):
        """reads the directory info for path from digilib and puts the
        number of pages as 'numPages' in docinfo

        Tries up to three times to read the info and raises IOError if all
        tries fail.
        """
        if docinfo is None:
            docinfo = {}

        imageUrl = self.digilibBaseUrl + "/dirInfo-xml.jsp?mo=dir&fn=" + path
        zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo from %s" % (imageUrl))

        for cnt in (1, 2, 3):
            try:
                dom = NonvalidatingReader.parseUri(imageUrl)
                break
            except Exception:
                zLOG.LOG("documentViewer (getdirinfofromdigilib)", zLOG.ERROR, "error reading %s (try %d)" % (imageUrl, cnt))
        else:
            # the loop ended without break, i.e. every try failed
            raise IOError("Unable to get dirinfo from %s" % (imageUrl))

        params = dom.xpath("//dir/size")
        # wrap in a tuple so the list is formatted as one value
        zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:size=%s" % (params,))

        if params:
            docinfo['numPages'] = int(getTextFromNode(params[0]))
        else:
            docinfo['numPages'] = 0

        return docinfo

    def getIndexMeta(self, url):
        """returns dom of index.meta document at url

        url is either a real http URL or an online path that is read
        through the digilib Texter servlet. Raises IOError if the document
        can not be read.
        """
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            metaUrl = self.digilibBaseUrl + "/servlet/Texter?fn=" + url
            if not metaUrl.endswith("index.meta"):
                metaUrl += "/index.meta"

        try:
            return NonvalidatingReader.parseUri(metaUrl)
        except Exception:
            zLOG.LOG("documentViewer (getIndexMata)", zLOG.INFO, "%s (%s)" % sys.exc_info()[0:2])
            raise IOError("Unable to get info from %s" % (url))

    def getAuthinfoFromIndexMeta(self, path, docinfo=None, dom=None):
        """gets authorization info from the index.meta file at path or given by dom"""
        zLOG.LOG("documentViewer (getauthinfofromindexmeta)", zLOG.INFO, "path: %s" % (path))

        access = None

        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(getParentDir(path))

        acctype = dom.xpath("//access-conditions/access/@type")
        if acctype:
            access = acctype[0].value
            if access in ['group', 'institution']:
                # the name of the group is used as access type;
                # without a name element the plain type is kept
                name = _firstNode(dom, "//access-conditions/access/name")
                if name is not None:
                    access = getTextFromNode(name).lower()

        docinfo['accessType'] = access
        return docinfo

    def getBibinfoFromIndexMeta(self, path, docinfo=None, dom=None):
        """gets bibliographical info from the index.meta file at path or given by dom"""
        zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO, "path: %s" % (path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(getParentDir(path))

        metaData = self.metadata.main.meta.bib
        bibtype = dom.xpath("//bib/@type")
        if bibtype:
            bibtype = bibtype[0].value
        else:
            bibtype = "generic"
        # types in index.meta are written with "-" instead of " "
        bibtype = bibtype.replace("-", " ")
        bibmap = metaData.generateMappingForType(bibtype)
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
            # elements missing in the document give empty strings
            docinfo['author'] = getTextFromNode(_firstNode(dom, "//bib/%s" % bibmap['author'][0]))
            docinfo['title'] = getTextFromNode(_firstNode(dom, "//bib/%s" % bibmap['title'][0]))
            docinfo['year'] = getTextFromNode(_firstNode(dom, "//bib/%s" % bibmap['year'][0]))

        return docinfo

    def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
        """parse texttool tag in index meta"""
        zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.INFO, "url: %s" % (url))
        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(url)

        # "" if there is no resource name
        archiveName = getTextFromNode(_firstNode(dom, "//resource/name"))

        # a missing or empty archive path counts as no archive path
        archivePath = getTextFromNode(_firstNode(dom, "//resource/archive-path")) or None
        if archivePath:
            # clean up archive path
            if not archivePath.startswith('/'):
                archivePath = '/' + archivePath
            if not archivePath.endswith(archiveName):
                archivePath += "/" + archiveName

        imageDir = getTextFromNode(_firstNode(dom, "//texttool/image")) or None

        if imageDir and archivePath:
            imageDir = os.path.join(archivePath, imageDir)
            imageDir = imageDir.replace("/mpiwg/online", '')
            docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
            docinfo['imagePath'] = imageDir
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

        viewerUrl = _firstNode(dom, "//texttool/digiliburlprefix")
        if viewerUrl is not None:
            docinfo['viewerURL'] = getTextFromNode(viewerUrl)

        textUrl = _firstNode(dom, "//texttool/text")
        if textUrl is not None:
            docinfo['textURL'] = getTextFromNode(textUrl)

        docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)
        docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)
        return docinfo

    def getDocinfoFromImagePath(self, path, docinfo=None):
        """path is the path to the images, it assumes that the index.meta file is one level higher."""
        zLOG.LOG("documentViewer (getdocinfofromimagepath)", zLOG.INFO, "path: %s" % (path))
        if docinfo is None:
            docinfo = {}
        path = path.replace("/mpiwg/online", "")
        docinfo['imagePath'] = path
        docinfo = self.getDirinfoFromDigilib(path, docinfo=docinfo)
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + path

        docinfo = self.getBibinfoFromIndexMeta(path, docinfo=docinfo)
        docinfo = self.getAuthinfoFromIndexMeta(path, docinfo=docinfo)
        return docinfo

    def getDocinfo(self, mode, url):
        """returns docinfo depending on mode

        The docinfo is cached in the session and reused as long as mode and
        url stay the same.
        """
        zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO, "mode: %s, url: %s" % (mode, url))
        # look for cached docinfo in session
        if self.REQUEST.SESSION.has_key('docinfo'):
            docinfo = self.REQUEST.SESSION['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO, "docinfo in session: %s" % docinfo)
                return docinfo

        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        if mode == "texttool":
            # index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode == "imagepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        else:
            zLOG.LOG("documentViewer (getdocinfo)", zLOG.ERROR, "unknown mode!")

        zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO, "docinfo: %s" % docinfo)
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo

    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):
        """returns pageinfo with the given parameters

        pageinfo contains 'current', 'rows', 'cols', 'groupsize', 'start',
        'end' and, if docinfo is given, 'numgroups'.
        """
        pageinfo = {}
        current = getInt(current)
        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # pages are counted from 1: the default start is the first page of
        # the group that contains the current page
        firstInGroup = ((max(current, 1) - 1) // grpsize) * grpsize + 1
        start = getInt(start, default=firstInGroup)
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if docinfo is not None:
            # docinfo has no numPages if no image directory was found
            numPages = getInt(docinfo.get('numPages'))
            pageinfo['end'] = min(pageinfo['end'], numPages)
            pageinfo['numgroups'] = numPages // grpsize
            if numPages % grpsize > 0:
                pageinfo['numgroups'] += 1

        return pageinfo

    def text(self, mode, url, pn):
        """give text"""
        # NOTE(review): this method looks unfinished. parseUrlTextTool is not
        # defined in the visible source and nothing is returned at the end
        # (the return statement is commented out below) -- confirm before use.
        if mode != "texttool":
            # no text path for other modes
            return None

        # index.meta with texttool information
        (viewerUrl, imagepath, textpath) = parseUrlTextTool(url)

        try:
            dom = NonvalidatingReader.parseUri(textpath)
        except Exception:
            return None

        collected = []
        nodes = dom.xpath("//pb")

        # page break that starts page pn
        node = nodes[int(pn) - 1]
        p = node
        while p.tagName != "p":
            p = p.parentNode

        # page break that starts the following page
        endNode = nodes[int(pn)]
        e = endNode
        while e.tagName != "p":
            e = e.parentNode

        # collect the siblings between the two page breaks
        sibling = node.parentNode
        while sibling and (sibling != endNode.parentNode):
            collected.append(sibling)
            sibling = sibling.nextSibling
        collected.append(endNode.parentNode)

        if p == e:
            # both in the same paragraph
            pass
#        else:
#            next=p
#            while next!=e:
#                print next,e
#                list.append(next)
#                next=next.nextSibling
#
#        for x in list:
#            PrettyPrint(x)
#
#        return list
#

    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib"""
        # NOTE(review): the last character of imageViewerUrl is dropped
        # unconditionally -- presumably a trailing separator; confirm
        url = self.imageViewerUrl[:-1] + "/getScalerUrl"
        try:
            conn = urlopen(url)
            try:
                scaler = conn.read()
            finally:
                conn.close()
            return scaler.replace("/servlet/Scaler?", "")
        except Exception:
            # best effort: no digilib URL if zogilib can not be read
            return None

    # NOTE(review): the defaults of thumbrows and thumbcols are swapped
    # compared to __init__ (thumbcols=2, thumbrows=10) -- confirm which is meant
    def changeDocumentViewer(self, imageViewerUrl, title="", digilibBaseUrl=None, thumbrows=2, thumbcols=10, authgroups='mpiwg', RESPONSE=None):
        """change the configuration of the document viewer"""
        self.title = title
        self.imageViewerUrl = imageViewerUrl
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


# security.declareProtected('View management screens','renameImageForm')

def manage_AddDocumentViewerForm(self):
    """add the viewer form"""
    pt = PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
    return pt()


def manage_AddDocumentViewer(self, id, imageViewerUrl="", title="", RESPONSE=None):
    """add the viewer"""
    newObj = documentViewer(id, imageViewerUrl, title)
    self._setObject(id, newObj)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')


##
## DocumentViewerTemplate class
##
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer"""
    meta_type = "DocumentViewer Template"


def manage_addDocumentViewerTemplateForm(self):
    """Form for adding"""
    pt = PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self)
    return pt()


def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a Page Template with the content of zpt/viewer_main.zpt.

    Redirects to the manage_main page of the new template if REQUEST is
    given. Always returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()), 'zpt/viewer_main.zpt')
    templateFile = open(templatePath)
    try:
        ob.pt_edit(templateFile.read(), None)
    finally:
        templateFile.close()

    if title:
        ob.pt_setTitle(title)

    if REQUEST is not None:
        # only redirect when called through the web
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u + '/manage_main')
    return ''