--- documentViewer/documentViewer.py 2006/04/10 10:13:01 1.5 +++ documentViewer/documentViewer.py 2006/06/13 14:57:46 1.10.2.1 @@ -1,10 +1,8 @@ - -genericDigilib="http://nausikaa2.rz-berlin.mpg.de/digitallibrary/" - from OFS.Folder import Folder from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate from Products.PageTemplates.PageTemplateFile import PageTemplateFile from AccessControl import ClassSecurityInfo +from AccessControl import getSecurityManager from Globals import package_home from Ft.Xml.Domlette import NonvalidatingReader @@ -14,6 +12,7 @@ from Ft.Xml import EMPTY_NAMESPACE import Ft.Xml.XPath import os.path +import sys import cgi import urllib import zLOG @@ -24,9 +23,10 @@ def getInt(number, default=0): return int(number) except: return default - def getTextFromNode(nodename): + if nodename is None: + return "" nodelist=nodename.childNodes rc = "" for node in nodelist: @@ -34,33 +34,20 @@ def getTextFromNode(nodename): rc = rc + node.data return rc + +def getParentDir(path): + """returns pathname shortened by one""" + return '/'.join(path.split('/')[0:-1]) + + import socket -def urlopen(url): +def urlopen(url,timeout=2): """urlopen mit timeout""" - socket.setdefaulttimeout(2) + socket.setdefaulttimeout(timeout) ret=urllib.urlopen(url) socket.setdefaulttimeout(5) return ret - -def getParamFromDigilib(path,param): - """gibt param von dlInfo aus""" - imageUrl=genericDigilib+"/dirInfo-xml.jsp?mo=dir&fn="+path - - zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo (%s) from %s"%(param,imageUrl)) - - try: - dom = NonvalidatingReader.parseUri(imageUrl) - except: - return None - - params=dom.xpath("//dir/%s"%param) - zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:%s"%params) - - if params: - return getTextFromNode(params[0]) - - ## @@ -87,7 +74,7 @@ class documentViewer(Folder): changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) - def __init__(self,id,imageViewerUrl,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10): + def __init__(self,id,imageViewerUrl,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"): """init document viewer""" self.id=id self.title=title @@ -98,6 +85,8 @@ class documentViewer(Folder): self.digilibBaseUrl = digilibBaseUrl self.thumbcols = thumbcols self.thumbrows = thumbrows + # authgroups is list of authorized groups (delimited by ,) + self.authgroups = [s.strip().lower() for s in authgroups.split(',')] # add template folder so we can always use template.something self.manage_addFolder('template') @@ -111,11 +100,9 @@ class documentViewer(Folder): ''' zLOG.LOG("documentViewer (index)", zLOG.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) - print "dlbaseurl:", self.digilibBaseUrl if not hasattr(self, 'template'): # create template folder if it doesn't exist - print "no template folder -- creating" self.manage_addFolder('template') if not self.digilibBaseUrl: @@ -129,14 +116,17 @@ class documentViewer(Folder): def getLink(self,param=None,val=None): """link to documentviewer with parameter param set to val""" - params=cgi.parse_qs(self.REQUEST['QUERY_STRING']) + params=self.REQUEST.form.copy() if param is not None: - if val is None and params.has_key(param): - del params[param] + if val is None: + if params.has_key(param): + del params[param] else: - params[param] = val - - url=self.REQUEST['URL']+"?"+urllib.urlencode(params, doseq=True) + params[param] = str(val) + + # quote values and assemble into query string + ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) + url=self.REQUEST['URL1']+"?"+ps return url @@ -146,25 +136,126 @@ class documentViewer(Folder): if idx == selected: return style + 'sel' else: - return style + return style - def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None): - """gets bibliographical info from the index.meta file at url or given by dom""" - zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO,"path: %s"%(path)) + def isAccessible(self, docinfo): + """returns if access to the resource is granted""" + access = docinfo.get('accessType', None) + if access is None: + # no information - no access + #TODO: check + return True + elif access == 'free': + return True + elif access in self.authgroups: + # only local access -- only logged in users + user = getSecurityManager().getUser() + if user is not None: + #print "user: ", user + return (user.getUserName() != "Anonymous User") + else: + return False + zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "unknown access type %s"%access) + return False + + + def getDirinfoFromDigilib(self,path,docinfo=None): + """gibt param von dlInfo aus""" if docinfo is None: docinfo = {} - if dom is None: - server="http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=" - path="/".join(path.split("/")[0:-1]) - metaUrl=server+path+"/index.meta" + imageUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path + + zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo from %s"%(imageUrl)) + + for cnt in (1,2,3): + try: + dom = NonvalidatingReader.parseUri(imageUrl) + break + except: + zLOG.LOG("documentViewer (getdirinfofromdigilib)", zLOG.ERROR, "error reading %s (try %d)"%(imageUrl,cnt)) + else: + raise IOError("Unable to get dir-info from %s"%(imageUrl)) + + sizes=dom.xpath("//dir/size") + zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:size"%sizes) + + if sizes: + docinfo['numPages'] = int(getTextFromNode(sizes[0])) + else: + docinfo['numPages'] = 0 + + return docinfo + + + def getIndexMeta(self, url): + """returns dom of index.meta document at url""" + num_retries = 3 + dom = None + metaUrl = None + if url.startswith("http://"): + # real URL + metaUrl = url + try: + dom = NonvalidatingReader.parseUri(url) + except: + zLOG.LOG("documentViewer (getIndexMata)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2]) + raise IOError("Unable to read index.meta from %s"%(url)) + else: + # online path + server=self.digilibBaseUrl+"/servlet/Texter?fn=" + metaUrl=server+url + if not metaUrl.endswith("index.meta"): + metaUrl += "/index.meta" + + for n in range(num_retries): try: dom = NonvalidatingReader.parseUri(metaUrl) + zLOG.LOG("documentViewer (getIndexMata)", zLOG.INFO,metaUrl) + break except: - return docinfo + zLOG.LOG("ERROR documentViewer (getIndexMata)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2]) + + if dom is None: + raise IOError("Unable to read index meta from %s"%(url)) + + return dom + + + def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None): + """gets authorization info from the index.meta file at path or given by dom""" + zLOG.LOG("documentViewer (getauthinfofromindexmeta)", zLOG.INFO,"path: %s"%(path)) + + access = None + + if docinfo is None: + docinfo = {} + + if dom is None: + dom = self.getIndexMeta(getParentDir(path)) + + acctype = dom.xpath("//access-conditions/access/@type") + if acctype and (len(acctype)>0): + access=acctype[0].value + if access in ['group', 'institution']: + access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower() + + docinfo['accessType'] = access + return docinfo + + def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None): + """gets bibliographical info from the index.meta file at path or given by dom""" + zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO,"path: %s"%(path)) + + if docinfo is None: + docinfo = {} + + if dom is None: + dom = self.getIndexMeta(getParentDir(path)) + metaData=self.metadata.main.meta.bib bibtype=dom.xpath("//bib/@type") if bibtype and (len(bibtype)>0): @@ -173,7 +264,9 @@ class documentViewer(Folder): bibtype="generic" bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC) bibmap=metaData.generateMappingForType(bibtype) - if len(bibmap) > 0: + #print "bibmap: ", bibmap, " for: ", bibtype + # if there is no mapping bibmap is empty (mapping sometimes has empty fields) + if len(bibmap) > 0 and len(bibmap['author'][0]) > 0: docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0]) docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0]) docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0]) @@ -181,52 +274,57 @@ class documentViewer(Folder): return docinfo - def getDocinfoFromTextTool(self,url,docinfo=None): + def getDocinfoFromTextTool(self,url,dom=None,docinfo=None): """parse texttool tag in index meta""" zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.INFO,"url: %s"%(url)) if docinfo is None: docinfo = {} - try: - dom = NonvalidatingReader.parseUri(url) - except: - zLOG.LOG("documentViewer (parseUrlTexttool)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2]) - return docinfo + if dom is None: + dom = self.getIndexMeta(url) - archivePaths=dom.xpath("//resource/archive-path") + archiveNames=dom.xpath("//resource/name") + if archiveNames and (len(archiveNames)>0): + archiveName=getTextFromNode(archiveNames[0]) + archivePaths=dom.xpath("//resource/archive-path") if archivePaths and (len(archivePaths)>0): archivePath=getTextFromNode(archivePaths[0]) + # clean up archive path + if archivePath[0] != '/': + archivePath = '/' + archivePath + if not archivePath.endswith(archiveName): + archivePath += "/" + archiveName else: archivePath=None - images=dom.xpath("//texttool/image") - - if images and (len(images)>0): - image=getTextFromNode(images[0]) + imageDirs=dom.xpath("//texttool/image") + if imageDirs and (len(imageDirs)>0): + imageDir=getTextFromNode(imageDirs[0]) else: - image=None + # we balk with no image tag + raise IOError("No text-tool info in %s"%(url)) - if image and archivePath: - image=os.path.join(archivePath,image) - image=image.replace("/mpiwg/online",'') - pt=getParamFromDigilib(image,'size') - docinfo['imagePath'] = image - docinfo['numPages'] = pt + if imageDir and archivePath: + #print "image: ", imageDir, " archivepath: ", archivePath + imageDir=os.path.join(archivePath,imageDir) + imageDir=imageDir.replace("/mpiwg/online",'') + docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo) + docinfo['imagePath'] = imageDir + docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir viewerUrls=dom.xpath("//texttool/digiliburlprefix") - if viewerUrls and (len(viewerUrls)>0): viewerUrl=getTextFromNode(viewerUrls[0]) - docinfo['imageURL'] = viewerURL + docinfo['viewerURL'] = viewerUrl textUrls=dom.xpath("//texttool/text") - if textUrls and (len(textUrls)>0): textUrl=getTextFromNode(textUrls[0]) - docinfo['textURL'] = textURL + docinfo['textURL'] = textUrl docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) + docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) return docinfo @@ -235,14 +333,14 @@ class documentViewer(Folder): zLOG.LOG("documentViewer (getdocinfofromimagepath)", zLOG.INFO,"path: %s"%(path)) if docinfo is None: docinfo = {} - docinfo['imagePath'] = path path=path.replace("/mpiwg/online","") - pt=getParamFromDigilib(path,'size') - docinfo['numPages'] = pt - imageUrl=genericDigilib+"/servlet/Scaler?fn=%s"%path + docinfo['imagePath'] = path + docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo) + imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path docinfo['imageURL'] = imageUrl docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo) + docinfo = self.getAuthinfoFromIndexMeta(path,docinfo=docinfo) return docinfo @@ -264,6 +362,8 @@ class documentViewer(Folder): docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) else: zLOG.LOG("documentViewer (getdocinfo)", zLOG.ERROR,"unknown mode!") + raise ValueError("Unknown mode %s"%(mode)) + zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo: %s"%docinfo) self.REQUEST.SESSION['docinfo'] = docinfo return docinfo @@ -281,7 +381,6 @@ class documentViewer(Folder): grpsize = cols * rows pageinfo['groupsize'] = grpsize start = getInt(start, default=(int(current / grpsize) * grpsize +1)) - print "start3:", start pageinfo['start'] = start pageinfo['end'] = start + grpsize if docinfo is not None: @@ -298,7 +397,7 @@ class documentViewer(Folder): if mode=="texttool": #index.meta with texttool information (viewerUrl,imagepath,textpath)=parseUrlTextTool(url) - print textpath + #print textpath try: dom = NonvalidatingReader.parseUri(textpath) except: @@ -356,14 +455,14 @@ class documentViewer(Folder): except: return None - def changeDocumentViewer(self,imageViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,RESPONSE=None): + def changeDocumentViewer(self,imageViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None): """init document viewer""" self.title=title self.imageViewerUrl=imageViewerUrl self.digilibBaseUrl = digilibBaseUrl self.thumbrows = thumbrows self.thumbcols = thumbcols - + self.authgroups = [s.strip().lower() for s in authgroups.split(',')] if RESPONSE is not None: RESPONSE.redirect('manage_main')