--- documentViewer/documentViewer.py 2006/04/10 19:51:50 1.7 +++ documentViewer/documentViewer.py 2006/06/13 15:10:35 1.11 @@ -1,15 +1,13 @@ - -genericDigilib="http://nausikaa2.rz-berlin.mpg.de/digitallibrary/" - from OFS.Folder import Folder from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate from Products.PageTemplates.PageTemplateFile import PageTemplateFile from AccessControl import ClassSecurityInfo +from AccessControl import getSecurityManager from Globals import package_home from Ft.Xml.Domlette import NonvalidatingReader from Ft.Xml.Domlette import PrettyPrint, Print -from Ft.Xml import EMPTY_NAMESPACE +from Ft.Xml import EMPTY_NAMESPACE, Parse import Ft.Xml.XPath @@ -25,9 +23,10 @@ def getInt(number, default=0): return int(number) except: return default - def getTextFromNode(nodename): + if nodename is None: + return "" nodelist=nodename.childNodes rc = "" for node in nodelist: @@ -35,11 +34,17 @@ def getTextFromNode(nodename): rc = rc + node.data return rc + +def getParentDir(path): + """returns pathname shortened by one""" + return '/'.join(path.split('/')[0:-1]) + + import socket -def urlopen(url): +def urlopen(url,timeout=2): """urlopen mit timeout""" - socket.setdefaulttimeout(2) + socket.setdefaulttimeout(timeout) ret=urllib.urlopen(url) socket.setdefaulttimeout(5) return ret @@ -69,7 +74,7 @@ class documentViewer(Folder): changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) - def __init__(self,id,imageViewerUrl,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10): + def __init__(self,id,imageViewerUrl,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"): """init document viewer""" self.id=id self.title=title @@ -80,6 +85,8 @@ class documentViewer(Folder): self.digilibBaseUrl = digilibBaseUrl self.thumbcols = thumbcols self.thumbrows = thumbrows + # authgroups is list of authorized groups (delimited by ,) + self.authgroups = [s.strip().lower() for s in authgroups.split(',')] # add template folder so we can always use template.something self.manage_addFolder('template') @@ -109,17 +116,17 @@ class documentViewer(Folder): def getLink(self,param=None,val=None): """link to documentviewer with parameter param set to val""" - params=cgi.parse_qs(self.REQUEST['QUERY_STRING']) + params=self.REQUEST.form.copy() if param is not None: if val is None: if params.has_key(param): del params[param] else: - params[param] = [str(val)] + params[param] = str(val) - ps = "&".join(["%s=%s"%(k,urllib.quote(v[0])) for (k, v) in params.items()]) - url=self.REQUEST['URL']+"?"+ps - #url=self.REQUEST['URL']+"?"+urllib.urlencode(params, doseq=True) + # quote values and assemble into query string + ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) + url=self.REQUEST['URL1']+"?"+ps return url @@ -129,9 +136,31 @@ class documentViewer(Folder): if idx == selected: return style + 'sel' else: - return style + return style + def isAccessible(self, docinfo): + """returns if access to the resource is granted""" + access = docinfo.get('accessType', None) + if access is None: + # no information - no access + #TODO: check + return True + elif access == 'free': + return True + elif access in self.authgroups: + # only local access -- only logged in users + user = getSecurityManager().getUser() + if user is not None: + #print "user: ", user + return (user.getUserName() != "Anonymous User") + else: + return False + + zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "unknown access type %s"%access) + return False + + def getDirinfoFromDigilib(self,path,docinfo=None): """gibt param von dlInfo aus""" if docinfo is None: @@ -141,39 +170,86 @@ class documentViewer(Folder): zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo from %s"%(imageUrl)) - try: - dom = NonvalidatingReader.parseUri(imageUrl) - except: - zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.ERROR, "error reading %s"%(imageUrl)) - raise IOError("Unable to get dirinfo from %s"%(imageUrl)) + for cnt in (1,2,3): + try: + dom = NonvalidatingReader.parseUri(imageUrl) + break + except: + zLOG.LOG("documentViewer (getdirinfofromdigilib)", zLOG.ERROR, "error reading %s (try %d)"%(imageUrl,cnt)) + else: + raise IOError("Unable to get dir-info from %s"%(imageUrl)) - params=dom.xpath("//dir/size") - zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:size"%params) + sizes=dom.xpath("//dir/size") + zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:size"%sizes) - if params: - docinfo['numPages'] = getTextFromNode(params[0]) + if sizes: + docinfo['numPages'] = int(getTextFromNode(sizes[0])) else: docinfo['numPages'] = 0 return docinfo + + def getIndexMeta(self, url): + """returns dom of index.meta document at url""" + dom = None + if url.startswith("http://"): + # real URL + try: + dom = NonvalidatingReader.parseUri(url) + except: + zLOG.LOG("documentViewer (getIndexMata)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2]) + raise IOError("Unable to read index.meta from %s"%(url)) + else: + # online path + server=self.digilibBaseUrl+"/servlet/Texter?fn=" + metaUrl=server+url + if not metaUrl.endswith("index.meta"): + metaUrl += "/index.meta" + try: + # patch dirk encoding fehler treten dann nicht mehr auf + # dom = NonvalidatingReader.parseUri(metaUrl) + txt=urllib.urlopen(metaUrl).read() + dom = Parse(txt) + except: + zLOG.LOG("documentViewer (getIndexMata)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2]) + raise IOError("Unable to read index meta from %s"%(metaUrl)) + + return dom + + + def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None): + """gets authorization info from the index.meta file at path or given by dom""" + zLOG.LOG("documentViewer (getauthinfofromindexmeta)", zLOG.INFO,"path: %s"%(path)) + + access = None + + if docinfo is None: + docinfo = {} + + if dom is None: + dom = self.getIndexMeta(getParentDir(path)) + + acctype = dom.xpath("//access-conditions/access/@type") + if acctype and (len(acctype)>0): + access=acctype[0].value + if access in ['group', 'institution']: + access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower() + + docinfo['accessType'] = access + return docinfo + def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None): - """gets bibliographical info from the index.meta file at url or given by dom""" + """gets bibliographical info from the index.meta file at path or given by dom""" zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO,"path: %s"%(path)) if docinfo is None: docinfo = {} if dom is None: - server=self.digilibBaseUrl+"/servlet/Texter?fn=" - path="/".join(path.split("/")[0:-1]) - metaUrl=server+path+"/index.meta" - try: - dom = NonvalidatingReader.parseUri(metaUrl) - except: - return docinfo - + dom = self.getIndexMeta(getParentDir(path)) + metaData=self.metadata.main.meta.bib bibtype=dom.xpath("//bib/@type") if bibtype and (len(bibtype)>0): @@ -182,7 +258,8 @@ class documentViewer(Folder): bibtype="generic" bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC) bibmap=metaData.generateMappingForType(bibtype) - print "bibmap: ", bibmap, " for: ", bibtype + #print "bibmap: ", bibmap, " for: ", bibtype + # if there is no mapping bibmap is empty (mapping sometimes has empty fields) if len(bibmap) > 0 and len(bibmap['author'][0]) > 0: docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0]) docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0]) @@ -191,36 +268,44 @@ class documentViewer(Folder): return docinfo - def getDocinfoFromTextTool(self,url,docinfo=None): + def getDocinfoFromTextTool(self,url,dom=None,docinfo=None): """parse texttool tag in index meta""" zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.INFO,"url: %s"%(url)) if docinfo is None: docinfo = {} - try: - dom = NonvalidatingReader.parseUri(url) - except: - zLOG.LOG("documentViewer (parseUrlTexttool)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2]) - raise IOError("Unable to get texttool info from %s"%(url)) + if dom is None: + dom = self.getIndexMeta(url) + + archiveNames=dom.xpath("//resource/name") + if archiveNames and (len(archiveNames)>0): + archiveName=getTextFromNode(archiveNames[0]) archivePaths=dom.xpath("//resource/archive-path") if archivePaths and (len(archivePaths)>0): archivePath=getTextFromNode(archivePaths[0]) + # clean up archive path + if archivePath[0] != '/': + archivePath = '/' + archivePath + if not archivePath.endswith(archiveName): + archivePath += "/" + archiveName else: archivePath=None - images=dom.xpath("//texttool/image") - if images and (len(images)>0): - image=getTextFromNode(images[0]) + imageDirs=dom.xpath("//texttool/image") + if imageDirs and (len(imageDirs)>0): + imageDir=getTextFromNode(imageDirs[0]) else: - image=None + # we balk with no image tag + raise IOError("No text-tool info in %s"%(url)) - if image and archivePath: - image=os.path.join(archivePath,image) - image=image.replace("/mpiwg/online",'') - docinfo=self.getDirinfoFromDigilib(image,docinfo=docinfo) - docinfo['imagePath'] = image - docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+image + if imageDir and archivePath: + #print "image: ", imageDir, " archivepath: ", archivePath + imageDir=os.path.join(archivePath,imageDir) + imageDir=imageDir.replace("/mpiwg/online",'') + docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo) + docinfo['imagePath'] = imageDir + docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir viewerUrls=dom.xpath("//texttool/digiliburlprefix") if viewerUrls and (len(viewerUrls)>0): @@ -233,6 +318,7 @@ class documentViewer(Folder): docinfo['textURL'] = textUrl docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) + docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) return docinfo @@ -248,6 +334,7 @@ class documentViewer(Folder): docinfo['imageURL'] = imageUrl docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo) + docinfo = self.getAuthinfoFromIndexMeta(path,docinfo=docinfo) return docinfo @@ -269,6 +356,8 @@ class documentViewer(Folder): docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) else: zLOG.LOG("documentViewer (getdocinfo)", zLOG.ERROR,"unknown mode!") + raise ValueError("Unknown mode %s"%(mode)) + zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo: %s"%docinfo) self.REQUEST.SESSION['docinfo'] = docinfo return docinfo @@ -302,7 +391,7 @@ class documentViewer(Folder): if mode=="texttool": #index.meta with texttool information (viewerUrl,imagepath,textpath)=parseUrlTextTool(url) - print textpath + #print textpath try: dom = NonvalidatingReader.parseUri(textpath) except: @@ -360,14 +449,14 @@ class documentViewer(Folder): except: return None - def changeDocumentViewer(self,imageViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,RESPONSE=None): + def changeDocumentViewer(self,imageViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None): """init document viewer""" self.title=title self.imageViewerUrl=imageViewerUrl self.digilibBaseUrl = digilibBaseUrl self.thumbrows = thumbrows self.thumbcols = thumbcols - + self.authgroups = [s.strip().lower() for s in authgroups.split(',')] if RESPONSE is not None: RESPONSE.redirect('manage_main')