version 1.7, 2006/04/10 19:51:50
|
version 1.15, 2006/07/14 16:22:36
|
Line 1
|
Line 1
|
|
|
genericDigilib="http://nausikaa2.rz-berlin.mpg.de/digitallibrary/" |
|
|
|
from OFS.Folder import Folder |
from OFS.Folder import Folder |
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate |
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate |
from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
from AccessControl import ClassSecurityInfo |
from AccessControl import ClassSecurityInfo |
|
from AccessControl import getSecurityManager |
from Globals import package_home |
from Globals import package_home |
|
|
from Ft.Xml.Domlette import NonvalidatingReader |
from Ft.Xml.Domlette import NonvalidatingReader |
from Ft.Xml.Domlette import PrettyPrint, Print |
from Ft.Xml.Domlette import PrettyPrint, Print |
from Ft.Xml import EMPTY_NAMESPACE |
from Ft.Xml import EMPTY_NAMESPACE, Parse |
|
|
import Ft.Xml.XPath |
import Ft.Xml.XPath |
|
|
Line 26 def getInt(number, default=0):
|
Line 24 def getInt(number, default=0):
|
except: |
except: |
return default |
return default |
|
|
|
|
def getTextFromNode(nodename): |
def getTextFromNode(nodename): |
|
if nodename is None: |
|
return "" |
nodelist=nodename.childNodes |
nodelist=nodename.childNodes |
rc = "" |
rc = "" |
for node in nodelist: |
for node in nodelist: |
Line 35 def getTextFromNode(nodename):
|
Line 34 def getTextFromNode(nodename):
|
rc = rc + node.data |
rc = rc + node.data |
return rc |
return rc |
|
|
|
|
|
def getParentDir(path):
    """Return path shortened by its last '/'-separated component.

    A path without any '/' yields the empty string.
    """
    components = path.split('/')
    # split() always returns at least one element, so pop() cannot fail
    components.pop()
    return '/'.join(components)
|
|
|
|
import socket |
import socket |
|
|
def urlopen(url, timeout=2):
    """Open url with a temporary default socket timeout.

    url     -- URL to open with urllib.urlopen.
    timeout -- default socket timeout in seconds while opening (default 2).

    Returns the object returned by urllib.urlopen. Whatever urllib.urlopen
    raises is passed on to the caller.

    The process-wide default socket timeout is set to 5 seconds afterwards,
    whether or not the open succeeded.
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        # reset the default even when the open fails, so that the short
        # timeout does not stay in effect for unrelated sockets
        socket.setdefaulttimeout(5)
Line 69 class documentViewer(Folder):
|
Line 74 class documentViewer(Folder):
|
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) |
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) |
|
|
|
|
def __init__(self,id,imageViewerUrl,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10): |
def __init__(self,id,imageViewerUrl,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"): |
"""init document viewer""" |
"""init document viewer""" |
self.id=id |
self.id=id |
self.title=title |
self.title=title |
Line 80 class documentViewer(Folder):
|
Line 85 class documentViewer(Folder):
|
self.digilibBaseUrl = digilibBaseUrl |
self.digilibBaseUrl = digilibBaseUrl |
self.thumbcols = thumbcols |
self.thumbcols = thumbcols |
self.thumbrows = thumbrows |
self.thumbrows = thumbrows |
|
# authgroups is list of authorized groups (delimited by ,) |
|
self.authgroups = [s.strip().lower() for s in authgroups.split(',')] |
# add template folder so we can always use template.something |
# add template folder so we can always use template.something |
self.manage_addFolder('template') |
self.manage_addFolder('template') |
|
|
Line 109 class documentViewer(Folder):
|
Line 116 class documentViewer(Folder):
|
|
|
def getLink(self, param=None, val=None):
    """Return a link to the document viewer with parameter param set to val.

    The link is assembled from a copy of the current request's form
    values, so the request itself is left untouched.

    param -- name of the parameter to change; None keeps all parameters.
    val   -- new value for param (converted with str()); None removes
             param from the link.

    Returns REQUEST['URL1'] followed by '?' and the query string.
    """
    params = self.REQUEST.form.copy()
    if param is not None:
        if val is None:
            # drop the parameter; it is fine if it was not set at all
            params.pop(param, None)
        else:
            params[param] = str(val)

    # quote values and assemble into query string; str() keeps quote()
    # from failing in case a form value is not a string
    ps = "&".join(["%s=%s" % (k, urllib.quote(str(v))) for (k, v) in params.items()])
    return self.REQUEST['URL1'] + "?" + ps
|
|
|
|
Line 132 class documentViewer(Folder):
|
Line 139 class documentViewer(Folder):
|
return style |
return style |
|
|
|
|
|
def isAccessible(self, docinfo):
    """Return whether access to the resource described by docinfo is granted.

    docinfo -- dict; its 'accessType' entry decides the outcome:
      * missing or None        -> False (no information, no access)
      * 'free'                 -> True
      * one of self.authgroups -> True only if the current user exists and
                                  is not named "Anonymous User"
      * anything else          -> False (unknown access type)
    """
    access = docinfo.get('accessType', None)
    zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "access type %s"%access)

    if access is None:
        # no information - no access
        return False

    if access == 'free':
        zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "access is free")
        return True

    # NOTE(review): presumably instances created before authgroups was
    # introduced do not have the attribute; treat that as "no authorized
    # groups" instead of failing with AttributeError -- confirm
    if access in getattr(self, 'authgroups', []):
        # only local access -- only logged in users
        user = getSecurityManager().getUser()
        if user is None:
            return False
        return user.getUserName() != "Anonymous User"

    zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "unknown access type %s"%access)
    return False
|
|
|
|
def getDirinfoFromDigilib(self, path, docinfo=None):
    """Read the directory info for path from digilib and store the page count.

    path    -- image directory path, appended to the dirInfo-xml.jsp URL
               below self.digilibBaseUrl.
    docinfo -- dict to update; a new dict is created if None.

    Sets docinfo['numPages'] to the integer text of the first //dir/size
    element of the answer, or to 0 if there is no such element, and
    returns docinfo.

    Raises IOError if the info could not be read and parsed within three
    attempts.
    """
    num_retries = 3
    if docinfo is None:
        docinfo = {}

    infoUrl = self.digilibBaseUrl + "/dirInfo-xml.jsp?mo=dir&fn=" + path
    zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo from %s"%(infoUrl))

    for cnt in range(num_retries):
        try:
            # read the text ourselves and parse the string instead of
            # letting the XML reader fetch the URL
            conn = urllib.urlopen(infoUrl)
            try:
                txt = conn.read()
            finally:
                conn.close()
            dom = Parse(txt)
            break
        except Exception:
            zLOG.LOG("documentViewer (getdirinfofromdigilib)", zLOG.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))
    else:
        # the loop ended without break: every attempt failed
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    sizes = dom.xpath("//dir/size")
    # the format string needs a placeholder, otherwise sizes is never logged
    zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:size=%s"%(sizes,))

    if sizes:
        docinfo['numPages'] = int(getTextFromNode(sizes[0]))
    else:
        docinfo['numPages'] = 0

    return docinfo
|
|
|
|
|
def getIndexMeta(self, url):
    """Return the parsed DOM of the index.meta document at url.

    url -- either a complete http(s) URL, which is used as it is, or an
           online path. For a path, "/mpiwg/online" is removed and the
           rest is requested through the Texter servlet below
           self.digilibBaseUrl, with "/index.meta" appended if missing.

    Raises IOError if the document could not be read and parsed within
    three attempts.
    """
    num_retries = 3
    dom = None

    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl + "/servlet/Texter?fn="
        metaUrl = server + url.replace("/mpiwg/online", "")
        # NOTE(review): the original indentation is not visible here; the
        # suffix check is assumed to apply to online paths only -- confirm
        if not metaUrl.endswith("index.meta"):
            metaUrl += "/index.meta"

    for cnt in range(num_retries):
        try:
            # read the text ourselves and parse the string; according to
            # the original note this avoids encoding errors that occurred
            # when the XML reader fetched the URL directly
            conn = urllib.urlopen(metaUrl)
            try:
                txt = conn.read()
            finally:
                conn.close()
            dom = Parse(txt)
            break
        except Exception:
            zLOG.LOG("documentViewer (getindexmeta)", zLOG.ERROR, "%s (%s)"%sys.exc_info()[0:2])

    if dom is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return dom
|
|
|
|
|
def getAuthinfoFromIndexMeta(self, path, docinfo=None, dom=None):
    """Get authorization info from the index.meta file at path or given by dom.

    path    -- resource path; if dom is None the index.meta is loaded with
               getIndexMeta from the parent directory of path.
    docinfo -- dict to update; a new dict is created if None.
    dom     -- already parsed index.meta document (optional).

    Sets docinfo['accessType'] to the type attribute of the first
    //access-conditions/access element, or to None if there is none. For
    the types 'group' and 'institution' the lower-cased text of the first
    access/name element is stored instead; if that element is missing the
    type itself is kept.

    Returns docinfo.
    """
    zLOG.LOG("documentViewer (getauthinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        dom = self.getIndexMeta(getParentDir(path))

    access = None
    acctype = dom.xpath("//access-conditions/access/@type")
    if acctype:
        access = acctype[0].value
        if access in ['group', 'institution']:
            names = dom.xpath("//access-conditions/access/name")
            # without a name element keep the bare type instead of
            # failing with IndexError
            if names:
                access = getTextFromNode(names[0]).lower()

    docinfo['accessType'] = access
    return docinfo
|
|
|
|
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None): |
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None): |
"""gets bibliographical info from the index.meta file at url or given by dom""" |
"""gets bibliographical info from the index.meta file at path or given by dom""" |
zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO,"path: %s"%(path)) |
zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO,"path: %s"%(path)) |
|
|
if docinfo is None: |
if docinfo is None: |
docinfo = {} |
docinfo = {} |
|
|
if dom is None: |
if dom is None: |
server=self.digilibBaseUrl+"/servlet/Texter?fn=" |
dom = self.getIndexMeta(getParentDir(path)) |
path="/".join(path.split("/")[0:-1]) |
|
metaUrl=server+path+"/index.meta" |
|
try: |
|
dom = NonvalidatingReader.parseUri(metaUrl) |
|
except: |
|
return docinfo |
|
|
|
metaData=self.metadata.main.meta.bib |
metaData=self.metadata.main.meta.bib |
bibtype=dom.xpath("//bib/@type") |
bibtype=dom.xpath("//bib/@type") |
Line 182 class documentViewer(Folder):
|
Line 265 class documentViewer(Folder):
|
bibtype="generic" |
bibtype="generic" |
bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC) |
bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC) |
bibmap=metaData.generateMappingForType(bibtype) |
bibmap=metaData.generateMappingForType(bibtype) |
print "bibmap: ", bibmap, " for: ", bibtype |
#print "bibmap: ", bibmap, " for: ", bibtype |
|
# if there is no mapping bibmap is empty (mapping sometimes has empty fields) |
if len(bibmap) > 0 and len(bibmap['author'][0]) > 0: |
if len(bibmap) > 0 and len(bibmap['author'][0]) > 0: |
docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0]) |
docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0]) |
docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0]) |
docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0]) |
Line 191 class documentViewer(Folder):
|
Line 275 class documentViewer(Folder):
|
return docinfo |
return docinfo |
|
|
|
|
def getDocinfoFromTextTool(self,url,docinfo=None): |
def getDocinfoFromTextTool(self,url,dom=None,docinfo=None): |
"""parse texttool tag in index meta""" |
"""parse texttool tag in index meta""" |
zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.INFO,"url: %s"%(url)) |
zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.INFO,"url: %s"%(url)) |
if docinfo is None: |
if docinfo is None: |
docinfo = {} |
docinfo = {} |
|
|
try: |
if dom is None: |
dom = NonvalidatingReader.parseUri(url) |
dom = self.getIndexMeta(url) |
except: |
|
zLOG.LOG("documentViewer (parseUrlTexttool)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2]) |
archiveNames=dom.xpath("//resource/name") |
raise IOError("Unable to get texttool info from %s"%(url)) |
if archiveNames and (len(archiveNames)>0): |
|
archiveName=getTextFromNode(archiveNames[0]) |
|
|
archivePaths=dom.xpath("//resource/archive-path") |
archivePaths=dom.xpath("//resource/archive-path") |
if archivePaths and (len(archivePaths)>0): |
if archivePaths and (len(archivePaths)>0): |
archivePath=getTextFromNode(archivePaths[0]) |
archivePath=getTextFromNode(archivePaths[0]) |
|
# clean up archive path |
|
if archivePath[0] != '/': |
|
archivePath = '/' + archivePath |
|
if not archivePath.endswith(archiveName): |
|
archivePath += "/" + archiveName |
else: |
else: |
archivePath=None |
archivePath=None |
|
|
images=dom.xpath("//texttool/image") |
imageDirs=dom.xpath("//texttool/image") |
if images and (len(images)>0): |
if imageDirs and (len(imageDirs)>0): |
image=getTextFromNode(images[0]) |
imageDir=getTextFromNode(imageDirs[0]) |
else: |
else: |
image=None |
# we balk with no image tag |
|
raise IOError("No text-tool info in %s"%(url)) |
if image and archivePath: |
|
image=os.path.join(archivePath,image) |
if imageDir and archivePath: |
image=image.replace("/mpiwg/online",'') |
#print "image: ", imageDir, " archivepath: ", archivePath |
docinfo=self.getDirinfoFromDigilib(image,docinfo=docinfo) |
imageDir=os.path.join(archivePath,imageDir) |
docinfo['imagePath'] = image |
imageDir=imageDir.replace("/mpiwg/online",'') |
docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+image |
docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo) |
|
docinfo['imagePath'] = imageDir |
|
docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir |
|
|
viewerUrls=dom.xpath("//texttool/digiliburlprefix") |
viewerUrls=dom.xpath("//texttool/digiliburlprefix") |
if viewerUrls and (len(viewerUrls)>0): |
if viewerUrls and (len(viewerUrls)>0): |
Line 233 class documentViewer(Folder):
|
Line 325 class documentViewer(Folder):
|
docinfo['textURL'] = textUrl |
docinfo['textURL'] = textUrl |
|
|
docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) |
docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) |
|
docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) |
return docinfo |
return docinfo |
|
|
|
|
Line 248 class documentViewer(Folder):
|
Line 341 class documentViewer(Folder):
|
docinfo['imageURL'] = imageUrl |
docinfo['imageURL'] = imageUrl |
|
|
docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo) |
docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo) |
|
docinfo = self.getAuthinfoFromIndexMeta(path,docinfo=docinfo) |
return docinfo |
return docinfo |
|
|
|
|
Line 269 class documentViewer(Folder):
|
Line 363 class documentViewer(Folder):
|
docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) |
docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) |
else: |
else: |
zLOG.LOG("documentViewer (getdocinfo)", zLOG.ERROR,"unknown mode!") |
zLOG.LOG("documentViewer (getdocinfo)", zLOG.ERROR,"unknown mode!") |
|
raise ValueError("Unknown mode %s"%(mode)) |
|
|
zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo: %s"%docinfo) |
zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo: %s"%docinfo) |
self.REQUEST.SESSION['docinfo'] = docinfo |
self.REQUEST.SESSION['docinfo'] = docinfo |
return docinfo |
return docinfo |
Line 302 class documentViewer(Folder):
|
Line 398 class documentViewer(Folder):
|
if mode=="texttool": #index.meta with texttool information |
if mode=="texttool": #index.meta with texttool information |
(viewerUrl,imagepath,textpath)=parseUrlTextTool(url) |
(viewerUrl,imagepath,textpath)=parseUrlTextTool(url) |
|
|
print textpath |
#print textpath |
try: |
try: |
dom = NonvalidatingReader.parseUri(textpath) |
dom = NonvalidatingReader.parseUri(textpath) |
except: |
except: |
Line 360 class documentViewer(Folder):
|
Line 456 class documentViewer(Folder):
|
except: |
except: |
return None |
return None |
|
|
def changeDocumentViewer(self, imageViewerUrl, title="", digilibBaseUrl=None, thumbrows=2, thumbcols=10, authgroups='mpiwg', RESPONSE=None):
    """Change the settings of this document viewer.

    imageViewerUrl -- stored as self.imageViewerUrl.
    title          -- stored as self.title.
    digilibBaseUrl -- stored as self.digilibBaseUrl.
    thumbrows      -- stored as self.thumbrows.
    thumbcols      -- stored as self.thumbcols.
    authgroups     -- comma-separated names of authorized groups; stored
                      as a list of stripped, lower-cased names.
    RESPONSE       -- if given, it is redirected to 'manage_main'.

    NOTE(review): the defaults thumbrows=2 / thumbcols=10 are the reverse
    of the ones in __init__ (thumbcols=2, thumbrows=10) -- confirm which
    is intended. They are left unchanged here.
    """
    self.title = title
    self.imageViewerUrl = imageViewerUrl
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    # skip empty entries (empty input, stray commas) so that an empty
    # access name can never match an authorized group
    groups = [s.strip().lower() for s in authgroups.split(',')]
    self.authgroups = [g for g in groups if g]

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
|
|