1:
2:
3: from OFS.Folder import Folder
4: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
5: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
6: from AccessControl import ClassSecurityInfo
7: from AccessControl import getSecurityManager
8: from Globals import package_home
9:
10: from Ft.Xml.Domlette import NonvalidatingReader
11: from Ft.Xml.Domlette import PrettyPrint, Print
12: from Ft.Xml import EMPTY_NAMESPACE, Parse
13:
14: import Ft.Xml.XPath
15:
16: import os.path
17: import sys
18: import cgi
19: import urllib
20: import logging
21: import zLOG
22: import urlparse
23:
def getInt(number, default=0):
    """Convert number to an int, returning default if that is not possible.

    @param number: value to convert (string, number, None, ...)
    @param default: value returned when the conversion fails (0 if omitted)
    @return: int(number), or default on failure
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # only swallow conversion failures; a bare except would also hide
        # KeyboardInterrupt/SystemExit and unrelated programming errors
        return default
30:
def getTextFromNode(nodename):
    """Return the concatenated text of the direct text children of a node.

    Only children whose nodeType is TEXT_NODE contribute; text inside
    nested elements is not included. None yields the empty string.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
41:
42:
def getParentDir(path):
    """Return path with its last '/'-separated component removed.

    A path that contains no '/' has no parent and yields ''.
    """
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
46:
47:
48: import socket
49:
def urlopen(url,timeout=2):
    """urlopen with timeout.

    Sets the process-wide default socket timeout to timeout seconds while
    the connection is opened and sets it to 5 seconds afterwards.

    @param url: URL to open
    @param timeout: socket timeout in seconds used for this call
    @return: the file-like object returned by urllib.urlopen
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        # reset even when urlopen raises, otherwise the short timeout
        # would stay in effect for every later socket in the process
        socket.setdefaulttimeout(5)
56:
57:
58: ##
59: ## documentViewer class
60: ##
class documentViewer(Folder):
    """Folder-based viewer for paged documents whose images are served by digilib.

    The viewer gathers document information ("docinfo": image path, number
    of pages, bibliographical data, access type) from an index.meta file or
    from an image directory, computes paging information ("pageinfo") for
    the thumbnail navigation and passes both to the 'viewer_main' template
    stored in the 'template' subfolder.
    """
    #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"

    meta_type="Document viewer"

    security=ClassSecurityInfo()
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())
    image_main = PageTemplateFile('zpt/image_main', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())

    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())


    def __init__(self,id,imageViewerUrl,textViewerUrl=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):
        """init document viewer

        @param id: id of the new object
        @param imageViewerUrl: URL of the image viewer
        @param textViewerUrl: URL of the text viewer (optional)
        @param title: title of the object
        @param digilibBaseUrl: base URL of digilib; looked up with
            findDigilibUrl() when empty
        @param thumbcols: number of thumbnail columns
        @param thumbrows: number of thumbnail rows
        @param authgroups: comma separated names of authorized groups
        """
        self.id=id
        self.title=title
        self.imageViewerUrl=imageViewerUrl
        self.textViewerUrl=textViewerUrl

        if not digilibBaseUrl:
            self.digilibBaseUrl = self.findDigilibUrl()
        else:
            self.digilibBaseUrl = digilibBaseUrl
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        # add template folder so we can always use template.something
        self.manage_addFolder('template')


    security.declareProtected('View','index_html')
    def index_html(self,mode,url,viewMode="images",start=None,pn=1):
        '''
        view it
        @param mode: defines which type of document is behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is images
        @param start: first page of the thumbnail group (optional)
        @param pn: number of the current page

        '''

        zLOG.LOG("documentViewer (index)", zLOG.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        if not self.digilibBaseUrl:
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
        pt = getattr(self.template, 'viewer_main')
        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)


    def getLink(self,param=None,val=None):
        """link to documentviewer with parameter param set to val

        The link repeats all parameters of the current request form. If val
        is None the parameter param is removed from the link instead.
        """
        params=self.REQUEST.form.copy()
        if param is not None:
            if val is None:
                if param in params:
                    del params[param]
            else:
                params[param] = str(val)

        # quote values and assemble into query string
        ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
        url=self.REQUEST['URL1']+"?"+ps
        return url


    def getStyle(self, idx, selected, style=""):
        """returns a string with the given style and append 'sel' if idx == selected."""
        #zLOG.LOG("documentViewer (getstyle)", zLOG.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
        if idx == selected:
            return style + 'sel'
        return style


    def isAccessible(self, docinfo):
        """returns if access to the resource is granted

        Access type 'free' is open to everybody. A missing access type or
        one listed in self.authgroups requires a user other than
        "Anonymous User". Every other access type is denied.
        """
        access = docinfo.get('accessType', None)
        zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "access type %s"%access)
        if access == 'free':
            zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "access is free")
            return True
        elif access is None or access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            if user is not None:
                return (user.getUserName() != "Anonymous User")
            return False

        zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "unknown access type %s"%access)
        return False


    def _fetchDom(self, fetchUrl, logname, num_retries=3):
        # Download fetchUrl and parse it into a DOM, trying up to num_retries
        # times. Returns the DOM, or None when every try failed; each failure
        # is logged as an error under the given method name.
        # (Comment instead of docstring: the helper must not be publishable.)
        for cnt in range(num_retries):
            try:
                # read the text and parse it ourselves; per the original
                # patch note this avoids the encoding errors seen with
                # NonvalidatingReader.parseUri
                conn = urllib.urlopen(fetchUrl)
                try:
                    txt = conn.read()
                finally:
                    conn.close()
                return Parse(txt)
            except Exception:
                zLOG.LOG("documentViewer (%s)"%logname, zLOG.ERROR,
                         "error reading %s (try %d): %s (%s)"%((fetchUrl,cnt)+sys.exc_info()[0:2]))
        return None


    def _firstText(self, dom, xpath):
        # Text of the first node matching xpath in dom, or "" when nothing
        # matches (instead of an IndexError on the empty result list).
        nodes = dom.xpath(xpath)
        if nodes:
            return getTextFromNode(nodes[0])
        return ""


    def getDirinfoFromDigilib(self,path,docinfo=None):
        """reads the directory information for path from digilib

        Stores the number of pages (0 if digilib reports no size) as
        docinfo['numPages'].

        @param path: digilib path of the image directory
        @param docinfo: dict to update; a new dict is created when None
        @return: the updated docinfo
        @raise IOError: if the directory info could not be read
        """
        if docinfo is None:
            docinfo = {}

        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

        zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo from %s"%(infoUrl))

        dom = self._fetchDom(infoUrl, "getdirinfofromdigilib")
        if dom is None:
            raise IOError("Unable to get dir-info from %s"%(infoUrl))

        sizes=dom.xpath("//dir/size")
        # the original format string had no placeholder, so the value was
        # never written to the log
        zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:size %s"%(sizes,))

        if sizes:
            # getInt: an empty or malformed size element counts as 0 pages
            docinfo['numPages'] = getInt(getTextFromNode(sizes[0]))
        else:
            docinfo['numPages'] = 0

        return docinfo


    def getIndexMeta(self, url):
        """returns dom of index.meta document at url

        @param url: either a full "http://" URL, which is used as it is, or
            an online path, which is read through digilib's Texter servlet
            with "/index.meta" appended if missing
        @raise IOError: if the document could not be read or parsed
        """
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url.replace("/mpiwg/online","")
            if not metaUrl.endswith("index.meta"):
                metaUrl += "/index.meta"
        # was a print statement; stdout is no place for debug output in a server
        zLOG.LOG("documentViewer (getIndexMeta)", zLOG.INFO, "metaUrl: %s"%(metaUrl))

        dom = self._fetchDom(metaUrl, "getIndexMeta")
        if dom is None:
            raise IOError("Unable to read index meta from %s"%(url))

        return dom

    def getPresentationInfoXML(self, url):
        """returns dom of info.xml document at url

        @param url: either a full "http://" URL, which is used as it is, or
            an online path, which is read through digilib's Texter servlet
        @raise IOError: if the document could not be read or parsed
        """
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url.replace("/mpiwg/online","")

        dom = self._fetchDom(metaUrl, "getPresentationInfoXML")
        if dom is None:
            raise IOError("Unable to read infoXMLfrom %s"%(url))

        return dom


    def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None):
        """gets authorization info from the index.meta file at path or given by dom

        Sets docinfo['accessType'] to the type attribute of the access
        element, to the lower-cased access name for the types 'group' and
        'institution', or to None if there is no access element.
        """
        zLOG.LOG("documentViewer (getauthinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))

        access = None

        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(getParentDir(path))

        acctype = dom.xpath("//access-conditions/access/@type")
        if acctype:
            access=acctype[0].value
            if access in ['group', 'institution']:
                names = dom.xpath("//access-conditions/access/name")
                # without a name element the bare type is kept rather than
                # failing with an IndexError
                if names:
                    access = getTextFromNode(names[0]).lower()

        docinfo['accessType'] = access
        return docinfo


    def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None):
        """gets bibliographical info from the index.meta file at path or given by dom

        Sets docinfo['author'], ['title'] and ['year'] using the field
        mapping that self.metadata.main.meta.bib generates for the bib
        type; a field whose element is missing becomes "".
        """
        zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(getParentDir(path))

        metaData=self.metadata.main.meta.bib
        bibtype=dom.xpath("//bib/@type")
        if bibtype:
            bibtype=bibtype[0].value
        else:
            bibtype="generic"
        bibtype=bibtype.replace("-"," ") # wrong types in index meta "-" instead of " " (not wrong! ROC)
        bibmap=metaData.generateMappingForType(bibtype)
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
            for field in ('author', 'title', 'year'):
                docinfo[field]=self._firstText(dom, "//bib/%s"%bibmap[field][0])

        return docinfo


    def getDocinfoFromTextTool(self,url,dom=None,docinfo=None):
        """parse texttool tag in index meta

        Fills docinfo with image path/URL and page count, the optional
        viewer and text URLs, bibliographical data and the access type.

        @param url: URL or online path of the index.meta file
        @param dom: already parsed index.meta; read from url when None
        @param docinfo: dict to update; a new dict is created when None
        @return: the updated docinfo
        @raise IOError: if neither an archive-path nor a texttool image
            tag can be determined
        """
        zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.INFO,"url: %s"%(url))
        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(url)

        archivePath = None
        archiveName = None

        archiveNames=dom.xpath("//resource/name")
        if archiveNames:
            archiveName=getTextFromNode(archiveNames[0])
        else:
            zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.WARNING,"resource/name missing in: %s"%(url))

        archivePaths=dom.xpath("//resource/archive-path")
        if archivePaths:
            archivePath=getTextFromNode(archivePaths[0])
            # clean up archive path
            # (startswith instead of [0]: an empty element must not raise)
            if not archivePath.startswith('/'):
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.WARNING,"resource/archive-path missing in: %s"%(url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')

        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s"%(url))

        imageDirs=dom.xpath("//texttool/image")
        if imageDirs:
            imageDir=getTextFromNode(imageDirs[0])
        else:
            # we balk with no image tag
            raise IOError("No text-tool info in %s"%(url))

        if imageDir and archivePath:
            imageDir=os.path.join(archivePath,imageDir)
            imageDir=imageDir.replace("/mpiwg/online",'')
            docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo)
            docinfo['imagePath'] = imageDir
            docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir

        viewerUrls=dom.xpath("//texttool/digiliburlprefix")
        if viewerUrls:
            docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

        textUrls=dom.xpath("//texttool/text")
        if textUrls:
            textUrl=getTextFromNode(textUrls[0])
            if urlparse.urlparse(textUrl)[0]=="": # not a URL
                textUrl=os.path.join(archivePath,textUrl)

            docinfo['textURL'] = textUrl

        presentationUrls=dom.xpath("//texttool/presentation")
        if presentationUrls:
            # the presentation URL is the metadata URL with 'index.meta'
            # replaced by the relative path of the presentation info
            presentationUrl=url.replace('index.meta',getTextFromNode(presentationUrls[0]))

            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl,docinfo=docinfo,dom=dom)
        else:
            docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)
        docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)
        return docinfo


    def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
        """gets the bibliographical information from the presentation entry in texttools

        Reads the presentation info document at url and sets
        docinfo['author'], ['title'] and ['year'] from its author, title
        and date elements; a missing element yields "". The dom argument
        is ignored: the document is always read from url.
        """
        if docinfo is None:
            # the default used to fail with a TypeError on the first assignment
            docinfo = {}
        dom=self.getPresentationInfoXML(url)
        docinfo['author']=self._firstText(dom, "//author")
        docinfo['title']=self._firstText(dom, "//title")
        docinfo['year']=self._firstText(dom, "//date")
        return docinfo

    def getDocinfoFromImagePath(self,path,docinfo=None):
        """path is the path to the images; it assumes that the index.meta file is one level higher."""
        zLOG.LOG("documentViewer (getdocinfofromimagepath)", zLOG.INFO,"path: %s"%(path))
        if docinfo is None:
            docinfo = {}
        path=path.replace("/mpiwg/online","")
        docinfo['imagePath'] = path
        docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo)
        imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
        docinfo['imageURL'] = imageUrl

        docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo)
        docinfo = self.getAuthinfoFromIndexMeta(path,docinfo=docinfo)
        return docinfo


    def getDocinfo(self, mode, url):
        """returns docinfo depending on mode

        @param mode: "texttool" (url points to an index.meta with texttool
            information) or "imagepath" (url is the path of the images)
        @param url: location of the document, interpreted according to mode
        @raise ValueError: for any other mode
        """
        zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"mode: %s, url: %s"%(mode,url))
        # look for cached docinfo in session
        # NOTE: the session cache is switched off: the lookup uses the key
        # 'docinfo_XX' while the docinfo is stored under 'docinfo' below
        if self.REQUEST.SESSION.has_key('docinfo_XX'):
            docinfo = self.REQUEST.SESSION['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo in session: %s"%docinfo)
                return docinfo
        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        if mode=="texttool": #index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode=="imagepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        else:
            zLOG.LOG("documentViewer (getdocinfo)", zLOG.ERROR,"unknown mode!")
            raise ValueError("Unknown mode %s"%(mode))

        zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo: %s"%docinfo)
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo


    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):
        """returns pageinfo with the given parameters

        @param current: number of the current page (1-based)
        @param start: first page of the thumbnail group; defaults to the
            first page of the group that contains the current page
        @param rows: thumbnail rows (default self.thumbrows)
        @param cols: thumbnail columns (default self.thumbcols)
        @param docinfo: if given, its 'numPages' limits 'end' and
            determines 'numgroups'
        @return: dict with the keys current, rows, cols, groupsize, start,
            end and (with docinfo) numgroups
        """
        pageinfo = {}
        current = getInt(current)
        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # Groups are 1..grpsize, grpsize+1..2*grpsize, ...; the group index
        # must be computed from current-1, otherwise the last page of a group
        # (current == k*grpsize) selects the following group.
        defaultStart = (max(current - 1, 0) // grpsize) * grpsize + 1
        start = getInt(start, default=defaultStart)
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if docinfo is not None:
            # numPages is missing when no image directory could be read
            np = int(docinfo.get('numPages', 0))
            pageinfo['end'] = min(pageinfo['end'], np)
            pageinfo['numgroups'] = np // grpsize
            if np % grpsize > 0:
                pageinfo['numgroups'] += 1

        return pageinfo

    def text(self,mode,url,pn):
        """give text

        Unfinished prototype: walks from page break pn to the following
        page break of the text document and collects the nodes in between,
        but the result is never returned, so the method yields None.
        """
        if mode != "texttool":
            # only texttool mode provides a text path; the original ended
            # up in the except branch below and returned None as well
            return None

        # NOTE(review): parseUrlTextTool is not defined in the visible part
        # of this module -- confirm where it is supposed to come from
        (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)

        try:
            dom = NonvalidatingReader.parseUri(textpath)
        except Exception:
            return None

        collected=[]
        pagebreaks=dom.xpath("//pb")

        # page break that starts page pn and its enclosing paragraph
        node=pagebreaks[int(pn)-1]
        p=node
        while p.tagName!="p":
            p=p.parentNode

        # page break that starts the next page and its enclosing paragraph
        endNode=pagebreaks[int(pn)]
        e=endNode
        while e.tagName!="p":
            e=e.parentNode

        # collect the siblings from the start parent up to the end parent
        cur=node.parentNode
        while cur and (cur!=endNode.parentNode):
            collected.append(cur)
            cur=cur.nextSibling
        collected.append(endNode.parentNode)

        # p == e means both page breaks are in the same paragraph; the code
        # handling either case and returning the collected nodes was
        # commented out in the original and is left out here
        return None

    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib

        @return: the scaler URL reported by the image viewer's
            getScalerUrl with "/servlet/Scaler?" removed, or None if it
            could not be read
        """
        # drops the last character of imageViewerUrl before appending
        # (presumably a trailing '?' -- TODO confirm)
        url = self.imageViewerUrl[:-1] + "/getScalerUrl"
        logging.info("finddigiliburl: %s"%urlparse.urlparse(url)[0])
        logging.info("finddigiliburl: %s"%urlparse.urljoin(self.absolute_url(),url))

        try:
            if urlparse.urlparse(url)[0]=='': #relative path
                url=urlparse.urljoin(self.absolute_url()+"/",url)

            scaler = urlopen(url).read()
            return scaler.replace("/servlet/Scaler?", "")
        except Exception:
            # best effort: callers fall back to a configured or default URL
            zLOG.LOG("documentViewer (finddigiliburl)", zLOG.WARNING,
                     "unable to read %s: %s (%s)"%((url,)+sys.exc_info()[0:2]))
            return None

    # changing the configuration needs the same permission as viewing the form
    security.declareProtected('View management screens','changeDocumentViewer')
    def changeDocumentViewer(self,imageViewerUrl,textViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):
        """change the configuration of the document viewer

        Parameters as in __init__; redirects to manage_main if RESPONSE
        is given.
        """
        # NOTE(review): the defaults of thumbrows/thumbcols are swapped
        # compared to __init__ (thumbcols=2, thumbrows=10); they are kept
        # because they are part of the method's interface -- confirm
        self.title=title
        self.imageViewerUrl=imageViewerUrl
        self.textViewerUrl=textViewerUrl
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
548:
549:
550:
551:
552: # security.declareProtected('View management screens','renameImageForm')
553:
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    addForm = PageTemplateFile('zpt/addDocumentViewer', globals())
    # wrap the template in the context of the container before calling it
    return addForm.__of__(self)()
558:
def manage_AddDocumentViewer(self,id,imageViewerUrl="",textViewerUrl="",title="",RESPONSE=None):
    """Create a documentViewer with the given id in this container.

    Redirects to manage_main when RESPONSE is given.
    """
    viewer = documentViewer(id, imageViewerUrl,
                            title=title,
                            textViewerUrl=textViewerUrl)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
566:
567:
568: ##
569: ## DocumentViewerTemplate class
570: ##
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template for the document viewer."""

    meta_type = "DocumentViewer Template"
574:
575:
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template."""
    addForm = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    # wrap the template in the context of the container before calling it
    return addForm.__of__(self)()
580:
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the product's zpt/viewer_main.zpt.

    @param id: id of the new template
    @param title: optional title of the template
    @param text: not used; the content always comes from viewer_main.zpt
    @param REQUEST: if given, the response is redirected to the
        manage_main page of the new template
    @param submit: not used
    @return: ''
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath)
    try:
        # close the file explicitly instead of leaving it to the garbage collector
        content = templateFile.read()
    finally:
        templateFile.close()
    ob.pt_edit(content,None)

    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code: there is nothing to redirect (this used to fail
        # on REQUEST['URL1'] / REQUEST.RESPONSE)
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
598:
599:
600:
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>