1:
2:
3: from OFS.Folder import Folder
4: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
5: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
6: from AccessControl import ClassSecurityInfo
7: from AccessControl import getSecurityManager
8: from Globals import package_home
9:
10: from Ft.Xml.Domlette import NonvalidatingReader
11: from Ft.Xml.Domlette import PrettyPrint, Print
12: from Ft.Xml import EMPTY_NAMESPACE, Parse
13:
14: import Ft.Xml.XPath
15:
16: import os.path
17: import sys
18: import cgi
19: import urllib
20: import logging
21:
22: import urlparse
23:
def logger(txt, method, txt2):
    """Write txt immediately followed by txt2 to the root logger.

    txt    -- message prefix (callers pass the component/method name)
    method -- numeric logging level such as logging.INFO or logging.ERROR
    txt2   -- message text, appended to txt without a separator

    The level used to be ignored and every message was emitted at INFO,
    which hid errors and warnings among the informational output.  A value
    that is not an integer level falls back to logging.INFO.
    """
    level = method
    if not isinstance(level, int):
        level = logging.INFO
    logging.log(level, txt + txt2)
27:
28:
def getInt(number, default=0):
    """Return number converted to int, or default if it cannot be converted.

    Only the errors int() raises for unusable input are caught (None, other
    non-numeric types, malformed strings, infinite floats); anything else,
    e.g. KeyboardInterrupt, propagates instead of being silently swallowed.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return default
35:
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text-node children of nodename.

    Returns "" when nodename is None or has no text children; text of nested
    elements is not included.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
46:
47:
def getParentDir(path):
    """Return path with its last '/'-separated component removed.

    A path without any '/' yields the empty string.
    """
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
51:
52:
53: import socket
54:
def urlopen(url, timeout=2):
    """Open url with urllib.urlopen using a temporary socket timeout.

    url     -- URL to open
    timeout -- timeout in seconds applied while the connection is opened

    The timeout is set through the process-wide socket default.  Afterwards
    the default is put back to 5 seconds, as before, but now also when the
    open fails; previously an exception left the short timeout in force for
    every later socket created by the process.
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        socket.setdefaulttimeout(5)
61:
62:
63: ##
64: ## documentViewer class
65: ##
class documentViewer(Folder):
    """Folder that collects document and paging information for its viewer templates.

    For each request a ``docinfo`` dict (image path/URL, text URL,
    bibliographic fields, access type, number of pages) is built from an
    index.meta document and the digilib server, together with a ``pageinfo``
    dict describing the current group of thumbnails.  Both are passed to the
    ``viewer_main`` template looked up in the ``template`` subfolder.
    """
    # example: textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"

    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'main config', 'action': 'changeDocumentViewerForm'},
    )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())
    image_main = PageTemplateFile('zpt/image_main', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())

    security.declareProtected('View management screens', 'changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())

    def __init__(self, id, imageViewerUrl, textViewerUrl=None, title="", digilibBaseUrl=None, thumbcols=2, thumbrows=10, authgroups="mpiwg"):
        """Initialise the document viewer.

        id             -- object id
        imageViewerUrl -- URL of the image viewer; also used to look up the
                          digilib URL when digilibBaseUrl is not given
        textViewerUrl  -- URL of the text viewer (optional)
        title          -- object title
        digilibBaseUrl -- base URL of the digilib server
        thumbcols      -- number of thumbnail columns per group
        thumbrows      -- number of thumbnail rows per group
        authgroups     -- comma separated names of authorised groups
        """
        self.id = id
        self.title = title
        self.imageViewerUrl = imageViewerUrl
        self.textViewerUrl = textViewerUrl

        if not digilibBaseUrl:
            self.digilibBaseUrl = self.findDigilibUrl()
        else:
            self.digilibBaseUrl = digilibBaseUrl
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is stored as a list of lower-cased group names
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        # add template folder so we can always use template.something
        self.manage_addFolder('template')

    security.declareProtected('View', 'index_html')
    def index_html(self, mode, url, viewMode="auto", start=None, pn=1):
        """Render the viewer page.

        mode     -- kind of document behind url: "texttool" (index.meta with
                    texttool information) or "imagepath" (directory of images)
        url      -- url or path of the document
        viewMode -- "images", "text" or "auto"; "auto" selects "text" when the
                    document has a text URL and a text viewer is configured,
                    otherwise "images"
        start    -- first page of the thumbnail group (optional)
        pn       -- current page number
        """
        logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s" % (mode, url, start, pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        if not self.digilibBaseUrl:
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode, url=url)
        pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo)
        pt = getattr(self.template, 'viewer_main')

        if viewMode == "auto":
            # text mode needs both a text URL and a configured text viewer
            if docinfo.get("textURL", '') and self.textViewerUrl:
                viewMode = "text"
            else:
                viewMode = "images"

        return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode)

    def getLink(self, param=None, val=None):
        """Return a link to the viewer with request parameter param set to val.

        All parameters of the current request form are kept.  If val is None
        the parameter param is removed from the link instead.
        """
        params = self.REQUEST.form.copy()
        if param is not None:
            if val is None:
                if param in params:
                    del params[param]
            else:
                params[param] = str(val)

        # quote values and assemble into query string
        ps = "&".join(["%s=%s" % (k, urllib.quote(v)) for (k, v) in params.items()])
        url = self.REQUEST['URL1'] + "?" + ps
        return url

    def getStyle(self, idx, selected, style=""):
        """Return style, with 'sel' appended if idx equals selected."""
        if idx == selected:
            return style + 'sel'
        return style

    def isAccessible(self, docinfo):
        """Return True if access to the resource described by docinfo is granted.

        Access type 'free' is open to everybody.  A missing access type or one
        listed in self.authgroups requires a user other than "Anonymous User".
        Every other access type is refused.
        """
        access = docinfo.get('accessType', None)
        logger("documentViewer (accessOK)", logging.INFO, "access type %s" % access)
        if access == 'free':
            logger("documentViewer (accessOK)", logging.INFO, "access is free")
            return True

        if access is None or access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            if user is None:
                return False
            return (user.getUserName() != "Anonymous User")

        logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s" % access)
        return False

    def _fetchDom(self, url):
        """Download url once and return the parsed document; the connection is always closed."""
        conn = urllib.urlopen(url)
        try:
            txt = conn.read()
        finally:
            conn.close()
        return Parse(txt)

    def _firstText(self, dom, xpath):
        """Return the text of the first node matching xpath in dom, or "" if nothing matches."""
        nodes = dom.xpath(xpath)
        if nodes:
            return getTextFromNode(nodes[0])
        return ""

    def getDirinfoFromDigilib(self, path, docinfo=None):
        """Store the number of pages in directory path, as reported by digilib, in docinfo.

        path    -- image directory, appended to the dirInfo-xml.jsp query
        docinfo -- dict to update; a new dict is created when None

        Sets docinfo['numPages'] (0 when digilib reports no size) and returns
        docinfo.  Raises IOError when the directory info could not be read
        after three attempts.
        """
        num_retries = 3
        if docinfo is None:
            docinfo = {}

        infoUrl = self.digilibBaseUrl + "/dirInfo-xml.jsp?mo=dir&fn=" + path

        logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s" % (infoUrl))

        for cnt in range(num_retries):
            try:
                dom = self._fetchDom(infoUrl)
                break
            except Exception:
                logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)" % (infoUrl, cnt))
        else:
            # the loop ended without a successful read
            raise IOError("Unable to get dir-info from %s" % (infoUrl))

        sizes = dom.xpath("//dir/size")
        # the format string used to lack a placeholder, so the value was never logged
        logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size %s" % (sizes,))

        if sizes:
            docinfo['numPages'] = int(getTextFromNode(sizes[0]))
        else:
            docinfo['numPages'] = 0

        return docinfo

    def getIndexMeta(self, url):
        """Return the parsed index.meta document for url.

        url is used as is when it starts with "http://".  Otherwise it is taken
        as an online path: "/mpiwg/online" is removed, the path is requested
        through the digilib Texter servlet and "/index.meta" is appended when
        missing.  Raises IOError when the document could not be read after
        three attempts.
        """
        num_retries = 3
        dom = None
        metaUrl = None
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server = self.digilibBaseUrl + "/servlet/Texter?fn="
            metaUrl = server + url.replace("/mpiwg/online", "")
            if not metaUrl.endswith("index.meta"):
                metaUrl += "/index.meta"
        # replaces a debugging print statement that wrote to stdout
        logger("documentViewer (getindexmeta)", logging.INFO, "metaUrl: %s" % (metaUrl))

        for cnt in range(num_retries):
            try:
                # reading the text first and parsing it avoids encoding errors
                # seen with NonvalidatingReader.parseUri(metaUrl)
                dom = self._fetchDom(metaUrl)
                break
            except Exception:
                logger("ERROR documentViewer (getIndexMata)", logging.INFO, "%s (%s)" % sys.exc_info()[0:2])

        if dom is None:
            raise IOError("Unable to read index meta from %s" % (url))

        return dom

    def getPresentationInfoXML(self, url):
        """Return the parsed presentation info.xml document for url.

        url is used as is when it starts with "http://"; otherwise
        "/mpiwg/online" is removed and the path is requested through the
        digilib Texter servlet.  Raises IOError when the document could not be
        read after three attempts.
        """
        num_retries = 3
        dom = None
        metaUrl = None
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server = self.digilibBaseUrl + "/servlet/Texter?fn="
            metaUrl = server + url.replace("/mpiwg/online", "")

        for cnt in range(num_retries):
            try:
                # reading the text first and parsing it avoids encoding errors
                # seen with NonvalidatingReader.parseUri(metaUrl)
                dom = self._fetchDom(metaUrl)
                break
            except Exception:
                logger("ERROR documentViewer (getPresentationInfoXML)", logging.INFO, "%s (%s)" % sys.exc_info()[0:2])

        if dom is None:
            raise IOError("Unable to read infoXMLfrom %s" % (url))

        return dom

    def getAuthinfoFromIndexMeta(self, path, docinfo=None, dom=None):
        """Store the access type from index.meta in docinfo['accessType'].

        path    -- path of the resource; the index.meta of its parent
                   directory is read when dom is None
        docinfo -- dict to update; a new dict is created when None
        dom     -- already parsed index.meta (optional)

        For the access types 'group' and 'institution' the lower-cased name of
        the group or institution is stored instead, if index.meta gives one.
        accessType is None when index.meta has no access conditions.
        """
        logger("documentViewer (getauthinfofromindexmeta)", logging.INFO, "path: %s" % (path))

        access = None

        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(getParentDir(path))

        acctype = dom.xpath("//access-conditions/access/@type")
        if acctype:
            access = acctype[0].value
            if access in ['group', 'institution']:
                names = dom.xpath("//access-conditions/access/name")
                # without a name the plain type is kept; isAccessible only
                # grants it if it is listed in authgroups
                if names:
                    access = getTextFromNode(names[0]).lower()

        docinfo['accessType'] = access
        return docinfo

    def getBibinfoFromIndexMeta(self, path, docinfo=None, dom=None):
        """Store author, title, year and lang from the bib tag of index.meta in docinfo.

        path    -- path of the resource; the index.meta of its parent
                   directory is read when dom is None
        docinfo -- dict to update; a new dict is created when None
        dom     -- already parsed index.meta (optional)

        The element names are taken from the metadata mapping for the bib
        type (self.metadata.main.meta.bib).  Elements missing in index.meta
        yield empty strings.
        """
        logger("documentViewer (getbibinfofromindexmeta)", logging.INFO, "path: %s" % (path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(getParentDir(path))

        metaData = self.metadata.main.meta.bib
        bibtype = dom.xpath("//bib/@type")
        if bibtype:
            bibtype = bibtype[0].value
        else:
            bibtype = "generic"
        # index.meta may use "-" where the mapping uses " " in type names
        bibtype = bibtype.replace("-", " ")
        bibmap = metaData.generateMappingForType(bibtype)
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
            for key in ('author', 'title', 'year'):
                docinfo[key] = self._firstText(dom, "//bib/%s" % bibmap[key][0])

        try:
            docinfo['lang'] = self._firstText(dom, "//bib/lang")
        except Exception:
            # language is optional, keep going without it
            docinfo['lang'] = ''
        return docinfo

    def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
        """Fill docinfo from the texttool tag of the index.meta at url.

        url     -- url or path of the index.meta document
        dom     -- already parsed index.meta (optional)
        docinfo -- dict to update; a new dict is created when None

        Sets imagePath, imageURL, numPages, optionally viewerURL and textURL,
        the bibliographic fields and the access type.  Raises IOError when no
        archive path can be determined.
        """
        logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url))
        if docinfo is None:
            docinfo = {}

        if docinfo.get('lang', None) is None:
            docinfo['lang'] = ''  # default: no language set
        if dom is None:
            dom = self.getIndexMeta(url)

        archivePath = None
        archiveName = None

        archiveNames = dom.xpath("//resource/name")
        if archiveNames:
            archiveName = getTextFromNode(archiveNames[0])
        else:
            logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url))

        archivePaths = dom.xpath("//resource/archive-path")
        if archivePaths:
            archivePath = getTextFromNode(archivePaths[0])
            # clean up archive path (startswith also copes with an empty element)
            if not archivePath.startswith('/'):
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')

        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s" % (url))

        # a missing image tag is not an error because text mode is the standard
        imageDir = ""
        imageDirs = dom.xpath("//texttool/image")
        if imageDirs:
            imageDir = getTextFromNode(imageDirs[0])

        docinfo['imagePath'] = ""  # no images
        docinfo['imageURL'] = ""

        if imageDir and archivePath:
            imageDir = os.path.join(archivePath, imageDir)
            imageDir = imageDir.replace("/mpiwg/online", '')
            docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
            docinfo['imagePath'] = imageDir
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
        else:
            # without images simply assume one page; thumbnail navigation
            # is not possible in that case
            docinfo['numPages'] = 1

        viewerUrls = dom.xpath("//texttool/digiliburlprefix")
        if viewerUrls:
            viewerUrl = getTextFromNode(viewerUrls[0])
            docinfo['viewerURL'] = viewerUrl

        textUrls = dom.xpath("//texttool/text")
        if textUrls:
            textUrl = getTextFromNode(textUrls[0])
            if urlparse.urlparse(textUrl)[0] == "":
                # not a URL but a path relative to the archive path
                textUrl = os.path.join(archivePath, textUrl)

            docinfo['textURL'] = textUrl

        presentationUrls = dom.xpath("//texttool/presentation")
        # bibliographic info from the bib tag
        docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)

        if presentationUrls:
            # presentation info overrides the bib info; its url is the
            # index.meta url with 'index.meta' replaced by the relative path
            # of the presentation file
            presentationUrl = url.replace('index.meta', getTextFromNode(presentationUrls[0]))
            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo)

        # this call used the undefined name 'path'; the access conditions are
        # read from the index.meta that is already parsed
        docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)
        return docinfo

    def getBibinfoFromTextToolPresentation(self, url, docinfo=None, dom=None):
        """Override author, title and year in docinfo with the presentation info at url.

        url     -- url or path of the presentation info document
        docinfo -- dict to update; a new dict is created when None
        dom     -- ignored; the presentation document is always read from url

        A field is only overridden when the presentation document contains
        the corresponding element (author, title, date).
        """
        if docinfo is None:
            docinfo = {}
        dom = self.getPresentationInfoXML(url)
        for key, xpath in (('author', "//author"), ('title', "//title"), ('year', "//date")):
            nodes = dom.xpath(xpath)
            if nodes:
                docinfo[key] = getTextFromNode(nodes[0])
        return docinfo

    def getDocinfoFromImagePath(self, path, docinfo=None):
        """Fill docinfo for the image directory path.

        The index.meta document is expected one level above path.  Sets
        imagePath, imageURL, numPages, the bibliographic fields and the
        access type.
        """
        logger("documentViewer (getdocinfofromimagepath)", logging.INFO, "path: %s" % (path))
        if docinfo is None:
            docinfo = {}
        path = path.replace("/mpiwg/online", "")
        docinfo['imagePath'] = path
        docinfo = self.getDirinfoFromDigilib(path, docinfo=docinfo)
        imageUrl = self.digilibBaseUrl + "/servlet/Scaler?fn=" + path
        docinfo['imageURL'] = imageUrl

        docinfo = self.getBibinfoFromIndexMeta(path, docinfo=docinfo)
        docinfo = self.getAuthinfoFromIndexMeta(path, docinfo=docinfo)
        return docinfo

    def getDocinfo(self, mode, url):
        """Return the docinfo dict for url, depending on mode.

        mode -- "texttool" or "imagepath"; anything else raises ValueError
        url  -- url or path of the document

        The docinfo of the last request is cached in the session and reused
        when mode and url are unchanged.
        """
        logger("documentViewer (getdocinfo)", logging.INFO, "mode: %s, url: %s" % (mode, url))
        # look for cached docinfo in session
        if self.REQUEST.SESSION.has_key('docinfo'):
            docinfo = self.REQUEST.SESSION['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                logger("documentViewer (getdocinfo)", logging.INFO, "docinfo in session: %s" % docinfo)
                return docinfo
        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        if mode == "texttool":
            # index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode == "imagepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        else:
            logger("documentViewer (getdocinfo)", logging.ERROR, "unknown mode!")
            raise ValueError("Unknown mode %s" % (mode))

        logger("documentViewer (getdocinfo)", logging.INFO, "docinfo: %s" % docinfo)
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo

    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):
        """Return the pageinfo dict for the thumbnail group containing page current.

        current -- current page number (1-based; unusable values count as 0)
        start   -- first page of the group; computed from current if missing
        rows    -- thumbnail rows, default self.thumbrows
        cols    -- thumbnail columns, default self.thumbcols
        docinfo -- docinfo dict; when given, 'end' is limited to its
                   numPages and 'numgroups' is set

        The result has the keys current, rows, cols, groupsize, start, end
        and, with docinfo, numgroups.
        """
        pageinfo = {}
        current = getInt(current)
        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # Pages are 1-based, so the last page of a group (e.g. page 20 of
        # 1..20) must stay in that group; int(current / grpsize) put it into
        # the following one.  The start is never smaller than 1.
        groupstart = max(((current - 1) // grpsize) * grpsize + 1, 1)
        start = getInt(start, default=groupstart)
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if docinfo is not None:
            np = int(docinfo['numPages'])
            pageinfo['end'] = min(pageinfo['end'], np)
            (fullgroups, rest) = divmod(np, grpsize)
            pageinfo['numgroups'] = fullgroups
            if rest > 0:
                pageinfo['numgroups'] += 1

        return pageinfo

    def text(self, mode, url, pn):
        """Collect the nodes of text page pn; unfinished, currently returns None.

        Only mode "texttool" is handled.  None is also returned when the text
        document cannot be parsed.
        """
        if mode != "texttool":
            # no text path is known for any other mode
            return None

        # NOTE(review): parseUrlTextTool is not defined in the visible part of
        # this module -- confirm where it is expected to come from
        (viewerUrl, imagepath, textpath) = parseUrlTextTool(url)

        try:
            dom = NonvalidatingReader.parseUri(textpath)
        except Exception:
            return None

        collected = []
        pagebreaks = dom.xpath("//pb")

        # page break that starts the page and the enclosing p element
        startNode = pagebreaks[int(pn) - 1]
        startPara = startNode
        while startPara.tagName != "p":
            startPara = startPara.parentNode

        # page break that ends the page and the enclosing p element
        endNode = pagebreaks[int(pn)]
        endPara = endNode
        while endPara.tagName != "p":
            endPara = endPara.parentNode

        # collect the siblings from the parent of the first page break up to
        # and including the parent of the second one
        sibling = startNode.parentNode
        while sibling and (sibling != endNode.parentNode):
            collected.append(sibling)
            sibling = sibling.nextSibling
        collected.append(endNode.parentNode)

        # TODO: the case where both page breaks are in different paragraphs is
        # not handled, and the collected nodes are not returned yet (the code
        # for both was commented out)

    def findDigilibUrl(self):
        """Try to get the digilib base URL from zogilib; return None on any failure.

        The URL is read from "getScalerUrl" below the image viewer URL
        (without its last character) and "/servlet/Scaler?" is removed from
        the answer.
        """
        try:
            url = self.imageViewerUrl[:-1] + "/getScalerUrl"
            logging.info("finddigiliburl: %s" % urlparse.urlparse(url)[0])
            logging.info("finddigiliburl: %s" % urlparse.urljoin(self.absolute_url(), url))

            if urlparse.urlparse(url)[0] == '':
                # relative path
                url = urlparse.urljoin(self.absolute_url() + "/", url)

            conn = urlopen(url)
            try:
                scaler = conn.read()
            finally:
                conn.close()
            return scaler.replace("/servlet/Scaler?", "")
        except Exception:
            return None

    def changeDocumentViewer(self, imageViewerUrl, textViewerUrl, title="", digilibBaseUrl=None, thumbrows=2, thumbcols=10, authgroups='mpiwg', RESPONSE=None):
        """Change the configuration of the document viewer.

        The parameters have the same meaning as in __init__.  With RESPONSE
        the browser is redirected to manage_main afterwards.
        """
        # NOTE(review): the defaults for thumbrows/thumbcols are swapped
        # compared to __init__ (thumbcols=2, thumbrows=10); kept unchanged for
        # existing callers
        self.title = title
        self.imageViewerUrl = imageViewerUrl
        self.textViewerUrl = textViewerUrl
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
571:
572:
573:
574:
575: # security.declareProtected('View management screens','renameImageForm')
576:
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    # bind the template to this container before rendering it
    return form.__of__(self)()
581:
def manage_AddDocumentViewer(self, id, imageViewerUrl="", textViewerUrl="", title="", RESPONSE=None):
    """Create a document viewer with the given id in this container.

    With RESPONSE the browser is redirected to manage_main afterwards.
    """
    viewer = documentViewer(id, imageViewerUrl, textViewerUrl=textViewerUrl, title=title)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
589:
590:
591: ##
592: ## DocumentViewerTemplate class
593: ##
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template type used for the templates of the document viewer."""

    meta_type = "DocumentViewer Template"
597:
598:
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    # bind the template to this container before rendering it
    return form.__of__(self)()
603:
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                                     REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the default viewer_main template.

    id      -- id of the new template
    title   -- optional title
    text    -- accepted for compatibility; not used, the content always
               comes from zpt/viewer_main.zpt of this package
    REQUEST -- current request; when given, the browser is redirected to the
               management screen of the new template
    submit  -- accepted for compatibility; not used

    Returns an empty string.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    # read the default template and make sure the file is closed again
    template_path = os.path.join(package_home(globals()), 'zpt/viewer_main.zpt')
    template_file = open(template_path, 'r')
    try:
        txt = template_file.read()
    finally:
        template_file.close()
    logging.info("txt %s:" % txt)
    ob.pt_edit(txt, "text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code: REQUEST defaults to None, so there is nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u + '/manage_main')
    return ''
623:
624:
625:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>