1:
2:
3: from OFS.Folder import Folder
4: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
5: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
6: from AccessControl import ClassSecurityInfo
7: from AccessControl import getSecurityManager
8: from Globals import package_home
9:
10: from Ft.Xml.Domlette import NonvalidatingReader
11: from Ft.Xml.Domlette import PrettyPrint, Print
12: from Ft.Xml import EMPTY_NAMESPACE, Parse
13:
14: import Ft.Xml.XPath
15:
16: import os.path
17: import sys
18: import cgi
19: import urllib
20: import logging
21:
22: import urlparse
23:
def logger(txt, method, txt2):
    """Write a message to the root logger.

    @param txt: message prefix (usually the name of the calling component)
    @param method: logging level such as logging.INFO or logging.ERROR;
                   anything that is not an integer level falls back to INFO
    @param txt2: message text, appended to txt without a separator
    """
    if isinstance(method, int):
        level = method
    else:
        # keep working for callers that pass something other than a level
        level = logging.INFO
    logging.log(level, txt + txt2)
27:
28:
def getInt(number, default=0):
    """Return number converted to an int.

    @param number: value to convert (string, number, None, ...)
    @param default: value returned when number cannot be converted
    @return: int(number), or default if the conversion fails
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # only conversion problems are absorbed; KeyboardInterrupt and
        # SystemExit are no longer swallowed
        return default
35:
def getTextFromNode(nodename):
    """Return the concatenated text of the direct text children of a node.

    Text inside nested elements is not included. Returns the empty string
    when nodename is None or has no text children.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
46:
47:
def getParentDir(path):
    """Return path without its last '/'-separated component.

    A path that contains no '/' yields the empty string.
    """
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
51:
52:
53: import socket
54:
def urlopen(url, timeout=2):
    """Open url with urllib using a temporary socket timeout.

    The timeout is applied through the process-wide socket default. The
    default is set back to 5 seconds afterwards, also when opening the URL
    fails, so a failed request no longer leaves the short timeout in place.

    @param url: URL to open
    @param timeout: socket timeout in seconds used while opening the URL
    @return: the file-like object returned by urllib.urlopen
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        socket.setdefaulttimeout(5)
61:
62:
63: ##
64: ## documentViewer class
65: ##
class documentViewer(Folder):
    """Zope folder that displays a document (page images and/or text).

    The document is described either by an index.meta file with a texttool
    section (mode "texttool") or by the path of a digilib image directory
    (mode "imagepath").
    """
    #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"

    meta_type="Document viewer"

    security=ClassSecurityInfo()
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())
    image_main = PageTemplateFile('zpt/image_main', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())

    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())


    def __init__(self,id,imageViewerUrl,textViewerUrl=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):
        """Initialise the document viewer.

        @param id: Zope id of the new object
        @param imageViewerUrl: URL of the image viewer
        @param textViewerUrl: URL of the text viewer (optional)
        @param title: title of the object
        @param digilibBaseUrl: base URL of digilib; looked up through
                               findDigilibUrl() when empty
        @param thumbcols: number of thumbnail columns
        @param thumbrows: number of thumbnail rows
        @param authgroups: comma separated names of authorized groups
        """
        self.id=id
        self.title=title
        self.imageViewerUrl=imageViewerUrl
        self.textViewerUrl=textViewerUrl

        if not digilibBaseUrl:
            self.digilibBaseUrl = self.findDigilibUrl()
        else:
            self.digilibBaseUrl = digilibBaseUrl
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        # add template folder so we can always use template.something
        self.manage_addFolder('template')


    security.declareProtected('View','index_html')
    def index_html(self,mode,url,viewMode="auto",start=None,pn=1):
        '''
        Render the viewer page for a document.

        @param mode: type of document description behind url
                     ("texttool" or "imagepath", see getDocinfo)
        @param url: url or path of the document description
        @param viewMode: "images", "text" or "auto"; "auto" selects "text"
                         when the document has a text URL and a text viewer
                         is configured, "images" otherwise
        @param start: first page of the thumbnail group
        @param pn: current page number
        '''

        logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        if not self.digilibBaseUrl:
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
        pt = getattr(self.template, 'viewer_main')

        if viewMode=="auto": # automatic mode selected
            # text URL is set and a text viewer is configured
            if docinfo.get("textURL",'') and self.textViewerUrl:
                viewMode="text"
            else:
                viewMode="images"

        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)


    def getLink(self,param=None,val=None):
        """Return a link to the current page with one request parameter changed.

        @param param: name of the parameter to change (None: change nothing)
        @param val: new value; None removes the parameter from the link
        """
        params=self.REQUEST.form.copy()
        if param is not None:
            if val is None:
                params.pop(param, None)
            else:
                params[param] = str(val)

        # quote values and assemble into query string
        ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
        return self.REQUEST['URL1']+"?"+ps


    def getStyle(self, idx, selected, style=""):
        """Return style, with 'sel' appended when idx equals selected."""
        #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
        if idx == selected:
            return style + 'sel'
        return style


    def isAccessible(self, docinfo):
        """Return True if access to the resource described by docinfo is granted.

        Access type 'free' is always granted. A missing access type or one of
        the configured authgroups is granted to logged in users only. Any
        other access type is denied.
        """
        access = docinfo.get('accessType', None)
        logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access)
        if access == 'free':
            logger("documentViewer (accessOK)", logging.INFO, "access is free")
            return True
        elif access is None or access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            if user is not None:
                return (user.getUserName() != "Anonymous User")
            return False

        logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access)
        return False


    def _readDom(self, url):
        """Fetch url once and return the parsed DOM; the connection is closed."""
        handle = urllib.urlopen(url)
        try:
            # read the text first and parse it afterwards (instead of
            # NonvalidatingReader.parseUri); per the original author's note
            # this avoids encoding errors
            txt = handle.read()
        finally:
            handle.close()
        return Parse(txt)


    def getDirinfoFromDigilib(self,path,docinfo=None):
        """Store the number of pages of a digilib directory in docinfo.

        @param path: digilib path of the image directory
        @param docinfo: dict to update (a new dict is created when None)
        @return: docinfo with 'numPages' set (0 if digilib reports no size)
        @raise IOError: if the directory info cannot be read after 3 tries
        """
        num_retries = 3
        if docinfo is None:
            docinfo = {}

        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

        logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl))

        for cnt in range(num_retries):
            try:
                dom = self._readDom(infoUrl)
                break
            except Exception:
                logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))
        else:
            # no attempt succeeded
            raise IOError("Unable to get dir-info from %s"%(infoUrl))

        sizes=dom.xpath("//dir/size")
        logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size %s"%(sizes,))

        if sizes:
            docinfo['numPages'] = getInt(getTextFromNode(sizes[0]))
        else:
            docinfo['numPages'] = 0

        return docinfo


    def getIndexMeta(self, url):
        """Return the DOM of the index.meta document at url.

        @param url: full http URL, or an online path that is fetched through
                    the digilib Texter servlet; "/index.meta" is appended to
                    online paths that do not already end with "index.meta"
        @raise IOError: if the document cannot be read after 3 tries
        """
        num_retries = 3
        dom = None
        metaUrl = None
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url.replace("/mpiwg/online","")
            if not metaUrl.endswith("index.meta"):
                metaUrl += "/index.meta"
        logger("documentViewer (getindexmeta)", logging.INFO, "index.meta url: %s"%(metaUrl))

        for cnt in range(num_retries):
            try:
                dom = self._readDom(metaUrl)
                break
            except Exception:
                logger("ERROR documentViewer (getIndexMata)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])

        if dom is None:
            raise IOError("Unable to read index meta from %s"%(url))

        return dom


    def getPresentationInfoXML(self, url):
        """Return the DOM of the presentation info.xml document at url.

        @param url: full http URL, or an online path that is fetched through
                    the digilib Texter servlet
        @raise IOError: if the document cannot be read after 3 tries
        """
        num_retries = 3
        dom = None
        metaUrl = None
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url.replace("/mpiwg/online","")

        for cnt in range(num_retries):
            try:
                dom = self._readDom(metaUrl)
                break
            except Exception:
                logger("ERROR documentViewer (getPresentationInfoXML)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])

        if dom is None:
            raise IOError("Unable to read infoXMLfrom %s"%(url))

        return dom


    def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None):
        """Store the access type from an index.meta file in docinfo.

        @param path: path of the document; the index.meta is read from its
                     parent directory when no dom is given
        @param docinfo: dict to update (a new dict is created when None)
        @param dom: already parsed index.meta (optional)
        @return: docinfo with 'accessType' set (None if not specified)
        """
        logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path))

        access = None

        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(getParentDir(path))

        acctype = dom.xpath("//access-conditions/access/@type")
        if acctype:
            access=acctype[0].value
            if access in ['group', 'institution']:
                # the name of the group/institution becomes the access type;
                # without a name the generic type is kept, which
                # isAccessible() only accepts if it is listed in authgroups
                names = dom.xpath("//access-conditions/access/name")
                if names:
                    access = getTextFromNode(names[0]).lower()

        docinfo['accessType'] = access
        return docinfo


    def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None):
        """Store bibliographical info from an index.meta file in docinfo.

        Sets 'author', 'title' and 'year' when a mapping for the bib type is
        available, and always sets 'lang' ('' if not specified).

        @param path: path of the document; the index.meta is read from its
                     parent directory when no dom is given
        @param docinfo: dict to update (a new dict is created when None)
        @param dom: already parsed index.meta (optional)
        """
        logger("documentViewer (getbibinfofromindexmeta)", logging.INFO,"path: %s"%(path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(getParentDir(path))

        # NOTE(review): 'metadata' is not defined in this class; presumably it
        # is acquired from the Zope context -- confirm
        metaData=self.metadata.main.meta.bib
        bibtype=dom.xpath("//bib/@type")
        if bibtype:
            bibtype=bibtype[0].value
        else:
            bibtype="generic"
        bibtype=bibtype.replace("-"," ") # wrong types in index meta "-" instead of " " (not wrong! ROC)
        bibmap=metaData.generateMappingForType(bibtype)
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
            # NOTE(review): raises IndexError when a mapped element is missing
            # in the bib section (unchanged from the original)
            docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
            docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
            docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])

        langs = dom.xpath("//bib/lang")
        if langs:
            docinfo['lang']=getTextFromNode(langs[0])
        else:
            docinfo['lang']=''
        return docinfo


    def getDocinfoFromTextTool(self,url,dom=None,docinfo=None):
        """Fill docinfo from the texttool section of an index.meta file.

        @param url: url or online path of the index.meta file
        @param dom: already parsed index.meta (optional)
        @param docinfo: dict to update (a new dict is created when None)
        @return: docinfo with image, text, viewer and bibliographical info
        @raise IOError: if no archive path can be determined
        """
        logger("documentViewer (getdocinfofromtexttool)", logging.INFO,"url: %s"%(url))
        if docinfo is None:
            docinfo = {}

        if docinfo.get('lang',None) is None:
            docinfo['lang']='' # default: no language set
        if dom is None:
            dom = self.getIndexMeta(url)

        archivePath = None
        archiveName = None

        archiveNames=dom.xpath("//resource/name")
        if archiveNames:
            archiveName=getTextFromNode(archiveNames[0])
        else:
            logger("documentViewer (getdocinfofromtexttool)", logging.WARNING,"resource/name missing in: %s"%(url))

        archivePaths=dom.xpath("//resource/archive-path")
        if archivePaths:
            # an empty archive-path element is treated like a missing one
            archivePath=getTextFromNode(archivePaths[0]) or None
        if archivePath:
            # clean up archive path
            if not archivePath.startswith('/'):
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            logger("documentViewer (getdocinfofromtexttool)", logging.WARNING,"resource/archive-path missing in: %s"%(url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')

        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s"%(url))

        # defaults: no images
        docinfo['imagePath'] = ""
        docinfo['imageURL'] = ""

        imageDirs=dom.xpath("//texttool/image")
        if imageDirs:
            imageDir=getTextFromNode(imageDirs[0])
        else:
            # a missing image tag is no longer an error because text mode
            # is the standard now
            imageDir=""
            # for the moment simply one page; navigation through the
            # thumbnails does not work in this case
            docinfo['numPages']=1

        if imageDir and archivePath:
            imageDir=os.path.join(archivePath,imageDir)
            imageDir=imageDir.replace("/mpiwg/online",'')
            docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo)
            docinfo['imagePath'] = imageDir
            docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir

        viewerUrls=dom.xpath("//texttool/digiliburlprefix")
        if viewerUrls:
            docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

        textUrls=dom.xpath("//texttool/text")
        if textUrls:
            textUrl=getTextFromNode(textUrls[0])
            if urlparse.urlparse(textUrl)[0]=="": # not a URL: relative to the archive path
                textUrl=os.path.join(archivePath,textUrl)
            docinfo['textURL'] = textUrl

        presentationUrls=dom.xpath("//texttool/presentation")
        # get info from the bib tag
        docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)

        if presentationUrls:
            # overwrite the bib info with the presentation info; its URL is
            # the metadata URL with index.meta replaced by the relative path
            # of the presentation info
            presentationUrl=url.replace('index.meta',getTextFromNode(presentationUrls[0]))
            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl,docinfo=docinfo,dom=dom)

        return docinfo


    def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
        """Store author, title and year from a texttool presentation file in docinfo.

        @param url: url or online path of the presentation info file
        @param docinfo: dict to update (a new dict is created when None)
        @param dom: ignored; the presentation file is always read from url
                    (the parameter is kept for existing callers)
        """
        if docinfo is None:
            docinfo = {}
        dom=self.getPresentationInfoXML(url)
        docinfo['author']=getTextFromNode(dom.xpath("//author")[0])
        docinfo['title']=getTextFromNode(dom.xpath("//title")[0])
        docinfo['year']=getTextFromNode(dom.xpath("//date")[0])
        return docinfo


    def getDocinfoFromImagePath(self,path,docinfo=None):
        """Fill docinfo for a directory of page images.

        The index.meta file is expected one level above path.

        @param path: path of the image directory
        @param docinfo: dict to update (a new dict is created when None)
        """
        logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path))
        if docinfo is None:
            docinfo = {}
        path=path.replace("/mpiwg/online","")
        docinfo['imagePath'] = path
        docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo)
        docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+path

        docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo)
        docinfo = self.getAuthinfoFromIndexMeta(path,docinfo=docinfo)
        return docinfo


    def getDocinfo(self, mode, url):
        """Return the docinfo dict for a document, using the session as cache.

        @param mode: "texttool" (url points to an index.meta with texttool
                     information) or "imagepath" (url is an image directory)
        @param url: url or path of the document description
        @raise ValueError: if mode is unknown
        """
        logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url))
        # look for cached docinfo in session
        if self.REQUEST.SESSION.has_key('docinfo'):
            docinfo = self.REQUEST.SESSION['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%docinfo)
                return docinfo
        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        if mode=="texttool": #index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode=="imagepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        else:
            logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")
            raise ValueError("Unknown mode %s"%(mode))

        logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo)
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo


    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):
        """Return a dict describing the current page and its thumbnail group.

        @param current: current page number (pages are counted from 1)
        @param start: first page of the group; by default the start of the
                      group that contains the current page
        @param rows: thumbnail rows (default: self.thumbrows)
        @param cols: thumbnail columns (default: self.thumbcols)
        @param docinfo: docinfo dict; when given, 'end' is limited to its
                        'numPages' and 'numgroups' is computed
        @return: dict with 'current', 'rows', 'cols', 'groupsize', 'start',
                 'end' and, if docinfo is given, 'numgroups'
        """
        pageinfo = {}
        current = getInt(current)
        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # pages are counted from 1, so the last page of a group (e.g. page 20
        # with a group size of 20) must still map to the start of its own
        # group and not to the start of the following one
        groupStart = (max(current - 1, 0) // grpsize) * grpsize + 1
        start = getInt(start, default=groupStart)
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if docinfo is not None:
            np = int(docinfo['numPages'])
            pageinfo['end'] = min(pageinfo['end'], np)
            pageinfo['numgroups'] = np // grpsize
            if np % grpsize > 0:
                pageinfo['numgroups'] += 1

        return pageinfo


    def text(self,mode,url,pn):
        """Collect the nodes between two page breaks of a text (unfinished).

        NOTE(review): the statements that returned the collected nodes are
        disabled in the original source, so this method always returns None.
        parseUrlTextTool is not defined in the visible part of this file --
        confirm that it exists elsewhere.
        """
        if mode != "texttool":
            # only index.meta documents with texttool information are handled
            return None
        (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)

        try:
            dom = NonvalidatingReader.parseUri(textpath)
        except Exception:
            return None

        collected=[]
        nodes=dom.xpath("//pb")

        # page break that starts page pn and the paragraph that contains it
        node=nodes[int(pn)-1]
        p=node
        while p.tagName!="p":
            p=p.parentNode

        # page break that starts the next page and its paragraph
        endNode=nodes[int(pn)]
        e=endNode
        while e.tagName!="p":
            e=e.parentNode

        # collect the siblings from the parent of the first page break up to
        # and including the parent of the second one
        sibling=node.parentNode
        while sibling and (sibling!=endNode.parentNode):
            collected.append(sibling)
            sibling=sibling.nextSibling
        collected.append(endNode.parentNode)

        if p==e: # both page breaks are in the same paragraph
            pass
        # the original continued here with disabled code that walked from p
        # to e, pretty-printed the collected nodes and returned the list


    def findDigilibUrl(self):
        """Try to get the digilib base URL from zogilib.

        @return: the scaler URL reported by the image viewer without its
                 "/servlet/Scaler?" part, or None if it cannot be determined
        """
        # NOTE(review): the last character of imageViewerUrl is dropped
        # unconditionally; presumably a trailing "/" or "?" -- confirm
        url = self.imageViewerUrl[:-1] + "/getScalerUrl"

        try:
            logging.info("finddigiliburl: %s"%urlparse.urlparse(url)[0])
            logging.info("finddigiliburl: %s"%urlparse.urljoin(self.absolute_url(),url))

            if urlparse.urlparse(url)[0]=='': #relative path
                url=urlparse.urljoin(self.absolute_url()+"/",url)

            handle = urlopen(url)
            try:
                scaler = handle.read()
            finally:
                handle.close()
            return scaler.replace("/servlet/Scaler?", "")
        except Exception:
            # best effort: callers fall back to a default when None is returned
            logging.error("finddigiliburl: unable to read %s (%s)"%(url, sys.exc_info()[1]))
            return None


    def changeDocumentViewer(self,imageViewerUrl,textViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):
        """Change the configuration of the document viewer.

        NOTE(review): the defaults of thumbrows and thumbcols are swapped
        compared to __init__ (2 columns, 10 rows); they are left unchanged
        here because callers may rely on them.
        """
        self.title=title
        self.imageViewerUrl=imageViewerUrl
        self.textViewerUrl=textViewerUrl
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
574:
575:
576:
577:
578: # security.declareProtected('View management screens','renameImageForm')
579:
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
584:
def manage_AddDocumentViewer(self,id,imageViewerUrl="",textViewerUrl="",title="",RESPONSE=None):
    """Create a document viewer with the given id in this container.

    Redirects to 'manage_main' when a RESPONSE is given.
    """
    viewer = documentViewer(id, imageViewerUrl, textViewerUrl=textViewerUrl, title=title)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
592:
593:
594: ##
595: ## DocumentViewerTemplate class
596: ##
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template used by the document viewer.

    Behaves like a ZopePageTemplate; only the meta type differs.
    """

    meta_type = "DocumentViewer Template"
600:
601:
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
606:
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the default viewer template.

    The content is always read from zpt/viewer_main.zpt in the package
    directory; the text and submit parameters are accepted but not used.

    @param id: id of the new template
    @param title: title of the new template (optional)
    @param REQUEST: when given, the response is redirected to the management
                    screen of the new template
    @return: the empty string
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        # do not leave the file handle open
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # called from code without a request: nothing to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
626:
627:
628:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>