Mercurial > hg > documentViewer
annotate documentViewer.py @ 39:1dd90aabd366
added retry when reading index meta from texter applet
author | casties |
---|---|
date | Tue, 13 Jun 2006 17:21:47 +0200 |
parents | 025d3b6cba51 |
children | 749ee5389892 |
rev | line source |
---|---|
0 | 1 from OFS.Folder import Folder |
2 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate | |
3 from Products.PageTemplates.PageTemplateFile import PageTemplateFile | |
4 from AccessControl import ClassSecurityInfo | |
32 | 5 from AccessControl import getSecurityManager |
0 | 6 from Globals import package_home |
7 | |
8 from Ft.Xml.Domlette import NonvalidatingReader | |
9 from Ft.Xml.Domlette import PrettyPrint, Print | |
38 | 10 from Ft.Xml import EMPTY_NAMESPACE, Parse |
0 | 11 |
12 import Ft.Xml.XPath | |
13 | |
14 import os.path | |
31 | 15 import sys |
0 | 16 import cgi |
17 import urllib | |
22 | 18 import zLOG |
0 | 19 |
def getInt(number, default=0):
    """Return number converted to int, or default if it cannot be converted.

    @param number: any value accepted by int() (string, number, ...)
    @param default: value returned when the conversion fails (0 if omitted)
    """
    try:
        return int(number)
    except Exception:
        # Broad on purpose: the contract is "always hand back a usable
        # value".  Unlike a bare except this no longer swallows
        # KeyboardInterrupt/SystemExit on current Python versions.
        return default
26 | |
def getTextFromNode(nodename):
    """Return the concatenated text of the direct text children of a DOM node.

    Only immediate TEXT_NODE children are used; text nested in child
    elements is ignored.  Returns "" when nodename is None.

    @param nodename: DOM node (despite the name, not a string) or None
    """
    if nodename is None:
        return ""
    # join once instead of growing the string piece by piece
    return "".join([node.data
                    for node in nodename.childNodes
                    if node.nodeType == node.TEXT_NODE])
36 | |
35 | 37 |
def getParentDir(path):
    """Return path with its last '/'-separated component removed.

    A path without any '/' yields the empty string.
    """
    cut = path.rfind('/')
    if cut < 0:
        # no separator at all: nothing is left once the last part is dropped
        return ''
    return path[:cut]
41 | |
42 | |
0 | 43 import socket |
44 | |
def urlopen(url,timeout=2):
    """Open url with urllib using a temporary socket timeout.

    The process-wide default socket timeout is set to `timeout` seconds for
    the duration of the call and then set to 5 seconds, also when opening
    the URL fails (previously a failure left the short timeout in place).

    @param url: URL to open
    @param timeout: timeout in seconds while opening the URL
    @return: the file-like object returned by urllib.urlopen
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        # NOTE: this is a global setting; 5 is the value the original code
        # always left behind, not necessarily the value that was set before.
        socket.setdefaulttimeout(5)
51 | |
52 | |
22 | 53 ## |
54 ## documentViewer class | |
55 ## | |
class documentViewer(Folder):
    """Document viewer: Zope folder that displays page images served by digilib.

    Document information is read either from an index.meta file (mode
    "texttool") or derived from an image directory path (mode "imagepath").
    """

    meta_type="Document viewer"

    security=ClassSecurityInfo()
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())
    image_main = PageTemplateFile('zpt/image_main', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())

    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())


    def __init__(self,id,imageViewerUrl,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):
        """init document viewer

        @param id: Zope id of the new object
        @param imageViewerUrl: URL of the image viewer (used to look up the digilib URL)
        @param title: title of the object
        @param digilibBaseUrl: base URL of digilib; looked up via findDigilibUrl() if empty
        @param thumbcols: number of thumbnail columns
        @param thumbrows: number of thumbnail rows
        @param authgroups: comma separated list of authorized groups
        """
        self.id=id
        self.title=title
        self.imageViewerUrl=imageViewerUrl
        if not digilibBaseUrl:
            self.digilibBaseUrl = self.findDigilibUrl()
        else:
            self.digilibBaseUrl = digilibBaseUrl
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        # add template folder so we can always use template.something
        self.manage_addFolder('template')


    security.declareProtected('View','index_html')
    def index_html(self,mode,url,start=None,pn=1):
        '''
        view it
        @param mode: defines which type of document is behind url
        @param url: url which contains display information
        @param start: first page of the current thumbnail group (optional)
        @param pn: current page number
        '''

        zLOG.LOG("documentViewer (index)", zLOG.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        if not self.digilibBaseUrl:
            # fall back to a fixed server when the lookup fails
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
        pt = getattr(self.template, 'viewer_main')
        return pt(docinfo=docinfo,pageinfo=pageinfo)


    def getLink(self,param=None,val=None):
        """link to documentviewer with parameter param set to val

        The current request parameters are kept; param is removed from the
        query string when val is None.
        """
        params=self.REQUEST.form.copy()
        if param is not None:
            if val is None:
                if param in params:
                    del params[param]
            else:
                params[param] = str(val)

        # quote values and assemble into query string
        ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
        url=self.REQUEST['URL1']+"?"+ps
        return url


    def getStyle(self, idx, selected, style=""):
        """returns a string with the given style and append 'sel' if idx == selected."""
        #zLOG.LOG("documentViewer (getstyle)", zLOG.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
        if idx == selected:
            return style + 'sel'
        else:
            return style


    def isAccessible(self, docinfo):
        """returns if access to the resource is granted

        @param docinfo: dict; its 'accessType' entry decides about access
        """
        access = docinfo.get('accessType', None)
        if access is None:
            # no access information: access is currently GRANTED
            #TODO: check whether missing information should deny access instead
            return True
        elif access == 'free':
            return True
        elif access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            if user is not None:
                #print "user: ", user
                return (user.getUserName() != "Anonymous User")
            else:
                return False

        zLOG.LOG("documentViewer (accessOK)", zLOG.INFO, "unknown access type %s"%access)
        return False


    def getDirinfoFromDigilib(self,path,docinfo=None):
        """reads directory information for path from digilib

        Sets docinfo['numPages'] (0 if digilib reports no usable size).

        @param path: image directory path as understood by digilib
        @param docinfo: dict to update; a new dict is created if None
        @return: the updated docinfo
        @raise IOError: if dirInfo could not be read after three tries
        """
        if docinfo is None:
            docinfo = {}

        imageUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

        zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo from %s"%(imageUrl))

        for cnt in (1,2,3):
            try:
                dom = NonvalidatingReader.parseUri(imageUrl)
                break
            except Exception:
                zLOG.LOG("documentViewer (getdirinfofromdigilib)", zLOG.ERROR, "error reading %s (try %d)"%(imageUrl,cnt))
        else:
            # loop ended without break: every try failed
            raise IOError("Unable to get dir-info from %s"%(imageUrl))

        sizes=dom.xpath("//dir/size")
        # the original format string had no placeholder, so the value never showed up
        zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:size=%s"%(sizes,))

        if sizes:
            # getInt yields 0 for an empty or non-numeric size instead of raising
            docinfo['numPages'] = getInt(getTextFromNode(sizes[0]))
        else:
            docinfo['numPages'] = 0

        return docinfo


    def getIndexMeta(self, url):
        """returns dom of index.meta document at url

        @param url: full http URL of the index.meta, or an online path that
            is resolved through the digilib Texter servlet
        @raise IOError: if the document could not be read and parsed
        """
        num_retries = 3
        dom = None
        metaUrl = None
        if url.startswith("http://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url
            if not metaUrl.endswith("index.meta"):
                metaUrl += "/index.meta"

        for n in range(num_retries):
            try:
                # patch by dirk: fetching the text and parsing it avoids the
                # encoding errors seen with NonvalidatingReader.parseUri(metaUrl)
                handle = urllib.urlopen(metaUrl)
                try:
                    txt = handle.read()
                finally:
                    handle.close()
                dom = Parse(txt)
                # success: stop retrying (previously all tries were always made)
                break
            except Exception:
                zLOG.LOG("ERROR documentViewer (getIndexMata)", zLOG.ERROR,"%s (%s)"%sys.exc_info()[0:2])

        if dom is None:
            raise IOError("Unable to read index meta from %s"%(url))

        return dom


    def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None):
        """gets authorization info from the index.meta file at path or given by dom

        Sets docinfo['accessType'] to the access type, to the lower-cased
        group/institution name, or to None if there is no access element.
        """
        zLOG.LOG("documentViewer (getauthinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))

        access = None

        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(getParentDir(path))

        acctype = dom.xpath("//access-conditions/access/@type")
        if acctype:
            access=acctype[0].value
            if access in ['group', 'institution']:
                names = dom.xpath("//access-conditions/access/name")
                if names:
                    access = getTextFromNode(names[0]).lower()
                # without a name element the bare type is kept; isAccessible
                # then treats it like any other (probably unknown) type

        docinfo['accessType'] = access
        return docinfo


    def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None):
        """gets bibliographical info from the index.meta file at path or given by dom

        Sets docinfo['author'], ['title'] and ['year'] when a mapping for the
        bib type exists; fields missing in the document become "".
        """
        zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(getParentDir(path))

        # metadata object is expected to be available in the Zope context
        metaData=self.metadata.main.meta.bib
        bibtype=dom.xpath("//bib/@type")
        if bibtype:
            bibtype=bibtype[0].value
        else:
            bibtype="generic"
        bibtype=bibtype.replace("-"," ") # index meta uses "-" where the mapping uses " "
        bibmap=metaData.generateMappingForType(bibtype)
        #print "bibmap: ", bibmap, " for: ", bibtype
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
            for field in ('author', 'title', 'year'):
                tag = bibmap[field][0]
                found = None
                if tag:
                    nodes = dom.xpath("//bib/%s"%tag)
                    if nodes:
                        found = nodes[0]
                # getTextFromNode(None) is "" -- no IndexError for missing elements
                docinfo[field] = getTextFromNode(found)

        return docinfo


    def getDocinfoFromTextTool(self,url,dom=None,docinfo=None):
        """parse texttool tag in index meta

        @param url: URL or online path of the index.meta
        @param dom: already parsed index.meta (read from url if None)
        @param docinfo: dict to update; a new dict is created if None
        @return: the updated docinfo
        @raise IOError: if the index.meta has no texttool/image element
        """
        zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.INFO,"url: %s"%(url))
        if docinfo is None:
            docinfo = {}

        if dom is None:
            dom = self.getIndexMeta(url)

        # stays None when there is no resource/name (was an unbound name before)
        archiveName = None
        archiveNames=dom.xpath("//resource/name")
        if archiveNames:
            archiveName=getTextFromNode(archiveNames[0])

        archivePath = None
        archivePaths=dom.xpath("//resource/archive-path")
        if archivePaths:
            archivePath=getTextFromNode(archivePaths[0])
            if archivePath:
                # clean up archive path
                if not archivePath.startswith('/'):
                    archivePath = '/' + archivePath
                if archiveName and not archivePath.endswith(archiveName):
                    archivePath += "/" + archiveName
            else:
                # empty element counts as "no archive path"
                archivePath = None

        imageDirs=dom.xpath("//texttool/image")
        if imageDirs:
            imageDir=getTextFromNode(imageDirs[0])
        else:
            # we balk with no image tag
            raise IOError("No text-tool info in %s"%(url))

        if imageDir and archivePath:
            #print "image: ", imageDir, " archivepath: ", archivePath
            imageDir=os.path.join(archivePath,imageDir)
            imageDir=imageDir.replace("/mpiwg/online",'')
            docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo)
            docinfo['imagePath'] = imageDir
            docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir

        viewerUrls=dom.xpath("//texttool/digiliburlprefix")
        if viewerUrls:
            docinfo['viewerURL'] = getTextFromNode(viewerUrls[0])

        textUrls=dom.xpath("//texttool/text")
        if textUrls:
            docinfo['textURL'] = getTextFromNode(textUrls[0])

        docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)
        docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)
        return docinfo


    def getDocinfoFromImagePath(self,path,docinfo=None):
        """path is the path to the images; it assumes that the index.meta file is one level higher."""
        zLOG.LOG("documentViewer (getdocinfofromimagepath)", zLOG.INFO,"path: %s"%(path))
        if docinfo is None:
            docinfo = {}
        path=path.replace("/mpiwg/online","")
        docinfo['imagePath'] = path
        docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo)
        imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
        docinfo['imageURL'] = imageUrl

        docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo)
        docinfo = self.getAuthinfoFromIndexMeta(path,docinfo=docinfo)
        return docinfo


    def getDocinfo(self, mode, url):
        """returns docinfo depending on mode

        The result is cached in the session and reused as long as mode and
        url stay the same.

        @param mode: "texttool" or "imagepath"
        @param url: location of the document (meaning depends on mode)
        @raise ValueError: for any other mode
        """
        zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"mode: %s, url: %s"%(mode,url))
        # look for cached docinfo in session
        if self.REQUEST.SESSION.has_key('docinfo'):
            docinfo = self.REQUEST.SESSION['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo in session: %s"%docinfo)
                return docinfo
        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        if mode=="texttool": #index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode=="imagepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        else:
            zLOG.LOG("documentViewer (getdocinfo)", zLOG.ERROR,"unknown mode!")
            raise ValueError("Unknown mode %s"%(mode))

        zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo: %s"%docinfo)
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo


    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):
        """returns pageinfo with the given parameters

        @param current: current page number (anything getInt understands)
        @param start: first page of the thumbnail group; derived from current if unusable
        @param rows: thumbnail rows (defaults to self.thumbrows)
        @param cols: thumbnail columns (defaults to self.thumbcols)
        @param docinfo: if given, its 'numPages' limits 'end' and yields 'numgroups'
        @return: dict with keys current, rows, cols, groupsize, start, end
            and (with docinfo) numgroups
        """
        pageinfo = {}
        current = getInt(current)
        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # default start: first page of the group that contains current
        start = getInt(start, default=((current // grpsize) * grpsize +1))
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if docinfo is not None:
            np = int(docinfo['numPages'])
            pageinfo['end'] = min(pageinfo['end'], np)
            pageinfo['numgroups'] = np // grpsize
            if np % grpsize > 0:
                # partially filled last group
                pageinfo['numgroups'] += 1

        return pageinfo

    def text(self,mode,url,pn):
        """give text

        NOTE(review): unfinished -- the collected nodes are never returned
        (the method always returns None), and parseUrlTextTool is not defined
        in the visible part of this module, so mode "texttool" presumably
        fails with a NameError. Confirm before relying on this method.
        """
        if mode!="texttool":
            # no text path known for other modes; the original reached the
            # same result through an unbound variable caught by a bare except
            return None

        #index.meta with texttool information
        (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)

        #print textpath
        try:
            dom = NonvalidatingReader.parseUri(textpath)
        except Exception:
            return None

        collected=[]
        nodes=dom.xpath("//pb")

        # page break that starts page pn ...
        node=nodes[int(pn)-1]

        p=node

        while p.tagName!="p":
            p=p.parentNode

        # ... and the page break that starts the following page
        endNode=nodes[int(pn)]

        e=endNode

        while e.tagName!="p":
            e=e.parentNode

        sibling=node.parentNode

        # collect the siblings between both page breaks
        while sibling and (sibling!=endNode.parentNode):
            collected.append(sibling)
            sibling=sibling.nextSibling
        collected.append(endNode.parentNode)

        if p==e:# both in the same paragraph
            pass
#           else:
#                next=p
#                while next!=e:
#                    print next,e
#                    list.append(next)
#                    next=next.nextSibling
#
#        for x in list:
#            PrettyPrint(x)
#
#        return list
#

    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib

        @return: the scaler URL without "/servlet/Scaler?", or None if the
            lookup fails for any reason
        """
        # the last character of imageViewerUrl is dropped before appending
        # (presumably a trailing separator -- confirm against configured URLs)
        url = self.imageViewerUrl[:-1] + "/getScalerUrl"
        try:
            scaler = urlopen(url).read()
            return scaler.replace("/servlet/Scaler?", "")
        except Exception:
            # best effort: callers treat None as "unknown"
            return None

    def changeDocumentViewer(self,imageViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):
        """change the configuration of the document viewer

        NOTE(review): the defaults for thumbrows/thumbcols are swapped
        compared to __init__ (rows=10, cols=2); kept as they are because
        callers may rely on them -- confirm which one is intended.
        """
        self.title=title
        self.imageViewerUrl=imageViewerUrl
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
0 | 464 |
465 | |
466 | |
467 | |
468 # security.declareProtected('View management screens','renameImageForm') | |
469 | |
def manage_AddDocumentViewerForm(self):
    """render the form for adding a document viewer"""
    # wrap the template in the context of self before rendering it
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
474 | |
def manage_AddDocumentViewer(self,id,imageViewerUrl="",title="",RESPONSE=None):
    """create a document viewer with the given id inside self"""
    self._setObject(id, documentViewer(id,imageViewerUrl,title))

    if RESPONSE is None:
        return
    # called from the management interface: go back to the folder listing
    RESPONSE.redirect('manage_main')
22 | 482 |
483 | |
484 ## | |
485 ## DocumentViewerTemplate class | |
486 ## | |
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template used by the document viewer.

    Behaves like a ZopePageTemplate; only the meta type differs.
    """
    meta_type="DocumentViewer Template"
490 | |
491 | |
def manage_addDocumentViewerTemplateForm(self):
    """render the form for adding a document viewer template"""
    # wrap the template in the context of self before rendering it
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
496 | |
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the default viewer_main template.

    @param id: id of the new template object
    @param title: optional title to set on the template
    @param text: unused (the content always comes from zpt/viewer_main.zpt)
    @param REQUEST: if given, the browser is redirected to the management
        screen of the new template
    @param submit: unused
    @return: empty string
    """

    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    templateFile = open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'))
    try:
        ob.pt_edit(templateFile.read(),None)
    finally:
        # the handle used to be left open
        templateFile.close()
    if title:
        ob.pt_setTitle(title)

    # REQUEST defaults to None (programmatic use); only redirect when present
    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
514 | |
515 | |
0 | 516 |