0
|
1
|
|
# Base URL of the generic digilib server; used below to build the
# dirInfo-xml.jsp directory-info request and the servlet/Scaler image URL.
genericDigilib="http://nausikaa2.rz-berlin.mpg.de/digitallibrary/"
|
|
3
|
|
4 from OFS.Folder import Folder
|
|
5 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
|
|
6 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
|
|
7 from AccessControl import ClassSecurityInfo
|
|
8 from Globals import package_home
|
|
9
|
|
10 from Ft.Xml.Domlette import NonvalidatingReader
|
|
11 from Ft.Xml.Domlette import PrettyPrint, Print
|
|
12 from Ft.Xml import EMPTY_NAMESPACE
|
|
13
|
|
14 import Ft.Xml.XPath
|
|
15
|
|
16 import os.path
|
|
17 import cgi
|
|
18 import urllib
|
22
|
19 import zLOG
|
0
|
20
|
25
|
def getInt(number, default=0):
    """Return ``number`` converted to an int, or ``default`` if that fails.

    number  -- value to convert (anything ``int()`` accepts)
    default -- value returned when the conversion fails (0 if omitted)
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # None, empty or non-numeric strings and NaN/infinite floats end up
        # here; interpreter-level exceptions such as KeyboardInterrupt are
        # deliberately no longer swallowed.
        return default
|
|
27
|
|
28
|
0
|
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a DOM node.

    nodename -- DOM node whose ``childNodes`` are inspected (despite the
                parameter name this is a node object, not a name)

    Only children whose ``nodeType`` is ``TEXT_NODE`` contribute; text inside
    nested child elements is not included. Returns "" when there is none.
    """
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
|
|
36
|
|
37 import socket
|
|
38
|
|
def urlopen(url):
    """Open ``url`` via urllib with a short connection timeout.

    The timeout is the process-wide socket default: it is set to 2 seconds
    for the call and to 5 seconds afterwards. The reset now also happens when
    opening the URL raises, so a failed request no longer leaves the
    2 second timeout in place for all later socket operations.

    Returns the file-like object from ``urllib.urlopen``; its exceptions
    propagate to the caller.
    """
    socket.setdefaulttimeout(2)
    try:
        return urllib.urlopen(url)
    finally:
        socket.setdefaulttimeout(5)
|
|
45
|
|
def getParamFromDigilib(path,param):
    """Return the value of ``param`` from digilib's directory info for ``path``.

    Requests dirInfo-xml.jsp (mode ``dir``) from the generic digilib server
    and returns the text of the first ``//dir/<param>`` element.

    path  -- digilib path of the directory, appended as the ``fn`` parameter
    param -- name of the element below ``dir`` to read (e.g. 'size')

    Returns None when the document cannot be fetched or parsed, or when no
    such element exists.
    """
    # genericDigilib ends with "/"; strip it so the request URL does not
    # contain a double slash before dirInfo-xml.jsp
    imageUrl=genericDigilib.rstrip("/")+"/dirInfo-xml.jsp?mo=dir&fn="+path

    zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo (%s) from %s"%(param,imageUrl))

    try:
        dom = NonvalidatingReader.parseUri(imageUrl)
    except Exception:
        # best effort: callers treat None as "not available", but leave a trace
        zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.ERROR, "cannot read dirInfo from %s"%imageUrl)
        return None

    params=dom.xpath("//dir/%s"%param)
    zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:%s"%params)

    if params:
        return getTextFromNode(params[0])
    return None
|
|
62
|
0
|
63
|
|
64
|
|
65
|
22
|
66 ##
|
|
67 ## documentViewer class
|
|
68 ##
|
|
class documentViewer(Folder):
    """Zope folder that displays a document (page images, thumbnails, bibliographic data).

    Instance attributes set by __init__ / changeDocumentViewer:
      id, title       -- Zope id and title
      imageViewerUrl  -- URL of the image viewer (used to look up the digilib URL)
      digilibBaseUrl  -- base URL of digilib; looked up via findDigilibUrl() if not given
      thumbcols, thumbrows -- columns and rows of one thumbnail group
    """

    meta_type="Document viewer"

    security=ClassSecurityInfo()
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())
    image_main = PageTemplateFile('zpt/image_main', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())

    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())


    def __init__(self,id,imageViewerUrl,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10):
        """init document viewer

        If no digilibBaseUrl is given it is looked up with findDigilibUrl()
        (which may return None). Also creates the 'template' sub-folder.
        """
        self.id=id
        self.title=title
        self.imageViewerUrl=imageViewerUrl
        if not digilibBaseUrl:
            self.digilibBaseUrl = self.findDigilibUrl()
        else:
            self.digilibBaseUrl = digilibBaseUrl
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # add template folder so we can always use template.something
        self.manage_addFolder('template')


    security.declareProtected('View','index_html')
    def index_html(self,mode,url,start=None,pn=1):
        '''
        view it
        @param mode: defines which type of document is behind url
        @param url: url which contains display information
        @param start: first page of the thumbnail group (optional)
        @param pn: current page number
        @return: the rendered template.viewer_main page
        '''

        zLOG.LOG("documentViewer (index)", zLOG.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
        zLOG.LOG("documentViewer (index)", zLOG.INFO, "dlbaseurl: %s"%(self.digilibBaseUrl,))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            zLOG.LOG("documentViewer (index)", zLOG.INFO, "no template folder -- creating")
            self.manage_addFolder('template')

        if not self.digilibBaseUrl:
            # lookup failed earlier; retry and fall back to the fixed server
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        zLOG.LOG("documentViewer (index)", zLOG.INFO, "dlbaseurl: %s"%(self.digilibBaseUrl,))

        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
        pt = getattr(self.template, 'viewer_main')
        return pt(docinfo=docinfo,pageinfo=pageinfo)


    def getLink(self,param=None,val=None):
        """link to documentviewer with parameter param set to val

        The link is the current request URL with its query string; if val is
        None the parameter is removed from the query string instead.
        """
        params=cgi.parse_qs(self.REQUEST['QUERY_STRING'])
        if param is not None:
            if val is None:
                # drop the parameter; previously a parameter that was not in
                # the query string ended up in the link as "param=None"
                params.pop(param, None)
            else:
                params[param] = val

        url=self.REQUEST['URL']+"?"+urllib.urlencode(params, doseq=True)
        return url


    def getStyle(self, idx, selected, style=""):
        """returns the given style with 'sel' appended if idx == selected, else the style unchanged."""
        if idx == selected:
            return style + 'sel'
        else:
            return style


    def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None):
        """gets bibliographical info from the index.meta file at path or given by dom

        path    -- image path; index.meta is read from its parent directory
                   through the Texter servlet (only used when dom is None)
        docinfo -- dict to fill (a new one is created if None)
        dom     -- already parsed index.meta document (optional)

        Sets docinfo['author'], ['title'] and ['year'] where the mapping for
        the bib type and the index.meta provide them. Returns docinfo; it is
        returned unchanged if index.meta cannot be read.
        """
        zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            server="http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn="
            path="/".join(path.split("/")[0:-1])
            metaUrl=server+path+"/index.meta"
            try:
                dom = NonvalidatingReader.parseUri(metaUrl)
            except Exception:
                return docinfo

        metaData=self.metadata.main.meta.bib
        bibtype=dom.xpath("//bib/@type")
        if bibtype:
            bibtype=bibtype[0].value
        else:
            bibtype="generic"
        bibtype=bibtype.replace("-"," ") # index.meta may use "-" where the mapping uses " "
        bibmap=metaData.generateMappingForType(bibtype)
        if len(bibmap) > 0:
            for field in ('author', 'title', 'year'):
                try:
                    nodes=dom.xpath("//bib/%s"%bibmap[field][0])
                    docinfo[field]=getTextFromNode(nodes[0])
                except (KeyError, IndexError):
                    # field not in the mapping or not present in index.meta:
                    # leave it out instead of failing the whole request
                    pass

        return docinfo


    def _firstText(self, dom, expr):
        # text of the first node matching the XPath expression, None if nothing matches
        nodes=dom.xpath(expr)
        if nodes:
            return getTextFromNode(nodes[0])
        return None


    def getDocinfoFromTextTool(self,url,docinfo=None):
        """parse texttool tag in index meta

        url     -- URL of the index.meta file
        docinfo -- dict to fill (a new one is created if None)

        Sets 'imagePath' and 'numPages' (if image and archive path are
        present), 'imageURL', 'textURL' and the bibliographical fields.
        Returns docinfo; it is returned unchanged if url cannot be parsed.
        """
        zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.INFO,"url: %s"%(url))
        if docinfo is None:
            docinfo = {}

        try:
            dom = NonvalidatingReader.parseUri(url)
        except Exception:
            # sys is not imported at module level in this file
            import sys
            zLOG.LOG("documentViewer (parseUrlTexttool)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2])
            return docinfo

        archivePath=self._firstText(dom, "//resource/archive-path")
        image=self._firstText(dom, "//texttool/image")

        if image and archivePath:
            image=os.path.join(archivePath,image)
            image=image.replace("/mpiwg/online",'')
            pt=getParamFromDigilib(image,'size')
            docinfo['imagePath'] = image
            docinfo['numPages'] = pt

        viewerUrl=self._firstText(dom, "//texttool/digiliburlprefix")
        if viewerUrl is not None:
            docinfo['imageURL'] = viewerUrl

        textUrl=self._firstText(dom, "//texttool/text")
        if textUrl is not None:
            docinfo['textURL'] = textUrl

        docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)
        return docinfo


    def getDocinfoFromImagePath(self,path,docinfo=None):
        """path is the path to the images; it assumes that the index.meta file is one level higher.

        Sets 'imagePath', 'numPages' (the 'size' reported by digilib, may be
        None), 'imageURL' and the bibliographical fields. Returns docinfo.
        """
        zLOG.LOG("documentViewer (getdocinfofromimagepath)", zLOG.INFO,"path: %s"%(path))
        if docinfo is None:
            docinfo = {}
        docinfo['imagePath'] = path
        path=path.replace("/mpiwg/online","")
        pt=getParamFromDigilib(path,'size')
        docinfo['numPages'] = pt
        # genericDigilib ends with "/"; avoid a double slash in the Scaler URL
        imageUrl=genericDigilib.rstrip("/")+"/servlet/Scaler?fn=%s"%path
        docinfo['imageURL'] = imageUrl

        docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo)
        return docinfo


    def getDocinfo(self, mode, url):
        """returns docinfo depending on mode

        mode -- "texttool" (url is an index.meta with texttool information)
                or "imagepath" (url is the path to the images)

        The docinfo is cached in the session and reused as long as mode and
        url are the same. An unknown mode is logged and yields a docinfo that
        only contains 'mode' and 'url'.
        """
        zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"mode: %s, url: %s"%(mode,url))
        # look for cached docinfo in session
        if self.REQUEST.SESSION.has_key('docinfo'):
            docinfo = self.REQUEST.SESSION['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo in session: %s"%docinfo)
                return docinfo
        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        if mode=="texttool": #index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode=="imagepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        else:
            zLOG.LOG("documentViewer (getdocinfo)", zLOG.ERROR,"unknown mode!")
        zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo: %s"%docinfo)
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo


    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):
        """returns pageinfo with the given parameters

        current -- current page number (0 if not a number)
        start   -- first page of the thumbnail group; defaults to the first
                   page of the group that contains current
        rows, cols -- thumbnail rows/columns; default to self.thumbrows/thumbcols
        docinfo -- if given, its 'numPages' limits 'end' and yields 'numgroups'

        Returns a dict with 'current', 'rows', 'cols', 'groupsize', 'start',
        'end' and, with docinfo, 'numgroups'.
        """
        pageinfo = {}
        current = getInt(current)
        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # Pages are numbered from 1, so page n is in group (n-1)//grpsize.
        # (The previous current/grpsize put the last page of every group
        # into the following group.)
        groupstart = ((max(current, 1) - 1) // grpsize) * grpsize + 1
        start = getInt(start, default=groupstart)
        zLOG.LOG("documentViewer (getpageinfo)", zLOG.INFO, "start: %s"%start)
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if docinfo is not None:
            # numPages is missing or None when digilib could not be asked
            np = getInt(docinfo.get('numPages'))
            pageinfo['end'] = min(pageinfo['end'], np)
            pageinfo['numgroups'] = np // grpsize
            if np % grpsize > 0:
                pageinfo['numgroups'] += 1

        return pageinfo


    def text(self,mode,url,pn):
        """give text

        Returns the list of nodes of the text document that make up page pn:
        the siblings starting at the parent of the pn-th pb element up to and
        including the parent of the following pb element (up to the last
        sibling for the last page).

        Returns None if mode is not "texttool", no text URL is known, the
        text cannot be parsed or there is no such page.
        """
        if mode!="texttool":
            return None

        # the text URL comes from the texttool section of index.meta
        docinfo = self.getDocinfo(mode=mode,url=url)
        textpath = docinfo.get('textURL')
        if not textpath:
            return None

        try:
            dom = NonvalidatingReader.parseUri(textpath)
        except Exception:
            return None

        pagebreaks=dom.xpath("//pb")
        idx=getInt(pn)-1
        if idx < 0 or idx >= len(pagebreaks):
            return None

        firstParent=pagebreaks[idx].parentNode
        if idx+1 < len(pagebreaks):
            lastParent=pagebreaks[idx+1].parentNode
        else:
            # last page: no following page break to stop at
            lastParent=None

        # collect the nodes between the two page breaks
        collected=[]
        node=firstParent
        while node is not None and node != lastParent:
            collected.append(node)
            node=node.nextSibling
        if lastParent is not None:
            collected.append(lastParent)

        return collected


    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib

        Asks <imageViewerUrl>/getScalerUrl and strips the Scaler servlet part
        from the answer. Returns None if the request fails.
        """
        # NOTE(review): [:-1] drops the last character of imageViewerUrl --
        # presumably a trailing "/" or "?"; confirm how the URL is configured
        url = self.imageViewerUrl[:-1] + "/getScalerUrl"
        try:
            scaler = urlopen(url).read()
            return scaler.replace("/servlet/Scaler?", "")
        except Exception:
            return None


    def changeDocumentViewer(self,imageViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,RESPONSE=None):
        """change the settings of the document viewer

        Stores the given values and redirects to manage_main if a RESPONSE
        is given.
        """
        # NOTE(review): the defaults here (thumbrows=2, thumbcols=10) are the
        # reverse of those in __init__ (thumbcols=2, thumbrows=10) -- confirm
        # which is intended; left unchanged to keep the interface
        self.title=title
        self.imageViewerUrl=imageViewerUrl
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
|
0
|
371
|
|
372
|
|
373
|
|
374
|
|
375 # security.declareProtected('View management screens','renameImageForm')
|
|
376
|
|
def manage_AddDocumentViewerForm(self):
    """render the form for adding a document viewer"""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    # wrap the template in the context of the calling container before rendering
    return form.__of__(self)()
|
|
381
|
|
def manage_AddDocumentViewer(self,id,imageViewerUrl="",title="",RESPONSE=None):
    """create a documentViewer with the given id in this container

    Redirects to manage_main when a RESPONSE is given.
    """
    self._setObject(id, documentViewer(id,imageViewerUrl,title))

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
|
22
|
389
|
|
390
|
|
391 ##
|
|
392 ## DocumentViewerTemplate class
|
|
393 ##
|
|
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer

    A ZopePageTemplate subclass that only sets its own meta_type.
    """
    meta_type="DocumentViewer Template"
|
|
397
|
|
398
|
|
def manage_addDocumentViewerTemplateForm(self):
    """render the form for adding a document viewer template"""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    # wrap the template in the context of the calling container before rendering
    return form.__of__(self)()
|
|
403
|
|
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the default viewer_main template.

    id      -- id of the new template object
    title   -- optional title to set on the template
    REQUEST -- if given, the browser is redirected to the new object's
               manage_main; if None (call from code) no redirect happens

    Returns ''.
    """
    # NOTE(review): ``text`` and ``submit`` are accepted but not used; the
    # content always comes from zpt/viewer_main.zpt -- confirm this is intended

    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    templateFile = open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'))
    try:
        # close the file explicitly instead of leaving it to garbage collection
        ob.pt_edit(templateFile.read(),None)
    finally:
        templateFile.close()
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # no request to redirect (previously this failed on REQUEST.RESPONSE)
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
|
|
421
|
|
422
|
0
|
423 |