comparison documentViewer_old.py @ 453:beb7ccb92564 elementtree

first version using elementtree instead of 4suite xml
author casties
date Thu, 14 Jul 2011 19:43:56 +0200
parents
children 0a53fea83df7
comparison
equal deleted inserted replaced
408:4e84c53e49b3 453:beb7ccb92564
1
2 from OFS.Folder import Folder
3 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5 from AccessControl import ClassSecurityInfo
6 from AccessControl import getSecurityManager
7 from Globals import package_home
8 from Products.zogiLib.zogiLib import browserCheck
9
10 from Ft.Xml import EMPTY_NAMESPACE, Parse
11 import Ft.Xml.Domlette
12
13 import xml.etree.ElementTree as ET
14
15 import os.path
16 import sys
17 import urllib
18 import urllib2
19 import logging
20 import math
21 import urlparse
22 import cStringIO
23 import re
24 import string
25
def logger(txt, method, txt2):
    """Log txt followed by txt2 at INFO level.

    method is accepted for compatibility with existing callers but is not used.
    """
    message = txt + txt2
    logging.info(message)
29
30
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that is not possible.

    Only conversion failures are handled (None, non-numeric strings, NaN,
    infinities); unrelated exceptions such as KeyboardInterrupt propagate.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)
37
def getTextFromNode(node):
    """Return the text content of an ElementTree node.

    Concatenates, in document order, the node's own text, the text of every
    descendant element and the tail text that follows each child element.
    Returns "" if node is None.
    """
    if node is None:
        return ""
    parts = [node.text or ""]
    for child in node:
        # recurse so that text inside nested elements is included
        parts.append(getTextFromNode(child))
        if child.tail:
            parts.append(child.tail)
    return "".join(parts)
57
def serializeNode(node, encoding="utf-8"):
    """Return node serialized as XML by ElementTree.

    NOTE(review): encoding is not passed on to ET.tostring(), so the output
    always uses ElementTree's default encoding.
    """
    return ET.tostring(node)
68
def browserCheck(self):
    """Describe the requesting browser based on its User-Agent header.

    Reads HTTP_USER_AGENT from self.REQUEST and returns a dict with:
      'ua'                        the header value ("" if the header is missing)
      'isIE', 'isN4'              browser family flags
      'isMac', 'isWin'            platform flags
      'isIEWin', 'isIEMac'        combinations of the above
      'staticHTML'                always False
      'versFirefox', 'versIE',
      'versSafariChrome',
      'versOpera'                 version strings, "" if not detected

    Every detection step works on its own slices of the header, so a header
    that one step cannot parse does not affect the other steps.
    """
    # a request without User-Agent header must not break the page
    ua = self.REQUEST.get_header("HTTP_USER_AGENT") or ""
    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    # Expected layout: "Product (platform) Engine (comment) Browser/x Safari/y".
    # str.find() returns -1 if nothing is found; the slice then only keeps the
    # last character, which none of the checks below can match.
    platform = ua[ua.find('('):]
    afterPlatform = ua[ua.find(')'):]
    comment = afterPlatform[afterPlatform.find('('):]
    afterComment = comment[comment.find(')'):]

    # Safari or Chrome identification
    tokens = afterComment.split(" ")
    if len(tokens) > 2 and "Safari" in tokens[2]:
        product = tokens[1].split("/")
        if len(product) > 1:
            bt['versSafariChrome'] = product[1]

    # IE identification
    fields = platform.split("; ")
    if len(fields) > 1 and "MSIE" in fields[1]:
        msie = fields[1].split(" ")
        if len(msie) > 1:
            bt['versIE'] = msie[1]

    # Firefox identification
    tokens = afterPlatform.split(" ")
    if len(tokens) > 2 and "Firefox" in tokens[2]:
        product = tokens[2].split("/")
        if len(product) > 1:
            logging.debug("FIREFOX: %s"%(product[1]))
            bt['versFirefox'] = product[1][0:3]

    # Opera identification
    if "Opera" in ua:
        tail = platform[platform.find(')'):]
        pieces = tail.split("/")
        if len(pieces) > 2:
            bt['versOpera'] = pieces[2]

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
129
130
def getParentDir(path):
    """Return path without its last '/'-separated component."""
    components = path.split('/')
    # split() always yields at least one component, so this cannot fail
    del components[-1]
    return '/'.join(components)
134
135
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Fetch url with an HTTP GET request and return the response body.

    data is sent as query string: a string is appended to url as it is, a
    dict, list or tuple is urlencoded first.

    The request is tried up to num_tries times with the given timeout (in
    seconds). An HTTP error status ends the attempts immediately, any other
    URLError is logged and the request is tried again. On Python < 2.6 the
    timeout is set as the process-wide socket default.

    Raises IOError if no response could be obtained.
    """
    # we do GET (by appending data to url)
    if isinstance(data, (str, unicode)):
        # if data is string then append
        url = "%s?%s"%(url,data)
    elif isinstance(data, (dict, list, tuple)):
        # urlencode
        url = "%s?%s"%(url,urllib.urlencode(data))

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # urlopen has no timeout parameter here: set it on the socket module
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url,timeout=float(timeout))
            break
        except urllib2.HTTPError:
            # sys.exc_info() instead of "except X, e" keeps the syntax valid
            # on all Python versions
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # the server answered: stop trying
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # connection problem: try again

    if response is not None:
        try:
            return response.read()
        finally:
            # also release the connection when read() fails
            response.close()

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
178
179 ##
180 ## documentViewer class
181 ##
class documentViewer(Folder):
    """Document viewer: a Folder with the viewer's page templates and management form."""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    manage_options = Folder.manage_options + (
        {'label': 'main config', 'action': 'changeDocumentViewerForm'},
    )

    # page templates shipped with the product
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    # configuration form, restricted to managers
    security.declareProtected('View management screens', 'changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
211
212
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Set up a new document viewer.

    Stores the configuration and creates the 'template' sub-folder with a
    'fulltextclient' (MpdlXmlTextServer for textServerName) and a 'zogilib'
    (zogiLib for imageScalerUrl). If one of these helpers cannot be created
    the error is logged and the viewer is set up without it.

    authgroups is a comma-separated list of authorized groups.
    """
    self.id = id
    self.title = title
    # keep the configured URL so that later reads of self.digilibBaseUrl work
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self._setObject('template', templateFolder) # old style (Zope-2.12 style would be self['template'] = ...)
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder._setObject('fulltextclient', textServer)
    except Exception:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(sys.exc_info()[1]))
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib', zogilib)
    except Exception:
        logging.error("Unable to create zogiLib for zogilib: "+str(sys.exc_info()[1]))
240
241
242 # proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Forward getTextPage to the fulltext client and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
246
def getOrigPages(self, **args):
    """Forward getOrigPages to the fulltext client and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getOrigPages(**args)
250
def getOrigPagesNorm(self, **args):
    """Forward getOrigPagesNorm to the fulltext client and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getOrigPagesNorm(**args)
254
def getQuery(self, **args):
    """Forward getQuery to the fulltext client and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getQuery(**args)
258
def getSearch(self, **args):
    """Forward getSearch to the fulltext client and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getSearch(**args)
262
def getGisPlaces(self, **args):
    """Forward getGisPlaces to the fulltext client and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getGisPlaces(**args)
266
def getAllGisPlaces(self, **args):
    """Forward getAllGisPlaces to the fulltext client and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getAllGisPlaces(**args)
270
def getTranslate(self, **args):
    """Forward getTranslate to the fulltext client and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getTranslate(**args)
274
def getLemma(self, **args):
    """Forward getLemma to the fulltext client and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getLemma(**args)
278
def getLemmaQuery(self, **args):
    """Forward getLemmaQuery to the fulltext client and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getLemmaQuery(**args)
282
def getLex(self, **args):
    """Forward getLex to the fulltext client and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getLex(**args)
286
def getToc(self, **args):
    """Forward getToc to the fulltext client and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
290
def getTocPage(self, **args):
    """Forward getTocPage to the fulltext client and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
294
295
296 security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the thumbs_main_rss template for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: text, images or auto; auto selects text if the
        document has a text URL and images otherwise
    @param start: first page of the displayed group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr with default: the attribute may never have been set on this instance
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto": # auto mode selected
        if "textURL" in docinfo or docinfo.get('textURLPath',None): # text URL set and text viewer configured
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
328
329 security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    '''
    Render the main viewer page (template viewer_main) for a document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: first page of the displayed group (passed to getPageinfo)
    @param pn: current page number (passed to getPageinfo)
    @param mk: mark or list of marks, passed to the template via generateMarks
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "auto":
        # auto mode: show text if a text URL is set, page images otherwise
        hasText = ('textURL' in docinfo) or docinfo.get('textURLPath',None)
        if hasText:
            viewMode = "text_dict"
        else:
            viewMode = "images"

    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    if docinfo.get('textURLPath',None):
        pageinfo['textPage'] = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo)

    viewerTemplate = self.template.viewer_main
    return viewerTemplate(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
371
def generateMarks(self,mk):
    """Return the marks in mk as concatenated 'mk=<value>' fragments.

    mk may be None (returns ""), a single value or a list of values.
    NOTE(review): the fragments are joined without any separator -- confirm
    that the consuming template expects this.
    """
    if mk is None:
        return ""
    marks = mk
    if not isinstance(marks, list):
        marks = [marks]
    return "".join(["mk=%s"%m for m in marks])
381
382
def getBrowser(self):
    """Return the browser description from browserCheck() and log it at debug level."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
388
def findDigilibUrl(self):
    """Return the digilib base URL reported by the zogilib object in the template folder."""
    return self.template.zogilib.getDLBaseUrl()
393
def getDocumentViewerURL(self):
    """Return the absolute URL of this viewer instance."""
    viewerUrl = self.absolute_url()
    return viewerUrl
397
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended if idx equals selected."""
    if not (idx == selected):
        return style
    return style + 'sel'
405
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    """Return a URL to the viewer based on the parameters of the current request.

    The form parameters of the request are copied and then changed:
      param/val  sets a single parameter (val=None removes it)
      params     dict of parameters to set (a value of None removes the key)
    A list or tuple value produces one key=value pair per item; all other
    values are converted with str() before quoting.

    baseUrl defaults to REQUEST['URL1']; paramSep separates the parameters
    in the query string.
    """
    # copy existing request params
    urlParams = self.REQUEST.form.copy()
    # change single param
    if param is not None:
        if val is None:
            urlParams.pop(param, None)
        else:
            urlParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                urlParams.pop(k, None)
            else:
                urlParams[k] = v

    # FIXME: does this belong here?
    if urlParams.get("mode", None) == "filepath":
        # filepath was given on the first call: continue with imagepath
        urlParams["mode"] = "imagepath"
        urlParams["url"] = getParentDir(urlParams["url"])

    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (k, v) in urlParams.items():
        if isinstance(v, (list, tuple)):
            values = v
        else:
            values = [v]
        for item in values:
            # str() so that numbers from params or the form can be quoted
            pairs.append("%s=%s"%(k, urllib.quote_plus(str(item), '/')))
    ps = paramSep.join(pairs)

    if baseUrl is None:
        baseUrl = self.REQUEST['URL1']

    return "%s?%s"%(baseUrl, ps)
443
444
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
    """Same as getLink() but with '&amp;' as parameter separator."""
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&amp;')
448
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (rendered by the info_xml template).

    url and mode are passed to getDocinfo(). If no digilib base URL is
    configured yet it is taken from findDigilibUrl() or a built-in default.
    """
    # getattr with default: the attribute may never have been set on this instance
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
458
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """Return the state of the option optionName, toggling it if newState changed.

    The option is kept in the session as {'lastState': ..., 'state': ...}.
    On first use the state is initialState; afterwards it is inverted every
    time newState differs from the value seen on the previous call.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        opt = session.get(optionName)
        if opt['lastState'] != newState:
            # state in session has changed -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState
    else:
        # not in session -- initial
        opt = {'lastState': newState, 'state': initialState}

    session[optionName] = opt
    return opt['state']
473
def isAccessible(self, docinfo):
    """Return True if access to the resource described by docinfo is granted.

    docinfo['accessType'] decides:
      'free'                           everybody
      missing, or one of authgroups    every user except "Anonymous User"
      anything else                    nobody (logged as error)
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
493
494
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Get directory info for path from digilib and store the number of pages.

    Removes the last cut components from path, requests dirInfo-xml.jsp
    from the digilib server and sets docinfo['numPages'] to the content of
    the <size> element inside <dir> (0 if there is none).

    Returns docinfo (a new dict if none was given).
    Raises IOError if the info can not be read.
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path = getParentDir(path)

    infoUrl = self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    # ET.fromstring() already returns the root element
    dom = ET.fromstring(txt)
    # ElementTree paths are relative to the element: look at the root itself
    # first, then at its descendants
    if dom.tag == "dir":
        size = dom.find("size")
    else:
        size = dom.find(".//dir/size")

    # compare with None: an element without children is false
    if size is not None:
        docinfo['numPages'] = int(getTextFromNode(size))
    else:
        docinfo['numPages'] = 0

    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%docinfo['numPages'])

    # TODO: produce and keep list of image names and numbers

    return docinfo
526
def getIndexMetaPath(self,url):
    """Return only the path: "/mpiwg/online/" plus the part of url starting at
    its "experimental" or "permanent" component, or "" if url has neither."""
    match = re.match(r".*(experimental|permanent)/(.*)", url)
    if match is None:
        return ""
    section = match.group(1)
    rest = match.group(2)
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%rest)
    return "/mpiwg/online/" + section + "/" + rest
535
536
537
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document at url.

    A url starting with "http://" is used as it is. Everything else is taken
    as an online path and read through the Texter servlet of the digilib
    server, with "/index.meta" appended if the path does not end with it.
    """
    if url.startswith("http://"):
        # real URL
        return url

    # online path
    # NOTE(review): indentation is lost in this listing; appending
    # "/index.meta" is assumed to apply to online paths only -- confirm.
    metaUrl = self.digilibBaseUrl + "/servlet/Texter?fn=" + url.replace("/mpiwg/online","")
    if metaUrl.endswith("index.meta"):
        return metaUrl
    return metaUrl + "/index.meta"
553
def getDomFromIndexMeta(self, url):
    """Read the index.meta document for url and return its root element.

    The document URL is determined by getIndexMetaUrl().
    Raises IOError if the document can not be read.
    """
    metaUrl = self.getIndexMetaUrl(url)

    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    # ET.fromstring() already returns the root element (there is no getroot())
    return ET.fromstring(txt)
567
def getPresentationInfoXML(self, url):
    """Read the info.xml document at url and return its root element.

    A url starting with "http://" is used as it is, everything else is taken
    as an online path and read through the Texter servlet of the digilib
    server. Raises IOError if the document can not be read.
    """
    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = server+url.replace("/mpiwg/online","")

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    # ET.fromstring() already returns the root element (there is no getroot())
    return ET.fromstring(txt)
587
588
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Get authorization info from the index.meta file at path or given by dom.

    Sets docinfo['accessType'] to the type attribute of the first
    access-conditions/access element. For the types 'group' and
    'institution' the lower-cased text of the element's <name> child is used
    instead. The value is None if there is no access element.

    If dom is None the index.meta is read from path shortened by cut
    components. Returns docinfo (a new dict if none was given).
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    # ElementTree has no attribute paths: find the element, then read the attribute
    accessNode = dom.find(".//access-conditions/access")
    if accessNode is not None:
        access = accessNode.get('type')
        if access in ['group', 'institution']:
            access = getTextFromNode(accessNode.find("name")).lower()

    docinfo['accessType'] = access
    return docinfo
612
613
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Get bibliographical info from the index.meta file at path or given by dom.

    Sets in docinfo:
      'indexMetaPath'          result of getIndexMetaPath(path)
      'bib'                    dict of all child elements of <bib> (tag -> text),
                               only if <bib> has children
      'bib_type'               type attribute of <bib> ('-' replaced by ' '),
                               "generic" if missing
      'author','title','year'  taken from the fields named by the metadata
                               mapping for the bib type, if there is a mapping
      'lang','city', ...       fixed optional fields, '' if missing

    If dom is None the index.meta is read from path shortened by cut
    components. Returns docinfo (a new dict if none was given).
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath'] = self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))

    # first <bib> element; ElementTree paths do not match the root element itself
    if dom.tag == "bib":
        bib = dom
    else:
        bib = dom.find(".//bib")

    def bibText(fieldName):
        """returns the text of the first child of bib called fieldName or None"""
        if bib is None or not fieldName:
            return None
        node = bib.find(fieldName)
        if node is None:
            return None
        return getTextFromNode(node)

    bibtype = "generic"
    if bib is not None:
        # put in all raw bib fields as dict "bib"
        fields = list(bib)
        if fields:
            bibinfo = {}
            for e in fields:
                # use the local name: drop a "{namespace}" prefix if there is one
                bibinfo[e.tag.split('}')[-1]] = getTextFromNode(e)
            docinfo['bib'] = bibinfo
        bibtype = bib.get('type', "generic")

    # extract some fields (author, title, year) according to their mapping
    metaData = self.metadata.main.meta.bib
    bibtype = bibtype.replace("-"," ") # index.meta sometimes uses "-" instead of " "
    docinfo['bib_type'] = bibtype
    bibmap = metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        for key in ('author', 'title', 'year'):
            try:
                text = bibText(bibmap[key][0])
            except (KeyError, IndexError, TypeError):
                # the mapping has no usable entry for this field
                continue
            if text is not None:
                docinfo[key] = text
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)

    # NOTE(review): indentation is lost in this listing, so it is unclear if
    # the optional fields were only filled when a mapping exists. They are
    # filled always here so that the keys are always present -- confirm.
    for key in ('lang', 'city', 'number_of_pages', 'series_volume',
                'number_of_volumes', 'translator', 'edition', 'series_author',
                'publisher', 'series_title', 'isbn_issn'):
        docinfo[key] = bibText(key) or ''

    return docinfo
709
710
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Get name info from the index.meta file at path or given by dom.

    Sets docinfo['name'] to the text of the <name> child of the <resource>
    root element ("" if there is none). If dom is None the index.meta is
    read from path shortened by cut components.
    Returns docinfo (a new dict if none was given).
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    # "/resource/name": name directly below a root element called resource
    nameNode = None
    if dom.tag == "resource":
        nameNode = dom.find("name")

    docinfo['name'] = getTextFromNode(nameNode)
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
724
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Parse the texttool tag in index.meta and fill docinfo.

    Reads resource/name and resource/archive-path and the texttool elements
    image, digiliburlprefix, text, text-url-path and presentation. Sets
    'imagePath' and 'imageURL' (plus the directory info from digilib) if
    there are page images, 'viewerURL', 'textURL', 'textURLPath' and
    'textURLPathkurz' if the corresponding elements exist, and adds the
    bibliographical, name and access info of the index.meta.

    If dom is None the index.meta is read from url.
    Returns docinfo (a new dict if none was given).
    Raises IOError if no archive path can be determined.
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    def firstNode(parentTag, childTag):
        """returns the first childTag element below a parentTag element or None"""
        # ElementTree paths do not match the root element itself
        if dom.tag == parentTag:
            node = dom.find(childTag)
            if node is not None:
                return node
        return dom.find(".//%s/%s" % (parentTag, childTag))

    archivePath = None
    archiveName = None

    nameNode = firstNode("resource", "name")
    if nameNode is not None:
        archiveName = getTextFromNode(nameNode)
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePathNode = firstNode("resource", "archive-path")
    if archivePathNode is not None:
        archivePath = getTextFromNode(archivePathNode)
        # clean up archive path (startswith also works for an empty path)
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageNode = firstNode("texttool", "image")
    if imageNode is not None:
        imageDir = getTextFromNode(imageNode)
    else:
        # no image tag is not an error because text mode is standard now
        imageDir = ""
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerNode = firstNode("texttool", "digiliburlprefix")
    if viewerNode is not None:
        docinfo['viewerURL'] = getTextFromNode(viewerNode)

    # old style text URL
    textNode = firstNode("texttool", "text")
    if textNode is not None:
        textUrl = getTextFromNode(textNode)
        if urlparse.urlparse(textUrl)[0] == "": # not a URL
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textPathNode = firstNode("texttool", "text-url-path")
    if textPathNode is not None:
        textUrl = getTextFromNode(textPathNode)
        docinfo['textURLPath'] = textUrl
        docinfo['textURLPathkurz'] = textUrl.split(".")[0]

    presentationNode = firstNode("texttool", "presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationNode is not None:
        # overwrite the bib info with the presentation info; its URL is the
        # URL of the metadata with index.meta replaced by the relative path
        # of the presentation info
        presentationPath = getTextFromNode(presentationNode)
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
831
832
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Get bibliographical info from the presentation entry in texttools.

    Reads the presentation info document at url and copies the text of its
    first <author>, <title> and <date> elements to docinfo['author'],
    docinfo['title'] and docinfo['year']. Fields without matching element
    are left unchanged.

    The dom argument is not used; the document is always read from url.
    Returns docinfo (a new dict if none was given).
    """
    if docinfo is None:
        docinfo = {}

    infoDom = self.getPresentationInfoXML(url)
    for (key, tag) in (('author', 'author'), ('title', 'title'), ('year', 'date')):
        # ElementTree paths do not match the root element itself
        if infoDom.tag == tag:
            node = infoDom
        else:
            node = infoDom.find(".//" + tag)
        if node is not None:
            docinfo[key] = getTextFromNode(node)

    return docinfo
850
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Fill docinfo for the page images at path.

    path is the path to the images; the index.meta file is assumed to be one
    level higher. cut is the number of components to remove from the end of
    path to get to the image directory.
    Returns docinfo (a new dict if none was given).
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    path = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = path
    docinfo = self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

    imageDir = path
    for x in range(cut):
        imageDir = getParentDir(imageDir)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+imageDir)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir

    # the index.meta is one level above the images
    metaCut = cut + 1
    docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(path,docinfo=docinfo,cut=metaCut)
    return docinfo
871
872
def getDocinfo(self, mode, url):
    """Return the docinfo for url, read according to mode.

    mode is one of 'texttool', 'imagepath' or 'filepath'; any other value
    raises ValueError. The docinfo is cached in the session and reused as
    long as mode and url stay the same.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION
    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
            return cached

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode == "filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # FIXME: fake texturlpath
    docinfo.setdefault('textURLPath', None)

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
903
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    current   current page number
    start     first page of the displayed group; the default is the first
              page of the group that contains current
    rows,cols size of the thumbnail grid (default: thumbrows and thumbcols)
    docinfo   document info; 'numPages' limits the end of the group and
              'tocSize_<tocMode>' limits the table of contents page number
    viewMode, tocMode  copied to pageinfo

    The query and paging options (characterNormalization, query, queryType,
    querySearch, textPN, highlightQuery, tocPageSize, queryPageSize, tocPN,
    searchPN, sn) are read from the request.
    """
    if docinfo is None:
        # the lookups below need a dict
        docinfo = {}

    pageinfo = {}
    current = getInt(current)

    pageinfo['current'] = current
    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize
    # default start: first page of the group that contains the current page
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if 'numPages' in docinfo:
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounded up
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
    pageinfo['query'] = self.REQUEST.get('query','')
    pageinfo['queryType'] = self.REQUEST.get('queryType','')
    pageinfo['querySearch'] = self.REQUEST.get('querySearch', 'fulltext')
    pageinfo['textPN'] = self.REQUEST.get('textPN','1')
    pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
    pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = self.REQUEST.get('queryPageSize', '10')
    pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = toc

    tocSizeKey = 'tocSize_%s'%tocMode
    if tocSizeKey in docinfo:
        # cached toc: limit the toc page number to the number of toc pages
        tocSize = int(docinfo[tocSizeKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        pageinfo['tocPN'] = min(tocPages, toc)

    pageinfo['searchPN'] = self.REQUEST.get('searchPN','1')
    pageinfo['sn'] = self.REQUEST.get('sn','')
    return pageinfo
953
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Change the configuration of the document viewer.

    authgroups is a comma-separated list of group names; the names are
    stored stripped and lower-cased. If RESPONSE is given the browser is
    redirected to manage_main.
    """
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())

    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    self.authgroups = groups
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
963
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
968
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a document viewer called id in this container.

    If RESPONSE is given the browser is redirected to manage_main.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
976
977 ## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template for the document viewer."""
    meta_type = "DocumentViewer Template"
981
982
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
987
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate called id, filled with the product's viewer_main.zpt.

    title is set on the new template if given. text and submit are accepted
    but not used. If REQUEST is given the browser is redirected to the
    manage_main page of the new template. Returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templateFile = open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r')
    try:
        txt = templateFile.read()
    finally:
        # do not leave the file open
        templateFile.close()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is not None:
        # redirect only when called through the web
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
1007
1008
1009