Mercurial > hg > documentViewer
comparison documentViewer_old.py @ 453:beb7ccb92564 elementtree
first version using elementtree instead of 4suite xml
author | casties |
---|---|
date | Thu, 14 Jul 2011 19:43:56 +0200 |
parents | |
children | 0a53fea83df7 |
comparison
equal
deleted
inserted
replaced
408:4e84c53e49b3 | 453:beb7ccb92564 |
---|---|
1 | |
2 from OFS.Folder import Folder | |
3 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate | |
4 from Products.PageTemplates.PageTemplateFile import PageTemplateFile | |
5 from AccessControl import ClassSecurityInfo | |
6 from AccessControl import getSecurityManager | |
7 from Globals import package_home | |
8 from Products.zogiLib.zogiLib import browserCheck | |
9 | |
10 from Ft.Xml import EMPTY_NAMESPACE, Parse | |
11 import Ft.Xml.Domlette | |
12 | |
13 import xml.etree.ElementTree as ET | |
14 | |
15 import os.path | |
16 import sys | |
17 import urllib | |
18 import urllib2 | |
19 import logging | |
20 import math | |
21 import urlparse | |
22 import cStringIO | |
23 import re | |
24 import string | |
25 | |
def logger(txt,method,txt2):
    """log txt followed by txt2 at INFO level (method is accepted but not used)"""
    message = txt + txt2
    logging.info(message)
29 | |
30 | |
def getInt(number, default=0):
    """returns always an int (default in case of problems)

    number is converted with int(); if that fails the int value of
    default (0 unless given) is returned instead.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # only conversion problems are handled here, so that things like
        # KeyboardInterrupt are no longer swallowed
        return int(default)
37 | |
def getTextFromNode(node):
    """get the text content of an ElementTree node

    Returns the text of node and of all its descendants in document
    order (including the tail text that follows each child element).
    Returns "" if node is None.
    """
    if node is None:
        return ""
    parts = [node.text or ""]
    for child in node:
        # recurse into the child (the previous code called an undefined
        # function here and failed for every element with children)
        parts.append(getTextFromNode(child))
        if child.tail:
            parts.append(child.tail)
    return "".join(parts)
57 | |
def serializeNode(node, encoding="utf-8"):
    """returns a string containing node serialized as XML by ElementTree

    The encoding argument is accepted but not passed on to ET.tostring.
    """
    return ET.tostring(node)
68 | |
def browserCheck(self):
    """check the browsers request to find out the browser type

    Reads the HTTP_USER_AGENT header from self.REQUEST and returns a dict
    with the keys ua, isIE, isN4, versFirefox, versIE, versSafariChrome,
    versOpera, isMac, isWin, isIEWin, isIEMac and staticHTML.
    Version entries stay "" when the browser could not be identified.
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a request without User-Agent header is parsed like an empty string
    text = ua or ""

    bt = {}
    bt['ua'] = ua
    bt['isIE'] = False
    bt['isN4'] = False
    bt['versFirefox']=""
    bt['versIE']=""
    bt['versSafariChrome']=""
    bt['versOpera']=""

    if text.find('MSIE') > -1:
        bt['isIE'] = True
    else:
        bt['isN4'] = (text.find('Mozilla/4.') > -1)

    # Safari or Chrome identification: look at the tokens that follow the
    # second parenthesised group of the user agent string
    try:
        afterFirst = text[text.find(')'):]
        second = afterFirst[afterFirst.find('('):]
        afterSecond = second[second.find(')'):]
        tokens = afterSecond.split(" ")
        if tokens[2].find("Safari") > -1:
            bt['versSafariChrome'] = tokens[1].split("/")[1]
    except IndexError:
        pass

    # IE identification: second "; "-separated entry of the first group
    try:
        entry = text[text.find('('):].split("; ")[1]
        if entry.find("MSIE") > -1:
            bt['versIE'] = entry.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification: third token after the first group.
    # The token is computed here so this check does not depend on how far
    # the Safari check above got.
    try:
        token = text[text.find(')'):].split(" ")[2]
        if token.find("Firefox") > -1:
            vers = token.split("/")[1]
            logging.debug("FIREFOX: %s"%(vers))
            bt['versFirefox'] = vers[0:3]
    except IndexError:
        pass

    # Opera identification
    try:
        if text.find("Opera") > -1:
            nav = text[text.find('('):]
            rest = nav[nav.find(')'):]
            bt['versOpera'] = rest.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = text.find('Macintosh') > -1
    bt['isWin'] = text.find('Windows') > -1
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
129 | |
130 | |
def getParentDir(path):
    """returns path without its last '/'-separated component ('' if there is no '/')"""
    cut = path.rfind('/')
    if cut < 0:
        return ''
    return path[:cut]
134 | |
135 | |
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """returns result from url+data HTTP request

    The request is always a GET: data is appended to url as query string
    (as-is if it is a string, urlencoded if it is a dict, list or tuple).
    The request is tried up to num_tries times with the given timeout in
    seconds; a HTTP error response stops the retries immediately.

    Returns the response body. Raises IOError if no response was received.
    """
    # we do GET (by appending data to url)
    if isinstance(data, (str, unicode)):
        # if data is string then append
        url = "%s?%s"%(url,data)
    elif isinstance(data, (dict, list, tuple)):
        # urlencode
        url = "%s?%s"%(url,urllib.urlencode(data))

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # urlopen has no timeout parameter here, so the timeout is
                # set as default for all new sockets -- ugly :-(
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url,timeout=float(timeout))
            break
        except urllib2.HTTPError:
            # the exception is taken from sys.exc_info() so this handler
            # parses on old and new interpreters alike
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # the server answered with an error -- stop trying
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # connection problem -- try again

    if response is not None:
        try:
            return response.read()
        finally:
            # close the connection even if reading fails
            response.close()

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
177 #return None | |
178 | |
179 ## | |
180 ## documentViewer class | |
181 ## | |
class documentViewer(Folder):
    """document viewer"""
    meta_type="Document viewer"

    security=ClassSecurityInfo()

    # adds the "main config" tab to the tabs inherited from Folder
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # page templates: main frame and head
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    # page templates: table of contents
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    # page templates: page content in the different view modes
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    # stylesheet, XML info and RSS templates
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())

    # configuration form (management screens only)
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
211 | |
212 | |
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """init document viewer

    Stores id, title, thumbcols, thumbrows and the list of authorized
    groups, then creates the sub-folder 'template' containing the text
    server 'fulltextclient' (using textServerName) and the zogiLib
    instance 'zogilib' (using imageScalerUrl). Failure to create one of
    the two helper objects is logged and otherwise ignored.
    """
    self.id=id
    self.title=title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    if digilibBaseUrl is not None:
        # keep the configured digilib URL instead of silently dropping it;
        # when it is not given the attribute stays unset as before
        self.digilibBaseUrl = digilibBaseUrl
    # authgroups is list of authorized groups (delimited by ,)
    self.authgroups = [s.strip().lower() for s in authgroups.split(',')]

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self._setObject('template',templateFolder) # old style (not Zope-2.12 item assignment)
    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder._setObject('fulltextclient',textServer)
    except Exception:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(sys.exc_info()[1]))
    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib',zogilib)
    except Exception:
        logging.error("Unable to create zogiLib for zogilib: "+str(sys.exc_info()[1]))
240 | |
241 | |
242 # proxy text server methods to fulltextclient | |
def getTextPage(self, **args):
    """get text page: passes all arguments on to getTextPage of template.fulltextclient"""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
246 | |
def getOrigPages(self, **args):
    """get orig pages: passes all arguments on to getOrigPages of template.fulltextclient"""
    textServer = self.template.fulltextclient
    return textServer.getOrigPages(**args)
250 | |
def getOrigPagesNorm(self, **args):
    """get orig pages norm: passes all arguments on to getOrigPagesNorm of template.fulltextclient"""
    textServer = self.template.fulltextclient
    return textServer.getOrigPagesNorm(**args)
254 | |
def getQuery(self, **args):
    """get query in search: passes all arguments on to getQuery of template.fulltextclient"""
    textServer = self.template.fulltextclient
    return textServer.getQuery(**args)
258 | |
def getSearch(self, **args):
    """get search: passes all arguments on to getSearch of template.fulltextclient"""
    textServer = self.template.fulltextclient
    return textServer.getSearch(**args)
262 | |
def getGisPlaces(self, **args):
    """get gis places: passes all arguments on to getGisPlaces of template.fulltextclient"""
    textServer = self.template.fulltextclient
    return textServer.getGisPlaces(**args)
266 | |
def getAllGisPlaces(self, **args):
    """get all gis places: passes all arguments on to getAllGisPlaces of template.fulltextclient"""
    textServer = self.template.fulltextclient
    return textServer.getAllGisPlaces(**args)
270 | |
def getTranslate(self, **args):
    """get translate: passes all arguments on to getTranslate of template.fulltextclient"""
    textServer = self.template.fulltextclient
    return textServer.getTranslate(**args)
274 | |
def getLemma(self, **args):
    """get lemma: passes all arguments on to getLemma of template.fulltextclient"""
    textServer = self.template.fulltextclient
    return textServer.getLemma(**args)
278 | |
def getLemmaQuery(self, **args):
    """get lemma query: passes all arguments on to getLemmaQuery of template.fulltextclient"""
    textServer = self.template.fulltextclient
    return textServer.getLemmaQuery(**args)
282 | |
def getLex(self, **args):
    """get lex: passes all arguments on to getLex of template.fulltextclient"""
    textServer = self.template.fulltextclient
    return textServer.getLex(**args)
286 | |
def getToc(self, **args):
    """get toc: passes all arguments on to getToc of template.fulltextclient"""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
290 | |
def getTocPage(self, **args):
    """get toc page: passes all arguments on to getTocPage of template.fulltextclient"""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
294 | |
295 | |
296 security.declareProtected('View','thumbs_rss') | |
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    view it as RSS using the template thumbs_main_rss
    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param start: passed on to getPageinfo
    @param pn: passed on to getPageinfo as current page
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # the attribute may never have been set, so read it with getattr
    # (same check as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode: show text if a text URL is set, images otherwise
        if ("textURL" in docinfo) or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
328 | |
329 security.declareProtected('View','index_html') | |
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    '''
    main view: renders the template viewer_main for the document at url
    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @param start: passed on to getPageinfo
    @param pn: passed on to getPageinfo as current page
    @param mk: mark or list of marks, handed to the template via generateMarks
    '''
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    if tocMode != "thumbs":
        # every other toc mode needs the table of contents from the text server
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode=="auto":
        # auto mode: text view if some text URL is set, images otherwise
        hasText = docinfo.has_key('textURL') or docinfo.get('textURLPath',None)
        if hasText:
            viewMode="text_dict"
        else:
            viewMode="images"

    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    if docinfo.get('textURLPath',None):
        pageinfo['textPage'] = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo)

    pt = getattr(getattr(self, 'template'), 'viewer_main')
    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
371 | |
def generateMarks(self,mk):
    """returns the string 'mk=<mark>' for every mark in mk, concatenated without separator

    mk may be None (result is ""), a single mark or a list of marks.
    """
    if mk is None:
        return ""
    marks = mk
    if not isinstance(marks, list):
        marks = [marks]
    return "".join(["mk=%s"%m for m in marks])
381 | |
382 | |
def getBrowser(self):
    """returns the browser information dict from browserCheck for the current request"""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
388 | |
def findDigilibUrl(self):
    """returns the digilib base URL as reported by getDLBaseUrl of template.zogilib"""
    return self.template.zogilib.getDLBaseUrl()
393 | |
def getDocumentViewerURL(self):
    """returns the URL of this instance (its absolute_url)"""
    viewerUrl = self.absolute_url()
    return viewerUrl
397 | |
def getStyle(self, idx, selected, style=""):
    """returns style, with 'sel' appended if idx equals selected"""
    if idx == selected:
        style = style + 'sel'
    return style
405 | |
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    """returns URL to documentviewer with parameter param set to val or from dict params

    Starts from a copy of the parameters of the current request. A value
    of None (val or a value in params) removes that parameter. The query
    string is appended to baseUrl (default: URL1 of the request) with
    paramSep between the parameters.
    """
    # start from the parameters of the current request
    urlParams=self.REQUEST.form.copy()

    # single parameter
    if param is not None:
        if val is not None:
            urlParams[param] = str(val)
        elif param in urlParams:
            del urlParams[param]

    # several parameters at once
    if params is not None:
        for (k, v) in params.items():
            if v is not None:
                urlParams[k] = v
            elif k in urlParams:
                # val=None removes param
                del urlParams[k]

    # FIXME: does this belong here?
    if urlParams.get("mode", None) == "filepath":
        # first call used filepath mode -- links continue with imagepath
        urlParams["mode"] = "imagepath"
        urlParams["url"] = getParentDir(urlParams["url"])

    # quote values and assemble into query string (not escaping '/')
    pairs = []
    for (k, v) in urlParams.items():
        pairs.append("%s=%s"%(k,urllib.quote_plus(v,'/')))
    ps = paramSep.join(pairs)

    if baseUrl is None:
        baseUrl = self.REQUEST['URL1']

    return "%s?%s"%(baseUrl, ps)
443 | |
444 | |
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
    """link to documentviewer with parameter param set to val, for use inside HTML

    Same as getLink but the parameters are separated by the HTML-escaped
    ampersand instead of a plain '&'.
    """
    # NOTE(review): with a plain '&' this method would be identical to
    # getLink; the escaped form is assumed from the method name -- confirm.
    # The separator is put together from two parts so it survives tools
    # that decode HTML entities in source text.
    escapedAmp = '&' + 'amp;'
    return self.getLink(param, val, params, baseUrl, escapedAmp)
448 | |
def getInfo_xml(self,url,mode):
    """returns info about the document as XML

    Gets the docinfo for url and mode and renders it with the template
    info_xml of the template folder.
    """
    # the attribute may never have been set, so read it with getattr
    # (same check as in index_html)
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
458 | |
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """returns new option state

    The option is kept in the session under optionName as a dict with
    the keys 'lastState' and 'state'. On first use 'state' is
    initialState; afterwards 'state' is inverted every time newState
    differs from the stored 'lastState'.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        opt = session.get(optionName)
        if opt['lastState'] != newState:
            # state in session has changed -- toggle
            opt['state'] = not opt['state']
            opt['lastState'] = newState
    else:
        # not in session -- initial
        opt = {'lastState': newState, 'state': initialState}

    session[optionName] = opt
    return opt['state']
473 | |
def isAccessible(self, docinfo):
    """returns if access to the resource is granted

    Access type 'free' is always granted. A missing access type or one
    of the groups in self.authgroups is granted to every user that is
    not "Anonymous User". Any other access type is denied.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
493 | |
494 | |
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """gets the number of pages from the digilib dirInfo of the directory path

    @param path: image directory
    @param docinfo: dict to put the result in (a new dict if None)
    @param cut: number of path components removed from the end of path first

    Sets docinfo['numPages'] to the number in the size element of the
    dirInfo answer (0 if there is none) and returns docinfo.
    Raises IOError if the dirInfo can not be read.
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    # ET.fromstring returns the root element itself
    dom = ET.fromstring(txt)
    # same elements as the XPath //dir/size: ElementTree paths never match
    # the root element, so a dir element at the root is handled separately
    sizeNode = None
    if dom.tag == "dir":
        sizeNode = dom.find("size")
    if sizeNode is None:
        sizeNode = dom.find(".//dir/size")
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%getTextFromNode(sizeNode))

    docinfo['numPages'] = 0
    # compare with None: an element without children is false in a truth test
    if sizeNode is not None:
        try:
            docinfo['numPages'] = int(getTextFromNode(sizeNode))
        except ValueError:
            logging.error("documentViewer (getparamfromdigilib) size is not a number in %s"%(infoUrl))

    # TODO: produce and keep list of image names and numbers

    return docinfo
526 | |
def getIndexMetaPath(self,url):
    """returns only the path: '/mpiwg/online/' plus the part of url from 'experimental' or 'permanent' on ('' if url contains neither)"""
    found = re.match(r".*(experimental|permanent)/(.*)", url)
    if found is None:
        return ""
    area = found.group(1)
    rest = found.group(2)
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%rest)
    return ("/mpiwg/online/"+area+"/"+rest)
535 | |
536 | |
537 | |
def getIndexMetaUrl(self,url):
    """returns url of index.meta document at url

    A real URL (http or https) is returned unchanged. Everything else is
    taken as online path: it is turned into a URL of the digilib Texter
    servlet and "/index.meta" is appended if the path does not end with
    "index.meta".
    """
    # NOTE(review): the original indentation is not visible; it is assumed
    # that "/index.meta" is only appended to online paths -- confirm.
    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        return url

    # online path
    server=self.digilibBaseUrl+"/servlet/Texter?fn="
    metaUrl=server+url.replace("/mpiwg/online","")
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"

    return metaUrl
553 | |
def getDomFromIndexMeta(self, url):
    """get dom from index meta

    Reads the index.meta document for url (see getIndexMetaUrl) and
    returns its root element as ElementTree element.
    Raises IOError if the document can not be read.
    """
    metaUrl = self.getIndexMetaUrl(url)

    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    # ET.fromstring returns the root element itself (there is no getroot())
    return ET.fromstring(txt)
567 | |
def getPresentationInfoXML(self, url):
    """returns dom of info.xml document at url

    A real URL (http or https) is read directly, everything else is
    taken as online path and read through the digilib Texter servlet.
    Returns the root element as ElementTree element.
    Raises IOError if the document can not be read.
    """
    if url.startswith("http://") or url.startswith("https://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server=self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl=server+url.replace("/mpiwg/online","")

    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    # ET.fromstring returns the root element itself (there is no getroot())
    return ET.fromstring(txt)
587 | |
588 | |
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets authorization info from the index.meta file at path or given by dom

    @param path: path of the index.meta (only read if dom is None)
    @param docinfo: dict to put the result in (a new dict if None)
    @param dom: root element of an already parsed index.meta
    @param cut: number of path components removed from path before reading

    Sets docinfo['accessType'] to the type attribute of the element
    access-conditions/access, or for the types 'group' and 'institution'
    to the lower case content of its name element, or to None if there
    is no access element. Returns docinfo.
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    access = None

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    # ElementTree paths can not select attributes, so the element is
    # looked up first and the attribute is read from it
    accessNode = dom.find(".//access-conditions/access")
    if accessNode is not None:
        acctype = accessNode.get('type')
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                nameNode = accessNode.find("name")
                if nameNode is not None:
                    access = (nameNode.text or "").lower()

    docinfo['accessType'] = access
    return docinfo
612 | |
613 | |
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets bibliographical info from the index.meta file at path or given by dom

    @param path: path of the index.meta (only read if dom is None)
    @param docinfo: dict to put the result in (a new dict if None)
    @param dom: root element of an already parsed index.meta
    @param cut: number of path components removed from path before reading

    Sets in docinfo: 'indexMetaPath', 'bib' (all fields of the bib element
    as dict, only if there are any), 'bib_type' and, if there is a
    mapping for the bib type, 'author', 'title' and 'year' (only if
    present) plus a fixed list of further fields ('' if missing).
    Returns docinfo.
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))

    # first bib element (compare with None: an element without children
    # is false in a truth test)
    bib = dom.find(".//bib")
    # first field element for every field name
    bibFields = {}
    if bib is not None:
        for e in bib:
            if e.tag not in bibFields:
                bibFields[e.tag] = e

    # put in all raw bib fields as dict "bib"
    if bibFields:
        bibinfo = {}
        for e in bib:
            bibinfo[e.tag] = getTextFromNode(e)
        docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData=self.metadata.main.meta.bib
    bibtype = None
    if bib is not None:
        bibtype = bib.get('type')
    if bibtype is None:
        bibtype="generic"

    # the mapping uses " " where index.meta has "-" in the type
    bibtype=bibtype.replace("-"," ")
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        # NOTE(review): the original indentation is not visible; it is
        # assumed that all fields below are only set when a mapping
        # exists -- confirm.
        for key in ('author', 'title', 'year'):
            # these stay unset when the mapping or the field is missing
            try:
                node = bibFields[bibmap[key][0]]
            except (KeyError, IndexError, TypeError):
                continue
            docinfo[key] = getTextFromNode(node)
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
        # these are always set ('' when the field is missing)
        for key in ('lang', 'city', 'number_of_pages', 'series_volume',
                    'number_of_volumes', 'translator', 'edition',
                    'series_author', 'publisher', 'series_title', 'isbn_issn'):
            docinfo[key] = getTextFromNode(bibFields.get(key))
    return docinfo
709 | |
710 | |
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """gets name info from the index.meta file at path or given by dom

    @param path: path of the index.meta (only read if dom is None)
    @param docinfo: dict to put the result in (a new dict if None)
    @param dom: root element of an already parsed index.meta
    @param cut: number of path components removed from path before reading

    Sets docinfo['name'] to the content of the name element below the
    root element resource ('' if there is none) and returns docinfo.
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    # same element as the XPath /resource/name: dom is the root element
    nameNode = None
    if dom.tag == "resource":
        nameNode = dom.find("name")
    docinfo['name']=getTextFromNode(nameNode)
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
724 | |
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """parse texttool tag in index meta

    @param url: URL or path of the index.meta
    @param dom: root element of an already parsed index.meta (read from url if None)
    @param docinfo: dict to put the result in (a new dict if None)

    Puts the image directory, viewer URL, text URLs, bibliographical
    info, name and access info of the document into docinfo and returns
    it. Raises IOError if no archive-path can be determined.
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    def firstNode(parent, child):
        # first element for the XPath //parent/child; ElementTree paths
        # never match the root element, so that case is checked separately
        if dom.tag == parent:
            node = dom.find(child)
            if node is not None:
                return node
        return dom.find(".//%s/%s" % (parent, child))

    archivePath = None
    archiveName = None

    nameNode = firstNode("resource", "name")
    if nameNode is not None:
        archiveName = getTextFromNode(nameNode)
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    pathNode = firstNode("resource", "archive-path")
    if pathNode is not None:
        archivePath = getTextFromNode(pathNode)
        # clean up archive path (startswith also works for an empty path)
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageNode = firstNode("texttool", "image")
    if imageNode is not None:
        imageDir = getTextFromNode(imageNode)
    else:
        # no image tag is not an error because text mode is the standard
        imageDir = ""
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerNode = firstNode("texttool", "digiliburlprefix")
    if viewerNode is not None:
        docinfo['viewerURL'] = getTextFromNode(viewerNode)

    # old style text URL
    textNode = firstNode("texttool", "text")
    if textNode is not None:
        textUrl = getTextFromNode(textNode)
        if urlparse.urlparse(textUrl)[0] == "":
            # not a URL -- path relative to the archive path
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)
        docinfo['textURL'] = textUrl

    # new style text-url-path
    textPathNode = firstNode("texttool", "text-url-path")
    if textPathNode is not None:
        textUrl = getTextFromNode(textPathNode)
        docinfo['textURLPath'] = textUrl
        # short form: everything up to the first "."
        docinfo['textURLPathkurz'] = textUrl.split(".")[0]

    presentationNode = firstNode("texttool", "presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationNode is not None:
        # the presentation info overwrites the bib info; its URL is the URL
        # of the index.meta with "index.meta" replaced by the relative path
        # of the presentation info
        presentationPath = getTextFromNode(presentationNode)
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
831 | |
832 | |
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """gets the bibliographical information from the presentation entry in texttools

    @param url: URL or path of the presentation info document
    @param docinfo: dict to put the result in (a new dict if None)
    @param dom: not used, the presentation info is always read from url

    Sets docinfo['author'], docinfo['title'] and docinfo['year'] from the
    first author, title and date element of the document; entries
    without such an element are left unchanged. Returns docinfo.
    """
    if docinfo is None:
        docinfo = {}

    dom=self.getPresentationInfoXML(url)
    for (key, tag) in (('author', 'author'), ('title', 'title'), ('year', 'date')):
        node = dom.find(".//" + tag)
        # compare with None: an element without children is false in a truth test
        if node is not None:
            docinfo[key] = getTextFromNode(node)
    return docinfo
850 | |
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Build docinfo for a document that is addressed by its image directory.

    path    -- path to the images; every "/mpiwg/online" in it is removed.
    docinfo -- dict to fill in; a new one is created when None.
    cut     -- number of trailing path levels to strip (via getParentDir)
               before the digilib Scaler URL is built.

    Stores 'imagePath' (the uncut path) and 'imageURL' in docinfo and lets
    getDirinfoFromDigilib, getBibinfoFromIndexMeta and
    getAuthinfoFromIndexMeta add their information. The index.meta file is
    assumed to live one level above the images, hence ``cut+1`` for the
    two index.meta lookups.

    Returns the filled docinfo.
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s" % path)
    if docinfo is None:
        docinfo = {}

    imagePath = path.replace("/mpiwg/online", "")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath, docinfo=docinfo, cut=cut)

    # walk up `cut` levels to reach the directory handed to the scaler
    scalerPath = imagePath
    for _ in range(cut):
        scalerPath = getParentDir(scalerPath)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:" + scalerPath)
    docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + scalerPath

    # index.meta is expected one level above the image directory
    metaCut = cut + 1
    docinfo = self.getBibinfoFromIndexMeta(imagePath, docinfo=docinfo, cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath, docinfo=docinfo, cut=metaCut)
    return docinfo
871 | |
872 | |
def getDocinfo(self, mode, url):
    """Return the docinfo dict for ``url``, built according to ``mode``.

    A docinfo cached in the session under 'docinfo' is returned as is when
    its 'mode' and 'url' entries match the arguments. Otherwise a new one
    is built and stored in the session:

    * "texttool"  -- index.meta with texttool information
    * "imagepath" -- path to an image directory
    * "filepath"  -- like "imagepath", but with one path level cut off

    Raises ValueError for any other mode.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION

    # reuse the cached docinfo if it is still current
    cached = session.get('docinfo')
    if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
        logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%cached)
        return cached

    # build a new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode in ("imagepath", "filepath"):
        cut = 1 if mode == "filepath" else 0
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo, cut=cut)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # FIXME: fake texturlpath
    docinfo.setdefault('textURLPath', None)

    logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
    session['docinfo'] = docinfo
    return docinfo
903 | |
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Return a pageinfo dict for the given page and display parameters.

    current  -- current page number; converted with getInt().
    start    -- first page of the thumbnail group; when it cannot be
                converted to an int it is derived from ``current`` so that
                ``current`` falls into its group.
    rows, cols -- thumbnail grid size; fall back to self.thumbrows and
                self.thumbcols when empty.
    docinfo  -- optional docinfo dict. 'numPages' limits 'end' and yields
                'numgroups'; 'tocSize_<tocMode>' caps 'tocPN'. May be None,
                in which case neither of these is applied.
    viewMode, tocMode -- stored unchanged in the result.

    The remaining entries (query, textPN, tocPageSize, ...) are read from
    self.REQUEST with the defaults given below.
    """
    request = self.REQUEST
    pageinfo = {}
    current = getInt(current)
    pageinfo['current'] = current

    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize

    # default start: first page of the group that contains `current`
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize
    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, rounding up for a partial last group
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    #pageinfo['optionToggle'] = request.get('optionToggle','1')
    pageinfo['query'] = request.get('query','')
    pageinfo['queryType'] = request.get('queryType','')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['textPN'] = request.get('textPN','1')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
    pageinfo['tocPN'] = request.get('tocPN', '1')
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = toc

    # docinfo defaults to None, so it has to be checked before the lookup
    tocSizeKey = 'tocSize_%s' % tocMode
    if (docinfo is not None) and (tocSizeKey in docinfo):
        # cached toc: do not page beyond its last page
        tocSize = int(docinfo[tocSizeKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        pageinfo['tocPN'] = min(tocPages, toc)

    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','')
    return pageinfo
953 | |
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Store the viewer settings on this object.

    authgroups is a comma separated string; it is stored as a list of the
    stripped, lower-cased group names. When RESPONSE is given the browser
    is redirected to 'manage_main' afterwards.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
963 | |
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer (zpt/addDocumentViewer)."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    # bind the template to this object before rendering it
    return form.__of__(self)()
968 | |
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to this container.

    When RESPONSE is given the browser is redirected to 'manage_main'.
    """
    viewer = documentViewer(
        id,
        imageScalerUrl=imageScalerUrl,
        title=title,
        textServerName=textServerName,
    )
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
976 | |
977 ## DocumentViewerTemplate class | |
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template used by the document viewer."""

    # type name under which Zope lists objects of this class
    meta_type = "DocumentViewer Template"
981 | |
982 | |
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a DocumentViewerTemplate."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    # bind the template to this object before rendering it
    return form.__of__(self)()
987 | |
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged default template.

    id      -- id of the new template object in this container.
    title   -- optional title to set on the template.
    text, submit -- accepted for form compatibility; not used.
    REQUEST -- when given, the browser is redirected to the new object's
               'manage_main'; when None (called from code) no redirect
               is attempted.

    The template source is always read from zpt/viewer_main.zpt inside the
    package directory. Returns an empty string.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    templatePath = os.path.join(package_home(globals()), 'zpt/viewer_main.zpt')
    templateFile = open(templatePath, 'r')
    try:
        txt = templateFile.read()
    finally:
        # do not leave the file handle to the garbage collector
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # programmatic call: there is no response to redirect
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
1007 | |
1008 | |
1009 |