1: from OFS.Folder import Folder
2: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
3: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
4: from AccessControl import ClassSecurityInfo
5: from AccessControl import getSecurityManager
6: from Globals import package_home
7:
8: #from Ft.Xml import EMPTY_NAMESPACE, Parse
9: #import Ft.Xml.Domlette
10:
11: import xml.etree.ElementTree as ET
12:
13: import os.path
14: import sys
15: import urllib
16: import logging
17: import math
18: import urlparse
19: import re
20: import string
21:
22: from SrvTxtUtils import getInt, getText, getHttpData
23:
def logger(txt,method,txt2):
    """Log the concatenation of txt and txt2 at INFO level.

    The method argument is accepted but not used.
    """
    message = txt + txt2
    logging.info(message)
27:
28:
def serializeNode(node, encoding="utf-8"):
    """Return node serialized as XML by ElementTree.

    NOTE: the encoding argument is accepted but not handed to ET.tostring.
    """
    # (the former 4Suite implementation printed the node into a string stream)
    return ET.tostring(node)
39:
def browserCheck(self):
    """Check the request's User-Agent header to find out the browser type.

    Returns a dict with the keys 'ua', 'isIE', 'isN4', 'isMac', 'isWin',
    'isIEWin', 'isIEMac', 'staticHTML' and the version strings 'versIE',
    'versFirefox', 'versSafariChrome', 'versOpera' ("" when not detected).
    A missing User-Agent header is treated as an empty string.
    """
    # without a header get_header yields None, which would break every string test below
    ua = self.REQUEST.get_header("HTTP_USER_AGENT") or ""
    isIE = 'MSIE' in ua
    bt = {
        'ua': ua,
        'isIE': isIE,
        # Netscape 4 is only considered when the UA is not IE
        'isN4': (not isIE) and ('Mozilla/4.' in ua),
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
        }

    # s[s.find(c):] keeps the delimiter c; when c is absent find() is -1
    # and only the last character remains, so the checks below simply fail
    fromParen = ua[ua.find('('):]
    afterParen = ua[ua.find(')'):]
    afterParenTokens = afterParen.split(" ")

    # Safari or Chrome identification: look behind the second (...) group
    secondGroup = afterParen[afterParen.find('('):]
    afterSecondGroup = secondGroup[secondGroup.find(')'):]
    tokens = afterSecondGroup.split(" ")
    if len(tokens) > 2 and "Safari" in tokens[2]:
        nameVers = tokens[1].split("/")
        if len(nameVers) > 1:
            bt['versSafariChrome'] = nameVers[1]

    # IE identification: second "; "-separated field inside the first (...)
    fields = fromParen.split("; ")
    if len(fields) > 1 and "MSIE" in fields[1]:
        nameVers = fields[1].split(" ")
        if len(nameVers) > 1:
            bt['versIE'] = nameVers[1]

    # Firefox identification: third token after the first (...) group
    if len(afterParenTokens) > 2 and "Firefox" in afterParenTokens[2]:
        nameVers = afterParenTokens[2].split("/")
        if len(nameVers) > 1:
            logging.debug("FIREFOX: %s"%(nameVers[1]))
            # only the first three characters of the version are kept
            bt['versFirefox'] = nameVers[1][0:3]

    # Opera identification
    if "Opera" in ua:
        parts = fromParen[fromParen.find(')'):].split("/")
        if len(parts) > 2:
            bt['versOpera'] = parts[2]

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
100:
def getParentPath(path, cnt=1):
    """Return path with its last cnt segments removed.

    Trailing slashes are stripped before the path is split at '/'.
    """
    segments = path.rstrip('/').split('/')
    kept = segments[:-cnt]
    return '/'.join(kept)
107:
108:
109: ##
110: ## documentViewer class
111: ##
class documentViewer(Folder):
    """document viewer"""
    meta_type = "Document viewer"

    security = ClassSecurityInfo()
    # extra management tab pointing at the configuration form
    manage_options = Folder.manage_options + (
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # MetaDataFolder instance (None until it has been looked up)
    metadataService = None

    # templates and forms
    viewer_text = PageTemplateFile('zpt/viewer_text', globals())
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis = PageTemplateFile('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
143:
144:
def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
    """Initialise the document viewer.

    Stores id, title, thumbnail grid size and the authorized groups, creates
    the 'template' sub-folder and tries to put a 'fulltextclient' text server
    and a 'zogilib' object into it. Failures while creating these helpers are
    logged and otherwise ignored.
    """
    self.id = id
    self.title = title
    self.thumbcols = thumbcols
    self.thumbrows = thumbrows
    # authgroups is a comma-delimited list of authorized groups
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    # create template folder so we can always use template.something
    templateFolder = Folder('template')
    self._setObject('template', templateFolder)  # old style (not Zope-2.12 item assignment)

    try:
        import MpdlXmlTextServer
        textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
        templateFolder._setObject('fulltextclient', textServer)
    except Exception as e:
        logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

    try:
        from Products.zogiLib.zogiLib import zogiLib
        zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
        templateFolder._setObject('zogilib', zogilib)
    except Exception as e:
        logging.error("Unable to create zogiLib for zogilib: "+str(e))

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    # the attribute is only stored when a URL was given
    if digilibBaseUrl is not None:
        self.digilibBaseUrl = digilibBaseUrl
182:
183:
184: # proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Forward the keyword arguments to template.fulltextclient.getTextPage and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
188:
def getOrigPages(self, **args):
    """Forward the keyword arguments to template.fulltextclient.getOrigPages and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getOrigPages(**args)
192:
def getOrigPagesNorm(self, **args):
    """Forward the keyword arguments to template.fulltextclient.getOrigPagesNorm and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getOrigPagesNorm(**args)
196:
def getQuery(self, **args):
    """Forward the keyword arguments to template.fulltextclient.getQuery and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getQuery(**args)
200:
def getSearch(self, **args):
    """Forward the keyword arguments to template.fulltextclient.getSearch and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getSearch(**args)
204:
def getGisPlaces(self, **args):
    """Forward the keyword arguments to template.fulltextclient.getGisPlaces and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getGisPlaces(**args)
208:
def getAllGisPlaces(self, **args):
    """Forward the keyword arguments to template.fulltextclient.getAllGisPlaces and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getAllGisPlaces(**args)
212:
def getWordInfo(self, **args):
    """Forward the keyword arguments to template.fulltextclient.getWordInfo and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getWordInfo(**args)
216:
def getLemma(self, **args):
    """Forward the keyword arguments to template.fulltextclient.getLemma and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getLemma(**args)
220:
def getLemmaQuery(self, **args):
    """Forward the keyword arguments to template.fulltextclient.getLemmaQuery and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getLemmaQuery(**args)
224:
def getLex(self, **args):
    """Forward the keyword arguments to template.fulltextclient.getLex and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getLex(**args)
228:
def getToc(self, **args):
    """Forward the keyword arguments to template.fulltextclient.getToc and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
232:
def getTocPage(self, **args):
    """Forward the keyword arguments to template.fulltextclient.getTocPage and return its result."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
236:
237:
security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    '''
    Render the 'thumbs_main_rss' template for a document.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' becomes 'text' when
        docinfo has a 'textURL' key or a non-empty 'textURLPath', else 'images'
    @param start: passed on to getPageinfo as start
    @param pn: passed on to getPageinfo as current page
    '''
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # digilibBaseUrl may never have been set on this instance, so read it
    # defensively (as index_html does) instead of risking an AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # auto mode: text if a text URL is set, images otherwise
        if ("textURL" in docinfo) or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
270:
271:
security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",viewType=None,tocMode="thumbs",start=1,pn=1):
    """
    Render the viewer page for a document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'images': display images, 'text': display text, default is 'auto'
    @param viewType: sub-type of viewMode, e.g. 'dict' for viewMode='text'
    @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
    @return: output of template 'viewer_<viewMode>', or an error string when
        the template folder or the template is missing
    """
    logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewType=%s start=%s pn=%s"%(mode,url,viewMode,viewType,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)

    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode == "text_dict":
        # legacy fix
        viewMode = "text"
        viewType = "dict"

    elif viewMode == "auto":
        # auto viewMode: text if there is a text else images
        hasText = docinfo.get('textURL', None) or docinfo.get('textURLPath', None)
        if hasText:
            viewMode = "text"
            viewType = "dict"
        else:
            viewMode = "images"

    # stringify viewType
    if isinstance(viewType, list):
        logging.debug("index_html: viewType is list:%s"%viewType)
        nonEmptyTypes = [t for t in viewType if t]
        viewType = ','.join(nonEmptyTypes)

    pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewType=viewType, tocMode=tocMode)

    # get template /template/viewer_$viewMode
    templateName = 'viewer_%s'%viewMode
    pt = getattr(self.template, templateName, None)
    if pt is not None:
        # and execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo)

    logging.error("No template for viewMode=%s!"%viewMode)
    # TODO: error page?
    return "No template for viewMode=%s!"%viewMode
328:
def generateMarks(self,mk):
    """Return the marks in mk as 'mk=...' query parameters.

    mk may be None (returns ""), a single value, or a list of values.
    Several marks are joined with '&' so that each one stays a separate
    parameter (they used to be glued together without any separator).
    """
    if mk is None:
        return ""
    if not isinstance(mk, list):
        mk = [mk]
    return "&".join(["mk=%s"%m for m in mk])
338:
339:
def getBrowser(self):
    """Return the browser info dict produced by browserCheck for this request (and log it)."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
345:
def findDigilibUrl(self):
    """Return the digilib base URL reported by template.zogilib.getDLBaseUrl()."""
    zogilib = self.template.zogilib
    return zogilib.getDLBaseUrl()
350:
def getDocumentViewerURL(self):
    """Return the URL of this instance, as given by absolute_url()."""
    ownUrl = self.absolute_url()
    return ownUrl
354:
def getStyle(self, idx, selected, style=""):
    """Return style, with 'sel' appended when idx equals selected."""
    suffix = 'sel' if idx == selected else ''
    return style + suffix
362:
def getParams(self, param=None, val=None, params=None, duplicates=None):
    """Return a dict with URL parameters.

    Starts from a copy of the request's form parameters and additionally
    applies param=val and/or the entries of dict params. A value of None
    deletes the key. The single val is stored as str(val); values from
    params are stored unchanged.

    duplicates controls list values (coming from duplicate keys):
    'comma' joins the non-empty entries with ',', 'first' takes the first
    non-empty entry ('' when there is none); any other value leaves lists
    untouched.
    """
    # copy existing request params so the request itself is not modified
    newParams = self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            newParams.pop(param, None)
        else:
            newParams[param] = str(val)

    # change more params
    if params is not None:
        for (k, v) in params.items():
            if v is None:
                # val=None removes param
                newParams.pop(k, None)
            else:
                newParams[k] = v

    if duplicates:
        # eliminate lists (coming from duplicate keys)
        for k in list(newParams.keys()):
            v = newParams[k]
            if not isinstance(v, list):
                continue

            nonEmpty = [t for t in v if t]
            if duplicates == 'comma':
                # make comma-separated list of non-empty entries
                newParams[k] = ','.join(nonEmpty)
            elif duplicates == 'first':
                # take first non-empty entry; a list of only empty entries
                # must not raise IndexError
                if nonEmpty:
                    newParams[k] = nonEmpty[0]
                else:
                    newParams[k] = ''

    return newParams
401:
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'):
    """Return URL to the documentviewer with parameter param set to val or from dict params.

    The parameters come from getParams(); each value is quoted with
    urllib.quote_plus (not escaping '/') and the pairs are joined with
    paramSep. baseUrl defaults to getDocumentViewerURL().
    """
    urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
    pairs = []
    for (k, v) in urlParams.items():
        # quote_plus needs a byte string: unicode values are encoded as UTF-8,
        # everything else is stringified without decoding, so non-ASCII
        # request values no longer raise while the link is built
        if isinstance(v, unicode):
            v = v.encode('utf-8')
        else:
            v = str(v)

        pairs.append("%s=%s"%(k, urllib.quote_plus(v, '/')))

    ps = paramSep.join(pairs)
    if baseUrl is None:
        baseUrl = self.getDocumentViewerURL()

    url = "%s?%s"%(baseUrl, ps)
    return url
412:
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'):
    """Return the same link as getLink, with the parameters separated by '&amp;'.

    Passing a plain '&' here made this method an exact duplicate of getLink;
    the escaped separator is what makes the result usable inside HTML/XML.
    """
    return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&amp;', duplicates=duplicates)
416:
417:
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (output of template 'info_xml').

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    """
    # digilibBaseUrl may never have been set on this instance, so read it
    # defensively (as index_html does) instead of risking an AttributeError
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
426:
def isAccessible(self, docinfo):
    """Return True if access to the resource described by docinfo is granted.

    accessType 'free' is always accessible. A missing accessType or one
    listed in self.authgroups requires a user whose name is not
    "Anonymous User". Any other accessType is refused.
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)
    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False

    return (user.getUserName() != "Anonymous User")
447:
448:
449:
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, depending on mode.

    A docinfo cached in the session is reused when its mode and url match.
    Otherwise a new docinfo is assembled from index.meta (via
    self.metadataService) and digilib, stored in the session and returned.

    Raises IOError when mode is 'texttool' and no index.meta is found, and
    ValueError for a mode other than 'texttool', 'imagepath' or 'filepath'.
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    # look for cached docinfo in session
    if self.REQUEST.SESSION.has_key('docinfo'):
        cached = self.REQUEST.SESSION['docinfo']
        # check if it is still current
        if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo, including our own URL
    docinfo = {'mode': mode, 'url': url}
    docinfo['viewerUrl'] = self.getDocumentViewerURL()
    docinfo['digilibBaseUrl'] = self.digilibBaseUrl

    # get index.meta DOM
    metaService = self.metadataService
    if mode == "texttool":
        # url points to document dir or index.meta
        metaDom = metaService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode == "imagepath":
        # url points to folder with images, index.meta optional
        # assume index.meta in parent dir
        docUrl = getParentPath(url)
        metaDom = metaService.getDomFromPathOrUrl(docUrl)

    elif mode == "filepath":
        # url points to image file, index.meta optional
        # assume index.meta is two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = metaService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # process index.meta contents
    hasResource = (metaDom is not None) and (metaDom.tag == 'resource')
    if hasResource:
        # document directory name and path
        resource = metaService.getResourceData(dom=metaDom)
        if resource:
            docinfo = self.getDocinfoFromResource(docinfo, resource)

        # texttool info
        texttool = metaService.getTexttoolData(dom=metaDom)
        if texttool:
            docinfo = self.getDocinfoFromTexttool(docinfo, texttool)

        # bib info; without bib try info.xml
        bib = metaService.getBibData(dom=metaDom)
        if not bib:
            docinfo = self.getDocinfoFromPresentationInfoXml(docinfo)
        else:
            docinfo = self.getDocinfoFromBib(docinfo, bib)

        # auth info
        access = metaService.getAccessData(dom=metaDom)
        if access:
            docinfo = self.getDocinfoFromAccess(docinfo, access)

        # attribution info
        attribution = metaService.getAttributionData(dom=metaDom)
        if attribution:
            logging.debug("getDocinfo: attribution=%s"%repr(attribution))
            docinfo['attribution'] = attribution

        # copyright info
        copyright = metaService.getCopyrightData(dom=metaDom)
        if copyright:
            logging.debug("getDocinfo: copyright=%s"%repr(copyright))
            docinfo['copyright'] = copyright

    # image path
    if mode != 'texttool':
        # override image path from texttool with url
        docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1)

    # number of images from digilib
    imagePath = docinfo.get('imagePath', None)
    if imagePath:
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
        docinfo = self.getDocinfoFromDigilib(docinfo, imagePath)

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    self.REQUEST.SESSION['docinfo'] = docinfo
    return docinfo
549:
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets docinfo['documentName'] from resource 'name' and
    docinfo['documentPath'] from resource 'archive-path' (made absolute and
    extended by the document name). Without an archive-path the document URL
    from docinfo is used unless it starts with 'http:'. A leading
    '/mpiwg/online' is removed from the path. Returns docinfo.
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path
        if docPath[0] != '/':
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it as 'documentUrl'
        # (the old spelling 'documentURL' is still honoured if present)
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith('http:'):
            docPath = docUrl

    if docPath:
        # fix URLs starting with /mpiwg/online
        docPath = docPath.replace('/mpiwg/online', '', 1)

    docinfo['documentPath'] = docPath
    return docinfo
574:
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    Sets 'imagePath', 'textURL', 'textURLPath' and 'presentationUrl' when the
    corresponding texttool entries exist (paths are joined onto
    docinfo['documentPath']), and always sets 'pageFlow', 'oddPage' and
    'titlePage' with their defaults. Returns docinfo.
    """
    docPath = docinfo.get('documentPath', None)

    # image dir
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        joined = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = joined.replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        scheme = urlparse.urlparse(oldTextUrl)[0]
        if scheme == "":
            # not a URL: treat it as path relative to the document
            oldTextUrl = os.path.join(docPath, oldTextUrl)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # page flow
    docinfo['pageFlow'] = texttool.get('page-flow', 'ltr')

    # odd pages are left
    docinfo['oddPage'] = texttool.get('odd-scan-position', 'left')

    # number of title page (0: not defined)
    docinfo['titlePage'] = texttool.get('title-scan-no', 0)

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        if not presentation.startswith('http:'):
            presentation = os.path.join(docPath, presentation)

        docinfo['presentationUrl'] = presentation

    return docinfo
618:
def getDocinfoFromBib(self, docinfo, bib):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib dict as 'bib', its '@type' as 'bibType', and
    'creator', 'title' and 'date' taken from
    self.metadataService.getDCMappedData(bib). Returns docinfo.
    """
    logging.debug("getDocinfoFromBib bib=%s"%repr(bib))
    # put all raw bib fields in dict "bib"
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dc.get(field, None)

    return docinfo
632:
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Sets docinfo['accessType'] to the access type found in
    acc['@attr']['type']; for the types 'group' and 'institution' the
    lower-cased acc['name'] is used instead. Incomplete access data leaves
    docinfo unchanged. Returns docinfo.
    """
    #TODO: also read resource type
    logging.debug("getDocinfoFromAccess acc=%s"%repr(acc))
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ['group', 'institution']:
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError) as e:
        # best effort: missing or malformed entries just mean "no access info"
        logging.debug("getDocinfoFromAccess: incomplete access data: %s"%repr(e))

    return docinfo
650:
def getDocinfoFromDigilib(self, docinfo, path):
    """Store the number of pages reported by digilib for path in docinfo['numPages'].

    Fetches dirInfo-xml.jsp from self.digilibBaseUrl and reads the text of
    its 'size' element (0 when empty). When nothing can be fetched, docinfo
    is returned unchanged.
    """
    infoUrl = "%s/dirInfo-xml.jsp?mo=dir&fn=%s"%(self.digilibBaseUrl, path)
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    dom = ET.fromstring(txt)
    size = getText(dom.find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    numPages = 0
    if size:
        numPages = int(size)

    docinfo['numPages'] = numPages
    # TODO: produce and keep list of image names and numbers
    return docinfo
669:
670:
def getDocinfoFromPresentationInfoXml(self,docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the document at docinfo['presentationUrl'] (through the digilib
    Texter servlet when it is not an http:// URL) and stores the text of its
    author, title and date elements as 'creator', 'title' and 'date'.
    docinfo is returned unchanged when there is no URL or nothing could be read.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = server+url

    txt = getHttpData(metaUrl)
    # an empty response cannot be parsed either, so treat it like "no data"
    # (same check as in getDocinfoFromDigilib)
    if not txt:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    docinfo['creator'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['date'] = getText(dom.find(".//date"))
    return docinfo
699:
700:
def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewType=None, tocMode=None):
    """Return the pageinfo dict for the given parameters.

    rows and cols default to self.thumbrows and self.thumbcols. An empty
    start is derived from current and the group size (rows * cols). Search
    and table-of-contents settings are read from the request. When docinfo
    has no page count but a 'textURLPath', the text page is fetched (and
    kept as 'textPage') before the page count is read again.
    """
    logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewType=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewType,tocMode))
    request = self.REQUEST

    current = getInt(current)
    rows = int(rows or self.thumbrows)
    cols = int(cols or self.thumbcols)
    grpsize = cols * rows
    # if start is empty use one around current
    start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
    pn = request.get('pn','1')

    pageinfo = {
        'viewMode': viewMode,
        'viewType': viewType,
        'tocMode': tocMode,
        'current': current,
        'rows': rows,
        'cols': cols,
        'groupsize': grpsize,
        'start': start,
        'pn': pn,
        }

    np = int(docinfo.get('numPages', 0))
    if np == 0 and docinfo.get('textURLPath', None):
        # numPages unknown - maybe we can get it from text page
        # (cache text page as well)
        pageinfo['textPage'] = self.getTextPage(mode=viewType, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
        np = int(docinfo.get('numPages', 0))

    # number of thumbnail groups, counting a last incomplete group
    numgroups = int(np / grpsize)
    if np % grpsize > 0:
        numgroups += 1

    pageinfo['numgroups'] = numgroups

    pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl'
    oddScanLeft = docinfo.get('oddPage', 'left') != 'right'
    # add zeroth page for two columns
    pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft))
    pageinfo['pageZero'] = pageZero
    pageinfo['pageList'] = self.getPageList(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)

    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    pageinfo['query'] = request.get('query','')
    pageinfo['queryType'] = request.get('queryType','')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = getInt(request.get('tocPageSize', 30))
    pageinfo['queryPageSize'] = getInt(request.get('queryPageSize', 10))
    pageinfo['tocPN'] = getInt(request.get('tocPN', '1'))
    pageinfo['searchPN'] = getInt(request.get('searchPN','1'))

    # limit tocPN to the number of pages of a cached toc
    tocSizeKey = 'tocSize_%s'%tocMode
    if tocSizeKey in docinfo:
        tocSize = docinfo[tocSizeKey]
        tocPageSize = pageinfo['tocPageSize']
        tocPages = tocSize/tocPageSize
        if tocSize%tocPageSize>0:
            tocPages = tocPages+1

        pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN'])

    return pageinfo
765:
766:
def getPageList(self, start=None, rows=None, cols=None, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
    """Return a list of rows of page dicts for one screenful of thumbnails.

    Every page is {'idx': n}; n is None for positions outside minIdx..maxIdx.
    maxIdx == 0 means start + rows * cols. With pageZero and start == 1 the
    numbering begins at 0. Rows are reversed when pageFlowLtr is false.
    """
    if maxIdx == 0:
        maxIdx = start + rows * cols

    # correct beginning for a zeroth page
    idx = 0 if (pageZero and start == 1) else start

    pages = []
    for r in range(rows):
        row = []
        for c in range(cols):
            shown = idx if (minIdx <= idx <= maxIdx) else None
            row.append({'idx':shown})
            idx += 1

        if not pageFlowLtr:
            # right-to-left: first page of the row goes last
            row.reverse()

        pages.append(row)

    logging.debug("getPageList returns=%s"%(pages))
    return pages
797:
798:
security.declareProtected('View management screens','changeDocumentViewerForm')
changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())

def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Change the configuration of the document viewer.

    Stores title, digilibBaseUrl, thumbnail rows/columns and the
    comma-delimited authgroups (stripped and lower-cased), looks up the
    'metadata' object again and redirects to 'manage_main' when a RESPONSE
    is given.
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols
    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    try:
        # assume MetaDataFolder instance is called metadata
        self.metadataService = getattr(self, 'metadata')
    except Exception as e:
        logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
817:
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    boundForm = form.__of__(self)
    return boundForm()
822:
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to this container.

    Redirects to 'manage_main' when a RESPONSE is given.
    """
    viewer = documentViewer(id, imageScalerUrl=imageScalerUrl, title=title, textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
830:
831: ## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template class used for the document viewer."""
    meta_type = "DocumentViewer Template"
835:
836:
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a DocumentViewerTemplate."""
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    boundForm = form.__of__(self)
    return boundForm()
841:
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main template.

    The new template gets the content of 'zpt/viewer_main.zpt' from this
    package and, if given, the title. With a REQUEST the browser is
    redirected to the new object's manage_main; without one (e.g. when
    called from code) nothing is redirected. The text and submit arguments
    are accepted but not used. Always returns ''.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    # close the file deterministically instead of leaking the handle
    with open(templatePath, 'r') as templateFile:
        txt = templateFile.read()

    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    if REQUEST is None:
        # REQUEST defaults to None; there is nothing to redirect then
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
861:
862:
863:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>