documentViewer/documentViewer.py - view

File: [Repository] / documentViewer / documentViewer.py
Revision 1.175.2.27: download - view: text, annotated - select for diffs - revision graph
Tue Aug 16 16:27:08 2011 UTC (12 years, 10 months ago) by casties
Branches: elementtree
Diff to: branchpoint 1.175: preferred, unified

more new template stuff. more batching methods in documentViewer.

1: from OFS.Folder import Folder 2: from OFS.Image import File 3: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate 4: from Products.PageTemplates.PageTemplateFile import PageTemplateFile 5: from AccessControl import ClassSecurityInfo 6: from AccessControl import getSecurityManager 7: from Globals import package_home 8: 9: #from Ft.Xml import EMPTY_NAMESPACE, Parse 10: #import Ft.Xml.Domlette 11: 12: import xml.etree.ElementTree as ET 13: 14: import os.path 15: import sys 16: import urllib 17: import logging 18: import math 19: import urlparse 20: import re 21: import string 22: 23: from SrvTxtUtils import getInt, getText, getHttpData 24: 25: def logger(txt,method,txt2): 26: """logging""" 27: logging.info(txt+ txt2) 28: 29: 30: def serializeNode(node, encoding="utf-8"): 31: """returns a string containing node as XML""" 32: s = ET.tostring(node) 33: 34: # 4Suite: 35: # stream = cStringIO.StringIO() 36: # Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) 37: # s = stream.getvalue() 38: # stream.close() 39: return s 40: 41: def browserCheck(self): 42: """check the browsers request to find out the browser type""" 43: bt = {} 44: ua = self.REQUEST.get_header("HTTP_USER_AGENT") 45: bt['ua'] = ua 46: bt['isIE'] = False 47: bt['isN4'] = False 48: bt['versFirefox']="" 49: bt['versIE']="" 50: bt['versSafariChrome']="" 51: bt['versOpera']="" 52: 53: if string.find(ua, 'MSIE') > -1: 54: bt['isIE'] = True 55: else: 56: bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1) 57: # Safari oder Chrome identification 58: try: 59: nav = ua[string.find(ua, '('):] 60: nav1=ua[string.find(ua,')'):] 61: nav2=nav1[string.find(nav1,'('):] 62: nav3=nav2[string.find(nav2,')'):] 63: ie = string.split(nav, "; ")[1] 64: ie1 =string.split(nav1, " ")[2] 65: ie2 =string.split(nav3, " ")[1] 66: ie3 =string.split(nav3, " ")[2] 67: if string.find(ie3, "Safari") >-1: 68: bt['versSafariChrome']=string.split(ie2, "/")[1] 69: except: pass 70: # IE identification 71: try: 72: nav = ua[string.find(ua, '('):] 73: ie = string.split(nav, "; ")[1] 74: if string.find(ie, "MSIE") > -1: 75: bt['versIE'] = string.split(ie, " ")[1] 76: except:pass 77: # Firefox identification 78: try: 79: nav = ua[string.find(ua, '('):] 80: nav1=ua[string.find(ua,')'):] 81: if string.find(ie1, "Firefox") >-1: 82: nav5= string.split(ie1, "/")[1] 83: logging.debug("FIREFOX: %s"%(nav5)) 84: bt['versFirefox']=nav5[0:3] 85: except:pass 86: #Opera identification 87: try: 88: if string.find(ua,"Opera") >-1: 89: nav = ua[string.find(ua, '('):] 90: nav1=nav[string.find(nav,')'):] 91: bt['versOpera']=string.split(nav1,"/")[2] 92: except:pass 93: 94: bt['isMac'] = string.find(ua, 'Macintosh') > -1 95: bt['isWin'] = string.find(ua, 'Windows') > -1 96: bt['isIEWin'] = bt['isIE'] and bt['isWin'] 97: bt['isIEMac'] = bt['isIE'] and bt['isMac'] 98: bt['staticHTML'] = False 99: 100: return bt 101: 102: def getParentPath(path, cnt=1): 103: """returns pathname shortened by cnt""" 104: # make sure path doesn't end with / 105: path = path.rstrip('/') 106: # split by /, shorten, and reassemble 107: return '/'.join(path.split('/')[0:-cnt]) 108: 109: 110: ## 111: ## documentViewer class 112: ## 113: class documentViewer(Folder): 114: """document viewer""" 115: meta_type="Document viewer" 116: 117: security=ClassSecurityInfo() 118: manage_options=Folder.manage_options+( 119: {'label':'main config','action':'changeDocumentViewerForm'}, 120: ) 121: 122: metadataService = None 123: """MetaDataFolder instance""" 124: 125: # templates and forms 126: viewer_text = PageTemplateFile('zpt/viewer_text', globals()) 127: viewer_main = PageTemplateFile('zpt/viewer_main', globals()) 128: toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals()) 129: toc_text = PageTemplateFile('zpt/toc_text', globals()) 130: toc_figures = PageTemplateFile('zpt/toc_figures', globals()) 131: page_main_images = PageTemplateFile('zpt/page_main_images', globals()) 132: page_main_double = PageTemplateFile('zpt/page_main_double', globals()) 133: page_main_text = PageTemplateFile('zpt/page_main_text', globals()) 134: page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) 135: page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals()) 136: page_main_xml = PageTemplateFile('zpt/page_main_xml', globals()) 137: page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals()) 138: head_main = PageTemplateFile('zpt/head_main', globals()) 139: info_xml = PageTemplateFile('zpt/info_xml', globals()) 140: # TODO: can this be nicer? 141: docuviewer_css = File('docuviewer_css','',open(os.path.join(package_home(globals()),'css/docuviewer.css')), content_type='text/css') 142: 143: 144: thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) 145: 146: 147: def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"): 148: """init document viewer""" 149: self.id=id 150: self.title=title 151: self.thumbcols = thumbcols 152: self.thumbrows = thumbrows 153: # authgroups is list of authorized groups (delimited by ,) 154: self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 155: # create template folder so we can always use template.something 156: 157: templateFolder = Folder('template') 158: #self['template'] = templateFolder # Zope-2.12 style 159: self._setObject('template',templateFolder) # old style 160: try: 161: import MpdlXmlTextServer 162: textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName) 163: #templateFolder['fulltextclient'] = xmlRpcClient 164: templateFolder._setObject('fulltextclient',textServer) 165: except Exception, e: 166: logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e)) 167: 168: try: 169: from Products.zogiLib.zogiLib import zogiLib 170: zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book") 171: #templateFolder['zogilib'] = zogilib 172: templateFolder._setObject('zogilib',zogilib) 173: except Exception, e: 174: logging.error("Unable to create zogiLib for zogilib: "+str(e)) 175: 176: try: 177: # assume MetaDataFolder instance is called metadata 178: self.metadataService = getattr(self, 'metadata') 179: except Exception, e: 180: logging.error("Unable to find MetaDataFolder 'metadata': "+str(e)) 181: 182: if digilibBaseUrl is not None: 183: self.digilibBaseUrl = digilibBaseUrl 184: 185: 186: # proxy text server methods to fulltextclient 187: def getTextPage(self, **args): 188: """get page""" 189: return self.template.fulltextclient.getTextPage(**args) 190: 191: def getOrigPages(self, **args): 192: """get page""" 193: return self.template.fulltextclient.getOrigPages(**args) 194: 195: def getOrigPagesNorm(self, **args): 196: """get page""" 197: return self.template.fulltextclient.getOrigPagesNorm(**args) 198: 199: def getQuery(self, **args): 200: """get query in search""" 201: return self.template.fulltextclient.getQuery(**args) 202: 203: def getSearch(self, **args): 204: """get search""" 205: return self.template.fulltextclient.getSearch(**args) 206: 207: def getGisPlaces(self, **args): 208: """get gis places""" 209: return self.template.fulltextclient.getGisPlaces(**args) 210: 211: def getAllGisPlaces(self, **args): 212: """get all gis places """ 213: return self.template.fulltextclient.getAllGisPlaces(**args) 214: 215: def getWordInfo(self, **args): 216: """get translate""" 217: return self.template.fulltextclient.getWordInfo(**args) 218: 219: def getLemma(self, **args): 220: """get lemma""" 221: return self.template.fulltextclient.getLemma(**args) 222: 223: def getLemmaQuery(self, **args): 224: """get query""" 225: return self.template.fulltextclient.getLemmaQuery(**args) 226: 227: def getLex(self, **args): 228: """get lex""" 229: return self.template.fulltextclient.getLex(**args) 230: 231: def getToc(self, **args): 232: """get toc""" 233: return self.template.fulltextclient.getToc(**args) 234: 235: def getTocPage(self, **args): 236: """get tocpage""" 237: return self.template.fulltextclient.getTocPage(**args) 238: 239: 240: security.declareProtected('View','thumbs_rss') 241: def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): 242: ''' 243: view it 244: @param mode: defines how to access the document behind url 245: @param url: url which contains display information 246: @param viewMode: if images display images, if text display text, default is images (text,images or auto) 247: 248: ''' 249: logging.debug("HHHHHHHHHHHHHH:load the rss") 250: logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 251: 252: if not hasattr(self, 'template'): 253: # create template folder if it doesn't exist 254: self.manage_addFolder('template') 255: 256: if not self.digilibBaseUrl: 257: self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 258: 259: docinfo = self.getDocinfo(mode=mode,url=url) 260: #pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo) 261: pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo) 262: ''' ZDES ''' 263: pt = getattr(self.template, 'thumbs_main_rss') 264: 265: if viewMode=="auto": # automodus gewaehlt 266: if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert 267: viewMode="text" 268: else: 269: viewMode="images" 270: 271: return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) 272: 273: 274: security.declareProtected('View','index_html') 275: def index_html(self,url,mode="texttool",viewMode="auto",viewType=None,tocMode="thumbs",start=1,pn=1): 276: """ 277: view page 278: @param url: url which contains display information 279: @param mode: defines how to access the document behind url 280: @param viewMode: 'images': display images, 'text': display text, default is 'auto' 281: @param viewType: sub-type of viewMode, e.g. 'dict' for viewMode='text' 282: @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) 283: """ 284: 285: logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewType=%s start=%s pn=%s"%(mode,url,viewMode,viewType,start,pn)) 286: 287: if not hasattr(self, 'template'): 288: # this won't work 289: logging.error("template folder missing!") 290: return "ERROR: template folder missing!" 291: 292: if not getattr(self, 'digilibBaseUrl', None): 293: self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary" 294: 295: docinfo = self.getDocinfo(mode=mode,url=url) 296: 297: if tocMode != "thumbs": 298: # get table of contents 299: docinfo = self.getToc(mode=tocMode, docinfo=docinfo) 300: 301: # auto viewMode: text if there is a text else images 302: if viewMode=="auto": 303: if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 304: viewMode = "text" 305: viewType = "dict" 306: else: 307: viewMode = "images" 308: 309: elif viewMode == "text_dict": 310: # legacy fix 311: viewMode = "text" 312: viewType = "dict" 313: 314: # stringify viewType 315: if isinstance(viewType, list): 316: logging.debug("index_html: viewType is list:%s"%viewType) 317: viewType = ','.join([t for t in viewType if t]) 318: 319: pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewType=viewType, tocMode=tocMode) 320: 321: # get template /template/viewer_$viewMode 322: pt = getattr(self.template, 'viewer_%s'%viewMode, None) 323: if pt is None: 324: logging.error("No template for viewMode=%s!"%viewMode) 325: # TODO: error page? 326: return "No template for viewMode=%s!"%viewMode 327: 328: # and execute with parameters 329: return pt(docinfo=docinfo, pageinfo=pageinfo) 330: 331: def generateMarks(self,mk): 332: ret="" 333: if mk is None: 334: return "" 335: if not isinstance(mk, list): 336: mk=[mk] 337: for m in mk: 338: ret+="mk=%s"%m 339: return ret 340: 341: 342: def getBrowser(self): 343: """getBrowser the version of browser """ 344: bt = browserCheck(self) 345: logging.debug("BROWSER VERSION: %s"%(bt)) 346: return bt 347: 348: def findDigilibUrl(self): 349: """try to get the digilib URL from zogilib""" 350: url = self.template.zogilib.getDLBaseUrl() 351: return url 352: 353: def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None): 354: """returns URL to digilib Scaler with params""" 355: url = None 356: if docinfo is not None: 357: url = docinfo.get('imageURL', None) 358: 359: if url is None: 360: url = "%s/servlet/Scaler?"%self.digilibBaseUrl 361: if fn is None and docinfo is not None: 362: fn = docinfo.get('imagePath','') 363: 364: url += "fn=%s"%fn 365: 366: if pn: 367: url += "&pn=%s"%pn 368: 369: url += "&dw=%s&dh=%s"%(dw,dh) 370: return url 371: 372: def getDocumentViewerURL(self): 373: """returns the URL of this instance""" 374: return self.absolute_url() 375: 376: def getStyle(self, idx, selected, style=""): 377: """returns a string with the given style and append 'sel' if idx == selected.""" 378: #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style)) 379: if idx == selected: 380: return style + 'sel' 381: else: 382: return style 383: 384: def getParams(self, param=None, val=None, params=None, duplicates=None): 385: """returns dict with URL parameters. 386: 387: Takes URL parameters and additionally param=val or dict params. 388: Deletes key if value is None.""" 389: # copy existing request params 390: newParams=self.REQUEST.form.copy() 391: # change single param 392: if param is not None: 393: if val is None: 394: if newParams.has_key(param): 395: del newParams[param] 396: else: 397: newParams[param] = str(val) 398: 399: # change more params 400: if params is not None: 401: for (k, v) in params.items(): 402: if v is None: 403: # val=None removes param 404: if newParams.has_key(k): 405: del newParams[k] 406: 407: else: 408: newParams[k] = v 409: 410: if duplicates: 411: # eliminate lists (coming from duplicate keys) 412: for (k,v) in newParams.items(): 413: if isinstance(v, list): 414: if duplicates == 'comma': 415: # make comma-separated list of non-empty entries 416: newParams[k] = ','.join([t for t in v if t]) 417: elif duplicates == 'first': 418: # take first non-empty entry 419: newParams[k] = [t for t in v if t][0] 420: 421: return newParams 422: 423: def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'): 424: """returns URL to documentviewer with parameter param set to val or from dict params""" 425: urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates) 426: # quote values and assemble into query string (not escaping '/') 427: ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(unicode(v),'/')) for (k, v) in urlParams.items()]) 428: if baseUrl is None: 429: baseUrl = self.getDocumentViewerURL() 430: 431: url = "%s?%s"%(baseUrl, ps) 432: return url 433: 434: def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'): 435: """link to documentviewer with parameter param set to val""" 436: return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates) 437: 438: 439: def getInfo_xml(self,url,mode): 440: """returns info about the document as XML""" 441: if not self.digilibBaseUrl: 442: self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 443: 444: docinfo = self.getDocinfo(mode=mode,url=url) 445: pt = getattr(self.template, 'info_xml') 446: return pt(docinfo=docinfo) 447: 448: def isAccessible(self, docinfo): 449: """returns if access to the resource is granted""" 450: access = docinfo.get('accessType', None) 451: logging.debug("documentViewer (accessOK) access type %s"%access) 452: if access == 'free': 453: logging.debug("documentViewer (accessOK) access is free") 454: return True 455: 456: elif access is None or access in self.authgroups: 457: # only local access -- only logged in users 458: user = getSecurityManager().getUser() 459: logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr())) 460: if user is not None: 461: #print "user: ", user 462: return (user.getUserName() != "Anonymous User") 463: else: 464: return False 465: 466: logging.error("documentViewer (accessOK) unknown access type %s"%access) 467: return False 468: 469: 470: 471: def getDocinfo(self, mode, url): 472: """returns docinfo depending on mode""" 473: logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url)) 474: # look for cached docinfo in session 475: if self.REQUEST.SESSION.has_key('docinfo'): 476: docinfo = self.REQUEST.SESSION['docinfo'] 477: # check if its still current 478: if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url: 479: logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys()) 480: return docinfo 481: 482: # new docinfo 483: docinfo = {'mode': mode, 'url': url} 484: # add self url 485: docinfo['viewerUrl'] = self.getDocumentViewerURL() 486: docinfo['digilibBaseUrl'] = self.digilibBaseUrl 487: # get index.meta DOM 488: docUrl = None 489: metaDom = None 490: if mode=="texttool": 491: # url points to document dir or index.meta 492: metaDom = self.metadataService.getDomFromPathOrUrl(url) 493: docUrl = url.replace('/index.meta', '') 494: if metaDom is None: 495: raise IOError("Unable to find index.meta for mode=texttool!") 496: 497: elif mode=="imagepath": 498: # url points to folder with images, index.meta optional 499: # asssume index.meta in parent dir 500: docUrl = getParentPath(url) 501: metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) 502: 503: elif mode=="filepath": 504: # url points to image file, index.meta optional 505: # asssume index.meta is two path segments up 506: docUrl = getParentPath(url, 2) 507: metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) 508: 509: else: 510: logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) 511: raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) 512: 513: docinfo['documentUrl'] = docUrl 514: # process index.meta contents 515: if metaDom is not None and metaDom.tag == 'resource': 516: # document directory name and path 517: resource = self.metadataService.getResourceData(dom=metaDom) 518: if resource: 519: docinfo = self.getDocinfoFromResource(docinfo, resource) 520: 521: # texttool info 522: texttool = self.metadataService.getTexttoolData(dom=metaDom) 523: if texttool: 524: docinfo = self.getDocinfoFromTexttool(docinfo, texttool) 525: 526: # bib info 527: bib = self.metadataService.getBibData(dom=metaDom) 528: if bib: 529: docinfo = self.getDocinfoFromBib(docinfo, bib) 530: else: 531: # no bib - try info.xml 532: docinfo = self.getDocinfoFromPresentationInfoXml(docinfo) 533: 534: # auth info 535: access = self.metadataService.getAccessData(dom=metaDom) 536: if access: 537: docinfo = self.getDocinfoFromAccess(docinfo, access) 538: 539: # attribution info 540: attribution = self.metadataService.getAttributionData(dom=metaDom) 541: if attribution: 542: logging.debug("getDocinfo: attribution=%s"%repr(attribution)) 543: docinfo['attribution'] = attribution 544: #docinfo = self.getDocinfoFromAccess(docinfo, access) 545: 546: # copyright info 547: copyright = self.metadataService.getCopyrightData(dom=metaDom) 548: if copyright: 549: logging.debug("getDocinfo: copyright=%s"%repr(copyright)) 550: docinfo['copyright'] = copyright 551: #docinfo = self.getDocinfoFromAccess(docinfo, access) 552: 553: # image path 554: if mode != 'texttool': 555: # override image path from texttool with url 556: docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1) 557: 558: # number of images from digilib 559: if docinfo.get('imagePath', None): 560: docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath'] 561: docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath']) 562: 563: logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys()) 564: #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) 565: # store in session 566: self.REQUEST.SESSION['docinfo'] = docinfo 567: return docinfo 568: 569: def getDocinfoFromResource(self, docinfo, resource): 570: """reads contents of resource element into docinfo""" 571: docName = resource.get('name', None) 572: docinfo['documentName'] = docName 573: docPath = resource.get('archive-path', None) 574: if docPath: 575: # clean up document path 576: if docPath[0] != '/': 577: docPath = '/' + docPath 578: 579: if docName and (not docPath.endswith(docName)): 580: docPath += "/" + docName 581: 582: else: 583: # use docUrl as docPath 584: docUrl = docinfo['documentURL'] 585: if not docUrl.startswith('http:'): 586: docPath = docUrl 587: if docPath: 588: # fix URLs starting with /mpiwg/online 589: docPath = docPath.replace('/mpiwg/online', '', 1) 590: 591: docinfo['documentPath'] = docPath 592: return docinfo 593: 594: def getDocinfoFromTexttool(self, docinfo, texttool): 595: """reads contents of texttool element into docinfo""" 596: # image dir 597: imageDir = texttool.get('image', None) 598: docPath = docinfo.get('documentPath', None) 599: if imageDir and docPath: 600: #print "image: ", imageDir, " archivepath: ", archivePath 601: imageDir = os.path.join(docPath, imageDir) 602: imageDir = imageDir.replace('/mpiwg/online', '', 1) 603: docinfo['imagePath'] = imageDir 604: 605: # old style text URL 606: textUrl = texttool.get('text', None) 607: if textUrl and docPath: 608: if urlparse.urlparse(textUrl)[0] == "": #keine url 609: textUrl = os.path.join(docPath, textUrl) 610: 611: docinfo['textURL'] = textUrl 612: 613: # new style text-url-path 614: textUrl = texttool.get('text-url-path', None) 615: if textUrl: 616: docinfo['textURLPath'] = textUrl 617: 618: # page flow 619: docinfo['pageFlow'] = texttool.get('page-flow', 'ltr') 620: 621: # odd pages are left 622: docinfo['oddPage'] = texttool.get('odd-scan-position', 'left') 623: 624: # number of title page (0: not defined) 625: docinfo['titlePage'] = texttool.get('title-scan-no', 0) 626: 627: # old presentation stuff 628: presentation = texttool.get('presentation', None) 629: if presentation and docPath: 630: if presentation.startswith('http:'): 631: docinfo['presentationUrl'] = presentation 632: else: 633: docinfo['presentationUrl'] = os.path.join(docPath, presentation) 634: 635: 636: return docinfo 637: 638: def getDocinfoFromBib(self, docinfo, bib): 639: """reads contents of bib element into docinfo""" 640: logging.debug("getDocinfoFromBib bib=%s"%repr(bib)) 641: # put all raw bib fields in dict "bib" 642: docinfo['bib'] = bib 643: bibtype = bib.get('@type', None) 644: docinfo['bibType'] = bibtype 645: # also store DC metadata for convenience 646: dc = self.metadataService.getDCMappedData(bib) 647: docinfo['creator'] = dc.get('creator',None) 648: docinfo['title'] = dc.get('title',None) 649: docinfo['date'] = dc.get('date',None) 650: return docinfo 651: 652: def getDocinfoFromAccess(self, docinfo, acc): 653: """reads contents of access element into docinfo""" 654: #TODO: also read resource type 655: logging.debug("getDocinfoFromAccess acc=%s"%repr(acc)) 656: try: 657: acctype = acc['@attr']['type'] 658: if acctype: 659: access=acctype 660: if access in ['group', 'institution']: 661: access = acc['name'].lower() 662: 663: docinfo['accessType'] = access 664: 665: except: 666: pass 667: 668: return docinfo 669: 670: def getDocinfoFromDigilib(self, docinfo, path): 671: infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path 672: # fetch data 673: txt = getHttpData(infoUrl) 674: if not txt: 675: logging.error("Unable to get dir-info from %s"%(infoUrl)) 676: return docinfo 677: 678: dom = ET.fromstring(txt) 679: size = getText(dom.find("size")) 680: logging.debug("getDocinfoFromDigilib: size=%s"%size) 681: if size: 682: docinfo['numPages'] = int(size) 683: else: 684: docinfo['numPages'] = 0 685: 686: # TODO: produce and keep list of image names and numbers 687: return docinfo 688: 689: 690: def getDocinfoFromPresentationInfoXml(self,docinfo): 691: """gets DC-like bibliographical information from the presentation entry in texttools""" 692: url = docinfo.get('presentationUrl', None) 693: if not url: 694: logging.error("getDocinfoFromPresentation: no URL!") 695: return docinfo 696: 697: dom = None 698: metaUrl = None 699: if url.startswith("http://"): 700: # real URL 701: metaUrl = url 702: else: 703: # online path 704: 705: server=self.digilibBaseUrl+"/servlet/Texter?fn=" 706: metaUrl=server+url 707: 708: txt=getHttpData(metaUrl) 709: if txt is None: 710: logging.error("Unable to read info.xml from %s"%(url)) 711: return docinfo 712: 713: dom = ET.fromstring(txt) 714: docinfo['creator']=getText(dom.find(".//author")) 715: docinfo['title']=getText(dom.find(".//title")) 716: docinfo['date']=getText(dom.find(".//date")) 717: return docinfo 718: 719: 720: def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewType=None, tocMode=None): 721: """returns pageinfo with the given parameters""" 722: logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewType=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewType,tocMode)) 723: pageinfo = {} 724: pageinfo['viewMode'] = viewMode 725: pageinfo['viewType'] = viewType 726: pageinfo['tocMode'] = tocMode 727: 728: current = getInt(current) 729: pageinfo['current'] = current 730: pageinfo['pn'] = current 731: rows = int(rows or self.thumbrows) 732: pageinfo['rows'] = rows 733: cols = int(cols or self.thumbcols) 734: pageinfo['cols'] = cols 735: grpsize = cols * rows 736: pageinfo['groupsize'] = grpsize 737: # is start is empty use one around current 738: start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) 739: # int(current / grpsize) * grpsize +1)) 740: pageinfo['start'] = start 741: 742: np = int(docinfo.get('numPages', 0)) 743: if np == 0: 744: # numPages unknown - maybe we can get it from text page 745: if docinfo.get('textURLPath', None): 746: # cache text page as well 747: pageinfo['textPage'] = self.getTextPage(mode=viewType, pn=current, docinfo=docinfo, pageinfo=pageinfo) 748: np = int(docinfo.get('numPages', 0)) 749: 750: pageinfo['numgroups'] = int(np / grpsize) 751: if np % grpsize > 0: 752: pageinfo['numgroups'] += 1 753: 754: pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl' 755: oddScanLeft = docinfo.get('oddPage', 'left') != 'right' 756: # add zeroth page for two columns 757: pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft)) 758: pageinfo['pageZero'] = pageZero 759: pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np) 760: 761: # TODO: do we need this here? 762: pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg') 763: pageinfo['query'] = self.REQUEST.get('query','') 764: pageinfo['queryType'] = self.REQUEST.get('queryType','') 765: pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext') 766: pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','') 767: pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30)) 768: pageinfo['queryPageSize'] = getInt(self.REQUEST.get('queryPageSize', 10)) 769: pageinfo['tocPN'] = getInt(self.REQUEST.get('tocPN', '1')) 770: pageinfo['searchPN'] = getInt(self.REQUEST.get('searchPN','1')) 771: 772: # limit tocPN 773: if 'tocSize_%s'%tocMode in docinfo: 774: tocSize = docinfo['tocSize_%s'%tocMode] 775: tocPageSize = pageinfo['tocPageSize'] 776: # cached toc 777: if tocSize%tocPageSize>0: 778: tocPages=tocSize/tocPageSize+1 779: else: 780: tocPages=tocSize/tocPageSize 781: 782: pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN']) 783: 784: return pageinfo 785: 786: 787: def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0): 788: """returns dict with array of page informations for one screenfull of thumbnails""" 789: batch = {} 790: grpsize = rows * cols 791: if maxIdx == 0: 792: maxIdx = start + grpsize 793: 794: nb = int(math.ceil(maxIdx / float(grpsize))) 795: # list of all batch start and end points 796: batches = [] 797: if pageZero: 798: ofs = 0 799: else: 800: ofs = 1 801: 802: for i in range(nb): 803: s = i * grpsize + ofs 804: e = min((i + 1) * grpsize + ofs - 1, maxIdx) 805: batches.append({'start':s, 'end':e}) 806: 807: batch['batches'] = batches 808: 809: pages = [] 810: if pageZero and start == 1: 811: # correct beginning 812: idx = 0 813: else: 814: idx = start 815: 816: for r in range(rows): 817: row = [] 818: for c in range(cols): 819: if idx < minIdx or idx > maxIdx: 820: page = {'idx':None} 821: else: 822: page = {'idx':idx} 823: 824: idx += 1 825: if pageFlowLtr: 826: row.append(page) 827: else: 828: row.insert(0, page) 829: 830: pages.append(row) 831: 832: if start > 1: 833: batch['prevStart'] = max(start - grpsize, 1) 834: else: 835: batch['prevStart'] = None 836: 837: if start + grpsize < maxIdx: 838: batch['nextStart'] = start + grpsize 839: else: 840: batch['nextStart'] = None 841: 842: batch['pages'] = pages 843: return batch 844: 845: def getBatch(self, start=1, size=10, end=0, data=None, fullData=True): 846: """returns dict with information for one screenfull of data.""" 847: batch = {} 848: if end == 0: 849: end = start + size 850: 851: nb = int(math.ceil(end / float(size))) 852: # list of all batch start and end points 853: batches = [] 854: for i in range(nb): 855: s = i * size + 1 856: e = min((i + 1) * size, end) 857: batches.append({'start':s, 'end':e}) 858: 859: batch['batches'] = batches 860: # list of elements in this batch 861: this = [] 862: j = 0 863: for i in range(start, min(start+size, end)): 864: if data: 865: if fullData: 866: d = data[i] 867: else: 868: d = data[j] 869: j += 1 870: 871: else: 872: d = i+1 873: 874: this.append(d) 875: 876: batch['this'] = this 877: if start > 1: 878: batch['prevStart'] = max(start - size, 1) 879: else: 880: batch['prevStart'] = None 881: 882: if start + size < end: 883: batch['nextStart'] = start + size 884: else: 885: batch['nextStart'] = None 886: 887: return batch 888: 889: 890: security.declareProtected('View management screens','changeDocumentViewerForm') 891: changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) 892: 893: def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None): 894: """init document viewer""" 895: self.title=title 896: self.digilibBaseUrl = digilibBaseUrl 897: self.thumbrows = thumbrows 898: self.thumbcols = thumbcols 899: self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 900: try: 901: # assume MetaDataFolder instance is called metadata 902: self.metadataService = getattr(self, 'metadata') 903: except Exception, e: 904: logging.error("Unable to find MetaDataFolder 'metadata': "+str(e)) 905: 906: if RESPONSE is not None: 907: RESPONSE.redirect('manage_main') 908: 909: def manage_AddDocumentViewerForm(self): 910: """add the viewer form""" 911: pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self) 912: return pt() 913: 914: def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None): 915: """add the viewer""" 916: newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName) 917: self._setObject(id,newObj) 918: 919: if RESPONSE is not None: 920: RESPONSE.redirect('manage_main') 921: 922: ## DocumentViewerTemplate class 923: class DocumentViewerTemplate(ZopePageTemplate): 924: """Template for document viewer""" 925: meta_type="DocumentViewer Template" 926: 927: 928: def manage_addDocumentViewerTemplateForm(self): 929: """Form for adding""" 930: pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self) 931: return pt() 932: 933: def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None, 934: REQUEST=None, submit=None): 935: "Add a Page Template with optional file content." 936: 937: self._setObject(id, DocumentViewerTemplate(id)) 938: ob = getattr(self, id) 939: txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read() 940: logging.info("txt %s:"%txt) 941: ob.pt_edit(txt,"text/html") 942: if title: 943: ob.pt_setTitle(title) 944: try: 945: u = self.DestinationURL() 946: except AttributeError: 947: u = REQUEST['URL1'] 948: 949: u = "%s/%s" % (u, urllib.quote(id)) 950: REQUEST.RESPONSE.redirect(u+'/manage_main') 951: return '' 952: 953: 954: