documentViewer/documentViewer.py - view

File: [Repository] / documentViewer / documentViewer.py
Revision 1.175.2.26: download - view: text, annotated - select for diffs - revision graph
Mon Aug 15 19:09:08 2011 UTC (12 years, 10 months ago) by casties
Branches: elementtree
Diff to: branchpoint 1.175: preferred, unified

more new template stuff

1: from OFS.Folder import Folder 2: from OFS.Image import File 3: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate 4: from Products.PageTemplates.PageTemplateFile import PageTemplateFile 5: from AccessControl import ClassSecurityInfo 6: from AccessControl import getSecurityManager 7: from Globals import package_home 8: 9: #from Ft.Xml import EMPTY_NAMESPACE, Parse 10: #import Ft.Xml.Domlette 11: 12: import xml.etree.ElementTree as ET 13: 14: import os.path 15: import sys 16: import urllib 17: import logging 18: import math 19: import urlparse 20: import re 21: import string 22: 23: from SrvTxtUtils import getInt, getText, getHttpData 24: 25: def logger(txt,method,txt2): 26: """logging""" 27: logging.info(txt+ txt2) 28: 29: 30: def serializeNode(node, encoding="utf-8"): 31: """returns a string containing node as XML""" 32: s = ET.tostring(node) 33: 34: # 4Suite: 35: # stream = cStringIO.StringIO() 36: # Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) 37: # s = stream.getvalue() 38: # stream.close() 39: return s 40: 41: def browserCheck(self): 42: """check the browsers request to find out the browser type""" 43: bt = {} 44: ua = self.REQUEST.get_header("HTTP_USER_AGENT") 45: bt['ua'] = ua 46: bt['isIE'] = False 47: bt['isN4'] = False 48: bt['versFirefox']="" 49: bt['versIE']="" 50: bt['versSafariChrome']="" 51: bt['versOpera']="" 52: 53: if string.find(ua, 'MSIE') > -1: 54: bt['isIE'] = True 55: else: 56: bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1) 57: # Safari oder Chrome identification 58: try: 59: nav = ua[string.find(ua, '('):] 60: nav1=ua[string.find(ua,')'):] 61: nav2=nav1[string.find(nav1,'('):] 62: nav3=nav2[string.find(nav2,')'):] 63: ie = string.split(nav, "; ")[1] 64: ie1 =string.split(nav1, " ")[2] 65: ie2 =string.split(nav3, " ")[1] 66: ie3 =string.split(nav3, " ")[2] 67: if string.find(ie3, "Safari") >-1: 68: bt['versSafariChrome']=string.split(ie2, "/")[1] 69: except: pass 70: # IE identification 71: try: 72: nav = ua[string.find(ua, '('):] 73: ie = string.split(nav, "; ")[1] 74: if string.find(ie, "MSIE") > -1: 75: bt['versIE'] = string.split(ie, " ")[1] 76: except:pass 77: # Firefox identification 78: try: 79: nav = ua[string.find(ua, '('):] 80: nav1=ua[string.find(ua,')'):] 81: if string.find(ie1, "Firefox") >-1: 82: nav5= string.split(ie1, "/")[1] 83: logging.debug("FIREFOX: %s"%(nav5)) 84: bt['versFirefox']=nav5[0:3] 85: except:pass 86: #Opera identification 87: try: 88: if string.find(ua,"Opera") >-1: 89: nav = ua[string.find(ua, '('):] 90: nav1=nav[string.find(nav,')'):] 91: bt['versOpera']=string.split(nav1,"/")[2] 92: except:pass 93: 94: bt['isMac'] = string.find(ua, 'Macintosh') > -1 95: bt['isWin'] = string.find(ua, 'Windows') > -1 96: bt['isIEWin'] = bt['isIE'] and bt['isWin'] 97: bt['isIEMac'] = bt['isIE'] and bt['isMac'] 98: bt['staticHTML'] = False 99: 100: return bt 101: 102: def getParentPath(path, cnt=1): 103: """returns pathname shortened by cnt""" 104: # make sure path doesn't end with / 105: path = path.rstrip('/') 106: # split by /, shorten, and reassemble 107: return '/'.join(path.split('/')[0:-cnt]) 108: 109: 110: ## 111: ## documentViewer class 112: ## 113: class documentViewer(Folder): 114: """document viewer""" 115: meta_type="Document viewer" 116: 117: security=ClassSecurityInfo() 118: manage_options=Folder.manage_options+( 119: {'label':'main config','action':'changeDocumentViewerForm'}, 120: ) 121: 122: metadataService = None 123: """MetaDataFolder instance""" 124: 125: # templates and forms 126: viewer_text = PageTemplateFile('zpt/viewer_text', globals()) 127: viewer_main = PageTemplateFile('zpt/viewer_main', globals()) 128: toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals()) 129: toc_text = PageTemplateFile('zpt/toc_text', globals()) 130: toc_figures = PageTemplateFile('zpt/toc_figures', globals()) 131: page_main_images = PageTemplateFile('zpt/page_main_images', globals()) 132: page_main_double = PageTemplateFile('zpt/page_main_double', globals()) 133: page_main_text = PageTemplateFile('zpt/page_main_text', globals()) 134: page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) 135: page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals()) 136: page_main_xml = PageTemplateFile('zpt/page_main_xml', globals()) 137: page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals()) 138: head_main = PageTemplateFile('zpt/head_main', globals()) 139: info_xml = PageTemplateFile('zpt/info_xml', globals()) 140: # TODO: can this be nicer? 141: docuviewer_css = File('docuviewer_css','',open(os.path.join(package_home(globals()),'css/docuviewer.css')), content_type='text/css') 142: 143: 144: thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) 145: 146: 147: def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"): 148: """init document viewer""" 149: self.id=id 150: self.title=title 151: self.thumbcols = thumbcols 152: self.thumbrows = thumbrows 153: # authgroups is list of authorized groups (delimited by ,) 154: self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 155: # create template folder so we can always use template.something 156: 157: templateFolder = Folder('template') 158: #self['template'] = templateFolder # Zope-2.12 style 159: self._setObject('template',templateFolder) # old style 160: try: 161: import MpdlXmlTextServer 162: textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName) 163: #templateFolder['fulltextclient'] = xmlRpcClient 164: templateFolder._setObject('fulltextclient',textServer) 165: except Exception, e: 166: logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e)) 167: 168: try: 169: from Products.zogiLib.zogiLib import zogiLib 170: zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book") 171: #templateFolder['zogilib'] = zogilib 172: templateFolder._setObject('zogilib',zogilib) 173: except Exception, e: 174: logging.error("Unable to create zogiLib for zogilib: "+str(e)) 175: 176: try: 177: # assume MetaDataFolder instance is called metadata 178: self.metadataService = getattr(self, 'metadata') 179: except Exception, e: 180: logging.error("Unable to find MetaDataFolder 'metadata': "+str(e)) 181: 182: if digilibBaseUrl is not None: 183: self.digilibBaseUrl = digilibBaseUrl 184: 185: 186: # proxy text server methods to fulltextclient 187: def getTextPage(self, **args): 188: """get page""" 189: return self.template.fulltextclient.getTextPage(**args) 190: 191: def getOrigPages(self, **args): 192: """get page""" 193: return self.template.fulltextclient.getOrigPages(**args) 194: 195: def getOrigPagesNorm(self, **args): 196: """get page""" 197: return self.template.fulltextclient.getOrigPagesNorm(**args) 198: 199: def getQuery(self, **args): 200: """get query in search""" 201: return self.template.fulltextclient.getQuery(**args) 202: 203: def getSearch(self, **args): 204: """get search""" 205: return self.template.fulltextclient.getSearch(**args) 206: 207: def getGisPlaces(self, **args): 208: """get gis places""" 209: return self.template.fulltextclient.getGisPlaces(**args) 210: 211: def getAllGisPlaces(self, **args): 212: """get all gis places """ 213: return self.template.fulltextclient.getAllGisPlaces(**args) 214: 215: def getWordInfo(self, **args): 216: """get translate""" 217: return self.template.fulltextclient.getWordInfo(**args) 218: 219: def getLemma(self, **args): 220: """get lemma""" 221: return self.template.fulltextclient.getLemma(**args) 222: 223: def getLemmaQuery(self, **args): 224: """get query""" 225: return self.template.fulltextclient.getLemmaQuery(**args) 226: 227: def getLex(self, **args): 228: """get lex""" 229: return self.template.fulltextclient.getLex(**args) 230: 231: def getToc(self, **args): 232: """get toc""" 233: return self.template.fulltextclient.getToc(**args) 234: 235: def getTocPage(self, **args): 236: """get tocpage""" 237: return self.template.fulltextclient.getTocPage(**args) 238: 239: 240: security.declareProtected('View','thumbs_rss') 241: def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): 242: ''' 243: view it 244: @param mode: defines how to access the document behind url 245: @param url: url which contains display information 246: @param viewMode: if images display images, if text display text, default is images (text,images or auto) 247: 248: ''' 249: logging.debug("HHHHHHHHHHHHHH:load the rss") 250: logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 251: 252: if not hasattr(self, 'template'): 253: # create template folder if it doesn't exist 254: self.manage_addFolder('template') 255: 256: if not self.digilibBaseUrl: 257: self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 258: 259: docinfo = self.getDocinfo(mode=mode,url=url) 260: #pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo) 261: pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo) 262: ''' ZDES ''' 263: pt = getattr(self.template, 'thumbs_main_rss') 264: 265: if viewMode=="auto": # automodus gewaehlt 266: if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert 267: viewMode="text" 268: else: 269: viewMode="images" 270: 271: return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) 272: 273: 274: security.declareProtected('View','index_html') 275: def index_html(self,url,mode="texttool",viewMode="auto",viewType=None,tocMode="thumbs",start=1,pn=1): 276: """ 277: view page 278: @param url: url which contains display information 279: @param mode: defines how to access the document behind url 280: @param viewMode: 'images': display images, 'text': display text, default is 'auto' 281: @param viewType: sub-type of viewMode, e.g. 'dict' for viewMode='text' 282: @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) 283: """ 284: 285: logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewType=%s start=%s pn=%s"%(mode,url,viewMode,viewType,start,pn)) 286: 287: if not hasattr(self, 'template'): 288: # this won't work 289: logging.error("template folder missing!") 290: return "ERROR: template folder missing!" 291: 292: if not getattr(self, 'digilibBaseUrl', None): 293: self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary" 294: 295: docinfo = self.getDocinfo(mode=mode,url=url) 296: 297: if tocMode != "thumbs": 298: # get table of contents 299: docinfo = self.getToc(mode=tocMode, docinfo=docinfo) 300: 301: # auto viewMode: text if there is a text else images 302: if viewMode=="auto": 303: if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 304: viewMode = "text" 305: viewType = "dict" 306: else: 307: viewMode = "images" 308: 309: elif viewMode == "text_dict": 310: # legacy fix 311: viewMode = "text" 312: viewType = "dict" 313: 314: # stringify viewType 315: if isinstance(viewType, list): 316: logging.debug("index_html: viewType is list:%s"%viewType) 317: viewType = ','.join([t for t in viewType if t]) 318: 319: pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewType=viewType, tocMode=tocMode) 320: 321: # get template /template/viewer_$viewMode 322: pt = getattr(self.template, 'viewer_%s'%viewMode, None) 323: if pt is None: 324: logging.error("No template for viewMode=%s!"%viewMode) 325: # TODO: error page? 326: return "No template for viewMode=%s!"%viewMode 327: 328: # and execute with parameters 329: return pt(docinfo=docinfo, pageinfo=pageinfo) 330: 331: def generateMarks(self,mk): 332: ret="" 333: if mk is None: 334: return "" 335: if not isinstance(mk, list): 336: mk=[mk] 337: for m in mk: 338: ret+="mk=%s"%m 339: return ret 340: 341: 342: def getBrowser(self): 343: """getBrowser the version of browser """ 344: bt = browserCheck(self) 345: logging.debug("BROWSER VERSION: %s"%(bt)) 346: return bt 347: 348: def findDigilibUrl(self): 349: """try to get the digilib URL from zogilib""" 350: url = self.template.zogilib.getDLBaseUrl() 351: return url 352: 353: def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None): 354: """returns URL to digilib Scaler with params""" 355: url = None 356: if docinfo is not None: 357: url = docinfo.get('imageURL', None) 358: 359: if url is None: 360: url = "%s/servlet/Scaler?"%self.digilibBaseUrl 361: if fn is None and docinfo is not None: 362: fn = docinfo.get('imagePath','') 363: 364: url += "fn=%s"%fn 365: 366: if pn: 367: url += "&pn=%s"%pn 368: 369: url += "&dw=%s&dh=%s"%(dw,dh) 370: return url 371: 372: def getDocumentViewerURL(self): 373: """returns the URL of this instance""" 374: return self.absolute_url() 375: 376: def getStyle(self, idx, selected, style=""): 377: """returns a string with the given style and append 'sel' if idx == selected.""" 378: #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style)) 379: if idx == selected: 380: return style + 'sel' 381: else: 382: return style 383: 384: def getParams(self, param=None, val=None, params=None, duplicates=None): 385: """returns dict with URL parameters. 386: 387: Takes URL parameters and additionally param=val or dict params. 388: Deletes key if value is None.""" 389: # copy existing request params 390: newParams=self.REQUEST.form.copy() 391: # change single param 392: if param is not None: 393: if val is None: 394: if newParams.has_key(param): 395: del newParams[param] 396: else: 397: newParams[param] = str(val) 398: 399: # change more params 400: if params is not None: 401: for (k, v) in params.items(): 402: if v is None: 403: # val=None removes param 404: if newParams.has_key(k): 405: del newParams[k] 406: 407: else: 408: newParams[k] = v 409: 410: if duplicates: 411: # eliminate lists (coming from duplicate keys) 412: for (k,v) in newParams.items(): 413: if isinstance(v, list): 414: if duplicates == 'comma': 415: # make comma-separated list of non-empty entries 416: newParams[k] = ','.join([t for t in v if t]) 417: elif duplicates == 'first': 418: # take first non-empty entry 419: newParams[k] = [t for t in v if t][0] 420: 421: return newParams 422: 423: def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'): 424: """returns URL to documentviewer with parameter param set to val or from dict params""" 425: urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates) 426: # quote values and assemble into query string (not escaping '/') 427: ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(unicode(v),'/')) for (k, v) in urlParams.items()]) 428: if baseUrl is None: 429: baseUrl = self.getDocumentViewerURL() 430: 431: url = "%s?%s"%(baseUrl, ps) 432: return url 433: 434: def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'): 435: """link to documentviewer with parameter param set to val""" 436: return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates) 437: 438: 439: def getInfo_xml(self,url,mode): 440: """returns info about the document as XML""" 441: if not self.digilibBaseUrl: 442: self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 443: 444: docinfo = self.getDocinfo(mode=mode,url=url) 445: pt = getattr(self.template, 'info_xml') 446: return pt(docinfo=docinfo) 447: 448: def isAccessible(self, docinfo): 449: """returns if access to the resource is granted""" 450: access = docinfo.get('accessType', None) 451: logging.debug("documentViewer (accessOK) access type %s"%access) 452: if access == 'free': 453: logging.debug("documentViewer (accessOK) access is free") 454: return True 455: 456: elif access is None or access in self.authgroups: 457: # only local access -- only logged in users 458: user = getSecurityManager().getUser() 459: logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr())) 460: if user is not None: 461: #print "user: ", user 462: return (user.getUserName() != "Anonymous User") 463: else: 464: return False 465: 466: logging.error("documentViewer (accessOK) unknown access type %s"%access) 467: return False 468: 469: 470: 471: def getDocinfo(self, mode, url): 472: """returns docinfo depending on mode""" 473: logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url)) 474: # look for cached docinfo in session 475: if self.REQUEST.SESSION.has_key('docinfo'): 476: docinfo = self.REQUEST.SESSION['docinfo'] 477: # check if its still current 478: if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url: 479: logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys()) 480: return docinfo 481: 482: # new docinfo 483: docinfo = {'mode': mode, 'url': url} 484: # add self url 485: docinfo['viewerUrl'] = self.getDocumentViewerURL() 486: docinfo['digilibBaseUrl'] = self.digilibBaseUrl 487: # get index.meta DOM 488: docUrl = None 489: metaDom = None 490: if mode=="texttool": 491: # url points to document dir or index.meta 492: metaDom = self.metadataService.getDomFromPathOrUrl(url) 493: docUrl = url.replace('/index.meta', '') 494: if metaDom is None: 495: raise IOError("Unable to find index.meta for mode=texttool!") 496: 497: elif mode=="imagepath": 498: # url points to folder with images, index.meta optional 499: # asssume index.meta in parent dir 500: docUrl = getParentPath(url) 501: metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) 502: 503: elif mode=="filepath": 504: # url points to image file, index.meta optional 505: # asssume index.meta is two path segments up 506: docUrl = getParentPath(url, 2) 507: metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) 508: 509: else: 510: logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) 511: raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) 512: 513: docinfo['documentUrl'] = docUrl 514: # process index.meta contents 515: if metaDom is not None and metaDom.tag == 'resource': 516: # document directory name and path 517: resource = self.metadataService.getResourceData(dom=metaDom) 518: if resource: 519: docinfo = self.getDocinfoFromResource(docinfo, resource) 520: 521: # texttool info 522: texttool = self.metadataService.getTexttoolData(dom=metaDom) 523: if texttool: 524: docinfo = self.getDocinfoFromTexttool(docinfo, texttool) 525: 526: # bib info 527: bib = self.metadataService.getBibData(dom=metaDom) 528: if bib: 529: docinfo = self.getDocinfoFromBib(docinfo, bib) 530: else: 531: # no bib - try info.xml 532: docinfo = self.getDocinfoFromPresentationInfoXml(docinfo) 533: 534: # auth info 535: access = self.metadataService.getAccessData(dom=metaDom) 536: if access: 537: docinfo = self.getDocinfoFromAccess(docinfo, access) 538: 539: # attribution info 540: attribution = self.metadataService.getAttributionData(dom=metaDom) 541: if attribution: 542: logging.debug("getDocinfo: attribution=%s"%repr(attribution)) 543: docinfo['attribution'] = attribution 544: #docinfo = self.getDocinfoFromAccess(docinfo, access) 545: 546: # copyright info 547: copyright = self.metadataService.getCopyrightData(dom=metaDom) 548: if copyright: 549: logging.debug("getDocinfo: copyright=%s"%repr(copyright)) 550: docinfo['copyright'] = copyright 551: #docinfo = self.getDocinfoFromAccess(docinfo, access) 552: 553: # image path 554: if mode != 'texttool': 555: # override image path from texttool with url 556: docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1) 557: 558: 559: 560: # number of images from digilib 561: if docinfo.get('imagePath', None): 562: docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath'] 563: docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath']) 564: 565: logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys()) 566: #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) 567: # store in session 568: self.REQUEST.SESSION['docinfo'] = docinfo 569: return docinfo 570: 571: def getDocinfoFromResource(self, docinfo, resource): 572: """reads contents of resource element into docinfo""" 573: docName = resource.get('name', None) 574: docinfo['documentName'] = docName 575: docPath = resource.get('archive-path', None) 576: if docPath: 577: # clean up document path 578: if docPath[0] != '/': 579: docPath = '/' + docPath 580: 581: if docName and (not docPath.endswith(docName)): 582: docPath += "/" + docName 583: 584: else: 585: # use docUrl as docPath 586: docUrl = docinfo['documentURL'] 587: if not docUrl.startswith('http:'): 588: docPath = docUrl 589: if docPath: 590: # fix URLs starting with /mpiwg/online 591: docPath = docPath.replace('/mpiwg/online', '', 1) 592: 593: docinfo['documentPath'] = docPath 594: return docinfo 595: 596: def getDocinfoFromTexttool(self, docinfo, texttool): 597: """reads contents of texttool element into docinfo""" 598: # image dir 599: imageDir = texttool.get('image', None) 600: docPath = docinfo.get('documentPath', None) 601: if imageDir and docPath: 602: #print "image: ", imageDir, " archivepath: ", archivePath 603: imageDir = os.path.join(docPath, imageDir) 604: imageDir = imageDir.replace('/mpiwg/online', '', 1) 605: docinfo['imagePath'] = imageDir 606: 607: # old style text URL 608: textUrl = texttool.get('text', None) 609: if textUrl and docPath: 610: if urlparse.urlparse(textUrl)[0] == "": #keine url 611: textUrl = os.path.join(docPath, textUrl) 612: 613: docinfo['textURL'] = textUrl 614: 615: # new style text-url-path 616: textUrl = texttool.get('text-url-path', None) 617: if textUrl: 618: docinfo['textURLPath'] = textUrl 619: 620: # page flow 621: docinfo['pageFlow'] = texttool.get('page-flow', 'ltr') 622: 623: # odd pages are left 624: docinfo['oddPage'] = texttool.get('odd-scan-position', 'left') 625: 626: # number of title page (0: not defined) 627: docinfo['titlePage'] = texttool.get('title-scan-no', 0) 628: 629: # old presentation stuff 630: presentation = texttool.get('presentation', None) 631: if presentation and docPath: 632: if presentation.startswith('http:'): 633: docinfo['presentationUrl'] = presentation 634: else: 635: docinfo['presentationUrl'] = os.path.join(docPath, presentation) 636: 637: 638: return docinfo 639: 640: def getDocinfoFromBib(self, docinfo, bib): 641: """reads contents of bib element into docinfo""" 642: logging.debug("getDocinfoFromBib bib=%s"%repr(bib)) 643: # put all raw bib fields in dict "bib" 644: docinfo['bib'] = bib 645: bibtype = bib.get('@type', None) 646: docinfo['bibType'] = bibtype 647: # also store DC metadata for convenience 648: dc = self.metadataService.getDCMappedData(bib) 649: docinfo['creator'] = dc.get('creator',None) 650: docinfo['title'] = dc.get('title',None) 651: docinfo['date'] = dc.get('date',None) 652: return docinfo 653: 654: def getDocinfoFromAccess(self, docinfo, acc): 655: """reads contents of access element into docinfo""" 656: #TODO: also read resource type 657: logging.debug("getDocinfoFromAccess acc=%s"%repr(acc)) 658: try: 659: acctype = acc['@attr']['type'] 660: if acctype: 661: access=acctype 662: if access in ['group', 'institution']: 663: access = acc['name'].lower() 664: 665: docinfo['accessType'] = access 666: 667: except: 668: pass 669: 670: return docinfo 671: 672: def getDocinfoFromDigilib(self, docinfo, path): 673: infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path 674: # fetch data 675: txt = getHttpData(infoUrl) 676: if not txt: 677: logging.error("Unable to get dir-info from %s"%(infoUrl)) 678: return docinfo 679: 680: dom = ET.fromstring(txt) 681: size = getText(dom.find("size")) 682: logging.debug("getDocinfoFromDigilib: size=%s"%size) 683: if size: 684: docinfo['numPages'] = int(size) 685: else: 686: docinfo['numPages'] = 0 687: 688: # TODO: produce and keep list of image names and numbers 689: return docinfo 690: 691: 692: def getDocinfoFromPresentationInfoXml(self,docinfo): 693: """gets DC-like bibliographical information from the presentation entry in texttools""" 694: url = docinfo.get('presentationUrl', None) 695: if not url: 696: logging.error("getDocinfoFromPresentation: no URL!") 697: return docinfo 698: 699: dom = None 700: metaUrl = None 701: if url.startswith("http://"): 702: # real URL 703: metaUrl = url 704: else: 705: # online path 706: 707: server=self.digilibBaseUrl+"/servlet/Texter?fn=" 708: metaUrl=server+url 709: 710: txt=getHttpData(metaUrl) 711: if txt is None: 712: logging.error("Unable to read info.xml from %s"%(url)) 713: return docinfo 714: 715: dom = ET.fromstring(txt) 716: docinfo['creator']=getText(dom.find(".//author")) 717: docinfo['title']=getText(dom.find(".//title")) 718: docinfo['date']=getText(dom.find(".//date")) 719: return docinfo 720: 721: 722: def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewType=None, tocMode=None): 723: """returns pageinfo with the given parameters""" 724: logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewType=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewType,tocMode)) 725: pageinfo = {} 726: pageinfo['viewMode'] = viewMode 727: pageinfo['viewType'] = viewType 728: pageinfo['tocMode'] = tocMode 729: 730: current = getInt(current) 731: pageinfo['current'] = current 732: pageinfo['pn'] = current 733: rows = int(rows or self.thumbrows) 734: pageinfo['rows'] = rows 735: cols = int(cols or self.thumbcols) 736: pageinfo['cols'] = cols 737: grpsize = cols * rows 738: pageinfo['groupsize'] = grpsize 739: # is start is empty use one around current 740: start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) 741: # int(current / grpsize) * grpsize +1)) 742: pageinfo['start'] = start 743: 744: np = int(docinfo.get('numPages', 0)) 745: if np == 0: 746: # numPages unknown - maybe we can get it from text page 747: if docinfo.get('textURLPath', None): 748: # cache text page as well 749: pageinfo['textPage'] = self.getTextPage(mode=viewType, pn=current, docinfo=docinfo, pageinfo=pageinfo) 750: np = int(docinfo.get('numPages', 0)) 751: 752: pageinfo['numgroups'] = int(np / grpsize) 753: if np % grpsize > 0: 754: pageinfo['numgroups'] += 1 755: 756: pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl' 757: oddScanLeft = docinfo.get('oddPage', 'left') != 'right' 758: # add zeroth page for two columns 759: pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft)) 760: pageinfo['pageZero'] = pageZero 761: pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np) 762: 763: # TODO: do we need this here? 764: pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg') 765: pageinfo['query'] = self.REQUEST.get('query','') 766: pageinfo['queryType'] = self.REQUEST.get('queryType','') 767: pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext') 768: pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','') 769: pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30)) 770: pageinfo['queryPageSize'] = getInt(self.REQUEST.get('queryPageSize', 10)) 771: pageinfo['tocPN'] = getInt(self.REQUEST.get('tocPN', '1')) 772: pageinfo['searchPN'] = getInt(self.REQUEST.get('searchPN','1')) 773: 774: # limit tocPN 775: if 'tocSize_%s'%tocMode in docinfo: 776: tocSize = docinfo['tocSize_%s'%tocMode] 777: tocPageSize = pageinfo['tocPageSize'] 778: # cached toc 779: if tocSize%tocPageSize>0: 780: tocPages=tocSize/tocPageSize+1 781: else: 782: tocPages=tocSize/tocPageSize 783: 784: pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN']) 785: 786: return pageinfo 787: 788: 789: def getPageBatch(self, start=None, rows=None, cols=None, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0): 790: """returns dict with array of page informations for one screenfull of thumbnails""" 791: grpsize = rows * cols 792: if maxIdx == 0: 793: maxIdx = start + grpsize 794: 795: pages = [] 796: if pageZero and start == 1: 797: # correct beginning 798: idx = 0 799: else: 800: idx = start 801: 802: for r in range(rows): 803: row = [] 804: for c in range(cols): 805: if idx < minIdx or idx > maxIdx: 806: page = {'idx':None} 807: else: 808: page = {'idx':idx} 809: 810: idx += 1 811: if pageFlowLtr: 812: row.append(page) 813: else: 814: row.insert(0, page) 815: 816: pages.append(row) 817: 818: batch = {} 819: if start > 1: 820: batch['prevStart'] = max(start - grpsize, 1) 821: else: 822: batch['prevStart'] = None 823: 824: if start + grpsize < maxIdx: 825: batch['nextStart'] = start + grpsize 826: else: 827: batch['nextStart'] = None 828: 829: batch['pages'] = pages 830: #logging.debug("getPageList returns=%s"%(batch)) 831: return batch 832: 833: 834: security.declareProtected('View management screens','changeDocumentViewerForm') 835: changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) 836: 837: def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None): 838: """init document viewer""" 839: self.title=title 840: self.digilibBaseUrl = digilibBaseUrl 841: self.thumbrows = thumbrows 842: self.thumbcols = thumbcols 843: self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 844: try: 845: # assume MetaDataFolder instance is called metadata 846: self.metadataService = getattr(self, 'metadata') 847: except Exception, e: 848: logging.error("Unable to find MetaDataFolder 'metadata': "+str(e)) 849: 850: if RESPONSE is not None: 851: RESPONSE.redirect('manage_main') 852: 853: def manage_AddDocumentViewerForm(self): 854: """add the viewer form""" 855: pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self) 856: return pt() 857: 858: def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None): 859: """add the viewer""" 860: newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName) 861: self._setObject(id,newObj) 862: 863: if RESPONSE is not None: 864: RESPONSE.redirect('manage_main') 865: 866: ## DocumentViewerTemplate class 867: class DocumentViewerTemplate(ZopePageTemplate): 868: """Template for document viewer""" 869: meta_type="DocumentViewer Template" 870: 871: 872: def manage_addDocumentViewerTemplateForm(self): 873: """Form for adding""" 874: pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self) 875: return pt() 876: 877: def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None, 878: REQUEST=None, submit=None): 879: "Add a Page Template with optional file content." 880: 881: self._setObject(id, DocumentViewerTemplate(id)) 882: ob = getattr(self, id) 883: txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read() 884: logging.info("txt %s:"%txt) 885: ob.pt_edit(txt,"text/html") 886: if title: 887: ob.pt_setTitle(title) 888: try: 889: u = self.DestinationURL() 890: except AttributeError: 891: u = REQUEST['URL1'] 892: 893: u = "%s/%s" % (u, urllib.quote(id)) 894: REQUEST.RESPONSE.redirect(u+'/manage_main') 895: return '' 896: 897: 898: