documentViewer/documentViewer.py - view

File: [Repository] / documentViewer / documentViewer.py
Revision 1.175.2.10: download - view: text, annotated - select for diffs - revision graph
Thu Jul 28 13:00:07 2011 UTC (12 years, 11 months ago) by casties
Branches: elementtree
Diff to: branchpoint 1.175: preferred, unified

more renovation

from OFS.Folder import Folder
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from AccessControl import ClassSecurityInfo
from AccessControl import getSecurityManager
from Globals import package_home

#from Ft.Xml import EMPTY_NAMESPACE, Parse
#import Ft.Xml.Domlette

import xml.etree.ElementTree as ET

import os.path
import sys
import urllib
import logging
import math
import urlparse
import re
import string

from SrvTxtUtils import getInt, getText, getHttpData


def logger(txt,method,txt2):
    """log txt+txt2 at INFO level (the method argument is ignored)"""
    logging.info(txt+ txt2)


def serializeNode(node, encoding="utf-8"):
    """returns a string containing node as XML

    NOTE(review): the encoding argument is accepted but not passed on to
    ElementTree; the output uses ET.tostring's default encoding.
    """
    s = ET.tostring(node)

    # 4Suite:
    #    stream = cStringIO.StringIO()
    #    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
    #    s = stream.getvalue()
    #    stream.close()
    return s


def browserCheck(self):
    """check the browsers request to find out the browser type

    Returns a dict with the raw user agent ('ua'), boolean flags ('isIE',
    'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac', 'staticHTML') and
    best-effort version strings ('versIE', 'versFirefox',
    'versSafariChrome', 'versOpera') that stay "" when parsing fails.
    """
    # a request without User-Agent header yields None, which would break
    # every string operation below
    ua = self.REQUEST.get_header("HTTP_USER_AGENT") or ""
    bt = {}
    bt['ua'] = ua
    bt['isIE'] = False
    bt['isN4'] = False
    bt['versFirefox'] = ""
    bt['versIE'] = ""
    bt['versSafariChrome'] = ""
    bt['versOpera'] = ""

    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = ('Mozilla/4.' in ua)

    # part of the UA starting at the first '(' resp. the first ')'
    # (find() returning -1 leaves just the last character, as before)
    nav = ua[ua.find('('):]
    nav1 = ua[ua.find(')'):]

    # Safari or Chrome identification
    # third blank-separated token after the first ')'; reused for Firefox
    ie1 = None
    try:
        nav2 = nav1[nav1.find('('):]
        nav3 = nav2[nav2.find(')'):]
        # value unused here; the lookup only aborts this block for agents
        # without a second "; "-separated token (unchanged behaviour)
        ie = nav.split("; ")[1]
        ie1 = nav1.split(" ")[2]
        ie2 = nav3.split(" ")[1]
        ie3 = nav3.split(" ")[2]
        if "Safari" in ie3:
            bt['versSafariChrome'] = ie2.split("/")[1]
    except IndexError:
        # user agent does not have the expected shape
        pass

    # IE identification
    try:
        ie = nav.split("; ")[1]
        if "MSIE" in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        pass

    # Firefox identification (only possible if the token was found above)
    if ie1 is not None and "Firefox" in ie1:
        try:
            nav5 = ie1.split("/")[1]
            logging.debug("FIREFOX: %s"%(nav5))
            bt['versFirefox'] = nav5[0:3]
        except IndexError:
            pass

    # Opera identification
    if "Opera" in ua:
        try:
            operaRest = nav[nav.find(')'):]
            bt['versOpera'] = operaRest.split("/")[2]
        except IndexError:
            pass

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt


def getParentDir(path):
    """returns pathname shortened by one"""
    return '/'.join(path.split('/')[0:-1])


##
## documentViewer class
##
class documentViewer(Folder):
    """document viewer"""
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # MetaDataFolder instance
    metadataService = None

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_double = PageTemplateFile('zpt/page_main_double', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())


    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())


    def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
        """init document viewer

        Creates the 'template' sub-folder and tries to populate it with a
        text server ('fulltextclient') and a zogiLib instance ('zogilib').
        Failures to create either are logged and otherwise ignored.
        """
        self.id=id
        self.title=title
        # the argument used to be silently dropped; keep it when given
        if digilibBaseUrl is not None:
            self.digilibBaseUrl = digilibBaseUrl
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        # create template folder so we can always use template.something

        templateFolder = Folder('template')
        #self['template'] = templateFolder # Zope-2.12 style
        self._setObject('template',templateFolder) # old style
        try:
            import MpdlXmlTextServer
            textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
            #templateFolder['fulltextclient'] = xmlRpcClient
            templateFolder._setObject('fulltextclient',textServer)
        except Exception as e:
            logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))

        try:
            from Products.zogiLib.zogiLib import zogiLib
            zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
            #templateFolder['zogilib'] = zogilib
            templateFolder._setObject('zogilib',zogilib)
        except Exception as e:
            logging.error("Unable to create zogiLib for zogilib: "+str(e))

        try:
            # assume MetaDataFolder instance is called metadata
            self.metadataService = getattr(self, 'metadata')
        except Exception as e:
            logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))


    # proxy text server methods to fulltextclient
    def getTextPage(self, **args):
        """get page (proxied to template.fulltextclient)"""
        return self.template.fulltextclient.getTextPage(**args)

    def getOrigPages(self, **args):
        """get original pages (proxied to template.fulltextclient)"""
        return self.template.fulltextclient.getOrigPages(**args)

    def getOrigPagesNorm(self, **args):
        """get normalized original pages (proxied to template.fulltextclient)"""
        return self.template.fulltextclient.getOrigPagesNorm(**args)

    def getQuery(self, **args):
        """get query in search (proxied to template.fulltextclient)"""
        return self.template.fulltextclient.getQuery(**args)

    def getSearch(self, **args):
        """get search (proxied to template.fulltextclient)"""
        return self.template.fulltextclient.getSearch(**args)

    def getGisPlaces(self, **args):
        """get gis places (proxied to template.fulltextclient)"""
        return self.template.fulltextclient.getGisPlaces(**args)

    def getAllGisPlaces(self, **args):
        """get all gis places (proxied to template.fulltextclient)"""
        return self.template.fulltextclient.getAllGisPlaces(**args)

    def getTranslate(self, **args):
        """get translate (proxied to template.fulltextclient)"""
        return self.template.fulltextclient.getTranslate(**args)

    def getLemma(self, **args):
        """get lemma (proxied to template.fulltextclient)"""
        return self.template.fulltextclient.getLemma(**args)

    def getLemmaQuery(self, **args):
        """get lemma query (proxied to template.fulltextclient)"""
        return self.template.fulltextclient.getLemmaQuery(**args)

    def getLex(self, **args):
        """get lex (proxied to template.fulltextclient)"""
        return self.template.fulltextclient.getLex(**args)

    def getToc(self, **args):
        """get toc (proxied to template.fulltextclient)"""
        return self.template.fulltextclient.getToc(**args)

    def getTocPage(self, **args):
        """get tocpage (proxied to template.fulltextclient)"""
        return self.template.fulltextclient.getTocPage(**args)


    security.declareProtected('View','thumbs_rss')
    def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
        '''
        view it
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is images (text,images or auto)

        '''
        logging.debug("HHHHHHHHHHHHHH:load the rss")
        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        # getattr: the attribute does not exist until it has been configured
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)
        #pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
        pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
        # ZDES
        pt = getattr(self.template, 'thumbs_main_rss')

        if viewMode=="auto": # automatic mode selected
            # text URL is set and text viewer is configured
            if ("textURL" in docinfo) or docinfo.get('textURLPath',None):
                viewMode="text"
            else:
                viewMode="images"

        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)

    security.declareProtected('View','index_html')
    def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
        '''
        view it
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
        @param characterNormalization type of text display (reg, norm, none)
        @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
        '''

        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # this won't work
            logging.error("template folder missing!")
            return "ERROR: template folder missing!"

        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)

        if tocMode != "thumbs":
            # get table of contents
            docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

        # auto viewMode: text_dict if text else images
        if viewMode=="auto":
            if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):
                # text URL is set and text viewer is configured
                viewMode="text_dict"
            else:
                viewMode="images"

        pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)

        if viewMode != 'images' and docinfo.get('textURLPath', None):
            # get full text page
            page = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
            pageinfo['textPage'] = page

        # get template /template/viewer_main
        pt = getattr(self.template, 'viewer_main')
        # and execute with parameters
        return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=self.generateMarks(mk))

    def generateMarks(self,mk):
        # Builds the "mk=..." fragment for one mark or a list of marks
        # ("" for None).  Intentionally no docstring: in Zope 2 a docstring
        # would make this helper publishable through the web.
        # NOTE(review): several marks are concatenated without separator
        # ("mk=amk=b"); confirm what the templates expect.
        if mk is None:
            return ""
        if not isinstance(mk, list):
            mk=[mk]
        return "".join(["mk=%s"%m for m in mk])


    def getBrowser(self):
        """returns the browser information dict produced by browserCheck"""
        bt = browserCheck(self)
        logging.debug("BROWSER VERSION: %s"%(bt))
        return bt

    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib"""
        url = self.template.zogilib.getDLBaseUrl()
        return url

    def getDocumentViewerURL(self):
        """returns the URL of this instance"""
        return self.absolute_url()

    def getStyle(self, idx, selected, style=""):
        """returns a string with the given style and append 'sel' if idx == selected."""
        #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
        if idx == selected:
            return style + 'sel'
        return style

    def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
        """returns URL to documentviewer with parameter param set to val or from dict params

        The current request parameters are kept; a value of None removes
        the parameter.  baseUrl defaults to REQUEST['URL1'] and paramSep
        separates the key=value pairs of the query string.
        """
        # copy existing request params
        urlParams=self.REQUEST.form.copy()
        # change single param
        if param is not None:
            if val is None:
                urlParams.pop(param, None)
            else:
                urlParams[param] = str(val)

        # change more params
        if params is not None:
            for k, v in params.items():
                if v is None:
                    # val=None removes param
                    urlParams.pop(k, None)
                else:
                    # stringify like the single-param branch so that
                    # numbers survive the quoting below
                    urlParams[k] = str(v)

        # FIXME: does this belong here?
        # if the first call used mode=filepath, switch to imagepath now
        if urlParams.get("mode", None) == "filepath":
            urlParams["mode"] = "imagepath"
            urlParams["url"] = getParentDir(urlParams["url"])

        # quote values and assemble into query string (not escaping '/')
        ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
        #ps = urllib.urlencode(urlParams)
        if baseUrl is None:
            baseUrl = self.REQUEST['URL1']

        url = "%s?%s"%(baseUrl, ps)
        return url


    def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
        """link to documentviewer with parameter param set to val"""
        # NOTE(review): the name suggests an HTML-escaped separator
        # ('&amp;'); the source as seen passes '&' -- verify against the
        # repository, the HTML listing may have unescaped the entity.
        return self.getLink(param, val, params, baseUrl, '&')

    def getInfo_xml(self,url,mode):
        """returns info about the document as XML"""

        # getattr: the attribute does not exist until it has been configured
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

        docinfo = self.getDocinfo(mode=mode,url=url)
        pt = getattr(self.template, 'info_xml')
        return pt(docinfo=docinfo)

    def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
        """returns new option state

        The state is kept in the session under optionName and flips each
        time newState differs from the value seen on the previous call.
        """
        if not self.REQUEST.SESSION.has_key(optionName):
            # not in session -- initial
            opt = {'lastState': newState, 'state': initialState}
        else:
            opt = self.REQUEST.SESSION.get(optionName)
            if opt['lastState'] != newState:
                # state in session has changed -- toggle
                opt['state'] = not opt['state']
                opt['lastState'] = newState

        self.REQUEST.SESSION[optionName] = opt
        return opt['state']

    def isAccessible(self, docinfo):
        """returns if access to the resource is granted

        'free' resources are always accessible; resources without access
        type or with an access type listed in authgroups require a
        logged-in user; every other access type is refused.
        """
        access = docinfo.get('accessType', None)
        logging.debug("documentViewer (accessOK) access type %s"%access)
        if access == 'free':
            logging.debug("documentViewer (accessOK) access is free")
            return True

        if access is None or access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
            if user is None:
                return False
            return (user.getUserName() != "Anonymous User")

        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False


    def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
        """reads the directory info for path from digilib

        Strips cut trailing components from path, queries digilib's
        dirInfo-xml.jsp and stores the reported size as docinfo['numPages']
        (0 if no size is given).  Raises IOError if nothing could be read.
        """
        if docinfo is None:
            docinfo = {}

        for x in range(cut):
            path=getParentDir(path)

        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

        logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

        txt = getHttpData(infoUrl)
        if txt is None:
            raise IOError("Unable to get dir-info from %s"%(infoUrl))

        dom = ET.fromstring(txt)
        #dom = Parse(txt)
        size=getText(dom.find("size"))
        #sizes=dom.xpath("//dir/size")
        logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)

        if size:
            docinfo['numPages'] = int(size)
        else:
            docinfo['numPages'] = 0

        # TODO: produce and keep list of image names and numbers

        return docinfo

    def getIndexMetaPath(self,url):
        """returns only the path: /mpiwg/online/(experimental|permanent)/... or "" if url does not match"""
        regexp = re.compile(r".*(experimental|permanent)/(.*)")
        regpath = regexp.match(url)
        if regpath is None:
            return ""
        logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2))
        return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2))



    def getIndexMetaUrl(self,url):
        """returns url of index.meta document at url"""

        metaUrl = None
        if url.startswith(("http://", "https://")):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url.replace("/mpiwg/online","")
            if not metaUrl.endswith("index.meta"):
                metaUrl += "/index.meta"

        return metaUrl

    def getDomFromIndexMeta(self, url):
        """get dom from index meta

        Raises IOError if the document could not be read.
        """
        dom = None
        metaUrl = self.getIndexMetaUrl(url)

        logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read index meta from %s"%(url))

        dom = ET.fromstring(txt)
        #dom = Parse(txt)
        return dom

    def getPresentationInfoXML(self, url):
        """returns dom of info.xml document at url

        Raises IOError if the document could not be read.
        """
        dom = None
        metaUrl = None
        if url.startswith(("http://", "https://")):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url.replace("/mpiwg/online","")

        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read infoXMLfrom %s"%(url))

        dom = ET.fromstring(txt)
        #dom = Parse(txt)
        return dom


    def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets authorization info from the index.meta file at path or given by dom

        Stores the result in docinfo['accessType']: None if there is no
        access element, the lower-cased name for 'group'/'institution'
        access, else the access type itself.
        """
        logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

        access = None

        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        acc = dom.find(".//access-conditions/access")
        if acc is not None:
            acctype = acc.get('type')
            #acctype = dom.xpath("//access-conditions/access/@type")
            if acctype:
                access=acctype
                if access in ['group', 'institution']:
                    nameNode = dom.find(".//access-conditions/access/name")
                    if nameNode is not None and nameNode.text:
                        access = nameNode.text.lower()
                    else:
                        # keep the bare type instead of failing on a
                        # missing or empty name element
                        logging.warning("documentViewer (getauthinfofromindexmeta) access name missing for type %s"%access)

        docinfo['accessType'] = access
        return docinfo


    def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets bibliographical info from the index.meta file at path or given by dom

        Fills docinfo['indexMetaPath'] and, if a metadata service is
        configured, 'bib', 'bib_type', 'creator', 'title' and 'date'.
        """
        logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        docinfo['indexMetaPath']=self.getIndexMetaPath(path)

        logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
        if self.metadataService is not None:
            # put all raw bib fields in dict "bib"
            bib = self.metadataService.getBibData(dom=dom)
            docinfo['bib'] = bib
            bibtype = bib.get('@type', None)
            docinfo['bib_type'] = bibtype
            # also store DC metadata for convenience
            dc = self.metadataService.getDCMappedData(bib)
            docinfo['creator'] = dc.get('creator',None)
            docinfo['title'] = dc.get('title',None)
            docinfo['date'] = dc.get('date',None)
        else:
            logging.error("MetadataService not found!")
        return docinfo


    # TODO: is this needed?
    def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets name info from the index.meta file at path or given by dom"""
        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getDomFromIndexMeta(path)

        docinfo['name']=getText(dom.find("name"))
        logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
        return docinfo


    def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
        """parse texttool tag in index meta

        Fills docinfo with image, text, bibliographical and access
        information.  Raises IOError if no archive path can be determined.
        """
        logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
        if docinfo is None:
            docinfo = {}
        if docinfo.get('lang', None) is None:
            docinfo['lang'] = '' # default: no language set
        if dom is None:
            dom = self.getDomFromIndexMeta(url)

        texttool = self.metadata.getTexttoolData(dom=dom)

        archivePath = None
        archiveName = None

        archiveName = getText(dom.find("name"))
        if not archiveName:
            logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

        archivePath = getText(dom.find("archive-path"))
        if archivePath:
            # clean up archive path
            if archivePath[0] != '/':
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')

        # NOTE(review): only None is rejected here; whether getText can
        # return "" for a missing element is not visible in this file.
        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s" % (url))

        imageDir = texttool.get('image', None)

        if not imageDir:
            # we balk with no image tag / not necessary anymore because textmode is now standard
            #raise IOError("No text-tool info in %s"%(url))
            imageDir = ""
            #xquery="//pb"
            docinfo['imagePath'] = "" # no images
            docinfo['imageURL'] = ""

        if imageDir and archivePath:
            #print "image: ", imageDir, " archivepath: ", archivePath
            imageDir = os.path.join(archivePath, imageDir)
            imageDir = imageDir.replace("/mpiwg/online", '')
            docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
            docinfo['imagePath'] = imageDir

            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

        viewerUrl = texttool.get('digiliburlprefix', None)
        if viewerUrl:
            docinfo['viewerURL'] = viewerUrl

        # old style text URL
        textUrl = texttool.get('text', None)
        if textUrl:
            if urlparse.urlparse(textUrl)[0] == "": # no scheme: not a URL
                textUrl = os.path.join(archivePath, textUrl)
            # fix URLs starting with /mpiwg/online
            if textUrl.startswith("/mpiwg/online"):
                textUrl = textUrl.replace("/mpiwg/online", '', 1)

            docinfo['textURL'] = textUrl

        # new style text-url-path
        textUrl = texttool.get('text-url-path', None)
        if textUrl:
            docinfo['textURLPath'] = textUrl
            # short form: everything before the first '.'
            textUrlkurz = textUrl.split(".")[0]
            docinfo['textURLPathkurz'] = textUrlkurz
            #if not docinfo['imagePath']:
                # text-only, no page images
                #docinfo = self.getNumTextPages(docinfo)

        # get bib info from the bib tag
        docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)
        # TODO: is this needed here?
        docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

        # TODO: what to do with presentation?
        presentationUrl = texttool.get('presentation', None)
        if presentationUrl:
            # overwrite the bib info with the presentation information;
            # the presentation URL results from replacing index.meta in
            # the metadata URL by the relative path of the presentation
            presentationPath = presentationUrl
            if url.endswith("index.meta"):
                presentationUrl = url.replace('index.meta', presentationPath)
            else:
                presentationUrl = url + "/" + presentationPath

            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

        # get authorization (access info)
        docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)

        return docinfo


    def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
        """gets the bibliographical information from the presentation entry in texttools

        The dom argument is ignored; the presentation document is always
        loaded from url.
        """
        if docinfo is None:
            # the default used to fail on the first item assignment
            docinfo = {}
        dom=self.getPresentationInfoXML(url)
        docinfo['author']=getText(dom.find(".//author"))
        docinfo['title']=getText(dom.find(".//title"))
        docinfo['year']=getText(dom.find(".//date"))
        return docinfo

    def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
        """path is the path to the images; it assumes that the index.meta file is one level higher."""
        logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
        if docinfo is None:
            docinfo = {}
        path=path.replace("/mpiwg/online","")
        docinfo['imagePath'] = path
        docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

        pathorig=path
        for x in range(cut):
            path=getParentDir(path)
        logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
        imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
        docinfo['imageURL'] = imageUrl

        #TODO: use getDocinfoFromIndexMeta
        # the index.meta file is expected one level above the images,
        # hence cut+1
        docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
        docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
        return docinfo


    def getDocinfo(self, mode, url):
        """returns docinfo depending on mode

        The docinfo is cached in the session and reused as long as mode
        and url stay the same.  Raises ValueError for an unknown mode.
        """
        logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
        # look for cached docinfo in session
        if self.REQUEST.SESSION.has_key('docinfo'):
            docinfo = self.REQUEST.SESSION['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())
                return docinfo

        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        # add self url
        docinfo['viewerUrl'] = self.getDocumentViewerURL()
        if mode=="texttool":
            # index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode=="imagepath":
            # folder with images, index.meta optional
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        elif mode=="filepath":
            # filename
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
        else:
            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

        logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
        #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
        # store in session
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo

    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
        """returns pageinfo with the given parameters

        Combines the thumbnail paging values (current, start, end, rows,
        cols, groupsize, numgroups) with the display and search options
        read from the request.
        """
        pageinfo = {}
        current = getInt(current)

        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # default start: first page of the thumbnail group containing current
        start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
        # int(current / grpsize) * grpsize +1))
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if (docinfo is not None) and ('numPages' in docinfo):
            np = int(docinfo['numPages'])
            pageinfo['end'] = min(pageinfo['end'], np)
            # number of groups, rounded up
            pageinfo['numgroups'] = int(np // grpsize)
            if np % grpsize > 0:
                pageinfo['numgroups'] += 1

        pageinfo['viewMode'] = viewMode
        pageinfo['tocMode'] = tocMode
        pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
        #pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','1')
        pageinfo['query'] = self.REQUEST.get('query','')
        pageinfo['queryType'] = self.REQUEST.get('queryType','')
        pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
        pageinfo['textPN'] = self.REQUEST.get('textPN','1')
        pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
        pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
        pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
        pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
        # requested toc page number as int
        toc = int(pageinfo['tocPN'])
        pageinfo['textPages'] =int(toc)

        # if the toc size is known (cached toc), limit tocPN to the last
        # toc page; docinfo may be None here
        if docinfo is not None and ('tocSize_%s'%tocMode in docinfo):
            tocSize = int(docinfo['tocSize_%s'%tocMode])
            tocPageSize = int(pageinfo['tocPageSize'])
            # number of toc pages, rounded up
            if tocSize%tocPageSize>0:
                tocPages=tocSize//tocPageSize+1
            else:
                tocPages=tocSize//tocPageSize

            pageinfo['tocPN'] = min(tocPages,toc)

        pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
        pageinfo['sn'] =self.REQUEST.get('sn','')
        return pageinfo


    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())

    def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
        """change the configuration of the document viewer"""
        # NOTE(review): the thumbrows/thumbcols defaults here (2/5) are
        # the reverse of those in __init__ (5/2) -- confirm which is meant.
        self.title=title
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        try:
            # assume MetaDataFolder instance is called metadata
            self.metadataService = getattr(self, 'metadata')
        except Exception as e:
            logging.error("Unable to find MetaDataFolder 'metadata': "+str(e))

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


def manage_AddDocumentViewerForm(self):
    """add the viewer form"""
    pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
    return pt()


def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """add the viewer"""
    newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
    self._setObject(id,newObj)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')


## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer"""
    meta_type="DocumentViewer Template"


def manage_addDocumentViewerTemplateForm(self):
    """Form for adding"""
    pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self)
    return pt()


def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    "Add a Page Template with optional file content."

    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    # read the default template from the product directory; the with
    # statement closes the file handle again
    with open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r') as f:
        txt = f.read()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)
    # without a request (call from code) there is nothing to redirect
    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''