documentViewer/documentViewer.py - view

File: [Repository] / documentViewer / documentViewer.py
Revision 1.49: download - view: text, annotated - select for diffs - revision graph
Wed May 5 15:42:56 2010 UTC (15 years, 2 months ago) by abukhman
Branches: MAIN
CVS tags: HEAD

Last update

from OFS.Folder import Folder
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from AccessControl import ClassSecurityInfo
from AccessControl import getSecurityManager
from Globals import package_home

from Ft.Xml.Domlette import NonvalidatingReader
from Ft.Xml.Domlette import PrettyPrint, Print
from Ft.Xml import EMPTY_NAMESPACE, Parse

from xml.dom.minidom import parse, parseString



import Ft.Xml.XPath
import cStringIO
import xmlrpclib
import os.path
import sys
import cgi
import urllib
import logging
import math

import urlparse
from types import *

def logger(txt, method, txt2):
    """Log txt immediately followed by txt2 via the logging module.

    txt    -- message prefix (callers pass the name of the calling method)
    method -- logging level, e.g. logging.INFO / logging.WARNING /
              logging.ERROR; a value that is not an integer level falls
              back to logging.INFO
    txt2   -- message text, appended to txt without a separator
    """
    if isinstance(method, int):
        level = method
    else:
        level = logging.INFO
    logging.log(level, txt + txt2)


def getInt(number, default=0):
    """Return number converted with int(), or int(default) if that fails.

    Only conversion failures (TypeError, ValueError, OverflowError) trigger
    the fallback; other exceptions propagate.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return int(default)

def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    nodename -- a DOM node, or None
    Returns "" for None or for a node without text children; text inside
    nested child elements is not included.
    """
    if nodename is None:
        return ""
    return "".join([child.data for child in nodename.childNodes
                    if child.nodeType == child.TEXT_NODE])

def serializeNode(node, encoding='utf-8'):
    """Return a string containing node serialized as XML in encoding."""
    buf = cStringIO.StringIO()
    try:
        Print(node, stream=buf, encoding=encoding)
        return buf.getvalue()
    finally:
        # release the buffer even when Print() raises
        buf.close()


def getParentDir(path):
    """Return path with its last '/'-separated component removed."""
    return '/'.join(path.split('/')[:-1])


import socket

def urlopen(url, timeout=2):
    """Open url with urllib.urlopen using a temporary socket timeout.

    url     -- URL to open
    timeout -- socket default timeout (seconds) in effect while opening
    Returns the object returned by urllib.urlopen; its exceptions propagate.
    """
    socket.setdefaulttimeout(timeout)
    try:
        return urllib.urlopen(url)
    finally:
        # The default timeout is process-wide, so put it back to 5 seconds
        # on the failure path too, not only after a successful open.
        socket.setdefaulttimeout(5)


##
## documentViewer class
##
class documentViewer(Folder):
    """document viewer"""
    #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"
83: 84: meta_type="Document viewer" 85: 86: security=ClassSecurityInfo() 87: manage_options=Folder.manage_options+( 88: {'label':'main config','action':'changeDocumentViewerForm'}, 89: ) 90: 91: # templates and forms 92: viewer_main = PageTemplateFile('zpt/viewer_main', globals()) 93: toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals()) 94: toc_text = PageTemplateFile('zpt/toc_text', globals()) 95: toc_figures = PageTemplateFile('zpt/toc_figures', globals()) 96: page_main_images = PageTemplateFile('zpt/page_main_images', globals()) 97: page_main_text = PageTemplateFile('zpt/page_main_text', globals()) 98: page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) 99: page_main_xml = PageTemplateFile('zpt/page_main_xml', globals()) 100: head_main = PageTemplateFile('zpt/head_main', globals()) 101: docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) 102: info_xml = PageTemplateFile('zpt/info_xml', globals()) 103: 104: thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) 105: security.declareProtected('View management screens','changeDocumentViewerForm') 106: changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) 107: 108: 109: def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"): 110: """init document viewer""" 111: self.id=id 112: self.title=title 113: self.thumbcols = thumbcols 114: self.thumbrows = thumbrows 115: # authgroups is list of authorized groups (delimited by ,) 116: self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 117: # create template folder so we can always use template.something 118: 119: templateFolder = Folder('template') 120: #self['template'] = templateFolder # Zope-2.12 style 121: self._setObject('template',templateFolder) # old style 122: try: 123: from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy 124: xmlRpcClient = 
XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False) 125: #templateFolder['fulltextclient'] = xmlRpcClient 126: templateFolder._setObject('fulltextclient',xmlRpcClient) 127: except Exception, e: 128: logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(e)) 129: try: 130: from Products.zogiLib.zogiLib import zogiLib 131: zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book") 132: #templateFolder['zogilib'] = zogilib 133: templateFolder._setObject('zogilib',zogilib) 134: except Exception, e: 135: logging.error("Unable to create zogiLib for zogilib: "+str(e)) 136: 137: 138: security.declareProtected('View','thumbs_rss') 139: def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): 140: ''' 141: view it 142: @param mode: defines how to access the document behind url 143: @param url: url which contains display information 144: @param viewMode: if images display images, if text display text, default is images (text,images or auto) 145: 146: ''' 147: logging.debug("HHHHHHHHHHHHHH:load the rss") 148: logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 149: 150: if not hasattr(self, 'template'): 151: # create template folder if it doesn't exist 152: self.manage_addFolder('template') 153: 154: if not self.digilibBaseUrl: 155: self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 156: 157: docinfo = self.getDocinfo(mode=mode,url=url) 158: pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo) 159: pt = getattr(self.template, 'thumbs_main_rss') 160: 161: if viewMode=="auto": # automodus gewaehlt 162: if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert 163: viewMode="text" 164: else: 165: viewMode="images" 166: 167: return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) 168: 169: 
security.declareProtected('View','index_html') 170: def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None): 171: ''' 172: view it 173: @param mode: defines how to access the document behind url 174: @param url: url which contains display information 175: @param viewMode: if images display images, if text display text, default is auto (text,images or auto) 176: @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) 177: @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma) 178: ''' 179: 180: logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 181: 182: if not hasattr(self, 'template'): 183: # this won't work 184: logging.error("template folder missing!") 185: return "ERROR: template folder missing!" 186: 187: if not getattr(self, 'digilibBaseUrl', None): 188: self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 189: 190: docinfo = self.getDocinfo(mode=mode,url=url) 191: 192: 193: if tocMode != "thumbs": 194: # get table of contents 195: docinfo = self.getToc(mode=tocMode, docinfo=docinfo) 196: 197: pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) 198: 199: if viewMode=="auto": # automodus gewaehlt 200: if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert 201: viewMode="text" 202: else: 203: viewMode="images" 204: 205: pt = getattr(self.template, 'viewer_main') 206: return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) 207: 208: def generateMarks(self,mk): 209: ret="" 210: if mk is None: 211: return "" 212: if type(mk) is not ListType: 213: mk=[mk] 214: for m in mk: 215: ret+="mk=%s"%m 216: return ret 217: 218: 219: def findDigilibUrl(self): 220: """try to get the digilib URL from 
zogilib""" 221: url = self.template.zogilib.getDLBaseUrl() 222: return url 223: 224: def getStyle(self, idx, selected, style=""): 225: """returns a string with the given style and append 'sel' if path == selected.""" 226: #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style)) 227: if idx == selected: 228: return style + 'sel' 229: else: 230: return style 231: 232: def getLink(self,param=None,val=None): 233: """link to documentviewer with parameter param set to val""" 234: params=self.REQUEST.form.copy() 235: if param is not None: 236: if val is None: 237: if params.has_key(param): 238: del params[param] 239: else: 240: params[param] = str(val) 241: 242: if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath 243: params["mode"] = "imagepath" 244: params["url"] = getParentDir(params["url"]) 245: 246: # quote values and assemble into query string 247: ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) 248: url=self.REQUEST['URL1']+"?"+ps 249: return url 250: 251: def getLinkAmp(self,param=None,val=None): 252: """link to documentviewer with parameter param set to val""" 253: params=self.REQUEST.form.copy() 254: if param is not None: 255: if val is None: 256: if params.has_key(param): 257: del params[param] 258: else: 259: params[param] = str(val) 260: 261: # quote values and assemble into query string 262: logging.info("XYXXXXX: %s"%repr(params.items())) 263: ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) 264: url=self.REQUEST['URL1']+"?"+ps 265: return url 266: 267: def getInfo_xml(self,url,mode): 268: """returns info about the document as XML""" 269: 270: if not self.digilibBaseUrl: 271: self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 272: 273: docinfo = self.getDocinfo(mode=mode,url=url) 274: pt = getattr(self.template, 'info_xml') 275: return 
pt(docinfo=docinfo) 276: 277: 278: def isAccessible(self, docinfo): 279: """returns if access to the resource is granted""" 280: access = docinfo.get('accessType', None) 281: logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access) 282: if access is not None and access == 'free': 283: logger("documentViewer (accessOK)", logging.INFO, "access is free") 284: return True 285: elif access is None or access in self.authgroups: 286: # only local access -- only logged in users 287: user = getSecurityManager().getUser() 288: if user is not None: 289: #print "user: ", user 290: return (user.getUserName() != "Anonymous User") 291: else: 292: return False 293: 294: logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access) 295: return False 296: 297: 298: def getDirinfoFromDigilib(self,path,docinfo=None,cut=0): 299: """gibt param von dlInfo aus""" 300: num_retries = 3 301: if docinfo is None: 302: docinfo = {} 303: 304: for x in range(cut): 305: 306: path=getParentDir(path) 307: 308: infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path 309: 310: logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl)) 311: 312: for cnt in range(num_retries): 313: try: 314: # dom = NonvalidatingReader.parseUri(imageUrl) 315: txt=urllib.urlopen(infoUrl).read() 316: dom = Parse(txt) 317: break 318: except: 319: logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt)) 320: else: 321: raise IOError("Unable to get dir-info from %s"%(infoUrl)) 322: 323: sizes=dom.xpath("//dir/size") 324: logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size"%sizes) 325: 326: if sizes: 327: docinfo['numPages'] = int(getTextFromNode(sizes[0])) 328: else: 329: docinfo['numPages'] = 0 330: 331: # TODO: produce and keep list of image names and numbers 332: 333: return docinfo 334: 335: 336: def getIndexMeta(self, url): 337: """returns dom of index.meta document at 
url""" 338: num_retries = 3 339: dom = None 340: metaUrl = None 341: if url.startswith("http://"): 342: # real URL 343: metaUrl = url 344: else: 345: # online path 346: server=self.digilibBaseUrl+"/servlet/Texter?fn=" 347: metaUrl=server+url.replace("/mpiwg/online","") 348: if not metaUrl.endswith("index.meta"): 349: metaUrl += "/index.meta" 350: logging.debug("METAURL: %s"%metaUrl) 351: for cnt in range(num_retries): 352: try: 353: # patch dirk encoding fehler treten dann nicht mehr auf 354: # dom = NonvalidatingReader.parseUri(metaUrl) 355: txt=urllib.urlopen(metaUrl).read() 356: dom = Parse(txt) 357: break 358: except: 359: logger("ERROR documentViewer (getIndexMeta)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2]) 360: 361: if dom is None: 362: raise IOError("Unable to read index meta from %s"%(url)) 363: 364: return dom 365: 366: def getPresentationInfoXML(self, url): 367: """returns dom of info.xml document at url""" 368: num_retries = 3 369: dom = None 370: metaUrl = None 371: if url.startswith("http://"): 372: # real URL 373: metaUrl = url 374: else: 375: # online path 376: server=self.digilibBaseUrl+"/servlet/Texter?fn=" 377: metaUrl=server+url.replace("/mpiwg/online","") 378: 379: for cnt in range(num_retries): 380: try: 381: # patch dirk encoding fehler treten dann nicht mehr auf 382: # dom = NonvalidatingReader.parseUri(metaUrl) 383: txt=urllib.urlopen(metaUrl).read() 384: dom = Parse(txt) 385: break 386: except: 387: logger("ERROR documentViewer (getPresentationInfoXML)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2]) 388: 389: if dom is None: 390: raise IOError("Unable to read infoXMLfrom %s"%(url)) 391: 392: return dom 393: 394: 395: def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): 396: """gets authorization info from the index.meta file at path or given by dom""" 397: logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path)) 398: 399: access = None 400: 401: if docinfo is None: 402: docinfo = {} 403: 404: if 
dom is None: 405: for x in range(cut): 406: path=getParentDir(path) 407: dom = self.getIndexMeta(path) 408: 409: acctype = dom.xpath("//access-conditions/access/@type") 410: if acctype and (len(acctype)>0): 411: access=acctype[0].value 412: if access in ['group', 'institution']: 413: access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower() 414: 415: docinfo['accessType'] = access 416: return docinfo 417: 418: 419: def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): 420: """gets bibliographical info from the index.meta file at path or given by dom""" 421: logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path)) 422: 423: if docinfo is None: 424: docinfo = {} 425: 426: if dom is None: 427: for x in range(cut): 428: path=getParentDir(path) 429: dom = self.getIndexMeta(path) 430: 431: logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) 432: # put in all raw bib fields as dict "bib" 433: bib = dom.xpath("//bib/*") 434: if bib and len(bib)>0: 435: bibinfo = {} 436: for e in bib: 437: bibinfo[e.localName] = getTextFromNode(e) 438: docinfo['bib'] = bibinfo 439: 440: # extract some fields (author, title, year) according to their mapping 441: metaData=self.metadata.main.meta.bib 442: bibtype=dom.xpath("//bib/@type") 443: if bibtype and (len(bibtype)>0): 444: bibtype=bibtype[0].value 445: else: 446: bibtype="generic" 447: 448: bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! 
ROC) 449: docinfo['bib_type'] = bibtype 450: bibmap=metaData.generateMappingForType(bibtype) 451: # if there is no mapping bibmap is empty (mapping sometimes has empty fields) 452: if len(bibmap) > 0 and len(bibmap['author'][0]) > 0: 453: try: 454: docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0]) 455: except: pass 456: try: 457: docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0]) 458: except: pass 459: try: 460: docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0]) 461: except: pass 462: logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype) 463: try: 464: docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0]) 465: except: 466: docinfo['lang']='' 467: 468: return docinfo 469: 470: 471: def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): 472: """parse texttool tag in index meta""" 473: logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url)) 474: if docinfo is None: 475: docinfo = {} 476: if docinfo.get('lang', None) is None: 477: docinfo['lang'] = '' # default keine Sprache gesetzt 478: if dom is None: 479: dom = self.getIndexMeta(url) 480: 481: archivePath = None 482: archiveName = None 483: 484: archiveNames = dom.xpath("//resource/name") 485: if archiveNames and (len(archiveNames) > 0): 486: archiveName = getTextFromNode(archiveNames[0]) 487: else: 488: logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url)) 489: 490: archivePaths = dom.xpath("//resource/archive-path") 491: if archivePaths and (len(archivePaths) > 0): 492: archivePath = getTextFromNode(archivePaths[0]) 493: # clean up archive path 494: if archivePath[0] != '/': 495: archivePath = '/' + archivePath 496: if archiveName and (not archivePath.endswith(archiveName)): 497: archivePath += "/" + archiveName 498: else: 499: # try to get archive-path from url 500: logger("documentViewer 
(getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url)) 501: if (not url.startswith('http')): 502: archivePath = url.replace('index.meta', '') 503: 504: if archivePath is None: 505: # we balk without archive-path 506: raise IOError("Missing archive-path (for text-tool) in %s" % (url)) 507: 508: imageDirs = dom.xpath("//texttool/image") 509: if imageDirs and (len(imageDirs) > 0): 510: imageDir = getTextFromNode(imageDirs[0]) 511: 512: else: 513: # we balk with no image tag / not necessary anymore because textmode is now standard 514: #raise IOError("No text-tool info in %s"%(url)) 515: imageDir = "" 516: #xquery="//pb" 517: docinfo['imagePath'] = "" # keine Bilder 518: docinfo['imageURL'] = "" 519: 520: if imageDir and archivePath: 521: #print "image: ", imageDir, " archivepath: ", archivePath 522: imageDir = os.path.join(archivePath, imageDir) 523: imageDir = imageDir.replace("/mpiwg/online", '') 524: docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo) 525: docinfo['imagePath'] = imageDir 526: 527: docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir 528: 529: viewerUrls = dom.xpath("//texttool/digiliburlprefix") 530: if viewerUrls and (len(viewerUrls) > 0): 531: viewerUrl = getTextFromNode(viewerUrls[0]) 532: docinfo['viewerURL'] = viewerUrl 533: 534: textUrls = dom.xpath("//texttool/text") 535: if textUrls and (len(textUrls) > 0): 536: textUrl = getTextFromNode(textUrls[0]) 537: if urlparse.urlparse(textUrl)[0] == "": #keine url 538: textUrl = os.path.join(archivePath, textUrl) 539: # fix URLs starting with /mpiwg/online 540: if textUrl.startswith("/mpiwg/online"): 541: textUrl = textUrl.replace("/mpiwg/online", '', 1) 542: 543: docinfo['textURL'] = textUrl 544: 545: textUrls = dom.xpath("//texttool/text-url-path") 546: if textUrls and (len(textUrls) > 0): 547: textUrl = getTextFromNode(textUrls[0]) 548: docinfo['textURLPath'] = textUrl 549: 550: presentationUrls = 
dom.xpath("//texttool/presentation") 551: docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag 552: 553: if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen 554: # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten 555: # durch den relativen Pfad auf die presentation infos 556: presentationPath = getTextFromNode(presentationUrls[0]) 557: if url.endswith("index.meta"): 558: presentationUrl = url.replace('index.meta', presentationPath) 559: else: 560: presentationUrl = url + "/" + presentationPath 561: docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht 562: docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) 563: 564: docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info 565: 566: return docinfo 567: 568: 569: def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): 570: """gets the bibliographical information from the preseantion entry in texttools 571: """ 572: dom=self.getPresentationInfoXML(url) 573: try: 574: docinfo['author']=getTextFromNode(dom.xpath("//author")[0]) 575: except: 576: pass 577: try: 578: docinfo['title']=getTextFromNode(dom.xpath("//title")[0]) 579: except: 580: pass 581: try: 582: docinfo['year']=getTextFromNode(dom.xpath("//date")[0]) 583: except: 584: pass 585: return docinfo 586: 587: def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): 588: """path ist the path to the images it assumes that the index.meta file is one level higher.""" 589: logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path)) 590: if docinfo is None: 591: docinfo = {} 592: path=path.replace("/mpiwg/online","") 593: docinfo['imagePath'] = path 594: docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut) 595: 596: pathorig=path 597: for x in 
range(cut): 598: path=getParentDir(path) 599: logging.error("PATH:"+path) 600: imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path 601: docinfo['imageURL'] = imageUrl 602: 603: #path ist the path to the images it assumes that the index.meta file is one level higher. 604: docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) 605: docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) 606: return docinfo 607: 608: 609: def getDocinfo(self, mode, url): 610: """returns docinfo depending on mode""" 611: logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url)) 612: # look for cached docinfo in session 613: if self.REQUEST.SESSION.has_key('docinfo'): 614: docinfo = self.REQUEST.SESSION['docinfo'] 615: # check if its still current 616: if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url: 617: logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%docinfo) 618: return docinfo 619: # new docinfo 620: docinfo = {'mode': mode, 'url': url} 621: if mode=="texttool": #index.meta with texttool information 622: docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo) 623: elif mode=="imagepath": 624: docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) 625: elif mode=="filepath": 626: docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) 627: else: 628: logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!") 629: raise ValueError("Unknown mode %s! 
Has to be one of 'texttool','imagepath','filepath'."%(mode)) 630: 631: logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo) 632: self.REQUEST.SESSION['docinfo'] = docinfo 633: return docinfo 634: 635: 636: def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): 637: """returns pageinfo with the given parameters""" 638: pageinfo = {} 639: current = getInt(current) 640: pageinfo['current'] = current 641: rows = int(rows or self.thumbrows) 642: pageinfo['rows'] = rows 643: cols = int(cols or self.thumbcols) 644: pageinfo['cols'] = cols 645: grpsize = cols * rows 646: pageinfo['groupsize'] = grpsize 647: start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) 648: # int(current / grpsize) * grpsize +1)) 649: pageinfo['start'] = start 650: pageinfo['end'] = start + grpsize 651: if (docinfo is not None) and ('numPages' in docinfo): 652: np = int(docinfo['numPages']) 653: pageinfo['end'] = min(pageinfo['end'], np) 654: pageinfo['numgroups'] = int(np / grpsize) 655: if np % grpsize > 0: 656: pageinfo['numgroups'] += 1 657: 658: 659: pageinfo['viewMode'] = viewMode 660: pageinfo['tocMode'] = tocMode 661: pageinfo['query'] = self.REQUEST.get('query',' ') 662: pageinfo['queryType'] = self.REQUEST.get('queryType',' ') 663: pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext') 664: 665: pageinfo['textPN'] = self.REQUEST.get('textPN','1') 666: 667: pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30') 668: pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '20') 669: pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') 670: toc = int (pageinfo['tocPN']) 671: pageinfo['textPages'] =int (toc) 672: 673: if 'tocSize_%s'%tocMode in docinfo: 674: tocSize = int(docinfo['tocSize_%s'%tocMode]) 675: tocPageSize = int(pageinfo['tocPageSize']) 676: # cached toc 677: 678: if tocSize%tocPageSize>0: 679: tocPages=tocSize/tocPageSize+1 680: else: 681: 
tocPages=tocSize/tocPageSize 682: pageinfo['tocPN'] = min (tocPages,toc) 683: 684: pageinfo['searchPN'] =self.REQUEST.get('searchPN','1') 685: pageinfo['sn'] =self.REQUEST.get('sn','1') 686: 687: return pageinfo 688: 689: def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None): 690: """get search list""" 691: docpath = docinfo['textURLPath'] 692: pagesize = pageinfo['queryPageSize'] 693: pn = pageinfo['searchPN'] 694: sn = pageinfo['sn'] 695: query =pageinfo['query'] 696: queryType =pageinfo['queryType'] 697: viewMode= pageinfo['viewMode'] 698: tocMode = pageinfo['tocMode'] 699: tocPN = pageinfo['tocPN'] 700: selfurl = self.absolute_url() 701: page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode) ,outputUnicode=False) 702: pagexml = page.replace('?document=/echo/la/Benedetti_1585.xml','?url=/mpiwg/online/permanent/library/163127KK') 703: #hrefNode.nodeValue =pagexml.replace('mode=text','%s&mode=texttool'%selfurl) 704: pagedom = Parse(pagexml) 705: #logging.debug("documentViewer (gettoc) pagedom: %s"%(pagedom)) 706: if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): 707: pagedivs = pagedom.xpath("//div[@class='queryResultPage']") 708: if len(pagedivs)>0: 709: pagenode=pagedivs[0] 710: links=pagenode.xpath("//a") 711: for l in links: 712: hrefNode = l.getAttributeNodeNS(None, u"href") 713: if hrefNode: 714: href = hrefNode.nodeValue 715: if href.startswith('page-fragment.xql'): 716: selfurl = self.absolute_url() 717: #l.setAttributeNS(None, "span class = 'hit highlight'", "background-color: #77DD77;") 718: 
pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN)) 719: hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) 720: l.setAttributeNS(None, "span class = 'hit'", "background-color: #77DD77;") 721: return serializeNode(pagenode) 722: 723: if (queryType=="fulltextMorph"): 724: pagedivs = pagedom.xpath("//div[@class='queryResult']") 725: 726: if len(pagedivs)>0: 727: pagenode=pagedivs[0] 728: links=pagenode.xpath("//a") 729: for l in links: 730: hrefNode = l.getAttributeNodeNS(None, u"href") 731: if hrefNode: 732: href = hrefNode.nodeValue 733: if href.startswith('page-fragment.xql'): 734: selfurl = self.absolute_url() 735: pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN)) 736: hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) 737: if href.startswith('../lt/lemma.xql'): 738: selfurl = self.absolute_url() 739: hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl) 740: l.setAttributeNS(None, 'target', '_blank') 741: l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") 742: l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 743: return serializeNode(pagenode) 744: 745: if (queryType=="ftIndex")or(queryType=="ftIndexMorph"): 746: pagedivs= pagedom.xpath("//div[@class='queryResultPage']") 747: if len(pagedivs)>0: 748: pagenode=pagedivs[0] 749: links=pagenode.xpath("//a") 750: for l in links: 751: hrefNode = l.getAttributeNodeNS(None, u"href") 752: if hrefNode: 753: href = hrefNode.nodeValue 754: 
hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s'%(viewMode,tocMode,tocPN,pn)) 755: 756: if href.startswith('../lt/lex.xql'): 757: selfurl = self.absolute_url() 758: hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl) 759: l.setAttributeNS(None, 'target', '_blank') 760: l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") 761: l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 762: if href.startswith('../lt/lemma.xql'): 763: selfurl = self.absolute_url() 764: hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl) 765: l.setAttributeNS(None, 'target', '_blank') 766: l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") 767: l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 768: return serializeNode(pagenode) 769: return "xexe" 770: 771: def getNumPages(self,docinfo=None): 772: """get list of pages from fulltext and put in docinfo""" 773: xquery = '//pb' 774: text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) 775: # TODO: better processing of the page list. do we need the info somewhere else also? 
776: docinfo['numPages'] = text.count("<pb ") 777: return docinfo 778: 779: def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None,): 780: """returns single page from fulltext""" 781: docpath = docinfo['textURLPath'] 782: if mode == "text_dict": 783: textmode = "textPollux" 784: else: 785: textmode = mode 786: 787: #selfurl = self.absolute_url() 788: #viewMode= pageinfo['viewMode'] 789: #tocMode = pageinfo['tocMode'] 790: #tocPN = pageinfo['tocPN'] 791: 792: pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn), outputUnicode=False) 793: # post-processing downloaded xml 794: pagedom = Parse(pagexml) 795: # plain text mode 796: if mode == "text": 797: # first div contains text 798: pagedivs = pagedom.xpath("/div") 799: #queryResultPage 800: if len(pagedivs) > 0: 801: pagenode = pagedivs[0] 802: return serializeNode(pagenode) 803: if mode == "xml": 804: # first div contains text 805: pagedivs = pagedom.xpath("/div") 806: if len(pagedivs) > 0: 807: pagenode = pagedivs[0] 808: return serializeNode(pagenode) 809: # text-with-links mode 810: if mode == "text_dict": 811: # first div contains text 812: pagedivs = pagedom.xpath("/div") 813: if len(pagedivs) > 0: 814: pagenode = pagedivs[0] 815: # check all a-tags 816: links = pagenode.xpath("//a") 817: for l in links: 818: hrefNode = l.getAttributeNodeNS(None, u"href") 819: if hrefNode: 820: # is link with href 821: href = hrefNode.nodeValue 822: if href.startswith('lt/lex.xql'): 823: # is pollux link 824: selfurl = self.absolute_url() 825: # change href 826: hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl) 827: # add target 828: l.setAttributeNS(None, 'target', '_blank') 829: l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;") 830: l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 831: 
832: if href.startswith('lt/lemma.xql'): 833: selfurl = self.absolute_url() 834: hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl) 835: l.setAttributeNS(None, 'target', '_blank') 836: l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;") 837: l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 838: return serializeNode(pagenode) 839: 840: return "no text here" 841: 842: def getTranslate(self, query=None, language=None): 843: """translate into another languages""" 844: pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","query=%s&language=%s"%(query,language),outputUnicode=False) 845: return pagexml 846: 847: def getLemma(self, lemma=None, language=None): 848: """simular words lemma """ 849: pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","lemma=%s&language=%s"%(lemma,language),outputUnicode=False) 850: return pagexml 851: 852: def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): 853: """number of""" 854: docpath = docinfo['textURLPath'] 855: pagesize = pageinfo['queryPageSize'] 856: pn = pageinfo['searchPN'] 857: query =pageinfo['query'] 858: queryType =pageinfo['queryType'] 859: 860: tocSearch = 0 861: tocDiv = None 862: pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) 863: 864: pagedom = Parse(pagexml) 865: numdivs = pagedom.xpath("//div[@class='queryResultHits']") 866: tocSearch = int(getTextFromNode(numdivs[0])) 867: tc=int((tocSearch/20)+1) 868: logging.debug("documentViewer (gettoc) tc: %s"%(tc)) 869: return tc 870: 871: def getToc(self, mode="text", docinfo=None): 872: """loads table of contents and stores in docinfo""" 873: logging.debug("documentViewer 
(gettoc) mode: %s"%(mode)) 874: if 'tocSize_%s'%mode in docinfo: 875: # cached toc 876: return docinfo 877: docpath = docinfo['textURLPath'] 878: # we need to set a result set size 879: pagesize = 1000 880: pn = 1 881: if mode == "text": 882: queryType = "toc" 883: else: 884: queryType = mode 885: # number of entries in toc 886: tocSize = 0 887: tocDiv = None 888: pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) 889: # post-processing downloaded xml 890: pagedom = Parse(pagexml) 891: # get number of entries 892: numdivs = pagedom.xpath("//div[@class='queryResultHits']") 893: if len(numdivs) > 0: 894: tocSize = int(getTextFromNode(numdivs[0])) 895: # div contains text 896: #pagedivs = pagedom.xpath("//div[@class='queryResultPage']") 897: #if len(pagedivs) > 0: 898: # tocDiv = pagedivs[0] 899: 900: docinfo['tocSize_%s'%mode] = tocSize 901: #docinfo['tocDiv_%s'%mode] = tocDiv 902: return docinfo 903: 904: def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None): 905: """returns single page from the table of contents""" 906: # TODO: this should use the cached TOC 907: if mode == "text": 908: queryType = "toc" 909: else: 910: queryType = mode 911: docpath = docinfo['textURLPath'] 912: pagesize = pageinfo['tocPageSize'] 913: pn = pageinfo['tocPN'] 914: 915: selfurl = self.absolute_url() 916: viewMode= pageinfo['viewMode'] 917: tocMode = pageinfo['tocMode'] 918: tocPN = pageinfo['tocPN'] 919: 920: pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn), outputUnicode=False) 921: page = pagexml.replace('page-fragment.xql?document=/echo/la/Benedetti_1585.xml','%s?url=/mpiwg/online/permanent/library/163127KK&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl, viewMode, tocMode, tocPN)) 922: text = 
page.replace('mode=image','mode=texttool') 923: return text 924: # post-processing downloaded xml 925: #pagedom = Parse(text) 926: # div contains text 927: #pagedivs = pagedom.xpath("//div[@class='queryResultPage']") 928: #if len(pagedivs) > 0: 929: # pagenode = pagedivs[0] 930: # return serializeNode(pagenode) 931: #else: 932: # return "No TOC!" 933: 934: 935: def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None): 936: """init document viewer""" 937: self.title=title 938: self.digilibBaseUrl = digilibBaseUrl 939: self.thumbrows = thumbrows 940: self.thumbcols = thumbcols 941: self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 942: if RESPONSE is not None: 943: RESPONSE.redirect('manage_main') 944: 945: 946: 947: def manage_AddDocumentViewerForm(self): 948: """add the viewer form""" 949: pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self) 950: return pt() 951: 952: def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None): 953: """add the viewer""" 954: newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName) 955: self._setObject(id,newObj) 956: 957: if RESPONSE is not None: 958: RESPONSE.redirect('manage_main') 959: 960: 961: ## 962: ## DocumentViewerTemplate class 963: ## 964: class DocumentViewerTemplate(ZopePageTemplate): 965: """Template for document viewer""" 966: meta_type="DocumentViewer Template" 967: 968: 969: def manage_addDocumentViewerTemplateForm(self): 970: """Form for adding""" 971: pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self) 972: return pt() 973: 974: def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None, 975: REQUEST=None, submit=None): 976: "Add a Page Template with optional file content." 
977: 978: self._setObject(id, DocumentViewerTemplate(id)) 979: ob = getattr(self, id) 980: txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read() 981: logging.info("txt %s:"%txt) 982: ob.pt_edit(txt,"text/html") 983: if title: 984: ob.pt_setTitle(title) 985: try: 986: u = self.DestinationURL() 987: except AttributeError: 988: u = REQUEST['URL1'] 989: 990: u = "%s/%s" % (u, urllib.quote(id)) 991: REQUEST.RESPONSE.redirect(u+'/manage_main') 992: return '' 993: 994: 995: