Annotation of documentViewer/documentViewer_old.py, revision 1.1.2.1
1.1.2.1 ! casties 1:
! 2: from OFS.Folder import Folder
! 3: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
! 4: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
! 5: from AccessControl import ClassSecurityInfo
! 6: from AccessControl import getSecurityManager
! 7: from Globals import package_home
! 8: from Products.zogiLib.zogiLib import browserCheck
! 9:
! 10: from Ft.Xml import EMPTY_NAMESPACE, Parse
! 11: import Ft.Xml.Domlette
! 12:
! 13: import xml.etree.ElementTree as ET
! 14:
! 15: import os.path
! 16: import sys
! 17: import urllib
! 18: import urllib2
! 19: import logging
! 20: import math
! 21: import urlparse
! 22: import cStringIO
! 23: import re
! 24: import string
! 25:
def logger(txt, method, txt2):
    """Log ``txt`` immediately followed by ``txt2`` via ``logging.info``.

    ``method`` is accepted (callers pass a logging level) but is not used:
    the message is always emitted at INFO level.
    """
    message = txt + txt2
    logging.info(message)
! 29:
! 30:
def getInt(number, default=0):
    """Return ``number`` converted to an int, falling back to ``int(default)``.

    @param number: value to convert (string, number, None, ...)
    @param default: value used when ``number`` cannot be converted
    @return: ``int(number)``, or ``int(default)`` if the conversion fails
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures trigger the fallback; unrelated exceptions
        # such as KeyboardInterrupt are no longer swallowed.
        return int(default)
! 37:
def getTextFromNode(node):
    """Return the concatenated character data of an ElementTree node.

    The node's own text, the text of all descendants and the tail text that
    follows each descendant are joined in document order.

    @param node: ElementTree element, or None
    @return: the collected text; "" when ``node`` is None
    """
    if node is None:
        return ""
    parts = [node.text or ""]
    for child in node:
        # recurse into the child (the previous code called an undefined
        # name here and failed for every node that had child elements)
        parts.append(getTextFromNode(child))
        if child.tail:
            parts.append(child.tail)
    return "".join(parts)
! 57:
def serializeNode(node, encoding="utf-8"):
    """Return ``node`` serialized as XML by ``ET.tostring``.

    NOTE(review): ``encoding`` is not forwarded to ``ET.tostring``, so the
    serializer's default encoding is used -- confirm whether callers rely
    on that before wiring the parameter through.
    """
    return ET.tostring(node)
! 68:
def browserCheck(self):
    """Classify the browser from the request's User-Agent header.

    @return: dict with the keys
        'ua' (raw header value, None when the header is missing),
        'isIE', 'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac' (booleans),
        'versFirefox', 'versIE', 'versSafariChrome', 'versOpera'
        (version strings, "" when not detected) and
        'staticHTML' (always False).
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # A request without User-Agent header yields None; parse "" instead of
    # failing on the first string operation.
    agent = ua or ""

    bt = {
        'ua': ua,
        'isIE': False,
        'isN4': False,
        'versFirefox': "",
        'versIE': "",
        'versSafariChrome': "",
        'versOpera': "",
    }

    if agent.find('MSIE') > -1:
        bt['isIE'] = True
    else:
        bt['isN4'] = agent.find('Mozilla/4.') > -1

    # When a bracket is absent find() returns -1 and the slice degenerates
    # to the last character; the split()s below then simply find nothing.
    fromOpen = agent[agent.find('('):]
    fromClose = agent[agent.find(')'):]

    # Safari or Chrome: "...) AppleWebKit/x (KHTML, like Gecko) Chrome/v Safari/w"
    try:
        secondGroup = fromClose[fromClose.find('('):]
        products = secondGroup[secondGroup.find(')'):].split(" ")
        if products[2].find("Safari") > -1:
            bt['versSafariChrome'] = products[1].split("/")[1]
    except IndexError:
        pass

    # Internet Explorer: "(compatible; MSIE 7.0; ...)"
    try:
        secondField = fromOpen.split("; ")[1]
        if secondField.find("MSIE") > -1:
            bt['versIE'] = secondField.split(" ")[1]
    except IndexError:
        pass

    # Firefox: "...) Gecko/date Firefox/v" -- the token is computed here
    # instead of being borrowed from the Safari/Chrome block above.
    try:
        product = fromClose.split(" ")[2]
        if product.find("Firefox") > -1:
            version = product.split("/")[1]
            logging.debug("FIREFOX: %s"%(version))
            # only the first three characters are kept
            bt['versFirefox'] = version[0:3]
    except IndexError:
        pass

    # Opera: "Opera/9.80 (...) Presto/x Version/v"
    try:
        if agent.find("Opera") > -1:
            afterGroup = fromOpen[fromOpen.find(')'):]
            bt['versOpera'] = afterGroup.split("/")[2]
    except IndexError:
        pass

    bt['isMac'] = agent.find('Macintosh') > -1
    bt['isWin'] = agent.find('Windows') > -1
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
! 129:
! 130:
def getParentDir(path):
    """Return ``path`` without its last '/'-separated component.

    A path that contains no '/' yields the empty string.
    """
    parent, _sep, _last = path.rpartition('/')
    return parent
! 134:
! 135:
! 136: def getHttpData(url, data=None, num_tries=3, timeout=10):
! 137: """returns result from url+data HTTP request"""
! 138: # we do GET (by appending data to url)
! 139: if isinstance(data, str) or isinstance(data, unicode):
! 140: # if data is string then append
! 141: url = "%s?%s"%(url,data)
! 142: elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
! 143: # urlencode
! 144: url = "%s?%s"%(url,urllib.urlencode(data))
! 145:
! 146: response = None
! 147: errmsg = None
! 148: for cnt in range(num_tries):
! 149: try:
! 150: logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
! 151: if sys.version_info < (2, 6):
! 152: # set timeout on socket -- ugly :-(
! 153: import socket
! 154: socket.setdefaulttimeout(float(timeout))
! 155: response = urllib2.urlopen(url)
! 156: else:
! 157: response = urllib2.urlopen(url,timeout=float(timeout))
! 158: # check result?
! 159: break
! 160: except urllib2.HTTPError, e:
! 161: logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
! 162: errmsg = str(e)
! 163: # stop trying
! 164: break
! 165: except urllib2.URLError, e:
! 166: logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
! 167: errmsg = str(e)
! 168: # stop trying
! 169: #break
! 170:
! 171: if response is not None:
! 172: data = response.read()
! 173: response.close()
! 174: return data
! 175:
! 176: raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
! 177: #return None
! 178:
! 179: ##
! 180: ## documentViewer class
! 181: ##
! 182: class documentViewer(Folder):
! 183: """document viewer"""
! 184: meta_type="Document viewer"
! 185:
! 186: security=ClassSecurityInfo()
! 187: manage_options=Folder.manage_options+(
! 188: {'label':'main config','action':'changeDocumentViewerForm'},
! 189: )
! 190:
! 191: # templates and forms
! 192: viewer_main = PageTemplateFile('zpt/viewer_main', globals())
! 193: toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
! 194: toc_text = PageTemplateFile('zpt/toc_text', globals())
! 195: toc_figures = PageTemplateFile('zpt/toc_figures', globals())
! 196: page_main_images = PageTemplateFile('zpt/page_main_images', globals())
! 197: page_main_double = PageTemplateFile('zpt/page_main_double', globals())
! 198: page_main_text = PageTemplateFile('zpt/page_main_text', globals())
! 199: page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
! 200: page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
! 201: page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
! 202: page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals())
! 203: head_main = PageTemplateFile('zpt/head_main', globals())
! 204: docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
! 205: info_xml = PageTemplateFile('zpt/info_xml', globals())
! 206:
! 207:
! 208: thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
! 209: security.declareProtected('View management screens','changeDocumentViewerForm')
! 210: changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
! 211:
! 212:
! 213: def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
! 214: """init document viewer"""
! 215: self.id=id
! 216: self.title=title
! 217: self.thumbcols = thumbcols
! 218: self.thumbrows = thumbrows
! 219: # authgroups is list of authorized groups (delimited by ,)
! 220: self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
! 221: # create template folder so we can always use template.something
! 222:
! 223: templateFolder = Folder('template')
! 224: #self['template'] = templateFolder # Zope-2.12 style
! 225: self._setObject('template',templateFolder) # old style
! 226: try:
! 227: import MpdlXmlTextServer
! 228: textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
! 229: #templateFolder['fulltextclient'] = xmlRpcClient
! 230: templateFolder._setObject('fulltextclient',textServer)
! 231: except Exception, e:
! 232: logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
! 233: try:
! 234: from Products.zogiLib.zogiLib import zogiLib
! 235: zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
! 236: #templateFolder['zogilib'] = zogilib
! 237: templateFolder._setObject('zogilib',zogilib)
! 238: except Exception, e:
! 239: logging.error("Unable to create zogiLib for zogilib: "+str(e))
! 240:
! 241:
! 242: # proxy text server methods to fulltextclient
def getTextPage(self, **args):
    """Proxy: pass all keyword arguments to ``template.fulltextclient.getTextPage``."""
    textServer = self.template.fulltextclient
    return textServer.getTextPage(**args)
! 246:
def getOrigPages(self, **args):
    """Proxy: pass all keyword arguments to ``template.fulltextclient.getOrigPages``."""
    textServer = self.template.fulltextclient
    return textServer.getOrigPages(**args)
! 250:
def getOrigPagesNorm(self, **args):
    """Proxy: pass all keyword arguments to ``template.fulltextclient.getOrigPagesNorm``."""
    textServer = self.template.fulltextclient
    return textServer.getOrigPagesNorm(**args)
! 254:
def getQuery(self, **args):
    """Proxy: pass all keyword arguments to ``template.fulltextclient.getQuery``."""
    textServer = self.template.fulltextclient
    return textServer.getQuery(**args)
! 258:
def getSearch(self, **args):
    """Proxy: pass all keyword arguments to ``template.fulltextclient.getSearch``."""
    textServer = self.template.fulltextclient
    return textServer.getSearch(**args)
! 262:
def getGisPlaces(self, **args):
    """Proxy: pass all keyword arguments to ``template.fulltextclient.getGisPlaces``."""
    textServer = self.template.fulltextclient
    return textServer.getGisPlaces(**args)
! 266:
def getAllGisPlaces(self, **args):
    """Proxy: pass all keyword arguments to ``template.fulltextclient.getAllGisPlaces``."""
    textServer = self.template.fulltextclient
    return textServer.getAllGisPlaces(**args)
! 270:
def getTranslate(self, **args):
    """Proxy: pass all keyword arguments to ``template.fulltextclient.getTranslate``."""
    textServer = self.template.fulltextclient
    return textServer.getTranslate(**args)
! 274:
def getLemma(self, **args):
    """Proxy: pass all keyword arguments to ``template.fulltextclient.getLemma``."""
    textServer = self.template.fulltextclient
    return textServer.getLemma(**args)
! 278:
def getLemmaQuery(self, **args):
    """Proxy: pass all keyword arguments to ``template.fulltextclient.getLemmaQuery``."""
    textServer = self.template.fulltextclient
    return textServer.getLemmaQuery(**args)
! 282:
def getLex(self, **args):
    """Proxy: pass all keyword arguments to ``template.fulltextclient.getLex``."""
    textServer = self.template.fulltextclient
    return textServer.getLex(**args)
! 286:
def getToc(self, **args):
    """Proxy: pass all keyword arguments to ``template.fulltextclient.getToc``."""
    textServer = self.template.fulltextclient
    return textServer.getToc(**args)
! 290:
def getTocPage(self, **args):
    """Proxy: pass all keyword arguments to ``template.fulltextclient.getTocPage``."""
    textServer = self.template.fulltextclient
    return textServer.getTocPage(**args)
! 294:
! 295:
! 296: security.declareProtected('View','thumbs_rss')
def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
    """Render the 'thumbs_main_rss' template for the document behind url.

    @param mode: defines how to access the document behind url
    @param url: url which contains display information
    @param viewMode: 'text', 'images' or 'auto'; 'auto' becomes 'text' when
        the docinfo has a text URL and 'images' otherwise
    @param start: passed to getPageinfo as ``start``
    @param pn: passed to getPageinfo as ``current``
    @return: result of calling the template with docinfo, pageinfo, viewMode
    """
    logging.debug("HHHHHHHHHHHHHH:load the rss")
    logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # create template folder if it doesn't exist
        self.manage_addFolder('template')

    # getattr with default (as in index_html): the attribute may never
    # have been set on this instance
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo)
    pt = getattr(self.template, 'thumbs_main_rss')

    if viewMode=="auto":
        # text URL set and text viewer configured -> show text
        if docinfo.has_key("textURL") or docinfo.get('textURLPath',None):
            viewMode="text"
        else:
            viewMode="images"

    return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
! 328:
! 329: security.declareProtected('View','index_html')
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
    """Main view: render the 'viewer_main' template for a document.

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @param viewMode: 'auto' selects 'text_dict' when the docinfo carries a
        text URL and 'images' otherwise; any other value is passed through
    @param tocMode: type of 'table of contents' for navigation; for every
        value except 'thumbs' the docinfo is additionally run through getToc
    @param start: passed to getPageinfo as ``start``
    @param pn: passed to getPageinfo as ``current``
    @param mk: mark(s), converted with generateMarks for the template
    @return: rendered page, or an error string if the template folder
        is missing
    """
    logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

    if not hasattr(self, 'template'):
        # nothing can be rendered without the template folder
        logging.error("template folder missing!")
        return "ERROR: template folder missing!"

    if not getattr(self, 'digilibBaseUrl', None):
        discovered = self.findDigilibUrl()
        self.digilibBaseUrl = discovered or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    if tocMode != "thumbs":
        # get table of contents
        docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

    if viewMode=="auto":
        # a text URL means the text viewer can be used
        textAvailable = docinfo.has_key('textURL') or docinfo.get('textURLPath',None)
        if textAvailable:
            viewMode="text_dict"
        else:
            viewMode="images"

    pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

    if docinfo.get('textURLPath',None):
        pageinfo['textPage'] = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo)

    mainTemplate = self.template.viewer_main
    return mainTemplate(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
! 371:
def generateMarks(self,mk):
    """Return the mark(s) in ``mk`` as concatenated ``mk=<value>`` pieces.

    ``None`` gives ""; a single non-list value is treated as a one-element
    list. The pieces are joined without any separator.
    """
    if mk is None:
        return ""
    marks = mk if isinstance(mk, list) else [mk]
    return "".join(["mk=%s"%m for m in marks])
! 381:
! 382:
def getBrowser(self):
    """Return the browser description dict from ``browserCheck`` (and log it)."""
    browserInfo = browserCheck(self)
    logging.debug("BROWSER VERSION: %s"%(browserInfo))
    return browserInfo
! 388:
def findDigilibUrl(self):
    """Return the digilib base URL reported by ``template.zogilib``."""
    return self.template.zogilib.getDLBaseUrl()
! 393:
def getDocumentViewerURL(self):
    """Return this object's URL as given by ``absolute_url()``."""
    viewerUrl = self.absolute_url()
    return viewerUrl
! 397:
def getStyle(self, idx, selected, style=""):
    """Return ``style``, with 'sel' appended when ``idx`` equals ``selected``."""
    suffix = ''
    if idx == selected:
        suffix = 'sel'
    return style + suffix
! 405:
def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'):
    """Return a URL to the viewer built from the current request parameters.

    @param param: name of a single parameter to change
    @param val: new value for ``param``; None removes the parameter
    @param params: dict of further parameters to change; a value of None
        removes that parameter
    @param baseUrl: URL to prepend; defaults to ``REQUEST['URL1']``
    @param paramSep: separator placed between the query parameters
    @return: ``baseUrl`` + '?' + query string
    """
    # start from a copy of the current request parameters
    urlParams=self.REQUEST.form.copy()

    # change single param
    if param is not None:
        if val is None:
            urlParams.pop(param, None)
        else:
            urlParams[param] = str(val)

    # change more params
    if params is not None:
        for k, v in params.items():
            if v is None:
                # val=None removes param
                urlParams.pop(k, None)
            else:
                # convert like the single-param case so that non-string
                # values (e.g. page numbers) can be quoted below
                urlParams[k] = str(v)

    # FIXME: does this belong here?
    # a first call with mode=filepath is continued as imagepath on the
    # parent directory
    if urlParams.get("mode", None) == "filepath":
        urlParams["mode"] = "imagepath"
        urlParams["url"] = getParentDir(urlParams["url"])

    # quote values and assemble into query string (not escaping '/')
    ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
    if baseUrl is None:
        baseUrl = self.REQUEST['URL1']

    return "%s?%s"%(baseUrl, ps)
! 443:
! 444:
def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None):
    """Return ``getLink(...)`` with '&' as the parameter separator.

    NOTE(review): '&' is also getLink's default separator, so this is
    currently equivalent to getLink -- confirm whether an HTML-escaped
    separator was intended.
    """
    separator = '&'
    return self.getLink(param, val, params, baseUrl, separator)
! 448:
def getInfo_xml(self,url,mode):
    """Return info about the document as XML (template 'info_xml').

    @param url: url which contains display information
    @param mode: defines how to access the document behind url
    @return: result of calling ``template.info_xml`` with the docinfo
    """
    # getattr with default (as in index_html): the attribute may never
    # have been set on this instance
    if not getattr(self, 'digilibBaseUrl', None):
        self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    docinfo = self.getDocinfo(mode=mode,url=url)
    pt = getattr(self.template, 'info_xml')
    return pt(docinfo=docinfo)
! 458:
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """Return the state of a toggle option kept in the session.

    On first use the option starts with ``initialState``. Afterwards the
    state flips each time ``newState`` differs from the value seen on the
    previous call. The option is written back to the session on every call.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        option = session.get(optionName)
        if option['lastState'] != newState:
            # the requested state changed since the last call -- toggle
            option['state'] = not option['state']
            option['lastState'] = newState
    else:
        # not in session yet -- initial state
        option = {'lastState': newState, 'state': initialState}

    session[optionName] = option
    return option['state']
! 473:
def isAccessible(self, docinfo):
    """Return True if access to the resource described by docinfo is granted.

    docinfo['accessType'] decides:
      - 'free': always granted
      - missing/None or one of ``self.authgroups``: granted to any user
        whose user name is not "Anonymous User"
      - anything else: denied (and logged as unknown access type)
    """
    access = docinfo.get('accessType', None)
    logging.debug("documentViewer (accessOK) access type %s"%access)

    if access == 'free':
        logging.debug("documentViewer (accessOK) access is free")
        return True

    if access is not None and access not in self.authgroups:
        logging.error("documentViewer (accessOK) unknown access type %s"%access)
        return False

    # only local access -- only logged in users
    user = getSecurityManager().getUser()
    logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
    if user is None:
        return False
    return (user.getUserName() != "Anonymous User")
! 493:
! 494:
def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
    """Ask digilib's dirInfo-xml.jsp for the number of files in a directory.

    @param path: image directory path
    @param docinfo: dict to update (a new one is created when None)
    @param cut: number of trailing path components to remove first
    @return: docinfo with 'numPages' set (0 if the answer has no size)
    @raise IOError: if the directory info cannot be fetched
    """
    if docinfo is None:
        docinfo = {}

    for x in range(cut):
        path=getParentDir(path)

    infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    txt = getHttpData(infoUrl)
    if txt is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    # ET.fromstring already returns the root element
    root = ET.fromstring(txt)

    # ElementTree equivalent of the XPath "//dir/size": the <dir> element
    # may be the root itself or any descendant.
    # NOTE(review): assumes the answer looks like <dir><size>N</size>...</dir>
    sizeNode = None
    if root.tag == "dir":
        sizeNode = root.find("size")
    if sizeNode is None:
        sizeNode = root.find(".//dir/size")

    if sizeNode is not None:
        # an Element without children is falsy, hence the explicit None test
        docinfo['numPages'] = int(getTextFromNode(sizeNode))
    else:
        docinfo['numPages'] = 0
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%(docinfo['numPages']))

    # TODO: produce and keep list of image names and numbers

    return docinfo
! 526:
def getIndexMetaPath(self,url):
    """Return only the path: '/mpiwg/online/' + the part of url starting at
    its 'experimental' or 'permanent' component; "" if url has neither."""
    found = re.match(r".*(experimental|permanent)/(.*)", url)
    if found is None:
        return ""
    area, remainder = found.group(1), found.group(2)
    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%remainder)
    return ("/mpiwg/online/"+area+"/"+remainder)
! 535:
! 536:
! 537:
def getIndexMetaUrl(self,url):
    """Return the URL of the index.meta document for url.

    @param url: either a complete http(s) URL or an online path
        (optionally starting with /mpiwg/online)
    @return: URL ending in 'index.meta'; online paths are resolved through
        the Texter servlet below ``self.digilibBaseUrl``
    """
    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = server+url.replace("/mpiwg/online","")

    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"

    return metaUrl
! 553:
def getDomFromIndexMeta(self, url):
    """Fetch and parse the index.meta document for url.

    @param url: URL or online path, resolved with getIndexMetaUrl
    @return: root ElementTree element of the parsed document
    @raise IOError: if the document cannot be fetched
    """
    metaUrl = self.getIndexMetaUrl(url)

    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read index meta from %s"%(url))

    # ET.fromstring returns the root element directly (an Element has no
    # getroot() method)
    return ET.fromstring(txt)
! 567:
def getPresentationInfoXML(self, url):
    """Fetch and parse the presentation info.xml document at url.

    @param url: either a complete http(s) URL or an online path
        (optionally starting with /mpiwg/online)
    @return: root ElementTree element of the parsed document
    @raise IOError: if the document cannot be fetched
    """
    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl+"/servlet/Texter?fn="
        metaUrl = server+url.replace("/mpiwg/online","")

    txt=getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    # ET.fromstring returns the root element directly (an Element has no
    # getroot() method)
    return ET.fromstring(txt)
! 587:
! 588:
def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Get authorization info from the index.meta at path or given by dom.

    @param path: path of the resource (only used when dom is None)
    @param docinfo: dict to update (a new one is created when None)
    @param dom: parsed index.meta as ElementTree element; fetched with
        getDomFromIndexMeta when None
    @param cut: number of trailing path components to remove before fetching
    @return: docinfo with 'accessType' set to the type attribute of the
        first access-conditions/access element (None if there is none);
        for the types 'group' and 'institution' the lower-cased text of
        the access element's name child is used instead
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    access = None
    # ElementTree cannot select attributes in a path, so find the element
    # and read the attribute from it
    accessNode = dom.find(".//access-conditions/access")
    if accessNode is not None:
        access = accessNode.get("type")
        if access in ['group', 'institution']:
            # a missing name yields "" here
            access = getTextFromNode(accessNode.find("name")).lower()

    docinfo['accessType'] = access
    return docinfo
! 612:
! 613:
def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Get bibliographical info from the index.meta at path or given by dom.

    @param path: path of the resource
    @param docinfo: dict to update (a new one is created when None)
    @param dom: parsed index.meta as ElementTree element; fetched with
        getDomFromIndexMeta when None
    @param cut: number of trailing path components to remove before fetching
    @return: docinfo, updated with
        'indexMetaPath', 'bib' (all children of the first bib element as
        tag -> text, only if there are any), 'bib_type',
        'author'/'title'/'year' (only when a metadata mapping exists and the
        mapped element is present) and the fields 'lang', 'city',
        'number_of_pages', 'series_volume', 'number_of_volumes',
        'translator', 'edition', 'series_author', 'publisher',
        'series_title', 'isbn_issn' ("" when absent)
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath']=self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))

    # NOTE(review): only the first <bib> element is evaluated -- confirm
    # that index.meta files never carry more than one
    bib = dom.find(".//bib")

    # put in all raw bib fields as dict "bib"
    if bib is not None:
        children = list(bib)
        if children:
            bibinfo = {}
            for e in children:
                bibinfo[e.tag] = getTextFromNode(e)
            docinfo['bib'] = bibinfo

    # extract some fields (author, title, year) according to their mapping
    metaData=self.metadata.main.meta.bib
    if bib is not None:
        bibtype = bib.get("type", "generic")
    else:
        bibtype = "generic"

    # index.meta uses "-" where the mapping names use " "
    bibtype=bibtype.replace("-"," ")
    docinfo['bib_type'] = bibtype
    bibmap=metaData.generateMappingForType(bibtype)
    logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
    logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))

    # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
    if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
        for field in ('author', 'title', 'year'):
            # best effort: a field without usable mapping is skipped
            try:
                tag = bibmap[field][0]
                if not tag or bib is None:
                    continue
                node = bib.find(tag)
            except (KeyError, IndexError, TypeError, SyntaxError):
                continue
            if node is not None:
                docinfo[field] = getTextFromNode(node)
        logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)

    # fields that are always present in docinfo, "" when not in index.meta
    plainFields = ('lang', 'city', 'number_of_pages', 'series_volume',
                   'number_of_volumes', 'translator', 'edition',
                   'series_author', 'publisher', 'series_title', 'isbn_issn')
    for field in plainFields:
        node = None
        if bib is not None:
            node = bib.find(field)
        if node is not None:
            docinfo[field] = getTextFromNode(node)
        else:
            docinfo[field] = ''

    return docinfo
! 709:
! 710:
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
    """Get the resource name from the index.meta at path or given by dom.

    @param path: path of the resource (only used when dom is None)
    @param docinfo: dict to update (a new one is created when None)
    @param dom: parsed index.meta as ElementTree element; fetched with
        getDomFromIndexMeta when None
    @param cut: number of trailing path components to remove before fetching
    @return: docinfo with 'name' set to the text of the name child of the
        resource root element ("" if there is none)
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for x in range(cut):
            path=getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    # ElementTree equivalent of the XPath "/resource/name": dom is the root
    # element, so the name must be a direct child of a <resource> root
    nameNode = None
    if dom.tag == "resource":
        nameNode = dom.find("name")

    if nameNode is not None:
        docinfo['name'] = getTextFromNode(nameNode)
    else:
        docinfo['name'] = ""
    logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
    return docinfo
! 724:
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Build docinfo from the texttool section of an index.meta document.

    @param url: URL or online path of the index.meta
    @param dom: parsed index.meta as ElementTree element; fetched with
        getDomFromIndexMeta when None
    @param docinfo: dict to update (a new one is created when None)
    @return: docinfo, updated with 'lang' (default ''), 'imagePath' and
        'imageURL' ('' when there is no image directory), optionally
        'viewerURL', 'textURL', 'textURLPath', 'textURLPathkurz', plus
        whatever getDirinfoFromDigilib, getBibinfoFromIndexMeta,
        getNameFromIndexMeta, getBibinfoFromTextToolPresentation and
        getAuthinfoFromIndexMeta add
    @raise IOError: if no archive path can be determined
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = '' # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    def _first(path):
        """Return the first element for the XPath '//<path>', or None."""
        # dom is the root element: a leading step may name the root itself,
        # which a descendant search from the root would not match
        head, sep, rest = path.partition('/')
        if sep and dom.tag == head:
            node = dom.find(rest)
            if node is not None:
                return node
        return dom.find('.//' + path)

    archivePath = None
    archiveName = None

    nameNode = _first("resource/name")
    if nameNode is not None:
        archiveName = getTextFromNode(nameNode)
    else:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    pathNode = _first("resource/archive-path")
    if pathNode is not None:
        archivePath = getTextFromNode(pathNode)
        # clean up archive path (startswith also copes with an empty value)
        if not archivePath.startswith('/'):
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if (not url.startswith('http')):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageNode = _first("texttool/image")
    if imageNode is not None:
        imageDir = getTextFromNode(imageNode)
    else:
        # no image tag is not an error: text mode is the standard now
        imageDir = ""
        docinfo['imagePath'] = "" # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerNode = _first("texttool/digiliburlprefix")
    if viewerNode is not None:
        docinfo['viewerURL'] = getTextFromNode(viewerNode)

    # old style text URL
    textNode = _first("texttool/text")
    if textNode is not None:
        textUrl = getTextFromNode(textNode)
        if urlparse.urlparse(textUrl)[0] == "": # no scheme: not a URL
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)
        docinfo['textURL'] = textUrl

    # new style text-url-path
    textPathNode = _first("texttool/text-url-path")
    if textPathNode is not None:
        textUrl = getTextFromNode(textPathNode)
        docinfo['textURLPath'] = textUrl
        # short form: everything before the first "."
        docinfo['textURLPathkurz'] = textUrl.split(".")[0]

    presentationNode = _first("texttool/presentation")
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # info from the bib tag
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    if presentationNode is not None:
        # presentation info overrides the bib info; its URL is the
        # index.meta URL with 'index.meta' replaced by the relative path
        # of the presentation file
        presentationPath = getTextFromNode(presentationNode)
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

    return docinfo
! 831:
! 832:
def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
    """Get bibliographical info from the presentation entry of the texttool.

    @param url: URL or online path of the presentation info.xml
    @param docinfo: dict to update (a new one is created when None)
    @param dom: unused; the document is always loaded from url
    @return: docinfo, where 'author', 'title' and 'year' are overwritten
        with the text of the first author, title and date element of the
        presentation document (each only if that element exists)
    """
    if docinfo is None:
        docinfo = {}

    infoDom = self.getPresentationInfoXML(url)
    for field, tag in (('author', 'author'), ('title', 'title'), ('year', 'date')):
        # ElementTree equivalent of the XPath "//<tag>": the root element
        # itself or any descendant
        if infoDom.tag == tag:
            node = infoDom
        else:
            node = infoDom.find('.//' + tag)
        if node is not None:
            docinfo[field] = getTextFromNode(node)
    return docinfo
! 850:
def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
    """Build docinfo for a directory of page images.

    path is the path to the images; the index.meta file is assumed to be
    one level higher, so the bib and auth lookups use cut+1.

    @param path: image path ('/mpiwg/online' is removed from it)
    @param docinfo: dict to update (a new one is created when None)
    @param cut: number of trailing path components to remove to get from
        path to the image directory
    @return: docinfo with 'imagePath' and 'imageURL' set, passed through
        getDirinfoFromDigilib, getBibinfoFromIndexMeta and
        getAuthinfoFromIndexMeta
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    imagePath = path.replace("/mpiwg/online","")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath,docinfo=docinfo,cut=cut)

    # the Scaler URL points at the directory, i.e. the shortened path
    imageDir = imagePath
    for _step in range(cut):
        imageDir = getParentDir(imageDir)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+imageDir)
    docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir

    # index.meta lives one level above the images
    metaCut = cut+1
    docinfo = self.getBibinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath,docinfo=docinfo,cut=metaCut)
    return docinfo
! 871:
! 872:
def getDocinfo(self, mode, url):
    """Return the docinfo dict for ``url``, depending on ``mode``.

    A docinfo stored in the session under 'docinfo' is reused when its
    'mode' and 'url' entries match the arguments.  Otherwise a new docinfo
    is built by the helper for the given mode and stored in the session.

    mode -- one of 'texttool', 'imagepath' or 'filepath'
    url -- document location handed on to the mode specific helper

    Raises ValueError for any other mode.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s", mode, url)
    session = self.REQUEST.SESSION
    # look for cached docinfo in session and check that it is still current
    docinfo = session.get('docinfo')
    if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
        logging.debug("documentViewer (getdocinfo) docinfo in session: %s", docinfo)
        return docinfo

    # new docinfo
    docinfo = {'mode': mode, 'url': url}
    if mode == "texttool":
        # index.meta with texttool information
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode == "filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo, cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!", mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    # FIXME: fake texturlpath
    docinfo.setdefault('textURLPath', None)

    logging.debug("documentViewer (getdocinfo) docinfo: %s", docinfo)
    session['docinfo'] = docinfo
    return docinfo
! 903:
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
    """Return the pageinfo dict describing the current page and paging state.

    current -- current page number; converted with getInt
    start -- first page of the thumbnail group; when it cannot be converted
             to an int the start of the group containing ``current`` is used
    rows, cols -- thumbnail grid size; fall back to self.thumbrows and
                  self.thumbcols when empty
    docinfo -- optional docinfo dict; 'numPages' limits 'end' and yields
               'numgroups', 'tocSize_<tocMode>' limits 'tocPN'
    viewMode, tocMode -- stored unchanged in the result

    The query, text and toc paging entries are read from self.REQUEST with
    their defaults.
    """
    request = self.REQUEST
    pageinfo = {}
    current = getInt(current)
    pageinfo['current'] = current

    rows = int(rows or self.thumbrows)
    pageinfo['rows'] = rows
    cols = int(cols or self.thumbcols)
    pageinfo['cols'] = cols
    grpsize = cols * rows
    pageinfo['groupsize'] = grpsize

    # first page of the thumbnail group that contains the current page
    groupStart = math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)
    start = getInt(start, default=groupStart)
    pageinfo['start'] = start
    pageinfo['end'] = start + grpsize

    if (docinfo is not None) and ('numPages' in docinfo):
        np = int(docinfo['numPages'])
        pageinfo['end'] = min(pageinfo['end'], np)
        # number of groups, counting a partially filled last group
        pageinfo['numgroups'] = np // grpsize
        if np % grpsize > 0:
            pageinfo['numgroups'] += 1

    pageinfo['viewMode'] = viewMode
    pageinfo['tocMode'] = tocMode
    pageinfo['characterNormalization'] = request.get('characterNormalization','reg')
    pageinfo['query'] = request.get('query','')
    pageinfo['queryType'] = request.get('queryType','')
    pageinfo['querySearch'] = request.get('querySearch', 'fulltext')
    pageinfo['textPN'] = request.get('textPN','1')
    pageinfo['highlightQuery'] = request.get('highlightQuery','')
    pageinfo['tocPageSize'] = request.get('tocPageSize', '30')
    pageinfo['queryPageSize'] = request.get('queryPageSize', '10')
    pageinfo['tocPN'] = request.get('tocPN', '1')
    toc = int(pageinfo['tocPN'])
    pageinfo['textPages'] = int(toc)

    # docinfo defaults to None, so check it before the membership test
    tocKey = 'tocSize_%s'%tocMode
    if (docinfo is not None) and (tocKey in docinfo):
        # cached toc
        tocSize = int(docinfo[tocKey])
        tocPageSize = int(pageinfo['tocPageSize'])
        # number of toc pages, counting a partially filled last page
        tocPages = tocSize // tocPageSize
        if tocSize % tocPageSize > 0:
            tocPages += 1
        pageinfo['tocPN'] = min(tocPages, toc)

    pageinfo['searchPN'] = request.get('searchPN','1')
    pageinfo['sn'] = request.get('sn','')
    return pageinfo
! 953:
def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
    """Store new settings on the document viewer.

    title, digilibBaseUrl, thumbrows, thumbcols -- stored unchanged as
        attributes of the same name
    authgroups -- comma separated group names; stored as a list of
        stripped, lower-cased names
    RESPONSE -- when given, it is redirected to 'manage_main'
    """
    self.title = title
    self.digilibBaseUrl = digilibBaseUrl
    self.thumbrows = thumbrows
    self.thumbcols = thumbcols

    groups = []
    for name in authgroups.split(','):
        groups.append(name.strip().lower())
    self.authgroups = groups

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
! 963:
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer."""
    # wrap the template file in the context of self, then render it
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
! 968:
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer and store it in self under ``id``.

    imageScalerUrl, textServerName and title are handed on to the
    documentViewer constructor.  When RESPONSE is given it is redirected
    to 'manage_main' afterwards.
    """
    viewer = documentViewer(id,
                            imageScalerUrl=imageScalerUrl,
                            title=title,
                            textServerName=textServerName)
    self._setObject(id, viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
! 976:
! 977: ## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template used by the document viewer."""

    # meta type string of this class
    meta_type = "DocumentViewer Template"
! 981:
! 982:
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a DocumentViewerTemplate."""
    # wrap the template file in the context of self, then render it
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
! 987:
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    """Add a DocumentViewerTemplate filled with the packaged viewer_main.zpt.

    id -- id of the new template object inside self
    title -- optional title set on the new template
    text, submit -- accepted for signature compatibility; not used
    REQUEST -- when given, the response is redirected to the manage_main
               page of the new template; when None no redirect happens

    Returns an empty string.
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)

    # read the default template and make sure the file is closed again
    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath, 'r')
    try:
        txt = templateFile.read()
    finally:
        templateFile.close()
    logging.info("txt %s:", txt)

    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    # called from code without a request: nothing to redirect
    if REQUEST is None:
        return ''

    try:
        u = self.DestinationURL()
    except AttributeError:
        u = REQUEST['URL1']

    u = "%s/%s" % (u, urllib.quote(id))
    REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
! 1007:
! 1008:
! 1009:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>