documentViewer/documentViewer.py - view

File: [Repository] / documentViewer / documentViewer.py
Revision 1.175.2.1: download - view: text, annotated - select for diffs - revision graph
Thu Jul 14 17:43:56 2011 UTC (12 years, 11 months ago) by casties
Branches: elementtree
Diff to: branchpoint 1.175: preferred, colored

first version using elementtree instead of 4suite xml

from OFS.Folder import Folder from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate from Products.PageTemplates.PageTemplateFile import PageTemplateFile from AccessControl import ClassSecurityInfo from AccessControl import getSecurityManager from Globals import package_home from Products.zogiLib.zogiLib import browserCheck #from Ft.Xml import EMPTY_NAMESPACE, Parse #import Ft.Xml.Domlette import xml.etree.ElementTree as ET import os.path import sys import urllib import urllib2 import logging import math import urlparse import re import string def logger(txt,method,txt2): """logging""" logging.info(txt+ txt2) def getInt(number, default=0): """returns always an int (0 in case of problems)""" try: return int(number) except: return int(default) def getText(node): """get the cdata content of a node""" if node is None: return "" # ET: text = node.text or "" for e in node: text += gettext(e) if e.tail: text += e.tail # 4Suite: #nodelist=node.childNodes #text = "" #for n in nodelist: # if n.nodeType == node.TEXT_NODE: # text = text + n.data return text getTextFromNode = getText def serializeNode(node, encoding="utf-8"): """returns a string containing node as XML""" s = ET.tostring(node) # 4Suite: # stream = cStringIO.StringIO() # Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) # s = stream.getvalue() # stream.close() return s def browserCheck(self): """check the browsers request to find out the browser type""" bt = {} ua = self.REQUEST.get_header("HTTP_USER_AGENT") bt['ua'] = ua bt['isIE'] = False bt['isN4'] = False bt['versFirefox']="" bt['versIE']="" bt['versSafariChrome']="" bt['versOpera']="" if string.find(ua, 'MSIE') > -1: bt['isIE'] = True else: bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1) # Safari oder Chrome identification try: nav = ua[string.find(ua, '('):] nav1=ua[string.find(ua,')'):] nav2=nav1[string.find(nav1,'('):] nav3=nav2[string.find(nav2,')'):] ie = string.split(nav, "; ")[1] ie1 =string.split(nav1, " ")[2] ie2 =string.split(nav3, " ")[1] ie3 =string.split(nav3, " ")[2] if string.find(ie3, "Safari") >-1: bt['versSafariChrome']=string.split(ie2, "/")[1] except: pass # IE identification try: nav = ua[string.find(ua, '('):] ie = string.split(nav, "; ")[1] if string.find(ie, "MSIE") > -1: bt['versIE'] = string.split(ie, " ")[1] except:pass # Firefox identification try: nav = ua[string.find(ua, '('):] nav1=ua[string.find(ua,')'):] if string.find(ie1, "Firefox") >-1: nav5= string.split(ie1, "/")[1] logging.debug("FIREFOX: %s"%(nav5)) bt['versFirefox']=nav5[0:3] except:pass #Opera identification try: if string.find(ua,"Opera") >-1: nav = ua[string.find(ua, '('):] nav1=nav[string.find(nav,')'):] bt['versOpera']=string.split(nav1,"/")[2] except:pass bt['isMac'] = string.find(ua, 'Macintosh') > -1 bt['isWin'] = string.find(ua, 'Windows') > -1 bt['isIEWin'] = bt['isIE'] and bt['isWin'] bt['isIEMac'] = bt['isIE'] and bt['isMac'] bt['staticHTML'] = False return bt def getParentDir(path): """returns pathname shortened by one""" return '/'.join(path.split('/')[0:-1]) def getHttpData(url, data=None, num_tries=3, timeout=10): """returns result from url+data HTTP request""" # we do GET (by appending data to url) if isinstance(data, str) or isinstance(data, unicode): # if data is string then append url = "%s?%s"%(url,data) elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple): # urlencode url = "%s?%s"%(url,urllib.urlencode(data)) response = None errmsg = None for cnt in range(num_tries): try: logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url)) if sys.version_info < (2, 6): # set timeout on socket -- ugly :-( import socket socket.setdefaulttimeout(float(timeout)) response = urllib2.urlopen(url) else: response = urllib2.urlopen(url,timeout=float(timeout)) # check result? break except urllib2.HTTPError, e: logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) errmsg = str(e) # stop trying break except urllib2.URLError, e: logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) errmsg = str(e) # stop trying #break if response is not None: data = response.read() response.close() return data raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg)) #return None ## ## documentViewer class ## class documentViewer(Folder): """document viewer""" meta_type="Document viewer" security=ClassSecurityInfo() manage_options=Folder.manage_options+( {'label':'main config','action':'changeDocumentViewerForm'}, ) # templates and forms viewer_main = PageTemplateFile('zpt/viewer_main', globals()) toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals()) toc_text = PageTemplateFile('zpt/toc_text', globals()) toc_figures = PageTemplateFile('zpt/toc_figures', globals()) page_main_images = PageTemplateFile('zpt/page_main_images', globals()) page_main_double = PageTemplateFile('zpt/page_main_double', globals()) page_main_text = PageTemplateFile('zpt/page_main_text', globals()) page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals()) page_main_xml = PageTemplateFile('zpt/page_main_xml', globals()) page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals()) head_main = PageTemplateFile('zpt/head_main', globals()) docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) info_xml = PageTemplateFile('zpt/info_xml', globals()) thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) security.declareProtected('View management screens','changeDocumentViewerForm') changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"): """init document viewer""" self.id=id self.title=title self.thumbcols = thumbcols self.thumbrows = thumbrows # authgroups is list of authorized groups (delimited by ,) self.authgroups = [s.strip().lower() for s in authgroups.split(',')] # create template folder so we can always use template.something templateFolder = Folder('template') #self['template'] = templateFolder # Zope-2.12 style self._setObject('template',templateFolder) # old style try: import MpdlXmlTextServer textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName) #templateFolder['fulltextclient'] = xmlRpcClient templateFolder._setObject('fulltextclient',textServer) except Exception, e: logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e)) try: from Products.zogiLib.zogiLib import zogiLib zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book") #templateFolder['zogilib'] = zogilib templateFolder._setObject('zogilib',zogilib) except Exception, e: logging.error("Unable to create zogiLib for zogilib: "+str(e)) # proxy text server methods to fulltextclient def getTextPage(self, **args): """get page""" return self.template.fulltextclient.getTextPage(**args) def getOrigPages(self, **args): """get page""" return self.template.fulltextclient.getOrigPages(**args) def getOrigPagesNorm(self, **args): """get page""" return self.template.fulltextclient.getOrigPagesNorm(**args) def getQuery(self, **args): """get query in search""" return self.template.fulltextclient.getQuery(**args) def getSearch(self, **args): """get search""" return self.template.fulltextclient.getSearch(**args) def getGisPlaces(self, **args): """get gis places""" return self.template.fulltextclient.getGisPlaces(**args) def getAllGisPlaces(self, **args): """get all gis places """ return self.template.fulltextclient.getAllGisPlaces(**args) def getTranslate(self, **args): """get translate""" return self.template.fulltextclient.getTranslate(**args) def getLemma(self, **args): """get lemma""" return self.template.fulltextclient.getLemma(**args) def getLemmaQuery(self, **args): """get query""" return self.template.fulltextclient.getLemmaQuery(**args) def getLex(self, **args): """get lex""" return self.template.fulltextclient.getLex(**args) def getToc(self, **args): """get toc""" return self.template.fulltextclient.getToc(**args) def getTocPage(self, **args): """get tocpage""" return self.template.fulltextclient.getTocPage(**args) security.declareProtected('View','thumbs_rss') def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): ''' view it @param mode: defines how to access the document behind url @param url: url which contains display information @param viewMode: if images display images, if text display text, default is images (text,images or auto) ''' logging.debug("HHHHHHHHHHHHHH:load the rss") logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) if not hasattr(self, 'template'): # create template folder if it doesn't exist self.manage_addFolder('template') if not self.digilibBaseUrl: self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" docinfo = self.getDocinfo(mode=mode,url=url) #pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo) pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo) ''' ZDES ''' pt = getattr(self.template, 'thumbs_main_rss') if viewMode=="auto": # automodus gewaehlt if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert viewMode="text" else: viewMode="images" return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) security.declareProtected('View','index_html') def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): ''' view it @param mode: defines how to access the document behind url @param url: url which contains display information @param viewMode: if images display images, if text display text, default is auto (text,images or auto) @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) @param characterNormalization type of text display (reg, norm, none) @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma) ''' logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) if not hasattr(self, 'template'): # this won't work logging.error("template folder missing!") return "ERROR: template folder missing!" if not getattr(self, 'digilibBaseUrl', None): self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary" docinfo = self.getDocinfo(mode=mode,url=url) if tocMode != "thumbs": # get table of contents docinfo = self.getToc(mode=tocMode, docinfo=docinfo) if viewMode=="auto": # automodus gewaehlt if docinfo.has_key('textURL') or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert viewMode="text_dict" else: viewMode="images" pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) if (docinfo.get('textURLPath',None)): page = self.getTextPage(mode=viewMode, docinfo=docinfo, pageinfo=pageinfo) pageinfo['textPage'] = page tt = getattr(self, 'template') pt = getattr(tt, 'viewer_main') return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) def generateMarks(self,mk): ret="" if mk is None: return "" if not isinstance(mk, list): mk=[mk] for m in mk: ret+="mk=%s"%m return ret def getBrowser(self): """getBrowser the version of browser """ bt = browserCheck(self) logging.debug("BROWSER VERSION: %s"%(bt)) return bt def findDigilibUrl(self): """try to get the digilib URL from zogilib""" url = self.template.zogilib.getDLBaseUrl() return url def getDocumentViewerURL(self): """returns the URL of this instance""" return self.absolute_url() def getStyle(self, idx, selected, style=""): """returns a string with the given style and append 'sel' if path == selected.""" #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style)) if idx == selected: return style + 'sel' else: return style def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'): """returns URL to documentviewer with parameter param set to val or from dict params""" # copy existing request params urlParams=self.REQUEST.form.copy() # change single param if param is not None: if val is None: if urlParams.has_key(param): del urlParams[param] else: urlParams[param] = str(val) # change more params if params is not None: for k in params.keys(): v = params[k] if v is None: # val=None removes param if urlParams.has_key(k): del urlParams[k] else: urlParams[k] = v # FIXME: does this belong here? if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath urlParams["mode"] = "imagepath" urlParams["url"] = getParentDir(urlParams["url"]) # quote values and assemble into query string (not escaping '/') ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()]) #ps = urllib.urlencode(urlParams) if baseUrl is None: baseUrl = self.REQUEST['URL1'] url = "%s?%s"%(baseUrl, ps) return url def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None): """link to documentviewer with parameter param set to val""" return self.getLink(param, val, params, baseUrl, '&') def getInfo_xml(self,url,mode): """returns info about the document as XML""" if not self.digilibBaseUrl: self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" docinfo = self.getDocinfo(mode=mode,url=url) pt = getattr(self.template, 'info_xml') return pt(docinfo=docinfo) def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True): """returns new option state""" if not self.REQUEST.SESSION.has_key(optionName): # not in session -- initial opt = {'lastState': newState, 'state': initialState} else: opt = self.REQUEST.SESSION.get(optionName) if opt['lastState'] != newState: # state in session has changed -- toggle opt['state'] = not opt['state'] opt['lastState'] = newState self.REQUEST.SESSION[optionName] = opt return opt['state'] def isAccessible(self, docinfo): """returns if access to the resource is granted""" access = docinfo.get('accessType', None) logging.debug("documentViewer (accessOK) access type %s"%access) if access is not None and access == 'free': logging.debug("documentViewer (accessOK) access is free") return True elif access is None or access in self.authgroups: # only local access -- only logged in users user = getSecurityManager().getUser() logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr())) if user is not None: #print "user: ", user return (user.getUserName() != "Anonymous User") else: return False logging.error("documentViewer (accessOK) unknown access type %s"%access) return False def getDirinfoFromDigilib(self,path,docinfo=None,cut=0): """gibt param von dlInfo aus""" if docinfo is None: docinfo = {} for x in range(cut): path=getParentDir(path) infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) txt = getHttpData(infoUrl) if txt is None: raise IOError("Unable to get dir-info from %s"%(infoUrl)) dom = ET.fromstring(txt) #dom = Parse(txt) size=getText(dom.find("size")) #sizes=dom.xpath("//dir/size") logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size) if size: docinfo['numPages'] = int(size) else: docinfo['numPages'] = 0 # TODO: produce and keep list of image names and numbers return docinfo def getIndexMetaPath(self,url): """gib nur den Pfad zurueck""" regexp = re.compile(r".*(experimental|permanent)/(.*)") regpath = regexp.match(url) if (regpath==None): return "" logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2)) return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2)) def getIndexMetaUrl(self,url): """returns utr of index.meta document at url""" metaUrl = None if url.startswith("http://"): # real URL metaUrl = url else: # online path server=self.digilibBaseUrl+"/servlet/Texter?fn=" metaUrl=server+url.replace("/mpiwg/online","") if not metaUrl.endswith("index.meta"): metaUrl += "/index.meta" return metaUrl def getDomFromIndexMeta(self, url): """get dom from index meta""" dom = None metaUrl = self.getIndexMetaUrl(url) logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl) txt=getHttpData(metaUrl) if txt is None: raise IOError("Unable to read index meta from %s"%(url)) dom = ET.fromstring(txt) #dom = Parse(txt) return dom def getPresentationInfoXML(self, url): """returns dom of info.xml document at url""" dom = None metaUrl = None if url.startswith("http://"): # real URL metaUrl = url else: # online path server=self.digilibBaseUrl+"/servlet/Texter?fn=" metaUrl=server+url.replace("/mpiwg/online","") txt=getHttpData(metaUrl) if txt is None: raise IOError("Unable to read infoXMLfrom %s"%(url)) dom = ET.fromstring(txt) #dom = Parse(txt) return dom def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): """gets authorization info from the index.meta file at path or given by dom""" logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path)) access = None if docinfo is None: docinfo = {} if dom is None: for x in range(cut): path=getParentDir(path) dom = self.getDomFromIndexMeta(path) acc = dom.find(".//access-conditions/access") if acc is not None: acctype = acc.get('type') #acctype = dom.xpath("//access-conditions/access/@type") if acctype: access=acctype if access in ['group', 'institution']: access = dom.find(".//access-conditions/access/name").text.lower() docinfo['accessType'] = access return docinfo def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): """gets bibliographical info from the index.meta file at path or given by dom""" logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path)) if docinfo is None: docinfo = {} if dom is None: for x in range(cut): path=getParentDir(path) dom = self.getDomFromIndexMeta(path) docinfo['indexMetaPath']=self.getIndexMetaPath(path); logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) # put in all raw bib fields as dict "bib" bib = dom.find(".//bib/*") #bib = dom.xpath("//bib/*") if bib and len(bib)>0: bibinfo = {} for e in bib: bibinfo[e.localName] = getTextFromNode(e) docinfo['bib'] = bibinfo # extract some fields (author, title, year) according to their mapping metaData=self.metadata.main.meta.bib bib = dom.find(".//bib") bibtype=bib.get("type") #bibtype=dom.xpath("//bib/@type") if not bibtype: bibtype="generic" bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC) docinfo['bib_type'] = bibtype bibmap=metaData.generateMappingForType(bibtype) logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap)) logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype)) # if there is no mapping bibmap is empty (mapping sometimes has empty fields) if len(bibmap) > 0 and len(bibmap['author'][0]) > 0 or len(bibmap['title'][0]) > 0: try: docinfo['author']=getText(bib.find(bibmap['author'][0])) except: pass try: docinfo['title']=getText(bib.find(bibmap['title'][0])) except: pass try: docinfo['year']=getText(bib.find(bibmap['year'][0])) except: pass # ROC: why is this here? # logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype) # try: # docinfo['lang']=getTextFromNode(dom.find(".//bib/lang")[0]) # except: # docinfo['lang']='' # try: # docinfo['city']=getTextFromNode(dom.find(".//bib/city")[0]) # except: # docinfo['city']='' # try: # docinfo['number_of_pages']=getTextFromNode(dom.find(".//bib/number_of_pages")[0]) # except: # docinfo['number_of_pages']='' # try: # docinfo['series_volume']=getTextFromNode(dom.find(".//bib/series_volume")[0]) # except: # docinfo['series_volume']='' # try: # docinfo['number_of_volumes']=getTextFromNode(dom.find(".//bib/number_of_volumes")[0]) # except: # docinfo['number_of_volumes']='' # try: # docinfo['translator']=getTextFromNode(dom.find(".//bib/translator")[0]) # except: # docinfo['translator']='' # try: # docinfo['edition']=getTextFromNode(dom.find(".//bib/edition")[0]) # except: # docinfo['edition']='' # try: # docinfo['series_author']=getTextFromNode(dom.find(".//bib/series_author")[0]) # except: # docinfo['series_author']='' # try: # docinfo['publisher']=getTextFromNode(dom.find(".//bib/publisher")[0]) # except: # docinfo['publisher']='' # try: # docinfo['series_title']=getTextFromNode(dom.find(".//bib/series_title")[0]) # except: # docinfo['series_title']='' # try: # docinfo['isbn_issn']=getTextFromNode(dom.find(".//bib/isbn_issn")[0]) # except: # docinfo['isbn_issn']='' return docinfo def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): """gets name info from the index.meta file at path or given by dom""" if docinfo is None: docinfo = {} if dom is None: for x in range(cut): path=getParentDir(path) dom = self.getDomFromIndexMeta(path) docinfo['name']=getText(dom.find("name")) logging.debug("documentViewer docinfo[name] %s"%docinfo['name']) return docinfo def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): """parse texttool tag in index meta""" logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url)) if docinfo is None: docinfo = {} if docinfo.get('lang', None) is None: docinfo['lang'] = '' # default keine Sprache gesetzt if dom is None: dom = self.getDomFromIndexMeta(url) archivePath = None archiveName = None archiveName = getTextFromNode(dom.find("name")) if not archiveName: logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url)) archivePath = getTextFromNode(dom.find("archive-path")) if archivePath: # clean up archive path if archivePath[0] != '/': archivePath = '/' + archivePath if archiveName and (not archivePath.endswith(archiveName)): archivePath += "/" + archiveName else: # try to get archive-path from url logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url)) if (not url.startswith('http')): archivePath = url.replace('index.meta', '') if archivePath is None: # we balk without archive-path raise IOError("Missing archive-path (for text-tool) in %s" % (url)) imageDir = getText(dom.find(".//texttool/image")) if not imageDir: # we balk with no image tag / not necessary anymore because textmode is now standard #raise IOError("No text-tool info in %s"%(url)) imageDir = "" #xquery="//pb" docinfo['imagePath'] = "" # keine Bilder docinfo['imageURL'] = "" if imageDir and archivePath: #print "image: ", imageDir, " archivepath: ", archivePath imageDir = os.path.join(archivePath, imageDir) imageDir = imageDir.replace("/mpiwg/online", '') docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo) docinfo['imagePath'] = imageDir docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir viewerUrl = getText(dom.find(".//texttool/digiliburlprefix")) if viewerUrl: docinfo['viewerURL'] = viewerUrl # old style text URL textUrl = getText(dom.find(".//texttool/text")) if textUrl: if urlparse.urlparse(textUrl)[0] == "": #keine url textUrl = os.path.join(archivePath, textUrl) # fix URLs starting with /mpiwg/online if textUrl.startswith("/mpiwg/online"): textUrl = textUrl.replace("/mpiwg/online", '', 1) docinfo['textURL'] = textUrl # new style text-url-path textUrl = getText(dom.find(".//texttool/text-url-path")) if textUrl: docinfo['textURLPath'] = textUrl textUrlkurz = string.split(textUrl, ".")[0] docinfo['textURLPathkurz'] = textUrlkurz #if not docinfo['imagePath']: # text-only, no page images #docinfo = self.getNumTextPages(docinfo) presentationUrl = getText(dom.find(".//texttool/presentation")) docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom) if presentationUrl: # ueberschreibe diese durch presentation informationen # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten # durch den relativen Pfad auf die presentation infos presentationPath = presentationUrl if url.endswith("index.meta"): presentationUrl = url.replace('index.meta', presentationPath) else: presentationUrl = url + "/" + presentationPath docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info return docinfo def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): """gets the bibliographical information from the preseantion entry in texttools """ dom=self.getPresentationInfoXML(url) try: docinfo['author']=getText(dom.find(".//author")) except: pass try: docinfo['title']=getText(dom.find(".//title")) except: pass try: docinfo['year']=getText(dom.find(".//date")) except: pass return docinfo def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): """path ist the path to the images it assumes that the index.meta file is one level higher.""" logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path)) if docinfo is None: docinfo = {} path=path.replace("/mpiwg/online","") docinfo['imagePath'] = path docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut) pathorig=path for x in range(cut): path=getParentDir(path) logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path docinfo['imageURL'] = imageUrl #path ist the path to the images it assumes that the index.meta file is one level higher. docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) return docinfo def getDocinfo(self, mode, url): """returns docinfo depending on mode""" logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) # look for cached docinfo in session if self.REQUEST.SESSION.has_key('docinfo'): docinfo = self.REQUEST.SESSION['docinfo'] # check if its still current if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url: logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo) return docinfo # new docinfo docinfo = {'mode': mode, 'url': url} if mode=="texttool": #index.meta with texttool information docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo) elif mode=="imagepath": docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) elif mode=="filepath": docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) else: logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) # FIXME: fake texturlpath if not docinfo.has_key('textURLPath'): docinfo['textURLPath'] = None logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) #logging.debug("documentViewer (getdocinfo) docinfo: %s"%) self.REQUEST.SESSION['docinfo'] = docinfo return docinfo def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): """returns pageinfo with the given parameters""" pageinfo = {} current = getInt(current) pageinfo['current'] = current rows = int(rows or self.thumbrows) pageinfo['rows'] = rows cols = int(cols or self.thumbcols) pageinfo['cols'] = cols grpsize = cols * rows pageinfo['groupsize'] = grpsize start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) # int(current / grpsize) * grpsize +1)) pageinfo['start'] = start pageinfo['end'] = start + grpsize if (docinfo is not None) and ('numPages' in docinfo): np = int(docinfo['numPages']) pageinfo['end'] = min(pageinfo['end'], np) pageinfo['numgroups'] = int(np / grpsize) if np % grpsize > 0: pageinfo['numgroups'] += 1 pageinfo['viewMode'] = viewMode pageinfo['tocMode'] = tocMode pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg') #pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','1') pageinfo['query'] = self.REQUEST.get('query','') pageinfo['queryType'] = self.REQUEST.get('queryType','') pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext') pageinfo['textPN'] = self.REQUEST.get('textPN','1') pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','') pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30') pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10') pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') toc = int (pageinfo['tocPN']) pageinfo['textPages'] =int (toc) if 'tocSize_%s'%tocMode in docinfo: tocSize = int(docinfo['tocSize_%s'%tocMode]) tocPageSize = int(pageinfo['tocPageSize']) # cached toc if tocSize%tocPageSize>0: tocPages=tocSize/tocPageSize+1 else: tocPages=tocSize/tocPageSize pageinfo['tocPN'] = min (tocPages,toc) pageinfo['searchPN'] =self.REQUEST.get('searchPN','1') pageinfo['sn'] =self.REQUEST.get('sn','') return pageinfo def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None): """init document viewer""" self.title=title self.digilibBaseUrl = digilibBaseUrl self.thumbrows = thumbrows self.thumbcols = thumbcols self.authgroups = [s.strip().lower() for s in authgroups.split(',')] if RESPONSE is not None: RESPONSE.redirect('manage_main') def manage_AddDocumentViewerForm(self): """add the viewer form""" pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self) return pt() def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None): """add the viewer""" newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName) self._setObject(id,newObj) if RESPONSE is not None: RESPONSE.redirect('manage_main') ## DocumentViewerTemplate class class DocumentViewerTemplate(ZopePageTemplate): """Template for document viewer""" meta_type="DocumentViewer Template" def manage_addDocumentViewerTemplateForm(self): """Form for adding""" pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self) return pt() def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None, REQUEST=None, submit=None): "Add a Page Template with optional file content." self._setObject(id, DocumentViewerTemplate(id)) ob = getattr(self, id) txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read() logging.info("txt %s:"%txt) ob.pt_edit(txt,"text/html") if title: ob.pt_setTitle(title) try: u = self.DestinationURL() except AttributeError: u = REQUEST['URL1'] u = "%s/%s" % (u, urllib.quote(id)) REQUEST.RESPONSE.redirect(u+'/manage_main') return ''