Annotation of documentViewer/documentViewer.py, revision 1.69.2.3
1.18 dwinter 1:
1.1 dwinter 2: from OFS.Folder import Folder
3: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
1.22 dwinter 4: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
1.50 abukhman 5: from Products.PythonScripts.standard import url_quote
1.1 dwinter 6: from AccessControl import ClassSecurityInfo
1.8 casties 7: from AccessControl import getSecurityManager
1.1 dwinter 8: from Globals import package_home
9:
10: from Ft.Xml.Domlette import NonvalidatingReader
11: from Ft.Xml.Domlette import PrettyPrint, Print
1.11 casties 12: from Ft.Xml import EMPTY_NAMESPACE, Parse
1.1 dwinter 13:
1.47 abukhman 14: from xml.dom.minidom import parse, parseString
15:
1.69.2.1 abukhman 16: from extraFunction import *
1.47 abukhman 17:
1.42 abukhman 18:
1.1 dwinter 19: import Ft.Xml.XPath
1.43 casties 20: import cStringIO
1.42 abukhman 21: import xmlrpclib
1.1 dwinter 22: import os.path
1.7 casties 23: import sys
1.1 dwinter 24: import cgi
25: import urllib
1.20 dwinter 26: import logging
1.28 casties 27: import math
1.22 dwinter 28:
1.18 dwinter 29: import urlparse
1.37 dwinter 30: from types import *
1.43 casties 31:
def logger(txt,method,txt2):
    """Log the concatenation of txt and txt2 at INFO level.

    The method argument is accepted but not used; the message is always
    written with logging.info.
    """
    message = txt + txt2
    logging.info(message)
35:
36:
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that fails.

    @param number: value to convert (string, number, None, ...)
    @param default: value used when number cannot be converted (default 0)
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # only the errors int() raises for unconvertible input are treated
        # as "use the default"; anything else (e.g. KeyboardInterrupt) is
        # no longer swallowed
        return int(default)
1.4 casties 43:
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    Returns the empty string when nodename is None. Only immediate child
    nodes of type TEXT_NODE contribute; text inside child elements is not
    included.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
54:
def serializeNode(node, encoding='utf-8'):
    """Return node serialized as an XML string.

    The node is written with Print into an in-memory buffer using the
    given encoding, and the buffer contents are returned.
    """
    out = cStringIO.StringIO()
    Print(node, stream=out, encoding=encoding)
    xml = out.getvalue()
    out.close()
    return xml
62:
1.9 casties 63:
def getParentDir(path):
    """Return path with its last '/'-separated component removed.

    A path without any '/' yields the empty string.
    """
    segments = path.split('/')
    # split() always yields at least one element, so dropping the last is safe
    del segments[-1]
    return '/'.join(segments)
67:
68:
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Return the body of an HTTP GET request to url (plus data).

    @param url: URL to fetch
    @param data: optional query data; a string is appended as-is, a dict,
        list or tuple is urlencoded first
    @param num_tries: maximum number of attempts
    @param timeout: timeout in seconds for each attempt
    @return: the response body
    @raise IOError: if no response could be obtained; the message carries
        the text of the last HTTP/URL error
    """
    # imported locally: the module's visible import block does not import
    # urllib2 (it may only have been available via a star import)
    import urllib2

    # we do GET (by appending data to url)
    query = None
    if isinstance(data, basestring):
        # if data is string then append
        query = data
    elif isinstance(data, (dict, list, tuple)):
        # urlencode
        query = urllib.urlencode(data)

    if query is not None:
        # don't add a second '?' when the url already has a query string
        if '?' in url:
            separator = '&'
        else:
            separator = '?'
        url = "%s%s%s"%(url,separator,query)

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # urlopen has no timeout parameter before Python 2.6, so the
                # process-wide socket default is set instead -- ugly :-(
                import socket
                socket.setdefaulttimeout(timeout)
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url,timeout=float(timeout))
            # check result?
            break
        except urllib2.HTTPError:
            # sys.exc_info() keeps this syntax valid on every Python version
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # the server answered with an error status: stop trying
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # no answer from the server: fall through and try again

    if response is not None:
        try:
            data = response.read()
        finally:
            # close the connection even if reading fails
            response.close()
        return data

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
1.1 dwinter 111:
112:
113:
1.3 casties 114: ##
115: ## documentViewer class
116: ##
class documentViewer(Folder):
    """document viewer

    Zope folder that shows a document (page images and/or full text)
    described by an index.meta file or an image directory. Page images come
    from a digilib server (digilibBaseUrl); full text functions are proxied
    to the 'fulltextclient' object in the 'template' sub-folder.
    """
    meta_type="Document viewer"

    security=ClassSecurityInfo()
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # digilib URL used when neither the configuration nor zogilib provide one
    _fallbackDigilibBaseUrl = "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    security.declareProtected('View management screens','changeDocumentViewerForm')
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())


    def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
        """init document viewer

        @param id: Zope id of the new object
        @param imageScalerUrl: digilib server URL handed to the zogilib object
        @param textServerName: accepted but not used in this method
        @param title: title of the object
        @param digilibBaseUrl: base URL of the digilib server (may be None)
        @param thumbcols: number of thumbnail columns
        @param thumbrows: number of thumbnail rows
        @param authgroups: comma separated list of authorized groups
        """
        self.id=id
        self.title=title
        # keep the configured URL; the attribute used to be left unset here
        # although several methods read self.digilibBaseUrl
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbcols = thumbcols
        self.thumbrows = thumbrows
        # authgroups is list of authorized groups (delimited by ,)
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        # create template folder so we can always use template.something
        templateFolder = Folder('template')
        #self['template'] = templateFolder # Zope-2.12 style
        self._setObject('template',templateFolder) # old style
        try:
            import MpdlXmlTextServer
            # the imported name is the module, which is not callable;
            # NOTE(review): assumes the module defines a class of the same
            # name -- confirm against MpdlXmlTextServer.py
            textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient')
            #templateFolder['fulltextclient'] = xmlRpcClient
            templateFolder._setObject('fulltextclient',textServer)
        except Exception:
            # best effort: the viewer is still created without a text server
            logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(sys.exc_info()[1]))
        try:
            from Products.zogiLib.zogiLib import zogiLib
            zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
            #templateFolder['zogilib'] = zogilib
            templateFolder._setObject('zogilib',zogilib)
        except Exception:
            # best effort: the viewer is still created without zogilib
            logging.error("Unable to create zogiLib for zogilib: "+str(sys.exc_info()[1]))


    # proxy text server methods to fulltextclient
    def getTextPage(self, **args):
        """get page (forwards the keyword arguments to template.fulltextclient)"""
        return self.template.fulltextclient.getTextPage(**args)

    def getQuery(self, **args):
        """get query (forwards the keyword arguments to template.fulltextclient)"""
        return self.template.fulltextclient.getQuery(**args)

    def getSearch(self, **args):
        """get search (forwards the keyword arguments to template.fulltextclient)"""
        return self.template.fulltextclient.getSearch(**args)

    def getNumPages(self, **args):
        """get numpages (forwards the keyword arguments to template.fulltextclient)"""
        return self.template.fulltextclient.getNumPages(**args)

    def getTranslate(self, **args):
        """get translate (forwards the keyword arguments to template.fulltextclient)"""
        return self.template.fulltextclient.getTranslate(**args)

    def getLemma(self, **args):
        """get lemma (forwards the keyword arguments to template.fulltextclient)"""
        return self.template.fulltextclient.getLemma(**args)

    def getToc(self, **args):
        """get toc (forwards the keyword arguments to template.fulltextclient)"""
        return self.template.fulltextclient.getToc(**args)

    def getTocPage(self, **args):
        """get tocpage (forwards the keyword arguments to template.fulltextclient)"""
        return self.template.fulltextclient.getTocPage(**args)


    def _ensureDigilibBaseUrl(self):
        # Make sure self.digilibBaseUrl holds a usable value: ask zogilib
        # first and use the built-in fallback URL otherwise. getattr() is
        # used because instances created by older code may lack the attribute.
        if not getattr(self, 'digilibBaseUrl', None):
            self.digilibBaseUrl = self.findDigilibUrl() or self._fallbackDigilibBaseUrl

    def _buildQueryString(self, params):
        # Quote the values of params and join them into a query string.
        # List/tuple values (repeated form fields) become repeated
        # parameters and non-string values are converted, because
        # urllib.quote only accepts strings.
        pairs = []
        for (k, v) in params.items():
            if isinstance(v, (list, tuple)):
                values = v
            else:
                values = [v]
            for item in values:
                if isinstance(item, unicode):
                    item = item.encode('utf-8')
                elif not isinstance(item, str):
                    item = str(item)
                pairs.append("%s=%s"%(k,urllib.quote(item)))
        return "&".join(pairs)


    security.declareProtected('View','thumbs_rss')
    def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
        '''
        view it as RSS
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
        @param start: first page of the current thumbnail group
        @param pn: current page number
        '''
        logging.debug("HHHHHHHHHHHHHH:load the rss")
        logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # create template folder if it doesn't exist
            self.manage_addFolder('template')

        self._ensureDigilibBaseUrl()

        docinfo = self.getDocinfo(mode=mode,url=url)
        pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
        pt = getattr(self.template, 'thumbs_main_rss')

        if viewMode=="auto": # auto mode selected
            # text URL set and text viewer configured; textViewerUrl is not
            # set anywhere in this class, so read it defensively
            if docinfo.get("textURL",'') and getattr(self, 'textViewerUrl', None):
                viewMode="text"
            else:
                viewMode="images"

        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)

    security.declareProtected('View','index_html')
    def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None):
        '''
        view it
        @param mode: defines how to access the document behind url
        @param url: url which contains display information
        @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
        @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
        @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
        '''

        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))

        if not hasattr(self, 'template'):
            # this won't work
            logging.error("template folder missing!")
            return "ERROR: template folder missing!"

        self._ensureDigilibBaseUrl()

        docinfo = self.getDocinfo(mode=mode,url=url)

        if tocMode != "thumbs":
            # get table of contents
            docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

        if viewMode=="auto": # auto mode selected
            if docinfo.get("textURL",''): # text URL is set
                viewMode="text_dict"
            else:
                viewMode="images"

        pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

        pt = getattr(self.template, 'viewer_main')
        return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))

    def generateMarks(self,mk):
        # Returns the marks as a string of "mk=<value>" items (concatenated
        # without separator); None gives the empty string.
        # NOTE(review): this method has no docstring, which in Zope 2 keeps
        # it from being published through the web; it is documented with
        # comments so that this stays unchanged.
        if mk is None:
            return ""
        if not isinstance(mk, (list, tuple)):
            mk=[mk]
        return "".join(["mk=%s"%(m,) for m in mk])


    def findDigilibUrl(self):
        """try to get the digilib URL from zogilib"""
        url = self.template.zogilib.getDLBaseUrl()
        return url

    def getDocumentViewerURL(self):
        """returns the URL of this instance"""
        return self.absolute_url()

    def getStyle(self, idx, selected, style=""):
        """returns a string with the given style and append 'sel' if idx == selected."""
        #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
        if idx == selected:
            return style + 'sel'
        else:
            return style

    def getLink(self,param=None,val=None):
        """link to documentviewer with parameter param set to val

        Starts from the current request's form parameters; val=None removes
        param. Mode 'filepath' is rewritten to 'imagepath' with the parent
        directory of url.
        """
        params=self.REQUEST.form.copy()
        if param is not None:
            if val is None:
                if param in params:
                    del params[param]
            else:
                params[param] = str(val)

        if params.get("mode", None) == "filepath": # if the first call used mode filepath, switch to imagepath now
            params["mode"] = "imagepath"
            if "url" in params:
                params["url"] = getParentDir(params["url"])

        # quote values and assemble into query string
        ps = self._buildQueryString(params)
        url=self.REQUEST['URL1']+"?"+ps
        return url

    def getLinkAmp(self,param=None,val=None):
        """link to documentviewer with parameter param set to val

        Same as getLink but without the rewriting of mode 'filepath'.
        """
        params=self.REQUEST.form.copy()
        if param is not None:
            if val is None:
                if param in params:
                    del params[param]
            else:
                params[param] = str(val)

        # quote values and assemble into query string
        logging.debug("XYXXXXX: %s"%repr(params.items()))
        ps = self._buildQueryString(params)
        url=self.REQUEST['URL1']+"?"+ps
        return url

    def getInfo_xml(self,url,mode):
        """returns info about the document as XML"""

        self._ensureDigilibBaseUrl()

        docinfo = self.getDocinfo(mode=mode,url=url)
        pt = getattr(self.template, 'info_xml')
        return pt(docinfo=docinfo)


    def isAccessible(self, docinfo):
        """returns if access to the resource is granted

        Access type 'free' is always granted; a missing access type or one
        listed in authgroups is granted to every user except
        "Anonymous User"; everything else is denied.
        """
        access = docinfo.get('accessType', None)
        logging.debug("documentViewer (accessOK) access type %s"%access)
        if access == 'free':
            logging.debug("documentViewer (accessOK) access is free")
            return True
        elif access is None or access in self.authgroups:
            # only local access -- only logged in users
            user = getSecurityManager().getUser()
            if user is not None:
                #print "user: ", user
                return (user.getUserName() != "Anonymous User")
            else:
                return False

        logging.debug("documentViewer (accessOK) unknown access type %s"%access)
        return False


    def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
        """returns docinfo with numPages taken from digilib's dirInfo

        @param path: image directory path
        @param docinfo: dict to fill (a new one is created if None)
        @param cut: number of trailing path components to remove first
        @raise IOError: if the dir-info cannot be fetched
        """
        if docinfo is None:
            docinfo = {}

        for x in range(cut):
            path=getParentDir(path)

        infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path

        logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

        txt = getHttpData(infoUrl)
        if txt is None:
            raise IOError("Unable to get dir-info from %s"%(infoUrl))

        dom = Parse(txt)
        sizes=dom.xpath("//dir/size")
        # the format string had no placeholder, so the value was never logged
        logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%(sizes,))

        if sizes:
            docinfo['numPages'] = int(getTextFromNode(sizes[0]))
        else:
            docinfo['numPages'] = 0

        # TODO: produce and keep list of image names and numbers

        return docinfo


    def getIndexMeta(self, url):
        """returns dom of index.meta document at url

        @param url: real http(s) URL or online path of the document
        @raise IOError: if the document cannot be read
        """
        dom = None
        metaUrl = None
        if url.startswith("http://") or url.startswith("https://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url.replace("/mpiwg/online","")
            if not metaUrl.endswith("index.meta"):
                metaUrl += "/index.meta"

        logging.debug("(getIndexMeta): METAURL: %s"%metaUrl)
        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read index meta from %s"%(url))

        dom = Parse(txt)
        return dom

    def getPresentationInfoXML(self, url):
        """returns dom of info.xml document at url

        @param url: real http(s) URL or online path of the document
        @raise IOError: if the document cannot be read
        """
        dom = None
        metaUrl = None
        if url.startswith("http://") or url.startswith("https://"):
            # real URL
            metaUrl = url
        else:
            # online path
            server=self.digilibBaseUrl+"/servlet/Texter?fn="
            metaUrl=server+url.replace("/mpiwg/online","")

        txt=getHttpData(metaUrl)
        if txt is None:
            raise IOError("Unable to read infoXMLfrom %s"%(url))

        dom = Parse(txt)
        return dom


    def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets authorization info from the index.meta file at path or given by dom

        Stores the result as docinfo['accessType'] (None if the document has
        no access type) and returns docinfo.
        """
        logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

        access = None

        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getIndexMeta(path)

        acctype = dom.xpath("//access-conditions/access/@type")
        if acctype:
            access=acctype[0].value
            if access in ['group', 'institution']:
                # the access type is the (lowercased) name of the group;
                # without a name element the generic type is kept
                names = dom.xpath("//access-conditions/access/name")
                if names:
                    access = getTextFromNode(names[0]).lower()

        docinfo['accessType'] = access
        return docinfo


    def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
        """gets bibliographical info from the index.meta file at path or given by dom

        Stores all raw bib fields as docinfo['bib'], the bib type as
        docinfo['bib_type'] and, if a mapping for the type exists, author,
        title, year and lang. Returns docinfo.
        """
        logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

        if docinfo is None:
            docinfo = {}

        if dom is None:
            for x in range(cut):
                path=getParentDir(path)
            dom = self.getIndexMeta(path)

        logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
        # put in all raw bib fields as dict "bib"
        bib = dom.xpath("//bib/*")
        if bib:
            bibinfo = {}
            for e in bib:
                bibinfo[e.localName] = getTextFromNode(e)
            docinfo['bib'] = bibinfo

        # extract some fields (author, title, year) according to their mapping
        metaData=self.metadata.main.meta.bib
        bibtype=dom.xpath("//bib/@type")
        if bibtype:
            bibtype=bibtype[0].value
        else:
            bibtype="generic"

        bibtype=bibtype.replace("-"," ") # wrong types in index meta "-" instead of " " (not wrong! ROC)
        docinfo['bib_type'] = bibtype
        bibmap=metaData.generateMappingForType(bibtype)
        # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
        if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
            for field in ('author', 'title', 'year'):
                try:
                    docinfo[field]=getTextFromNode(dom.xpath("//bib/%s"%bibmap[field][0])[0])
                except Exception:
                    # best effort: a missing mapping entry or element just
                    # leaves the field unset
                    pass
            logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
            try:
                docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
            except Exception:
                docinfo['lang']=''

        return docinfo


    def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
        """parse texttool tag in index meta

        @param url: URL or online path of the index.meta file
        @param dom: parsed index.meta (fetched from url if None)
        @param docinfo: dict to fill (a new one is created if None)
        @return: docinfo with image, text, bibliographical and access info
        @raise IOError: if no archive path can be determined
        """
        logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
        if docinfo is None:
            docinfo = {}
        if docinfo.get('lang', None) is None:
            docinfo['lang'] = '' # default: no language set
        if dom is None:
            dom = self.getIndexMeta(url)

        archivePath = None
        archiveName = None

        archiveNames = dom.xpath("//resource/name")
        if archiveNames:
            archiveName = getTextFromNode(archiveNames[0])
        else:
            logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

        archivePaths = dom.xpath("//resource/archive-path")
        if archivePaths:
            archivePath = getTextFromNode(archivePaths[0])
            # clean up archive path (startswith also copes with an empty element)
            if not archivePath.startswith('/'):
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')

        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s" % (url))

        imageDirs = dom.xpath("//texttool/image")
        if imageDirs:
            imageDir = getTextFromNode(imageDirs[0])
        else:
            # we balk with no image tag / not necessary anymore because textmode is now standard
            #raise IOError("No text-tool info in %s"%(url))
            imageDir = ""
            #xquery="//pb"
            docinfo['imagePath'] = "" # no images
            docinfo['imageURL'] = ""

        if imageDir and archivePath:
            #print "image: ", imageDir, " archivepath: ", archivePath
            imageDir = os.path.join(archivePath, imageDir)
            imageDir = imageDir.replace("/mpiwg/online", '')
            docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
            docinfo['imagePath'] = imageDir

            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

        viewerUrls = dom.xpath("//texttool/digiliburlprefix")
        if viewerUrls:
            viewerUrl = getTextFromNode(viewerUrls[0])
            docinfo['viewerURL'] = viewerUrl

        textUrls = dom.xpath("//texttool/text")
        if textUrls:
            textUrl = getTextFromNode(textUrls[0])
            if urlparse.urlparse(textUrl)[0] == "": # not a URL
                textUrl = os.path.join(archivePath, textUrl)
            # fix URLs starting with /mpiwg/online
            if textUrl.startswith("/mpiwg/online"):
                textUrl = textUrl.replace("/mpiwg/online", '', 1)

            docinfo['textURL'] = textUrl

        textUrls = dom.xpath("//texttool/text-url-path")
        if textUrls:
            textUrl = getTextFromNode(textUrls[0])
            docinfo['textURLPath'] = textUrl
            # imagePath is unset when the image tag exists but is empty
            if not docinfo.get('imagePath'):
                # text-only, no page images: take the page count from the
                # text server. It is called directly because the getNumPages
                # proxy of this class only accepts keyword arguments.
                docinfo = self.template.fulltextclient.getNumPages(docinfo)

        presentationUrls = dom.xpath("//texttool/presentation")
        docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info from bib tag

        if presentationUrls: # override these with presentation information
            # the presentation URL is obtained by replacing index.meta in the
            # metadata URL with the relative path to the presentation info
            presentationPath = getTextFromNode(presentationUrls[0])
            if url.endswith("index.meta"):
                presentationUrl = url.replace('index.meta', presentationPath)
            else:
                presentationUrl = url + "/" + presentationPath

            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

        docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info

        return docinfo


    def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
        """gets the bibliographical information from the presentation entry in texttools

        The presentation document is always read from url; the dom argument
        is accepted but not used. Author, title and year found there
        overwrite the values in docinfo.
        """
        if docinfo is None:
            docinfo = {}
        dom=self.getPresentationInfoXML(url)
        for (field, xpath) in (('author', "//author"), ('title', "//title"), ('year', "//date")):
            try:
                docinfo[field]=getTextFromNode(dom.xpath(xpath)[0])
            except Exception:
                # best effort: keep the existing value if the element is missing
                pass
        return docinfo

    def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
        """path is the path to the images; it assumes that the index.meta file is one level higher."""
        logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
        if docinfo is None:
            docinfo = {}
        path=path.replace("/mpiwg/online","")
        docinfo['imagePath'] = path
        docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)

        pathorig=path
        for x in range(cut):
            path=getParentDir(path)
        logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
        imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
        docinfo['imageURL'] = imageUrl

        # path is the path to the images; it assumes that the index.meta file is one level higher.
        docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
        docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
        return docinfo


    def getDocinfo(self, mode, url):
        """returns docinfo depending on mode

        A docinfo cached in the session is reused if its mode and url match;
        otherwise a new one is built and stored in the session.

        @param mode: one of 'texttool', 'imagepath', 'filepath'
        @raise ValueError: for any other mode
        """
        logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
        # look for cached docinfo in session
        if self.REQUEST.SESSION.has_key('docinfo'):
            docinfo = self.REQUEST.SESSION['docinfo']
            # check if its still current
            if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
                return docinfo
        # new docinfo
        docinfo = {'mode': mode, 'url': url}
        if mode=="texttool": #index.meta with texttool information
            docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
        elif mode=="imagepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
        elif mode=="filepath":
            docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
        else:
            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
            raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

        logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
        self.REQUEST.SESSION['docinfo'] = docinfo
        return docinfo

    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
        """returns pageinfo with the given parameters

        @param current: current page number
        @param start: first page of the thumbnail group (computed from
            current if not given)
        @param rows: thumbnail rows (defaults to self.thumbrows)
        @param cols: thumbnail columns (defaults to self.thumbcols)
        @param docinfo: document info; may be None
        @param viewMode: stored in the result as 'viewMode'
        @param tocMode: stored in the result as 'tocMode'
        @return: dict with paging values plus query parameters of the request
        """
        pageinfo = {}
        current = getInt(current)
        pageinfo['current'] = current
        rows = int(rows or self.thumbrows)
        pageinfo['rows'] = rows
        cols = int(cols or self.thumbcols)
        pageinfo['cols'] = cols
        grpsize = cols * rows
        pageinfo['groupsize'] = grpsize
        # default start is the first page of the group that contains current
        start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
        # int(current / grpsize) * grpsize +1))
        pageinfo['start'] = start
        pageinfo['end'] = start + grpsize
        if (docinfo is not None) and ('numPages' in docinfo):
            np = int(docinfo['numPages'])
            pageinfo['end'] = min(pageinfo['end'], np)
            pageinfo['numgroups'] = np // grpsize
            if np % grpsize > 0:
                pageinfo['numgroups'] += 1
        pageinfo['viewMode'] = viewMode
        pageinfo['tocMode'] = tocMode
        pageinfo['query'] = self.REQUEST.get('query',' ')
        pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
        pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
        pageinfo['textPN'] = self.REQUEST.get('textPN','1')
        pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
        pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
        pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
        pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
        # request values are user input: fall back to page 1 instead of
        # failing on a non-numeric tocPN
        toc = getInt(pageinfo['tocPN'], default=1)
        pageinfo['textPages'] = toc

        tocSizeKey = 'tocSize_%s'%tocMode
        if (docinfo is not None) and (tocSizeKey in docinfo):
            tocSize = int(docinfo[tocSizeKey])
            tocPageSize = getInt(pageinfo['tocPageSize'], default=30)
            # cached toc: limit tocPN to the number of toc pages (rounded
            # up); a page size of 0 or less is ignored instead of dividing by it
            if tocPageSize > 0:
                tocPages = (tocSize + tocPageSize - 1) // tocPageSize
                pageinfo['tocPN'] = min (tocPages,toc)
        pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
        pageinfo['sn'] =self.REQUEST.get('sn','')
        return pageinfo

    def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
        """change the configuration of the document viewer

        Sets title, digilibBaseUrl, thumbrows, thumbcols and authgroups and
        redirects to manage_main if RESPONSE is given.
        """
        self.title=title
        self.digilibBaseUrl = digilibBaseUrl
        self.thumbrows = thumbrows
        self.thumbcols = thumbcols
        self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
1.1 dwinter 744:
def manage_AddDocumentViewerForm(self):
    """Render the form for adding a document viewer.

    Loads the page template 'zpt/addDocumentViewer', wraps it in the
    context of self and returns the rendered result.
    """
    form = PageTemplateFile('zpt/addDocumentViewer', globals())
    return form.__of__(self)()
749:
def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
    """Create a documentViewer with the given id and add it to self.

    Redirects to manage_main if RESPONSE is given.
    """
    viewer = documentViewer(id,
                            imageScalerUrl=imageScalerUrl,
                            title=title,
                            textServerName=textServerName)
    self._setObject(id,viewer)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
1.3 casties 757:
758: ## DocumentViewerTemplate class
class DocumentViewerTemplate(ZopePageTemplate):
    """Page template for the document viewer (a ZopePageTemplate with its own meta_type)"""
    meta_type = "DocumentViewer Template"
762:
763:
def manage_addDocumentViewerTemplateForm(self):
    """Render the form for adding a document viewer template.

    Loads the page template 'zpt/addDocumentViewerTemplate', wraps it in
    the context of self and returns the rendered result.
    """
    form = PageTemplateFile('zpt/addDocumentViewerTemplate', globals())
    return form.__of__(self)()
768:
def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                           REQUEST=None, submit=None):
    """Add a Page Template with optional file content.

    Creates a DocumentViewerTemplate with the given id in self and fills it
    with the contents of 'zpt/viewer_main.zpt' from the package directory.
    The text and submit arguments are accepted but not used.

    @param id: id of the new template
    @param title: optional title of the new template
    @param REQUEST: if given, the response is redirected to the manage_main
        page of the new template
    @return: the empty string
    """
    self._setObject(id, DocumentViewerTemplate(id))
    ob = getattr(self, id)
    templatePath = os.path.join(package_home(globals()),'zpt/viewer_main.zpt')
    templateFile = open(templatePath,'r')
    try:
        txt = templateFile.read()
    finally:
        # the file handle used to be left open
        templateFile.close()
    logging.info("txt %s:"%txt)
    ob.pt_edit(txt,"text/html")
    if title:
        ob.pt_setTitle(title)

    # REQUEST defaults to None (call from code); only redirect for web requests
    if REQUEST is not None:
        try:
            u = self.DestinationURL()
        except AttributeError:
            u = REQUEST['URL1']

        u = "%s/%s" % (u, urllib.quote(id))
        REQUEST.RESPONSE.redirect(u+'/manage_main')
    return ''
788:
789:
1.14 casties 790:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>