documentViewer/documentViewer.py - diff

Return to documentViewer.py CVS log

Up to [Repository] / documentViewer

Diff for /documentViewer/documentViewer.py between versions 1.175 and 1.175.2.3

version 1.175, 2011/06/14 09:57:11	version 1.175.2.3, 2011/07/15 19:34:41
Line 7 from AccessControl import getSecurityMan	Line 7 from AccessControl import getSecurityMan
from Globals import package_home	from Globals import package_home
from Products.zogiLib.zogiLib import browserCheck	from Products.zogiLib.zogiLib import browserCheck

from Ft.Xml import EMPTY_NAMESPACE, Parse	#from Ft.Xml import EMPTY_NAMESPACE, Parse
import Ft.Xml.Domlette	#import Ft.Xml.Domlette

	import xml.etree.ElementTree as ET

import os.path	import os.path
import sys	import sys
import urllib	import urllib
Line 16 import urllib2	Line 19 import urllib2
import logging	import logging
import math	import math
import urlparse	import urlparse
import cStringIO
import re	import re
import string	import string

Line 32 def getInt(number, default=0):	Line 34 def getInt(number, default=0):
except:	except:
return int(default)	return int(default)

def getTextFromNode(nodename):	def getText(node):
"""get the cdata content of a node"""	"""get the cdata content of a node"""
if nodename is None:	if node is None:
return ""	return ""
nodelist=nodename.childNodes	# ET:
rc = ""	text = node.text or ""
for node in nodelist:	for e in node:
if node.nodeType == node.TEXT_NODE:	text += gettext(e)
rc = rc + node.data	if e.tail:
return rc	text += e.tail

	# 4Suite:
	#nodelist=node.childNodes
	#text = ""
	#for n in nodelist:
	# if n.nodeType == node.TEXT_NODE:
	# text = text + n.data

	return text

	getTextFromNode = getText

def serializeNode(node, encoding="utf-8"):	def serializeNode(node, encoding="utf-8"):
"""returns a string containing node as XML"""	"""returns a string containing node as XML"""
stream = cStringIO.StringIO()	s = ET.tostring(node)
#logging.debug("BUF: %s"%(stream))
Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)	# 4Suite:
s = stream.getvalue()	# stream = cStringIO.StringIO()
#logging.debug("BUF: %s"%(s))	# Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
stream.close()	# s = stream.getvalue()
	# stream.close()
return s	return s

def browserCheck(self):	def browserCheck(self):
Line 341 class documentViewer(Folder):	Line 355 class documentViewer(Folder):
# get table of contents	# get table of contents
docinfo = self.getToc(mode=tocMode, docinfo=docinfo)	docinfo = self.getToc(mode=tocMode, docinfo=docinfo)

if viewMode=="auto": # automodus gewaehlt	# auto viewMode: text_dict if text else images
if docinfo.has_key('textURL') or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert	if viewMode=="auto":
	if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):
	#texturl gesetzt und textViewer konfiguriert
viewMode="text_dict"	viewMode="text_dict"
else:	else:
viewMode="images"	viewMode="images"

pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)	pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

if (docinfo.get('textURLPath',None)):	if viewMode != 'images' and docinfo.get('textURLPath', None):
page = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo)	# get full text page
	page = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
pageinfo['textPage'] = page	pageinfo['textPage'] = page
tt = getattr(self, 'template')
pt = getattr(tt, 'viewer_main')	# get template /template/viewer_main
	pt = getattr(self.template, 'viewer_main')
	# and execute with parameters
return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))	return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))

def generateMarks(self,mk):	def generateMarks(self,mk):
Line 485 class documentViewer(Folder):	Line 504 class documentViewer(Folder):
docinfo = {}	docinfo = {}

for x in range(cut):	for x in range(cut):

path=getParentDir(path)	path=getParentDir(path)

infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path	infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
Line 496 class documentViewer(Folder):	Line 514 class documentViewer(Folder):
if txt is None:	if txt is None:
raise IOError("Unable to get dir-info from %s"%(infoUrl))	raise IOError("Unable to get dir-info from %s"%(infoUrl))

dom = Parse(txt)	dom = ET.fromstring(txt)
sizes=dom.xpath("//dir/size")	#dom = Parse(txt)
logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)	size=getText(dom.find("size"))
	#sizes=dom.xpath("//dir/size")
	logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)

if sizes:	if size:
docinfo['numPages'] = int(getTextFromNode(sizes[0]))	docinfo['numPages'] = int(size)
else:	else:
docinfo['numPages'] = 0	docinfo['numPages'] = 0

Line 546 class documentViewer(Folder):	Line 566 class documentViewer(Folder):
if txt is None:	if txt is None:
raise IOError("Unable to read index meta from %s"%(url))	raise IOError("Unable to read index meta from %s"%(url))

dom = Parse(txt)	dom = ET.fromstring(txt)
	#dom = Parse(txt)
return dom	return dom

def getPresentationInfoXML(self, url):	def getPresentationInfoXML(self, url):
Line 565 class documentViewer(Folder):	Line 586 class documentViewer(Folder):
if txt is None:	if txt is None:
raise IOError("Unable to read infoXMLfrom %s"%(url))	raise IOError("Unable to read infoXMLfrom %s"%(url))

dom = Parse(txt)	dom = ET.fromstring(txt)
	#dom = Parse(txt)
return dom	return dom


Line 583 class documentViewer(Folder):	Line 605 class documentViewer(Folder):
path=getParentDir(path)	path=getParentDir(path)
dom = self.getDomFromIndexMeta(path)	dom = self.getDomFromIndexMeta(path)

acctype = dom.xpath("//access-conditions/access/@type")	acc = dom.find(".//access-conditions/access")
if acctype and (len(acctype)>0):	if acc is not None:
access=acctype[0].value	acctype = acc.get('type')
	#acctype = dom.xpath("//access-conditions/access/@type")
	if acctype:
	access=acctype
if access in ['group', 'institution']:	if access in ['group', 'institution']:
access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower()	access = dom.find(".//access-conditions/access/name").text.lower()

docinfo['accessType'] = access	docinfo['accessType'] = access
return docinfo	return docinfo
Line 609 class documentViewer(Folder):	Line 634 class documentViewer(Folder):

logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))	logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
# put in all raw bib fields as dict "bib"	# put in all raw bib fields as dict "bib"
bib = dom.xpath("//bib/*")	bib = dom.find(".//bib")
if bib and len(bib)>0:	#bib = dom.xpath("//bib/*")
	if bib is not None:
bibinfo = {}	bibinfo = {}
for e in bib:	for e in bib:
bibinfo[e.localName] = getTextFromNode(e)	bibinfo[e.tag] = getText(e)

docinfo['bib'] = bibinfo	docinfo['bib'] = bibinfo

# extract some fields (author, title, year) according to their mapping	# extract some fields (author, title, year) according to their mapping
metaData=self.metadata.main.meta.bib	metaData=self.metadata.main.meta.bib
bibtype=dom.xpath("//bib/@type")	bibtype=bib.get("type")
if bibtype and (len(bibtype)>0):	#bibtype=dom.xpath("//bib/@type")
bibtype=bibtype[0].value	if not bibtype:
else:
bibtype="generic"	bibtype="generic"

bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)	bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
Line 630 class documentViewer(Folder):	Line 656 class documentViewer(Folder):
logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))	logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))	logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
# if there is no mapping bibmap is empty (mapping sometimes has empty fields)	# if there is no mapping bibmap is empty (mapping sometimes has empty fields)
if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:	if len(bibmap) > 0 and len(bibmap['author'][0]) > 0 or len(bibmap['title'][0]) > 0:
try:	try:
docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])	docinfo['author']=getText(bib.find(bibmap['author'][0]))
except: pass	except: pass
try:	try:
docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])	docinfo['title']=getText(bib.find(bibmap['title'][0]))
except: pass	except: pass
try:	try:
docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])	docinfo['year']=getText(bib.find(bibmap['year'][0]))
except: pass	except: pass
logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
try:	# ROC: why is this here?
docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])	# logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
except:	# try:
docinfo['lang']=''	# docinfo['lang']=getTextFromNode(dom.find(".//bib/lang")[0])
try:	# except:
docinfo['city']=getTextFromNode(dom.xpath("//bib/city")[0])	# docinfo['lang']=''
except:	# try:
docinfo['city']=''	# docinfo['city']=getTextFromNode(dom.find(".//bib/city")[0])
try:	# except:
docinfo['number_of_pages']=getTextFromNode(dom.xpath("//bib/number_of_pages")[0])	# docinfo['city']=''
except:	# try:
docinfo['number_of_pages']=''	# docinfo['number_of_pages']=getTextFromNode(dom.find(".//bib/number_of_pages")[0])
try:	# except:
docinfo['series_volume']=getTextFromNode(dom.xpath("//bib/series_volume")[0])	# docinfo['number_of_pages']=''
except:	# try:
docinfo['series_volume']=''	# docinfo['series_volume']=getTextFromNode(dom.find(".//bib/series_volume")[0])
try:	# except:
docinfo['number_of_volumes']=getTextFromNode(dom.xpath("//bib/number_of_volumes")[0])	# docinfo['series_volume']=''
except:	# try:
docinfo['number_of_volumes']=''	# docinfo['number_of_volumes']=getTextFromNode(dom.find(".//bib/number_of_volumes")[0])
try:	# except:
docinfo['translator']=getTextFromNode(dom.xpath("//bib/translator")[0])	# docinfo['number_of_volumes']=''
except:	# try:
docinfo['translator']=''	# docinfo['translator']=getTextFromNode(dom.find(".//bib/translator")[0])
try:	# except:
docinfo['edition']=getTextFromNode(dom.xpath("//bib/edition")[0])	# docinfo['translator']=''
except:	# try:
docinfo['edition']=''	# docinfo['edition']=getTextFromNode(dom.find(".//bib/edition")[0])
try:	# except:
docinfo['series_author']=getTextFromNode(dom.xpath("//bib/series_author")[0])	# docinfo['edition']=''
except:	# try:
docinfo['series_author']=''	# docinfo['series_author']=getTextFromNode(dom.find(".//bib/series_author")[0])
try:	# except:
docinfo['publisher']=getTextFromNode(dom.xpath("//bib/publisher")[0])	# docinfo['series_author']=''
except:	# try:
docinfo['publisher']=''	# docinfo['publisher']=getTextFromNode(dom.find(".//bib/publisher")[0])
try:	# except:
docinfo['series_title']=getTextFromNode(dom.xpath("//bib/series_title")[0])	# docinfo['publisher']=''
except:	# try:
docinfo['series_title']=''	# docinfo['series_title']=getTextFromNode(dom.find(".//bib/series_title")[0])
try:	# except:
docinfo['isbn_issn']=getTextFromNode(dom.xpath("//bib/isbn_issn")[0])	# docinfo['series_title']=''
except:	# try:
docinfo['isbn_issn']=''	# docinfo['isbn_issn']=getTextFromNode(dom.find(".//bib/isbn_issn")[0])
	# except:
	# docinfo['isbn_issn']=''
return docinfo	return docinfo


	# TODO: is this needed?
def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):	def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
"""gets name info from the index.meta file at path or given by dom"""	"""gets name info from the index.meta file at path or given by dom"""
if docinfo is None:	if docinfo is None:
Line 698 class documentViewer(Folder):	Line 727 class documentViewer(Folder):
path=getParentDir(path)	path=getParentDir(path)
dom = self.getDomFromIndexMeta(path)	dom = self.getDomFromIndexMeta(path)

docinfo['name']=getTextFromNode(dom.xpath("/resource/name")[0])	docinfo['name']=getText(dom.find("name"))
logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])	logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
return docinfo	return docinfo

Line 715 class documentViewer(Folder):	Line 744 class documentViewer(Folder):
archivePath = None	archivePath = None
archiveName = None	archiveName = None

archiveNames = dom.xpath("//resource/name")	archiveName = getText(dom.find("name"))
if archiveNames and (len(archiveNames) > 0):	if not archiveName:
archiveName = getTextFromNode(archiveNames[0])
else:
logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))	logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

archivePaths = dom.xpath("//resource/archive-path")	archivePath = getText(dom.find("archive-path"))
if archivePaths and (len(archivePaths) > 0):	if archivePath:
archivePath = getTextFromNode(archivePaths[0])
# clean up archive path	# clean up archive path
if archivePath[0] != '/':	if archivePath[0] != '/':
archivePath = '/' + archivePath	archivePath = '/' + archivePath
Line 739 class documentViewer(Folder):	Line 765 class documentViewer(Folder):
# we balk without archive-path	# we balk without archive-path
raise IOError("Missing archive-path (for text-tool) in %s" % (url))	raise IOError("Missing archive-path (for text-tool) in %s" % (url))

imageDirs = dom.xpath("//texttool/image")	imageDir = getText(dom.find(".//texttool/image"))
if imageDirs and (len(imageDirs) > 0):
imageDir = getTextFromNode(imageDirs[0])

else:	if not imageDir:
# we balk with no image tag / not necessary anymore because textmode is now standard	# we balk with no image tag / not necessary anymore because textmode is now standard
#raise IOError("No text-tool info in %s"%(url))	#raise IOError("No text-tool info in %s"%(url))
imageDir = ""	imageDir = ""
Line 760 class documentViewer(Folder):	Line 784 class documentViewer(Folder):

docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir	docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

viewerUrls = dom.xpath("//texttool/digiliburlprefix")	viewerUrl = getText(dom.find(".//texttool/digiliburlprefix"))
if viewerUrls and (len(viewerUrls) > 0):	if viewerUrl:
viewerUrl = getTextFromNode(viewerUrls[0])
docinfo['viewerURL'] = viewerUrl	docinfo['viewerURL'] = viewerUrl

# old style text URL	# old style text URL
textUrls = dom.xpath("//texttool/text")	textUrl = getText(dom.find(".//texttool/text"))
if textUrls and (len(textUrls) > 0):	if textUrl:
textUrl = getTextFromNode(textUrls[0])
if urlparse.urlparse(textUrl)[0] == "": #keine url	if urlparse.urlparse(textUrl)[0] == "": #keine url
textUrl = os.path.join(archivePath, textUrl)	textUrl = os.path.join(archivePath, textUrl)
# fix URLs starting with /mpiwg/online	# fix URLs starting with /mpiwg/online
Line 778 class documentViewer(Folder):	Line 800 class documentViewer(Folder):
docinfo['textURL'] = textUrl	docinfo['textURL'] = textUrl

# new style text-url-path	# new style text-url-path
textUrls = dom.xpath("//texttool/text-url-path")	textUrl = getText(dom.find(".//texttool/text-url-path"))
if textUrls and (len(textUrls) > 0):	if textUrl:
textUrl = getTextFromNode(textUrls[0])
docinfo['textURLPath'] = textUrl	docinfo['textURLPath'] = textUrl
textUrlkurz = string.split(textUrl, ".")[0]	textUrlkurz = string.split(textUrl, ".")[0]
docinfo['textURLPathkurz'] = textUrlkurz	docinfo['textURLPathkurz'] = textUrlkurz
Line 789 class documentViewer(Folder):	Line 810 class documentViewer(Folder):
#docinfo = self.getNumTextPages(docinfo)	#docinfo = self.getNumTextPages(docinfo)


presentationUrls = dom.xpath("//texttool/presentation")	presentationUrl = getText(dom.find(".//texttool/presentation"))
docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag	docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag
	# TODO: is this needed here?
docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)	docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)


if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen	if presentationUrl: # ueberschreibe diese durch presentation informationen
# presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten	# presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
# durch den relativen Pfad auf die presentation infos	# durch den relativen Pfad auf die presentation infos
presentationPath = getTextFromNode(presentationUrls[0])	presentationPath = presentationUrl
if url.endswith("index.meta"):	if url.endswith("index.meta"):
presentationUrl = url.replace('index.meta', presentationPath)	presentationUrl = url.replace('index.meta', presentationPath)
else:	else:
Line 814 class documentViewer(Folder):	Line 836 class documentViewer(Folder):
"""gets the bibliographical information from the preseantion entry in texttools	"""gets the bibliographical information from the preseantion entry in texttools
"""	"""
dom=self.getPresentationInfoXML(url)	dom=self.getPresentationInfoXML(url)
try:	docinfo['author']=getText(dom.find(".//author"))
docinfo['author']=getTextFromNode(dom.xpath("//author")[0])	docinfo['title']=getText(dom.find(".//title"))
except:	docinfo['year']=getText(dom.find(".//date"))
pass
try:
docinfo['title']=getTextFromNode(dom.xpath("//title")[0])
except:
pass
try:
docinfo['year']=getTextFromNode(dom.xpath("//date")[0])
except:
pass
return docinfo	return docinfo

def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):	def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
Line 858 class documentViewer(Folder):	Line 871 class documentViewer(Folder):
docinfo = self.REQUEST.SESSION['docinfo']	docinfo = self.REQUEST.SESSION['docinfo']
# check if its still current	# check if its still current
if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:	if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)	logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())
return docinfo	return docinfo

# new docinfo	# new docinfo
docinfo = {'mode': mode, 'url': url}	docinfo = {'mode': mode, 'url': url}
if mode=="texttool": #index.meta with texttool information	# add self url
	docinfo['viewerUrl'] = self.getDocumentViewerURL()
	if mode=="texttool":
	# index.meta with texttool information
docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)	docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
elif mode=="imagepath":	elif mode=="imagepath":
	# folder with images, index.meta optional
docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)	docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
elif mode=="filepath":	elif mode=="filepath":
	# filename
docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)	docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
else:	else:
logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)	logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.175
changed lines
	Added in v.1.175.2.3