documentViewer/documentViewer.py - diff

Return to documentViewer.py CVS log

Up to [Repository] / documentViewer

Diff for /documentViewer/documentViewer.py between versions 1.175 and 1.175.2.1

version 1.175, 2011/06/14 09:57:11	version 1.175.2.1, 2011/07/14 17:43:56
Line 7 from AccessControl import getSecurityMan	Line 7 from AccessControl import getSecurityMan
from Globals import package_home	from Globals import package_home
from Products.zogiLib.zogiLib import browserCheck	from Products.zogiLib.zogiLib import browserCheck

from Ft.Xml import EMPTY_NAMESPACE, Parse	#from Ft.Xml import EMPTY_NAMESPACE, Parse
import Ft.Xml.Domlette	#import Ft.Xml.Domlette

	import xml.etree.ElementTree as ET

import os.path	import os.path
import sys	import sys
import urllib	import urllib
Line 16 import urllib2	Line 19 import urllib2
import logging	import logging
import math	import math
import urlparse	import urlparse
import cStringIO
import re	import re
import string	import string

Line 32 def getInt(number, default=0):	Line 34 def getInt(number, default=0):
except:	except:
return int(default)	return int(default)

def getTextFromNode(nodename):	def getText(node):
"""get the cdata content of a node"""	"""get the cdata content of a node"""
if nodename is None:	if node is None:
return ""	return ""
nodelist=nodename.childNodes	# ET:
rc = ""	text = node.text or ""
for node in nodelist:	for e in node:
if node.nodeType == node.TEXT_NODE:	text += gettext(e)
rc = rc + node.data	if e.tail:
return rc	text += e.tail

	# 4Suite:
	#nodelist=node.childNodes
	#text = ""
	#for n in nodelist:
	# if n.nodeType == node.TEXT_NODE:
	# text = text + n.data

	return text

	getTextFromNode = getText

def serializeNode(node, encoding="utf-8"):	def serializeNode(node, encoding="utf-8"):
"""returns a string containing node as XML"""	"""returns a string containing node as XML"""
stream = cStringIO.StringIO()	s = ET.tostring(node)
#logging.debug("BUF: %s"%(stream))
Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)	# 4Suite:
s = stream.getvalue()	# stream = cStringIO.StringIO()
#logging.debug("BUF: %s"%(s))	# Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
stream.close()	# s = stream.getvalue()
	# stream.close()
return s	return s

def browserCheck(self):	def browserCheck(self):
Line 350 class documentViewer(Folder):	Line 364 class documentViewer(Folder):
pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)	pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)

if (docinfo.get('textURLPath',None)):	if (docinfo.get('textURLPath',None)):
page = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo)	page = self.getTextPage(mode=viewMode, docinfo=docinfo, pageinfo=pageinfo)
pageinfo['textPage'] = page	pageinfo['textPage'] = page
tt = getattr(self, 'template')	tt = getattr(self, 'template')
pt = getattr(tt, 'viewer_main')	pt = getattr(tt, 'viewer_main')
Line 496 class documentViewer(Folder):	Line 510 class documentViewer(Folder):
if txt is None:	if txt is None:
raise IOError("Unable to get dir-info from %s"%(infoUrl))	raise IOError("Unable to get dir-info from %s"%(infoUrl))

dom = Parse(txt)	dom = ET.fromstring(txt)
sizes=dom.xpath("//dir/size")	#dom = Parse(txt)
logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)	size=getText(dom.find("size"))
	#sizes=dom.xpath("//dir/size")
	logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)

if sizes:	if size:
docinfo['numPages'] = int(getTextFromNode(sizes[0]))	docinfo['numPages'] = int(size)
else:	else:
docinfo['numPages'] = 0	docinfo['numPages'] = 0

Line 546 class documentViewer(Folder):	Line 562 class documentViewer(Folder):
if txt is None:	if txt is None:
raise IOError("Unable to read index meta from %s"%(url))	raise IOError("Unable to read index meta from %s"%(url))

dom = Parse(txt)	dom = ET.fromstring(txt)
	#dom = Parse(txt)
return dom	return dom

def getPresentationInfoXML(self, url):	def getPresentationInfoXML(self, url):
Line 565 class documentViewer(Folder):	Line 582 class documentViewer(Folder):
if txt is None:	if txt is None:
raise IOError("Unable to read infoXMLfrom %s"%(url))	raise IOError("Unable to read infoXMLfrom %s"%(url))

dom = Parse(txt)	dom = ET.fromstring(txt)
	#dom = Parse(txt)
return dom	return dom


Line 583 class documentViewer(Folder):	Line 601 class documentViewer(Folder):
path=getParentDir(path)	path=getParentDir(path)
dom = self.getDomFromIndexMeta(path)	dom = self.getDomFromIndexMeta(path)

acctype = dom.xpath("//access-conditions/access/@type")	acc = dom.find(".//access-conditions/access")
if acctype and (len(acctype)>0):	if acc is not None:
access=acctype[0].value	acctype = acc.get('type')
	#acctype = dom.xpath("//access-conditions/access/@type")
	if acctype:
	access=acctype
if access in ['group', 'institution']:	if access in ['group', 'institution']:
access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower()	access = dom.find(".//access-conditions/access/name").text.lower()

docinfo['accessType'] = access	docinfo['accessType'] = access
return docinfo	return docinfo
Line 609 class documentViewer(Folder):	Line 630 class documentViewer(Folder):

logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))	logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
# put in all raw bib fields as dict "bib"	# put in all raw bib fields as dict "bib"
bib = dom.xpath("//bib/*")	bib = dom.find(".//bib/*")
	#bib = dom.xpath("//bib/*")
if bib and len(bib)>0:	if bib and len(bib)>0:
bibinfo = {}	bibinfo = {}
for e in bib:	for e in bib:
Line 618 class documentViewer(Folder):	Line 640 class documentViewer(Folder):

# extract some fields (author, title, year) according to their mapping	# extract some fields (author, title, year) according to their mapping
metaData=self.metadata.main.meta.bib	metaData=self.metadata.main.meta.bib
bibtype=dom.xpath("//bib/@type")	bib = dom.find(".//bib")
if bibtype and (len(bibtype)>0):	bibtype=bib.get("type")
bibtype=bibtype[0].value	#bibtype=dom.xpath("//bib/@type")
else:	if not bibtype:
bibtype="generic"	bibtype="generic"

bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)	bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
Line 630 class documentViewer(Folder):	Line 652 class documentViewer(Folder):
logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))	logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))	logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
# if there is no mapping bibmap is empty (mapping sometimes has empty fields)	# if there is no mapping bibmap is empty (mapping sometimes has empty fields)
if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:	if len(bibmap) > 0 and len(bibmap['author'][0]) > 0 or len(bibmap['title'][0]) > 0:
try:	try:
docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])	docinfo['author']=getText(bib.find(bibmap['author'][0]))
except: pass	except: pass
try:	try:
docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])	docinfo['title']=getText(bib.find(bibmap['title'][0]))
except: pass	except: pass
try:	try:
docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])	docinfo['year']=getText(bib.find(bibmap['year'][0]))
except: pass	except: pass
logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
try:	# ROC: why is this here?
docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])	# logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
except:	# try:
docinfo['lang']=''	# docinfo['lang']=getTextFromNode(dom.find(".//bib/lang")[0])
try:	# except:
docinfo['city']=getTextFromNode(dom.xpath("//bib/city")[0])	# docinfo['lang']=''
except:	# try:
docinfo['city']=''	# docinfo['city']=getTextFromNode(dom.find(".//bib/city")[0])
try:	# except:
docinfo['number_of_pages']=getTextFromNode(dom.xpath("//bib/number_of_pages")[0])	# docinfo['city']=''
except:	# try:
docinfo['number_of_pages']=''	# docinfo['number_of_pages']=getTextFromNode(dom.find(".//bib/number_of_pages")[0])
try:	# except:
docinfo['series_volume']=getTextFromNode(dom.xpath("//bib/series_volume")[0])	# docinfo['number_of_pages']=''
except:	# try:
docinfo['series_volume']=''	# docinfo['series_volume']=getTextFromNode(dom.find(".//bib/series_volume")[0])
try:	# except:
docinfo['number_of_volumes']=getTextFromNode(dom.xpath("//bib/number_of_volumes")[0])	# docinfo['series_volume']=''
except:	# try:
docinfo['number_of_volumes']=''	# docinfo['number_of_volumes']=getTextFromNode(dom.find(".//bib/number_of_volumes")[0])
try:	# except:
docinfo['translator']=getTextFromNode(dom.xpath("//bib/translator")[0])	# docinfo['number_of_volumes']=''
except:	# try:
docinfo['translator']=''	# docinfo['translator']=getTextFromNode(dom.find(".//bib/translator")[0])
try:	# except:
docinfo['edition']=getTextFromNode(dom.xpath("//bib/edition")[0])	# docinfo['translator']=''
except:	# try:
docinfo['edition']=''	# docinfo['edition']=getTextFromNode(dom.find(".//bib/edition")[0])
try:	# except:
docinfo['series_author']=getTextFromNode(dom.xpath("//bib/series_author")[0])	# docinfo['edition']=''
except:	# try:
docinfo['series_author']=''	# docinfo['series_author']=getTextFromNode(dom.find(".//bib/series_author")[0])
try:	# except:
docinfo['publisher']=getTextFromNode(dom.xpath("//bib/publisher")[0])	# docinfo['series_author']=''
except:	# try:
docinfo['publisher']=''	# docinfo['publisher']=getTextFromNode(dom.find(".//bib/publisher")[0])
try:	# except:
docinfo['series_title']=getTextFromNode(dom.xpath("//bib/series_title")[0])	# docinfo['publisher']=''
except:	# try:
docinfo['series_title']=''	# docinfo['series_title']=getTextFromNode(dom.find(".//bib/series_title")[0])
try:	# except:
docinfo['isbn_issn']=getTextFromNode(dom.xpath("//bib/isbn_issn")[0])	# docinfo['series_title']=''
except:	# try:
docinfo['isbn_issn']=''	# docinfo['isbn_issn']=getTextFromNode(dom.find(".//bib/isbn_issn")[0])
	# except:
	# docinfo['isbn_issn']=''
return docinfo	return docinfo


Line 698 class documentViewer(Folder):	Line 722 class documentViewer(Folder):
path=getParentDir(path)	path=getParentDir(path)
dom = self.getDomFromIndexMeta(path)	dom = self.getDomFromIndexMeta(path)

docinfo['name']=getTextFromNode(dom.xpath("/resource/name")[0])	docinfo['name']=getText(dom.find("name"))
logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])	logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
return docinfo	return docinfo

Line 715 class documentViewer(Folder):	Line 739 class documentViewer(Folder):
archivePath = None	archivePath = None
archiveName = None	archiveName = None

archiveNames = dom.xpath("//resource/name")	archiveName = getTextFromNode(dom.find("name"))
if archiveNames and (len(archiveNames) > 0):	if not archiveName:
archiveName = getTextFromNode(archiveNames[0])
else:
logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))	logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

archivePaths = dom.xpath("//resource/archive-path")	archivePath = getTextFromNode(dom.find("archive-path"))
if archivePaths and (len(archivePaths) > 0):	if archivePath:
archivePath = getTextFromNode(archivePaths[0])
# clean up archive path	# clean up archive path
if archivePath[0] != '/':	if archivePath[0] != '/':
archivePath = '/' + archivePath	archivePath = '/' + archivePath
Line 739 class documentViewer(Folder):	Line 760 class documentViewer(Folder):
# we balk without archive-path	# we balk without archive-path
raise IOError("Missing archive-path (for text-tool) in %s" % (url))	raise IOError("Missing archive-path (for text-tool) in %s" % (url))

imageDirs = dom.xpath("//texttool/image")	imageDir = getText(dom.find(".//texttool/image"))
if imageDirs and (len(imageDirs) > 0):
imageDir = getTextFromNode(imageDirs[0])

else:	if not imageDir:
# we balk with no image tag / not necessary anymore because textmode is now standard	# we balk with no image tag / not necessary anymore because textmode is now standard
#raise IOError("No text-tool info in %s"%(url))	#raise IOError("No text-tool info in %s"%(url))
imageDir = ""	imageDir = ""
Line 760 class documentViewer(Folder):	Line 779 class documentViewer(Folder):

docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir	docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

viewerUrls = dom.xpath("//texttool/digiliburlprefix")	viewerUrl = getText(dom.find(".//texttool/digiliburlprefix"))
if viewerUrls and (len(viewerUrls) > 0):	if viewerUrl:
viewerUrl = getTextFromNode(viewerUrls[0])
docinfo['viewerURL'] = viewerUrl	docinfo['viewerURL'] = viewerUrl

# old style text URL	# old style text URL
textUrls = dom.xpath("//texttool/text")	textUrl = getText(dom.find(".//texttool/text"))
if textUrls and (len(textUrls) > 0):	if textUrl:
textUrl = getTextFromNode(textUrls[0])
if urlparse.urlparse(textUrl)[0] == "": #keine url	if urlparse.urlparse(textUrl)[0] == "": #keine url
textUrl = os.path.join(archivePath, textUrl)	textUrl = os.path.join(archivePath, textUrl)
# fix URLs starting with /mpiwg/online	# fix URLs starting with /mpiwg/online
Line 778 class documentViewer(Folder):	Line 795 class documentViewer(Folder):
docinfo['textURL'] = textUrl	docinfo['textURL'] = textUrl

# new style text-url-path	# new style text-url-path
textUrls = dom.xpath("//texttool/text-url-path")	textUrl = getText(dom.find(".//texttool/text-url-path"))
if textUrls and (len(textUrls) > 0):	if textUrl:
textUrl = getTextFromNode(textUrls[0])
docinfo['textURLPath'] = textUrl	docinfo['textURLPath'] = textUrl
textUrlkurz = string.split(textUrl, ".")[0]	textUrlkurz = string.split(textUrl, ".")[0]
docinfo['textURLPathkurz'] = textUrlkurz	docinfo['textURLPathkurz'] = textUrlkurz
Line 789 class documentViewer(Folder):	Line 805 class documentViewer(Folder):
#docinfo = self.getNumTextPages(docinfo)	#docinfo = self.getNumTextPages(docinfo)


presentationUrls = dom.xpath("//texttool/presentation")	presentationUrl = getText(dom.find(".//texttool/presentation"))
docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag	docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag
docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)	docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)


if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen	if presentationUrl: # ueberschreibe diese durch presentation informationen
# presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten	# presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
# durch den relativen Pfad auf die presentation infos	# durch den relativen Pfad auf die presentation infos
presentationPath = getTextFromNode(presentationUrls[0])	presentationPath = presentationUrl
if url.endswith("index.meta"):	if url.endswith("index.meta"):
presentationUrl = url.replace('index.meta', presentationPath)	presentationUrl = url.replace('index.meta', presentationPath)
else:	else:
Line 815 class documentViewer(Folder):	Line 831 class documentViewer(Folder):
"""	"""
dom=self.getPresentationInfoXML(url)	dom=self.getPresentationInfoXML(url)
try:	try:
docinfo['author']=getTextFromNode(dom.xpath("//author")[0])	docinfo['author']=getText(dom.find(".//author"))
except:	except:
pass	pass
try:	try:
docinfo['title']=getTextFromNode(dom.xpath("//title")[0])	docinfo['title']=getText(dom.find(".//title"))
except:	except:
pass	pass
try:	try:
docinfo['year']=getTextFromNode(dom.xpath("//date")[0])	docinfo['year']=getText(dom.find(".//date"))
except:	except:
pass	pass
return docinfo	return docinfo

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Legend: left column = removed from v.1.175 | both columns = changed lines | right column = added in v.1.175.2.1