# HG changeset patch
# User casties
# Date 1310758481 -7200
# Node ID 0a53fea83df734daeb10daadf1050b06751ad142
# Parent 73e3273c76248a1b7127e0815b0f5d9745077f3f
more work renovating
diff -r 73e3273c7624 -r 0a53fea83df7 MpdlXmlTextServer.py
--- a/MpdlXmlTextServer.py Fri Jul 15 11:02:26 2011 +0200
+++ b/MpdlXmlTextServer.py Fri Jul 15 21:34:41 2011 +0200
@@ -1,6 +1,7 @@
from OFS.SimpleItem import SimpleItem
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
+
from Ft.Xml import EMPTY_NAMESPACE, Parse
from Ft.Xml.Domlette import NonvalidatingReader
import Ft.Xml.Domlette
@@ -8,13 +9,19 @@
import xml.etree.ElementTree as ET
-import md5
-import sys
+import re
import logging
import urllib
import documentViewer
#from documentViewer import getTextFromNode, serializeNode
+def intOr0(s, default=0):
+ """convert s to int or return default"""
+ try:
+ return int(s)
+ except:
+ return default
+
def getText(node):
"""get the cdata content of a node"""
if node is None:
@@ -44,11 +51,11 @@
if node is None:
return ""
# ET:
- #text = node.text or ""
- #for e in node:
- # text += gettext(e)
- # if e.tail:
- # text += e.tail
+# text = node.text or ""
+# for e in node:
+# text += gettext(e)
+# if e.tail:
+# text += e.tail
# 4Suite:
nodelist=node.childNodes
@@ -82,8 +89,7 @@
manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals())
- def __init__(self,id,title="",serverUrl="http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40):
- #def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/", serverName=None, timeout=40):
+ def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40):
"""constructor"""
self.id=id
@@ -103,8 +109,10 @@
url = self.serverUrl+method
return documentViewer.getHttpData(url,data,timeout=self.timeout)
+ # WTF: what does this really do? can it be integrated in getPage?
def getSearch(self, pageinfo=None, docinfo=None):
"""get search list"""
+ logging.debug("getSearch()")
docpath = docinfo['textURLPath']
url = docinfo['url']
pagesize = pageinfo['queryPageSize']
@@ -207,12 +215,12 @@
hrefList=[]
myList= ""
text=self.getServerData("xpath.xql", "document=%s&xpath=%s&pn=%s"%(docinfo['textURLPath'],xpath,pn))
- dom = Parse(text)
- result = dom.xpath("//result/resultPage/place")
+ dom = ET.fromstring(text)
+ result = dom.findall(".//result/resultPage/place")
for l in result:
- hrefNode= l.getAttributeNodeNS(None, u"id")
- href= hrefNode.nodeValue
+ href = l.get("id")
hrefList.append(href)
+ # WTF: what does this do?
myList = ",".join(hrefList)
#logging.debug("getGisPlaces :%s"%(myList))
return myList
@@ -227,178 +235,125 @@
hrefList=[]
myList=""
text=self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docinfo['textURLPath'],xpath))
- dom =Parse(text)
- result = dom.xpath("//result/resultPage/place")
+ dom = ET.fromstring(text)
+ result = dom.findall(".//result/resultPage/place")
for l in result:
- hrefNode = l.getAttributeNodeNS(None, u"id")
- href= hrefNode.nodeValue
+ href = l.get("id")
hrefList.append(href)
+ # WTF: what does this do?
myList = ",".join(hrefList)
#logging.debug("getALLGisPlaces :%s"%(myList))
return myList
+ def processPageInfo(self, dom, docinfo, pageinfo):
+ """processes page info divs from dom and stores in docinfo and pageinfo"""
+ # process all toplevel divs
+ alldivs = dom.findall(".//div")
+ pagediv = None
+ for div in alldivs:
+ dc = div.get('class')
+
+ # page content div
+ if dc == 'pageContent':
+ pagediv = div
+
+ # pageNumberOrig
+ elif dc == 'pageNumberOrig':
+ pageinfo['pageNumberOrig'] = div.text
+
+ # pageNumberOrigNorm
+ elif dc == 'pageNumberOrigNorm':
+ pageinfo['pageNumberOrigNorm'] = div.text
+
+ # countFigureEntries
+ elif dc == 'countFigureEntries':
+ docinfo['countFigureEntries'] = intOr0(div.text)
+
+ # countTocEntries
+ elif dc == 'countTocEntries':
+ # WTF: s1 = int(s)/30+1
+ docinfo['countTocEntries'] = intOr0(div.text)
+
+ # numTextPages
+ elif dc == 'countPages':
+ np = intOr0(div.text)
+ if np > 0:
+ docinfo['numTextPages'] = np
+ if docinfo.get('numPages', 0) == 0:
+ # seems to be text-only
+ docinfo['numTextPages'] = np
+ pageinfo['end'] = min(pageinfo['end'], np)
+ pageinfo['numgroups'] = int(np / pageinfo['groupsize'])
+ if np % pageinfo['groupsize'] > 0:
+ pageinfo['numgroups'] += 1
+
+ return
+
def getTextPage(self, mode="text_dict", pn=1, docinfo=None, pageinfo=None):
"""returns single page from fulltext"""
+ logging.debug("getTextPage mode=%s, pn=%s"%(mode,pn))
+ # check for cached text -- but this shouldn't be called twice
+ if pageinfo.has_key('textPage'):
+ logging.debug("getTextPage: using cached text")
+ return pageinfo['textPage']
+
docpath = docinfo['textURLPath']
- path = docinfo['textURLPath']
- url = docinfo.get('url',None)
- name = docinfo.get('name',None)
- pn =pageinfo['current']
- sn = pageinfo['sn']
- #optionToggle =pageinfo ['optionToggle']
- highlightQuery = pageinfo['highlightQuery']
- #mode = pageinfo ['viewMode']
- tocMode = pageinfo['tocMode']
- characterNormalization=pageinfo['characterNormalization']
- tocPN = pageinfo['tocPN']
- selfurl = self.absolute_url()
+ # just checking
+ if pageinfo['current'] != pn:
+ logging.warning("getTextPage: current!=pn!")
+
+ # stuff for constructing full urls
+ url = docinfo['url']
+ urlmode = docinfo['mode']
+ sn = pageinfo.get('sn', None)
+ highlightQuery = pageinfo.get('highlightQuery', None)
+ tocMode = pageinfo.get('tocMode', None)
+ tocPN = pageinfo.get('tocPN',None)
+ characterNormalization = pageinfo.get('characterNormalization', None)
+ selfurl = docinfo['viewerUrl']
+
if mode == "text_dict":
+ # text_dict is called textPollux in the backend
textmode = "textPollux"
else:
textmode = mode
textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization)
- if highlightQuery is not None:
+ if highlightQuery:
textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn)
+ # fetch the page
pagexml = self.getServerData("page-fragment.xql",textParam)
dom = ET.fromstring(pagexml)
- #dom = NonvalidatingReader.parseStream(pagexml)
-
- #original Pages
- #pagedivs = dom.xpath("//div[@class='pageNumberOrig']")
-
- """if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"):
- if len(pagedivs)>0:
- docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0])
- logging.debug("ORIGINAL PAGE: %s"%(docinfo['pageNumberOrig']))
-
- #original Pages Norm
- pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']")
- if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"):
- if len(pagedivs)>0:
- docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0])
- logging.debug("ORIGINAL PAGE NORM: %s"%(docinfo['pageNumberOrigNorm']))
- """
- #figureEntries
-# pagedivs = dom.xpath("//div[@class='countFigureEntries']")
-# if pagedivs == dom.xpath("//div[@class='countFigureEntries']"):
-# if len(pagedivs)>0:
-# docinfo['countFigureEntries'] = getTextFromNode(pagedivs[0])
-# s = getTextFromNode(pagedivs[0])
-# if s=='0':
-# try:
-# docinfo['countFigureEntries'] = int(s)
-# except:
-# docinfo['countFigureEntries'] = 0
-# else:
-# s1 = int(s)/30+1
-# try:
-# docinfo['countFigureEntries'] = int(s1)
-# except:
-# docinfo['countFigureEntries'] = 0
-#
-# #allPlaces
-# pagedivs = dom.xpath("//div[@class='countPlaces']")
-# if pagedivs == dom.xpath("//div[@class='countPlaces']"):
-# if len(pagedivs)>0:
-# docinfo['countPlaces']= getTextFromNode(pagedivs[0])
-# s = getTextFromNode(pagedivs[0])
-# try:
-# docinfo['countPlaces'] = int(s)
-# except:
-# docinfo['countPlaces'] = 0
-#
-# #tocEntries
-# pagedivs = dom.xpath("//div[@class='countTocEntries']")
-# if pagedivs == dom.xpath("//div[@class='countTocEntries']"):
-# if len(pagedivs)>0:
-# docinfo['countTocEntries'] = int(getTextFromNode(pagedivs[0]))
-# s = getTextFromNode(pagedivs[0])
-# if s=='0':
-# try:
-# docinfo['countTocEntries'] = int(s)
-# except:
-# docinfo['countTocEntries'] = 0
-# else:
-# s1 = int(s)/30+1
-# try:
-# docinfo['countTocEntries'] = int(s1)
-# except:
-# docinfo['countTocEntries'] = 0
-
- #numTextPages
- #pagedivs = dom.xpath("//div[@class='countPages']")
+ # extract additional info
+ self.processPageInfo(dom, docinfo, pageinfo)
+ # page content is in the div with class='pageContent'
+ pagediv = None
+ # ElementTree 1.2 in Python 2.6 can't do div[@class='pageContent']
alldivs = dom.findall(".//div")
- pagediv = None
for div in alldivs:
dc = div.get('class')
+ # page content div
if dc == 'pageContent':
pagediv = div
-
- if dc == 'countPages':
- try:
- np = int(div.text)
- docinfo['numPages'] = np
- pageinfo['end'] = min(pageinfo['end'], np)
- pageinfo['numgroups'] = int(np / pageinfo['groupsize'])
- if np % pageinfo['groupsize'] > 0:
- pageinfo['numgroups'] += 1
-
- except:
- docinfo['numPages'] = 0
-
break
-
-# ROC: why?
-# else:
-# #no full text -- init to 0
-# docinfo['pageNumberOrig'] = 0
-# docinfo['countFigureEntries'] = 0
-# docinfo['countPlaces'] = 0
-# docinfo['countTocEntries'] = 0
-# docinfo['numPages'] = 0
-# docinfo['pageNumberOrigNorm'] = 0
-# #return docinfo
# plain text mode
if mode == "text":
- #pagedivs = dom.xpath("/div")
if pagediv:
links = pagediv.findall(".//a")
for l in links:
href = l.get('href')
if href and href.startswith('#note-'):
- href = href.replace('#note-',"?url=%s&viewMode=text&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,tocMode,tocPN,pn))
+ href = href.replace('#note-',"?mode=%s&url=%s&viewMode=text&tocMode=%s&tocPN=%s&pn=%s#note-"%(urlmode,url,tocMode,tocPN,pn))
l.set('href', href)
- logging.debug("page=%s"%ET.tostring(pagediv, 'UTF-8'))
- return serialize(pagediv)
-
- if mode == "xml":
- if pagediv:
+
return serialize(pagediv)
- if mode == "pureXml":
- if pagediv:
- return serialize(pagediv)
-
- if mode == "gis":
- if pagediv:
- # check all a-tags
- links = pagediv.findall(".//a")
- for l in links:
- href = l.get('href')
- if href:
- if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'):
- l.set('href', href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name))
- l.set('target', '_blank')
-
- return serialize(pagenode)
-
# text-with-links mode
- if mode == "text_dict":
+ elif mode == "text_dict":
if pagediv:
# check all a-tags
links = pagediv.findall(".//a")
@@ -423,58 +378,80 @@
l.set('ondblclick', 'popupWin.focus();')
if href.startswith('#note-'):
- l.set('href', href.replace('#note-',"?url=%s&viewMode=text_dict&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,tocMode,tocPN,pn)))
+ l.set('href', href.replace('#note-',"?mode=%s&url=%s&viewMode=text_dict&tocMode=%s&tocPN=%s&pn=%s#note-"%(urlmode,url,tocMode,tocPN,pn)))
return serialize(pagediv)
+ # xml mode
+ elif mode == "xml":
+ if pagediv:
+ return serialize(pagediv)
+
+ # pureXml mode
+ elif mode == "pureXml":
+ if pagediv:
+ return serialize(pagediv)
+
+ # gis mode
+ elif mode == "gis":
+ name = docinfo['name']
+ if pagediv:
+ # check all a-tags
+ links = pagediv.findall(".//a")
+ for l in links:
+ href = l.get('href')
+ if href:
+ if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'):
+ l.set('href', href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name))
+ l.set('target', '_blank')
+
+ return serialize(pagediv)
+
return "no text here"
+ # WTF: is this needed?
def getOrigPages(self, docinfo=None, pageinfo=None):
- docpath = docinfo['textURLPath']
- pn =pageinfo['current']
- selfurl = self.absolute_url()
- pagexml = self.getServerData("page-fragment.xql","document=%s&pn=%s"%(docpath, pn))
- dom = Parse(pagexml)
- pagedivs = dom.xpath("//div[@class='pageNumberOrig']")
- if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"):
- if len(pagedivs)>0:
- docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0])
- return docinfo['pageNumberOrig']
+ logging.debug("CALLED: getOrigPages!")
+ if not pageinfo.has_key('pageNumberOrig'):
+ logging.warning("getOrigPages: not in pageinfo!")
+ return None
+
+ return pageinfo['pageNumberOrig']
+ # WTF: is this needed?
def getOrigPagesNorm(self, docinfo=None, pageinfo=None):
- docpath = docinfo['textURLPath']
- pn =pageinfo['current']
- selfurl = self.absolute_url()
- pagexml = self.getServerData("page-fragment.xql","document=%s&pn=%s"%(docpath, pn))
- dom = Parse(pagexml)
- pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']")
- if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"):
- if len(pagedivs)>0:
- docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0])
- return docinfo['pageNumberOrigNorm']
-
+ logging.debug("CALLED: getOrigPagesNorm!")
+ if not pageinfo.has_key('pageNumberOrigNorm'):
+ logging.warning("getOrigPagesNorm: not in pageinfo!")
+ return None
+
+ return pageinfo['pageNumberOrigNorm']
+ # TODO: should be getWordInfo
def getTranslate(self, word=None, language=None):
"""translate into another languages"""
data = self.getServerData("lt/wordInfo.xql","language="+str(language)+"&word="+urllib.quote(word)+"&output=html")
- #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query)))
return data
+ # WTF: what does this do?
def getLemma(self, lemma=None, language=None):
"""simular words lemma """
data = self.getServerData("lt/lemma.xql","language="+str(language)+"&lemma="+urllib.quote(lemma)+"&output=html")
return data
+ # WTF: what does this do?
def getLemmaQuery(self, query=None, language=None):
"""simular words lemma """
data = self.getServerData("lt/lemma.xql","language="+str(language)+"&query="+urllib.quote(query)+"&output=html")
return data
+ # WTF: what does this do?
def getLex(self, query=None, language=None):
#simular words lemma
data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+urllib.quote(query))
return data
-
+
+ # WTF: what does this do?
def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
#number of
docpath = docinfo['textURLPath']
@@ -493,9 +470,11 @@
return tc
def getToc(self, mode="text", docinfo=None):
- """loads table of contents and stores in docinfo"""
+ """loads table of contents and stores XML in docinfo"""
+ logging.debug("getToc mode=%s"%mode)
if mode == "none":
- return docinfo
+ return docinfo
+
if 'tocSize_%s'%mode in docinfo:
# cached toc
return docinfo
@@ -511,44 +490,87 @@
# number of entries in toc
tocSize = 0
tocDiv = None
-
+ # fetch full toc
pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
-
- # post-processing downloaded xml
- pagedom = Parse(pagexml)
- # get number of entries
- numdivs = pagedom.xpath("//div[@class='queryResultHits']")
- if len(numdivs) > 0:
- tocSize = int(getTextFromNode(numdivs[0]))
- docinfo['tocSize_%s'%mode] = tocSize
+ dom = ET.fromstring(pagexml)
+ # page content is in the div with class='queryResultPage'
+ pagediv = None
+ # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage']
+ alldivs = dom.findall("div")
+ for div in alldivs:
+ dc = div.get('class')
+ # page content div
+ if dc == 'queryResultPage':
+ pagediv = div
+
+ elif dc == 'queryResultHits':
+ docinfo['tocSize_%s'%mode] = intOr0(div.text)
+
+ if pagediv:
+# # split xml in chunks
+# tocs = []
+# tocdivs = pagediv.findall('div')
+# for p in zip(tocdivs[::2], tocdivs[1::2]):
+# toc = serialize(p[0])
+# toc += serialize(p[1])
+# tocs.append(toc)
+# logging.debug("pair: %s"%(toc))
+ # store XML in docinfo
+ docinfo['tocXML_%s'%mode] = ET.tostring(pagediv, 'UTF-8')
+
return docinfo
def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
"""returns single page from the table of contents"""
- # TODO: this should use the cached TOC
+ logging.debug("getTocPage mode=%s, pn=%s"%(mode,pn))
if mode == "text":
queryType = "toc"
else:
queryType = mode
- docpath = docinfo['textURLPath']
- path = docinfo['textURLPath']
- pagesize = pageinfo['tocPageSize']
- pn = pageinfo['tocPN']
+
+ # check for cached TOC
+ if not docinfo.has_key('tocXML_%s'%mode):
+ self.getToc(mode=mode, docinfo=docinfo)
+
+ tocxml = docinfo.get('tocXML_%s'%mode, None)
+ if not tocxml:
+ logging.error("getTocPage: unable to find tocXML")
+ return "No ToC"
+
+ pagesize = int(pageinfo['tocPageSize'])
url = docinfo['url']
- selfurl = self.absolute_url()
+ urlmode = docinfo['mode']
+ selfurl = docinfo['viewerUrl']
viewMode= pageinfo['viewMode']
- characterNormalization = pageinfo ['characterNormalization']
- #optionToggle =pageinfo ['optionToggle']
tocMode = pageinfo['tocMode']
- tocPN = pageinfo['tocPN']
+ tocPN = int(pageinfo['tocPN'])
+
+ fulltoc = ET.fromstring(tocxml)
- data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm"%(docpath,queryType, pagesize, pn))
- page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN))
- text = page.replace('mode=image','mode=texttool')
- return text
+ if fulltoc:
+ # paginate
+ #start = (pn - 1) * pagesize * 2
+ #end = start + pagesize * 2
+ #tocdivs = fulltoc[start:end]
+ tocdivs = fulltoc
+
+ # check all a-tags
+ links = tocdivs.findall(".//a")
+ for l in links:
+ href = l.get('href')
+ if href:
+ # take pn from href
+ m = re.match(r'page-fragment\.xql.*pn=(\d+)', href)
+ if m is not None:
+ # and create new url
+ l.set('href', '%s?mode=%s&url=%s&viewMode=%s&pn=%s&tocMode=%s&tocPN=%s'%(selfurl, urlmode, url, viewMode, m.group(1), tocMode, tocPN))
+ else:
+ logging.warning("getTocPage: Problem with link=%s"%href)
+
+ return serialize(tocdivs)
+
def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
- #def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/",timeout=40,RESPONSE=None):
"""change settings"""
self.title=title
self.timeout = timeout
@@ -568,4 +590,6 @@
newObj = MpdlXmlTextServer(id,title,serverUrl,timeout)
self.Destination()._setObject(id, newObj)
if RESPONSE is not None:
- RESPONSE.redirect('manage_main')
\ No newline at end of file
+ RESPONSE.redirect('manage_main')
+
+
\ No newline at end of file
diff -r 73e3273c7624 -r 0a53fea83df7 MpdlXmlTextServer_old.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/MpdlXmlTextServer_old.py Fri Jul 15 21:34:41 2011 +0200
@@ -0,0 +1,520 @@
+
+from OFS.SimpleItem import SimpleItem
+from Products.PageTemplates.PageTemplateFile import PageTemplateFile
+from Ft.Xml import EMPTY_NAMESPACE, Parse
+from Ft.Xml.Domlette import NonvalidatingReader
+
+import md5
+import sys
+import logging
+import urllib
+import documentViewer
+from documentViewer import getTextFromNode, serializeNode
+
+class MpdlXmlTextServer(SimpleItem):
+ """TextServer implementation for MPDL-XML eXist server"""
+ meta_type="MPDL-XML TextServer"
+
+ manage_options=(
+ {'label':'Config','action':'manage_changeMpdlXmlTextServerForm'},
+ )+SimpleItem.manage_options
+
+ manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals())
+
+ def __init__(self,id,title="",serverUrl="http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40):
+ #def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/", serverName=None, timeout=40):
+
+ """constructor"""
+ self.id=id
+ self.title=title
+ self.timeout = timeout
+ if serverName is None:
+ self.serverUrl = serverUrl
+ else:
+ self.serverUrl = "http://%s/mpdl/interface/"%serverName
+
+ def getHttpData(self, url, data=None):
+ """returns result from url+data HTTP request"""
+ return documentViewer.getHttpData(url,data,timeout=self.timeout)
+
+ def getServerData(self, method, data=None):
+ """returns result from text server for method+data"""
+ url = self.serverUrl+method
+ return documentViewer.getHttpData(url,data,timeout=self.timeout)
+
+ def getSearch(self, pageinfo=None, docinfo=None):
+ """get search list"""
+ docpath = docinfo['textURLPath']
+ url = docinfo['url']
+ pagesize = pageinfo['queryPageSize']
+ pn = pageinfo.get('searchPN',1)
+ sn = pageinfo['sn']
+ highlightQuery = pageinfo['highlightQuery']
+ query =pageinfo['query']
+ queryType =pageinfo['queryType']
+ viewMode= pageinfo['viewMode']
+ tocMode = pageinfo['tocMode']
+ characterNormalization = pageinfo['characterNormalization']
+ #optionToggle = pageinfo['optionToggle']
+ tocPN = pageinfo['tocPN']
+ selfurl = self.absolute_url()
+ data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization, urllib.quote(highlightQuery)))
+ pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
+ pagedom = Parse(pagexml)
+
+ """
+ pagedivs = pagedom.xpath("//div[@class='queryResultHits']")
+ if (pagedivs == pagedom.xpath("//div[@class='queryResultHits']")):
+ if len(pagedivs)>0:
+ docinfo['queryResultHits'] = int(getTextFromNode(pagedivs[0]))
+ s = getTextFromNode(pagedivs[0])
+ s1 = int(s)/10+1
+ try:
+ docinfo['queryResultHits'] = int(s1)
+ logging.debug("SEARCH ENTRIES: %s"%(s1))
+ except:
+ docinfo['queryResultHits'] = 0
+ """
+ if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"):
+ pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
+ if len(pagedivs)>0:
+ pagenode=pagedivs[0]
+ links=pagenode.xpath("//a")
+ for l in links:
+ hrefNode = l.getAttributeNodeNS(None, u"href")
+ if hrefNode:
+ href = hrefNode.nodeValue
+ if href.startswith('page-fragment.xql'):
+ selfurl = self.absolute_url()
+ pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN, characterNormalization))
+ hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)
+ #logging.debug("PUREXML :%s"%(serializeNode(pagenode)))
+ return serializeNode(pagenode)
+ if (queryType=="fulltextMorph"):
+ pagedivs = pagedom.xpath("//div[@class='queryResult']")
+ if len(pagedivs)>0:
+ pagenode=pagedivs[0]
+ links=pagenode.xpath("//a")
+ for l in links:
+ hrefNode = l.getAttributeNodeNS(None, u"href")
+ if hrefNode:
+ href = hrefNode.nodeValue
+ if href.startswith('page-fragment.xql'):
+ selfurl = self.absolute_url()
+ pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,characterNormalization))
+ hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)
+ if href.startswith('../lt/lemma.xql'):
+ hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_query'%(selfurl))
+ l.setAttributeNS(None, 'target', '_blank')
+ l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
+ l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
+ pagedivs = pagedom.xpath("//div[@class='queryResultMorphExpansion']")
+ return serializeNode(pagenode)
+ if (queryType=="ftIndex")or(queryType=="ftIndexMorph"):
+ pagedivs= pagedom.xpath("//div[@class='queryResultPage']")
+ if len(pagedivs)>0:
+ pagenode=pagedivs[0]
+ links=pagenode.xpath("//a")
+ for l in links:
+ hrefNode = l.getAttributeNodeNS(None, u"href")
+ if hrefNode:
+ href = hrefNode.nodeValue
+ hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization))
+ if href.startswith('../lt/lex.xql'):
+ hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_lex'%selfurl)
+ l.setAttributeNS(None, 'target', '_blank')
+ l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
+ l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
+ if href.startswith('../lt/lemma.xql'):
+ hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%(selfurl))
+ l.setAttributeNS(None, 'target', '_blank')
+ l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
+ l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
+ return serializeNode(pagenode)
+ return "no text here"
+
+ def getGisPlaces(self, docinfo=None, pageinfo=None):
+ """ Show all Gis Places of whole Page"""
+ xpath='//place'
+ docpath = docinfo.get('textURLPath',None)
+ if not docpath:
+ return None
+
+ url = docinfo['url']
+ selfurl = self.absolute_url()
+ pn = pageinfo['current']
+ hrefList=[]
+ myList= ""
+ text=self.getServerData("xpath.xql", "document=%s&xpath=%s&pn=%s"%(docinfo['textURLPath'],xpath,pn))
+ dom = Parse(text)
+ result = dom.xpath("//result/resultPage/place")
+ for l in result:
+ hrefNode= l.getAttributeNodeNS(None, u"id")
+ href= hrefNode.nodeValue
+ hrefList.append(href)
+ myList = ",".join(hrefList)
+ #logging.debug("getGisPlaces :%s"%(myList))
+ return myList
+
+ def getAllGisPlaces (self, docinfo=None, pageinfo=None):
+ """Show all Gis Places of whole Book """
+ xpath ='//echo:place'
+ docpath =docinfo['textURLPath']
+ url = docinfo['url']
+ selfurl =self.absolute_url()
+ pn =pageinfo['current']
+ hrefList=[]
+ myList=""
+ text=self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docinfo['textURLPath'],xpath))
+ dom =Parse(text)
+ result = dom.xpath("//result/resultPage/place")
+
+ for l in result:
+ hrefNode = l.getAttributeNodeNS(None, u"id")
+ href= hrefNode.nodeValue
+ hrefList.append(href)
+ myList = ",".join(hrefList)
+ #logging.debug("getALLGisPlaces :%s"%(myList))
+ return myList
+
+
+ def getTextPage(self, mode="text_dict", pn=1, docinfo=None, pageinfo=None):
+ """returns single page from fulltext"""
+ docpath = docinfo['textURLPath']
+ path = docinfo['textURLPath']
+ url = docinfo.get('url',None)
+ name = docinfo.get('name',None)
+ pn =pageinfo['current']
+ sn = pageinfo['sn']
+ #optionToggle =pageinfo ['optionToggle']
+ highlightQuery = pageinfo['highlightQuery']
+ #mode = pageinfo ['viewMode']
+ tocMode = pageinfo['tocMode']
+ characterNormalization=pageinfo['characterNormalization']
+ tocPN = pageinfo['tocPN']
+ selfurl = self.absolute_url()
+ if mode == "text_dict":
+ textmode = "textPollux"
+ else:
+ textmode = mode
+
+ textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization)
+ if highlightQuery is not None:
+ textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn)
+
+ pagexml = self.getServerData("page-fragment.xql",textParam)
+ dom = Parse(pagexml)
+ #dom = NonvalidatingReader.parseStream(pagexml)
+
+ #original Pages
+ pagedivs = dom.xpath("//div[@class='pageNumberOrig']")
+
+ """if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"):
+ if len(pagedivs)>0:
+ docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0])
+ logging.debug("ORIGINAL PAGE: %s"%(docinfo['pageNumberOrig']))
+
+ #original Pages Norm
+ pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']")
+ if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"):
+ if len(pagedivs)>0:
+ docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0])
+ logging.debug("ORIGINAL PAGE NORM: %s"%(docinfo['pageNumberOrigNorm']))
+ """
+ #figureEntries
+ pagedivs = dom.xpath("//div[@class='countFigureEntries']")
+ if pagedivs == dom.xpath("//div[@class='countFigureEntries']"):
+ if len(pagedivs)>0:
+ docinfo['countFigureEntries'] = getTextFromNode(pagedivs[0])
+ s = getTextFromNode(pagedivs[0])
+ if s=='0':
+ try:
+ docinfo['countFigureEntries'] = int(s)
+ except:
+ docinfo['countFigureEntries'] = 0
+ else:
+ s1 = int(s)/30+1
+ try:
+ docinfo['countFigureEntries'] = int(s1)
+ except:
+ docinfo['countFigureEntries'] = 0
+
+ #allPlaces
+ pagedivs = dom.xpath("//div[@class='countPlaces']")
+ if pagedivs == dom.xpath("//div[@class='countPlaces']"):
+ if len(pagedivs)>0:
+ docinfo['countPlaces']= getTextFromNode(pagedivs[0])
+ s = getTextFromNode(pagedivs[0])
+ try:
+ docinfo['countPlaces'] = int(s)
+ except:
+ docinfo['countPlaces'] = 0
+
+ #tocEntries
+ pagedivs = dom.xpath("//div[@class='countTocEntries']")
+ if pagedivs == dom.xpath("//div[@class='countTocEntries']"):
+ if len(pagedivs)>0:
+ docinfo['countTocEntries'] = int(getTextFromNode(pagedivs[0]))
+ s = getTextFromNode(pagedivs[0])
+ if s=='0':
+ try:
+ docinfo['countTocEntries'] = int(s)
+ except:
+ docinfo['countTocEntries'] = 0
+ else:
+ s1 = int(s)/30+1
+ try:
+ docinfo['countTocEntries'] = int(s1)
+ except:
+ docinfo['countTocEntries'] = 0
+
+ #numTextPages
+ pagedivs = dom.xpath("//div[@class='countPages']")
+ if pagedivs == dom.xpath("//div[@class='countPages']"):
+ if len(pagedivs)>0:
+ docinfo['numPages'] = getTextFromNode(pagedivs[0])
+ s = getTextFromNode(pagedivs[0])
+
+ try:
+ docinfo['numPages'] = int(s)
+ #logging.debug("PAGE NUMBER: %s"%(s))
+
+ np = docinfo['numPages']
+ pageinfo['end'] = min(pageinfo['end'], np)
+ pageinfo['numgroups'] = int(np / pageinfo['groupsize'])
+ if np % pageinfo['groupsize'] > 0:
+ pageinfo['numgroups'] += 1
+ except:
+ docinfo['numPages'] = 0
+
+ else:
+ #no full text -- init to 0
+ docinfo['pageNumberOrig'] = 0
+ docinfo['countFigureEntries'] = 0
+ docinfo['countPlaces'] = 0
+ docinfo['countTocEntries'] = 0
+ docinfo['numPages'] = 0
+ docinfo['pageNumberOrigNorm'] = 0
+ #return docinfo
+
+ # plain text mode
+ if mode == "text":
+ # first div contains text
+ pagedivs = dom.xpath("/div")
+ if len(pagedivs) > 0:
+ pagenode = pagedivs[0]
+ links = pagenode.xpath("//a")
+ for l in links:
+ hrefNode = l.getAttributeNodeNS(None, u"href")
+ if hrefNode:
+ href= hrefNode.nodeValue
+ if href.startswith('#note-'):
+ hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=text&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,tocMode,tocPN,pn))
+ return serializeNode(pagenode)
+ if mode == "xml":
+ # first div contains text
+ pagedivs = dom.xpath("/div")
+ if len(pagedivs) > 0:
+ pagenode = pagedivs[0]
+ return serializeNode(pagenode)
+ if mode == "gis":
+ # first div contains text
+ pagedivs = dom.xpath("/div")
+ if len(pagedivs) > 0:
+ pagenode = pagedivs[0]
+ links =pagenode.xpath("//a")
+ for l in links:
+ hrefNode =l.getAttributeNodeNS(None, u"href")
+ if hrefNode:
+ href=hrefNode.nodeValue
+ if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'):
+ hrefNode.nodeValue =href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name)
+ l.setAttributeNS(None, 'target', '_blank')
+ return serializeNode(pagenode)
+
+ if mode == "pureXml":
+ # first div contains text
+ pagedivs = dom.xpath("/div")
+ if len(pagedivs) > 0:
+ pagenode = pagedivs[0]
+ return serializeNode(pagenode)
+ # text-with-links mode
+ if mode == "text_dict":
+ # first div contains text
+ #mode = pageinfo ['viewMode']
+ pagedivs = dom.xpath("/div")
+ if len(pagedivs) > 0:
+ pagenode = pagedivs[0]
+ # check all a-tags
+ links = pagenode.xpath("//a")
+
+ for l in links:
+ hrefNode = l.getAttributeNodeNS(None, u"href")
+
+ if hrefNode:
+ # is link with href
+ href = hrefNode.nodeValue
+ if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'):
+ # is pollux link
+ selfurl = self.absolute_url()
+ # change href
+ hrefNode.nodeValue = href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/head_main_voc'%selfurl)
+ # add target
+ l.setAttributeNS(None, 'target', '_blank')
+ #l.setAttributeNS(None, 'onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
+ #l.setAttributeNS(None, "ondblclick", "popupWin.focus();")
+ #window.open("this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=yes, scrollbars=1'"); return false;")
+
+ if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'):
+ selfurl = self.absolute_url()
+ hrefNode.nodeValue = href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl)
+ l.setAttributeNS(None, 'target', '_blank')
+ l.setAttributeNS(None, 'onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
+ l.setAttributeNS(None, 'ondblclick', 'popupWin.focus();')
+
+ if href.startswith('#note-'):
+ hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=text_dict&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,tocMode,tocPN,pn))
+
+ return serializeNode(pagenode)
+ return "no text here"
+
+ def getOrigPages(self, docinfo=None, pageinfo=None):
+ docpath = docinfo['textURLPath']
+ pn =pageinfo['current']
+ selfurl = self.absolute_url()
+ pagexml = self.getServerData("page-fragment.xql","document=%s&pn=%s"%(docpath, pn))
+ dom = Parse(pagexml)
+ pagedivs = dom.xpath("//div[@class='pageNumberOrig']")
+ if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"):
+ if len(pagedivs)>0:
+ docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0])
+ return docinfo['pageNumberOrig']
+
+ def getOrigPagesNorm(self, docinfo=None, pageinfo=None):
+ docpath = docinfo['textURLPath']
+ pn =pageinfo['current']
+ selfurl = self.absolute_url()
+ pagexml = self.getServerData("page-fragment.xql","document=%s&pn=%s"%(docpath, pn))
+ dom = Parse(pagexml)
+ pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']")
+ if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"):
+ if len(pagedivs)>0:
+ docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0])
+ return docinfo['pageNumberOrigNorm']
+
+
+ def getTranslate(self, word=None, language=None):
+ """translate into another languages"""
+ data = self.getServerData("lt/wordInfo.xql","language="+str(language)+"&word="+urllib.quote(word)+"&output=html")
+ #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query)))
+ return data
+
+ def getLemma(self, lemma=None, language=None):
+ """simular words lemma """
+ data = self.getServerData("lt/lemma.xql","language="+str(language)+"&lemma="+urllib.quote(lemma)+"&output=html")
+ return data
+
+ def getLemmaQuery(self, query=None, language=None):
+ """simular words lemma """
+ data = self.getServerData("lt/lemma.xql","language="+str(language)+"&query="+urllib.quote(query)+"&output=html")
+ return data
+
+ def getLex(self, query=None, language=None):
+ #simular words lemma
+ data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+urllib.quote(query))
+ return data
+
+ def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
+ #number of
+ docpath = docinfo['textURLPath']
+ pagesize = pageinfo['queryPageSize']
+ pn = pageinfo['searchPN']
+ query =pageinfo['query']
+ queryType =pageinfo['queryType']
+ tocSearch = 0
+ tocDiv = None
+
+ pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn))
+ pagedom = Parse(pagexml)
+ numdivs = pagedom.xpath("//div[@class='queryResultHits']")
+ tocSearch = int(getTextFromNode(numdivs[0]))
+ tc=int((tocSearch/10)+1)
+ return tc
+
+ def getToc(self, mode="text", docinfo=None):
+ """loads table of contents and stores in docinfo"""
+ if mode == "none":
+ return docinfo
+ if 'tocSize_%s'%mode in docinfo:
+ # cached toc
+ return docinfo
+
+ docpath = docinfo['textURLPath']
+ # we need to set a result set size
+ pagesize = 1000
+ pn = 1
+ if mode == "text":
+ queryType = "toc"
+ else:
+ queryType = mode
+ # number of entries in toc
+ tocSize = 0
+ tocDiv = None
+
+ pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
+
+ # post-processing downloaded xml
+ pagedom = Parse(pagexml)
+ # get number of entries
+ numdivs = pagedom.xpath("//div[@class='queryResultHits']")
+ if len(numdivs) > 0:
+ tocSize = int(getTextFromNode(numdivs[0]))
+ docinfo['tocSize_%s'%mode] = tocSize
+ return docinfo
+
+ def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
+ """returns single page from the table of contents"""
+ # TODO: this should use the cached TOC
+ if mode == "text":
+ queryType = "toc"
+ else:
+ queryType = mode
+ docpath = docinfo['textURLPath']
+ path = docinfo['textURLPath']
+ pagesize = pageinfo['tocPageSize']
+ pn = pageinfo['tocPN']
+ url = docinfo['url']
+ selfurl = self.absolute_url()
+ viewMode= pageinfo['viewMode']
+ characterNormalization = pageinfo ['characterNormalization']
+ #optionToggle =pageinfo ['optionToggle']
+ tocMode = pageinfo['tocMode']
+ tocPN = pageinfo['tocPN']
+
+ data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm"%(docpath,queryType, pagesize, pn))
+ page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN))
+ text = page.replace('mode=image','mode=texttool')
+ return text
+
+ def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
+ #def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/",timeout=40,RESPONSE=None):
+ """change settings"""
+ self.title=title
+ self.timeout = timeout
+ self.serverUrl = serverUrl
+ if RESPONSE is not None:
+ RESPONSE.redirect('manage_main')
+
+# management methods
def manage_addMpdlXmlTextServerForm(self):
    """Render the ZMI form for adding a MpdlXmlTextServer."""
    template = PageTemplateFile("zpt/manage_addMpdlXmlTextServer", globals())
    return template.__of__(self)()
+
def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """Create a MpdlXmlTextServer and register it in the destination folder.
    (The previous docstring 'add zogiimage' was a copy-paste leftover.)"""
    server = MpdlXmlTextServer(id, title, serverUrl, timeout)
    self.Destination()._setObject(id, server)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
\ No newline at end of file
diff -r 73e3273c7624 -r 0a53fea83df7 documentViewer.py
--- a/documentViewer.py Fri Jul 15 11:02:26 2011 +0200
+++ b/documentViewer.py Fri Jul 15 21:34:41 2011 +0200
@@ -354,21 +354,26 @@
if tocMode != "thumbs":
# get table of contents
docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
-
- if viewMode=="auto": # automodus gewaehlt
- if docinfo.has_key('textURL') or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert
+
+ # auto viewMode: text_dict if text else images
+ if viewMode=="auto":
+ if docinfo.get('textURL', None) or docinfo.get('textURLPath', None):
+ #texturl gesetzt und textViewer konfiguriert
viewMode="text_dict"
else:
viewMode="images"
- pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
+ pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode)
- if (docinfo.get('textURLPath',None)):
- page = self.getTextPage(mode=viewMode, docinfo=docinfo, pageinfo=pageinfo)
+ if viewMode != 'images' and docinfo.get('textURLPath', None):
+ # get full text page
+ page = self.getTextPage(mode=viewMode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
pageinfo['textPage'] = page
- tt = getattr(self, 'template')
- pt = getattr(tt, 'viewer_main')
- return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
+
+ # get template /template/viewer_main
+ pt = getattr(self.template, 'viewer_main')
+ # and execute with parameters
+ return pt(docinfo=docinfo, pageinfo=pageinfo, viewMode=viewMode, mk=self.generateMarks(mk))
def generateMarks(self,mk):
ret=""
@@ -866,15 +871,21 @@
docinfo = self.REQUEST.SESSION['docinfo']
# check if its still current
if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
- logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
+ logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys())
return docinfo
+
# new docinfo
docinfo = {'mode': mode, 'url': url}
- if mode=="texttool": #index.meta with texttool information
+ # add self url
+ docinfo['viewerUrl'] = self.getDocumentViewerURL()
+ if mode=="texttool":
+ # index.meta with texttool information
docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
elif mode=="imagepath":
+ # folder with images, index.meta optional
docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
elif mode=="filepath":
+ # filename
docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
else:
logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
diff -r 73e3273c7624 -r 0a53fea83df7 documentViewer_old.py
--- a/documentViewer_old.py Fri Jul 15 11:02:26 2011 +0200
+++ b/documentViewer_old.py Fri Jul 15 21:34:41 2011 +0200
@@ -9,9 +9,6 @@
from Ft.Xml import EMPTY_NAMESPACE, Parse
import Ft.Xml.Domlette
-
-import xml.etree.ElementTree as ET
-
import os.path
import sys
import urllib
@@ -35,35 +32,25 @@
except:
return int(default)
-def getTextFromNode(node):
+def getTextFromNode(nodename):
"""get the cdata content of a node"""
- if node is None:
+ if nodename is None:
return ""
- # ET:
- text = node.text or ""
- for e in node:
- text += gettext(e)
- if e.tail:
- text += e.tail
-
- # 4Suite:
- #nodelist=node.childNodes
- #text = ""
- #for n in nodelist:
- # if n.nodeType == node.TEXT_NODE:
- # text = text + n.data
-
- return text
+ nodelist=nodename.childNodes
+ rc = ""
+ for node in nodelist:
+ if node.nodeType == node.TEXT_NODE:
+ rc = rc + node.data
+ return rc
def serializeNode(node, encoding="utf-8"):
"""returns a string containing node as XML"""
- s = ET.tostring(node)
-
- # 4Suite:
- # stream = cStringIO.StringIO()
- # Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
- # s = stream.getvalue()
- # stream.close()
+ stream = cStringIO.StringIO()
+ #logging.debug("BUF: %s"%(stream))
+ Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
+ s = stream.getvalue()
+ #logging.debug("BUF: %s"%(s))
+ stream.close()
return s
def browserCheck(self):
@@ -509,10 +496,8 @@
if txt is None:
raise IOError("Unable to get dir-info from %s"%(infoUrl))
- dom = ET.fromstring(txt).getroot()
- #dom = Parse(txt)
- sizes=dom.find("//dir/size")
- #sizes=dom.xpath("//dir/size")
+ dom = Parse(txt)
+ sizes=dom.xpath("//dir/size")
logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)
if sizes:
@@ -561,8 +546,7 @@
if txt is None:
raise IOError("Unable to read index meta from %s"%(url))
- dom = ET.fromstring(txt).getroot()
- #dom = Parse(txt)
+ dom = Parse(txt)
return dom
def getPresentationInfoXML(self, url):
@@ -581,8 +565,7 @@
if txt is None:
raise IOError("Unable to read infoXMLfrom %s"%(url))
- dom = ET.fromstring(txt).getroot()
- #dom = Parse(txt)
+ dom = Parse(txt)
return dom
@@ -600,8 +583,7 @@
path=getParentDir(path)
dom = self.getDomFromIndexMeta(path)
- acctype = dom.find("//access-conditions/access/@type")
- #acctype = dom.xpath("//access-conditions/access/@type")
+ acctype = dom.xpath("//access-conditions/access/@type")
if acctype and (len(acctype)>0):
access=acctype[0].value
if access in ['group', 'institution']:
@@ -627,8 +609,7 @@
logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
# put in all raw bib fields as dict "bib"
- bib = dom.find("//bib/*")
- #bib = dom.xpath("//bib/*")
+ bib = dom.xpath("//bib/*")
if bib and len(bib)>0:
bibinfo = {}
for e in bib:
@@ -637,8 +618,7 @@
# extract some fields (author, title, year) according to their mapping
metaData=self.metadata.main.meta.bib
- bibtype=dom.find("//bib/@type")
- #bibtype=dom.xpath("//bib/@type")
+ bibtype=dom.xpath("//bib/@type")
if bibtype and (len(bibtype)>0):
bibtype=bibtype[0].value
else: