from OFS.Folder import Folder
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.PythonScripts.standard import url_quote
from Ft.Xml.Domlette import NonvalidatingReader
from Ft.Xml.Domlette import PrettyPrint, Print
from Ft.Xml import EMPTY_NAMESPACE, Parse
from xml.dom.minidom import parse, parseString
import Ft.Xml.XPath
import cStringIO
import xmlrpclib
import os.path
import sys
import cgi
import urllib
import logging
import math
import documentViewer
import urllib2
import urllib
import urlparse
from types import *
def getTextFromNode(nodename):
    """Return the concatenated text of the direct text children of a node.

    Only children whose nodeType equals TEXT_NODE contribute; text nested
    inside child elements is not included.

    nodename -- a DOM node (despite the parameter name), or None.
    Returns "" when nodename is None or has no text children.
    """
    if nodename is None:
        return ""
    # join once instead of growing a string on every iteration
    return "".join([child.data
                    for child in nodename.childNodes
                    if child.nodeType == child.TEXT_NODE])
def serializeNode(node, encoding='utf-8'):
    """Serialize node with Print() and return the resulting XML string.

    node     -- the node handed to Print().
    encoding -- output encoding passed through to Print() (default utf-8).
    """
    stream = cStringIO.StringIO()
    try:
        Print(node, stream=stream, encoding=encoding)
        return stream.getvalue()
    finally:
        # release the in-memory buffer once its content has been copied out
        stream.close()
class extraFunction(Folder):
    """Folder whose methods query the MPDL XQuery interface over HTTP and
    rewrite the links of the returned fragments to point at this object."""

    # base URL of the MPDL interface; every request below is sent to a
    # script underneath it
    serverUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface"

    # onClick handler for links that open in a popup window; only the
    # horizontal offset (left=...) differs between call sites
    _popupOnClick = "popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=%s, scrollbars=1'); return false;"

    def __init__(self,id, title=""):
        self.id=id
        self.title=title

    def _fetch(self, script, params):
        """Send params as request data to serverUrl/script and return the
        response body as a string.

        The response is always closed, also when read() raises.
        """
        response = urllib2.urlopen("%s/%s" % (self.serverUrl, script), params)
        try:
            return response.read()
        finally:
            response.close()

    def _openInPopup(self, link, left):
        """Set the target/onClick/onDblclick attributes on link so that it
        opens in a popup window positioned at the given left offset."""
        link.setAttributeNS(None, 'target', '_blank')
        link.setAttributeNS(None, 'onClick', self._popupOnClick % left)
        link.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')

    def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None):
        """get search list

        Runs the query described by pageinfo against doc-query.xql, rewrites
        the links in the result div and returns that div serialized as XML.
        Returns "no text here" when the query type is not handled or the
        expected result div is missing.

        The pn, query and queryType arguments are overwritten with the
        values from pageinfo ('searchPN', 'query', 'queryType'); lemma is
        not used.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        logging.debug("documentViewer (gettoc) docpath: %s"%(docpath))
        logging.debug("documentViewer (gettoc) url: %s"%(url))
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        sn = pageinfo['sn']
        highlightQuery = pageinfo['highlightQuery']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()
        # NOTE(review): the values are inserted without URL-quoting, unlike
        # getTranslate/getLemma -- confirm callers pass pre-quoted values.
        data = self._fetch("doc-query.xql", "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery))
        # links in the result address the document by path; the viewer
        # addresses it by url
        pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
        pagedom = Parse(pagexml)

        if queryType in ("fulltext", "xpath", "xquery", "fulltextMorphLemma", "fulltextMorph"):
            # fulltextMorph results live in a different div and additionally
            # contain lemma links; everything else is shared
            morph = (queryType == "fulltextMorph")
            if morph:
                pagedivs = pagedom.xpath("//div[@class='queryResult']")
            else:
                pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                pageParams = 'mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN)
                for l in pagenode.xpath("//a"):
                    hrefNode = l.getAttributeNodeNS(None, u"href")
                    if not hrefNode:
                        continue
                    href = hrefNode.nodeValue
                    if href.startswith('page-fragment.xql'):
                        # point page links back at this object
                        pagexml = href.replace('mode=text', pageParams)
                        hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)
                    if morph and href.startswith('../lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma_New'%(selfurl))
                        self._openInPopup(l, 400)
                return serializeNode(pagenode)

        if queryType in ("ftIndex", "ftIndexMorph"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                textParams = 'mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s'%(viewMode,tocMode,tocPN,pn)
                for l in pagenode.xpath("//a"):
                    hrefNode = l.getAttributeNodeNS(None, u"href")
                    if not hrefNode:
                        continue
                    href = hrefNode.nodeValue
                    hrefNode.nodeValue = href.replace('mode=text', textParams)
                    # dictionary and lemma links replace the value set above,
                    # starting again from the original href
                    if href.startswith('../lt/lex.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_voc'%selfurl)
                        self._openInPopup(l, 400)
                    if href.startswith('../lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                        self._openInPopup(l, 400)
                return serializeNode(pagenode)

        return "no text here"

    def getNumPages(self,docinfo=None):
        """get list of pages from fulltext and put in docinfo

        Runs the xquery '//pb' through xquery.xql and stores the number of
        "<pb " occurrences in the response as docinfo['numPages'].
        Returns docinfo.
        """
        xquery = '//pb'
        # the response body must be read before it can be searched; the
        # urllib2 response object itself has no count()
        text = self._fetch("xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
        docinfo['numPages'] = text.count("<pb ")
        return docinfo

    def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None):
        """returns single page from fulltext

        Fetches page pn via page-fragment.xql and returns the first top-level
        div serialized as XML. Handled modes are "text", "xml", "pureXml" and
        "text_dict" (requested from the server as "textPollux"); for any
        other mode, or when there is no top-level div, "no text here" is
        returned.

        The viewMode, tocMode and tocPN arguments are overwritten with the
        values from pageinfo.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()
        if mode == "text_dict":
            textmode = "textPollux"
        else:
            textmode = mode
        textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn)
        if highlightQuery is not None:
            textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn)
        pagexml = self._fetch("page-fragment.xql", textParam)
        pagedom = Parse(pagexml)

        if mode in ("text", "xml", "pureXml", "text_dict"):
            # first div contains text
            pagedivs = pagedom.xpath("/div")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                # xml and pureXml are returned unmodified; the text modes
                # get their links rewritten
                if mode in ("text", "text_dict"):
                    noteHref = "?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)
                    for l in pagenode.xpath("//a"):
                        hrefNode = l.getAttributeNodeNS(None, u"href")
                        if not hrefNode:
                            continue
                        href = hrefNode.nodeValue
                        if mode == "text_dict":
                            if href.startswith('lt/lex.xql'):
                                # is pollux link
                                hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl)
                                self._openInPopup(l, 700)
                            if href.startswith('lt/lemma.xql'):
                                hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                                self._openInPopup(l, 700)
                        if href.startswith('#note-'):
                            hrefNode.nodeValue = href.replace('#note-', noteHref)
                return serializeNode(pagenode)

        return "no text here"

    def getTranslate(self, query=None, language=None):
        """translate into another languages

        Returns the raw response of lt/lex.xql for the URL-quoted query and
        the given language.
        """
        return self._fetch("lt/lex.xql", "document=&language="+str(language)+"&query="+url_quote(str(query)))

    def getLemma(self, lemma=None, language=None):
        """similar words lemma

        Returns the raw response of lt/lemma.xql for the URL-quoted lemma
        and the given language.
        """
        return self._fetch("lt/lemma.xql", "document=&language="+str(language)+"&lemma="+url_quote(str(lemma)))

    def getLemmaNew(self, query=None, language=None):
        """similar words lemma

        Same request as getLemma; the lemma is passed in as query.
        """
        return self.getLemma(lemma=query, language=language)

    def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
        """number of

        Runs the query from pageinfo against doc-query.xql, reads the hit
        count from the 'queryResultHits' div and returns int(hits/10 + 1).
        When that div is missing the hit count is taken as 0, so 1 is
        returned.

        The query, queryType and pn arguments are overwritten with the
        values from pageinfo ('query', 'queryType', 'searchPN').
        """
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        tocSearch = 0
        pagexml = self._fetch("doc-query.xql", "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn))
        pagedom = Parse(pagexml)
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        # guard like getToc does: a response without a hit count must not
        # raise IndexError
        if len(numdivs) > 0:
            tocSearch = int(getTextFromNode(numdivs[0]))
        # NOTE(review): divides by a fixed 10 rather than pagesize, and
        # yields one extra page for exact multiples of 10 -- confirm intent.
        tc=int((tocSearch/10)+1)
        logging.debug("documentViewer (gettoc) tc: %s"%(tc))
        return tc

    def getToc(self, mode="text", docinfo=None):
        """loads table of contents and stores in docinfo

        Stores the number of entries as docinfo['tocSize_<mode>'] (0 when
        the response has no 'queryResultHits' div) and returns docinfo.
        Nothing is fetched when mode is "none" or the size is already
        present in docinfo.
        """
        logging.debug("documentViewer (gettoc) mode: %s"%(mode))
        if mode == "none":
            return docinfo
        if 'tocSize_%s'%mode in docinfo:
            # cached toc
            return docinfo
        docpath = docinfo['textURLPath']
        # we need to set a result set size
        pagesize = 1000
        pn = 1
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        # number of entries in toc
        tocSize = 0
        pagexml = self._fetch("doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
        # post-processing downloaded xml
        pagedom = Parse(pagexml)
        # get number of entries
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        if len(numdivs) > 0:
            tocSize = int(getTextFromNode(numdivs[0]))
        docinfo['tocSize_%s'%mode] = tocSize
        return docinfo

    def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
        """returns single page from the table of contents

        Fetches page pageinfo['tocPN'] of the table of contents from
        doc-query.xql and returns the response text with its
        page-fragment.xql links redirected to this object and 'mode=image'
        replaced by 'mode=texttool'. The pn argument is overwritten with
        pageinfo['tocPN'].
        """
        # TODO: this should use the cached TOC
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['tocPageSize']
        pn = pageinfo['tocPN']
        url = docinfo['url']
        selfurl = self.absolute_url()
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        data = self._fetch("doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
        page = data.replace('page-fragment.xql?document=%s'%str(docpath),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN))
        text = page.replace('mode=image','mode=texttool')
        return text
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>