Annotation of documentViewer/extraFunction.py, revision 1.1.2.2
1.1.2.1 abukhman 1:
2: from OFS.Folder import Folder
3: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5: from Products.PythonScripts.standard import url_quote
6:
7:
8: from Ft.Xml.Domlette import NonvalidatingReader
9: from Ft.Xml.Domlette import PrettyPrint, Print
10: from Ft.Xml import EMPTY_NAMESPACE, Parse
11:
12: from xml.dom.minidom import parse, parseString
13:
14: import Ft.Xml.XPath
15: import cStringIO
16: import xmlrpclib
17: import os.path
18: import sys
19: import cgi
20: import urllib
21: import logging
22: import math
23: import documentViewer
1.1.2.2 ! abukhman 24: import urllib2
! 25: import urllib
1.1.2.1 abukhman 26: import urlparse
27: from types import *
28:
def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    Only immediate children whose nodeType equals TEXT_NODE contribute;
    text nested inside child elements is not collected.  Passing None
    yields the empty string.
    """
    if nodename is None:
        return ""
    return "".join(
        child.data
        for child in nodename.childNodes
        if child.nodeType == child.TEXT_NODE
    )
39:
def serializeNode(node, encoding='utf-8'):
    """Serialize node to an XML string.

    The node is written with Print into an in-memory buffer using the
    given encoding, and the buffer contents are returned.
    """
    out = cStringIO.StringIO()
    try:
        Print(node, stream=out, encoding=encoding)
        return out.getvalue()
    finally:
        # the buffer is only scratch space; release it on every path
        out.close()
47:
48:
class extraFunction(Folder):
    """Folder offering full-text helpers backed by the MPDL XQuery interface.

    Every public method posts a request to a script below
    ``_mpdlInterfaceUrl`` and either returns the raw response or rewrites
    the links of the returned XML fragment so that they point back at this
    object (``self.absolute_url()``).

    NOTE(review): request values (document path, query, ...) are
    interpolated into the POST body without URL quoting, except in
    getTranslate/getLemma/getLemmaNew which use url_quote.  Confirm whether
    callers pre-encode them before adding quoting here.
    """

    # Base URL of the remote interface; script names are appended to it.
    _mpdlInterfaceUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/"

    def __init__(self, id, title=""):
        """Store the object id and title."""
        self.id = id
        self.title = title

    def _fetch(self, script, params):
        """POST params to the given interface script and return the body.

        script -- path relative to _mpdlInterfaceUrl, e.g. "doc-query.xql"
        params -- already formatted request body (passed as urlopen data)

        The response is always closed, also when reading fails.  Errors
        raised by urllib2 are propagated unchanged.
        """
        response = urllib2.urlopen(self._mpdlInterfaceUrl + script, params)
        try:
            return response.read()
        finally:
            response.close()

    def _openInPopup(self, link, left):
        """Add the target/onClick/onDblclick attributes that open link in a popup.

        left -- horizontal window position inserted into the window.open
                feature string (the search views use 400, the text view 700)
        """
        link.setAttributeNS(None, 'target', '_blank')
        link.setAttributeNS(None, 'onClick', "popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=%s, scrollbars=1'); return false;" % left)
        link.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')

    def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None):
        """Return one page of search results as serialized XML.

        The request values are taken from pageinfo ('queryPageSize',
        'searchPN', 'sn', 'highlightQuery', 'query', 'queryType',
        'viewMode', 'tocMode', 'tocPN') and docinfo ('textURLPath', 'url').
        The pn, query and queryType arguments are overwritten by the
        pageinfo values and lemma is not used; they are kept for interface
        compatibility.

        Depending on queryType a result div is picked from the response and
        its links are rewritten to point at this object.  Returns the
        string "no text here" when the queryType is not handled or the
        expected div is missing.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        logging.debug("documentViewer (gettoc) docpath: %s"%(docpath))
        logging.debug("documentViewer (gettoc) url: %s"%(url))
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        sn = pageinfo['sn']
        highlightQuery = pageinfo['highlightQuery']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()

        data = self._fetch("doc-query.xql", "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode, highlightQuery))
        # links in the response carry the server-side document path; swap in our url
        pagexml = data.replace('?document=%s'%str(docpath), '?url=%s'%url)
        pagedom = Parse(pagexml)

        # replacement for 'mode=text' in page-fragment links (same for every link)
        fragmentMode = 'mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode, queryType, query, pagesize, pn, tocMode, pn, tocPN)

        if queryType in ("fulltext", "xpath", "xquery", "fulltextMorphLemma"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                # NOTE(review): "//a" is an absolute XPath, so this presumably
                # visits every link of the response, not only those below
                # pagenode -- confirm before narrowing it to ".//a".
                for link in pagenode.xpath("//a"):
                    hrefNode = link.getAttributeNodeNS(None, u"href")
                    if hrefNode:
                        href = hrefNode.nodeValue
                        if href.startswith('page-fragment.xql'):
                            hrefNode.nodeValue = href.replace('mode=text', fragmentMode).replace('page-fragment.xql', '%s'%selfurl)
                return serializeNode(pagenode)

        if queryType == "fulltextMorph":
            pagedivs = pagedom.xpath("//div[@class='queryResult']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                for link in pagenode.xpath("//a"):
                    hrefNode = link.getAttributeNodeNS(None, u"href")
                    if hrefNode:
                        href = hrefNode.nodeValue
                        if href.startswith('page-fragment.xql'):
                            hrefNode.nodeValue = href.replace('mode=text', fragmentMode).replace('page-fragment.xql', '%s'%selfurl)
                        if href.startswith('../lt/lemma.xql'):
                            hrefNode.nodeValue = href.replace('../lt/lemma.xql', '%s/template/head_main_lemma_New'%(selfurl))
                            self._openInPopup(link, 400)
                return serializeNode(pagenode)

        if queryType in ("ftIndex", "ftIndexMorph"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                indexMode = 'mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s'%(viewMode, tocMode, tocPN, pn)
                for link in pagenode.xpath("//a"):
                    hrefNode = link.getAttributeNodeNS(None, u"href")
                    if hrefNode:
                        href = hrefNode.nodeValue
                        hrefNode.nodeValue = href.replace('mode=text', indexMode)
                        # dictionary links are rebuilt from the original href,
                        # replacing the value assigned just above
                        if href.startswith('../lt/lex.xql'):
                            hrefNode.nodeValue = href.replace('../lt/lex.xql', '%s/template/head_main_voc'%selfurl)
                            self._openInPopup(link, 400)
                        if href.startswith('../lt/lemma.xql'):
                            hrefNode.nodeValue = href.replace('../lt/lemma.xql', '%s/template/head_main_lemma'%selfurl)
                            self._openInPopup(link, 400)
                return serializeNode(pagenode)

        return "no text here"

    def getNumPages(self, docinfo=None):
        """Count the page breaks of the document and store them in docinfo.

        Runs the xquery '//pb' against docinfo['textURLPath'], counts the
        occurrences of "<pb " in the response body and stores the number as
        docinfo['numPages'].  Returns docinfo.
        """
        xquery = '//pb'
        # the body has to be read first: the response object itself has no count()
        text = self._fetch("xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'], xquery))
        docinfo['numPages'] = text.count("<pb ")
        return docinfo

    def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None, sn=None, viewMode=None, tocMode=None, tocPN=None):
        """Return a single page of the full text as serialized XML.

        mode           -- "text", "xml", "pureXml" or "text_dict"; the
                          latter is requested from the server as
                          "textPollux" and gets its dictionary links
                          rewritten
        pn             -- page number to request
        highlightQuery -- when not None it is sent together with sn
        viewMode, tocMode, tocPN -- overwritten by the pageinfo values;
                          kept for interface compatibility

        Returns "no text here" for any other mode or when the response has
        no top-level div.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()
        if mode == "text_dict":
            textmode = "textPollux"
        else:
            textmode = mode

        textParam = "document=%s&mode=%s&pn=%s"%(docpath, textmode, pn)
        if highlightQuery is not None:
            textParam += "&highlightQuery=%s&sn=%s"%(highlightQuery, sn)

        # read the body and close the connection before parsing
        pagexml = self._fetch("page-fragment.xql", textParam)
        pagedom = Parse(pagexml)

        if mode not in ("text", "xml", "pureXml", "text_dict"):
            return "no text here"
        # first div contains text
        pagedivs = pagedom.xpath("/div")
        if len(pagedivs) == 0:
            return "no text here"
        pagenode = pagedivs[0]

        if mode in ("text", "text_dict"):
            # prefix put in front of footnote anchors so they reload this page
            noteTarget = "?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url, viewMode, tocMode, tocPN, pn)
            for link in pagenode.xpath("//a"):
                hrefNode = link.getAttributeNodeNS(None, u"href")
                if not hrefNode:
                    continue
                href = hrefNode.nodeValue
                if mode == "text_dict":
                    if href.startswith('lt/lex.xql'):
                        # is pollux link
                        hrefNode.nodeValue = href.replace('lt/lex.xql', '%s/template/head_main_voc'%selfurl)
                        self._openInPopup(link, 700)
                    if href.startswith('lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('lt/lemma.xql', '%s/template/head_main_lemma'%selfurl)
                        self._openInPopup(link, 700)
                if href.startswith('#note-'):
                    hrefNode.nodeValue = href.replace('#note-', noteTarget)
        return serializeNode(pagenode)

    def getTranslate(self, query=None, language=None):
        """Return the raw response of lt/lex.xql for query in the given language."""
        return self._fetch("lt/lex.xql", "document=&language="+str(language)+"&query="+url_quote(str(query)))

    def getLemma(self, lemma=None, language=None):
        """Return the raw response of lt/lemma.xql for lemma in the given language."""
        return self._fetch("lt/lemma.xql", "document=&language="+str(language)+"&lemma="+url_quote(str(lemma)))

    def getLemmaNew(self, query=None, language=None):
        """Same request as getLemma, with the lemma passed as query."""
        return self._fetch("lt/lemma.xql", "document=&language="+str(language)+"&lemma="+url_quote(str(query)))

    def getQuery(self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
        """Return the number of result pages for the search in pageinfo.

        The hit count is read from the 'queryResultHits' div of the
        response and divided by ten (integer division) plus one.  A
        response without that div counts as zero hits, the same default
        getToc uses.  The query, queryType and pn arguments are overwritten
        by the pageinfo values.

        NOTE(review): the divisor is the constant 10, not
        pageinfo['queryPageSize'] -- confirm that this is intended.
        """
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        query = pageinfo['query']
        queryType = pageinfo['queryType']

        pagexml = self._fetch("doc-query.xql", "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn))
        pagedom = Parse(pagexml)
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        tocSearch = 0
        if len(numdivs) > 0:
            tocSearch = int(getTextFromNode(numdivs[0]))
        tc = tocSearch // 10 + 1
        logging.debug("documentViewer (gettoc) tc: %s"%(tc))
        return tc

    def getToc(self, mode="text", docinfo=None):
        """Load the size of the table of contents and store it in docinfo.

        Nothing is requested when mode is "none" or when
        docinfo['tocSize_<mode>'] is already present.  Otherwise the number
        found in the 'queryResultHits' div (0 when the div is missing) is
        stored under that key.  Returns docinfo.
        """
        logging.debug("documentViewer (gettoc) mode: %s"%(mode))
        if mode == "none":
            return docinfo
        if 'tocSize_%s'%mode in docinfo:
            # cached toc
            return docinfo

        docpath = docinfo['textURLPath']
        # we need to set a result set size
        pagesize = 1000
        pn = 1
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode

        pagexml = self._fetch("doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, queryType, pagesize, pn))
        # post-processing downloaded xml
        pagedom = Parse(pagexml)
        # number of entries in toc
        tocSize = 0
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        if len(numdivs) > 0:
            tocSize = int(getTextFromNode(numdivs[0]))
        docinfo['tocSize_%s'%mode] = tocSize
        return docinfo

    def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
        """Return a single page of the table of contents as a string.

        The page number and size come from pageinfo ('tocPN',
        'tocPageSize'); the pn argument is not used.  Links to
        page-fragment.xql are redirected to this object and 'mode=image' is
        replaced by 'mode=texttool'.
        """
        # TODO: this should use the cached TOC
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        pagesize = pageinfo['tocPageSize']
        tocPN = pageinfo['tocPN']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        selfurl = self.absolute_url()

        data = self._fetch("doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, queryType, pagesize, tocPN))
        page = data.replace('page-fragment.xql?document=%s'%str(docpath), '%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl, url, viewMode, tocMode, tocPN))
        return page.replace('mode=image', 'mode=texttool')
327:
328:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>