comparison extraFunction.py @ 2:8cc283757c39 modularisierung

New Version
author abukhman
date Mon, 14 Jun 2010 12:50:06 +0200
parents
children 3ba8479c7aba
comparison
equal deleted inserted replaced
1:312446f900da 2:8cc283757c39
1
2 from OFS.Folder import Folder
3 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
4 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
5 from Products.PythonScripts.standard import url_quote
6
7
8 from Ft.Xml.Domlette import NonvalidatingReader
9 from Ft.Xml.Domlette import PrettyPrint, Print
10 from Ft.Xml import EMPTY_NAMESPACE, Parse
11
12 from xml.dom.minidom import parse, parseString
13
14 import Ft.Xml.XPath
15 import cStringIO
16 import xmlrpclib
17 import os.path
18 import sys
19 import cgi
20 import urllib
21 import logging
22 import math
23 import documentViewer
24
25 import urlparse
26 from types import *
27
def getTextFromNode(nodename):
    """Return the concatenated text of the direct text children of a node.

    Despite its name, ``nodename`` is a DOM node (anything exposing
    ``childNodes``), not a node name.  Only direct children whose
    ``nodeType`` equals ``TEXT_NODE`` contribute; text inside child
    elements is not collected.

    Returns "" when ``nodename`` is None or has no text children.
    """
    if nodename is None:
        return ""
    # join once instead of growing a string inside the loop
    return "".join([node.data
                    for node in nodename.childNodes
                    if node.nodeType == node.TEXT_NODE])
38
def serializeNode(node, encoding='utf-8'):
    """Return a string containing ``node`` serialized as XML.

    The node is written with ``Print`` into an in-memory buffer using the
    given ``encoding`` and the buffer content is returned.
    """
    buf = cStringIO.StringIO()
    try:
        Print(node, stream=buf, encoding=encoding)
        return buf.getvalue()
    finally:
        # release the buffer even when Print raises
        buf.close()
46
47
class extraFunction(Folder):
    # Folder object bundling the requests to the fulltext interface (the
    # "/mpdl/interface/*.xql" scripts, reached through
    # self.template.fulltextclient) and the post-processing of the XML they
    # return.
    #
    # NOTE(review): the class and __init__ are documented with comments
    # rather than docstrings on purpose -- ZPublisher presumably uses the
    # presence of a docstring to decide whether an object may be published;
    # confirm before turning these into docstrings.

    def __init__(self,id, title=""):
        # store the id and title of this object
        self.id=id
        self.title=title

    # ------------------------------------------------------------------
    # private helpers (names starting with "_" to keep them internal)
    # ------------------------------------------------------------------

    def _linksWithHref(self, pagenode):
        """Yield (link, hrefNode) for every "a" element that has an href.

        NOTE(review): "//a" is an absolute XPath, so it presumably matches
        the anchors of the whole document pagenode belongs to, not only
        the ones below pagenode -- confirm before changing it to ".//a".
        """
        for link in pagenode.xpath("//a"):
            hrefNode = link.getAttributeNodeNS(None, u"href")
            if hrefNode:
                yield link, hrefNode

    def _openInPopup(self, link, left):
        """Make the anchor ``link`` open its target in a popup window.

        ``left`` is the value put into the "left=" feature of window.open.
        """
        link.setAttributeNS(None, 'target', '_blank')
        link.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=%s, scrollbars=1'); return false;"%left)
        link.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')

    def _rewriteResultLink(self, hrefNode, href, selfurl, texttoolParams):
        """Point a 'page-fragment.xql' search result link at this object."""
        target = href.replace('mode=text', texttoolParams)
        hrefNode.nodeValue = target.replace('page-fragment.xql','%s'%selfurl)

    # ------------------------------------------------------------------
    # public methods
    # ------------------------------------------------------------------

    def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None):
        """get search list

        Runs the query described by pageinfo against the document in
        docinfo and returns the serialized result div with its links
        rewritten, or "no text here" when the query type is not handled
        or the expected result div is missing.

        The pn, query and queryType arguments are overwritten with
        pageinfo['searchPN'], pageinfo['query'] and pageinfo['queryType'];
        lemma is not used.

        Keys read from docinfo: 'textURLPath', 'url'.
        Keys read from pageinfo: 'queryPageSize', 'searchPN', 'sn',
        'highlightQuery', 'query', 'queryType', 'viewMode', 'tocMode',
        'tocPN'.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        logging.debug("documentViewer (gettoc) docpath: %s"%(docpath))
        logging.debug("documentViewer (gettoc) url: %s"%(url))
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        sn = pageinfo['sn']
        highlightQuery = pageinfo['highlightQuery']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()

        # NOTE(review): query is interpolated without URL quoting here,
        # unlike getTranslate/getLemma -- confirm whether callers quote it.
        page = self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False)
        # links in the result refer to the document path; use the url instead
        pagexml = page.replace('?document=%s'%str(docpath),'?url=%s'%url)
        pagedom = Parse(pagexml)

        # replacement for 'mode=text' in links to result pages
        texttoolParams = 'mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN)

        if queryType in ("fulltext", "xpath", "xquery", "fulltextMorphLemma"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                for link, hrefNode in self._linksWithHref(pagenode):
                    href = hrefNode.nodeValue
                    if href.startswith('page-fragment.xql'):
                        self._rewriteResultLink(hrefNode, href, selfurl, texttoolParams)
                return serializeNode(pagenode)

        if queryType == "fulltextMorph":
            pagedivs = pagedom.xpath("//div[@class='queryResult']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                for link, hrefNode in self._linksWithHref(pagenode):
                    href = hrefNode.nodeValue
                    if href.startswith('page-fragment.xql'):
                        self._rewriteResultLink(hrefNode, href, selfurl, texttoolParams)
                    if href.startswith('../lt/lemma.xql'):
                        # lemma links open the lemma template in a popup
                        hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma_New'%(selfurl))
                        self._openInPopup(link, 400)
                return serializeNode(pagenode)

        if queryType in ("ftIndex", "ftIndexMorph"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                indexParams = 'mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s'%(viewMode,tocMode,tocPN,pn)
                for link, hrefNode in self._linksWithHref(pagenode):
                    href = hrefNode.nodeValue
                    hrefNode.nodeValue = href.replace('mode=text', indexParams)
                    # the dictionary and lemma rewrites below start again
                    # from the original href, as the code always did
                    if href.startswith('../lt/lex.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_voc'%selfurl)
                        self._openInPopup(link, 400)
                    if href.startswith('../lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                        self._openInPopup(link, 400)
                return serializeNode(pagenode)

        return "no text here"

    def getNumPages(self,docinfo=None):
        """get list of pages from fulltext and put in docinfo

        Queries the document at docinfo['textURLPath'] for '//pb', stores
        the number of occurrences of "<pb " in the answer as
        docinfo['numPages'] and returns docinfo.
        """
        xquery = '//pb'
        text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
        docinfo['numPages'] = text.count("<pb ")
        return docinfo

    def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None):
        """returns single page from fulltext

        mode selects the post-processing: "text" rewrites note links,
        "xml" and "pureXml" return the page unchanged, "text_dict"
        additionally turns dictionary and lemma links into popup links.
        Any other mode, or a page without a top-level div, gives
        "no text here".

        The viewMode, tocMode and tocPN arguments are overwritten with the
        values of the same name from pageinfo.  Keys read from docinfo:
        'textURLPath', 'url'.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()

        # "text_dict" is requested from the server as "textPollux"
        if mode == "text_dict":
            textmode = "textPollux"
        else:
            textmode = mode

        textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn)
        if highlightQuery is not None:
            textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn)

        pagexml = self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False)
        pagedom = Parse(pagexml)

        if mode not in ("text", "xml", "pureXml", "text_dict"):
            return "no text here"

        # in every supported mode the first top-level div contains the text
        pagedivs = pagedom.xpath("/div")
        if len(pagedivs) == 0:
            return "no text here"
        pagenode = pagedivs[0]

        # "xml" and "pureXml" return the div untouched
        if mode in ("text", "text_dict"):
            noteTarget = "?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)
            for link, hrefNode in self._linksWithHref(pagenode):
                href = hrefNode.nodeValue
                if mode == "text_dict":
                    if href.startswith('lt/lex.xql'):
                        # dictionary (pollux) link
                        hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl)
                        self._openInPopup(link, 700)
                    if href.startswith('lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                        self._openInPopup(link, 700)
                if href.startswith('#note-'):
                    # note anchors need the full viewer parameters
                    hrefNode.nodeValue = href.replace('#note-', noteTarget)

        return serializeNode(pagenode)

    def getTranslate(self, query=None, language=None):
        """translate into another languages

        Returns the answer of lt/lex.xql for the URL-quoted query in the
        given language.
        """
        pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query)))
        return pagexml

    def getLemma(self, lemma=None, language=None):
        """similar words lemma

        Returns the answer of lt/lemma.xql for the URL-quoted lemma in the
        given language.
        """
        pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma)))
        return pagexml

    def getLemmaNew(self, query=None, language=None):
        """similar words lemma

        Same request as getLemma, with the lemma passed as ``query``.
        """
        return self.getLemma(lemma=query, language=language)

    def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
        """number of

        Runs the query from pageinfo and returns (hits // 10) + 1, where
        hits is the content of the 'queryResultHits' div of the answer, or
        0 when the answer has no such div.

        NOTE(review): presumably the number of result pages at 10 hits per
        page; the divisor is fixed and does not follow
        pageinfo['queryPageSize'] -- confirm.

        The query, queryType and pn arguments are overwritten with
        pageinfo['query'], pageinfo['queryType'] and pageinfo['searchPN'].
        """
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        query = pageinfo['query']
        queryType = pageinfo['queryType']

        pagexml = self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False)
        pagedom = Parse(pagexml)
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")

        # an answer without a hits div counts as zero hits instead of
        # failing with an IndexError
        tocSearch = 0
        if len(numdivs) > 0:
            tocSearch = int(getTextFromNode(numdivs[0]))

        tc = tocSearch // 10 + 1
        logging.debug("documentViewer (gettoc) tc: %s"%(tc))
        return tc

    def getToc(self, mode="text", docinfo=None):
        """loads table of contents and stores in docinfo

        Stores the number of entries as docinfo['tocSize_<mode>'] and
        returns docinfo.  Nothing is requested when mode is "none" or when
        the size for this mode is already in docinfo.
        """
        logging.debug("documentViewer (gettoc) mode: %s"%(mode))
        if mode == "none":
            return docinfo
        if 'tocSize_%s'%mode in docinfo:
            # cached toc
            return docinfo

        docpath = docinfo['textURLPath']
        # we need to set a result set size
        pagesize = 1000
        pn = 1
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode

        pagexml = self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)
        # post-processing downloaded xml
        pagedom = Parse(pagexml)
        # number of entries in toc; stays 0 when the answer has no hits div
        tocSize = 0
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        if len(numdivs) > 0:
            tocSize = int(getTextFromNode(numdivs[0]))
        docinfo['tocSize_%s'%mode] = tocSize
        return docinfo

    def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
        """returns single page from the table of contents

        The pn argument is overwritten with pageinfo['tocPN'].  Links to
        'page-fragment.xql?document=<path>' in the answer are pointed at
        this object and 'mode=image' is replaced by 'mode=texttool'.

        Keys read from docinfo: 'textURLPath', 'url'.
        Keys read from pageinfo: 'tocPageSize', 'tocPN', 'viewMode',
        'tocMode'.
        """
        # TODO: this should use the cached TOC
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        pagesize = pageinfo['tocPageSize']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        # the requested page number is the toc page number
        pn = tocPN
        selfurl = self.absolute_url()

        pagexml = self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn), outputUnicode=False)
        page = pagexml.replace('page-fragment.xql?document=%s'%str(docpath),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN))
        text = page.replace('mode=image','mode=texttool')
        return text
299
300