Mercurial > hg > documentViewer
comparison extraFunction.py @ 2:8cc283757c39 modularisierung
New Version
author | abukhman |
---|---|
date | Mon, 14 Jun 2010 12:50:06 +0200 |
parents | |
children | 3ba8479c7aba |
comparison
equal
deleted
inserted
replaced
1:312446f900da | 2:8cc283757c39 |
---|---|
1 | |
2 from OFS.Folder import Folder | |
3 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate | |
4 from Products.PageTemplates.PageTemplateFile import PageTemplateFile | |
5 from Products.PythonScripts.standard import url_quote | |
6 | |
7 | |
8 from Ft.Xml.Domlette import NonvalidatingReader | |
9 from Ft.Xml.Domlette import PrettyPrint, Print | |
10 from Ft.Xml import EMPTY_NAMESPACE, Parse | |
11 | |
12 from xml.dom.minidom import parse, parseString | |
13 | |
14 import Ft.Xml.XPath | |
15 import cStringIO | |
16 import xmlrpclib | |
17 import os.path | |
18 import sys | |
19 import cgi | |
20 import urllib | |
21 import logging | |
22 import math | |
23 import documentViewer | |
24 | |
25 import urlparse | |
26 from types import * | |
27 | |
def getTextFromNode(nodename):
    """Return the text held directly inside a DOM node.

    Only the immediate children of nodename are inspected; the data of
    every child whose nodeType equals TEXT_NODE is concatenated in document
    order. Text nested inside child elements is not included.

    Returns "" when nodename is None or has no text-node children.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
38 | |
def serializeNode(node, encoding='utf-8'):
    """Return a string containing node serialized as XML.

    The node is written with Print into an in-memory cStringIO buffer using
    the given encoding, and the buffer content is returned.
    """
    buf = cStringIO.StringIO()
    try:
        Print(node, stream=buf, encoding=encoding)
        return buf.getvalue()
    finally:
        # release the buffer even when Print raises
        buf.close()
46 | |
47 | |
class extraFunction(Folder):
    """Folder object bundling the text, search and table-of-contents requests.

    The query methods send their request through
    ``self.template.fulltextclient.eval`` and, where needed, rewrite the
    links of the returned XML so that they point to ``self.absolute_url()``.
    """

    # onClick handler that opens the link in a popup window; the %s
    # placeholder is the value of the "left" window feature.
    _POPUP_ONCLICK = ("popupWin = window.open(this.href, 'contacts', "
                      "'location,width=500,height=600,top=180, left=%s, "
                      "scrollbars=1'); return false;")

    def __init__(self, id, title=""):
        """Store id and title on the new object."""
        self.id = id
        self.title = title

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    def _hrefLinks(self, pagenode):
        """Return a list of (link, hrefNode) pairs for all a-tags with an href.

        The links are selected with the XPath expression "//a" evaluated on
        pagenode; a-tags without href attribute are skipped.
        """
        pairs = []
        for link in pagenode.xpath("//a"):
            hrefNode = link.getAttributeNodeNS(None, u"href")
            if hrefNode:
                pairs.append((link, hrefNode))
        return pairs

    def _openInPopup(self, link, left):
        """Set target, onClick and onDblclick on link so it opens in a popup."""
        link.setAttributeNS(None, 'target', '_blank')
        link.setAttributeNS(None, 'onClick', self._POPUP_ONCLICK % left)
        link.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')

    def _redirectToPopup(self, link, hrefNode, href, prefix, target, left):
        """If href starts with prefix, replace prefix by target and open in a popup.

        href is the original attribute value; the new value is always
        derived from it, not from the current content of hrefNode.
        """
        if href.startswith(prefix):
            hrefNode.nodeValue = href.replace(prefix, target)
            self._openInPopup(link, left)

    def _tocQueryType(self, mode):
        """Map a toc mode to the queryType request parameter ("text" -> "toc")."""
        if mode == "text":
            return "toc"
        return mode

    def _lexiconRequest(self, script, language, paramName, value):
        """Call /mpdl/interface/lt/<script> with language and one quoted parameter."""
        params = ("document=&language=" + str(language)
                  + "&" + paramName + "=" + url_quote(str(value)))
        return self.template.fulltextclient.eval("/mpdl/interface/lt/" + script, params)

    # ------------------------------------------------------------------
    # search
    # ------------------------------------------------------------------

    def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None):
        """get search list

        Sends the search described by pageinfo for the document in docinfo
        to doc-query.xql and returns the serialized result div with its
        links rewritten to point to this object.

        The pn, query and queryType arguments are overridden by
        pageinfo['searchPN'], pageinfo['query'] and pageinfo['queryType'];
        lemma is not used.

        Returns "no text here" when the query type is not handled or the
        expected result div is not present.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        logging.debug("documentViewer (gettoc) docpath: %s"%(docpath))
        logging.debug("documentViewer (gettoc) url: %s"%(url))
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        sn = pageinfo['sn']
        highlightQuery = pageinfo['highlightQuery']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()

        page = self.template.fulltextclient.eval(
            "/mpdl/interface/doc-query.xql",
            "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"
            % (docpath, 'text', queryType, query, pagesize, pn, sn, viewMode, highlightQuery),
            outputUnicode=False)
        pagexml = page.replace('?document=%s' % str(docpath), '?url=%s' % url)
        pagedom = Parse(pagexml)

        # pick the div holding the result for this kind of query
        isIndexQuery = queryType in ("ftIndex", "ftIndexMorph")
        if queryType in ("fulltext", "xpath", "xquery", "fulltextMorphLemma"):
            divPath = "//div[@class='queryResultPage']"
        elif queryType == "fulltextMorph":
            divPath = "//div[@class='queryResult']"
        elif isIndexQuery:
            divPath = "//div[@class='queryResultPage']"
        else:
            return "no text here"

        pagedivs = pagedom.xpath(divPath)
        if len(pagedivs) == 0:
            return "no text here"
        pagenode = pagedivs[0]

        # replacement for 'mode=text' in the hrefs (constant for all links)
        if isIndexQuery:
            textParams = ('mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s'
                          % (viewMode, tocMode, tocPN, pn))
        else:
            textParams = ('mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'
                          % (viewMode, queryType, query, pagesize, pn, tocMode, pn, tocPN))

        for link, hrefNode in self._hrefLinks(pagenode):
            href = hrefNode.nodeValue
            if isIndexQuery:
                hrefNode.nodeValue = href.replace('mode=text', textParams)
                # dictionary links are rebuilt from the original href
                self._redirectToPopup(link, hrefNode, href, '../lt/lex.xql',
                                      '%s/template/head_main_voc' % selfurl, 400)
                self._redirectToPopup(link, hrefNode, href, '../lt/lemma.xql',
                                      '%s/template/head_main_lemma' % selfurl, 400)
            else:
                if href.startswith('page-fragment.xql'):
                    newhref = href.replace('mode=text', textParams)
                    hrefNode.nodeValue = newhref.replace('page-fragment.xql', '%s' % selfurl)
                if queryType == "fulltextMorph":
                    self._redirectToPopup(link, hrefNode, href, '../lt/lemma.xql',
                                          '%s/template/head_main_lemma_New' % (selfurl), 400)
        return serializeNode(pagenode)

    def getQuery(self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
        """number of result pages for the search described by pageinfo

        The pn, query and queryType arguments are overridden by
        pageinfo['searchPN'], pageinfo['query'] and pageinfo['queryType'].

        Returns (hits // 10) + 1 where hits is the content of the
        'queryResultHits' div; a missing div counts as 0 hits.
        """
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        pagexml = self.template.fulltextclient.eval(
            "/mpdl/interface/doc-query.xql",
            "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"
            % (docpath, 'text', queryType, query, pagesize, pn),
            outputUnicode=False)
        pagedom = Parse(pagexml)
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        tocSearch = 0
        # guard against a result without hits div (same check as in getToc)
        if len(numdivs) > 0:
            tocSearch = int(getTextFromNode(numdivs[0]))
        # NOTE(review): the divisor 10 is hard-coded and independent of
        # pageinfo['queryPageSize'] -- confirm that this is intended.
        tc = tocSearch // 10 + 1
        logging.debug("documentViewer (gettoc) tc: %s"%(tc))
        return tc

    # ------------------------------------------------------------------
    # text pages
    # ------------------------------------------------------------------

    def getNumPages(self, docinfo=None):
        """get list of pages from fulltext and put in docinfo

        Runs the xquery '//pb' on the document and stores the number of
        occurrences of "<pb " in the answer as docinfo['numPages'].
        Returns docinfo.
        """
        xquery = '//pb'
        text = self.template.fulltextclient.eval(
            "/mpdl/interface/xquery.xql",
            "document=%s&xquery=%s" % (docinfo['textURLPath'], xquery))
        docinfo['numPages'] = text.count("<pb ")
        return docinfo

    def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None, sn=None, viewMode=None, tocMode=None, tocPN=None):
        """returns single page from fulltext

        mode selects the output: "text", "xml" and "pureXml" are requested
        as they are, "text_dict" is requested as "textPollux". In modes
        "text" and "text_dict" note links are rewritten; in "text_dict"
        the dictionary links are additionally turned into popup links.

        highlightQuery and sn are added to the request when highlightQuery
        is not None. The viewMode, tocMode and tocPN arguments are
        overridden by the values in pageinfo.

        Returns the serialized first top-level div, or "no text here" when
        the mode is not handled or the page has no such div.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()
        if mode == "text_dict":
            textmode = "textPollux"
        else:
            textmode = mode

        textParam = "document=%s&mode=%s&pn=%s" % (docpath, textmode, pn)
        if highlightQuery is not None:
            textParam += "&highlightQuery=%s&sn=%s" % (highlightQuery, sn)
        pagexml = self.template.fulltextclient.eval(
            "/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False)
        pagedom = Parse(pagexml)

        if mode in ("text", "xml", "pureXml", "text_dict"):
            # first div contains text
            pagedivs = pagedom.xpath("/div")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                # xml and pureXml are returned without touching the links
                if mode in ("text", "text_dict"):
                    noteTarget = ("?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"
                                  % (url, viewMode, tocMode, tocPN, pn))
                    for link, hrefNode in self._hrefLinks(pagenode):
                        href = hrefNode.nodeValue
                        if mode == "text_dict":
                            # pollux (dictionary) and lemma links open in a popup
                            self._redirectToPopup(link, hrefNode, href, 'lt/lex.xql',
                                                  '%s/template/head_main_voc' % selfurl, 700)
                            self._redirectToPopup(link, hrefNode, href, 'lt/lemma.xql',
                                                  '%s/template/head_main_lemma' % selfurl, 700)
                        if href.startswith('#note-'):
                            hrefNode.nodeValue = href.replace('#note-', noteTarget)
                return serializeNode(pagenode)
        return "no text here"

    # ------------------------------------------------------------------
    # dictionary / lemma lookups
    # ------------------------------------------------------------------

    def getTranslate(self, query=None, language=None):
        """translate into another language

        Returns the answer of lt/lex.xql for query and language.
        """
        return self._lexiconRequest("lex.xql", language, "query", query)

    def getLemma(self, lemma=None, language=None):
        """similar words (lemma)

        Returns the answer of lt/lemma.xql for lemma and language.
        """
        return self._lexiconRequest("lemma.xql", language, "lemma", lemma)

    def getLemmaNew(self, query=None, language=None):
        """similar words (lemma)

        Same request as getLemma; the lemma is passed in the query argument.
        """
        return self._lexiconRequest("lemma.xql", language, "lemma", query)

    # ------------------------------------------------------------------
    # table of contents
    # ------------------------------------------------------------------

    def getToc(self, mode="text", docinfo=None):
        """loads table of contents and stores in docinfo

        Stores the number of entries as docinfo['tocSize_<mode>'] (0 when
        the answer has no 'queryResultHits' div). Nothing is requested
        when mode is "none" or the size is already present in docinfo.
        Returns docinfo.
        """
        logging.debug("documentViewer (gettoc) mode: %s"%(mode))
        if mode == "none":
            return docinfo
        sizeKey = 'tocSize_%s' % mode
        if sizeKey in docinfo:
            # cached toc
            return docinfo

        docpath = docinfo['textURLPath']
        # we need to set a result set size
        pagesize = 1000
        pn = 1
        queryType = self._tocQueryType(mode)
        pagexml = self.template.fulltextclient.eval(
            "/mpdl/interface/doc-query.xql",
            "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"
            % (docpath, queryType, pagesize, pn),
            outputUnicode=False)
        # post-processing downloaded xml
        pagedom = Parse(pagexml)
        # get number of entries
        tocSize = 0
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        if len(numdivs) > 0:
            tocSize = int(getTextFromNode(numdivs[0]))
        docinfo[sizeKey] = tocSize
        return docinfo

    def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
        """returns single page from the table of contents

        The pn argument is overridden by pageinfo['tocPN']. In the answer
        the page-fragment.xql links for this document are replaced by
        links to this object and 'mode=image' becomes 'mode=texttool'.
        """
        # TODO: this should use the cached TOC
        queryType = self._tocQueryType(mode)
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['tocPageSize']
        pn = pageinfo['tocPN']
        url = docinfo['url']
        selfurl = self.absolute_url()
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        pagexml = self.template.fulltextclient.eval(
            "/mpdl/interface/doc-query.xql",
            "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"
            % (docpath, queryType, pagesize, pn),
            outputUnicode=False)
        page = pagexml.replace(
            'page-fragment.xql?document=%s' % str(docpath),
            '%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s' % (selfurl, url, viewMode, tocMode, pn))
        return page.replace('mode=image', 'mode=texttool')
299 | |
300 |