Mercurial > hg > documentViewer
comparison MpdlXmlTextServer.py @ 495:ede0c93de798 metalify-1
update branch to latest version of HEAD (with modularisierung branch)
author | casties |
---|---|
date | Thu, 17 Jun 2010 19:35:24 +0200 |
parents | |
children | f83ffab77502 |
comparison
equal
deleted
inserted
replaced
1:312446f900da | 495:ede0c93de798 |
---|---|
1 | |
2 from OFS.SimpleItem import SimpleItem | |
3 from Products.PageTemplates.PageTemplateFile import PageTemplateFile | |
4 | |
5 from Ft.Xml import EMPTY_NAMESPACE, Parse | |
6 | |
7 import sys | |
8 import logging | |
9 import documentViewer | |
10 from documentViewer import getTextFromNode, serializeNode | |
11 | |
12 | |
class MpdlXmlTextServer(SimpleItem):
    """TextServer implementation for MPDL-XML eXist server.

    All text, search and table-of-contents requests are sent as HTTP
    requests to ``self.serverUrl`` and the returned XML/HTML fragments are
    post-processed so that their links point back at this object.
    """
    meta_type="MPDL-XML TextServer"

    manage_options=(
        {'label':'Config','action':'manage_changeMpdlXmlTextServerForm'},
        )+SimpleItem.manage_options

    manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals())

    # onClick handler attached to dictionary/lemma links; the single %s is
    # the horizontal position ("left") of the popup window.
    _popupScript = "popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=%s, scrollbars=1'); return false;"

    def __init__(self,id,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40):
        """Set up id, title, timeout and the base URL of the text server.

        If serverName is given it takes precedence over serverUrl and the
        base URL becomes "http://<serverName>/mpdl/interface/".
        """
        self.id=id
        self.title=title
        self.timeout = timeout
        if serverName is None:
            self.serverUrl = serverUrl
        else:
            self.serverUrl = "http://%s/mpdl/interface/"%serverName

    def _urlQuote(self, value):
        """Return str(value) percent-encoded for use in a query string."""
        # Imported locally because this module does not import a quoting
        # function at file level; the fallback covers the Python 3 location.
        try:
            from urllib import quote
        except ImportError:
            from urllib.parse import quote
        return quote(str(value))

    def _makePopupLink(self, link, left):
        """Make the given <a> element open in a popup window.

        left is inserted as the "left" position into the window.open() call.
        """
        link.setAttributeNS(None, 'target', '_blank')
        link.setAttributeNS(None, 'onClick', self._popupScript % left)
        link.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')

    def getHttpData(self, url, data=None):
        """returns result from url+data HTTP request"""
        return documentViewer.getHttpData(url,data,timeout=self.timeout)

    def getServerData(self, method, data=None):
        """returns result from text server for method+data"""
        # method is appended to the configured base URL
        return self.getHttpData(self.serverUrl+method, data)

    def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None):
        """Return one page of search results as serialized markup.

        The pn, query and queryType arguments are overwritten by the values
        in pageinfo ('searchPN', 'query', 'queryType'); lemma is not used.
        Links in the result are rewritten to point at this object.
        Returns "no text here" if the query type is not handled or the
        expected result div is missing.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        logging.debug("documentViewer (gettoc) docpath: %s"%(docpath))
        logging.debug("documentViewer (gettoc) url: %s"%(url))
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        sn = pageinfo['sn']
        highlightQuery = pageinfo['highlightQuery']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()

        data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery))

        pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
        pagedom = Parse(pagexml)

        # replacement for "mode=text" in links to page fragments; carries
        # the current search state along
        fragmentMode = 'mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN)

        if queryType in ("fulltext", "xpath", "xquery", "fulltextMorphLemma"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs)>0:
                pagenode=pagedivs[0]
                for l in pagenode.xpath("//a"):
                    hrefNode = l.getAttributeNodeNS(None, u"href")
                    if hrefNode:
                        href = hrefNode.nodeValue
                        if href.startswith('page-fragment.xql'):
                            newHref = href.replace('mode=text', fragmentMode)
                            hrefNode.nodeValue = newHref.replace('page-fragment.xql','%s'%selfurl)
                return serializeNode(pagenode)

        if queryType=="fulltextMorph":
            pagedivs = pagedom.xpath("//div[@class='queryResult']")
            if len(pagedivs)>0:
                pagenode=pagedivs[0]
                for l in pagenode.xpath("//a"):
                    hrefNode = l.getAttributeNodeNS(None, u"href")
                    if hrefNode:
                        href = hrefNode.nodeValue
                        if href.startswith('page-fragment.xql'):
                            newHref = href.replace('mode=text', fragmentMode)
                            hrefNode.nodeValue = newHref.replace('page-fragment.xql','%s'%selfurl)
                        if href.startswith('../lt/lemma.xql'):
                            hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma_New'%(selfurl))
                            self._makePopupLink(l, 400)
                return serializeNode(pagenode)

        if queryType in ("ftIndex", "ftIndexMorph"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs)>0:
                pagenode=pagedivs[0]
                indexMode = 'mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s'%(viewMode,tocMode,tocPN,pn)
                for l in pagenode.xpath("//a"):
                    hrefNode = l.getAttributeNodeNS(None, u"href")
                    if hrefNode:
                        href = hrefNode.nodeValue
                        hrefNode.nodeValue = href.replace('mode=text', indexMode)
                        # NOTE(review): the two rewrites below start from the
                        # original href again, so for these links the
                        # mode=text replacement above is discarded.
                        if href.startswith('../lt/lex.xql'):
                            hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_voc'%selfurl)
                            self._makePopupLink(l, 400)
                        if href.startswith('../lt/lemma.xql'):
                            hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                            self._makePopupLink(l, 400)
                return serializeNode(pagenode)

        return "no text here"

    def getNumPages(self,docinfo=None):
        """get list of pages from fulltext and put in docinfo

        Stores the number of "<pb " occurrences in the server response as
        docinfo['numPages'] unless that key is already present, and returns
        docinfo.
        """
        if 'numPages' in docinfo:
            # already there
            return docinfo

        xquery = '//pb'
        text = self.getServerData("xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
        docinfo['numPages'] = text.count("<pb ")
        return docinfo

    def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None):
        """returns single page from fulltext

        Handled modes are "text", "xml", "pureXml" and "text_dict"; for any
        other mode, or if the response has no top-level div, the string
        "no text here" is returned. The viewMode, tocMode and tocPN
        arguments are overwritten by the values in pageinfo.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()
        # text-with-links mode is requested from the server as "textPollux"
        if mode == "text_dict":
            textmode = "textPollux"
        else:
            textmode = mode

        textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn)
        if highlightQuery is not None:
            textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn)

        pagexml = self.getServerData("page-fragment.xql",textParam)
        pagedom = Parse(pagexml)

        if mode not in ("text", "xml", "pureXml", "text_dict"):
            return "no text here"

        # first div contains text
        pagedivs = pagedom.xpath("/div")
        if len(pagedivs) == 0:
            return "no text here"
        pagenode = pagedivs[0]

        if mode in ("xml", "pureXml"):
            # returned without link rewriting
            return serializeNode(pagenode)

        # prefix that turns an in-page "#note-" anchor into a link to this
        # page of the document in the viewer
        noteHref = "?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)

        # check all a-tags
        for l in pagenode.xpath("//a"):
            hrefNode = l.getAttributeNodeNS(None, u"href")
            if not hrefNode:
                continue
            href = hrefNode.nodeValue
            if mode == "text_dict":
                if href.startswith('lt/lex.xql'):
                    # is pollux link
                    hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl)
                    self._makePopupLink(l, 700)
                if href.startswith('lt/lemma.xql'):
                    hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                    self._makePopupLink(l, 700)
            if href.startswith('#note-'):
                hrefNode.nodeValue = href.replace('#note-', noteHref)
        return serializeNode(pagenode)

    def getTranslate(self, query=None, language=None):
        """translate into another languages"""
        data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+self._urlQuote(query))
        return data

    def getLemma(self, lemma=None, language=None):
        """similar words lemma"""
        data = self.getServerData("lt/lemma.xql","document=&language="+str(language)+"&lemma="+self._urlQuote(lemma))
        return data

    def getLemmaNew(self, query=None, language=None):
        """similar words lemma; same request as getLemma, but the word is passed as query"""
        data = self.getServerData("lt/lemma.xql","document=&language="+str(language)+"&lemma="+self._urlQuote(query))
        return data

    def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
        """Return (number of hits // 10) + 1 for the query in pageinfo.

        The query, queryType and pn arguments are overwritten by the values
        in pageinfo. If the response contains no 'queryResultHits' div the
        hit count is taken as 0, so the result is 1.
        """
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        tocSearch = 0

        pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn))
        pagedom = Parse(pagexml)
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        # same guard as in getToc: a missing hits div must not raise
        if len(numdivs) > 0:
            tocSearch = int(getTextFromNode(numdivs[0]))
        tc = tocSearch // 10 + 1
        logging.debug("documentViewer (gettoc) tc: %s"%(tc))
        return tc

    def getToc(self, mode="text", docinfo=None):
        """loads table of contents and stores in docinfo

        Only the number of entries is stored, as docinfo['tocSize_<mode>'];
        it is 0 if the response has no 'queryResultHits' div. Nothing is
        requested for mode "none" or if the size is already in docinfo.
        """
        logging.debug("documentViewer (gettoc) mode: %s"%(mode))
        if mode == "none":
            return docinfo
        if 'tocSize_%s'%mode in docinfo:
            # cached toc
            return docinfo

        docpath = docinfo['textURLPath']
        # we need to set a result set size
        pagesize = 1000
        pn = 1
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        # number of entries in toc
        tocSize = 0

        pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
        # post-processing downloaded xml
        pagedom = Parse(pagexml)
        # get number of entries
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        if len(numdivs) > 0:
            tocSize = int(getTextFromNode(numdivs[0]))
        docinfo['tocSize_%s'%mode] = tocSize
        return docinfo

    def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
        """returns single page from the table of contents

        The pn argument is overwritten by pageinfo['tocPN']. The server
        response is returned as text with its page-fragment links rewritten
        to point at this object.
        """
        # TODO: this should use the cached TOC
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        pagesize = pageinfo['tocPageSize']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        pn = tocPN
        selfurl = self.absolute_url()

        data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))

        page = data.replace('page-fragment.xql?document=%s'%str(docpath),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN))
        text = page.replace('mode=image','mode=texttool')
        return text

    def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
        """change settings

        Stores title, timeout and serverUrl on this object and redirects to
        'manage_main' if a RESPONSE is given.
        """
        self.title=title
        # NOTE(review): timeout is stored as received; if it arrives as a
        # string from the form it is not converted here - confirm.
        self.timeout = timeout
        self.serverUrl = serverUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
316 | |
317 # management methods | |
def manage_addMpdlXmlTextServerForm(self):
    """Render and return the form for adding an MpdlXmlTextServer."""
    template = PageTemplateFile("zpt/manage_addMpdlXmlTextServer", globals())
    # wrap the template around self via __of__ before calling it
    return template.__of__(self)()
322 | |
def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """Create a new MpdlXmlTextServer and add it to the destination.

    Redirects to 'manage_main' if a RESPONSE is given.
    """
    # timeout must be passed by keyword: the fourth positional parameter of
    # the constructor is serverName, which would override serverUrl.
    newObj = MpdlXmlTextServer(id,title,serverUrl,timeout=timeout)
    self.Destination()._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
329 | |
330 | |
331 |