Mercurial > hg > documentViewer
comparison extraFunction.py @ 3:3ba8479c7aba modularisierung
new function
author | abukhman |
---|---|
date | Wed, 16 Jun 2010 11:16:02 +0200 |
parents | 8cc283757c39 |
children | e9085ba2bb51 |
comparison
equal
deleted
inserted
replaced
2:8cc283757c39 | 3:3ba8479c7aba |
---|---|
19 import cgi | 19 import cgi |
20 import urllib | 20 import urllib |
21 import logging | 21 import logging |
22 import math | 22 import math |
23 import documentViewer | 23 import documentViewer |
24 | 24 import urllib2 |
25 import urllib | |
25 import urlparse | 26 import urlparse |
26 from types import * | 27 from types import * |
27 | 28 |
28 def getTextFromNode(nodename): | 29 def getTextFromNode(nodename): |
29 "get the cdata content of a node" | 30 "get the cdata content of a node" |
67 queryType =pageinfo['queryType'] | 68 queryType =pageinfo['queryType'] |
68 viewMode= pageinfo['viewMode'] | 69 viewMode= pageinfo['viewMode'] |
69 tocMode = pageinfo['tocMode'] | 70 tocMode = pageinfo['tocMode'] |
70 tocPN = pageinfo['tocPN'] | 71 tocPN = pageinfo['tocPN'] |
71 selfurl = self.absolute_url() | 72 selfurl = self.absolute_url() |
72 page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) | 73 |
73 pagexml = page.replace('?document=%s'%str(docpath),'?url=%s'%url) | 74 page = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery)) |
75 #page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) | |
76 data = page.read() | |
77 page.close() | |
78 | |
79 pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) | |
74 pagedom = Parse(pagexml) | 80 pagedom = Parse(pagexml) |
75 if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): | 81 if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): |
76 pagedivs = pagedom.xpath("//div[@class='queryResultPage']") | 82 pagedivs = pagedom.xpath("//div[@class='queryResultPage']") |
77 if len(pagedivs)>0: | 83 if len(pagedivs)>0: |
78 pagenode=pagedivs[0] | 84 pagenode=pagedivs[0] |
130 return "no text here" | 136 return "no text here" |
131 | 137 |
132 def getNumPages(self,docinfo=None): | 138 def getNumPages(self,docinfo=None): |
133 """get list of pages from fulltext and put in docinfo""" | 139 """get list of pages from fulltext and put in docinfo""" |
134 xquery = '//pb' | 140 xquery = '//pb' |
135 text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) | 141 text = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) |
142 #text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) | |
136 docinfo['numPages'] = text.count("<pb ") | 143 docinfo['numPages'] = text.count("<pb ") |
137 return docinfo | 144 return docinfo |
138 | 145 |
139 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None): | 146 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None): |
140 """returns single page from fulltext""" | 147 """returns single page from fulltext""" |
151 textmode = mode | 158 textmode = mode |
152 | 159 |
153 textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn) | 160 textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn) |
154 if highlightQuery is not None: | 161 if highlightQuery is not None: |
155 textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) | 162 textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) |
156 pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False) | 163 |
164 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql",textParam) | |
165 """pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False)""" | |
166 | |
157 pagedom = Parse(pagexml) | 167 pagedom = Parse(pagexml) |
158 # plain text mode | 168 # plain text mode |
159 if mode == "text": | 169 if mode == "text": |
160 # first div contains text | 170 # first div contains text |
161 pagedivs = pagedom.xpath("/div") | 171 pagedivs = pagedom.xpath("/div") |
214 return serializeNode(pagenode) | 224 return serializeNode(pagenode) |
215 return "no text here" | 225 return "no text here" |
216 | 226 |
217 def getTranslate(self, query=None, language=None): | 227 def getTranslate(self, query=None, language=None): |
218 """translate into another languages""" | 228 """translate into another languages""" |
219 pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) | 229 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) |
220 return pagexml | 230 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) |
231 data = pagexml.read() | |
232 pagexml.close() | |
233 return data | |
221 | 234 |
222 def getLemma(self, lemma=None, language=None): | 235 def getLemma(self, lemma=None, language=None): |
223 """simular words lemma """ | 236 """simular words lemma """ |
224 pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) | 237 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) |
225 return pagexml | 238 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) |
239 data = pagexml.read() | |
240 pagexml.close() | |
241 return data | |
226 | 242 |
227 def getLemmaNew(self, query=None, language=None): | 243 def getLemmaNew(self, query=None, language=None): |
228 """simular words lemma """ | 244 """simular words lemma """ |
229 pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) | 245 |
230 return pagexml | 246 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) |
247 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) | |
248 data = pagexml.read() | |
249 pagexml.close() | |
250 return data | |
231 | 251 |
232 def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): | 252 def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): |
233 """number of""" | 253 """number of""" |
234 docpath = docinfo['textURLPath'] | 254 docpath = docinfo['textURLPath'] |
235 pagesize = pageinfo['queryPageSize'] | 255 pagesize = pageinfo['queryPageSize'] |
236 pn = pageinfo['searchPN'] | 256 pn = pageinfo['searchPN'] |
237 query =pageinfo['query'] | 257 query =pageinfo['query'] |
238 queryType =pageinfo['queryType'] | 258 queryType =pageinfo['queryType'] |
239 tocSearch = 0 | 259 tocSearch = 0 |
240 tocDiv = None | 260 tocDiv = None |
241 pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) | 261 |
262 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn)) | |
263 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) | |
242 pagedom = Parse(pagexml) | 264 pagedom = Parse(pagexml) |
243 numdivs = pagedom.xpath("//div[@class='queryResultHits']") | 265 numdivs = pagedom.xpath("//div[@class='queryResultHits']") |
244 tocSearch = int(getTextFromNode(numdivs[0])) | 266 tocSearch = int(getTextFromNode(numdivs[0])) |
245 tc=int((tocSearch/10)+1) | 267 tc=int((tocSearch/10)+1) |
246 logging.debug("documentViewer (gettoc) tc: %s"%(tc)) | 268 logging.debug("documentViewer (gettoc) tc: %s"%(tc)) |
264 else: | 286 else: |
265 queryType = mode | 287 queryType = mode |
266 # number of entries in toc | 288 # number of entries in toc |
267 tocSize = 0 | 289 tocSize = 0 |
268 tocDiv = None | 290 tocDiv = None |
269 pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) | 291 |
292 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) | |
293 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) | |
270 # post-processing downloaded xml | 294 # post-processing downloaded xml |
271 pagedom = Parse(pagexml) | 295 pagedom = Parse(pagexml) |
272 # get number of entries | 296 # get number of entries |
273 numdivs = pagedom.xpath("//div[@class='queryResultHits']") | 297 numdivs = pagedom.xpath("//div[@class='queryResultHits']") |
274 if len(numdivs) > 0: | 298 if len(numdivs) > 0: |
289 pn = pageinfo['tocPN'] | 313 pn = pageinfo['tocPN'] |
290 url = docinfo['url'] | 314 url = docinfo['url'] |
291 selfurl = self.absolute_url() | 315 selfurl = self.absolute_url() |
292 viewMode= pageinfo['viewMode'] | 316 viewMode= pageinfo['viewMode'] |
293 tocMode = pageinfo['tocMode'] | 317 tocMode = pageinfo['tocMode'] |
294 tocPN = pageinfo['tocPN'] | 318 tocPN = pageinfo['tocPN'] |
295 pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn), outputUnicode=False) | 319 |
296 page = pagexml.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) | 320 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) |
321 data = pagexml.read() | |
322 pagexml.close() | |
323 | |
324 page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) | |
297 text = page.replace('mode=image','mode=texttool') | 325 text = page.replace('mode=image','mode=texttool') |
298 return text | 326 return text |
299 | 327 |
300 | 328 |