comparison extraFunction.py @ 3:3ba8479c7aba modularisierung

new function
author abukhman
date Wed, 16 Jun 2010 11:16:02 +0200
parents 8cc283757c39
children e9085ba2bb51
comparison
equal deleted inserted replaced
2:8cc283757c39 3:3ba8479c7aba
19 import cgi 19 import cgi
20 import urllib 20 import urllib
21 import logging 21 import logging
22 import math 22 import math
23 import documentViewer 23 import documentViewer
24 24 import urllib2
25 import urllib
25 import urlparse 26 import urlparse
26 from types import * 27 from types import *
27 28
28 def getTextFromNode(nodename): 29 def getTextFromNode(nodename):
29 "get the cdata content of a node" 30 "get the cdata content of a node"
67 queryType =pageinfo['queryType'] 68 queryType =pageinfo['queryType']
68 viewMode= pageinfo['viewMode'] 69 viewMode= pageinfo['viewMode']
69 tocMode = pageinfo['tocMode'] 70 tocMode = pageinfo['tocMode']
70 tocPN = pageinfo['tocPN'] 71 tocPN = pageinfo['tocPN']
71 selfurl = self.absolute_url() 72 selfurl = self.absolute_url()
72 page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) 73
73 pagexml = page.replace('?document=%s'%str(docpath),'?url=%s'%url) 74 page = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery))
75 #page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False)
76 data = page.read()
77 page.close()
78
79 pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
74 pagedom = Parse(pagexml) 80 pagedom = Parse(pagexml)
75 if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): 81 if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"):
76 pagedivs = pagedom.xpath("//div[@class='queryResultPage']") 82 pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
77 if len(pagedivs)>0: 83 if len(pagedivs)>0:
78 pagenode=pagedivs[0] 84 pagenode=pagedivs[0]
130 return "no text here" 136 return "no text here"
131 137
def getNumPages(self,docinfo=None):
    """get the number of pages from the fulltext and put it in docinfo

    Sends the xquery '//pb' for docinfo['textURLPath'] to the MPDL
    xquery.xql interface, counts the occurrences of "<pb " in the
    response body and stores that count in docinfo['numPages'].

    docinfo -- dict that must contain 'textURLPath'; it is modified in place.

    Returns the same docinfo dict. Errors raised by urllib2.urlopen are
    not caught here.
    """
    xquery = '//pb'
    # passing the parameters as second argument makes urllib2 send them as request data
    response = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
    try:
        # urlopen returns a file-like response, not a string:
        # the body has to be read before "<pb " can be counted
        text = response.read()
    finally:
        response.close()
    docinfo['numPages'] = text.count("<pb ")
    return docinfo
138 145
139 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None): 146 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None):
140 """returns single page from fulltext""" 147 """returns single page from fulltext"""
151 textmode = mode 158 textmode = mode
152 159
153 textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn) 160 textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn)
154 if highlightQuery is not None: 161 if highlightQuery is not None:
155 textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) 162 textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn)
156 pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False) 163
164 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql",textParam)
165 """pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False)"""
166
157 pagedom = Parse(pagexml) 167 pagedom = Parse(pagexml)
158 # plain text mode 168 # plain text mode
159 if mode == "text": 169 if mode == "text":
160 # first div contains text 170 # first div contains text
161 pagedivs = pagedom.xpath("/div") 171 pagedivs = pagedom.xpath("/div")
214 return serializeNode(pagenode) 224 return serializeNode(pagenode)
215 return "no text here" 225 return "no text here"
216 226
def getTranslate(self, query=None, language=None):
    """translate into another language

    Sends language and query to the MPDL lex.xql interface and returns
    the raw response body. query is passed through url_quote (defined
    outside this block) before it is put into the parameter string;
    language is inserted as-is.
    """
    params = "document=&language="+str(language)+"&query="+url_quote(str(query))
    response = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lex.xql",params)
    try:
        return response.read()
    finally:
        # close the connection even when read() fails
        response.close()
221 234
def getLemma(self, lemma=None, language=None):
    """similar words (lemma)

    Sends language and lemma to the MPDL lemma.xql interface and returns
    the raw response body. lemma is passed through url_quote (defined
    outside this block) before it is put into the parameter string;
    language is inserted as-is.
    """
    params = "document=&language="+str(language)+"&lemma="+url_quote(str(lemma))
    response = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql",params)
    try:
        return response.read()
    finally:
        # close the connection even when read() fails
        response.close()
226 242
def getLemmaNew(self, query=None, language=None):
    """similar words (lemma), taking the word as 'query'

    Sends language and query to the MPDL lemma.xql interface (query is
    sent as the 'lemma' parameter) and returns the raw response body.
    query is passed through url_quote (defined outside this block)
    before it is put into the parameter string; language is inserted as-is.
    """
    params = "document=&language="+str(language)+"&lemma="+url_quote(str(query))
    response = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql",params)
    try:
        return response.read()
    finally:
        # close the connection even when read() fails
        response.close()
231 251
232 def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): 252 def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
233 """number of""" 253 """number of"""
234 docpath = docinfo['textURLPath'] 254 docpath = docinfo['textURLPath']
235 pagesize = pageinfo['queryPageSize'] 255 pagesize = pageinfo['queryPageSize']
236 pn = pageinfo['searchPN'] 256 pn = pageinfo['searchPN']
237 query =pageinfo['query'] 257 query =pageinfo['query']
238 queryType =pageinfo['queryType'] 258 queryType =pageinfo['queryType']
239 tocSearch = 0 259 tocSearch = 0
240 tocDiv = None 260 tocDiv = None
241 pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) 261
262 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn))
263 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False)
242 pagedom = Parse(pagexml) 264 pagedom = Parse(pagexml)
243 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 265 numdivs = pagedom.xpath("//div[@class='queryResultHits']")
244 tocSearch = int(getTextFromNode(numdivs[0])) 266 tocSearch = int(getTextFromNode(numdivs[0]))
245 tc=int((tocSearch/10)+1) 267 tc=int((tocSearch/10)+1)
246 logging.debug("documentViewer (gettoc) tc: %s"%(tc)) 268 logging.debug("documentViewer (gettoc) tc: %s"%(tc))
264 else: 286 else:
265 queryType = mode 287 queryType = mode
266 # number of entries in toc 288 # number of entries in toc
267 tocSize = 0 289 tocSize = 0
268 tocDiv = None 290 tocDiv = None
269 pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) 291
292 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
293 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)
270 # post-processing downloaded xml 294 # post-processing downloaded xml
271 pagedom = Parse(pagexml) 295 pagedom = Parse(pagexml)
272 # get number of entries 296 # get number of entries
273 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 297 numdivs = pagedom.xpath("//div[@class='queryResultHits']")
274 if len(numdivs) > 0: 298 if len(numdivs) > 0:
289 pn = pageinfo['tocPN'] 313 pn = pageinfo['tocPN']
290 url = docinfo['url'] 314 url = docinfo['url']
291 selfurl = self.absolute_url() 315 selfurl = self.absolute_url()
292 viewMode= pageinfo['viewMode'] 316 viewMode= pageinfo['viewMode']
293 tocMode = pageinfo['tocMode'] 317 tocMode = pageinfo['tocMode']
294 tocPN = pageinfo['tocPN'] 318 tocPN = pageinfo['tocPN']
295 pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn), outputUnicode=False) 319
296 page = pagexml.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) 320 pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
321 data = pagexml.read()
322 pagexml.close()
323
324 page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN))
297 text = page.replace('mode=image','mode=texttool') 325 text = page.replace('mode=image','mode=texttool')
298 return text 326 return text
299 327
300 328