Mercurial > hg > documentViewer
comparison MpdlXmlTextServer.py @ 419:69205c9d9404
*** empty log message ***
author | abukhman |
---|---|
date | Thu, 06 Jan 2011 16:44:18 +0100 |
parents | bed1ff5eaec1 |
children | a935129e169f |
comparison
equal
deleted
inserted
replaced
418:bed1ff5eaec1 | 419:69205c9d9404 |
---|---|
36 def getServerData(self, method, data=None): | 36 def getServerData(self, method, data=None): |
37 """returns result from text server for method+data""" | 37 """returns result from text server for method+data""" |
38 url = self.serverUrl+method | 38 url = self.serverUrl+method |
39 return documentViewer.getHttpData(url,data,timeout=self.timeout) | 39 return documentViewer.getHttpData(url,data,timeout=self.timeout) |
40 | 40 |
41 def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None, characterNormalization=None, optionToggle=None): | 41 def getSearch(self, pageinfo=None, docinfo=None, lemma=None): |
42 """get search list""" | 42 """get search list""" |
43 docpath = docinfo['textURLPath'] | 43 docpath = docinfo['textURLPath'] |
44 url = docinfo['url'] | 44 url = docinfo['url'] |
45 pagesize = pageinfo['queryPageSize'] | 45 pagesize = pageinfo['queryPageSize'] |
46 pn = pageinfo['searchPN'] | 46 pn = pageinfo.get('searchPN',1) |
47 sn = pageinfo['sn'] | 47 sn = pageinfo['sn'] |
48 highlightQuery = pageinfo['highlightQuery'] | 48 highlightQuery = pageinfo['highlightQuery'] |
49 query =pageinfo['query'] | 49 query =pageinfo['query'] |
50 queryType =pageinfo['queryType'] | 50 queryType =pageinfo['queryType'] |
51 viewMode= pageinfo['viewMode'] | 51 viewMode= pageinfo['viewMode'] |
54 optionToggle = pageinfo['optionToggle'] | 54 optionToggle = pageinfo['optionToggle'] |
55 tocPN = pageinfo['tocPN'] | 55 tocPN = pageinfo['tocPN'] |
56 selfurl = self.absolute_url() | 56 selfurl = self.absolute_url() |
57 | 57 |
58 data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&optionToggle=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization,optionToggle ,urllib.quote(highlightQuery))) | 58 data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&optionToggle=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization,optionToggle ,urllib.quote(highlightQuery))) |
59 #page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) | 59 |
60 | |
61 pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) | 60 pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) |
62 pagedom = Parse(pagexml) | 61 pagedom = Parse(pagexml) |
62 | |
63 """ | |
64 pagedivs = pagedom.xpath("//div[@class='queryResultHits']") | |
65 if (pagedivs == pagedom.xpath("//div[@class='queryResultHits']")): | |
66 if len(pagedivs)>0: | |
67 docinfo['queryResultHits'] = int(getTextFromNode(pagedivs[0])) | |
68 s = getTextFromNode(pagedivs[0]) | |
69 s1 = int(s)/10+1 | |
70 try: | |
71 docinfo['queryResultHits'] = int(s1) | |
72 logging.debug("SEARCH ENTRIES: %s"%(s1)) | |
73 except: | |
74 docinfo['queryResultHits'] = 0 | |
75 """ | |
63 if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): | 76 if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): |
64 pagedivs = pagedom.xpath("//div[@class='queryResultPage']") | 77 pagedivs = pagedom.xpath("//div[@class='queryResultPage']") |
65 if len(pagedivs)>0: | 78 if len(pagedivs)>0: |
66 pagenode=pagedivs[0] | 79 pagenode=pagedivs[0] |
67 links=pagenode.xpath("//a") | 80 links=pagenode.xpath("//a") |
115 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") | 128 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") |
116 l.setAttributeNS(None, 'onClick', 'popupWin.focus();') | 129 l.setAttributeNS(None, 'onClick', 'popupWin.focus();') |
117 return serializeNode(pagenode) | 130 return serializeNode(pagenode) |
118 return "no text here" | 131 return "no text here" |
119 | 132 |
120 | 133 |
121 def getNumTextPages (self, docinfo=None): | |
122 """get list of pages from fulltext (texts without images) and put in docinfo""" | |
123 logging.debug("getNumTextPages") | |
124 docpath = docinfo['textURLPath'] | |
125 selfurl = self.absolute_url() | |
126 #viewMode= pageinfo['viewMode'] | |
127 if 'numPages' in docinfo: | |
128 # allredy there | |
129 return docinfo | |
130 | |
131 text = self.getServerData("page-fragment.xql","document=%s"%(docinfo['textURLPath'])) | |
132 dom =Parse(text) | |
133 pagedivs = dom.xpath("//div[@class='countPages']") | |
134 logging.debug ("pagedivs=%s"%(pagedivs)) | |
135 if len(pagedivs)>0: | |
136 docinfo['numPages']= int(getTextFromNode(pagedivs[0])) | |
137 return docinfo | |
138 | |
139 def getTocEntries (self, docinfo=None): | |
140 """ number of text entries""" | |
141 docinfo = self.getInfoFromPage(docinfo) | |
142 logging.debug("gettocentries: docinfo=%s"%docinfo) | |
143 return docinfo['tocEntries'] | |
144 | |
145 def getFigureEntries (self, docinfo=None): | |
146 """ number of figure entries""" | |
147 docinfo = self.getInfoFromPage(docinfo) | |
148 return docinfo['figureEntries'] | |
149 | |
150 def getGisPlaces(self, docinfo=None, pageinfo=None): | 134 def getGisPlaces(self, docinfo=None, pageinfo=None): |
151 """ Show all Gis Places of whole Page""" | 135 """ Show all Gis Places of whole Page""" |
152 xpath='//place' | 136 xpath='//place' |
153 docpath = docinfo.get('textURLPath',None) | 137 docpath = docinfo.get('textURLPath',None) |
154 if not docpath: | 138 if not docpath: |
188 href= hrefNode.nodeValue | 172 href= hrefNode.nodeValue |
189 hrefList.append(href) | 173 hrefList.append(href) |
190 myList = ",".join(hrefList) | 174 myList = ",".join(hrefList) |
191 logging.debug("getALLGisPlaces :%s"%(myList)) | 175 logging.debug("getALLGisPlaces :%s"%(myList)) |
192 return myList | 176 return myList |
193 | 177 |
194 def getOrigPages (self, docinfo=None, pageinfo=None): | |
195 """Show original page """ | |
196 docpath = docinfo.get('textURLPath',None) | |
197 if not docpath: | |
198 return None | |
199 selfurl = self.absolute_url() | |
200 pn =pageinfo['current'] | |
201 | |
202 viewMode= pageinfo['viewMode'] | |
203 text = self.getServerData("page-fragment.xql","document=%s&mode=%s&pn=%s"%(docinfo['textURLPath'], 'text', pn)) | |
204 dom =Parse(text) | |
205 pagedivs = dom.xpath("//div[@class='pageNumberOrig']") | |
206 if len(pagedivs)>0: | |
207 originalPage= getTextFromNode(pagedivs[0]) | |
208 #return docinfo['originalPage'] | |
209 return originalPage | |
210 | |
211 def getAllPlaces (self, docinfo=None): | |
212 """Show all Places if no places than 0""" | |
213 docinfo = self.getInfoFromPage(docinfo) | |
214 return docinfo['allPlaces'] | |
215 | |
216 def getInfoFromPage(self, docinfo=None): | |
217 """ extract diverse info from page-fragment""" | |
218 docpath = docinfo['textURLPath'] | |
219 if 'allPlaces' in docinfo: | |
220 # allredy there | |
221 return docinfo | |
222 | |
223 if (docpath is not None): | |
224 text = self.getServerData("page-fragment.xql","document=%s"%(docinfo['textURLPath'])) | |
225 dom = Parse(text) | |
226 # figureEntries | |
227 pagedivs = dom.xpath("//div[@class='countFigureEntries']") | |
228 s = getTextFromNode(pagedivs[0]) | |
229 try: | |
230 docinfo['figureEntries'] = int(s) | |
231 except: | |
232 docinfo['figureEntries'] = 0 | |
233 # tocEntries | |
234 pagedivs = dom.xpath("//div[@class='countTocEntries']") | |
235 s = getTextFromNode(pagedivs[0]) | |
236 try: | |
237 docinfo['tocEntries'] = int(s) | |
238 except: | |
239 docinfo['tocEntries'] = 0 | |
240 # allPlaces | |
241 pagedivs = dom.xpath("//div[@class='countPlaces']") | |
242 s = getTextFromNode(pagedivs[0]) | |
243 try: | |
244 docinfo['allPlaces'] = int(s) | |
245 except: | |
246 docinfo['allPlaces'] = 0 | |
247 | |
248 else: | |
249 # no full text -- init to 0 | |
250 docinfo['figureEntries'] = 0 | |
251 docinfo['tocEntries'] = 0 | |
252 docinfo['allPlaces'] = 0 | |
253 | |
254 return docinfo | |
255 | |
256 | 178 |
257 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, viewMode=None, tocMode=None, tocPN=None, characterNormalization="reg", highlightQuery=None, sn=None, optionToggle=None): | 179 def getTextPage(self, docinfo=None, pageinfo=None): |
258 """returns single page from fulltext""" | 180 """returns single page from fulltext""" |
259 docpath = docinfo['textURLPath'] | 181 docpath = docinfo['textURLPath'] |
260 path = docinfo['textURLPath'] | 182 path = docinfo['textURLPath'] |
261 url = docinfo['url'] | 183 url = docinfo.get('url',None) |
262 name = docinfo['name'] | 184 name = docinfo.get('name',None) |
263 viewMode= pageinfo['viewMode'] | 185 pn =pageinfo['current'] |
186 #viewMode= pageinfo['viewMode'] | |
264 sn = pageinfo['sn'] | 187 sn = pageinfo['sn'] |
265 highlightQuery = pageinfo['highlightQuery'] | 188 highlightQuery = pageinfo['highlightQuery'] |
266 | 189 mode = pageinfo ['viewMode'] |
267 tocMode = pageinfo['tocMode'] | 190 tocMode = pageinfo['tocMode'] |
268 characterNormalization=pageinfo['characterNormalization'] | 191 characterNormalization=pageinfo['characterNormalization'] |
269 tocPN = pageinfo['tocPN'] | 192 tocPN = pageinfo['tocPN'] |
270 selfurl = self.absolute_url() | 193 selfurl = self.absolute_url() |
271 if mode == "text_dict": | 194 if mode == "text_dict": |
272 textmode = "textPollux" | 195 textmode = "textPollux" |
273 else: | 196 else: |
274 textmode = mode | 197 textmode = mode |
275 #logging.debug("documentViewer highlightQuery: %s"%(highlightQuery)) | 198 |
276 textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization) | 199 textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization) |
277 if highlightQuery is not None: | 200 if highlightQuery is not None: |
278 textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn) | 201 textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn) |
279 #logging.debug("documentViewer highlightQuery: %s"%(highlightQuery)) | 202 |
280 pagexml = self.getServerData("page-fragment.xql",textParam) | 203 pagexml = self.getServerData("page-fragment.xql",textParam) |
281 logging.debug("documentViewer highlightQuery: %s"%(highlightQuery)) | 204 dom = Parse(pagexml) |
282 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False) | 205 |
283 | 206 #original Pages |
284 pagedom = Parse(pagexml) | 207 pagedivs = dom.xpath("//div[@class='pageNumberOrig']") |
208 if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"): | |
209 if len(pagedivs)>0: | |
210 docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0]) | |
211 logging.debug("ORIGINAL PAGES: %s"%(docinfo['pageNumberOrig'])) | |
212 | |
213 #figureEntries | |
214 pagedivs = dom.xpath("//div[@class='countFigureEntries']") | |
215 if pagedivs == dom.xpath("//div[@class='countFigureEntries']"): | |
216 if len(pagedivs)>0: | |
217 docinfo['countFigureEntries'] = getTextFromNode(pagedivs[0]) | |
218 s = getTextFromNode(pagedivs[0]) | |
219 if s=='0': | |
220 try: | |
221 docinfo['countFigureEntries'] = int(s) | |
222 logging.debug("FIGURE ENTRIES: %s"%(s)) | |
223 except: | |
224 docinfo['countFigureEntries'] = 0 | |
225 else: | |
226 s1 = int(s)/30+1 | |
227 try: | |
228 docinfo['countFigureEntries'] = int(s1) | |
229 logging.debug("FIGURE ENTRIES: %s"%(s1)) | |
230 except: | |
231 docinfo['countFigureEntries'] = 0 | |
232 | |
233 #allPlaces | |
234 pagedivs = dom.xpath("//div[@class='countPlaces']") | |
235 if pagedivs == dom.xpath("//div[@class='countPlaces']"): | |
236 if len(pagedivs)>0: | |
237 docinfo['countPlaces']= getTextFromNode(pagedivs[0]) | |
238 s = getTextFromNode(pagedivs[0]) | |
239 try: | |
240 docinfo['countPlaces'] = int(s) | |
241 logging.debug("PLACES HERE: %s"%(s)) | |
242 except: | |
243 docinfo['countPlaces'] = 0 | |
244 | |
245 #tocEntries | |
246 pagedivs = dom.xpath("//div[@class='countTocEntries']") | |
247 if pagedivs == dom.xpath("//div[@class='countTocEntries']"): | |
248 if len(pagedivs)>0: | |
249 docinfo['countTocEntries'] = int(getTextFromNode(pagedivs[0])) | |
250 s = getTextFromNode(pagedivs[0]) | |
251 if s=='0': | |
252 try: | |
253 docinfo['countTocEntries'] = int(s) | |
254 logging.debug("TEXT ENTRIES: %s"%(s)) | |
255 except: | |
256 docinfo['countTocEntries'] = 0 | |
257 else: | |
258 s1 = int(s)/30+1 | |
259 try: | |
260 docinfo['countTocEntries'] = int(s1) | |
261 logging.debug("TEXT ENTRIES: %s"%(s1)) | |
262 except: | |
263 docinfo['countTocEntries'] = 0 | |
264 | |
265 #numTextPages | |
266 pagedivs = dom.xpath("//div[@class='countPages']") | |
267 if pagedivs == dom.xpath("//div[@class='countPages']"): | |
268 if len(pagedivs)>0: | |
269 docinfo['numPages'] = getTextFromNode(pagedivs[0]) | |
270 s = getTextFromNode(pagedivs[0]) | |
271 | |
272 try: | |
273 docinfo['numPages'] = int(s) | |
274 logging.debug("PAGE NUMBER: %s"%(s)) | |
275 | |
276 np = docinfo['numPages'] | |
277 pageinfo['end'] = min(pageinfo['end'], np) | |
278 pageinfo['numgroups'] = int(np / pageinfo['groupsize']) | |
279 if np % pageinfo['groupsize'] > 0: | |
280 pageinfo['numgroups'] += 1 | |
281 except: | |
282 docinfo['numPages'] = 0 | |
283 | |
284 else: | |
285 #no full text -- init to 0 | |
286 docinfo['pageNumberOrig'] = 0 | |
287 docinfo['countFigureEntries'] = 0 | |
288 docinfo['countPlaces'] = 0 | |
289 docinfo['countTocEntries'] = 0 | |
290 docinfo['numPages'] = 0 | |
291 #return docinfo | |
292 | |
285 # plain text mode | 293 # plain text mode |
286 if mode == "text": | 294 if mode == "text": |
287 # first div contains text | 295 # first div contains text |
288 pagedivs = pagedom.xpath("/div") | 296 pagedivs = dom.xpath("/div") |
289 if len(pagedivs) > 0: | 297 if len(pagedivs) > 0: |
290 pagenode = pagedivs[0] | 298 pagenode = pagedivs[0] |
291 links = pagenode.xpath("//a") | 299 links = pagenode.xpath("//a") |
292 for l in links: | 300 for l in links: |
293 hrefNode = l.getAttributeNodeNS(None, u"href") | 301 hrefNode = l.getAttributeNodeNS(None, u"href") |
296 if href.startswith('#note-'): | 304 if href.startswith('#note-'): |
297 hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)) | 305 hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)) |
298 return serializeNode(pagenode) | 306 return serializeNode(pagenode) |
299 if mode == "xml": | 307 if mode == "xml": |
300 # first div contains text | 308 # first div contains text |
301 pagedivs = pagedom.xpath("/div") | 309 pagedivs = dom.xpath("/div") |
302 if len(pagedivs) > 0: | 310 if len(pagedivs) > 0: |
303 pagenode = pagedivs[0] | 311 pagenode = pagedivs[0] |
304 return serializeNode(pagenode) | 312 return serializeNode(pagenode) |
305 if mode == "gis": | 313 if mode == "gis": |
306 # first div contains text | 314 # first div contains text |
307 pagedivs = pagedom.xpath("/div") | 315 pagedivs = dom.xpath("/div") |
308 if len(pagedivs) > 0: | 316 if len(pagedivs) > 0: |
309 pagenode = pagedivs[0] | 317 pagenode = pagedivs[0] |
310 links =pagenode.xpath("//a") | 318 links =pagenode.xpath("//a") |
311 for l in links: | 319 for l in links: |
312 hrefNode =l.getAttributeNodeNS(None, u"href") | 320 hrefNode =l.getAttributeNodeNS(None, u"href") |
317 l.setAttributeNS(None, 'target', '_blank') | 325 l.setAttributeNS(None, 'target', '_blank') |
318 return serializeNode(pagenode) | 326 return serializeNode(pagenode) |
319 | 327 |
320 if mode == "pureXml": | 328 if mode == "pureXml": |
321 # first div contains text | 329 # first div contains text |
322 pagedivs = pagedom.xpath("/div") | 330 pagedivs = dom.xpath("/div") |
323 if len(pagedivs) > 0: | 331 if len(pagedivs) > 0: |
324 pagenode = pagedivs[0] | 332 pagenode = pagedivs[0] |
325 return serializeNode(pagenode) | 333 return serializeNode(pagenode) |
326 # text-with-links mode | 334 # text-with-links mode |
327 if mode == "text_dict": | 335 if mode == "text_dict": |
328 # first div contains text | 336 # first div contains text |
329 pagedivs = pagedom.xpath("/div") | 337 pagedivs = dom.xpath("/div") |
330 if len(pagedivs) > 0: | 338 if len(pagedivs) > 0: |
331 pagenode = pagedivs[0] | 339 pagenode = pagedivs[0] |
332 # check all a-tags | 340 # check all a-tags |
333 links = pagenode.xpath("//a") | 341 links = pagenode.xpath("//a") |
334 for l in links: | 342 for l in links: |
373 data = self.getServerData("lt/lemma.xql","document=&language="+str(language)+"&lemma="+urllib.quote(query)) | 381 data = self.getServerData("lt/lemma.xql","document=&language="+str(language)+"&lemma="+urllib.quote(query)) |
374 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) | 382 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) |
375 return data | 383 return data |
376 | 384 |
377 def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1, optionToggle=None): | 385 def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1, optionToggle=None): |
378 """number of""" | 386 #number of |
379 docpath = docinfo['textURLPath'] | 387 docpath = docinfo['textURLPath'] |
380 pagesize = pageinfo['queryPageSize'] | 388 pagesize = pageinfo['queryPageSize'] |
381 pn = pageinfo['searchPN'] | 389 pn = pageinfo['searchPN'] |
382 query =pageinfo['query'] | 390 query =pageinfo['query'] |
383 queryType =pageinfo['queryType'] | 391 queryType =pageinfo['queryType'] |
384 tocSearch = 0 | 392 tocSearch = 0 |
385 tocDiv = None | 393 tocDiv = None |
386 | 394 |
387 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)) | 395 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)) |
388 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) | |
389 pagedom = Parse(pagexml) | 396 pagedom = Parse(pagexml) |
390 numdivs = pagedom.xpath("//div[@class='queryResultHits']") | 397 numdivs = pagedom.xpath("//div[@class='queryResultHits']") |
391 tocSearch = int(getTextFromNode(numdivs[0])) | 398 tocSearch = int(getTextFromNode(numdivs[0])) |
392 logging.debug("documentViewer (gettoc) tocSearch: %s"%(tocSearch)) | 399 logging.debug("documentViewer (gettoc) tocSearch: %s"%(tocSearch)) |
393 tc=int((tocSearch/10)+1) | 400 tc=int((tocSearch/10)+1) |
394 logging.debug("documentViewer (gettoc) tc: %s"%(tc)) | 401 logging.debug("documentViewer (gettoc) tc: %s"%(tc)) |
395 return tc | 402 return tc |
396 | 403 |
397 def getQueryResultHits(self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1, optionsClose=None): | |
398 | |
399 """number of hits in Search mode""" | |
400 docpath = docinfo['textURLPath'] | |
401 pagesize = pageinfo['queryPageSize'] | |
402 pn = pageinfo['searchPN'] | |
403 query =pageinfo['query'] | |
404 queryType =pageinfo['queryType'] | |
405 tocSearch = 0 | |
406 tocDiv = None | |
407 | |
408 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)) | |
409 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) | |
410 pagedom = Parse(pagexml) | |
411 numdivs = pagedom.xpath("//div[@class='queryResultHits']") | |
412 tocSearch = int(getTextFromNode(numdivs[0])) | |
413 tc=int((tocSearch/10)+1) | |
414 return tc | |
415 | |
416 def getQueryResultHitsText(self, docinfo=None, pageinfo=None): | |
417 """number of hits in Text of Contents mode""" | |
418 selfurl = self.absolute_url() | |
419 docpath = docinfo['textURLPath'] | |
420 viewMode= pageinfo['viewMode'] | |
421 text = self.getServerData("page-fragment.xql","document=%s&mode=%s"%(docinfo['textURLPath'], 'text')) | |
422 dom =Parse(text) | |
423 pagedivs = dom.xpath("//div[@class='countTocEntries']") | |
424 logging.debug ("pagedivs=%s"%(pagedivs)) | |
425 if len(pagedivs)>0: | |
426 originalPage= (getTextFromNode(pagedivs[0])) | |
427 tc = int (originalPage) | |
428 tc1 =tc/30+1 | |
429 return tc1 | |
430 | |
431 def getQueryResultHitsFigures(self, docinfo=None, pageinfo=None): | |
432 """number of hits in Text of Figures mode""" | |
433 | |
434 selfurl = self.absolute_url() | |
435 docpath = docinfo['textURLPath'] | |
436 viewMode= pageinfo['viewMode'] | |
437 text = self.getServerData("page-fragment.xql","document=%s&mode=%s"%(docinfo['textURLPath'], 'text')) | |
438 dom =Parse(text) | |
439 pagedivs = dom.xpath("//div[@class='countFigureEntries']") | |
440 logging.debug ("pagedivs=%s"%(pagedivs)) | |
441 if len(pagedivs)>0: | |
442 originalPage= (getTextFromNode(pagedivs[0])) | |
443 tc = int (originalPage) | |
444 tc1 =tc/30+1 | |
445 return tc1 | |
446 | |
447 | |
448 def getToc(self, mode="text", docinfo=None): | 404 def getToc(self, mode="text", docinfo=None): |
449 """loads table of contents and stores in docinfo""" | 405 """loads table of contents and stores in docinfo""" |
450 logging.debug("documentViewer (gettoc) mode: %s"%(mode)) | 406 logging.debug("documentViewer (gettoc) mode: %s"%(mode)) |
451 if mode == "none": | 407 if mode == "none": |
452 return docinfo | 408 return docinfo |
465 # number of entries in toc | 421 # number of entries in toc |
466 tocSize = 0 | 422 tocSize = 0 |
467 tocDiv = None | 423 tocDiv = None |
468 | 424 |
469 pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) | 425 pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) |
470 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) | 426 |
471 # post-processing downloaded xml | 427 # post-processing downloaded xml |
472 pagedom = Parse(pagexml) | 428 pagedom = Parse(pagexml) |
473 # get number of entries | 429 # get number of entries |
474 numdivs = pagedom.xpath("//div[@class='queryResultHits']") | 430 numdivs = pagedom.xpath("//div[@class='queryResultHits']") |
475 if len(numdivs) > 0: | 431 if len(numdivs) > 0: |
497 tocPN = pageinfo['tocPN'] | 453 tocPN = pageinfo['tocPN'] |
498 | 454 |
499 data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm&optionToggle=1"%(docpath,queryType, pagesize, pn)) | 455 data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm&optionToggle=1"%(docpath,queryType, pagesize, pn)) |
500 page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&optionToggle=1'%(selfurl,url, viewMode, tocMode, tocPN)) | 456 page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&optionToggle=1'%(selfurl,url, viewMode, tocMode, tocPN)) |
501 text = page.replace('mode=image','mode=texttool') | 457 text = page.replace('mode=image','mode=texttool') |
502 logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) | 458 #logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) |
503 #logging.debug("documentViewer (characterNormalization) text: %s"%(text)) | 459 |
504 return text | 460 return text |
505 | 461 |
506 def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): | 462 def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): |
507 """change settings""" | 463 """change settings""" |
508 self.title=title | 464 self.title=title |