comparison MpdlXmlTextServer.py @ 419:69205c9d9404

*** empty log message ***
author abukhman
date Thu, 06 Jan 2011 16:44:18 +0100
parents bed1ff5eaec1
children a935129e169f
comparison
equal deleted inserted replaced
418:bed1ff5eaec1 419:69205c9d9404
36 def getServerData(self, method, data=None): 36 def getServerData(self, method, data=None):
37 """returns result from text server for method+data""" 37 """returns result from text server for method+data"""
38 url = self.serverUrl+method 38 url = self.serverUrl+method
39 return documentViewer.getHttpData(url,data,timeout=self.timeout) 39 return documentViewer.getHttpData(url,data,timeout=self.timeout)
40 40
41 def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None, characterNormalization=None, optionToggle=None): 41 def getSearch(self, pageinfo=None, docinfo=None, lemma=None):
42 """get search list""" 42 """get search list"""
43 docpath = docinfo['textURLPath'] 43 docpath = docinfo['textURLPath']
44 url = docinfo['url'] 44 url = docinfo['url']
45 pagesize = pageinfo['queryPageSize'] 45 pagesize = pageinfo['queryPageSize']
46 pn = pageinfo['searchPN'] 46 pn = pageinfo.get('searchPN',1)
47 sn = pageinfo['sn'] 47 sn = pageinfo['sn']
48 highlightQuery = pageinfo['highlightQuery'] 48 highlightQuery = pageinfo['highlightQuery']
49 query =pageinfo['query'] 49 query =pageinfo['query']
50 queryType =pageinfo['queryType'] 50 queryType =pageinfo['queryType']
51 viewMode= pageinfo['viewMode'] 51 viewMode= pageinfo['viewMode']
54 optionToggle = pageinfo['optionToggle'] 54 optionToggle = pageinfo['optionToggle']
55 tocPN = pageinfo['tocPN'] 55 tocPN = pageinfo['tocPN']
56 selfurl = self.absolute_url() 56 selfurl = self.absolute_url()
57 57
58 data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&optionToggle=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization,optionToggle ,urllib.quote(highlightQuery))) 58 data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&optionToggle=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization,optionToggle ,urllib.quote(highlightQuery)))
59 #page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) 59
60
61 pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) 60 pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
62 pagedom = Parse(pagexml) 61 pagedom = Parse(pagexml)
62
63 """
64 pagedivs = pagedom.xpath("//div[@class='queryResultHits']")
65 if (pagedivs == pagedom.xpath("//div[@class='queryResultHits']")):
66 if len(pagedivs)>0:
67 docinfo['queryResultHits'] = int(getTextFromNode(pagedivs[0]))
68 s = getTextFromNode(pagedivs[0])
69 s1 = int(s)/10+1
70 try:
71 docinfo['queryResultHits'] = int(s1)
72 logging.debug("SEARCH ENTRIES: %s"%(s1))
73 except:
74 docinfo['queryResultHits'] = 0
75 """
63 if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): 76 if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"):
64 pagedivs = pagedom.xpath("//div[@class='queryResultPage']") 77 pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
65 if len(pagedivs)>0: 78 if len(pagedivs)>0:
66 pagenode=pagedivs[0] 79 pagenode=pagedivs[0]
67 links=pagenode.xpath("//a") 80 links=pagenode.xpath("//a")
115 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") 128 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
116 l.setAttributeNS(None, 'onClick', 'popupWin.focus();') 129 l.setAttributeNS(None, 'onClick', 'popupWin.focus();')
117 return serializeNode(pagenode) 130 return serializeNode(pagenode)
118 return "no text here" 131 return "no text here"
119 132
120 133
121 def getNumTextPages (self, docinfo=None):
122 """get list of pages from fulltext (texts without images) and put in docinfo"""
123 logging.debug("getNumTextPages")
124 docpath = docinfo['textURLPath']
125 selfurl = self.absolute_url()
126 #viewMode= pageinfo['viewMode']
127 if 'numPages' in docinfo:
128 # allredy there
129 return docinfo
130
131 text = self.getServerData("page-fragment.xql","document=%s"%(docinfo['textURLPath']))
132 dom =Parse(text)
133 pagedivs = dom.xpath("//div[@class='countPages']")
134 logging.debug ("pagedivs=%s"%(pagedivs))
135 if len(pagedivs)>0:
136 docinfo['numPages']= int(getTextFromNode(pagedivs[0]))
137 return docinfo
138
139 def getTocEntries (self, docinfo=None):
140 """ number of text entries"""
141 docinfo = self.getInfoFromPage(docinfo)
142 logging.debug("gettocentries: docinfo=%s"%docinfo)
143 return docinfo['tocEntries']
144
145 def getFigureEntries (self, docinfo=None):
146 """ number of figure entries"""
147 docinfo = self.getInfoFromPage(docinfo)
148 return docinfo['figureEntries']
149
150 def getGisPlaces(self, docinfo=None, pageinfo=None): 134 def getGisPlaces(self, docinfo=None, pageinfo=None):
151 """ Show all Gis Places of whole Page""" 135 """ Show all Gis Places of whole Page"""
152 xpath='//place' 136 xpath='//place'
153 docpath = docinfo.get('textURLPath',None) 137 docpath = docinfo.get('textURLPath',None)
154 if not docpath: 138 if not docpath:
188 href= hrefNode.nodeValue 172 href= hrefNode.nodeValue
189 hrefList.append(href) 173 hrefList.append(href)
190 myList = ",".join(hrefList) 174 myList = ",".join(hrefList)
191 logging.debug("getALLGisPlaces :%s"%(myList)) 175 logging.debug("getALLGisPlaces :%s"%(myList))
192 return myList 176 return myList
193 177
194 def getOrigPages (self, docinfo=None, pageinfo=None):
195 """Show original page """
196 docpath = docinfo.get('textURLPath',None)
197 if not docpath:
198 return None
199 selfurl = self.absolute_url()
200 pn =pageinfo['current']
201
202 viewMode= pageinfo['viewMode']
203 text = self.getServerData("page-fragment.xql","document=%s&mode=%s&pn=%s"%(docinfo['textURLPath'], 'text', pn))
204 dom =Parse(text)
205 pagedivs = dom.xpath("//div[@class='pageNumberOrig']")
206 if len(pagedivs)>0:
207 originalPage= getTextFromNode(pagedivs[0])
208 #return docinfo['originalPage']
209 return originalPage
210
211 def getAllPlaces (self, docinfo=None):
212 """Show all Places if no places than 0"""
213 docinfo = self.getInfoFromPage(docinfo)
214 return docinfo['allPlaces']
215
216 def getInfoFromPage(self, docinfo=None):
217 """ extract diverse info from page-fragment"""
218 docpath = docinfo['textURLPath']
219 if 'allPlaces' in docinfo:
220 # allredy there
221 return docinfo
222
223 if (docpath is not None):
224 text = self.getServerData("page-fragment.xql","document=%s"%(docinfo['textURLPath']))
225 dom = Parse(text)
226 # figureEntries
227 pagedivs = dom.xpath("//div[@class='countFigureEntries']")
228 s = getTextFromNode(pagedivs[0])
229 try:
230 docinfo['figureEntries'] = int(s)
231 except:
232 docinfo['figureEntries'] = 0
233 # tocEntries
234 pagedivs = dom.xpath("//div[@class='countTocEntries']")
235 s = getTextFromNode(pagedivs[0])
236 try:
237 docinfo['tocEntries'] = int(s)
238 except:
239 docinfo['tocEntries'] = 0
240 # allPlaces
241 pagedivs = dom.xpath("//div[@class='countPlaces']")
242 s = getTextFromNode(pagedivs[0])
243 try:
244 docinfo['allPlaces'] = int(s)
245 except:
246 docinfo['allPlaces'] = 0
247
248 else:
249 # no full text -- init to 0
250 docinfo['figureEntries'] = 0
251 docinfo['tocEntries'] = 0
252 docinfo['allPlaces'] = 0
253
254 return docinfo
255
256 178
257 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, viewMode=None, tocMode=None, tocPN=None, characterNormalization="reg", highlightQuery=None, sn=None, optionToggle=None): 179 def getTextPage(self, docinfo=None, pageinfo=None):
258 """returns single page from fulltext""" 180 """returns single page from fulltext"""
259 docpath = docinfo['textURLPath'] 181 docpath = docinfo['textURLPath']
260 path = docinfo['textURLPath'] 182 path = docinfo['textURLPath']
261 url = docinfo['url'] 183 url = docinfo.get('url',None)
262 name = docinfo['name'] 184 name = docinfo.get('name',None)
263 viewMode= pageinfo['viewMode'] 185 pn =pageinfo['current']
186 #viewMode= pageinfo['viewMode']
264 sn = pageinfo['sn'] 187 sn = pageinfo['sn']
265 highlightQuery = pageinfo['highlightQuery'] 188 highlightQuery = pageinfo['highlightQuery']
266 189 mode = pageinfo ['viewMode']
267 tocMode = pageinfo['tocMode'] 190 tocMode = pageinfo['tocMode']
268 characterNormalization=pageinfo['characterNormalization'] 191 characterNormalization=pageinfo['characterNormalization']
269 tocPN = pageinfo['tocPN'] 192 tocPN = pageinfo['tocPN']
270 selfurl = self.absolute_url() 193 selfurl = self.absolute_url()
271 if mode == "text_dict": 194 if mode == "text_dict":
272 textmode = "textPollux" 195 textmode = "textPollux"
273 else: 196 else:
274 textmode = mode 197 textmode = mode
275 #logging.debug("documentViewer highlightQuery: %s"%(highlightQuery)) 198
276 textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization) 199 textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization)
277 if highlightQuery is not None: 200 if highlightQuery is not None:
278 textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn) 201 textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn)
279 #logging.debug("documentViewer highlightQuery: %s"%(highlightQuery)) 202
280 pagexml = self.getServerData("page-fragment.xql",textParam) 203 pagexml = self.getServerData("page-fragment.xql",textParam)
281 logging.debug("documentViewer highlightQuery: %s"%(highlightQuery)) 204 dom = Parse(pagexml)
282 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False) 205
283 206 #original Pages
284 pagedom = Parse(pagexml) 207 pagedivs = dom.xpath("//div[@class='pageNumberOrig']")
208 if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"):
209 if len(pagedivs)>0:
210 docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0])
211 logging.debug("ORIGINAL PAGES: %s"%(docinfo['pageNumberOrig']))
212
213 #figureEntries
214 pagedivs = dom.xpath("//div[@class='countFigureEntries']")
215 if pagedivs == dom.xpath("//div[@class='countFigureEntries']"):
216 if len(pagedivs)>0:
217 docinfo['countFigureEntries'] = getTextFromNode(pagedivs[0])
218 s = getTextFromNode(pagedivs[0])
219 if s=='0':
220 try:
221 docinfo['countFigureEntries'] = int(s)
222 logging.debug("FIGURE ENTRIES: %s"%(s))
223 except:
224 docinfo['countFigureEntries'] = 0
225 else:
226 s1 = int(s)/30+1
227 try:
228 docinfo['countFigureEntries'] = int(s1)
229 logging.debug("FIGURE ENTRIES: %s"%(s1))
230 except:
231 docinfo['countFigureEntries'] = 0
232
233 #allPlaces
234 pagedivs = dom.xpath("//div[@class='countPlaces']")
235 if pagedivs == dom.xpath("//div[@class='countPlaces']"):
236 if len(pagedivs)>0:
237 docinfo['countPlaces']= getTextFromNode(pagedivs[0])
238 s = getTextFromNode(pagedivs[0])
239 try:
240 docinfo['countPlaces'] = int(s)
241 logging.debug("PLACES HERE: %s"%(s))
242 except:
243 docinfo['countPlaces'] = 0
244
245 #tocEntries
246 pagedivs = dom.xpath("//div[@class='countTocEntries']")
247 if pagedivs == dom.xpath("//div[@class='countTocEntries']"):
248 if len(pagedivs)>0:
249 docinfo['countTocEntries'] = int(getTextFromNode(pagedivs[0]))
250 s = getTextFromNode(pagedivs[0])
251 if s=='0':
252 try:
253 docinfo['countTocEntries'] = int(s)
254 logging.debug("TEXT ENTRIES: %s"%(s))
255 except:
256 docinfo['countTocEntries'] = 0
257 else:
258 s1 = int(s)/30+1
259 try:
260 docinfo['countTocEntries'] = int(s1)
261 logging.debug("TEXT ENTRIES: %s"%(s1))
262 except:
263 docinfo['countTocEntries'] = 0
264
265 #numTextPages
266 pagedivs = dom.xpath("//div[@class='countPages']")
267 if pagedivs == dom.xpath("//div[@class='countPages']"):
268 if len(pagedivs)>0:
269 docinfo['numPages'] = getTextFromNode(pagedivs[0])
270 s = getTextFromNode(pagedivs[0])
271
272 try:
273 docinfo['numPages'] = int(s)
274 logging.debug("PAGE NUMBER: %s"%(s))
275
276 np = docinfo['numPages']
277 pageinfo['end'] = min(pageinfo['end'], np)
278 pageinfo['numgroups'] = int(np / pageinfo['groupsize'])
279 if np % pageinfo['groupsize'] > 0:
280 pageinfo['numgroups'] += 1
281 except:
282 docinfo['numPages'] = 0
283
284 else:
285 #no full text -- init to 0
286 docinfo['pageNumberOrig'] = 0
287 docinfo['countFigureEntries'] = 0
288 docinfo['countPlaces'] = 0
289 docinfo['countTocEntries'] = 0
290 docinfo['numPages'] = 0
291 #return docinfo
292
285 # plain text mode 293 # plain text mode
286 if mode == "text": 294 if mode == "text":
287 # first div contains text 295 # first div contains text
288 pagedivs = pagedom.xpath("/div") 296 pagedivs = dom.xpath("/div")
289 if len(pagedivs) > 0: 297 if len(pagedivs) > 0:
290 pagenode = pagedivs[0] 298 pagenode = pagedivs[0]
291 links = pagenode.xpath("//a") 299 links = pagenode.xpath("//a")
292 for l in links: 300 for l in links:
293 hrefNode = l.getAttributeNodeNS(None, u"href") 301 hrefNode = l.getAttributeNodeNS(None, u"href")
296 if href.startswith('#note-'): 304 if href.startswith('#note-'):
297 hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)) 305 hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn))
298 return serializeNode(pagenode) 306 return serializeNode(pagenode)
299 if mode == "xml": 307 if mode == "xml":
300 # first div contains text 308 # first div contains text
301 pagedivs = pagedom.xpath("/div") 309 pagedivs = dom.xpath("/div")
302 if len(pagedivs) > 0: 310 if len(pagedivs) > 0:
303 pagenode = pagedivs[0] 311 pagenode = pagedivs[0]
304 return serializeNode(pagenode) 312 return serializeNode(pagenode)
305 if mode == "gis": 313 if mode == "gis":
306 # first div contains text 314 # first div contains text
307 pagedivs = pagedom.xpath("/div") 315 pagedivs = dom.xpath("/div")
308 if len(pagedivs) > 0: 316 if len(pagedivs) > 0:
309 pagenode = pagedivs[0] 317 pagenode = pagedivs[0]
310 links =pagenode.xpath("//a") 318 links =pagenode.xpath("//a")
311 for l in links: 319 for l in links:
312 hrefNode =l.getAttributeNodeNS(None, u"href") 320 hrefNode =l.getAttributeNodeNS(None, u"href")
317 l.setAttributeNS(None, 'target', '_blank') 325 l.setAttributeNS(None, 'target', '_blank')
318 return serializeNode(pagenode) 326 return serializeNode(pagenode)
319 327
320 if mode == "pureXml": 328 if mode == "pureXml":
321 # first div contains text 329 # first div contains text
322 pagedivs = pagedom.xpath("/div") 330 pagedivs = dom.xpath("/div")
323 if len(pagedivs) > 0: 331 if len(pagedivs) > 0:
324 pagenode = pagedivs[0] 332 pagenode = pagedivs[0]
325 return serializeNode(pagenode) 333 return serializeNode(pagenode)
326 # text-with-links mode 334 # text-with-links mode
327 if mode == "text_dict": 335 if mode == "text_dict":
328 # first div contains text 336 # first div contains text
329 pagedivs = pagedom.xpath("/div") 337 pagedivs = dom.xpath("/div")
330 if len(pagedivs) > 0: 338 if len(pagedivs) > 0:
331 pagenode = pagedivs[0] 339 pagenode = pagedivs[0]
332 # check all a-tags 340 # check all a-tags
333 links = pagenode.xpath("//a") 341 links = pagenode.xpath("//a")
334 for l in links: 342 for l in links:
373 data = self.getServerData("lt/lemma.xql","document=&language="+str(language)+"&lemma="+urllib.quote(query)) 381 data = self.getServerData("lt/lemma.xql","document=&language="+str(language)+"&lemma="+urllib.quote(query))
374 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) 382 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query)))
375 return data 383 return data
376 384
377 def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1, optionToggle=None): 385 def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1, optionToggle=None):
378 """number of""" 386 #number of
379 docpath = docinfo['textURLPath'] 387 docpath = docinfo['textURLPath']
380 pagesize = pageinfo['queryPageSize'] 388 pagesize = pageinfo['queryPageSize']
381 pn = pageinfo['searchPN'] 389 pn = pageinfo['searchPN']
382 query =pageinfo['query'] 390 query =pageinfo['query']
383 queryType =pageinfo['queryType'] 391 queryType =pageinfo['queryType']
384 tocSearch = 0 392 tocSearch = 0
385 tocDiv = None 393 tocDiv = None
386 394
387 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)) 395 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn))
388 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False)
389 pagedom = Parse(pagexml) 396 pagedom = Parse(pagexml)
390 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 397 numdivs = pagedom.xpath("//div[@class='queryResultHits']")
391 tocSearch = int(getTextFromNode(numdivs[0])) 398 tocSearch = int(getTextFromNode(numdivs[0]))
392 logging.debug("documentViewer (gettoc) tocSearch: %s"%(tocSearch)) 399 logging.debug("documentViewer (gettoc) tocSearch: %s"%(tocSearch))
393 tc=int((tocSearch/10)+1) 400 tc=int((tocSearch/10)+1)
394 logging.debug("documentViewer (gettoc) tc: %s"%(tc)) 401 logging.debug("documentViewer (gettoc) tc: %s"%(tc))
395 return tc 402 return tc
396 403
397 def getQueryResultHits(self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1, optionsClose=None):
398
399 """number of hits in Search mode"""
400 docpath = docinfo['textURLPath']
401 pagesize = pageinfo['queryPageSize']
402 pn = pageinfo['searchPN']
403 query =pageinfo['query']
404 queryType =pageinfo['queryType']
405 tocSearch = 0
406 tocDiv = None
407
408 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn))
409 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False)
410 pagedom = Parse(pagexml)
411 numdivs = pagedom.xpath("//div[@class='queryResultHits']")
412 tocSearch = int(getTextFromNode(numdivs[0]))
413 tc=int((tocSearch/10)+1)
414 return tc
415
416 def getQueryResultHitsText(self, docinfo=None, pageinfo=None):
417 """number of hits in Text of Contents mode"""
418 selfurl = self.absolute_url()
419 docpath = docinfo['textURLPath']
420 viewMode= pageinfo['viewMode']
421 text = self.getServerData("page-fragment.xql","document=%s&mode=%s"%(docinfo['textURLPath'], 'text'))
422 dom =Parse(text)
423 pagedivs = dom.xpath("//div[@class='countTocEntries']")
424 logging.debug ("pagedivs=%s"%(pagedivs))
425 if len(pagedivs)>0:
426 originalPage= (getTextFromNode(pagedivs[0]))
427 tc = int (originalPage)
428 tc1 =tc/30+1
429 return tc1
430
431 def getQueryResultHitsFigures(self, docinfo=None, pageinfo=None):
432 """number of hits in Text of Figures mode"""
433
434 selfurl = self.absolute_url()
435 docpath = docinfo['textURLPath']
436 viewMode= pageinfo['viewMode']
437 text = self.getServerData("page-fragment.xql","document=%s&mode=%s"%(docinfo['textURLPath'], 'text'))
438 dom =Parse(text)
439 pagedivs = dom.xpath("//div[@class='countFigureEntries']")
440 logging.debug ("pagedivs=%s"%(pagedivs))
441 if len(pagedivs)>0:
442 originalPage= (getTextFromNode(pagedivs[0]))
443 tc = int (originalPage)
444 tc1 =tc/30+1
445 return tc1
446
447
448 def getToc(self, mode="text", docinfo=None): 404 def getToc(self, mode="text", docinfo=None):
449 """loads table of contents and stores in docinfo""" 405 """loads table of contents and stores in docinfo"""
450 logging.debug("documentViewer (gettoc) mode: %s"%(mode)) 406 logging.debug("documentViewer (gettoc) mode: %s"%(mode))
451 if mode == "none": 407 if mode == "none":
452 return docinfo 408 return docinfo
465 # number of entries in toc 421 # number of entries in toc
466 tocSize = 0 422 tocSize = 0
467 tocDiv = None 423 tocDiv = None
468 424
469 pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) 425 pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
470 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) 426
471 # post-processing downloaded xml 427 # post-processing downloaded xml
472 pagedom = Parse(pagexml) 428 pagedom = Parse(pagexml)
473 # get number of entries 429 # get number of entries
474 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 430 numdivs = pagedom.xpath("//div[@class='queryResultHits']")
475 if len(numdivs) > 0: 431 if len(numdivs) > 0:
497 tocPN = pageinfo['tocPN'] 453 tocPN = pageinfo['tocPN']
498 454
499 data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm&optionToggle=1"%(docpath,queryType, pagesize, pn)) 455 data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm&optionToggle=1"%(docpath,queryType, pagesize, pn))
500 page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&optionToggle=1'%(selfurl,url, viewMode, tocMode, tocPN)) 456 page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&optionToggle=1'%(selfurl,url, viewMode, tocMode, tocPN))
501 text = page.replace('mode=image','mode=texttool') 457 text = page.replace('mode=image','mode=texttool')
502 logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) 458 #logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization))
503 #logging.debug("documentViewer (characterNormalization) text: %s"%(text)) 459
504 return text 460 return text
505 461
506 def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): 462 def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
507 """change settings""" 463 """change settings"""
508 self.title=title 464 self.title=title