Changeset 419:69205c9d9404 in documentViewer
- Timestamp: Jan 6, 2011, 3:44:18 PM (14 years ago)
- Branch: default
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
MpdlXmlTextServer.py
r418 r419 39 39 return documentViewer.getHttpData(url,data,timeout=self.timeout) 40 40 41 def getSearch(self, p n=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None, characterNormalization=None, optionToggle=None):41 def getSearch(self, pageinfo=None, docinfo=None, lemma=None): 42 42 """get search list""" 43 43 docpath = docinfo['textURLPath'] 44 44 url = docinfo['url'] 45 45 pagesize = pageinfo['queryPageSize'] 46 pn = pageinfo ['searchPN']46 pn = pageinfo.get('searchPN',1) 47 47 sn = pageinfo['sn'] 48 48 highlightQuery = pageinfo['highlightQuery'] … … 57 57 58 58 data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&optionToggle=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization,optionToggle ,urllib.quote(highlightQuery))) 59 #page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) 60 59 61 60 pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) 62 61 pagedom = Parse(pagexml) 62 63 """ 64 pagedivs = pagedom.xpath("//div[@class='queryResultHits']") 65 if (pagedivs == pagedom.xpath("//div[@class='queryResultHits']")): 66 if len(pagedivs)>0: 67 docinfo['queryResultHits'] = int(getTextFromNode(pagedivs[0])) 68 s = getTextFromNode(pagedivs[0]) 69 s1 = int(s)/10+1 70 try: 71 docinfo['queryResultHits'] = int(s1) 72 logging.debug("SEARCH ENTRIES: %s"%(s1)) 73 except: 74 docinfo['queryResultHits'] = 0 75 """ 63 76 if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): 64 77 pagedivs = pagedom.xpath("//div[@class='queryResultPage']") … … 118 131 return "no text here" 119 132 
120 121 def getNumTextPages (self, docinfo=None): 122 """get list of pages from fulltext (texts without images) and put in docinfo""" 123 logging.debug("getNumTextPages") 124 docpath = docinfo['textURLPath'] 125 selfurl = self.absolute_url() 126 #viewMode= pageinfo['viewMode'] 127 if 'numPages' in docinfo: 128 # allredy there 129 return docinfo 130 131 text = self.getServerData("page-fragment.xql","document=%s"%(docinfo['textURLPath'])) 132 dom =Parse(text) 133 pagedivs = dom.xpath("//div[@class='countPages']") 134 logging.debug ("pagedivs=%s"%(pagedivs)) 135 if len(pagedivs)>0: 136 docinfo['numPages']= int(getTextFromNode(pagedivs[0])) 137 return docinfo 138 139 def getTocEntries (self, docinfo=None): 140 """ number of text entries""" 141 docinfo = self.getInfoFromPage(docinfo) 142 logging.debug("gettocentries: docinfo=%s"%docinfo) 143 return docinfo['tocEntries'] 144 145 def getFigureEntries (self, docinfo=None): 146 """ number of figure entries""" 147 docinfo = self.getInfoFromPage(docinfo) 148 return docinfo['figureEntries'] 149 133 150 134 def getGisPlaces(self, docinfo=None, pageinfo=None): 151 135 """ Show all Gis Places of whole Page""" … … 191 175 logging.debug("getALLGisPlaces :%s"%(myList)) 192 176 return myList 193 194 def getOrigPages (self, docinfo=None, pageinfo=None): 195 """Show original page """ 196 docpath = docinfo.get('textURLPath',None) 197 if not docpath: 198 return None 199 selfurl = self.absolute_url() 200 pn =pageinfo['current'] 201 202 viewMode= pageinfo['viewMode'] 203 text = self.getServerData("page-fragment.xql","document=%s&mode=%s&pn=%s"%(docinfo['textURLPath'], 'text', pn)) 204 dom =Parse(text) 205 pagedivs = dom.xpath("//div[@class='pageNumberOrig']") 206 if len(pagedivs)>0: 207 originalPage= getTextFromNode(pagedivs[0]) 208 #return docinfo['originalPage'] 209 return originalPage 210 211 def getAllPlaces (self, docinfo=None): 212 """Show all Places if no places than 0""" 213 docinfo = self.getInfoFromPage(docinfo) 214 return 
docinfo['allPlaces'] 215 216 def getInfoFromPage(self, docinfo=None): 217 """ extract diverse info from page-fragment""" 218 docpath = docinfo['textURLPath'] 219 if 'allPlaces' in docinfo: 220 # allredy there 221 return docinfo 222 223 if (docpath is not None): 224 text = self.getServerData("page-fragment.xql","document=%s"%(docinfo['textURLPath'])) 225 dom = Parse(text) 226 # figureEntries 227 pagedivs = dom.xpath("//div[@class='countFigureEntries']") 228 s = getTextFromNode(pagedivs[0]) 229 try: 230 docinfo['figureEntries'] = int(s) 231 except: 232 docinfo['figureEntries'] = 0 233 # tocEntries 234 pagedivs = dom.xpath("//div[@class='countTocEntries']") 235 s = getTextFromNode(pagedivs[0]) 236 try: 237 docinfo['tocEntries'] = int(s) 238 except: 239 docinfo['tocEntries'] = 0 240 # allPlaces 241 pagedivs = dom.xpath("//div[@class='countPlaces']") 242 s = getTextFromNode(pagedivs[0]) 243 try: 244 docinfo['allPlaces'] = int(s) 245 except: 246 docinfo['allPlaces'] = 0 247 248 else: 249 # no full text -- init to 0 250 docinfo['figureEntries'] = 0 251 docinfo['tocEntries'] = 0 252 docinfo['allPlaces'] = 0 253 254 return docinfo 255 177 256 178 257 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, viewMode=None, tocMode=None, tocPN=None, characterNormalization="reg", highlightQuery=None, sn=None, optionToggle=None):179 def getTextPage(self, docinfo=None, pageinfo=None): 258 180 """returns single page from fulltext""" 259 181 docpath = docinfo['textURLPath'] 260 182 path = docinfo['textURLPath'] 261 url = docinfo['url'] 262 name = docinfo['name'] 263 viewMode= pageinfo['viewMode'] 183 url = docinfo.get('url',None) 184 name = docinfo.get('name',None) 185 pn =pageinfo['current'] 186 #viewMode= pageinfo['viewMode'] 264 187 sn = pageinfo['sn'] 265 188 highlightQuery = pageinfo['highlightQuery'] 266 189 mode = pageinfo ['viewMode'] 267 190 tocMode = pageinfo['tocMode'] 268 191 characterNormalization=pageinfo['characterNormalization'] … … 273 196 else: 274 
197 textmode = mode 275 #logging.debug("documentViewer highlightQuery: %s"%(highlightQuery))198 276 199 textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization) 277 200 if highlightQuery is not None: 278 201 textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn) 279 #logging.debug("documentViewer highlightQuery: %s"%(highlightQuery))202 280 203 pagexml = self.getServerData("page-fragment.xql",textParam) 281 logging.debug("documentViewer highlightQuery: %s"%(highlightQuery)) 282 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False) 283 284 pagedom = Parse(pagexml) 204 dom = Parse(pagexml) 205 206 #original Pages 207 pagedivs = dom.xpath("//div[@class='pageNumberOrig']") 208 if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"): 209 if len(pagedivs)>0: 210 docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0]) 211 logging.debug("ORIGINAL PAGES: %s"%(docinfo['pageNumberOrig'])) 212 213 #figureEntries 214 pagedivs = dom.xpath("//div[@class='countFigureEntries']") 215 if pagedivs == dom.xpath("//div[@class='countFigureEntries']"): 216 if len(pagedivs)>0: 217 docinfo['countFigureEntries'] = getTextFromNode(pagedivs[0]) 218 s = getTextFromNode(pagedivs[0]) 219 if s=='0': 220 try: 221 docinfo['countFigureEntries'] = int(s) 222 logging.debug("FIGURE ENTRIES: %s"%(s)) 223 except: 224 docinfo['countFigureEntries'] = 0 225 else: 226 s1 = int(s)/30+1 227 try: 228 docinfo['countFigureEntries'] = int(s1) 229 logging.debug("FIGURE ENTRIES: %s"%(s1)) 230 except: 231 docinfo['countFigureEntries'] = 0 232 233 #allPlaces 234 pagedivs = dom.xpath("//div[@class='countPlaces']") 235 if pagedivs == dom.xpath("//div[@class='countPlaces']"): 236 if len(pagedivs)>0: 237 docinfo['countPlaces']= getTextFromNode(pagedivs[0]) 238 s = getTextFromNode(pagedivs[0]) 239 try: 240 docinfo['countPlaces'] = int(s) 241 logging.debug("PLACES HERE: %s"%(s)) 242 except: 243 
docinfo['countPlaces'] = 0 244 245 #tocEntries 246 pagedivs = dom.xpath("//div[@class='countTocEntries']") 247 if pagedivs == dom.xpath("//div[@class='countTocEntries']"): 248 if len(pagedivs)>0: 249 docinfo['countTocEntries'] = int(getTextFromNode(pagedivs[0])) 250 s = getTextFromNode(pagedivs[0]) 251 if s=='0': 252 try: 253 docinfo['countTocEntries'] = int(s) 254 logging.debug("TEXT ENTRIES: %s"%(s)) 255 except: 256 docinfo['countTocEntries'] = 0 257 else: 258 s1 = int(s)/30+1 259 try: 260 docinfo['countTocEntries'] = int(s1) 261 logging.debug("TEXT ENTRIES: %s"%(s1)) 262 except: 263 docinfo['countTocEntries'] = 0 264 265 #numTextPages 266 pagedivs = dom.xpath("//div[@class='countPages']") 267 if pagedivs == dom.xpath("//div[@class='countPages']"): 268 if len(pagedivs)>0: 269 docinfo['numPages'] = getTextFromNode(pagedivs[0]) 270 s = getTextFromNode(pagedivs[0]) 271 272 try: 273 docinfo['numPages'] = int(s) 274 logging.debug("PAGE NUMBER: %s"%(s)) 275 276 np = docinfo['numPages'] 277 pageinfo['end'] = min(pageinfo['end'], np) 278 pageinfo['numgroups'] = int(np / pageinfo['groupsize']) 279 if np % pageinfo['groupsize'] > 0: 280 pageinfo['numgroups'] += 1 281 except: 282 docinfo['numPages'] = 0 283 284 else: 285 #no full text -- init to 0 286 docinfo['pageNumberOrig'] = 0 287 docinfo['countFigureEntries'] = 0 288 docinfo['countPlaces'] = 0 289 docinfo['countTocEntries'] = 0 290 docinfo['numPages'] = 0 291 #return docinfo 292 285 293 # plain text mode 286 294 if mode == "text": 287 295 # first div contains text 288 pagedivs = pagedom.xpath("/div")296 pagedivs = dom.xpath("/div") 289 297 if len(pagedivs) > 0: 290 298 pagenode = pagedivs[0] … … 299 307 if mode == "xml": 300 308 # first div contains text 301 pagedivs = pagedom.xpath("/div")309 pagedivs = dom.xpath("/div") 302 310 if len(pagedivs) > 0: 303 311 pagenode = pagedivs[0] … … 305 313 if mode == "gis": 306 314 # first div contains text 307 pagedivs = pagedom.xpath("/div")315 pagedivs = dom.xpath("/div") 308 
316 if len(pagedivs) > 0: 309 317 pagenode = pagedivs[0] … … 320 328 if mode == "pureXml": 321 329 # first div contains text 322 pagedivs = pagedom.xpath("/div")330 pagedivs = dom.xpath("/div") 323 331 if len(pagedivs) > 0: 324 332 pagenode = pagedivs[0] … … 327 335 if mode == "text_dict": 328 336 # first div contains text 329 pagedivs = pagedom.xpath("/div")337 pagedivs = dom.xpath("/div") 330 338 if len(pagedivs) > 0: 331 339 pagenode = pagedivs[0] … … 376 384 377 385 def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1, optionToggle=None): 378 """number of"""386 #number of 379 387 docpath = docinfo['textURLPath'] 380 388 pagesize = pageinfo['queryPageSize'] … … 386 394 387 395 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)) 388 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False)389 396 pagedom = Parse(pagexml) 390 397 numdivs = pagedom.xpath("//div[@class='queryResultHits']") … … 394 401 logging.debug("documentViewer (gettoc) tc: %s"%(tc)) 395 402 return tc 396 397 def getQueryResultHits(self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1, optionsClose=None): 398 399 """number of hits in Search mode""" 400 docpath = docinfo['textURLPath'] 401 pagesize = pageinfo['queryPageSize'] 402 pn = pageinfo['searchPN'] 403 query =pageinfo['query'] 404 queryType =pageinfo['queryType'] 405 tocSearch = 0 406 tocDiv = None 407 408 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)) 409 
#pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) 410 pagedom = Parse(pagexml) 411 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 412 tocSearch = int(getTextFromNode(numdivs[0])) 413 tc=int((tocSearch/10)+1) 414 return tc 415 416 def getQueryResultHitsText(self, docinfo=None, pageinfo=None): 417 """number of hits in Text of Contents mode""" 418 selfurl = self.absolute_url() 419 docpath = docinfo['textURLPath'] 420 viewMode= pageinfo['viewMode'] 421 text = self.getServerData("page-fragment.xql","document=%s&mode=%s"%(docinfo['textURLPath'], 'text')) 422 dom =Parse(text) 423 pagedivs = dom.xpath("//div[@class='countTocEntries']") 424 logging.debug ("pagedivs=%s"%(pagedivs)) 425 if len(pagedivs)>0: 426 originalPage= (getTextFromNode(pagedivs[0])) 427 tc = int (originalPage) 428 tc1 =tc/30+1 429 return tc1 430 431 def getQueryResultHitsFigures(self, docinfo=None, pageinfo=None): 432 """number of hits in Text of Figures mode""" 433 434 selfurl = self.absolute_url() 435 docpath = docinfo['textURLPath'] 436 viewMode= pageinfo['viewMode'] 437 text = self.getServerData("page-fragment.xql","document=%s&mode=%s"%(docinfo['textURLPath'], 'text')) 438 dom =Parse(text) 439 pagedivs = dom.xpath("//div[@class='countFigureEntries']") 440 logging.debug ("pagedivs=%s"%(pagedivs)) 441 if len(pagedivs)>0: 442 originalPage= (getTextFromNode(pagedivs[0])) 443 tc = int (originalPage) 444 tc1 =tc/30+1 445 return tc1 446 447 403 448 404 def getToc(self, mode="text", docinfo=None): 449 405 """loads table of contents and stores in docinfo""" … … 468 424 469 425 pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) 470 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", 
"document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)426 471 427 # post-processing downloaded xml 472 428 pagedom = Parse(pagexml) … … 500 456 page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&optionToggle=1'%(selfurl,url, viewMode, tocMode, tocPN)) 501 457 text = page.replace('mode=image','mode=texttool') 502 logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization))503 #logging.debug("documentViewer (characterNormalization) text: %s"%(text))458 #logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) 459 504 460 return text 505 461 -
documentViewer.py
r413 r419 199 199 200 200 def getQuery(self, **args): 201 """get query """201 """get query in search""" 202 202 return self.template.fulltextclient.getQuery(**args) 203 204 def getQueryResultHits(self, **args): 205 """get query""" 206 return self.template.fulltextclient.getQueryResultHits(**args) 207 208 def getQueryResultHitsText(self, **args): 209 """get query""" 210 return self.template.fulltextclient.getQueryResultHitsText(**args) 211 212 def getQueryResultHitsFigures(self, **args): 213 """get query""" 214 return self.template.fulltextclient.getQueryResultHitsFigures(**args) 215 203 216 204 def getSearch(self, **args): 217 205 """get search""" … … 225 213 """get all gis places """ 226 214 return self.template.fulltextclient.getAllGisPlaces(**args) 227 228 def getOrigPages(self, **args): 229 """get original page number """ 230 return self.template.fulltextclient.getOrigPages(**args) 231 232 def getAllPlaces(self, **args): 233 """get original page number """ 234 return self.template.fulltextclient.getAllPlaces(**args) 235 236 def getTocEntries(self, **args): 237 """get original page number """ 238 return self.template.fulltextclient.getTocEntries(**args) 239 240 def getFigureEntries(self, **args): 241 """get original page number """ 242 return self.template.fulltextclient.getFigureEntries(**args) 243 244 def getNumPages(self, docinfo): 245 """get numpages""" 246 return self.template.fulltextclient.getNumPages(docinfo) 247 248 def getNumTextPages(self, docinfo): 249 """get numpages text""" 250 return self.template.fulltextclient.getNumTextPages(docinfo) 251 215 252 216 def getTranslate(self, **args): 253 217 """get translate""" … … 336 300 pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) 337 301 302 if (docinfo.get('textURLPath',None)): 303 page = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo) 304 pageinfo['textPage'] = page 338 305 pt = getattr(self.template, 'viewer_main') 339 306 return 
pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) … … 725 692 textUrl = getTextFromNode(textUrls[0]) 726 693 docinfo['textURLPath'] = textUrl 727 if not docinfo['imagePath']:694 #if not docinfo['imagePath']: 728 695 # text-only, no page images 729 docinfo = self.getNumTextPages(docinfo) 696 #docinfo = self.getNumTextPages(docinfo) 697 730 698 731 699 presentationUrls = dom.xpath("//texttool/presentation") … … 855 823 pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10') 856 824 pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') 825 857 826 toc = int (pageinfo['tocPN']) 858 827 pageinfo['textPages'] =int (toc)
Note: See TracChangeset for help on using the changeset viewer.