Changeset 576:b2c7e272e075 in documentViewer
- Timestamp: Oct 17, 2012, 2:36:13 PM (13 years ago)
- Branch: default
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
MpiwgXmlTextServer.py
r575 r576 9 9 import urlparse 10 10 import base64 11 12 from datetime import datetime 11 13 12 14 from SrvTxtUtils import getInt, getText, getHttpData … … 187 189 188 190 logging.debug("getTextPage mode=%s, pn=%s"%(mode,pn)) 191 startTime = datetime.now() 189 192 # check for cached text -- but ideally this shouldn't be called twice 190 193 if pageinfo.has_key('textPage'): … … 209 212 if normMode == 'regPlusNorm': 210 213 normMode = 'norm' 211 214 215 # TODO: this should not be necessary when the backend is fixed 216 textParams['normalization'] = normMode 217 212 218 if not mode: 213 219 # default is dict … … 241 247 if 'dict' in modes: 242 248 textmode = 'dict' 243 textParams['mode'] = 'tokenized'244 249 textParams['outputFormat'] = 'html' 245 250 elif 'xml' in modes: 246 251 textmode = 'xml' 247 textParams['mode'] = 'untokenized'248 252 textParams['outputFormat'] = 'xmlDisplay' 249 textParams['normMode']= 'orig'253 normMode = 'orig' 250 254 elif 'gis' in modes: 251 255 #FIXME! … … 254 258 # text is default mode 255 259 textmode = 'plain' 256 textParams['mode'] = 'untokenized'257 260 textParams['outputFormat'] = 'html' 258 261 … … 273 276 # add textmode and normMode classes 274 277 pagediv.set('class', 'text %s %s'%(textmode, normMode)) 275 #self._processWTags(textmode, normMode, pagediv)278 self._processWTags(textmode, normMode, pagediv) 276 279 #self._processPbTag(pagediv, pageinfo) 277 280 self._processFigures(pagediv, docinfo) … … 288 291 if linkurl.path.endswith('GetDictionaryEntries'): 289 292 #TODO: replace wordInfo page 290 # is dictionary link - change href (keeping parameters)291 #l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/template/viewer_wordinfo'%viewerurl))292 293 # add target to open new page 293 294 l.set('target', '_blank') 294 295 elif href.startswith('#note-'):296 # note link FIXME!297 l.set('href', href.replace('#note-',"%s#note-"%selfurl))298 295 299 296 if punditMode: 300 297 
self._addPunditAttributes(pagediv, pageinfo, docinfo) 301 302 return serialize(pagediv) 298 299 s = serialize(pagediv) 300 logging.debug("getTextPage done in %s"%(datetime.now()-startTime)) 301 return s 303 302 304 303 # xml mode … … 346 345 """selects the necessary information from w-spans and removes the rest from pagediv""" 347 346 logging.debug("processWTags(textMode=%s,norm=%s,pagediv"%(repr(textMode),repr(normMode))) 347 startTime = datetime.now() 348 348 wtags = pagediv.findall(".//span[@class='w']") 349 349 for wtag in wtags: 350 text = None351 attr = None352 350 if textMode == 'dict': 353 # take a-tag and matching child 354 attr = wtag.find('a').items() 355 text = wtag.find("a/span[@class='%s']"%normMode).text 351 # delete non-a-tags 352 wtag.remove(wtag.find("span[@class='nodictionary orig']")) 353 wtag.remove(wtag.find("span[@class='nodictionary reg']")) 354 wtag.remove(wtag.find("span[@class='nodictionary norm']")) 355 # delete non-matching children of a-tag and suppress remaining tag name 356 atag = wtag.find("a[@class='dictionary']") 357 if normMode == 'orig': 358 atag.remove(atag.find("span[@class='reg']")) 359 atag.remove(atag.find("span[@class='norm']")) 360 atag.find("span[@class='orig']").tag = None 361 elif normMode == 'reg': 362 atag.remove(atag.find("span[@class='orig']")) 363 atag.remove(atag.find("span[@class='norm']")) 364 atag.find("span[@class='reg']").tag = None 365 elif normMode == 'norm': 366 atag.remove(atag.find("span[@class='orig']")) 367 atag.remove(atag.find("span[@class='reg']")) 368 atag.find("span[@class='norm']").tag = None 369 356 370 else: 357 # take matching child 358 text = wtag.find("span[@class='nodictionary %s']"%normMode).text 371 # delete a-tag 372 wtag.remove(wtag.find("a[@class='dictionary']")) 373 # delete non-matching children and suppress remaining tag name 374 if normMode == 'orig': 375 wtag.remove(wtag.find("span[@class='nodictionary reg']")) 376 wtag.remove(wtag.find("span[@class='nodictionary norm']")) 377 
wtag.find("span[@class='nodictionary orig']").tag = None 378 elif normMode == 'reg': 379 wtag.remove(wtag.find("span[@class='nodictionary orig']")) 380 wtag.remove(wtag.find("span[@class='nodictionary norm']")) 381 wtag.find("span[@class='nodictionary reg']").tag = None 382 elif normMode == 'norm': 383 wtag.remove(wtag.find("span[@class='nodictionary orig']")) 384 wtag.remove(wtag.find("span[@class='nodictionary reg']")) 385 wtag.find("span[@class='nodictionary norm']").tag = None 359 386 360 if text: 361 # replace wtag by new content 362 logging.debug("new w-tag attr=%s text=%s"%(attr,text)) 363 wtag.clear() 364 365 if attr: 366 # make dictionary link 367 wtag.tag = 'a' 368 wtag.attrib.update(dict(attr)) 369 370 # text content 371 wtag.text = text 372 387 # suppress w-tag name 388 wtag.tag = None 389 390 logging.debug("processWTags in %s"%(datetime.now()-startTime)) 373 391 return pagediv 374 392 … … 410 428 def _processFigures(self, pagediv, docinfo): 411 429 """processes figure-tags""" 412 divs = pagediv.findall(".//span[@class='figure']") 430 # unfortunately etree can not select class.startswith('figure') 431 divs = pagediv.findall(".//span[@class]") 413 432 scalerUrl = docinfo['digilibScalerUrl'] 414 433 viewerUrl = docinfo['digilibViewerUrl'] 415 434 for d in divs: 435 if not d.get('class').startswith('figure'): 436 continue 437 416 438 try: 417 439 a = d.find('a') … … 485 507 dom = ET.fromstring(pagexml) 486 508 # page content is currently in multiple <td align=left> 487 alldivs = dom.findall(".//t d[@align='left']")509 alldivs = dom.findall(".//tr[@class='hit']") 488 510 for div in alldivs: 511 # change tr to div 512 div.tag = 'div' 513 # change td to span 514 for d in div.findall('td'): 515 d.tag = 'span' 516 489 517 # TODO: can we put etree in the session? 
490 518 results.append(div) … … 517 545 start = (pn - 1) * size 518 546 519 fullresult = ET.fromstring(resultxml) 520 521 if fullresult is not None: 547 #fullresult = ET.fromstring(resultxml) 548 #fullresult = resultxml 549 #logging.debug("resultxml=%s"%repr(resultxml)) 550 551 if resultxml is not None: 522 552 # paginate 523 553 first = start-1 524 len = size 525 del fullresult[:first] 526 del fullresult[len:] 527 tocdivs = fullresult 528 529 # check all a-tags 530 links = tocdivs.findall(".//a") 531 for l in links: 532 href = l.get('href') 533 if href: 534 # assume all links go to pages 535 linkUrl = urlparse.urlparse(href) 536 linkParams = urlparse.parse_qs(linkUrl.query) 537 # take some parameters 538 params = {'pn': linkParams['pn'], 539 'highlightQuery': linkParams.get('highlightQuery',''), 540 'highlightElement': linkParams.get('highlightElement',''), 541 'highlightElementPos': linkParams.get('highlightElementPos','') 542 } 543 url = self.getLink(params=params) 544 l.set('href', url) 554 last = first+size 555 tocdivs = resultxml[first:last] 556 #del fullresult[:first] 557 #del fullresult[len:] 558 #tocdivs = fullresult 559 560 toc = ET.Element('div', attrib={'class':'queryResultPage'}) 561 for div in tocdivs: 562 # check all a-tags 563 links = div.findall(".//a") 564 for l in links: 565 href = l.get('href') 566 if href: 567 # assume all links go to pages 568 linkUrl = urlparse.urlparse(href) 569 linkParams = urlparse.parse_qs(linkUrl.query) 570 # take some parameters (make sure it works even if the link was already parsed) 571 params = {'pn': linkParams.get('page',linkParams.get('pn', None)), 572 'highlightQuery': linkParams.get('highlightQuery',None), 573 'highlightElement': linkParams.get('highlightElem',linkParams.get('highlightElement',None)), 574 'highlightElementPos': linkParams.get('highlightElemPos',linkParams.get('highlightElementPos',None)) 575 } 576 if not params['pn']: 577 logging.warn("getResultsPage: link has no page: %s"%href) 578 579 url = 
self.getLink(params=params) 580 l.set('href', url) 545 581 546 return serialize(tocdivs) 582 toc.append(div) 583 584 return serialize(toc) 547 585 548 586 return "ERROR: no results!" -
css/docuviewer.css
r575 r576 269 269 margin-bottom: 0.25em; 270 270 } 271 /* normalization forms * /271 /* normalization forms * 272 272 div.col.main div.content.text div.text.orig span.w span.reg, 273 273 div.col.main div.content.text div.text.orig span.w span.norm { … … 282 282 display: none; 283 283 } 284 /* dictionary forms * /284 /* dictionary forms * 285 285 div.col.main div.content.text div.text.plain span.w a.dictionary { 286 286 display: none; … … 288 288 div.col.main div.content.text div.text.dict span.w span.nodictionary { 289 289 display: none; 290 } 290 } 291 */ 291 292 /* page break */ 292 293 div.col.main div.content.text span.pb span.n, … … 301 302 } 302 303 /* note */ 303 div.col.main div.content.text span.note {304 div.col.main div.content.text span.note span.noteBody { 304 305 display: block; 305 306 /* float: left; */ … … 310 311 } 311 312 div.col.main div.content.text span.note span.noteSign { 312 display: none; 313 display: none; 314 /* font-size: 70%; 315 vertical-align: super; */ 313 316 } 314 317 /* figure */ … … 355 358 margin-bottom: 0.5em; 356 359 } 360 div.col.results div.content div.hit { 361 margin-bottom: 0.5em; 362 } 363 div.col.results div.content div.hit span.hitLink { 364 margin-right: 0.5em; 365 } 357 366 358 367 /* … … 399 408 font-family: Monaco,Courier,monospace; 400 409 font-size: 12px; 410 } 411 div.col.main div.content.xml ul { 412 padding-left: 1em; 401 413 } 402 414 div.col.main div.content.xml div.pageHeaderTitle {
Note: See TracChangeset
for help on using the changeset viewer.