Changeset 568:694935574177 in documentViewer
- Timestamp: Oct 11, 2012, 4:27:14 PM (12 years ago)
- Branch: default
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
-
MpiwgXmlTextServer.py
r567 r568 94 94 95 95 field = '' 96 if mode in ['pages', 'toc', 'figures' ]:96 if mode in ['pages', 'toc', 'figures', 'handwritten']: 97 97 # translate mode to field param 98 98 field = '&field=%s'%mode … … 126 126 if mode is None: 127 127 # get general info from system-tag 128 cp = doc.find('system/countPages') 129 if cp is not None: 130 docinfo['numTextPages'] = getInt(cp.text) 128 sys = doc.find('system') 129 if sys is not None: 130 docinfo['numTextPages'] = getInt(getText(sys.find('countPages'))) 131 docinfo['numFigureEntries'] = getInt(getText(sys.find('countFigures'))) 132 docinfo['numHandwritten'] = getInt(getText(sys.find('countHandwritten'))) 133 docinfo['numTocEntries'] = getInt(getText(sys.find('countTocEntries'))) 131 134 132 135 else: … … 146 149 pn = getInt(i.get('n')) 147 150 page['pn'] = pn 148 no = getInt(i.get('o'))151 no = i.get('o') 149 152 page['no'] = no 150 non = getInt(i.get('o-norm'))153 non = i.get('o-norm') 151 154 page['non'] = non 152 155 … … 158 161 159 162 # toc 160 elif name == 'toc':163 elif lt == 'toc' or lt == 'figures' or lt == 'handwritten': 161 164 # contains tags with table of contents/figures 162 # < toc-entry><page>13</page><level>3</level><content>Chapter I</content><level-string>1.</level-string><real-level>1</real-level></toc-entry>165 # <item n="2.1." lv="2">CAP.I. 
<ref o="119">132</ref></item> 163 166 tocs = [] 164 for te in tag: 165 toc = {} 166 for t in te: 167 if t.tag == 'page': 168 toc['pn'] = getInt(t.text) 169 elif t.tag == 'level': 170 toc['level'] = t.text 171 elif t.tag == 'content': 172 toc['content'] = t.text 173 elif t.tag == 'level-string': 174 toc['level-string'] = t.text 175 elif t.tag == 'real-level': 176 toc['real-level'] = t.text 177 178 tocs.append(toc) 167 for te in l: 168 if te.tag == 'item': 169 toc = {} 170 toc['level-string'] = te.get('n') 171 toc['level'] = te.get('lv') 172 toc['content'] = te.text.strip() 173 ref = te.find('ref') 174 toc['pn'] = getInt(ref.text) 175 toc['no'] = ref.get('o') 176 toc['non'] = ref.get('o-norm') 177 tocs.append(toc) 179 178 180 179 # save as full_toc/full_figures … … 184 183 185 184 186 def processPageInfo(self, dom, docinfo, pageinfo):187 """processes page info divs from dom and stores in docinfo and pageinfo"""188 # assume first second level div is pageMeta189 alldivs = dom.find("div")190 191 if alldivs is None or alldivs.get('class', '') != 'pageMeta':192 logging.error("processPageInfo: pageMeta div not found!")193 return194 195 for div in alldivs:196 dc = div.get('class')197 198 # pageNumberOrig199 if dc == 'pageNumberOrig':200 pageinfo['pageNumberOrig'] = div.text201 202 # pageNumberOrigNorm203 elif dc == 'pageNumberOrigNorm':204 pageinfo['pageNumberOrigNorm'] = div.text205 206 # pageHeaderTitle207 elif dc == 'pageHeaderTitle':208 pageinfo['pageHeaderTitle'] = div.text209 210 #logging.debug("processPageInfo: pageinfo=%s"%repr(pageinfo))211 return212 213 214 185 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None): 215 186 """returns single page from fulltext""" … … 452 423 return docinfo 453 424 425 #TODO: put mode into query 426 454 427 cachedQuery = docinfo.get('cachedQuery', None) 455 428 if cachedQuery is not None: … … 462 435 # different query 463 436 del docinfo['resultSize'] 464 del docinfo['result XML']437 del docinfo['results'] 465 438 
466 439 # cache query … … 469 442 # fetch full results 470 443 docpath = docinfo['textURLPath'] 471 params = {'document': docpath, 472 'mode': 'text', 473 'queryType': mode, 444 params = {'docId': docpath, 474 445 'query': query, 475 'queryResultPageSize': 1000, 476 'queryResultPN': 1, 477 'characterNormalization': pageinfo.get('characterNormalization', 'reg')} 478 pagexml = self.getServerData("doc-query.xql",urllib.urlencode(params)) 479 #pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&s=%s&viewMode=%s&characterNormalization=%s&highlightElementPos=%s&highlightElement=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, s, viewMode,characterNormalization, highlightElementPos, highlightElement, urllib.quote(highlightQuery))) 480 dom = ET.fromstring(pagexml) 481 # page content is in <div class="queryResultPage"> 482 pagediv = None 483 # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage'] 484 alldivs = dom.findall("div") 485 for div in alldivs: 486 dc = div.get('class') 487 # page content div 488 if dc == 'queryResultPage': 489 pagediv = div 490 491 elif dc == 'queryResultHits': 492 docinfo['resultSize'] = getInt(div.text) 493 494 if pagediv is not None: 495 # store XML in docinfo 496 docinfo['resultXML'] = ET.tostring(pagediv, 'UTF-8') 446 'pageSize': 1000, 447 'page': 1, 448 'outputFormat': 'html'} 449 pagexml = self.getServerData("query/QueryDocument",urllib.urlencode(params)) 450 results = [] 451 try: 452 dom = ET.fromstring(pagexml) 453 # page content is currently in multiple <td align=left> 454 alldivs = dom.findall(".//td[@align='left']") 455 for div in alldivs: 456 # TODO: can we put etree in the session? 
457 results.append(div) 458 459 except Exception, e: 460 logging.error("GetSearchResults: Error parsing search result: %s"%e) 461 462 # store results in docinfo 463 docinfo['resultSize'] = len(results) 464 docinfo['results'] = results 497 465 498 466 return docinfo … … 505 473 self.getSearchResults(mode=mode, query=query, pageinfo=pageinfo, docinfo=docinfo) 506 474 507 resultxml = docinfo.get('result XML', None)475 resultxml = docinfo.get('results', None) 508 476 if not resultxml: 509 logging.error("getResultPage: unable to find result XML")477 logging.error("getResultPage: unable to find results") 510 478 return "Error: no result!" 511 479 … … 562 530 return docinfo.get('full_%s'%queryType, []) 563 531 532 564 533 def getTocPage(self, mode='text', pn=None, start=None, size=None, pageinfo=None, docinfo=None): 565 534 """returns single page from the table of contents""" … … 584 553 pageurl = self.getLink('pn', toc['pn']) 585 554 tp += '<div class="tocline">' 586 tp += '<div class="toc name">[%s %s]</div>'%(toc['level-string'], toc['content']) 587 tp += '<div class="toc float right page"><a href="%s">Page: %s</a></div>'%(pageurl, toc['pn']) 555 content = toc['content'] 556 if content: 557 tp += '<div class="toc name">[%s] %s</div>'%(toc['level-string'], toc['content']) 558 else: 559 tp += '<div class="toc name">[Figure %s]</div>'%(toc['level-string']) 560 561 if toc.get('no', None): 562 tp += '<div class="toc page"><a href="%s">Page: %s (%s)</a></div>'%(pageurl, toc['pn'], toc['no']) 563 else: 564 tp += '<div class="toc page"><a href="%s">Page: %s</a></div>'%(pageurl, toc['pn']) 565 588 566 tp += '</div>\n' 589 567 -
css/docuviewer.css
r567 r568 147 147 } 148 148 149 div.tocbody.text .toc ,150 div.tocbody.figures .toc ,151 div.tocbody.concordance .toc {149 div.tocbody.text .toc.name, 150 div.tocbody.figures .toc.name, 151 div.tocbody.concordance .toc.name { 152 152 float:left; 153 153 clear:right; 154 } 155 div.tocbody.text .toc.float.right, 156 div.tocbody.figures .toc.float.right, 157 div.tocbody.concordance .toc.float.right { 154 margin-right: 1em; 155 } 156 div.tocbody.text .toc.page, 157 div.tocbody.figures .toc.page, 158 div.tocbody.concordance .toc.page { 158 159 float:right; 159 160 } … … 273 274 display: none; 274 275 } 276 /* running head */ 275 277 div.col.main div.content.text span.pb span.rhead { 276 display: block; 277 } 278 /* running head */ 279 div.col.main div.content.text div.pageHeaderTitle { 278 display: block; 280 279 text-align: center; 281 280 margin-bottom: 1em; -
documentViewer.py
r566 r568 589 589 if texttool: 590 590 docinfo = self.getDocinfoFromTexttool(docinfo, texttool) 591 # document info (including toc) from full text591 # document info from full text server 592 592 if docinfo.get('textURLPath', None): 593 593 docinfo = self.getTextInfo(mode=None, docinfo=docinfo) 594 # include list of pages TODO: do we need this always? 595 docinfo = self.getTextInfo(mode='pages', docinfo=docinfo) 594 596 595 597 # bib info
Note: See TracChangeset for help on using the changeset viewer.