Changeset 516:7d7b639d7be7 in documentViewer
- Timestamp:
- Mar 5, 2012, 5:04:49 PM (13 years ago)
- Branch:
- default
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
MpdlXmlTextServer.py
r513 r516 71 71 return places 72 72 73 74 def getTextInfo(self, docinfo=None): 75 """reads document info, including page concordance, from text server""" 76 logging.debug("getDocInfo") 77 docpath = docinfo.get('textURLPath', None) 78 if docpath is None: 79 logging.error("getTextInfo: no textURLPath!") 80 return docinfo 81 82 # we need to set a result set size 83 pagesize = 10000 84 pn = 1 85 # fetch docinfo 86 pagexml = self.getServerData("doc-info.xql","document=%s&pageSize=%s&pn=%s"%(docpath,pagesize,pn)) 87 dom = ET.fromstring(pagexml) 88 # all info in tag <document> 89 doc = dom.find("document") 90 if doc is None: 91 logging.error("getTextInfo: unable to find document-tag!") 92 else: 93 # go through all child elements 94 for tag in doc: 95 name = tag.tag 96 # numTextPages 97 if name == 'countPages': 98 np = getInt(tag.text) 99 if np > 0: 100 docinfo['numTextPages'] = np 101 102 # numFigureEntries 103 elif name == 'countFigureEntries': 104 docinfo['numFigureEntries'] = getInt(tag.text) 105 106 # numTocEntries 107 elif name == 'countTocEntries': 108 # WTF: s1 = int(s)/30+1 109 docinfo['numTocEntries'] = getInt(tag.text) 110 111 # numPlaces 112 elif name == 'countPlaces': 113 docinfo['numPlaces'] = getInt(tag.text) 114 115 # pageNumbers 116 elif name == 'pageNumbers': 117 # contains tags with page numbers 118 # <pn><n>4</n><no>4</no><non/></pn> 119 # n=scan number, no=original page no, non=normalized original page no 120 # pageNumbers is a dict indexed by scan number 121 pages = {} 122 for pn in tag: 123 page = {} 124 n = 0 125 for p in pn: 126 if p.tag == 'n': 127 n = getInt(p.text) 128 page['n'] = n 129 elif p.tag == 'no': 130 page['no'] = p.text 131 elif p.tag == 'non': 132 page['non'] = p.text 133 134 if n > 0: 135 pages[n] = page 136 137 docinfo['pageNumbers'] = pages 138 logging.debug("got pageNumbers=%s"%repr(pages)) 139 140 return docinfo 141 73 142 74 143 def processPageInfo(self, dom, docinfo, pageinfo): … … 334 403 """returns single page from the table of contents""" 335 404 logging.debug("getResultsPage mode=%s, pn=%s"%(mode,pn)) 336 # check for cached result 337 if not 'resultXML' in docinfo: 338 self.getSearchResults(mode=mode, query=query, pageinfo=pageinfo, docinfo=docinfo) 405 # get (cached) result 406 self.getSearchResults(mode=mode, query=query, pageinfo=pageinfo, docinfo=docinfo) 339 407 340 408 resultxml = docinfo.get('resultXML', None) -
documentViewer.py
r514 r516 184 184 return self.template.fulltextclient.getResultsPage(**args) 185 185 186 def getTextInfo(self, **args): 187 """returns document info from the text server""" 188 return self.template.fulltextclient.getTextInfo(**args) 189 186 190 def getToc(self, **args): 187 191 """loads table of contents and stores XML in docinfo""" … … 480 484 if texttool: 481 485 docinfo = self.getDocinfoFromTexttool(docinfo, texttool) 486 # document info from full text 487 if docinfo.get('textURLPath', None): 488 docinfo = self.getTextInfo(docinfo=docinfo) 482 489 483 490 # bib info … … 510 517 # image path 511 518 if mode != 'texttool': 512 # override image path from texttool with url 519 # override image path from texttool with url TODO: how about mode=auto? 513 520 docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1) 514 521 … … 517 524 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath'] 518 525 docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath']) 526 527 # check numPages 528 if docinfo.get('numPages', 0) == 0: 529 if docinfo.get('numTextPages', 0) > 0: 530 # replace with numTextPages (text-only?) 531 docinfo['numPages'] = docinfo['numTextPages'] 519 532 520 533 logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys()) … … 523 536 self.REQUEST.SESSION['docinfo'] = docinfo 524 537 return docinfo 538 525 539 526 540 def getDocinfoFromResource(self, docinfo, resource): … … 699 713 pageinfo['tocMode'] = tocMode 700 714 715 # TODO: unify current and pn! 701 716 current = getInt(current) 702 717 pageinfo['current'] = current … … 716 731 if np == 0: 717 732 # numPages unknown - maybe we can get it from text page 733 logging.warn("getPageInfo: numPages=0 trying getTextPage!") 718 734 if docinfo.get('textURLPath', None): 719 735 # cache text page as well … … 733 749 pageinfo['pageZero'] = pageZero 734 750 pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np) 735 751 # more page parameters 736 752 pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg') 753 if docinfo.get('pageNumbers'): 754 # get original page numbers 755 pageNumber = docinfo['pageNumbers'].get(current, None) 756 if pageNumber is not None: 757 pageinfo['pageNumberOrig'] = pageNumber['no'] 758 pageinfo['pageNumberOrigNorm'] = pageNumber['non'] 737 759 738 760 # cache search results -
version.txt
r514 r516 1 DocumentViewer 2. 0b1 DocumentViewer 2.1a -
zpt/toc_thumbs.zpt
r489 r516 12 12 numgroups pageinfo/numgroups; 13 13 pageBatch pageinfo/pageBatch; pageZero pageinfo/pageZero; 14 pageNumbers docinfo/pageNumbers | nothing; 14 15 left python:test(flowLtr,pageBatch['prevStart'],pageBatch['nextStart']); 15 16 right python:test(flowLtr,pageBatch['nextStart'],pageBatch['prevStart']);"> … … 60 61 tal:attributes="src python:test(docinfo['imageURL'],here.getScalerUrl(pn=idx,dw=100,dh=100,docinfo=docinfo),'images/pic'); 61 62 alt idx" /><br/> 62 <span tal:content="idx" /> 63 <span title="Scan number" tal:content="idx"/> 64 <span tal:condition="python:pageNumbers and pageNumbers[idx]['no']" title="Original page number" tal:content="python:' (%s)'%(pageNumbers[idx]['no'])"/> 63 65 </a> 64 66 </td>
Note: See TracChangeset
for help on using the changeset viewer.