Mercurial > hg > documentViewer
changeset 518:91051b36b9cc
Uses XML info from doc-info.xql for the table of contents now.
author | casties |
---|---|
date | Mon, 12 Mar 2012 19:01:14 +0100 |
parents | aaacdf551f6f |
children | 9a3cc3732194 |
files | MpdlXmlTextServer.py documentViewer.py version.txt zpt/toc_figures.zpt zpt/toc_text.zpt |
diffstat | 5 files changed, 96 insertions(+), 27 deletions(-) [+] |
line wrap: on
line diff
--- a/MpdlXmlTextServer.py Mon Mar 05 19:11:59 2012 +0100 +++ b/MpdlXmlTextServer.py Mon Mar 12 19:01:14 2012 +0100 @@ -73,8 +73,20 @@ def getTextInfo(self, mode='', docinfo=None): """reads document info, including page concordance, from text server""" - logging.debug("getDocInfo") - #TODO: check cached info + logging.debug("getTextInfo mode=%s"%mode) + if mode not in ['toc', 'figures', '']: + mode = '' + # check cached info + if mode: + # cached toc-request? + if 'full_%s'%mode in docinfo: + return docinfo + + else: + # no toc-request + if 'numTextPages' in docinfo: + return docinfo + docpath = docinfo.get('textURLPath', None) if docpath is None: logging.error("getTextInfo: no textURLPath!") @@ -83,7 +95,7 @@ # we need to set a result set size pagesize = 10000 pn = 1 - # fetch docinfo + # fetch docinfo pagexml = self.getServerData("doc-info.xql","document=%s&info=%s&pageSize=%s&pn=%s"%(docpath,mode,pagesize,pn)) dom = ET.fromstring(pagexml) # all info in tag <document> @@ -126,7 +138,7 @@ for p in pn: if p.tag == 'n': n = getInt(p.text) - page['n'] = n + page['pn'] = n elif p.tag == 'no': page['no'] = p.text elif p.tag == 'non': @@ -140,9 +152,27 @@ # toc elif name == 'toc': - # contains tags with table of contents - # TODO: implement - pass + # contains tags with table of contents/figures + # <toc-entry><page>13</page><level>3</level><content>Chapter I</content><level-string>1.</level-string><real-level>1</real-level></toc-entry> + tocs = [] + for te in tag: + toc = {} + for t in te: + if t.tag == 'page': + toc['pn'] = getInt(t.text) + elif t.tag == 'level': + toc['level'] = t.text + elif t.tag == 'content': + toc['content'] = t.text + elif t.tag == 'level-string': + toc['level-string'] = t.text + elif t.tag == 'real-level': + toc['real-level'] = t.text + + tocs.append(toc) + + # save as full_toc/full_figures + docinfo['full_%s'%mode] = tocs return docinfo @@ -429,7 +459,52 @@ return "ERROR: no results!" 
- def getToc(self, mode="text", docinfo=None): + def getToc(self, mode='text', docinfo=None): + """returns list of table of contents from docinfo""" + logging.debug("getToc mode=%s"%mode) + if mode == 'text': + queryType = 'toc' + else: + queryType = mode + + if not 'full_%s'%queryType in docinfo: + # get new toc + docinfo = self.getTextInfo(queryType, docinfo) + + return docinfo.get('full_%s'%queryType, []) + + def getTocPage(self, mode='text', pn=None, start=None, size=None, pageinfo=None, docinfo=None): + """returns single page from the table of contents""" + logging.debug("getTocPage mode=%s, pn=%s start=%s size=%s"%(mode,repr(pn),repr(start),repr(size))) + fulltoc = self.getToc(mode=mode, docinfo=docinfo) + if len(fulltoc) < 1: + logging.error("getTocPage: unable to find toc!") + return "Error: no table of contents!" + + if size is None: + size = pageinfo.get('tocPageSize', 30) + + if start is None: + start = (pn - 1) * size + + # paginate + first = (start - 1) + last = first + size + tocs = fulltoc[first:last] + tp = '<div>' + for toc in tocs: + pageurl = self.getLink('pn', toc['pn']) + tp += '<div class="tocline">' + tp += '<div class="toc name">[%s %s]</div>'%(toc['level-string'], toc['content']) + tp += '<div class="toc float right page"><a href="%s">Page: %s</a></div>'%(pageurl, toc['pn']) + tp += '</div>\n' + + tp += '</div>' + + return tp + + + def getToc_old(self, mode="text", docinfo=None): """loads table of contents and stores XML in docinfo""" logging.debug("getToc mode=%s"%mode) if mode == "none": @@ -472,7 +547,7 @@ return docinfo - def getTocPage(self, mode="text", pn=None, start=None, size=None, pageinfo=None, docinfo=None): + def getTocPage_old(self, mode="text", pn=None, start=None, size=None, pageinfo=None, docinfo=None): """returns single page from the table of contents""" logging.debug("getTocPage mode=%s, pn=%s"%(mode,pn)) if mode == "text":
--- a/documentViewer.py Mon Mar 05 19:11:59 2012 +0100 +++ b/documentViewer.py Mon Mar 12 19:01:14 2012 +0100 @@ -256,12 +256,8 @@ if not getattr(self, 'digilibBaseUrl', None): self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary" - docinfo = self.getDocinfo(mode=mode,url=url) + docinfo = self.getDocinfo(mode=mode,url=url,tocMode=tocMode) - if tocMode != "thumbs": - # get table of contents - self.getToc(mode=tocMode, docinfo=docinfo) - # auto viewMode: text if there is a text else images if viewMode=="auto": if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): @@ -429,7 +425,7 @@ - def getDocinfo(self, mode, url): + def getDocinfo(self, mode, url, tocMode=None): """returns docinfo depending on mode""" logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url)) # look for cached docinfo in session @@ -483,9 +479,9 @@ texttool = self.metadataService.getTexttoolData(dom=metaDom) if texttool: docinfo = self.getDocinfoFromTexttool(docinfo, texttool) - # document info from full text + # document info (including toc) from full text if docinfo.get('textURLPath', None): - docinfo = self.getTextInfo(docinfo=docinfo) + docinfo = self.getTextInfo(mode=tocMode, docinfo=docinfo) # bib info bib = self.metadataService.getBibData(dom=metaDom) @@ -729,13 +725,11 @@ # get number of pages np = int(docinfo.get('numPages', 0)) if np == 0: - # numPages unknown - maybe we can get it from text page - logging.warn("getPageInfo: numPages=0 trying getTextPage!") - if docinfo.get('textURLPath', None): - # cache text page as well - pageinfo['textPage'] = self.getTextPage(mode=viewLayer, pn=current, docinfo=docinfo, pageinfo=pageinfo) - np = int(docinfo.get('numPages', 0)) - + # try numTextPages + np = docinfo.get('numTextPages', 0) + if np != 0: + docinfo['numPages'] = np + # cache table of contents pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30)) pageinfo['numgroups'] = int(np / grpsize)
--- a/version.txt Mon Mar 05 19:11:59 2012 +0100 +++ b/version.txt Mon Mar 12 19:01:14 2012 +0100 @@ -1,1 +1,1 @@ -DocumentViewer 2.1a \ No newline at end of file +DocumentViewer 2.1.2a \ No newline at end of file
--- a/zpt/toc_figures.zpt Mon Mar 05 19:11:59 2012 +0100 +++ b/zpt/toc_figures.zpt Mon Mar 12 19:01:14 2012 +0100 @@ -7,7 +7,7 @@ <body> <!-- block used for main content area --> <div class="toc-figures" metal:define-macro="main" - tal:define="start pageinfo/start; tocsize docinfo/tocSize_figures; grpsize pageinfo/tocPageSize; + tal:define="start pageinfo/start; tocsize docinfo/numFigureEntries; grpsize pageinfo/tocPageSize; batch python:here.getBatch(start=start,size=grpsize,end=tocsize);"> <ul class="switcher"> <li><a
--- a/zpt/toc_text.zpt Mon Mar 05 19:11:59 2012 +0100 +++ b/zpt/toc_text.zpt Mon Mar 12 19:01:14 2012 +0100 @@ -7,7 +7,7 @@ <body> <!-- block used for main content area --> <div class="toc-text" metal:define-macro="main" - tal:define="start pageinfo/start; tocsize docinfo/tocSize_text; grpsize pageinfo/tocPageSize; + tal:define="start pageinfo/start; tocsize docinfo/numTocEntries; grpsize pageinfo/tocPageSize; batch python:here.getBatch(start=start,size=grpsize,end=tocsize);"> <ul class="switcher"> <li><a