--- documentViewer/MpdlXmlTextServer.py 2010/12/06 10:41:55 1.215 +++ documentViewer/MpdlXmlTextServer.py 2010/12/23 17:13:01 1.221 @@ -117,29 +117,36 @@ class MpdlXmlTextServer(SimpleItem): return serializeNode(pagenode) return "no text here" - """def getNumPages(self, docinfo): - ""get list of pages from fulltext and put in docinfo"" - if 'numPages' in docinfo: - # already there - return docinfo - xquery = '//pb' - text = self.getServerData("xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) - docinfo['numPages'] = text.count("0: + docinfo['numPages']= int(getTextFromNode(pagedivs[0])) + return docinfo + + def getTocEntries (self, docinfo=None): + """ number of text entries""" + docinfo = self.getInfoFromPage(docinfo) + logging.debug("gettocentries: docinfo=%s"%docinfo) + return docinfo['tocEntries'] + def getFigureEntries (self, docinfo=None): + """ number of figure entries""" + docinfo = self.getInfoFromPage(docinfo) + return docinfo['figureEntries'] + def getGisPlaces(self, docinfo=None, pageinfo=None): """ Show all Gis Places of whole Page""" xpath='//place' @@ -183,30 +190,12 @@ class MpdlXmlTextServer(SimpleItem): myList = ",".join(hrefList) logging.debug("getALLGisPlaces :%s"%(myList)) return myList - - - def getPDF (self, docinfo=None, pageinfo=None): - """Show and Save different Pages as PDF in Options""" - selfurl=self.absolute_url() - pn=pageinfo['current'] - - viewMode =pageinfo['viewMode'] - - #text = ("page-fragment.xql","document=%s&mode=%s&pn=%s&export=%s"%(docinfo['textURLPath'], 'text', pn,'pdf')) - #text = self.getServerData("page-fragment.xql", "document=%s&mode=%s&pn=%s&export=%s"(docinfo['textURLPath'],'text', pn,'pdf')) - #logging.debug("text :%s"%(text)) - #dom =Parse(text) - #logging.debug("text :%s"%(text)) - #return text - + def getOrigPages (self, docinfo=None, pageinfo=None): """Show original page """ docpath = docinfo.get('textURLPath',None) if not docpath: return None - - logging.debug ("docinfo['textURLPath']=%s"%(docinfo['textURLPath'])) - #url = docinfo['url'] selfurl = self.absolute_url() pn =pageinfo['current'] @@ -219,23 +208,53 @@ class MpdlXmlTextServer(SimpleItem): #return docinfo['originalPage'] return originalPage - def getAllPlaces (self, docinfo=None, pageinfo=None): + def getAllPlaces (self, docinfo=None): """Show all Places if no places than 0""" - docpath = docinfo['textURLPath'] - selfurl = self.absolute_url() - viewMode= pageinfo['viewMode'] - text = self.getServerData("page-fragment.xql","document=%s&mode=%s"%(docinfo['textURLPath'], 'text')) - dom =Parse(text) - pagedivs = dom.xpath("//div[@class='countPlaces']") - logging.debug ("pagedivs=%s"%(pagedivs)) - #originalPage= getTextFromNode(pagedivs[0]) - #return allPlaces - if len(pagedivs)>0: - originalPage= getTextFromNode(pagedivs[0]) - #return docinfo['originalPage'] - return originalPage + docinfo = self.getInfoFromPage(docinfo) + return docinfo['allPlaces'] + + def getInfoFromPage(self, docinfo=None): + """ extract diverse info from page-fragment""" + docpath = docinfo['textURLPath'] + if 'allPlaces' in docinfo: + # allredy there + return docinfo + + if (docpath is not None): + text = self.getServerData("page-fragment.xql","document=%s"%(docinfo['textURLPath'])) + dom = Parse(text) + # figureEntries + pagedivs = dom.xpath("//div[@class='countFigureEntries']") + s = getTextFromNode(pagedivs[0]) + try: + docinfo['figureEntries'] = int(s) + except: + docinfo['figureEntries'] = 0 + # tocEntries + pagedivs = dom.xpath("//div[@class='countTocEntries']") + s = getTextFromNode(pagedivs[0]) + try: + docinfo['tocEntries'] = int(s) + except: + docinfo['tocEntries'] = 0 + # allPlaces + pagedivs = dom.xpath("//div[@class='countPlaces']") + s = getTextFromNode(pagedivs[0]) + try: + docinfo['allPlaces'] = int(s) + except: + docinfo['allPlaces'] = 0 + + else: + # no full text -- init to 0 + docinfo['figureEntries'] = 0 + docinfo['tocEntries'] = 0 + docinfo['allPlaces'] = 0 + + return docinfo + - def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, viewMode=None, tocMode=None, tocPN=None, characterNormalization="regPlusNorm", highlightQuery=None, sn=None, optionToggle=None): + def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, viewMode=None, tocMode=None, tocPN=None, characterNormalization="reg", highlightQuery=None, sn=None, optionToggle=None): """returns single page from fulltext""" docpath = docinfo['textURLPath'] path = docinfo['textURLPath'] @@ -396,43 +415,34 @@ class MpdlXmlTextServer(SimpleItem): def getQueryResultHitsText(self, docinfo=None, pageinfo=None): """number of hits in Text of Contents mode""" + selfurl = self.absolute_url() + docpath = docinfo['textURLPath'] + viewMode= pageinfo['viewMode'] + text = self.getServerData("page-fragment.xql","document=%s&mode=%s"%(docinfo['textURLPath'], 'text')) + dom =Parse(text) + pagedivs = dom.xpath("//div[@class='countTocEntries']") + logging.debug ("pagedivs=%s"%(pagedivs)) + if len(pagedivs)>0: + originalPage= (getTextFromNode(pagedivs[0])) + tc = int (originalPage) + tc1 =tc/30+1 + return tc1 - docpath = docinfo['textURLPath'] - pagesize = pageinfo['queryPageSize'] - pn = pageinfo['searchPN'] - query =pageinfo['query'] - queryType =pageinfo['queryType'] - tocSearch = 0 - tocDiv = None - tocMode = pageinfo['tocMode'] - tocPN = pageinfo['tocPN'] - pagexml=self.getServerData("doc-query.xql", "document=%s&queryType=%s"%(docpath,'toc')) - pagedom = Parse(pagexml) - logging.debug("documentViewer (pagedom) pagedom: %s"%(pagedom)) - numdivs = pagedom.xpath("//div[@class='queryResultHits']") - tocSearch = int(getTextFromNode(numdivs[0])) - tc=int((tocSearch/30)+1) - return tc - def getQueryResultHitsFigures(self, docinfo=None, pageinfo=None): """number of hits in Text of Figures mode""" - docpath = docinfo['textURLPath'] - pagesize = pageinfo['queryPageSize'] - pn = pageinfo['searchPN'] - query =pageinfo['query'] - queryType =pageinfo['queryType'] - tocSearch = 0 - tocDiv = None - tocMode = pageinfo['tocMode'] - tocPN = pageinfo['tocPN'] - pagexml=self.getServerData("doc-query.xql", "document=%s&queryType=%s"%(docpath,'figures')) - pagedom = Parse(pagexml) - logging.debug("documentViewer (pagedom) pagedom: %s"%(pagedom)) - numdivs = pagedom.xpath("//div[@class='queryResultHits']") - tocSearch = int(getTextFromNode(numdivs[0])) - tc=int((tocSearch/30)+1) - return tc + selfurl = self.absolute_url() + docpath = docinfo['textURLPath'] + viewMode= pageinfo['viewMode'] + text = self.getServerData("page-fragment.xql","document=%s&mode=%s"%(docinfo['textURLPath'], 'text')) + dom =Parse(text) + pagedivs = dom.xpath("//div[@class='countFigureEntries']") + logging.debug ("pagedivs=%s"%(pagedivs)) + if len(pagedivs)>0: + originalPage= (getTextFromNode(pagedivs[0])) + tc = int (originalPage) + tc1 =tc/30+1 + return tc1 def getToc(self, mode="text", docinfo=None):