--- documentViewer/MpdlXmlTextServer.py 2010/11/23 17:05:20 1.212 +++ documentViewer/MpdlXmlTextServer.py 2010/12/23 17:05:01 1.220 @@ -71,7 +71,7 @@ class MpdlXmlTextServer(SimpleItem): href = hrefNode.nodeValue if href.startswith('page-fragment.xql'): selfurl = self.absolute_url() - pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&optionToggle=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,optionToggle)) + pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&optionToggle=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,optionToggle,characterNormalization)) hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) return serializeNode(pagenode) if (queryType=="fulltextMorph"): @@ -85,7 +85,7 @@ class MpdlXmlTextServer(SimpleItem): href = hrefNode.nodeValue if href.startswith('page-fragment.xql'): selfurl = self.absolute_url() - pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&optionToggle=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,optionToggle)) + pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&optionToggle=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,optionToggle,characterNormalization)) hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) if href.startswith('../lt/lemma.xql'): hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma_New'%(selfurl)) @@ -103,7 +103,7 @@ class MpdlXmlTextServer(SimpleItem): hrefNode = l.getAttributeNodeNS(None, u"href") if hrefNode: href = hrefNode.nodeValue - hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&optionToggle=%s'%(viewMode,tocMode,tocPN,pn,optionToggle)) + hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&optionToggle=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,optionToggle,characterNormalization)) if href.startswith('../lt/lex.xql'): hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_voc'%selfurl) l.setAttributeNS(None, 'target', '_blank') @@ -117,33 +117,42 @@ class MpdlXmlTextServer(SimpleItem): return serializeNode(pagenode) return "no text here" - """def getNumPages(self, docinfo): - ""get list of pages from fulltext and put in docinfo"" - if 'numPages' in docinfo: - # already there - return docinfo - xquery = '//pb' - text = self.getServerData("xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) - docinfo['numPages'] = text.count("0: + docinfo['numPages']= int(getTextFromNode(pagedivs[0])) + return docinfo + + def getTocEntries (self, docinfo=None): + """ number of text entries""" + self.getInfoFromPage(docinfo) + return docinfo['tocEntries'] + def getFigureEntries (self, docinfo=None): + """ number of figure entries""" + self.getInfoFromPage(docinfo) + return docinfo['figureEntries'] + def getGisPlaces(self, docinfo=None, pageinfo=None): """ Show all Gis Places of whole Page""" xpath='//place' - docpath = docinfo['textURLPath'] + docpath = docinfo.get('textURLPath',None) + if not docpath: + return None + url = docinfo['url'] selfurl = self.absolute_url() pn = pageinfo['current'] @@ -180,27 +189,12 @@ class MpdlXmlTextServer(SimpleItem): myList = ",".join(hrefList) logging.debug("getALLGisPlaces :%s"%(myList)) return myList - - - def getPDF (self, docinfo=None, pageinfo=None): - """Show and Save different Pages as PDF in Options""" - selfurl=self.absolute_url() - pn=pageinfo['current'] - - viewMode =pageinfo['viewMode'] - - #text = ("page-fragment.xql","document=%s&mode=%s&pn=%s&export=%s"%(docinfo['textURLPath'], 'text', pn,'pdf')) - #text = self.getServerData("page-fragment.xql", "document=%s&mode=%s&pn=%s&export=%s"(docinfo['textURLPath'],'text', pn,'pdf')) - #logging.debug("text :%s"%(text)) - #dom =Parse(text) - #logging.debug("text :%s"%(text)) - #return text - + def getOrigPages (self, docinfo=None, pageinfo=None): """Show original page """ - docpath = docinfo['textURLPath'] - logging.debug ("docinfo['textURLPath']=%s"%(docinfo['textURLPath'])) - #url = docinfo['url'] + docpath = docinfo.get('textURLPath',None) + if not docpath: + return None selfurl = self.absolute_url() pn =pageinfo['current'] @@ -208,14 +202,58 @@ class MpdlXmlTextServer(SimpleItem): text = self.getServerData("page-fragment.xql","document=%s&mode=%s&pn=%s"%(docinfo['textURLPath'], 'text', pn)) dom =Parse(text) pagedivs = dom.xpath("//div[@class='pageNumberOrig']") - logging.debug("YYYYYYpagedivs :%s"%(pagedivs)) if len(pagedivs)>0: originalPage= getTextFromNode(pagedivs[0]) #return docinfo['originalPage'] return originalPage - - def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, viewMode=None, tocMode=None, tocPN=None, characterNormalization="regPlusNorm", highlightQuery=None, sn=None, optionToggle=None): + def getAllPlaces (self, docinfo=None): + """Show all Places if no places than 0""" + self.getInfoFromPage(docinfo) + return docinfo['allPlaces'] + + def getInfoFromPage(self, docinfo=None): + """ extract diverse info from page-fragment""" + docpath = docinfo['textURLPath'] + if 'allPlaces' in docinfo: + # allredy there + return docinfo + + if (docpath is not None): + text = self.getServerData("page-fragment.xql","document=%s"%(docinfo['textURLPath'])) + dom = Parse(text) + # figureEntries + pagedivs = dom.xpath("//div[@class='countFigureEntries']") + s = getTextFromNode(pagedivs[0]) + try: + docinfo['figureEntries'] = int(s) + except: + docinfo['figureEntries'] = 0 + # tocEntries + pagedivs = dom.xpath("//div[@class='countTocEntries']") + s = getTextFromNode(pagedivs[0]) + try: + docinfo['tocEntries'] = int(s) + except: + docinfo['tocEntries'] = 0 + # allPlaces + pagedivs = dom.xpath("//div[@class='countPlaces']") + s = getTextFromNode(pagedivs[0]) + try: + docinfo['allPlaces'] = int(s) + except: + docinfo['allPlaces'] = 0 + + else: + # no full text -- init to 0 + docinfo['figureEntries'] = 0 + docinfo['tocEntries'] = 0 + docinfo['allPlaces'] = 0 + + return docinfo + + + def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, viewMode=None, tocMode=None, tocPN=None, characterNormalization="reg", highlightQuery=None, sn=None, optionToggle=None): """returns single page from fulltext""" docpath = docinfo['textURLPath'] path = docinfo['textURLPath'] @@ -376,43 +414,34 @@ class MpdlXmlTextServer(SimpleItem): def getQueryResultHitsText(self, docinfo=None, pageinfo=None): """number of hits in Text of Contents mode""" + selfurl = self.absolute_url() + docpath = docinfo['textURLPath'] + viewMode= pageinfo['viewMode'] + text = self.getServerData("page-fragment.xql","document=%s&mode=%s"%(docinfo['textURLPath'], 'text')) + dom =Parse(text) + pagedivs = dom.xpath("//div[@class='countTocEntries']") + logging.debug ("pagedivs=%s"%(pagedivs)) + if len(pagedivs)>0: + originalPage= (getTextFromNode(pagedivs[0])) + tc = int (originalPage) + tc1 =tc/30+1 + return tc1 - docpath = docinfo['textURLPath'] - pagesize = pageinfo['queryPageSize'] - pn = pageinfo['searchPN'] - query =pageinfo['query'] - queryType =pageinfo['queryType'] - tocSearch = 0 - tocDiv = None - tocMode = pageinfo['tocMode'] - tocPN = pageinfo['tocPN'] - pagexml=self.getServerData("doc-query.xql", "document=%s&queryType=%s"%(docpath,'toc')) - pagedom = Parse(pagexml) - logging.debug("documentViewer (pagedom) pagedom: %s"%(pagedom)) - numdivs = pagedom.xpath("//div[@class='queryResultHits']") - tocSearch = int(getTextFromNode(numdivs[0])) - tc=int((tocSearch/30)+1) - return tc - def getQueryResultHitsFigures(self, docinfo=None, pageinfo=None): """number of hits in Text of Figures mode""" - docpath = docinfo['textURLPath'] - pagesize = pageinfo['queryPageSize'] - pn = pageinfo['searchPN'] - query =pageinfo['query'] - queryType =pageinfo['queryType'] - tocSearch = 0 - tocDiv = None - tocMode = pageinfo['tocMode'] - tocPN = pageinfo['tocPN'] - pagexml=self.getServerData("doc-query.xql", "document=%s&queryType=%s"%(docpath,'figures')) - pagedom = Parse(pagexml) - logging.debug("documentViewer (pagedom) pagedom: %s"%(pagedom)) - numdivs = pagedom.xpath("//div[@class='queryResultHits']") - tocSearch = int(getTextFromNode(numdivs[0])) - tc=int((tocSearch/30)+1) - return tc + selfurl = self.absolute_url() + docpath = docinfo['textURLPath'] + viewMode= pageinfo['viewMode'] + text = self.getServerData("page-fragment.xql","document=%s&mode=%s"%(docinfo['textURLPath'], 'text')) + dom =Parse(text) + pagedivs = dom.xpath("//div[@class='countFigureEntries']") + logging.debug ("pagedivs=%s"%(pagedivs)) + if len(pagedivs)>0: + originalPage= (getTextFromNode(pagedivs[0])) + tc = int (originalPage) + tc1 =tc/30+1 + return tc1 def getToc(self, mode="text", docinfo=None): @@ -466,8 +495,8 @@ class MpdlXmlTextServer(SimpleItem): tocMode = pageinfo['tocMode'] tocPN = pageinfo['tocPN'] - data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm&optionToggle=%s"%(docpath,queryType, pagesize, pn,optionToggle)) - page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&optionToggle=%s'%(selfurl,url, viewMode, tocMode, tocPN,optionToggle)) + data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm&optionToggle=1"%(docpath,queryType, pagesize, pn)) + page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&optionToggle=1'%(selfurl,url, viewMode, tocMode, tocPN)) text = page.replace('mode=image','mode=texttool') logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) #logging.debug("documentViewer (characterNormalization) text: %s"%(text))