Mercurial > hg > documentViewer
changeset 158:4f4fe4e56ffe
characterNormalization
author | abukhman |
---|---|
date | Tue, 24 Aug 2010 11:38:45 +0200 |
parents | de82ae2e9850 |
children | 29fc850d4a6f |
files | MpdlXmlTextServer.py documentViewer.py |
diffstat | 2 files changed, 18 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/MpdlXmlTextServer.py Tue Aug 24 11:33:26 2010 +0200 +++ b/MpdlXmlTextServer.py Tue Aug 24 11:38:45 2010 +0200 @@ -52,7 +52,7 @@ queryType =pageinfo['queryType'] viewMode= pageinfo['viewMode'] tocMode = pageinfo['tocMode'] - #characterNormalization = pageinfo ['characterNormalization'] + characterNormalization = pageinfo ['characterNormalization'] tocPN = pageinfo['tocPN'] selfurl = self.absolute_url() @@ -72,9 +72,7 @@ href = hrefNode.nodeValue if href.startswith('page-fragment.xql'): selfurl = self.absolute_url() - #pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN,characterNormalization)) - pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN)) - + pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN,characterNormalization)) hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) return serializeNode(pagenode) if (queryType=="fulltextMorph"): @@ -88,9 +86,7 @@ href = hrefNode.nodeValue if href.startswith('page-fragment.xql'): selfurl = self.absolute_url() - #pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN,characterNormalization)) - pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN)) - 
+ pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN,characterNormalization)) hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) if href.startswith('../lt/lemma.xql'): hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma_New'%(selfurl)) @@ -108,8 +104,7 @@ hrefNode = l.getAttributeNodeNS(None, u"href") if hrefNode: href = hrefNode.nodeValue - #hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization)) - hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s'%(viewMode,tocMode,tocPN,pn)) + hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization)) if href.startswith('../lt/lex.xql'): hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_voc'%selfurl) l.setAttributeNS(None, 'target', '_blank') @@ -135,21 +130,21 @@ docinfo['numPages'] = text.count("<pb ") return docinfo - def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None): + def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None, characterNormalization="reg"): """returns single page from fulltext""" docpath = docinfo['textURLPath'] path = docinfo['textURLPath'] url = docinfo['url'] viewMode= pageinfo['viewMode'] tocMode = pageinfo['tocMode'] - #characterNormalization = pageinfo ['characterNormalization'] + characterNormalization = pageinfo ['characterNormalization'] tocPN = pageinfo['tocPN'] selfurl = self.absolute_url() if mode 
== "text_dict": textmode = "textPollux" else: textmode = mode - #logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) + logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn) if highlightQuery is not None: textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) @@ -170,7 +165,7 @@ if hrefNode: href= hrefNode.nodeValue if href.startswith('#note-'): - hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)) + hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&characterNormalization=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,characterNormalization,tocPN,pn)) return serializeNode(pagenode) if mode == "xml": # first div contains text @@ -220,7 +215,7 @@ l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;") l.setAttributeNS(None, 'onClick', 'popupWin.focus();') if href.startswith('#note-'): - hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)) + hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&characterNormalization=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,characterNormalization,tocPN,pn)) return serializeNode(pagenode) return "no text here" @@ -307,17 +302,16 @@ url = docinfo['url'] selfurl = self.absolute_url() viewMode= pageinfo['viewMode'] - #characterNormalization =pageinfo ['characterNormalization'] + characterNormalization =pageinfo ['characterNormalization'] tocMode = pageinfo['tocMode'] tocPN = pageinfo['tocPN'] data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) - #page = 
data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&characterNormalization=%s'%(selfurl,url, viewMode, tocMode, tocPN, characterNormalization)) - page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) + page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&characterNormalization=%s'%(selfurl,url, viewMode, tocMode, tocPN, characterNormalization)) text = page.replace('mode=image','mode=texttool') - #logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) - #logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) + logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) + logging.debug("documentViewer (characterNormalization) text: %s"%(text)) return text def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
--- a/documentViewer.py Tue Aug 24 11:33:26 2010 +0200
+++ b/documentViewer.py Tue Aug 24 11:38:45 2010 +0200
@@ -678,7 +678,7 @@
 self.REQUEST.SESSION['docinfo'] = docinfo
 return docinfo

- def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
+ def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None, characterNormalization=None):
 """returns pageinfo with the given parameters"""
 pageinfo = {}
 current = getInt(current)
@@ -701,7 +701,7 @@
 pageinfo['numgroups'] += 1
 pageinfo['viewMode'] = viewMode
 pageinfo['tocMode'] = tocMode
- #pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','')
+ pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','')
 pageinfo['query'] = self.REQUEST.get('query',' ')
 pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
 pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
@@ -712,7 +712,9 @@
 pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
 toc = int (pageinfo['tocPN'])
 pageinfo['textPages'] =int (toc)
-
+
+
+
 if 'tocSize_%s'%tocMode in docinfo:
 tocSize = int(docinfo['tocSize_%s'%tocMode])
 tocPageSize = int(pageinfo['tocPageSize'])