--- documentViewer/documentViewer.py 2010/04/21 13:50:59 1.46 +++ documentViewer/documentViewer.py 2010/04/30 14:36:00 1.48 @@ -10,6 +10,9 @@ from Ft.Xml.Domlette import Nonvalidatin from Ft.Xml.Domlette import PrettyPrint, Print from Ft.Xml import EMPTY_NAMESPACE, Parse +from xml.dom.minidom import parse, parseString + + import Ft.Xml.XPath import cStringIO @@ -93,6 +96,7 @@ class documentViewer(Folder): page_main_images = PageTemplateFile('zpt/page_main_images', globals()) page_main_text = PageTemplateFile('zpt/page_main_text', globals()) page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) + page_main_xml = PageTemplateFile('zpt/page_main_xml', globals()) head_main = PageTemplateFile('zpt/head_main', globals()) docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) info_xml = PageTemplateFile('zpt/info_xml', globals()) @@ -169,7 +173,7 @@ class documentViewer(Folder): @param mode: defines how to access the document behind url @param url: url which contains display information @param viewMode: if images display images, if text display text, default is auto (text,images or auto) - @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, search) + @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph) ''' @@ -184,11 +188,14 @@ class documentViewer(Folder): self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" docinfo = self.getDocinfo(mode=mode,url=url) - pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) + + if tocMode != "thumbs": # get table of contents docinfo = self.getToc(mode=tocMode, docinfo=docinfo) - + + pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) + if viewMode=="auto": # automodus gewaehlt if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert viewMode="text" @@ -647,15 +654,33 @@ class documentViewer(Folder): pageinfo['numgroups'] = int(np / grpsize) if np % grpsize > 0: pageinfo['numgroups'] += 1 - + + pageinfo['viewMode'] = viewMode pageinfo['tocMode'] = tocMode pageinfo['query'] = self.REQUEST.get('query',' ') pageinfo['queryType'] = self.REQUEST.get('queryType',' ') pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext') + + pageinfo['textPN'] = self.REQUEST.get('textPN','1') + pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30') pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '20') pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') + toc = int (pageinfo['tocPN']) + pageinfo['textPages'] =int (toc) + + if 'tocSize_%s'%tocMode in docinfo: + tocSize = int(docinfo['tocSize_%s'%tocMode]) + tocPageSize = int(pageinfo['tocPageSize']) + # cached toc + + if tocSize%tocPageSize>0: + tocPages=tocSize/tocPageSize+1 + else: + tocPages=tocSize/tocPageSize + pageinfo['tocPN'] = min (tocPages,toc) + pageinfo['searchPN'] =self.REQUEST.get('searchPN','1') pageinfo['sn'] =self.REQUEST.get('sn','1') @@ -672,19 +697,47 @@ class documentViewer(Folder): viewMode= pageinfo['viewMode'] tocMode = pageinfo['tocMode'] tocPN = pageinfo['tocPN'] - pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn) ,outputUnicode=False) + + page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn) ,outputUnicode=False) + pagexm = page.replace('?document=/echo/la/Benedetti_1585.xml','?url=/mpiwg/online/permanent/library/163127KK') + pagexml=pagexm.replace('mode=text','mode=texttool') pagedom = Parse(pagexml) - #pagedivs = pagedom.xpath("//div[@class='queryResultPage']") - selfurl = self.absolute_url() - page = pagexml.replace('page-fragment.xql?document=/echo/la/Benedetti_1585.xml','%s?url=/mpiwg/online/permanent/library/163127KK&viewMode=%s&tocMode=%s&tocPN=%s&query=%s&queryType=%s'%(selfurl, viewMode, tocMode, tocPN, query, queryType)) - text =page.replace('mode=text','mode=texttool') - return text - #if len(pagedivs) > 0: - # pagenode = pagedom[0] - # return serializeNode(pagenode) - #else: - # return "xaxa" - + + if (queryType=="fulltext")or(queryType=="fulltextMorph")or(queryType=="xpath")or(queryType=="xquery"): + pagedivs = pagedom.xpath("//div[@class='queryResultPage']") + selfurl = self.absolute_url() + page = pagexml.replace('page-fragment.xql?document=/echo/la/Benedetti_1585.xml','%s?url=/mpiwg/online/permanent/library/163127KK&viewMode=%s&tocMode=%s&tocPN=%s&query=%s&queryType=%s'%(selfurl, viewMode, tocMode, tocPN, query, queryType)) + text =page.replace('mode=text','mode=texttool') + return text + + if (queryType=="ftIndex")or(queryType=="ftIndexMorph"): + pagedivs= pagedom.xpath("//div[@class='queryResultPage']") + if len(pagedivs)>0: + pagenode=pagedivs[0] + links=pagenode.xpath("//a") + for l in links: + hrefNode = l.getAttributeNodeNS(None, u"href") + if hrefNode: + href = hrefNode.nodeValue + if href.startswith('../lt/lex.xql'): + selfurl = self.absolute_url() + hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl) + + + + l.setAttributeNS(None, 'target', '_blank') + l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=200, scrollbars=1'); return false;") + l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') + if href.startswith('../lt/lemma.xql'): + selfurl = self.absolute_url() + hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl) + l.setAttributeNS(None, 'target', '_blank') + l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=200, scrollbars=1'); return false;") + l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') + return serializeNode(pagenode) + + return "no text here" + def getNumPages(self,docinfo=None): """get list of pages from fulltext and put in docinfo""" xquery = '//pb' @@ -707,10 +760,6 @@ class documentViewer(Folder): #tocPN = pageinfo['tocPN'] pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn), outputUnicode=False) - ####### - #page = pagexml.replace('page-fragment.xql?document=/echo/la/Benedetti_1585.xml','%s?url=/mpiwg/online/permanent/library/163127KK&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl, viewMode, tocMode, tocPN)) - #text =page.replace('mode=text','mode=texttool') - ####### # post-processing downloaded xml pagedom = Parse(pagexml) # plain text mode @@ -721,7 +770,12 @@ class documentViewer(Folder): if len(pagedivs) > 0: pagenode = pagedivs[0] return serializeNode(pagenode) - + if mode == "xml": + # first div contains text + pagedivs = pagedom.xpath("/div") + if len(pagedivs) > 0: + pagenode = pagedivs[0] + return serializeNode(pagenode) # text-with-links mode if mode == "text_dict": # first div contains text @@ -742,11 +796,15 @@ class documentViewer(Folder): hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl) # add target l.setAttributeNS(None, 'target', '_blank') + l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;") + l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') if href.startswith('lt/lemma.xql'): selfurl = self.absolute_url() hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl) l.setAttributeNS(None, 'target', '_blank') + l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;") + l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') return serializeNode(pagenode) return "no text here" @@ -756,18 +814,36 @@ class documentViewer(Folder): pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","query=%s&language=%s"%(query,language),outputUnicode=False) return pagexml - def getLemma(self): - """lemma""" - pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","query=%s&language=%s"%(query,language),outputUnicode=False) + def getLemma(self, lemma=None, language=None): + """simular words lemma """ + pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","lemma=%s&language=%s"%(lemma,language),outputUnicode=False) return pagexml + def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): + """number of""" + docpath = docinfo['textURLPath'] + pagesize = pageinfo['queryPageSize'] + pn = pageinfo['searchPN'] + query =pageinfo['query'] + queryType =pageinfo['queryType'] + + tocSearch = 0 + tocDiv = None + pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) + + pagedom = Parse(pagexml) + numdivs = pagedom.xpath("//div[@class='queryResultHits']") + tocSearch = int(getTextFromNode(numdivs[0])) + tc=int((tocSearch/20)+1) + logging.debug("documentViewer (gettoc) tc: %s"%(tc)) + return tc + def getToc(self, mode="text", docinfo=None): """loads table of contents and stores in docinfo""" logging.debug("documentViewer (gettoc) mode: %s"%(mode)) if 'tocSize_%s'%mode in docinfo: # cached toc - return docinfo - + return docinfo docpath = docinfo['textURLPath'] # we need to set a result set size pagesize = 1000