--- documentViewer/documentViewer.py 2010/04/08 11:04:51 1.44 +++ documentViewer/documentViewer.py 2010/04/30 14:36:00 1.48 @@ -10,6 +10,9 @@ from Ft.Xml.Domlette import Nonvalidatin from Ft.Xml.Domlette import PrettyPrint, Print from Ft.Xml import EMPTY_NAMESPACE, Parse +from xml.dom.minidom import parse, parseString + + import Ft.Xml.XPath import cStringIO @@ -93,6 +96,7 @@ class documentViewer(Folder): page_main_images = PageTemplateFile('zpt/page_main_images', globals()) page_main_text = PageTemplateFile('zpt/page_main_text', globals()) page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) + page_main_xml = PageTemplateFile('zpt/page_main_xml', globals()) head_main = PageTemplateFile('zpt/head_main', globals()) docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) info_xml = PageTemplateFile('zpt/info_xml', globals()) @@ -102,7 +106,7 @@ class documentViewer(Folder): changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) - def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"): + def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"): """init document viewer""" self.id=id self.title=title @@ -163,13 +167,14 @@ class documentViewer(Folder): return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) security.declareProtected('View','index_html') - def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): + def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None): ''' view it @param mode: defines how to access the document behind url @param url: url which contains display information @param viewMode: if images display images, if text display text, default is auto (text,images or auto) - @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures) + @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) + @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph) ''' logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) @@ -183,11 +188,14 @@ class documentViewer(Folder): self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" docinfo = self.getDocinfo(mode=mode,url=url) - pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) + + if tocMode != "thumbs": # get table of contents docinfo = self.getToc(mode=tocMode, docinfo=docinfo) - + + pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) + if viewMode=="auto": # automodus gewaehlt if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert viewMode="text" @@ -646,16 +654,90 @@ class documentViewer(Folder): pageinfo['numgroups'] = int(np / grpsize) if np % grpsize > 0: pageinfo['numgroups'] += 1 - + + pageinfo['viewMode'] = viewMode pageinfo['tocMode'] = tocMode - pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '10') + pageinfo['query'] = self.REQUEST.get('query',' ') + pageinfo['queryType'] = self.REQUEST.get('queryType',' ') + pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext') + + pageinfo['textPN'] = self.REQUEST.get('textPN','1') + + pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30') + pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '20') pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') + toc = int (pageinfo['tocPN']) + pageinfo['textPages'] =int (toc) + + if 'tocSize_%s'%tocMode in docinfo: + tocSize = int(docinfo['tocSize_%s'%tocMode]) + tocPageSize = int(pageinfo['tocPageSize']) + # cached toc + + if tocSize%tocPageSize>0: + tocPages=tocSize/tocPageSize+1 + else: + tocPages=tocSize/tocPageSize + pageinfo['tocPN'] = min (tocPages,toc) + + pageinfo['searchPN'] =self.REQUEST.get('searchPN','1') + pageinfo['sn'] =self.REQUEST.get('sn','1') return pageinfo - - + def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None): + """get search list""" + docpath = docinfo['textURLPath'] + pagesize = pageinfo['queryPageSize'] + pn = pageinfo['searchPN'] + sn = pageinfo['sn'] + query =pageinfo['query'] + queryType =pageinfo['queryType'] + viewMode= pageinfo['viewMode'] + tocMode = pageinfo['tocMode'] + tocPN = pageinfo['tocPN'] + + page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn) ,outputUnicode=False) + pagexm = page.replace('?document=/echo/la/Benedetti_1585.xml','?url=/mpiwg/online/permanent/library/163127KK') + pagexml=pagexm.replace('mode=text','mode=texttool') + pagedom = Parse(pagexml) + + if (queryType=="fulltext")or(queryType=="fulltextMorph")or(queryType=="xpath")or(queryType=="xquery"): + pagedivs = pagedom.xpath("//div[@class='queryResultPage']") + selfurl = self.absolute_url() + page = pagexml.replace('page-fragment.xql?document=/echo/la/Benedetti_1585.xml','%s?url=/mpiwg/online/permanent/library/163127KK&viewMode=%s&tocMode=%s&tocPN=%s&query=%s&queryType=%s'%(selfurl, viewMode, tocMode, tocPN, query, queryType)) + text =page.replace('mode=text','mode=texttool') + return text + + if (queryType=="ftIndex")or(queryType=="ftIndexMorph"): + pagedivs= pagedom.xpath("//div[@class='queryResultPage']") + if len(pagedivs)>0: + pagenode=pagedivs[0] + links=pagenode.xpath("//a") + for l in links: + hrefNode = l.getAttributeNodeNS(None, u"href") + if hrefNode: + href = hrefNode.nodeValue + if href.startswith('../lt/lex.xql'): + selfurl = self.absolute_url() + hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl) + + + + l.setAttributeNS(None, 'target', '_blank') + l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=200, scrollbars=1'); return false;") + l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') + if href.startswith('../lt/lemma.xql'): + selfurl = self.absolute_url() + hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl) + l.setAttributeNS(None, 'target', '_blank') + l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=200, scrollbars=1'); return false;") + l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') + return serializeNode(pagenode) + + return "no text here" + def getNumPages(self,docinfo=None): """get list of pages from fulltext and put in docinfo""" xquery = '//pb' @@ -664,7 +746,7 @@ class documentViewer(Folder): docinfo['numPages'] = text.count(" 0: pagenode = pagedivs[0] return serializeNode(pagenode) - + if mode == "xml": + # first div contains text + pagedivs = pagedom.xpath("/div") + if len(pagedivs) > 0: + pagenode = pagedivs[0] + return serializeNode(pagenode) # text-with-links mode if mode == "text_dict": # first div contains text @@ -700,20 +793,57 @@ class documentViewer(Folder): # is pollux link selfurl = self.absolute_url() # change href - hrefNode.nodeValue = href.replace('lt/lex.xql','%s/head_main_voc'%selfurl) + hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl) # add target l.setAttributeNS(None, 'target', '_blank') + l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;") + l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') + + if href.startswith('lt/lemma.xql'): + selfurl = self.absolute_url() + hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl) + l.setAttributeNS(None, 'target', '_blank') + l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;") + l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') return serializeNode(pagenode) return "no text here" + def getTranslate(self, query=None, language=None): + """translate into another languages""" + pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","query=%s&language=%s"%(query,language),outputUnicode=False) + return pagexml + + def getLemma(self, lemma=None, language=None): + """simular words lemma """ + pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","lemma=%s&language=%s"%(lemma,language),outputUnicode=False) + return pagexml + + def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): + """number of""" + docpath = docinfo['textURLPath'] + pagesize = pageinfo['queryPageSize'] + pn = pageinfo['searchPN'] + query =pageinfo['query'] + queryType =pageinfo['queryType'] + + tocSearch = 0 + tocDiv = None + pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) + + pagedom = Parse(pagexml) + numdivs = pagedom.xpath("//div[@class='queryResultHits']") + tocSearch = int(getTextFromNode(numdivs[0])) + tc=int((tocSearch/20)+1) + logging.debug("documentViewer (gettoc) tc: %s"%(tc)) + return tc + def getToc(self, mode="text", docinfo=None): """loads table of contents and stores in docinfo""" logging.debug("documentViewer (gettoc) mode: %s"%(mode)) if 'tocSize_%s'%mode in docinfo: # cached toc - return docinfo - + return docinfo docpath = docinfo['textURLPath'] # we need to set a result set size pagesize = 1000 @@ -741,7 +871,7 @@ class documentViewer(Folder): #docinfo['tocDiv_%s'%mode] = tocDiv return docinfo - def getTocPage(self, mode="toc", pn=1, pageinfo=None, docinfo=None): + def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None): """returns single page from the table of contents""" # TODO: this should use the cached TOC if mode == "text": @@ -751,19 +881,28 @@ class documentViewer(Folder): docpath = docinfo['textURLPath'] pagesize = pageinfo['tocPageSize'] pn = pageinfo['tocPN'] - pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) + + selfurl = self.absolute_url() + viewMode= pageinfo['viewMode'] + tocMode = pageinfo['tocMode'] + tocPN = pageinfo['tocPN'] + + pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn), outputUnicode=False) + page = pagexml.replace('page-fragment.xql?document=/echo/la/Benedetti_1585.xml','%s?url=/mpiwg/online/permanent/library/163127KK&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl, viewMode, tocMode, tocPN)) + text = page.replace('mode=image','mode=texttool') + return text # post-processing downloaded xml - pagedom = Parse(pagexml) + #pagedom = Parse(text) # div contains text - pagedivs = pagedom.xpath("//div[@class='queryResultPage']") - if len(pagedivs) > 0: - pagenode = pagedivs[0] - return serializeNode(pagenode) - else: - return "No TOC!" + #pagedivs = pagedom.xpath("//div[@class='queryResultPage']") + #if len(pagedivs) > 0: + # pagenode = pagedivs[0] + # return serializeNode(pagenode) + #else: + # return "No TOC!" - def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None): + def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None): """init document viewer""" self.title=title self.digilibBaseUrl = digilibBaseUrl