--- documentViewer/documentViewer.py 2010/04/19 13:13:13 1.45 +++ documentViewer/documentViewer.py 2010/05/19 16:19:45 1.52 @@ -2,6 +2,7 @@ from OFS.Folder import Folder from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate from Products.PageTemplates.PageTemplateFile import PageTemplateFile +from Products.PythonScripts.standard import url_quote from AccessControl import ClassSecurityInfo from AccessControl import getSecurityManager from Globals import package_home @@ -10,6 +11,9 @@ from Ft.Xml.Domlette import Nonvalidatin from Ft.Xml.Domlette import PrettyPrint, Print from Ft.Xml import EMPTY_NAMESPACE, Parse +from xml.dom.minidom import parse, parseString + + import Ft.Xml.XPath import cStringIO @@ -93,6 +97,7 @@ class documentViewer(Folder): page_main_images = PageTemplateFile('zpt/page_main_images', globals()) page_main_text = PageTemplateFile('zpt/page_main_text', globals()) page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) + page_main_xml = PageTemplateFile('zpt/page_main_xml', globals()) head_main = PageTemplateFile('zpt/head_main', globals()) docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) info_xml = PageTemplateFile('zpt/info_xml', globals()) @@ -169,8 +174,8 @@ class documentViewer(Folder): @param mode: defines how to access the document behind url @param url: url which contains display information @param viewMode: if images display images, if text display text, default is auto (text,images or auto) - @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, search) - @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph) + @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) + @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma) ''' logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) @@ -184,11 +189,14 @@ class documentViewer(Folder): self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" docinfo = self.getDocinfo(mode=mode,url=url) - pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) + + if tocMode != "thumbs": # get table of contents docinfo = self.getToc(mode=tocMode, docinfo=docinfo) - + + pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) + if viewMode=="auto": # automodus gewaehlt if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert viewMode="text" @@ -538,7 +546,10 @@ class documentViewer(Folder): textUrls = dom.xpath("//texttool/text-url-path") if textUrls and (len(textUrls) > 0): textUrl = getTextFromNode(textUrls[0]) - docinfo['textURLPath'] = textUrl + docinfo['textURLPath'] = textUrl + if not docinfo['imagePath']: + # text-only, no page images + docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht presentationUrls = dom.xpath("//texttool/presentation") docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag @@ -551,7 +562,7 @@ class documentViewer(Folder): presentationUrl = url.replace('index.meta', presentationPath) else: presentationUrl = url + "/" + presentationPath - docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht + docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info @@ -647,15 +658,33 @@ class documentViewer(Folder): pageinfo['numgroups'] = int(np / grpsize) if np % grpsize > 0: pageinfo['numgroups'] += 1 - + + pageinfo['viewMode'] = viewMode pageinfo['tocMode'] = tocMode pageinfo['query'] = self.REQUEST.get('query',' ') pageinfo['queryType'] = self.REQUEST.get('queryType',' ') pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext') + + pageinfo['textPN'] = self.REQUEST.get('textPN','1') + pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30') pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '20') pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') + toc = int (pageinfo['tocPN']) + pageinfo['textPages'] =int (toc) + + if 'tocSize_%s'%tocMode in docinfo: + tocSize = int(docinfo['tocSize_%s'%tocMode]) + tocPageSize = int(pageinfo['tocPageSize']) + # cached toc + + if tocSize%tocPageSize>0: + tocPages=tocSize/tocPageSize+1 + else: + tocPages=tocSize/tocPageSize + pageinfo['tocPN'] = min (tocPages,toc) + pageinfo['searchPN'] =self.REQUEST.get('searchPN','1') pageinfo['sn'] =self.REQUEST.get('sn','1') @@ -664,23 +693,87 @@ class documentViewer(Folder): def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None): """get search list""" docpath = docinfo['textURLPath'] + url = docinfo['url'] + logging.debug("documentViewer (gettoc) docpath: %s"%(docpath)) + logging.debug("documentViewer (gettoc) url: %s"%(url)) pagesize = pageinfo['queryPageSize'] pn = pageinfo['searchPN'] sn = pageinfo['sn'] query =pageinfo['query'] queryType =pageinfo['queryType'] - - pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn) ,outputUnicode=False) + viewMode= pageinfo['viewMode'] + tocMode = pageinfo['tocMode'] + tocPN = pageinfo['tocPN'] + selfurl = self.absolute_url() + logging.debug("documentViewer (gettoc) /mpdl/interface/doc-query.xql?document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode)) + page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode) ,outputUnicode=False) + pagexml = page.replace('?document=%s'%str(docpath),'?url=%s'%url) pagedom = Parse(pagexml) - #pagedivs = pagedom.xpath("//div[@class='queryResultPage']") + if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): + pagedivs = pagedom.xpath("//div[@class='queryResultPage']") + if len(pagedivs)>0: + pagenode=pagedivs[0] + links=pagenode.xpath("//a") + for l in links: + hrefNode = l.getAttributeNodeNS(None, u"href") + if hrefNode: + href = hrefNode.nodeValue + if href.startswith('page-fragment.xql'): + selfurl = self.absolute_url() + #l.setAttributeNS(None, "span class = 'hit highlight'", "background-color: #77DD77;") + pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN)) + hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) + l.setAttributeNS(None, "onClick", "id='highlighting'") + return serializeNode(pagenode) - return pagexml - #if len(pagedivs) > 0: - # pagenode = pagedom[0] - # return serializeNode(pagenode) - #else: - # return "xaxa" - + if (queryType=="fulltextMorph"): + pagedivs = pagedom.xpath("//div[@class='queryResult']") + + if len(pagedivs)>0: + pagenode=pagedivs[0] + links=pagenode.xpath("//a") + for l in links: + hrefNode = l.getAttributeNodeNS(None, u"href") + if hrefNode: + href = hrefNode.nodeValue + if href.startswith('page-fragment.xql'): + selfurl = self.absolute_url() + pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN)) + hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) + if href.startswith('../lt/lemma.xql'): + selfurl = self.absolute_url() + hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl) + l.setAttributeNS(None, 'target', '_blank') + l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") + l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') + return serializeNode(pagenode) + + if (queryType=="ftIndex")or(queryType=="ftIndexMorph"): + pagedivs= pagedom.xpath("//div[@class='queryResultPage']") + if len(pagedivs)>0: + pagenode=pagedivs[0] + links=pagenode.xpath("//a") + for l in links: + hrefNode = l.getAttributeNodeNS(None, u"href") + if hrefNode: + href = hrefNode.nodeValue + hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s'%(viewMode,tocMode,tocPN,pn)) + + if href.startswith('../lt/lex.xql'): + # selfurl = self.absolute_url() + hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_voc'%selfurl) + l.setAttributeNS(None, 'target', '_blank') + l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") + l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') + if href.startswith('../lt/lemma.xql'): + #selfurl = self.absolute_url() + hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%selfurl) + l.setAttributeNS(None, 'target', '_blank') + l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") + l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') + return serializeNode(pagenode) + return "no text here" + def getNumPages(self,docinfo=None): """get list of pages from fulltext and put in docinfo""" xquery = '//pb' @@ -689,7 +782,7 @@ class documentViewer(Folder): docinfo['numPages'] = text.count(" 0: + pagenode = pagedivs[0] + links = pagenode.xpath("//a") + for l in links: + hrefNode = l.getAttributeNodeNS(None, u"href") + if hrefNode: + href= hrefNode.nodeValue + if href.startswith('#note-0006-01'): + selfurl = self.absolute_url() + hrefNode.nodeValue = href.replace("href='#note-'",'xaxa/%s/'%selfurl) return serializeNode(pagenode) - + if mode == "xml": + # first div contains text + pagedivs = pagedom.xpath("/div") + if len(pagedivs) > 0: + pagenode = pagedivs[0] + return serializeNode(pagenode) # text-with-links mode if mode == "text_dict": # first div contains text @@ -730,20 +838,58 @@ class documentViewer(Folder): # is pollux link selfurl = self.absolute_url() # change href - hrefNode.nodeValue = href.replace('lt/lex.xql','%s/head_main_voc'%selfurl) + hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl) # add target l.setAttributeNS(None, 'target', '_blank') + l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;") + l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') + + if href.startswith('lt/lemma.xql'): + selfurl = self.absolute_url() + hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl) + l.setAttributeNS(None, 'target', '_blank') + l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;") + l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') return serializeNode(pagenode) return "no text here" + def getTranslate(self, query=None, language=None): + """translate into another languages""" + pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) + return pagexml + + def getLemma(self, lemma=None, language=None): + """simular words lemma """ + pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) + #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","lemma=%s&language=%s"%(lemma,language),outputUnicode=False) + return pagexml + + def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): + """number of""" + docpath = docinfo['textURLPath'] + pagesize = pageinfo['queryPageSize'] + pn = pageinfo['searchPN'] + query =pageinfo['query'] + queryType =pageinfo['queryType'] + + tocSearch = 0 + tocDiv = None + pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) + + pagedom = Parse(pagexml) + numdivs = pagedom.xpath("//div[@class='queryResultHits']") + tocSearch = int(getTextFromNode(numdivs[0])) + tc=int((tocSearch/20)+1) + logging.debug("documentViewer (gettoc) tc: %s"%(tc)) + return tc + def getToc(self, mode="text", docinfo=None): """loads table of contents and stores in docinfo""" logging.debug("documentViewer (gettoc) mode: %s"%(mode)) if 'tocSize_%s'%mode in docinfo: # cached toc - return docinfo - + return docinfo docpath = docinfo['textURLPath'] # we need to set a result set size pagesize = 1000 @@ -779,18 +925,29 @@ class documentViewer(Folder): else: queryType = mode docpath = docinfo['textURLPath'] + path = docinfo['textURLPath'] + #logging.debug("documentViewer (gettoc) pathNomer: %s"%(pathNomer)) pagesize = pageinfo['tocPageSize'] pn = pageinfo['tocPN'] + url = docinfo['url'] + selfurl = self.absolute_url() + viewMode= pageinfo['viewMode'] + tocMode = pageinfo['tocMode'] + tocPN = pageinfo['tocPN'] + pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn), outputUnicode=False) + page = pagexml.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) + text = page.replace('mode=image','mode=texttool') + return text # post-processing downloaded xml - pagedom = Parse(pagexml) + #pagedom = Parse(text) # div contains text - pagedivs = pagedom.xpath("//div[@class='queryResultPage']") - if len(pagedivs) > 0: - pagenode = pagedivs[0] - return serializeNode(pagenode) - else: - return "No TOC!" + #pagedivs = pagedom.xpath("//div[@class='queryResultPage']") + #if len(pagedivs) > 0: + # pagenode = pagedivs[0] + # return serializeNode(pagenode) + #else: + # return "No TOC!" def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):