version 1.6, 2010/07/15 13:16:52
|
version 1.22, 2010/08/24 12:34:32
|
Line 1
|
Line 1
|
|
|
from OFS.SimpleItem import SimpleItem |
from OFS.SimpleItem import SimpleItem |
from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
|
|
from Ft.Xml import EMPTY_NAMESPACE, Parse |
from Ft.Xml import EMPTY_NAMESPACE, Parse |
|
|
import sys |
import sys |
Line 10 import urllib
|
Line 9 import urllib
|
import documentViewer |
import documentViewer |
from documentViewer import getTextFromNode, serializeNode |
from documentViewer import getTextFromNode, serializeNode |
|
|
|
|
class MpdlXmlTextServer(SimpleItem): |
class MpdlXmlTextServer(SimpleItem): |
"""TextServer implementation for MPDL-XML eXist server""" |
"""TextServer implementation for MPDL-XML eXist server""" |
meta_type="MPDL-XML TextServer" |
meta_type="MPDL-XML TextServer" |
Line 31 class MpdlXmlTextServer(SimpleItem):
|
Line 29 class MpdlXmlTextServer(SimpleItem):
|
else: |
else: |
self.serverUrl = "http://%s/mpdl/interface/"%serverName |
self.serverUrl = "http://%s/mpdl/interface/"%serverName |
|
|
|
|
def getHttpData(self, url, data=None): |
def getHttpData(self, url, data=None): |
"""returns result from url+data HTTP request""" |
"""returns result from url+data HTTP request""" |
return documentViewer.getHttpData(url,data,timeout=self.timeout) |
return documentViewer.getHttpData(url,data,timeout=self.timeout) |
|
|
|
|
def getServerData(self, method, data=None): |
def getServerData(self, method, data=None): |
"""returns result from text server for method+data""" |
"""returns result from text server for method+data""" |
url = self.serverUrl+method |
url = self.serverUrl+method |
return documentViewer.getHttpData(url,data,timeout=self.timeout) |
return documentViewer.getHttpData(url,data,timeout=self.timeout) |
|
|
|
|
def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None): |
def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None): |
"""get search list""" |
"""get search list""" |
docpath = docinfo['textURLPath'] |
docpath = docinfo['textURLPath'] |
url = docinfo['url'] |
url = docinfo['url'] |
logging.debug("documentViewer (gettoc) docpath: %s"%(docpath)) |
#logging.debug("documentViewer (gettoc) docpath: %s"%(docpath)) |
logging.debug("documentViewer (gettoc) url: %s"%(url)) |
#logging.debug("documentViewer (gettoc) url: %s"%(url)) |
pagesize = pageinfo['queryPageSize'] |
pagesize = pageinfo['queryPageSize'] |
pn = pageinfo['searchPN'] |
pn = pageinfo['searchPN'] |
sn = pageinfo['sn'] |
sn = pageinfo['sn'] |
Line 57 class MpdlXmlTextServer(SimpleItem):
|
Line 52 class MpdlXmlTextServer(SimpleItem):
|
queryType =pageinfo['queryType'] |
queryType =pageinfo['queryType'] |
viewMode= pageinfo['viewMode'] |
viewMode= pageinfo['viewMode'] |
tocMode = pageinfo['tocMode'] |
tocMode = pageinfo['tocMode'] |
|
#characterNormalization = pageinfo ['characterNormalization'] |
tocPN = pageinfo['tocPN'] |
tocPN = pageinfo['tocPN'] |
selfurl = self.absolute_url() |
selfurl = self.absolute_url() |
|
|
Line 134 class MpdlXmlTextServer(SimpleItem):
|
Line 130 class MpdlXmlTextServer(SimpleItem):
|
docinfo['numPages'] = text.count("<pb ") |
docinfo['numPages'] = text.count("<pb ") |
return docinfo |
return docinfo |
|
|
def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None): |
def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None, characterNormalization=None): |
"""returns single page from fulltext""" |
"""returns single page from fulltext""" |
docpath = docinfo['textURLPath'] |
docpath = docinfo['textURLPath'] |
path = docinfo['textURLPath'] |
path = docinfo['textURLPath'] |
url = docinfo['url'] |
url = docinfo['url'] |
viewMode= pageinfo['viewMode'] |
viewMode= pageinfo['viewMode'] |
tocMode = pageinfo['tocMode'] |
tocMode = pageinfo['tocMode'] |
|
characterNormalization=pageinfo['characterNormalization'] |
tocPN = pageinfo['tocPN'] |
tocPN = pageinfo['tocPN'] |
selfurl = self.absolute_url() |
selfurl = self.absolute_url() |
if mode == "text_dict": |
if mode == "text_dict": |
textmode = "textPollux" |
textmode = "textPollux" |
else: |
else: |
textmode = mode |
textmode = mode |
|
#logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) |
textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn) |
textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn) |
if highlightQuery is not None: |
if highlightQuery is not None: |
textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) |
textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) |
Line 168 class MpdlXmlTextServer(SimpleItem):
|
Line 165 class MpdlXmlTextServer(SimpleItem):
|
if hrefNode: |
if hrefNode: |
href= hrefNode.nodeValue |
href= hrefNode.nodeValue |
if href.startswith('#note-'): |
if href.startswith('#note-'): |
hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)) |
hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s#note-"%(url,viewMode,tocMode,tocPN,pn,characterNormalization)) |
return serializeNode(pagenode) |
return serializeNode(pagenode) |
if mode == "xml": |
if mode == "xml": |
# first div contains text |
# first div contains text |
Line 176 class MpdlXmlTextServer(SimpleItem):
|
Line 173 class MpdlXmlTextServer(SimpleItem):
|
if len(pagedivs) > 0: |
if len(pagedivs) > 0: |
pagenode = pagedivs[0] |
pagenode = pagedivs[0] |
return serializeNode(pagenode) |
return serializeNode(pagenode) |
|
if mode == "gis": |
|
# first div contains text |
|
pagedivs = pagedom.xpath("/div") |
|
if len(pagedivs) > 0: |
|
pagenode = pagedivs[0] |
|
return serializeNode(pagenode) |
|
|
if mode == "pureXml": |
if mode == "pureXml": |
# first div contains text |
# first div contains text |
pagedivs = pagedom.xpath("/div") |
pagedivs = pagedom.xpath("/div") |
Line 249 class MpdlXmlTextServer(SimpleItem):
|
Line 253 class MpdlXmlTextServer(SimpleItem):
|
numdivs = pagedom.xpath("//div[@class='queryResultHits']") |
numdivs = pagedom.xpath("//div[@class='queryResultHits']") |
tocSearch = int(getTextFromNode(numdivs[0])) |
tocSearch = int(getTextFromNode(numdivs[0])) |
tc=int((tocSearch/10)+1) |
tc=int((tocSearch/10)+1) |
logging.debug("documentViewer (gettoc) tc: %s"%(tc)) |
#logging.debug("documentViewer (gettoc) tc: %s"%(tc)) |
return tc |
return tc |
|
|
def getToc(self, mode="text", docinfo=None): |
def getToc(self, mode="text", docinfo=None): |
"""loads table of contents and stores in docinfo""" |
"""loads table of contents and stores in docinfo""" |
logging.debug("documentViewer (gettoc) mode: %s"%(mode)) |
#logging.debug("documentViewer (gettoc) mode: %s"%(mode)) |
if mode == "none": |
if mode == "none": |
return docinfo |
return docinfo |
if 'tocSize_%s'%mode in docinfo: |
if 'tocSize_%s'%mode in docinfo: |
Line 298 class MpdlXmlTextServer(SimpleItem):
|
Line 302 class MpdlXmlTextServer(SimpleItem):
|
url = docinfo['url'] |
url = docinfo['url'] |
selfurl = self.absolute_url() |
selfurl = self.absolute_url() |
viewMode= pageinfo['viewMode'] |
viewMode= pageinfo['viewMode'] |
|
characterNormalization =pageinfo['characterNormalization'] |
tocMode = pageinfo['tocMode'] |
tocMode = pageinfo['tocMode'] |
tocPN = pageinfo['tocPN'] |
tocPN = pageinfo['tocPN'] |
|
|
data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) |
data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) |
|
page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&characterNormalization=%s'%(selfurl,url, viewMode, tocMode, tocPN,characterNormalization)) |
page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) |
|
text = page.replace('mode=image','mode=texttool') |
text = page.replace('mode=image','mode=texttool') |
|
logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) |
|
#logging.debug("documentViewer (characterNormalization) text: %s"%(text)) |
return text |
return text |
|
|
def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): |
def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): |