version 1.7, 2010/08/11 10:17:46
|
version 1.21, 2010/08/24 12:11:26
|
Line 1
|
Line 1
|
|
|
from OFS.SimpleItem import SimpleItem |
from OFS.SimpleItem import SimpleItem |
from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
|
|
from Ft.Xml import EMPTY_NAMESPACE, Parse |
from Ft.Xml import EMPTY_NAMESPACE, Parse |
|
|
import sys |
import sys |
Line 10 import urllib
|
Line 9 import urllib
|
import documentViewer |
import documentViewer |
from documentViewer import getTextFromNode, serializeNode |
from documentViewer import getTextFromNode, serializeNode |
|
|
|
|
class MpdlXmlTextServer(SimpleItem): |
class MpdlXmlTextServer(SimpleItem): |
"""TextServer implementation for MPDL-XML eXist server""" |
"""TextServer implementation for MPDL-XML eXist server""" |
meta_type="MPDL-XML TextServer" |
meta_type="MPDL-XML TextServer" |
Line 31 class MpdlXmlTextServer(SimpleItem):
|
Line 29 class MpdlXmlTextServer(SimpleItem):
|
else: |
else: |
self.serverUrl = "http://%s/mpdl/interface/"%serverName |
self.serverUrl = "http://%s/mpdl/interface/"%serverName |
|
|
|
|
def getHttpData(self, url, data=None): |
def getHttpData(self, url, data=None): |
"""returns result from url+data HTTP request""" |
"""returns result from url+data HTTP request""" |
return documentViewer.getHttpData(url,data,timeout=self.timeout) |
return documentViewer.getHttpData(url,data,timeout=self.timeout) |
|
|
|
|
def getServerData(self, method, data=None): |
def getServerData(self, method, data=None): |
"""returns result from text server for method+data""" |
"""returns result from text server for method+data""" |
url = self.serverUrl+method |
url = self.serverUrl+method |
return documentViewer.getHttpData(url,data,timeout=self.timeout) |
return documentViewer.getHttpData(url,data,timeout=self.timeout) |
|
|
|
|
def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None): |
def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None): |
"""get search list""" |
"""get search list""" |
docpath = docinfo['textURLPath'] |
docpath = docinfo['textURLPath'] |
Line 57 class MpdlXmlTextServer(SimpleItem):
|
Line 52 class MpdlXmlTextServer(SimpleItem):
|
queryType =pageinfo['queryType'] |
queryType =pageinfo['queryType'] |
viewMode= pageinfo['viewMode'] |
viewMode= pageinfo['viewMode'] |
tocMode = pageinfo['tocMode'] |
tocMode = pageinfo['tocMode'] |
|
#characterNormalization = pageinfo ['characterNormalization'] |
tocPN = pageinfo['tocPN'] |
tocPN = pageinfo['tocPN'] |
selfurl = self.absolute_url() |
selfurl = self.absolute_url() |
|
|
Line 134 class MpdlXmlTextServer(SimpleItem):
|
Line 130 class MpdlXmlTextServer(SimpleItem):
|
docinfo['numPages'] = text.count("<pb ") |
docinfo['numPages'] = text.count("<pb ") |
return docinfo |
return docinfo |
|
|
def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None): |
def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None, characterNormalization=None): |
"""returns single page from fulltext""" |
"""returns single page from fulltext""" |
docpath = docinfo['textURLPath'] |
docpath = docinfo['textURLPath'] |
path = docinfo['textURLPath'] |
path = docinfo['textURLPath'] |
url = docinfo['url'] |
url = docinfo['url'] |
viewMode= pageinfo['viewMode'] |
viewMode= pageinfo['viewMode'] |
tocMode = pageinfo['tocMode'] |
tocMode = pageinfo['tocMode'] |
|
characterNormalization=pageinfo['characterNormalization'] |
tocPN = pageinfo['tocPN'] |
tocPN = pageinfo['tocPN'] |
selfurl = self.absolute_url() |
selfurl = self.absolute_url() |
if mode == "text_dict": |
if mode == "text_dict": |
textmode = "textPollux" |
textmode = "textPollux" |
else: |
else: |
textmode = mode |
textmode = mode |
|
#logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) |
textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn) |
textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn) |
if highlightQuery is not None: |
if highlightQuery is not None: |
textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) |
textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) |
Line 168 class MpdlXmlTextServer(SimpleItem):
|
Line 165 class MpdlXmlTextServer(SimpleItem):
|
if hrefNode: |
if hrefNode: |
href= hrefNode.nodeValue |
href= hrefNode.nodeValue |
if href.startswith('#note-'): |
if href.startswith('#note-'): |
hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)) |
hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s#note-"%(url,viewMode,tocMode,tocPN,pn,characterNormalization)) |
return serializeNode(pagenode) |
return serializeNode(pagenode) |
if mode == "xml": |
if mode == "xml": |
# first div contains text |
# first div contains text |
Line 305 class MpdlXmlTextServer(SimpleItem):
|
Line 302 class MpdlXmlTextServer(SimpleItem):
|
url = docinfo['url'] |
url = docinfo['url'] |
selfurl = self.absolute_url() |
selfurl = self.absolute_url() |
viewMode= pageinfo['viewMode'] |
viewMode= pageinfo['viewMode'] |
|
characterNormalization =pageinfo['characterNormalization'] |
tocMode = pageinfo['tocMode'] |
tocMode = pageinfo['tocMode'] |
tocPN = pageinfo['tocPN'] |
tocPN = pageinfo['tocPN'] |
|
|
data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) |
data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) |
|
page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&characterNormalization=%s'%(selfurl,url, viewMode, tocMode, tocPN,characterNormalization)) |
page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) |
|
text = page.replace('mode=image','mode=texttool') |
text = page.replace('mode=image','mode=texttool') |
|
logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) |
|
#logging.debug("documentViewer (characterNormalization) text: %s"%(text)) |
return text |
return text |
|
|
def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): |
def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): |