version 1.224, 2011/01/20 16:04:21
|
version 1.226, 2011/02/15 11:12:39
|
Line 85 class MpdlXmlTextServer(SimpleItem):
|
Line 85 class MpdlXmlTextServer(SimpleItem):
|
selfurl = self.absolute_url() |
selfurl = self.absolute_url() |
pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&optionToggle=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,optionToggle,characterNormalization)) |
pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&optionToggle=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,optionToggle,characterNormalization)) |
hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) |
hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) |
|
logging.debug("PUREXML :%s"%(serializeNode(pagenode))) |
return serializeNode(pagenode) |
return serializeNode(pagenode) |
if (queryType=="fulltextMorph"): |
if (queryType=="fulltextMorph"): |
pagedivs = pagedom.xpath("//div[@class='queryResult']") |
pagedivs = pagedom.xpath("//div[@class='queryResult']") |
Line 175 class MpdlXmlTextServer(SimpleItem):
|
Line 176 class MpdlXmlTextServer(SimpleItem):
|
return myList |
return myList |
|
|
|
|
def getTextPage(self, docinfo=None, pageinfo=None): |
def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None): |
"""returns single page from fulltext""" |
"""returns single page from fulltext""" |
docpath = docinfo['textURLPath'] |
docpath = docinfo['textURLPath'] |
path = docinfo['textURLPath'] |
path = docinfo['textURLPath'] |
Line 185 class MpdlXmlTextServer(SimpleItem):
|
Line 186 class MpdlXmlTextServer(SimpleItem):
|
viewMode= pageinfo['viewMode'] |
viewMode= pageinfo['viewMode'] |
sn = pageinfo['sn'] |
sn = pageinfo['sn'] |
highlightQuery = pageinfo['highlightQuery'] |
highlightQuery = pageinfo['highlightQuery'] |
mode = pageinfo ['viewMode'] |
#mode = pageinfo ['viewMode'] |
tocMode = pageinfo['tocMode'] |
tocMode = pageinfo['tocMode'] |
characterNormalization=pageinfo['characterNormalization'] |
characterNormalization=pageinfo['characterNormalization'] |
tocPN = pageinfo['tocPN'] |
tocPN = pageinfo['tocPN'] |
Line 209 class MpdlXmlTextServer(SimpleItem):
|
Line 210 class MpdlXmlTextServer(SimpleItem):
|
docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0]) |
docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0]) |
logging.debug("ORIGINAL PAGES: %s"%(docinfo['pageNumberOrig'])) |
logging.debug("ORIGINAL PAGES: %s"%(docinfo['pageNumberOrig'])) |
|
|
|
#original Pages Norm |
|
pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']") |
|
if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"): |
|
if len(pagedivs)>0: |
|
docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0]) |
|
logging.debug("ORIGINAL PAGES: %s"%(docinfo['pageNumberOrigNorm'])) |
|
|
|
|
#figureEntries |
#figureEntries |
pagedivs = dom.xpath("//div[@class='countFigureEntries']") |
pagedivs = dom.xpath("//div[@class='countFigureEntries']") |
if pagedivs == dom.xpath("//div[@class='countFigureEntries']"): |
if pagedivs == dom.xpath("//div[@class='countFigureEntries']"): |
Line 287 class MpdlXmlTextServer(SimpleItem):
|
Line 296 class MpdlXmlTextServer(SimpleItem):
|
docinfo['countPlaces'] = 0 |
docinfo['countPlaces'] = 0 |
docinfo['countTocEntries'] = 0 |
docinfo['countTocEntries'] = 0 |
docinfo['numPages'] = 0 |
docinfo['numPages'] = 0 |
|
docinfo['pageNumberOrigNorm'] = 0 |
#return docinfo |
#return docinfo |
|
|
# plain text mode |
# plain text mode |
Line 363 class MpdlXmlTextServer(SimpleItem):
|
Line 373 class MpdlXmlTextServer(SimpleItem):
|
return serializeNode(pagenode) |
return serializeNode(pagenode) |
return "no text here" |
return "no text here" |
|
|
|
def getText(self, mode="text", pn=1, docinfo=None, pageinfo=None):
    """Fetch a page via getTextPage, defaulting to mode "text".

    Thin convenience wrapper: delegates to self.getTextPage, logs the
    result at debug level and returns it unchanged.

    mode     -- rendering mode handed to getTextPage (default "text")
    pn       -- page number handed to getTextPage (default 1)
    docinfo  -- document info mapping, passed through untouched
    pageinfo -- page info mapping, passed through untouched

    Previously the caller-supplied mode and pn were silently discarded
    and the literals "text" / 1 were always sent; they are now forwarded.
    Calls relying on the defaults behave exactly as before.
    """
    data = self.getTextPage(mode=mode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
    # Lazy %-args: the (potentially large) page is only formatted into the
    # message when debug logging is actually enabled.
    logging.debug("TEXT ALL data: %s", data)
    return data
|
|
|
def getXML(self, mode="xml", pn=1, docinfo=None, pageinfo=None):
    """Fetch a page via getTextPage, defaulting to mode "xml".

    Thin convenience wrapper: delegates to self.getTextPage, logs the
    result at debug level and returns it unchanged.

    mode     -- rendering mode handed to getTextPage (default "xml")
    pn       -- page number handed to getTextPage (default 1)
    docinfo  -- document info mapping, passed through untouched
    pageinfo -- page info mapping, passed through untouched

    Previously the caller-supplied mode and pn were silently discarded
    and the literals "xml" / 1 were always sent; they are now forwarded.
    Calls relying on the defaults behave exactly as before.
    """
    data = self.getTextPage(mode=mode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
    # Lazy %-args: the (potentially large) page is only formatted into the
    # message when debug logging is actually enabled.
    logging.debug("TEXT ALL data: %s", data)
    return data
|
|
|
def getDictionary(self, mode="text_dict", pn=1, docinfo=None, pageinfo=None):
    """Fetch a page via getTextPage, defaulting to mode "text_dict".

    Thin convenience wrapper: delegates to self.getTextPage, logs the
    result at debug level and returns it unchanged.

    mode     -- rendering mode handed to getTextPage (default "text_dict")
    pn       -- page number handed to getTextPage (default 1)
    docinfo  -- document info mapping, passed through untouched
    pageinfo -- page info mapping, passed through untouched

    Previously the caller-supplied mode and pn were silently discarded
    and the literals "text_dict" / 1 were always sent; they are now
    forwarded. Calls relying on the defaults behave exactly as before.
    """
    data = self.getTextPage(mode=mode, pn=pn, docinfo=docinfo, pageinfo=pageinfo)
    # Lazy %-args: the (potentially large) page is only formatted into the
    # message when debug logging is actually enabled.
    logging.debug("TEXT ALL data: %s", data)
    return data
|
|
def getTranslate(self, query=None, language=None): |
def getTranslate(self, query=None, language=None): |
"""translate into another languages""" |
"""translate into another languages""" |
data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+urllib.quote(query)) |
data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+urllib.quote(query)) |
Line 455 class MpdlXmlTextServer(SimpleItem):
|
Line 480 class MpdlXmlTextServer(SimpleItem):
|
page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&optionToggle=1'%(selfurl,url, viewMode, tocMode, tocPN)) |
page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&optionToggle=1'%(selfurl,url, viewMode, tocMode, tocPN)) |
text = page.replace('mode=image','mode=texttool') |
text = page.replace('mode=image','mode=texttool') |
#logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) |
#logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization)) |
|
logging.debug("TEXT %s"%(text)) |
return text |
return text |
|
|
def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): |
def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): |