version 1.1.2.1, 2010/06/14 10:50:06
|
version 1.1.2.3, 2010/06/16 16:38:17
|
Line 21 import urllib
|
Line 21 import urllib
|
import logging |
import logging |
import math |
import math |
import documentViewer |
import documentViewer |
|
import urllib2 |
|
import urllib |
import urlparse |
import urlparse |
from types import * |
from types import * |
|
|
Line 53 class extraFunction(Folder):
|
Line 54 class extraFunction(Folder):
|
self.id=id |
self.id=id |
self.title=title |
self.title=title |
|
|
|
def getHttpData(self, url, data=None, num_tries=3, timeout=40): |
|
"""returns result from url+data HTTP request""" |
|
# we do GET (by appending data to url) |
|
if isinstance(data, str) or isinstance(data, unicode): |
|
# if data is string then append |
|
url = "%s?%s"%(url,data) |
|
else: |
|
# we assume its a dict |
|
url = "%s?%s"%(url,urllib.urlencode(data)) |
|
|
|
response = None |
|
errmsg = None |
|
for cnt in range(num_tries): |
|
try: |
|
logging.debug("getHttpData(%s) url=%s"%(cnt+1,url)) |
|
if sys.version_info < (2, 6): |
|
# set timeout on socket -- ugly :-( |
|
import socket |
|
socket.setdefaulttimeout(timeout) |
|
response = urllib2.urlopen(url) |
|
else: |
|
response = urllib2.urlopen(url,timeout=timeout) |
|
# check result? |
|
break |
|
except urllib2.HTTPError, e: |
|
logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) |
|
errmsg = str(e) |
|
# stop trying |
|
break |
|
except urllib2.URLError, e: |
|
logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) |
|
errmsg = str(e) |
|
# stop trying |
|
#break |
|
|
|
if response is not None: |
|
data = response.read() |
|
response.close() |
|
return data |
|
|
|
raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg)) |
|
#return None |
|
|
|
|
|
|
def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None): |
def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None): |
"""get search list""" |
"""get search list""" |
docpath = docinfo['textURLPath'] |
docpath = docinfo['textURLPath'] |
Line 69 class extraFunction(Folder):
|
Line 115 class extraFunction(Folder):
|
tocMode = pageinfo['tocMode'] |
tocMode = pageinfo['tocMode'] |
tocPN = pageinfo['tocPN'] |
tocPN = pageinfo['tocPN'] |
selfurl = self.absolute_url() |
selfurl = self.absolute_url() |
page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) |
|
pagexml = page.replace('?document=%s'%str(docpath),'?url=%s'%url) |
data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery)) |
|
#page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) |
|
#data = page.read() |
|
#page.close() |
|
|
|
pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) |
pagedom = Parse(pagexml) |
pagedom = Parse(pagexml) |
if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): |
if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): |
pagedivs = pagedom.xpath("//div[@class='queryResultPage']") |
pagedivs = pagedom.xpath("//div[@class='queryResultPage']") |
Line 132 class extraFunction(Folder):
|
Line 183 class extraFunction(Folder):
|
def getNumPages(self,docinfo=None):
    """Count the page breaks of the fulltext and store the count in docinfo.

    Sends the XQuery '//pb' for the document docinfo['textURLPath'] to the
    xquery.xql interface and stores the number of "<pb " occurrences in
    the response as docinfo['numPages'].

    Returns the updated docinfo. Raises IOError (from getHttpData) if the
    request fails.
    """
    xquery = '//pb'
    # single fetch through getHttpData; the former fulltextclient.eval call
    # produced a result that was overwritten right away
    text = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
    docinfo['numPages'] = text.count("<pb ")
    return docinfo
|
|
Line 153 class extraFunction(Folder):
|
Line 205 class extraFunction(Folder):
|
textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn) |
textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn) |
if highlightQuery is not None: |
if highlightQuery is not None: |
textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) |
textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) |
pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False) |
|
|
pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql",textParam) |
|
"""pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False)""" |
|
|
pagedom = Parse(pagexml) |
pagedom = Parse(pagexml) |
# plain text mode |
# plain text mode |
if mode == "text": |
if mode == "text": |
Line 216 class extraFunction(Folder):
|
Line 271 class extraFunction(Folder):
|
|
|
def getTranslate(self, query=None, language=None):
    """Send query and language to the lt/lex.xql interface.

    query is converted with str() and URL-quoted; language is converted
    with str() and inserted unquoted.

    Returns the response body as delivered by getHttpData. Raises IOError
    (from getHttpData) if the request fails.
    """
    # single fetch through getHttpData; the former fulltextclient.eval call
    # and its early return are gone
    data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query)))
    return data
|
|
def getLemma(self, lemma=None, language=None):
    """Send lemma and language to the lt/lemma.xql interface (similar words).

    lemma is converted with str() and URL-quoted; language is converted
    with str() and inserted unquoted.

    Returns the response body as delivered by getHttpData. Raises IOError
    (from getHttpData) if the request fails.
    """
    # single fetch through getHttpData; the former fulltextclient.eval call
    # and its early return are gone
    data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma)))
    return data
|
|
def getLemmaNew(self, query=None, language=None):
    """Send query as the lemma, plus language, to the lt/lemma.xql interface.

    query is converted with str() and URL-quoted, then passed as the
    'lemma' request parameter; language is converted with str() and
    inserted unquoted.

    Returns the response body as delivered by getHttpData. Raises IOError
    (from getHttpData) if the request fails.
    """
    # single fetch through getHttpData; the former fulltextclient.eval call
    # and its early return are gone
    data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query)))
    return data
|
|
def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): |
def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): |
"""number of""" |
"""number of""" |
Line 238 class extraFunction(Folder):
|
Line 302 class extraFunction(Folder):
|
queryType =pageinfo['queryType'] |
queryType =pageinfo['queryType'] |
tocSearch = 0 |
tocSearch = 0 |
tocDiv = None |
tocDiv = None |
pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) |
|
|
pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn)) |
|
#pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) |
pagedom = Parse(pagexml) |
pagedom = Parse(pagexml) |
numdivs = pagedom.xpath("//div[@class='queryResultHits']") |
numdivs = pagedom.xpath("//div[@class='queryResultHits']") |
tocSearch = int(getTextFromNode(numdivs[0])) |
tocSearch = int(getTextFromNode(numdivs[0])) |
Line 266 class extraFunction(Folder):
|
Line 332 class extraFunction(Folder):
|
# number of entries in toc |
# number of entries in toc |
tocSize = 0 |
tocSize = 0 |
tocDiv = None |
tocDiv = None |
pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) |
|
|
pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) |
|
#pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) |
# post-processing downloaded xml |
# post-processing downloaded xml |
pagedom = Parse(pagexml) |
pagedom = Parse(pagexml) |
# get number of entries |
# get number of entries |
Line 292 class extraFunction(Folder):
|
Line 360 class extraFunction(Folder):
|
viewMode= pageinfo['viewMode'] |
viewMode= pageinfo['viewMode'] |
tocMode = pageinfo['tocMode'] |
tocMode = pageinfo['tocMode'] |
tocPN = pageinfo['tocPN'] |
tocPN = pageinfo['tocPN'] |
pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn), outputUnicode=False) |
|
page = pagexml.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) |
data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) |
|
#data = pagexml.read() |
|
#pagexml.close() |
|
|
|
page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) |
text = page.replace('mode=image','mode=texttool') |
text = page.replace('mode=image','mode=texttool') |
return text |
return text |
|
|