version 1.1.2.2, 2010/06/16 09:16:02
|
version 1.1.2.3, 2010/06/16 16:38:17
|
Line 54 class extraFunction(Folder):
|
Line 54 class extraFunction(Folder):
|
self.id=id |
self.id=id |
self.title=title |
self.title=title |
|
|
|
def getHttpData(self, url, data=None, num_tries=3, timeout=40): |
|
"""returns result from url+data HTTP request""" |
|
# we do GET (by appending data to url) |
|
if isinstance(data, str) or isinstance(data, unicode): |
|
# if data is string then append |
|
url = "%s?%s"%(url,data) |
|
else: |
|
# we assume its a dict |
|
url = "%s?%s"%(url,urllib.urlencode(data)) |
|
|
|
response = None |
|
errmsg = None |
|
for cnt in range(num_tries): |
|
try: |
|
logging.debug("getHttpData(%s) url=%s"%(cnt+1,url)) |
|
if sys.version_info < (2, 6): |
|
# set timeout on socket -- ugly :-( |
|
import socket |
|
socket.setdefaulttimeout(timeout) |
|
response = urllib2.urlopen(url) |
|
else: |
|
response = urllib2.urlopen(url,timeout=timeout) |
|
# check result? |
|
break |
|
except urllib2.HTTPError, e: |
|
logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) |
|
errmsg = str(e) |
|
# stop trying |
|
break |
|
except urllib2.URLError, e: |
|
logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) |
|
errmsg = str(e) |
|
# stop trying |
|
#break |
|
|
|
if response is not None: |
|
data = response.read() |
|
response.close() |
|
return data |
|
|
|
raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg)) |
|
#return None |
|
|
|
|
|
|
def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None): |
def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None): |
"""get search list""" |
"""get search list""" |
docpath = docinfo['textURLPath'] |
docpath = docinfo['textURLPath'] |
Line 71 class extraFunction(Folder):
|
Line 116 class extraFunction(Folder):
|
tocPN = pageinfo['tocPN'] |
tocPN = pageinfo['tocPN'] |
selfurl = self.absolute_url() |
selfurl = self.absolute_url() |
|
|
page = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery)) |
data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery)) |
#page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) |
#page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) |
data = page.read() |
#data = page.read() |
page.close() |
#page.close() |
|
|
pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) |
pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) |
pagedom = Parse(pagexml) |
pagedom = Parse(pagexml) |
Line 138 class extraFunction(Folder):
|
Line 183 class extraFunction(Folder):
|
def getNumPages(self,docinfo=None):
    """Count the page breaks of the fulltext and store them in docinfo.

    Queries the xquery.xql interface for all pb elements of the document
    given by docinfo['textURLPath'], puts the number of occurrences of
    "<pb " in the result into docinfo['numPages'] and returns docinfo.
    """
    serverUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql"
    params = "document=%s&xquery=%s"%(docinfo['textURLPath'], '//pb')
    pbList = self.getHttpData(serverUrl, params)
    docinfo['numPages'] = pbList.count("<pb ")
    return docinfo
Line 161 class extraFunction(Folder):
|
Line 206 class extraFunction(Folder):
|
if highlightQuery is not None: |
if highlightQuery is not None: |
textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) |
textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) |
|
|
pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql",textParam) |
pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql",textParam) |
"""pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False)""" |
"""pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False)""" |
|
|
pagedom = Parse(pagexml) |
pagedom = Parse(pagexml) |
Line 226 class extraFunction(Folder):
|
Line 271 class extraFunction(Folder):
|
|
|
def getTranslate(self, query=None, language=None):
    """Request a translation of query for the given language.

    Sends language and the URL-quoted query to the lt/lex.xql interface
    and returns the response data unchanged.
    """
    lexUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lex.xql"
    params = "document=&language="+str(language)+"&query="+url_quote(str(query))
    return self.getHttpData(lexUrl, params)
|
|
def getLemma(self, lemma=None, language=None):
    """Request similar words for lemma in the given language.

    Sends language and the URL-quoted lemma to the lt/lemma.xql interface
    and returns the response data unchanged.
    """
    lemmaUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql"
    params = "document=&language="+str(language)+"&lemma="+url_quote(str(lemma))
    return self.getHttpData(lemmaUrl, params)
|
|
def getLemmaNew(self, query=None, language=None):
    """Request similar words for query in the given language.

    Sends language and the URL-quoted query (as the "lemma" parameter) to
    the lt/lemma.xql interface and returns the response data unchanged.
    """
    lemmaUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql"
    params = "document=&language="+str(language)+"&lemma="+url_quote(str(query))
    return self.getHttpData(lemmaUrl, params)
|
|
def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): |
def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): |
Line 259 class extraFunction(Folder):
|
Line 303 class extraFunction(Folder):
|
tocSearch = 0 |
tocSearch = 0 |
tocDiv = None |
tocDiv = None |
|
|
pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn)) |
pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn)) |
#pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) |
#pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) |
pagedom = Parse(pagexml) |
pagedom = Parse(pagexml) |
numdivs = pagedom.xpath("//div[@class='queryResultHits']") |
numdivs = pagedom.xpath("//div[@class='queryResultHits']") |
Line 289 class extraFunction(Folder):
|
Line 333 class extraFunction(Folder):
|
tocSize = 0 |
tocSize = 0 |
tocDiv = None |
tocDiv = None |
|
|
pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) |
pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) |
#pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) |
#pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) |
# post-processing downloaded xml |
# post-processing downloaded xml |
pagedom = Parse(pagexml) |
pagedom = Parse(pagexml) |
Line 317 class extraFunction(Folder):
|
Line 361 class extraFunction(Folder):
|
tocMode = pageinfo['tocMode'] |
tocMode = pageinfo['tocMode'] |
tocPN = pageinfo['tocPN'] |
tocPN = pageinfo['tocPN'] |
|
|
pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) |
data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) |
data = pagexml.read() |
#data = pagexml.read() |
pagexml.close() |
#pagexml.close() |
|
|
page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) |
page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) |
text = page.replace('mode=image','mode=texttool') |
text = page.replace('mode=image','mode=texttool') |