--- documentViewer/Attic/extraFunction.py 2010/06/14 10:50:06 1.1.2.1 +++ documentViewer/Attic/extraFunction.py 2010/06/16 16:38:17 1.1.2.3 @@ -21,7 +21,8 @@ import urllib import logging import math import documentViewer - +import urllib2 +import urllib import urlparse from types import * @@ -52,6 +53,51 @@ class extraFunction(Folder): self.id=id self.title=title + + def getHttpData(self, url, data=None, num_tries=3, timeout=40): + """returns result from url+data HTTP request""" + # we do GET (by appending data to url) + if isinstance(data, str) or isinstance(data, unicode): + # if data is string then append + url = "%s?%s"%(url,data) + else: + # we assume its a dict + url = "%s?%s"%(url,urllib.urlencode(data)) + + response = None + errmsg = None + for cnt in range(num_tries): + try: + logging.debug("getHttpData(%s) url=%s"%(cnt+1,url)) + if sys.version_info < (2, 6): + # set timeout on socket -- ugly :-( + import socket + socket.setdefaulttimeout(timeout) + response = urllib2.urlopen(url) + else: + response = urllib2.urlopen(url,timeout=timeout) + # check result? + break + except urllib2.HTTPError, e: + logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) + errmsg = str(e) + # stop trying + break + except urllib2.URLError, e: + logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) + errmsg = str(e) + # stop trying + #break + + if response is not None: + data = response.read() + response.close() + return data + + raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg)) + #return None + + def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None): """get search list""" @@ -69,8 +115,13 @@ class extraFunction(Folder): tocMode = pageinfo['tocMode'] tocPN = pageinfo['tocPN'] selfurl = self.absolute_url() - page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) - pagexml = page.replace('?document=%s'%str(docpath),'?url=%s'%url) + + data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery)) + #page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) + #data = page.read() + #page.close() + + pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) pagedom = Parse(pagexml) if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): pagedivs = pagedom.xpath("//div[@class='queryResultPage']") @@ -132,7 +183,8 @@ class extraFunction(Folder): def getNumPages(self,docinfo=None): """get list of pages from fulltext and put in docinfo""" xquery = '//pb' - text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) + text = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) + #text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) docinfo['numPages'] = text.count("