--- documentViewer/Attic/extraFunction.py 2010/06/16 09:16:02 1.1.2.2
+++ documentViewer/Attic/extraFunction.py 2010/06/16 16:38:17 1.1.2.3
@@ -53,6 +53,68 @@ class extraFunction(Folder):
         self.id=id
         self.title=title
 
+
+    def getHttpData(self, url, data=None, num_tries=3, timeout=40):
+        """Return the body of an HTTP GET request for url.
+
+        data is sent as the query string: a string is appended as is,
+        anything else is encoded with urllib.urlencode(). If data is
+        None, url is requested unchanged.
+
+        The request is attempted up to num_tries times, each with the
+        given timeout (seconds). An HTTPError aborts the retries at once;
+        a URLError is retried.
+
+        Raises IOError if no response was obtained.
+        """
+        # we do GET (by appending data to url)
+        if data is not None:
+            if not isinstance(data, basestring):
+                # not a string: we assume its a dict
+                data = urllib.urlencode(data)
+            url = "%s?%s"%(url,data)
+
+        response = None
+        errmsg = None
+        for cnt in range(num_tries):
+            try:
+                logging.debug("getHttpData(%s) url=%s"%(cnt+1,url))
+                if sys.version_info < (2, 6):
+                    # urlopen has no timeout argument before 2.6, so set
+                    # the socket default -- ugly :-( -- and restore it
+                    # afterwards because it is process-wide
+                    import socket
+                    old_timeout = socket.getdefaulttimeout()
+                    socket.setdefaulttimeout(timeout)
+                    try:
+                        response = urllib2.urlopen(url)
+                    finally:
+                        socket.setdefaulttimeout(old_timeout)
+                else:
+                    response = urllib2.urlopen(url,timeout=timeout)
+                # check result?
+                break
+            except urllib2.HTTPError, e:
+                logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
+                errmsg = str(e)
+                # stop trying
+                break
+            except urllib2.URLError, e:
+                logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
+                errmsg = str(e)
+                # keep trying
+
+        if response is not None:
+            try:
+                data = response.read()
+            finally:
+                # close the connection even if read() fails
+                response.close()
+            return data
+
+        raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
+
+
     def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None):
         """get search list"""
 
@@ -71,10 +133,10 @@ class extraFunction(Folder):
         tocPN = pageinfo['tocPN']
         selfurl = self.absolute_url()
 
-        page = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery))
+        data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery))
         #page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False)
-        data = page.read()
-        page.close()
+        #data = page.read()
+        #page.close()
         pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
         pagedom = Parse(pagexml)
 
@@ -138,7 +200,7 @@ class extraFunction(Folder):
     def getNumPages(self,docinfo=None):
         """get list of pages from fulltext and put in docinfo"""
         xquery = '//pb'
-        text = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
+        text = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
         #text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
         docinfo['numPages'] = text.count("