| version 1.1.2.2, 2010/06/16 09:16:02 | version 1.1.2.3, 2010/06/16 16:38:17 |
|---|---|
| Line 54 class extraFunction(Folder): | Line 54 class extraFunction(Folder): |
| self.id=id | self.id=id |
| self.title=title | self.title=title |
| def getHttpData(self, url, data=None, num_tries=3, timeout=40): | |
| """returns result from url+data HTTP request""" | |
| # we do GET (by appending data to url) | |
| if isinstance(data, str) or isinstance(data, unicode): | |
| # if data is string then append | |
| url = "%s?%s"%(url,data) | |
| else: | |
| # we assume its a dict | |
| url = "%s?%s"%(url,urllib.urlencode(data)) | |
| response = None | |
| errmsg = None | |
| for cnt in range(num_tries): | |
| try: | |
| logging.debug("getHttpData(%s) url=%s"%(cnt+1,url)) | |
| if sys.version_info < (2, 6): | |
| # set timeout on socket -- ugly :-( | |
| import socket | |
| socket.setdefaulttimeout(timeout) | |
| response = urllib2.urlopen(url) | |
| else: | |
| response = urllib2.urlopen(url,timeout=timeout) | |
| # check result? | |
| break | |
| except urllib2.HTTPError, e: | |
| logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) | |
| errmsg = str(e) | |
| # stop trying | |
| break | |
| except urllib2.URLError, e: | |
| logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) | |
| errmsg = str(e) | |
| # stop trying | |
| #break | |
| if response is not None: | |
| data = response.read() | |
| response.close() | |
| return data | |
| raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg)) | |
| #return None | |
| def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None): | def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None): |
| """get search list""" | """get search list""" |
| docpath = docinfo['textURLPath'] | docpath = docinfo['textURLPath'] |
| Line 71 class extraFunction(Folder): | Line 116 class extraFunction(Folder): |
| tocPN = pageinfo['tocPN'] | tocPN = pageinfo['tocPN'] |
| selfurl = self.absolute_url() | selfurl = self.absolute_url() |
| page = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery)) | data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery)) |
| #page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) | #page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) |
| data = page.read() | #data = page.read() |
| page.close() | #page.close() |
| pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) | pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) |
| pagedom = Parse(pagexml) | pagedom = Parse(pagexml) |
| Line 138 class extraFunction(Folder): | Line 183 class extraFunction(Folder): |
| def getNumPages(self,docinfo=None): | def getNumPages(self,docinfo=None): |
| """get list of pages from fulltext and put in docinfo""" | """get list of pages from fulltext and put in docinfo""" |
| xquery = '//pb' | xquery = '//pb' |
| text = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) | text = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) |
| #text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) | #text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) |
| docinfo['numPages'] = text.count("<pb ") | docinfo['numPages'] = text.count("<pb ") |
| return docinfo | return docinfo |
| Line 161 class extraFunction(Folder): | Line 206 class extraFunction(Folder): |
| if highlightQuery is not None: | if highlightQuery is not None: |
| textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) | textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) |
| pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql",textParam) | pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql",textParam) |
| """pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False)""" | """pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False)""" |
| pagedom = Parse(pagexml) | pagedom = Parse(pagexml) |
| Line 226 class extraFunction(Folder): | Line 271 class extraFunction(Folder): |
| def getTranslate(self, query=None, language=None): | def getTranslate(self, query=None, language=None): |
| """translate into another languages""" | """translate into another languages""" |
| pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) | data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) |
| #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) | #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) |
| data = pagexml.read() | #data = pagexml.read() |
| pagexml.close() | #pagexml.close() |
| return data | return data |
| def getLemma(self, lemma=None, language=None): | def getLemma(self, lemma=None, language=None): |
| """simular words lemma """ | """simular words lemma """ |
| pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) | data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) |
| #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) | #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) |
| data = pagexml.read() | #data = pagexml.read() |
| pagexml.close() | #pagexml.close() |
| return data | return data |
| def getLemmaNew(self, query=None, language=None): | def getLemmaNew(self, query=None, language=None): |
| """simular words lemma """ | """simular words lemma """ |
| data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) | |
| pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) | |
| #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) | #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) |
| data = pagexml.read() | #data = pagexml.read() |
| pagexml.close() | #pagexml.close() |
| return data | return data |
| def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): | def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): |
| Line 259 class extraFunction(Folder): | Line 303 class extraFunction(Folder): |
| tocSearch = 0 | tocSearch = 0 |
| tocDiv = None | tocDiv = None |
| pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn)) | pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn)) |
| #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) | #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) |
| pagedom = Parse(pagexml) | pagedom = Parse(pagexml) |
| numdivs = pagedom.xpath("//div[@class='queryResultHits']") | numdivs = pagedom.xpath("//div[@class='queryResultHits']") |
| Line 289 class extraFunction(Folder): | Line 333 class extraFunction(Folder): |
| tocSize = 0 | tocSize = 0 |
| tocDiv = None | tocDiv = None |
| pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) | pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) |
| #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) | #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) |
| # post-processing downloaded xml | # post-processing downloaded xml |
| pagedom = Parse(pagexml) | pagedom = Parse(pagexml) |
| Line 317 class extraFunction(Folder): | Line 361 class extraFunction(Folder): |
| tocMode = pageinfo['tocMode'] | tocMode = pageinfo['tocMode'] |
| tocPN = pageinfo['tocPN'] | tocPN = pageinfo['tocPN'] |
| pagexml = urllib2.urlopen("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) | data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) |
| data = pagexml.read() | #data = pagexml.read() |
| pagexml.close() | #pagexml.close() |
| page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) | page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) |
| text = page.replace('mode=image','mode=texttool') | text = page.replace('mode=image','mode=texttool') |