| 1 | """Utility methods for handling XML, reading HTTP, etc""" |
|---|
| 2 | |
|---|
| 3 | from App.ImageFile import ImageFile |
|---|
| 4 | from App.Common import rfc1123_date |
|---|
| 5 | |
|---|
| 6 | import sys |
|---|
| 7 | import os |
|---|
| 8 | import stat |
|---|
| 9 | import urllib |
|---|
| 10 | import urllib2 |
|---|
| 11 | import logging |
|---|
| 12 | |
|---|
| 13 | |
|---|
# Version string exported by this module (presumably the version of these
# utilities themselves -- confirm against the callers that read it).
HocrTxtUtilsVersion = "0.1"
|---|
| 15 | |
|---|
def getInt(number, default=0):
    """Convert number to an int, falling back to int(default).

    number  -- any value accepted by int().
    default -- value used when number cannot be converted; it is itself
               passed through int(), so it must be convertible.

    Returns int(number), or int(default) if that conversion fails.
    Raises whatever int(default) raises when the fallback is not
    convertible either.
    """
    try:
        return int(number)
    except Exception:
        # Exception (rather than a bare except) keeps KeyboardInterrupt and
        # SystemExit propagating instead of turning them into the default.
        return int(default)
|---|
| 22 | |
|---|
def getAt(array, idx, default=None):
    """Return array[idx], or default if the lookup fails.

    array   -- any object supporting subscription (list, tuple, dict, ...).
    idx     -- index or key to look up.
    default -- value returned when array[idx] raises (missing index or key,
               array not subscriptable, ...).
    """
    try:
        return array[idx]
    except Exception:
        # Exception (rather than a bare except) keeps KeyboardInterrupt and
        # SystemExit propagating instead of turning them into the default.
        return default
|---|
| 29 | |
|---|
def unicodify(s):
    """Decode s into a unicode object.

    Byte strings (str) are decoded as utf-8; if that fails they are decoded
    as latin-1 instead. Any other object is converted with unicode(s).
    Every false value (None, "", 0, ...) yields the empty unicode string.
    """
    if not s:
        return u""
    if not isinstance(s, str):
        return unicode(s)
    try:
        return s.decode('utf-8')
    except UnicodeError:
        # Only a failed decode triggers the fallback; other errors now
        # propagate. latin-1 maps every byte value, so this cannot fail.
        return s.decode('latin-1')
|---|
| 41 | |
|---|
def utf8ify(s):
    """Return s as a byte string in utf-8 representation.

    unicode objects are encoded as utf-8. Everything else is passed through
    str(), so plain strings are assumed to be utf-8 already. Every false
    value (None, u"", 0, ...) yields the empty string.
    """
    if s:
        if isinstance(s, unicode):
            return s.encode('utf-8')
        return str(s)
    return ""
|---|
| 51 | |
|---|
def getText(node, recursive=0):
    """Return the text content of an ElementTree node and its child nodes.

    node      -- an ElementTree element, or None.
    recursive -- if false, only the node's own text plus the text and tail
                 of its direct children are collected; if true, the text of
                 all descendants at every nesting level is included.

    Returns the concatenated text in document order ('' if node is None).
    """
    if node is None:
        return ''

    parts = [node.text or '']
    for child in node:
        if recursive:
            # Pass the flag on so the descent covers every nesting level,
            # not just the grandchildren.
            parts.append(getText(child, recursive))
        else:
            parts.append(child.text or '')
        if child.tail:
            # Text following the child's end tag belongs to this node.
            parts.append(child.tail)

    return ''.join(parts)
|---|
| 75 | |
|---|
| 76 | |
|---|
| 77 | |
|---|
def getHttpData(url, pn=1, data=None, num_tries=3, timeout=10, noExceptions=False):
    """Fetch url with an HTTP GET request and return the response body.

    url          -- base URL to request.
    pn           -- value sent as the "pn" query parameter (presumably a
                    page number -- confirm with callers). Only appended
                    when data is given.
    data         -- extra query data: a string is appended verbatim, a
                    dict/list/tuple is urlencoded first. For any other value
                    (including None) url is requested unchanged.
    num_tries    -- maximum number of attempts.
    timeout      -- timeout in seconds for each attempt.
    noExceptions -- if true, return None instead of raising when no
                    response could be obtained.

    An HTTP error ends the attempts immediately; other URL errors are
    retried until num_tries is used up.

    Returns the response body as read from the connection.
    Raises IOError if no response was obtained and noExceptions is false.
    Errors raised while reading the body propagate to the caller.
    """
    # The request is always a GET: parameters travel in the query string.
    if isinstance(data, (str, unicode)):
        url = "%s?pn=%s&%s" % (url, pn, data)
    elif isinstance(data, (dict, list, tuple)):
        url = "%s?pn=%s&%s" % (url, pn, urllib.urlencode(data))

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s", cnt + 1, timeout, url)
            if sys.version_info < (2, 6):
                # urlopen has no timeout argument before 2.6, so the
                # process-wide socket default has to be changed instead.
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                response = urllib2.urlopen(url, timeout=float(timeout))
            break
        except urllib2.HTTPError:
            # sys.exc_info() instead of "except X, e" / "except X as e":
            # it is valid syntax on every interpreter version, including
            # the pre-2.6 ones handled above.
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s", e.code, e)
            errmsg = str(e)
            # The server answered with an error status: stop trying.
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s", e.reason, e)
            errmsg = str(e)
            # No break: fall through to the next attempt.

    if response is not None:
        try:
            return response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()

    if noExceptions:
        return None

    raise IOError("ERROR fetching HTTP data from %s: %s" % (url, errmsg))
|---|
| 124 | |
|---|
| 125 | |
|---|
def refreshingImageFileIndexHtml(self, REQUEST, RESPONSE):
    """index_html method for App.ImageFile that refreshes the file info on each request.

    Re-reads size and modification time of self.path from the file system,
    stores them in self.size, self.lmt and self.lmh, and then delegates to
    the original ImageFile.index_html.

    Returns whatever ImageFile.index_html returns.
    Raises OSError (from os.stat) if self.path cannot be stat'ed.
    """
    # Imported locally: the fallback below needs `time`, which the module's
    # visible import block does not provide.
    import time

    stat_info = os.stat(self.path)
    self.size = stat_info[stat.ST_SIZE]
    # A modification time of 0 is treated as "unknown": use the current time.
    self.lmt = float(stat_info[stat.ST_MTIME]) or time.time()
    self.lmh = rfc1123_date(self.lmt)
    # Call the original method with the refreshed attributes in place.
    return ImageFile.index_html(self, REQUEST, RESPONSE)
|---|
| 134 | |
|---|
| 135 | |
|---|
def getBrowserType(self):
    """Inspect the request's User-Agent header to classify the browser.

    Reads the HTTP_USER_AGENT header from self.REQUEST and returns a dict
    with the keys:

    ua         -- the header value exactly as returned by get_header
    isIE       -- True if the value contains 'MSIE'
    isN4       -- True if it is not IE and contains 'Mozilla/4.'
    versIE     -- only present when the second "; "-separated field of the
                  parenthesised part contains 'MSIE'; holds the token after
                  it (e.g. '6.0')
    isMac      -- True if the value contains 'Macintosh'
    isWin      -- True if the value contains 'Windows'
    isIEWin    -- isIE and isWin
    isIEMac    -- isIE and isMac
    staticHTML -- always False
    """
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # The header may be missing (get_header is assumed to return None or an
    # empty value then -- confirm); match against '' instead of failing.
    ua_text = ua or ''

    bt = {'ua': ua, 'isIE': False, 'isN4': False}
    if 'MSIE' in ua_text:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua_text

    # Extract the IE version from a value shaped like
    # "... (compatible; MSIE 6.0; Windows NT 5.1)".
    paren = ua_text.find('(')
    if paren > -1:
        fields = ua_text[paren:].split("; ")
        if len(fields) > 1 and "MSIE" in fields[1]:
            tokens = fields[1].split(" ")
            if len(tokens) > 1:
                bt['versIE'] = tokens[1]

    bt['isMac'] = 'Macintosh' in ua_text
    bt['isWin'] = 'Windows' in ua_text
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
|---|
| 162 | |
|---|
| 163 | |
|---|