Mercurial > hg > documentViewer
comparison SrvTxtUtils.py @ 620:a71ae589d342
use https for annotations and fix bug when logging httplib errors.
| author | casties |
|---|---|
| date | Thu, 11 Dec 2014 17:19:45 +0100 |
| parents | d16da6e739ef |
| children | a7b287122ce8 |
comparison
equal
deleted
inserted
replaced
| 619:79973dee60bd | 620:a71ae589d342 |
|---|---|
| 8 import stat | 8 import stat |
| 9 import urllib | 9 import urllib |
| 10 import logging | 10 import logging |
| 11 import time | 11 import time |
| 12 import re | 12 import re |
| 13 import string | |
| 14 import datetime | 13 import datetime |
| 15 try: | 14 try: |
| 16 import httplib2 | 15 import httplib2 |
| 17 httplib = 'httplib2' | 16 httplib = 'httplib2' |
| 18 except: | 17 except: |
| 20 import urllib2 | 19 import urllib2 |
| 21 httplib = 'urllib2' | 20 httplib = 'urllib2' |
| 22 | 21 |
| 23 import xml.etree.ElementTree as ET | 22 import xml.etree.ElementTree as ET |
| 24 | 23 |
| 25 srvTxtUtilsVersion = "1.12.2" | 24 srvTxtUtilsVersion = "1.12.3" |
| 26 | 25 |
| 27 map_months = {'en': [u"", | 26 map_months = {'en': [u"", |
| 28 u"January", | 27 u"January", |
| 29 u"February", | 28 u"February", |
| 30 u"March", | 29 u"March", |
| 31 u"April", | 30 u"April", |
| 32 u"May", | 31 u"May", |
| 33 u"June", | 32 u"June", |
| 34 u"July", | 33 u"July", |
| 35 u"August", | 34 u"August", |
| 36 u"September", | 35 u"September", |
| 37 u"October", | 36 u"October", |
| 38 u"November", | 37 u"November", |
| 39 u"December"], | 38 u"December"], |
| 40 'de': [u"", | 39 'de': [u"", |
| 41 u"Januar", | 40 u"Januar", |
| 42 u"Februar", | 41 u"Februar", |
| 43 u"M\u00e4rz", | 42 u"M\u00e4rz", |
| 44 u"April", | 43 u"April", |
| 45 u"Mai", | 44 u"Mai", |
| 46 u"Juni", | 45 u"Juni", |
| 47 u"Juli", | 46 u"Juli", |
| 48 u"August", | 47 u"August", |
| 49 u"September", | 48 u"September", |
| 50 u"Oktober", | 49 u"Oktober", |
| 51 u"November", | 50 u"November", |
| 52 u"Dezember"]} | 51 u"Dezember"]} |
| | 52 |
| | 53 map_weekdays_short = {'en': [ |
| | 54 u"Mo", |
| | 55 u"Tu", |
| | 56 u"We", |
| | 57 u"Th", |
| | 58 u"Fr", |
| | 59 u"Sa", |
| | 60 u"Su", |
| | 61 ], |
| | 62 'de': [ |
| | 63 u"Mo", |
| | 64 u"Di", |
| | 65 u"Mi", |
| | 66 u"Do", |
| | 67 u"Fr", |
| | 68 u"Sa", |
| | 69 u"So", |
| | 70 ]} |
| | 71 |
| 53 | 72 |
| 54 def getInt(number, default=0): | 73 def getInt(number, default=0): |
| 55 """returns always an int (0 in case of problems)""" | 74 """returns always an int (0 in case of problems)""" |
| 56 try: | 75 try: |
| 57 return int(number) | 76 return int(number) |
| 82 assumes string objects to be utf-8""" | 101 assumes string objects to be utf-8""" |
| 83 if not s: | 102 if not s: |
| 84 return "" | 103 return "" |
| 85 if isinstance(s, str): | 104 if isinstance(s, str): |
| 86 return s | 105 return s |
| 87 elif isinstance(s, unicode): | 106 else: |
| 88 return s.encode('utf-8') | 107 return s.encode('utf-8') |
| 89 else: | 108 |
| 90 return str(s) | 109 |
| 91 | 110 def getTextFromNode(node, recursive=False, length=0): |
| 92 def getText(node, recursive=0): | 111 """Return all text content of a (etree) node. |
| 93 """returns all text content of a (etree) node and its subnodes""" | 112 |
| | 113 :param recursive: descend subnodes |
| | 114 |
| | 115 :returns: text string |
| | 116 """ |
| 94 if node is None: | 117 if node is None: |
| 95 return '' | 118 return '' |
| 96 | 119 |
| 97 # ElementTree: | 120 # ElementTree: |
| 98 text = node.text or '' | 121 text = node.text or '' |
| 111 # if n.nodeType == node.TEXT_NODE: | 134 # if n.nodeType == node.TEXT_NODE: |
| 112 # text = text + n.data | 135 # text = text + n.data |
| 113 | 136 |
| 114 return text | 137 return text |
| 115 | 138 |
| | 139 getText = getTextFromNode |
| | 140 |
| | 141 def getPlaintext(text, length=0, wordwrap=False, ignoretags=[]): |
| | 142 """Return plain text content by filtering out XML tags. |
| | 143 |
| | 144 :param text: string or etree node |
| | 145 :param length: length of text to return (0=all) |
| | 146 :param wordwrap: try not to break the last word (may return shorter string) |
| | 147 :returns: text string |
| | 148 """ |
| | 149 if text is None: |
| | 150 return '' |
| | 151 |
| | 152 try: |
| | 153 if isinstance(text, basestring): |
| | 154 xmltext = utf8ify("<div>%s</div>"%text) |
| | 155 dom = ET.fromstring(xmltext) |
| | 156 else: |
| | 157 dom = text |
| | 158 |
| | 159 plaintext = '' |
| | 160 for elem in dom.iter(): |
| | 161 if elem.tag in ignoretags: |
| | 162 # ignore tag |
| | 163 continue |
| | 164 |
| | 165 if elem.text: |
| | 166 plaintext += elem.text |
| | 167 if elem.tail: |
| | 168 plaintext += elem.tail |
| | 169 |
| | 170 if length > 0 and len(plaintext) > length: |
| | 171 break |
| | 172 |
| | 173 text = plaintext |
| | 174 |
| | 175 except Exception, e: |
| | 176 logging.warn("getPlaintext: error parsing text! Returning everything. %s"%e) |
| | 177 |
| | 178 if length > 0 and len(text) > length: |
| | 179 # try to not break words |
| | 180 if wordwrap and text[length] not in [' ', '.', '?', '!']: |
| | 181 # search the last blank |
| | 182 length = text.rfind(' ', 0, length) |
| | 183 |
| | 184 return text[:length] + '...' |
| | 185 |
| | 186 return text |
| | 187 |
| 116 | 188 |
| 117 def serialize(node): | 189 def serialize(node): |
| 118 """returns a string containing an XML snippet of (etree) node""" | 190 """returns a string containing an XML snippet of (etree) node""" |
| 119 s = ET.tostring(node, 'UTF-8') | 191 s = ET.tostring(node, 'UTF-8') |
| 120 # snip off XML declaration | 192 # snip off XML declaration |
| 125 return s | 197 return s |
| 126 | 198 |
| 127 | 199 |
| 128 def getMonthName(mon, lang): | 200 def getMonthName(mon, lang): |
| 129 """returns the name of the month mon in the language lang""" | 201 """returns the name of the month mon in the language lang""" |
| 130 return map_months[lang][mon] | 202 return map_months[lang.lower()][mon] |
| 131 | 203 |
| 132 | 204 |
| 133 def getDateString(date=None, lang='en', withYear=True, monthNames=True, abbrev=False): | 205 def getWeekdayName(day, lang, short=True): |
| | 206 """returns the name of the weekday day in the language lang""" |
| | 207 return map_weekdays_short[lang.lower()][day] |
| | 208 |
| | 209 |
| | 210 def getDateString(date=None, lang='en', short=False, withYear=True, monthNames=True, abbrev=False): |
| 134 """Return formatted date string.""" | 211 """Return formatted date string.""" |
| 135 if date is None: | 212 if date is None: |
| 136 return None | 213 return None |
| 137 | 214 |
| 138 ds = None | 215 ds = None |
| 146 day = date.day | 223 day = date.day |
| 147 month = date.month | 224 month = date.month |
| 148 year = date.year | 225 year = date.year |
| 149 | 226 |
| 150 if lang.lower() == 'en': | 227 if lang.lower() == 'en': |
| 151 ds = "%s %s"%(getMonthName(month, lang), day) | 228 if short: |
| 152 if withYear: | 229 ds = "%s/%s/%s"%(year,month,day) |
| 153 ds += ", %s"%year | 230 else: |
| | 231 ds = "%s %s"%(getMonthName(month, lang), day) |
| | 232 if withYear: |
| | 233 ds += ", %s"%year |
| 154 | 234 |
| 155 elif lang.lower() == 'de': | 235 elif lang.lower() == 'de': |
| 156 ds = "%s. %s"%(day, getMonthName(month, lang)) | 236 if short: |
| 157 if withYear: | 237 ds = "%s.%s.%s"%(day,month,year) |
| 158 ds += " %s"%year | 238 else: |
| | 239 ds = "%s. %s"%(day, getMonthName(month, lang)) |
| | 240 if withYear: |
| | 241 ds += " %s"%year |
| 159 | 242 |
| 160 elif lang.lower() == 'iso': | 243 elif lang.lower() == 'iso': |
| 161 ds = date.isoformat() | 244 ds = date.isoformat() |
| 162 | 245 |
| 163 return ds | 246 return ds |
| 203 | 286 |
| 204 resp, data = h.request(url) | 287 resp, data = h.request(url) |
| 205 return data | 288 return data |
| 206 | 289 |
| 207 except httplib2.HttpLib2Error, e: | 290 except httplib2.HttpLib2Error, e: |
| 208 logging.error("getHttp(lib2)Data: HTTP error(%s): %s"%(e.code,e)) | 291 logging.error("getHttp(lib2)Data: HTTP error(%s): %s"%(getattr(e, 'code','?'),e)) |
| 209 errmsg = str(e) | 292 errmsg = str(e) |
| 210 # stop trying | 293 # stop trying |
| 211 break | 294 break |
| 212 | 295 |
| 213 else: | 296 else: |
| 283 return s.lower() | 366 return s.lower() |
| 284 | 367 |
| 285 return s | 368 return s |
| 286 | 369 |
| 287 | 370 |
| 288 def getBrowserType(self): | |
| 289 """(legacy) check the browsers request to find out the browser type""" | |
| 290 bt = {} | |
| 291 ua = self.REQUEST.get_header("HTTP_USER_AGENT") | |
| 292 bt['ua'] = ua | |
| 293 bt['isIE'] = False | |
| 294 bt['isN4'] = False | |
| 295 if string.find(ua, 'MSIE') > -1: | |
| 296 bt['isIE'] = True | |
| 297 else: | |
| 298 bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1) | |
| 299 | |
| 300 try: | |
| 301 nav = ua[string.find(ua, '('):] | |
| 302 ie = string.split(nav, "; ")[1] | |
| 303 if string.find(ie, "MSIE") > -1: | |
| 304 bt['versIE'] = string.split(ie, " ")[1] | |
| 305 except: pass | |
| 306 | |
| 307 bt['isMac'] = string.find(ua, 'Macintosh') > -1 | |
| 308 bt['isWin'] = string.find(ua, 'Windows') > -1 | |
| 309 bt['isIEWin'] = bt['isIE'] and bt['isWin'] | |
| 310 bt['isIEMac'] = bt['isIE'] and bt['isMac'] | |
| 311 bt['staticHTML'] = False | |
| 312 | |
| 313 return bt | |
| 314 |
