Mercurial > hg > documentViewer
comparison SrvTxtUtils.py @ 620:a71ae589d342
use https for annotations and fix bug when logging httplib errors.
author | casties |
---|---|
date | Thu, 11 Dec 2014 17:19:45 +0100 |
parents | d16da6e739ef |
children | a7b287122ce8 |
comparison
equal
deleted
inserted
replaced
619:79973dee60bd | 620:a71ae589d342 |
---|---|
8 import stat | 8 import stat |
9 import urllib | 9 import urllib |
10 import logging | 10 import logging |
11 import time | 11 import time |
12 import re | 12 import re |
13 import string | |
14 import datetime | 13 import datetime |
15 try: | 14 try: |
16 import httplib2 | 15 import httplib2 |
17 httplib = 'httplib2' | 16 httplib = 'httplib2' |
18 except: | 17 except: |
20 import urllib2 | 19 import urllib2 |
21 httplib = 'urllib2' | 20 httplib = 'urllib2' |
22 | 21 |
23 import xml.etree.ElementTree as ET | 22 import xml.etree.ElementTree as ET |
24 | 23 |
25 srvTxtUtilsVersion = "1.12.2" | 24 srvTxtUtilsVersion = "1.12.3" |
26 | 25 |
27 map_months = {'en': [u"", | 26 map_months = {'en': [u"", |
28 u"January", | 27 u"January", |
29 u"February", | 28 u"February", |
30 u"March", | 29 u"March", |
31 u"April", | 30 u"April", |
32 u"May", | 31 u"May", |
33 u"June", | 32 u"June", |
34 u"July", | 33 u"July", |
35 u"August", | 34 u"August", |
36 u"September", | 35 u"September", |
37 u"October", | 36 u"October", |
38 u"November", | 37 u"November", |
39 u"December"], | 38 u"December"], |
40 'de': [u"", | 39 'de': [u"", |
41 u"Januar", | 40 u"Januar", |
42 u"Februar", | 41 u"Februar", |
43 u"M\u00e4rz", | 42 u"M\u00e4rz", |
44 u"April", | 43 u"April", |
45 u"Mai", | 44 u"Mai", |
46 u"Juni", | 45 u"Juni", |
47 u"Juli", | 46 u"Juli", |
48 u"August", | 47 u"August", |
49 u"September", | 48 u"September", |
50 u"Oktober", | 49 u"Oktober", |
51 u"November", | 50 u"November", |
52 u"Dezember"]} | 51 u"Dezember"]} |
52 | |
53 map_weekdays_short = {'en': [ | |
54 u"Mo", | |
55 u"Tu", | |
56 u"We", | |
57 u"Th", | |
58 u"Fr", | |
59 u"Sa", | |
60 u"Su", | |
61 ], | |
62 'de': [ | |
63 u"Mo", | |
64 u"Di", | |
65 u"Mi", | |
66 u"Do", | |
67 u"Fr", | |
68 u"Sa", | |
69 u"So", | |
70 ]} | |
71 | |
53 | 72 |
54 def getInt(number, default=0): | 73 def getInt(number, default=0): |
55 """returns always an int (0 in case of problems)""" | 74 """returns always an int (0 in case of problems)""" |
56 try: | 75 try: |
57 return int(number) | 76 return int(number) |
82 assumes string objects to be utf-8""" | 101 assumes string objects to be utf-8""" |
83 if not s: | 102 if not s: |
84 return "" | 103 return "" |
85 if isinstance(s, str): | 104 if isinstance(s, str): |
86 return s | 105 return s |
87 elif isinstance(s, unicode): | 106 else: |
88 return s.encode('utf-8') | 107 return s.encode('utf-8') |
89 else: | 108 |
90 return str(s) | 109 |
91 | 110 def getTextFromNode(node, recursive=False, length=0): |
92 def getText(node, recursive=0): | 111 """Return all text content of a (etree) node. |
93 """returns all text content of a (etree) node and its subnodes""" | 112 |
113 :param recursive: descend subnodes | |
114 | |
115 :returns: text string | |
116 """ | |
94 if node is None: | 117 if node is None: |
95 return '' | 118 return '' |
96 | 119 |
97 # ElementTree: | 120 # ElementTree: |
98 text = node.text or '' | 121 text = node.text or '' |
111 # if n.nodeType == node.TEXT_NODE: | 134 # if n.nodeType == node.TEXT_NODE: |
112 # text = text + n.data | 135 # text = text + n.data |
113 | 136 |
114 return text | 137 return text |
115 | 138 |
139 getText = getTextFromNode | |
140 | |
141 def getPlaintext(text, length=0, wordwrap=False, ignoretags=[]): | |
142 """Return plain text content by filtering out XML tags. | |
143 | |
144 :param text: string or etree node | |
145 :param length: length of text to return (0=all) | |
146 :param wordwrap: try not to break the last word (may return shorter string) | |
147 :returns: text string | |
148 """ | |
149 if text is None: | |
150 return '' | |
151 | |
152 try: | |
153 if isinstance(text, basestring): | |
154 xmltext = utf8ify("<div>%s</div>"%text) | |
155 dom = ET.fromstring(xmltext) | |
156 else: | |
157 dom = text | |
158 | |
159 plaintext = '' | |
160 for elem in dom.iter(): | |
161 if elem.tag in ignoretags: | |
162 # ignore tag | |
163 continue | |
164 | |
165 if elem.text: | |
166 plaintext += elem.text | |
167 if elem.tail: | |
168 plaintext += elem.tail | |
169 | |
170 if length > 0 and len(plaintext) > length: | |
171 break | |
172 | |
173 text = plaintext | |
174 | |
175 except Exception, e: | |
176 logging.warn("getPlaintext: error parsing text! Returning everything. %s"%e) | |
177 | |
178 if length > 0 and len(text) > length: | |
179 # try to not break words | |
180 if wordwrap and text[length] not in [' ', '.', '?', '!']: | |
181 # search the last blank | |
182 length = text.rfind(' ', 0, length) | |
183 | |
184 return text[:length] + '...' | |
185 | |
186 return text | |
187 | |
116 | 188 |
117 def serialize(node): | 189 def serialize(node): |
118 """returns a string containing an XML snippet of (etree) node""" | 190 """returns a string containing an XML snippet of (etree) node""" |
119 s = ET.tostring(node, 'UTF-8') | 191 s = ET.tostring(node, 'UTF-8') |
120 # snip off XML declaration | 192 # snip off XML declaration |
125 return s | 197 return s |
126 | 198 |
127 | 199 |
128 def getMonthName(mon, lang): | 200 def getMonthName(mon, lang): |
129 """returns the name of the month mon in the language lang""" | 201 """returns the name of the month mon in the language lang""" |
130 return map_months[lang][mon] | 202 return map_months[lang.lower()][mon] |
131 | 203 |
132 | 204 |
133 def getDateString(date=None, lang='en', withYear=True, monthNames=True, abbrev=False): | 205 def getWeekdayName(day, lang, short=True): |
206 """returns the name of the weekday day in the language lang""" | |
207 return map_weekdays_short[lang.lower()][day] | |
208 | |
209 | |
210 def getDateString(date=None, lang='en', short=False, withYear=True, monthNames=True, abbrev=False): | |
134 """Return formatted date string.""" | 211 """Return formatted date string.""" |
135 if date is None: | 212 if date is None: |
136 return None | 213 return None |
137 | 214 |
138 ds = None | 215 ds = None |
146 day = date.day | 223 day = date.day |
147 month = date.month | 224 month = date.month |
148 year = date.year | 225 year = date.year |
149 | 226 |
150 if lang.lower() == 'en': | 227 if lang.lower() == 'en': |
151 ds = "%s %s"%(getMonthName(month, lang), day) | 228 if short: |
152 if withYear: | 229 ds = "%s/%s/%s"%(year,month,day) |
153 ds += ", %s"%year | 230 else: |
231 ds = "%s %s"%(getMonthName(month, lang), day) | |
232 if withYear: | |
233 ds += ", %s"%year | |
154 | 234 |
155 elif lang.lower() == 'de': | 235 elif lang.lower() == 'de': |
156 ds = "%s. %s"%(day, getMonthName(month, lang)) | 236 if short: |
157 if withYear: | 237 ds = "%s.%s.%s"%(day,month,year) |
158 ds += " %s"%year | 238 else: |
239 ds = "%s. %s"%(day, getMonthName(month, lang)) | |
240 if withYear: | |
241 ds += " %s"%year | |
159 | 242 |
160 elif lang.lower() == 'iso': | 243 elif lang.lower() == 'iso': |
161 ds = date.isoformat() | 244 ds = date.isoformat() |
162 | 245 |
163 return ds | 246 return ds |
203 | 286 |
204 resp, data = h.request(url) | 287 resp, data = h.request(url) |
205 return data | 288 return data |
206 | 289 |
207 except httplib2.HttpLib2Error, e: | 290 except httplib2.HttpLib2Error, e: |
208 logging.error("getHttp(lib2)Data: HTTP error(%s): %s"%(e.code,e)) | 291 logging.error("getHttp(lib2)Data: HTTP error(%s): %s"%(getattr(e, 'code','?'),e)) |
209 errmsg = str(e) | 292 errmsg = str(e) |
210 # stop trying | 293 # stop trying |
211 break | 294 break |
212 | 295 |
213 else: | 296 else: |
283 return s.lower() | 366 return s.lower() |
284 | 367 |
285 return s | 368 return s |
286 | 369 |
287 | 370 |
288 def getBrowserType(self): | |
289 """(legacy) check the browsers request to find out the browser type""" | |
290 bt = {} | |
291 ua = self.REQUEST.get_header("HTTP_USER_AGENT") | |
292 bt['ua'] = ua | |
293 bt['isIE'] = False | |
294 bt['isN4'] = False | |
295 if string.find(ua, 'MSIE') > -1: | |
296 bt['isIE'] = True | |
297 else: | |
298 bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1) | |
299 | |
300 try: | |
301 nav = ua[string.find(ua, '('):] | |
302 ie = string.split(nav, "; ")[1] | |
303 if string.find(ie, "MSIE") > -1: | |
304 bt['versIE'] = string.split(ie, " ")[1] | |
305 except: pass | |
306 | |
307 bt['isMac'] = string.find(ua, 'Macintosh') > -1 | |
308 bt['isWin'] = string.find(ua, 'Windows') > -1 | |
309 bt['isIEWin'] = bt['isIE'] and bt['isWin'] | |
310 bt['isIEMac'] = bt['isIE'] and bt['isMac'] | |
311 bt['staticHTML'] = False | |
312 | |
313 return bt | |
314 |