comparison SrvTxtUtils.py @ 620:a71ae589d342

use https for annotations and fix bug when logging httplib errors.
author casties
date Thu, 11 Dec 2014 17:19:45 +0100
parents d16da6e739ef
children a7b287122ce8
comparison
equal deleted inserted replaced
619:79973dee60bd 620:a71ae589d342
8 import stat 8 import stat
9 import urllib 9 import urllib
10 import logging 10 import logging
11 import time 11 import time
12 import re 12 import re
13 import string
14 import datetime 13 import datetime
15 try: 14 try:
16 import httplib2 15 import httplib2
17 httplib = 'httplib2' 16 httplib = 'httplib2'
18 except: 17 except:
20 import urllib2 19 import urllib2
21 httplib = 'urllib2' 20 httplib = 'urllib2'
22 21
23 import xml.etree.ElementTree as ET 22 import xml.etree.ElementTree as ET
24 23
25 srvTxtUtilsVersion = "1.12.2" 24 srvTxtUtilsVersion = "1.12.3"
26 25
27 map_months = {'en': [u"", 26 map_months = {'en': [u"",
28 u"January", 27 u"January",
29 u"February", 28 u"February",
30 u"March", 29 u"March",
31 u"April", 30 u"April",
32 u"May", 31 u"May",
33 u"June", 32 u"June",
34 u"July", 33 u"July",
35 u"August", 34 u"August",
36 u"September", 35 u"September",
37 u"October", 36 u"October",
38 u"November", 37 u"November",
39 u"December"], 38 u"December"],
40 'de': [u"", 39 'de': [u"",
41 u"Januar", 40 u"Januar",
42 u"Februar", 41 u"Februar",
43 u"M\u00e4rz", 42 u"M\u00e4rz",
44 u"April", 43 u"April",
45 u"Mai", 44 u"Mai",
46 u"Juni", 45 u"Juni",
47 u"Juli", 46 u"Juli",
48 u"August", 47 u"August",
49 u"September", 48 u"September",
50 u"Oktober", 49 u"Oktober",
51 u"November", 50 u"November",
52 u"Dezember"]} 51 u"Dezember"]}
52
53 map_weekdays_short = {'en': [
54 u"Mo",
55 u"Tu",
56 u"We",
57 u"Th",
58 u"Fr",
59 u"Sa",
60 u"Su",
61 ],
62 'de': [
63 u"Mo",
64 u"Di",
65 u"Mi",
66 u"Do",
67 u"Fr",
68 u"Sa",
69 u"So",
70 ]}
71
53 72
54 def getInt(number, default=0): 73 def getInt(number, default=0):
55 """returns always an int (0 in case of problems)""" 74 """returns always an int (0 in case of problems)"""
56 try: 75 try:
57 return int(number) 76 return int(number)
82 assumes string objects to be utf-8""" 101 assumes string objects to be utf-8"""
83 if not s: 102 if not s:
84 return "" 103 return ""
85 if isinstance(s, str): 104 if isinstance(s, str):
86 return s 105 return s
87 elif isinstance(s, unicode): 106 else:
88 return s.encode('utf-8') 107 return s.encode('utf-8')
89 else: 108
90 return str(s) 109
91 110 def getTextFromNode(node, recursive=False, length=0):
92 def getText(node, recursive=0): 111 """Return all text content of a (etree) node.
93 """returns all text content of a (etree) node and its subnodes""" 112
113 :param recursive: descend subnodes
114
115 :returns: text string
116 """
94 if node is None: 117 if node is None:
95 return '' 118 return ''
96 119
97 # ElementTree: 120 # ElementTree:
98 text = node.text or '' 121 text = node.text or ''
111 # if n.nodeType == node.TEXT_NODE: 134 # if n.nodeType == node.TEXT_NODE:
112 # text = text + n.data 135 # text = text + n.data
113 136
114 return text 137 return text
115 138
139 getText = getTextFromNode
140
141 def getPlaintext(text, length=0, wordwrap=False, ignoretags=[]):
142 """Return plain text content by filtering out XML tags.
143
144 :param text: string or etree node
145 :param length: length of text to return (0=all)
146 :param wordwrap: try not to break the last word (may return shorter string)
147 :returns: text string
148 """
149 if text is None:
150 return ''
151
152 try:
153 if isinstance(text, basestring):
154 xmltext = utf8ify("<div>%s</div>"%text)
155 dom = ET.fromstring(xmltext)
156 else:
157 dom = text
158
159 plaintext = ''
160 for elem in dom.iter():
161 if elem.tag in ignoretags:
162 # ignore tag
163 continue
164
165 if elem.text:
166 plaintext += elem.text
167 if elem.tail:
168 plaintext += elem.tail
169
170 if length > 0 and len(plaintext) > length:
171 break
172
173 text = plaintext
174
175 except Exception, e:
176 logging.warn("getPlaintext: error parsing text! Returning everything. %s"%e)
177
178 if length > 0 and len(text) > length:
179 # try to not break words
180 if wordwrap and text[length] not in [' ', '.', '?', '!']:
181 # search the last blank
182 length = text.rfind(' ', 0, length)
183
184 return text[:length] + '...'
185
186 return text
187
116 188
117 def serialize(node): 189 def serialize(node):
118 """returns a string containing an XML snippet of (etree) node""" 190 """returns a string containing an XML snippet of (etree) node"""
119 s = ET.tostring(node, 'UTF-8') 191 s = ET.tostring(node, 'UTF-8')
120 # snip off XML declaration 192 # snip off XML declaration
125 return s 197 return s
126 198
127 199
128 def getMonthName(mon, lang): 200 def getMonthName(mon, lang):
129 """returns the name of the month mon in the language lang""" 201 """returns the name of the month mon in the language lang"""
130 return map_months[lang][mon] 202 return map_months[lang.lower()][mon]
131 203
132 204
133 def getDateString(date=None, lang='en', withYear=True, monthNames=True, abbrev=False): 205 def getWeekdayName(day, lang, short=True):
206 """returns the name of the weekday day in the language lang"""
207 return map_weekdays_short[lang.lower()][day]
208
209
210 def getDateString(date=None, lang='en', short=False, withYear=True, monthNames=True, abbrev=False):
134 """Return formatted date string.""" 211 """Return formatted date string."""
135 if date is None: 212 if date is None:
136 return None 213 return None
137 214
138 ds = None 215 ds = None
146 day = date.day 223 day = date.day
147 month = date.month 224 month = date.month
148 year = date.year 225 year = date.year
149 226
150 if lang.lower() == 'en': 227 if lang.lower() == 'en':
151 ds = "%s %s"%(getMonthName(month, lang), day) 228 if short:
152 if withYear: 229 ds = "%s/%s/%s"%(year,month,day)
153 ds += ", %s"%year 230 else:
231 ds = "%s %s"%(getMonthName(month, lang), day)
232 if withYear:
233 ds += ", %s"%year
154 234
155 elif lang.lower() == 'de': 235 elif lang.lower() == 'de':
156 ds = "%s. %s"%(day, getMonthName(month, lang)) 236 if short:
157 if withYear: 237 ds = "%s.%s.%s"%(day,month,year)
158 ds += " %s"%year 238 else:
239 ds = "%s. %s"%(day, getMonthName(month, lang))
240 if withYear:
241 ds += " %s"%year
159 242
160 elif lang.lower() == 'iso': 243 elif lang.lower() == 'iso':
161 ds = date.isoformat() 244 ds = date.isoformat()
162 245
163 return ds 246 return ds
203 286
204 resp, data = h.request(url) 287 resp, data = h.request(url)
205 return data 288 return data
206 289
207 except httplib2.HttpLib2Error, e: 290 except httplib2.HttpLib2Error, e:
208 logging.error("getHttp(lib2)Data: HTTP error(%s): %s"%(e.code,e)) 291 logging.error("getHttp(lib2)Data: HTTP error(%s): %s"%(getattr(e, 'code','?'),e))
209 errmsg = str(e) 292 errmsg = str(e)
210 # stop trying 293 # stop trying
211 break 294 break
212 295
213 else: 296 else:
283 return s.lower() 366 return s.lower()
284 367
285 return s 368 return s
286 369
287 370
288 def getBrowserType(self):
289 """(legacy) check the browsers request to find out the browser type"""
290 bt = {}
291 ua = self.REQUEST.get_header("HTTP_USER_AGENT")
292 bt['ua'] = ua
293 bt['isIE'] = False
294 bt['isN4'] = False
295 if string.find(ua, 'MSIE') > -1:
296 bt['isIE'] = True
297 else:
298 bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1)
299
300 try:
301 nav = ua[string.find(ua, '('):]
302 ie = string.split(nav, "; ")[1]
303 if string.find(ie, "MSIE") > -1:
304 bt['versIE'] = string.split(ie, " ")[1]
305 except: pass
306
307 bt['isMac'] = string.find(ua, 'Macintosh') > -1
308 bt['isWin'] = string.find(ua, 'Windows') > -1
309 bt['isIEWin'] = bt['isIE'] and bt['isWin']
310 bt['isIEMac'] = bt['isIE'] and bt['isMac']
311 bt['staticHTML'] = False
312
313 return bt
314