1 | """Utility methods for handling XML, reading HTTP, etc""" |
---|
2 | |
---|
3 | from App.ImageFile import ImageFile |
---|
4 | from App.Common import rfc1123_date |
---|
5 | |
---|
6 | import sys |
---|
7 | import os |
---|
8 | import stat |
---|
9 | import urllib |
---|
10 | from urlparse import urlparse, urlunparse |
---|
11 | import logging |
---|
12 | import time |
---|
13 | import re |
---|
14 | import datetime |
---|
15 | try: |
---|
16 | import httplib2 |
---|
17 | httplib = 'httplib2' |
---|
18 | except: |
---|
19 | logging.warn("Unable to import httplib2! Falling back to urllib2!") |
---|
20 | import urllib2 |
---|
21 | httplib = 'urllib2' |
---|
22 | |
---|
23 | import xml.etree.ElementTree as ET |
---|
24 | |
---|
# version of this utility module
srvTxtUtilsVersion = "1.13.1"

# month names by language code; index 0 is an empty placeholder so that
# the month number (1-12) can be used directly as list index
map_months = {
    'en': [
        u"",
        u"January", u"February", u"March",
        u"April", u"May", u"June",
        u"July", u"August", u"September",
        u"October", u"November", u"December",
    ],
    'de': [
        u"",
        u"Januar", u"Februar", u"M\u00e4rz",
        u"April", u"Mai", u"Juni",
        u"Juli", u"August", u"September",
        u"Oktober", u"November", u"Dezember",
    ],
}

# two-letter weekday abbreviations by language code; index 0 is "Mo"
map_weekdays_short = {
    'en': [u"Mo", u"Tu", u"We", u"Th", u"Fr", u"Sa", u"Su"],
    'de': [u"Mo", u"Di", u"Mi", u"Do", u"Fr", u"Sa", u"So"],
}
---|
72 | |
---|
73 | |
---|
def getInt(number, default=0):
    """Return number converted to int, falling back to default.

    :param number: value to convert (anything accepted by int())
    :param default: value that is converted with int() and returned when
        number cannot be converted; it has to be convertible itself,
        otherwise the conversion error of default is raised
    :returns: int
    """
    try:
        return int(number)
    except Exception:
        # conversion problems only: KeyboardInterrupt and SystemExit are
        # not derived from Exception and pass through
        return int(default)
---|
80 | |
---|
def getAt(array, idx, default=None):
    """Return element idx of array, or default if it cannot be retrieved.

    :param array: any object supporting array[idx] (list, tuple, dict, ...)
    :param idx: index or key
    :param default: value returned when the lookup raises an error
    :returns: array[idx] or default
    """
    try:
        return array[idx]
    except Exception:
        # lookup problems only: KeyboardInterrupt and SystemExit are not
        # derived from Exception and pass through
        return default
---|
87 | |
---|
def unicodify(s):
    """Decode str (utf-8 or latin-1 representation) into unicode object.

    Every falsy value (None, empty string, 0, empty container) yields u"".

    :param s: byte string, unicode object or any other object
    :returns: unicode object
    """
    if not s:
        return u""

    if isinstance(s, unicode):
        return s

    if isinstance(s, str):
        try:
            return s.decode('utf-8')
        except UnicodeDecodeError:
            # not valid utf-8: latin-1 accepts every byte value
            return s.decode('latin-1')

    # other objects: use their unicode representation
    return unicode(s)
---|
101 | |
---|
def utf8ify(s):
    """Return s as byte string in utf-8 representation.

    Byte strings are passed through unchanged (they are assumed to be utf-8
    already), unicode objects are encoded, everything else is converted with
    str(). Every falsy value yields the empty string.
    """
    if not s:
        return ""

    if isinstance(s, str):
        # taken as-is
        return s

    if isinstance(s, unicode):
        return s.encode('utf-8')

    return str(s)
---|
113 | |
---|
114 | |
---|
def getTextFromNode(node, recursive=False, length=0):
    """Return the text content of a (etree) node.

    Without recursive the result consists of the text of node itself plus
    text and tail of its direct children. With recursive the text of the
    complete subtree is collected.

    :param node: ElementTree element (None yields '')
    :param recursive: descend subnodes (all levels)
    :param length: not evaluated; kept so that existing calls keep working

    :returns: text string
    """
    if node is None:
        return ''

    parts = [node.text or '']
    for child in node:
        if recursive:
            # hand the flag down, otherwise the descent stops one level below
            parts.append(getTextFromNode(child, recursive=True))
        else:
            parts.append(child.text or '')

        if child.tail:
            parts.append(child.tail)

    return ''.join(parts)


getText = getTextFromNode
---|
145 | |
---|
def getPlaintext(text, length=0, wordwrap=False, ignoretags=()):
    """Return plain text content by filtering out XML tags.

    Strings are wrapped in a <div> element and parsed as XML. If parsing
    fails a warning is logged and the unparsed text is used instead.

    :param text: string or etree node
    :param length: length of text to return (0=all); longer text is cut and
        gets '...' appended
    :param wordwrap: try not to break the last word (may return shorter string)
    :param ignoretags: tag names of elements to skip. Neither text nor tail of
        a skipped element is used; its descendants are still visited.
    :returns: text string
    """
    if text is None:
        return ''

    try:
        if isinstance(text, basestring):
            # wrap in a root element so that fragments can be parsed
            dom = ET.fromstring(utf8ify("<div>%s</div>" % text))
        else:
            dom = text

        parts = []
        size = 0
        for elem in dom.iter():
            if elem.tag in ignoretags:
                # ignore tag
                continue

            for chunk in (elem.text, elem.tail):
                if chunk:
                    parts.append(chunk)
                    size += len(chunk)

            if length > 0 and size > length:
                # enough text collected for the requested length
                break

        text = ''.join(parts)

    except Exception as e:
        logging.warning("getPlaintext: error parsing text! Returning everything. %s" % e)

    if length > 0 and len(text) > length:
        # try to not break words
        if wordwrap and text[length] not in (' ', '.', '?', '!'):
            # search the last blank
            blank = text.rfind(' ', 0, length)
            if blank > 0:
                length = blank
            # without a usable blank the cut stays at length (rfind returns
            # -1, which would otherwise be taken as slice index)

        return text[:length] + '...'

    return text
---|
192 | |
---|
193 | |
---|
def serialize(node):
    """Return a string containing an XML snippet of (etree) node.

    The node is serialized with encoding 'UTF-8'; an XML declaration at the
    start of the result is removed.

    :param node: ElementTree element
    :returns: byte string
    """
    s = ET.tostring(node, 'UTF-8')
    # snip off XML declaration (bytes literals match any byte-string result)
    if s.startswith(b'<?xml'):
        end = s.find(b'?>')
        if end != -1:
            # also drop the whitespace between declaration and root element
            return s[end + 2:].lstrip()

    return s
---|
203 | |
---|
204 | |
---|
def getMonthName(mon, lang):
    """Return the name of month number mon in language lang.

    :param mon: index into the month list of map_months
    :param lang: language code (case-insensitive key of map_months)
    """
    names = map_months[lang.lower()]
    return names[mon]
---|
208 | |
---|
209 | |
---|
def getWeekdayName(day, lang, short=True):
    """Return the short name of weekday number day in language lang.

    :param day: index into the weekday list of map_weekdays_short
    :param lang: language code (case-insensitive key of map_weekdays_short)
    :param short: not evaluated, the short name is returned in any case
    """
    names = map_weekdays_short[lang.lower()]
    return names[day]
---|
213 | |
---|
214 | |
---|
def getDateString(date=None, lang='en', short=False, withYear=True, monthNames=True, abbrev=False):
    """Return date formatted as string in the style of language lang.

    :param date: object with day, month and year as attributes or as methods
    :param lang: 'en', 'de' or 'iso' (case-insensitive); 'iso' returns
        date.isoformat()
    :param short: numeric form (year/month/day for 'en', day.month.year for 'de')
    :param withYear: append the year to the long form
    :param monthNames: not evaluated
    :param abbrev: not evaluated
    :returns: string, or None if date is None or lang is not supported
    """
    if date is None:
        return None

    # day, month and year may be methods or plain data members
    if callable(date.day):
        day, month, year = date.day(), date.month(), date.year()
    else:
        day, month, year = date.day, date.month, date.year

    language = lang.lower()

    if language == 'en':
        if short:
            return "%s/%s/%s" % (year, month, day)

        result = "%s %s" % (getMonthName(month, lang), day)
        if withYear:
            result += ", %s" % year
        return result

    if language == 'de':
        if short:
            return "%s.%s.%s" % (day, month, year)

        result = "%s. %s" % (day, getMonthName(month, lang))
        if withYear:
            result += " %s" % year
        return result

    if language == 'iso':
        return date.isoformat()

    # unsupported language
    return None
---|
252 | |
---|
253 | |
---|
def getDate(date):
    """Return the date part of a datetime object; other values are returned unchanged."""
    if not isinstance(date, datetime.datetime):
        return date

    # strip time
    return date.date()
---|
261 | |
---|
262 | |
---|
def getDatetime(date):
    """Return datetime object from date or datetime date.

    :param date: datetime.date, datetime.datetime or any other value
    :returns: datetime objects unchanged, date objects as datetime with
        time 0:00, all other values unchanged
    """
    if isinstance(date, datetime.datetime):
        # has to be checked first: datetime is a subclass of date and
        # combine() would replace its time by 0:00
        return date

    if isinstance(date, datetime.date):
        # add time 0:00
        return datetime.datetime.combine(date, datetime.time())

    return date
---|
270 | |
---|
271 | |
---|
def getHttpData(url, data=None, num_tries=3, timeout=10, username=None, password=None, cache=None, insecure=False, noExceptions=False):
    """Return the response body of a HTTP GET request for url (+data).

    Uses httplib2 or urllib2, depending on the module-level httplib flag.

    :param url: URL to fetch
    :param data: query parameters: a ready-made query string, or a dict or
        sequence of pairs that gets urlencoded. Appended to url.
    :param num_tries: maximum number of attempts. Only the urllib2 branch
        tries again, and only after an URLError; all other handled errors
        end the loop.
    :param timeout: timeout in seconds
    :param username: credentials, only used by the httplib2 branch
    :param password: credentials, only used by the httplib2 branch
    :param cache: passed as cache to httplib2.Http
    :param insecure: not evaluated; the httplib2 branch always disables
        certificate validation
    :param noExceptions: return None instead of raising IOError on failure
    :returns: response body
    :raises IOError: when the request failed and noExceptions is false

    NOTE(review): the httplib2 branch returns the body without looking at
    the response status.
    """
    # we do GET (by appending data to url)
    query = None
    if isinstance(data, basestring):
        # if data is string then append
        query = data
    elif isinstance(data, (dict, list, tuple)):
        # urlencode
        query = urllib.urlencode(data)

    if query is not None:
        # continue an existing query string instead of adding a second "?"
        if '?' not in url:
            separator = '?'
        elif url.endswith(('?', '&')):
            separator = ''
        else:
            separator = '&'

        url = "%s%s%s" % (url, separator, query)

    errmsg = None
    if httplib == 'httplib2':
        # use httplib2
        for cnt in range(num_tries):
            try:
                logging.debug("getHttp(lib2)Data(#%s %ss) url=%s" % (cnt + 1, timeout, url))
                # I would prefer at least disable_ssl_certificate_validation=insecure
                # but python < 2.7.9 doesn't do SNI :-(
                h = httplib2.Http(cache=cache, timeout=float(timeout), disable_ssl_certificate_validation=True)
                if username:
                    h.add_credentials(username, password)

                resp, content = h.request(url)
                return content

            except httplib2.HttpLib2Error as e:
                logging.error("getHttp(lib2)Data: HTTP error(%s): %s" % (getattr(e, 'code', '?'), e))
                errmsg = str(e)
                # stop trying
                break

    else:
        # use urllib2
        for cnt in range(num_tries):
            try:
                logging.debug("getHttpData(#%s %ss) url=%s" % (cnt + 1, timeout, url))
                # timeout as parameter (the "except ... as" syntax needs
                # Python 2.6 anyway, so the socket-default fallback for
                # older versions is gone)
                response = urllib2.urlopen(url, timeout=float(timeout))
                try:
                    return response.read()
                finally:
                    # close the connection even if reading fails
                    response.close()

            except urllib2.HTTPError as e:
                logging.error("getHttpData: HTTP error(%s): %s" % (e.code, e))
                errmsg = str(e)
                # stop trying
                break
            except urllib2.URLError as e:
                logging.error("getHttpData: URLLIB error(%s): %s" % (e.reason, e))
                errmsg = str(e)
                # no break: try again

    if noExceptions:
        return None

    raise IOError("ERROR fetching HTTP data from %s: %s" % (url, errmsg))
---|
338 | |
---|
339 | |
---|
def refreshingImageFileIndexHtml(self, REQUEST, RESPONSE):
    """index_html method for App.ImageFile that re-reads the file info on each request.

    Sets size, lmt and lmh of self from os.stat(self.path) and then calls
    ImageFile.index_html.
    """
    info = os.stat(self.path)
    modified = float(info[stat.ST_MTIME])
    if not modified:
        # modification time 0: use the current time
        modified = time.time()

    self.size = info[stat.ST_SIZE]
    self.lmt = modified
    self.lmh = rfc1123_date(self.lmt)
    # call original method
    return ImageFile.index_html(self, REQUEST, RESPONSE)
---|
348 | |
---|
349 | |
---|
def shortenString(s, l, ellipsis='...'):
    """Return s shortened to length l (or l-1) by replacing characters in the middle with ellipsis.

    :param s: string to shorten
    :param l: maximum length of the result
    :param ellipsis: replacement for the omitted characters
    :returns: s if it is not longer than l. If l leaves no room for at least
        one character on each side of ellipsis, the first l characters of s.
    """
    if len(s) <= l:
        return s

    half = (l - len(ellipsis)) // 2
    if half <= 0:
        # s[-0:] would be the whole string and a negative width would cut
        # from the wrong end, so just truncate
        return s[:max(l, 0)]

    return "%s%s%s" % (s[:half], ellipsis, s[-half:])
---|
357 | |
---|
358 | |
---|
def sqlName(s, lc=True, more=''):
    """Return restricted ASCII-only version of s.

    Single quotes are removed; every other character that is not a letter,
    digit, underscore or listed in more is replaced by an underscore.

    :param s: string or other object (converted with str()); None yields ""
    :param lc: return the result in lower case
    :param more: additional allowed characters; inserted into a regular
        expression character class as given
    """
    if s is None:
        return ""

    # make string object
    name = s if isinstance(s, basestring) else str(s)

    # remove ', all else -> "_"
    name = re.sub('[^A-Za-z0-9_' + more + ']', '_', name.replace("'", ""))

    return name.lower() if lc else name
---|
376 | |
---|
377 | |
---|
def sslifyUrl(url, app=None, force=False):
    """Return URL with http or https scheme.

    Looks at app.REQUEST['URL'] to find the scheme of the current page.
    Changes only schemeless (starting with //) URLs unless force=True.
    URLs without host part (e.g. relative paths) are returned unchanged.

    :param url: URL to adapt
    :param app: object with REQUEST; without it there is no current page
    :param force: also replace an existing scheme: by the scheme of the
        current page, or by https if there is no current page
    :returns: URL string
    """
    thatUrl = urlparse(url)
    if not thatUrl.netloc:
        # no host: putting a scheme in front would produce a broken URL
        return url

    if hasattr(app, 'REQUEST'):
        # get scheme of current page URL
        thisScheme = urlparse(app.REQUEST['URL']).scheme
        if thatUrl.scheme == '':
            # schemeless URL -> use this scheme
            return "%s:%s" % (thisScheme, url)

        if force and thisScheme != thatUrl.scheme:
            # use this scheme
            return urlunparse((thisScheme,) + thatUrl[1:])

        # keep scheme
        return url

    # no current page URL
    if force:
        # use https for force
        return urlunparse(('https',) + thatUrl[1:])

    return url
---|