1 | """Utility methods for handling XML, reading HTTP, etc""" |
---|
2 | |
---|
3 | from App.ImageFile import ImageFile |
---|
4 | from App.Common import rfc1123_date |
---|
5 | |
---|
6 | import sys |
---|
7 | import os |
---|
8 | import stat |
---|
9 | import urllib |
---|
10 | import logging |
---|
11 | import time |
---|
12 | import re |
---|
13 | import datetime |
---|
# Prefer httplib2 for HTTP requests and fall back to urllib2 when it cannot be
# imported. The module-level name ``httplib`` holds the name (a string) of the
# library that getHttpData() should use.
try:
    import httplib2
    httplib = 'httplib2'
except Exception:
    # Best-effort fallback: any import problem selects urllib2, but
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    logging.warning("Unable to import httplib2! Falling back to urllib2!")
    import urllib2
    httplib = 'urllib2'
---|
21 | |
---|
22 | import xml.etree.ElementTree as ET |
---|
23 | |
---|
# Version identifier of this utility module.
# NOTE(review): presumably bumped by hand on release -- confirm.
srvTxtUtilsVersion = "1.12.3"
---|
25 | |
---|
# Full month names, keyed by lower-case language code.
# Index 0 holds an empty placeholder so that a month number (1-12) can be
# used directly as the list index.
map_months = {
    'en': [
        u"",
        u"January", u"February", u"March", u"April",
        u"May", u"June", u"July", u"August",
        u"September", u"October", u"November", u"December",
    ],
    'de': [
        u"",
        u"Januar", u"Februar", u"M\u00e4rz", u"April",
        u"Mai", u"Juni", u"Juli", u"August",
        u"September", u"Oktober", u"November", u"Dezember",
    ],
}
---|
52 | |
---|
# Two-letter weekday abbreviations, keyed by lower-case language code.
# The lists start with Monday at index 0.
# NOTE(review): presumably meant to match datetime.date.weekday() numbering --
# confirm against callers.
map_weekdays_short = {
    'en': [u"Mo", u"Tu", u"We", u"Th", u"Fr", u"Sa", u"Su"],
    'de': [u"Mo", u"Di", u"Mi", u"Do", u"Fr", u"Sa", u"So"],
}
---|
71 | |
---|
72 | |
---|
def getInt(number, default=0):
    """Return number converted to int, falling back to default.

    :param number: value to convert (anything accepted by int())
    :param default: value used when number cannot be converted; it is
        passed through int() as well
    :returns: int
    :raises: whatever int(default) raises if default itself is not convertible
    """
    try:
        return int(number)
    except Exception:
        # Any conversion problem selects the default, but
        # KeyboardInterrupt/SystemExit are not swallowed.
        return int(default)
---|
79 | |
---|
def getAt(array, idx, default=None):
    """Return array[idx], or default if the lookup fails.

    :param array: any object supporting subscription (list, tuple, dict, ...)
    :param idx: index or key
    :param default: value returned when the lookup raises an error
    :returns: the element or default
    """
    try:
        return array[idx]
    except Exception:
        # Missing index/key or a non-subscriptable object select the default,
        # but KeyboardInterrupt/SystemExit are not swallowed.
        return default
---|
86 | |
---|
def unicodify(s):
    """Decode a byte string (UTF-8 or Latin-1 representation) into unicode.

    :param s: byte string, unicode object or any other object
    :returns: u"" for any false value, the decoded text for byte strings,
        and s unchanged for everything else
    """
    if not s:
        return u""

    # ``bytes`` is the same type as ``str`` on Python 2.6+, so this is the
    # original check there; on Python 3 it selects real byte strings.
    if isinstance(s, bytes):
        try:
            return s.decode('utf-8')
        except UnicodeDecodeError:
            # not valid UTF-8: Latin-1 maps every byte to a character
            return s.decode('latin-1')

    return s
---|
98 | |
---|
def utf8ify(s):
    """Return s as a UTF-8 encoded byte string.

    Objects of type str are assumed to be UTF-8 already and are returned
    unchanged; everything else is encoded via its encode() method.
    Any false value yields the empty string.
    """
    if not s:
        return ""

    already_str = isinstance(s, str)
    return s if already_str else s.encode('utf-8')
---|
108 | |
---|
109 | |
---|
def getTextFromNode(node, recursive=False, length=0):
    """Return the text content of an (etree) node.

    The result is the node's own text followed, for each direct child, by
    the child's text and the tail text after the child.

    :param node: ElementTree element; None yields ''
    :param recursive: if true, descend into subnodes at every depth;
        otherwise only the own text of direct children is included
    :param length: accepted for interface compatibility; currently not used
    :returns: text string
    """
    if node is None:
        return ''

    parts = [node.text or '']
    for child in node:
        if recursive:
            # keep descending: pass the flag on so that all levels of
            # nested elements contribute their text
            parts.append(getTextFromNode(child, recursive=True))
        else:
            parts.append(child.text or '')

        if child.tail:
            parts.append(child.tail)

    return ''.join(parts)


# short alias for getTextFromNode
getText = getTextFromNode
---|
140 | |
---|
def _iterPlaintext(elem, ignoretags):
    """Yield the text chunks of elem and its descendants in document order.

    Text and tail of elements whose tag is in ignoretags are skipped; their
    child elements are still visited.
    """
    skip = elem.tag in ignoretags
    if elem.text and not skip:
        yield elem.text

    for child in elem:
        for chunk in _iterPlaintext(child, ignoretags):
            yield chunk

    # the tail follows the element's closing tag, i.e. comes after all of
    # the element's children
    if elem.tail and not skip:
        yield elem.tail


def getPlaintext(text, length=0, wordwrap=False, ignoretags=None):
    """Return plain text content by filtering out XML tags.

    :param text: string or etree node; a string is wrapped in a <div>
        element and parsed as XML
    :param length: length of text to return (0=all); longer text is cut
        and '...' is appended
    :param wordwrap: try not to break the last word (may return shorter string)
    :param ignoretags: tag names whose text and tail are left out
        (default: none)
    :returns: text string; '' if text is None. If parsing fails a warning
        is logged and the (possibly shortened) input is returned.
    """
    if text is None:
        return ''

    if ignoretags is None:
        ignoretags = ()

    try:
        string_types = basestring
    except NameError:
        # Python 3 has no basestring
        string_types = str

    try:
        if isinstance(text, string_types):
            xmltext = utf8ify("<div>%s</div>"%text)
            dom = ET.fromstring(xmltext)
        else:
            dom = text

        plaintext = ''
        for chunk in _iterPlaintext(dom, ignoretags):
            plaintext += chunk
            if length > 0 and len(plaintext) > length:
                # enough text collected
                break

        text = plaintext

    except Exception as e:
        logging.warning("getPlaintext: error parsing text! Returning everything. %s"%e)

    if length > 0 and len(text) > length:
        # try to not break words
        if wordwrap and text[length] not in [' ', '.', '?', '!']:
            # search the last blank; keep the hard cut if there is none
            # (rfind returns -1) or if it is the very first character
            cut = text.rfind(' ', 0, length)
            if cut > 0:
                length = cut

        return text[:length] + '...'

    return text
---|
187 | |
---|
188 | |
---|
def serialize(node):
    """Return a UTF-8 byte string containing the XML snippet of (etree) node.

    A leading XML declaration, if ElementTree emits one, is removed.
    """
    s = ET.tostring(node, 'UTF-8')
    # snip off XML declaration; byte literals keep the comparison valid on
    # Python 3, where tostring() with an encoding returns bytes
    if s.startswith(b'<?xml'):
        end = s.find(b'?>')
        if end >= 0:
            # drop the declaration and the line break that follows it
            return s[end + 2:].lstrip()

    return s
---|
198 | |
---|
199 | |
---|
def getMonthName(mon, lang):
    """Look up the name of month number mon in the language lang.

    :param mon: index into the month list of the language
    :param lang: language code, matched case-insensitively against the
        keys of map_months
    :returns: month name
    """
    names = map_months[lang.lower()]
    return names[mon]
---|
203 | |
---|
204 | |
---|
def getWeekdayName(day, lang, short=True):
    """Look up the name of weekday number day in the language lang.

    :param day: index into the weekday list of the language
    :param lang: language code, matched case-insensitively against the
        keys of map_weekdays_short
    :param short: accepted but not evaluated; the short names are always
        returned
    :returns: weekday name
    """
    names = map_weekdays_short[lang.lower()]
    return names[day]
---|
208 | |
---|
209 | |
---|
def getDateString(date=None, lang='en', short=False, withYear=True, monthNames=True, abbrev=False):
    """Return date formatted as a string for the language lang.

    :param date: object with day, month and year either as attributes or
        as callables (and isoformat() for lang 'iso'); None yields None
    :param lang: 'en', 'de' or 'iso' (case-insensitive); any other value
        yields None
    :param short: numeric form ("Y/M/D" for en, "D.M.Y" for de) instead of
        the form with the month name
    :param withYear: append the year to the long form
    :param monthNames: accepted but not evaluated
    :param abbrev: accepted but not evaluated
    :returns: formatted string or None
    """
    if date is None:
        return None

    if callable(date.day):
        # callable members
        day, month, year = date.day(), date.month(), date.year()
    else:
        # data members
        day, month, year = date.day, date.month, date.year

    language = lang.lower()

    if language == 'en':
        if short:
            return "%s/%s/%s"%(year,month,day)
        formatted = "%s %s"%(getMonthName(month, lang), day)
        if withYear:
            formatted += ", %s"%year
        return formatted

    if language == 'de':
        if short:
            return "%s.%s.%s"%(day,month,year)
        formatted = "%s. %s"%(day, getMonthName(month, lang))
        if withYear:
            formatted += " %s"%year
        return formatted

    if language == 'iso':
        return date.isoformat()

    # unknown language
    return None
---|
247 | |
---|
248 | |
---|
def getDate(date):
    """Return the date part of a datetime; any other object is returned as is."""
    has_time = isinstance(date, datetime.datetime)
    # strip the time from datetime objects only
    return date.date() if has_time else date
---|
256 | |
---|
257 | |
---|
def getDatetime(date):
    """Return a datetime object from a date or datetime.

    :param date: datetime.date, datetime.datetime or any other object
    :returns: datetime objects are returned unchanged, plain dates are
        extended with the time 0:00, anything else is returned as is
    """
    # datetime.datetime is a subclass of datetime.date, so it has to be
    # checked first; otherwise its time of day would be reset to 0:00
    if isinstance(date, datetime.datetime):
        return date

    if isinstance(date, datetime.date):
        # add time 0:00
        return datetime.datetime.combine(date, datetime.time())

    return date
---|
265 | |
---|
266 | |
---|
def getHttpData(url, data=None, num_tries=3, timeout=10, username=None, password=None, cache=None, insecure=False, noExceptions=False):
    """Return the response body of a HTTP GET request for url+data.

    :param url: URL to fetch
    :param data: optional query; a string is appended as is, a dict, list
        or tuple is urlencoded first
    :param num_tries: maximum number of attempts. Only an urllib2 URLError
        leads to another attempt; HTTP errors and httplib2 errors stop
        immediately.
    :param timeout: timeout in seconds
    :param username: user name for authentication (httplib2 only)
    :param password: password for authentication (httplib2 only)
    :param cache: cache argument for httplib2.Http (httplib2 only)
    :param insecure: disable SSL certificate validation (httplib2 only)
    :param noExceptions: return None instead of raising when fetching fails
    :returns: response body, or None on failure if noExceptions is set
    :raises IOError: on failure if noExceptions is not set
    """
    # we do GET (by appending data to url)
    query = None
    if isinstance(data, str) or isinstance(data, unicode):
        # if data is string then append
        query = data
    elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
        # urlencode
        query = urllib.urlencode(data)

    if query is not None:
        # extend an existing query string instead of adding a second "?"
        separator = '&' if '?' in url else '?'
        url = "%s%s%s"%(url,separator,query)

    errmsg = None
    if httplib == 'httplib2':
        # use httplib2
        for cnt in range(num_tries):
            try:
                logging.debug("getHttp(lib2)Data(#%s %ss) url=%s"%(cnt+1,timeout,url))
                h = httplib2.Http(cache=cache, timeout=float(timeout), disable_ssl_certificate_validation=insecure)
                if username:
                    h.add_credentials(username, password)

                resp, content = h.request(url)
                return content

            except httplib2.HttpLib2Error as e:
                logging.error("getHttp(lib2)Data: HTTP error(%s): %s"%(getattr(e, 'code','?'),e))
                errmsg = str(e)
                # stop trying
                break

    else:
        # use urllib2
        response = None
        for cnt in range(num_tries):
            try:
                logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
                if sys.version_info < (2, 6):
                    # set timeout on socket -- ugly :-(
                    import socket
                    socket.setdefaulttimeout(float(timeout))
                    response = urllib2.urlopen(url)
                else:
                    # timeout as parameter
                    response = urllib2.urlopen(url,timeout=float(timeout))

                try:
                    content = response.read()
                finally:
                    # release the connection even if reading fails
                    response.close()

                return content

            except urllib2.HTTPError as e:
                logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
                errmsg = str(e)
                # stop trying
                break
            except urllib2.URLError as e:
                # connection-level problem: try again
                logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
                errmsg = str(e)

    if noExceptions:
        return None

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
---|
331 | |
---|
332 | |
---|
def refreshingImageFileIndexHtml(self, REQUEST, RESPONSE):
    """index_html replacement for App.ImageFile that re-reads the file info on each request.

    Updates size, lmt (modification time, or the current time if the
    modification time is 0) and lmh (lmt formatted by rfc1123_date) from
    the file at self.path, then delegates to ImageFile.index_html.
    """
    info = os.stat(self.path)
    self.size = info[stat.ST_SIZE]

    modified = float(info[stat.ST_MTIME])
    if not modified:
        modified = time.time()
    self.lmt = modified
    self.lmh = rfc1123_date(self.lmt)

    # call original method
    return ImageFile.index_html(self, REQUEST, RESPONSE)
---|
341 | |
---|
342 | |
---|
def shortenString(s, l, ellipsis='...'):
    """Return s shortened to length l (or l-1) by replacing its middle with ellipsis.

    :param s: string to shorten
    :param l: maximum length of the result
    :param ellipsis: replacement for the omitted middle part
    :returns: s itself if it is not longer than l; otherwise the start and
        end of s joined by ellipsis. If l leaves no room for the ellipsis
        plus one character on each side, s is simply cut to l characters.
    """
    if len(s) <= l:
        return s

    # number of characters kept on each side of the ellipsis
    l1 = int((l - len(ellipsis)) // 2)
    if l1 <= 0:
        # s[-0:] would be the whole string and make the result longer than
        # s, so fall back to a plain cut
        return s[:int(max(l, 0))]

    return "%s%s%s"%(s[:l1],ellipsis,s[-l1:])
---|
350 | |
---|
351 | |
---|
def sqlName(s, lc=True, more=''):
    """Return a restricted ASCII-only version of s.

    Apostrophes are removed and every character other than A-Z, a-z, 0-9,
    "_" and the characters in more is replaced by "_".

    :param s: string, or any object (converted with str()); None yields ""
    :param lc: convert the result to lower case
    :param more: additional allowed characters. The value is inserted
        verbatim into a regular expression character class, so characters
        that are special there (e.g. "]", "\\", "^", "-") must be escaped
        by the caller.
    :returns: restricted string
    """
    if s is None:
        return ""

    try:
        string_types = basestring
    except NameError:
        # Python 3 has no basestring
        string_types = str

    if not isinstance(s, string_types):
        # make string object
        s = str(s)

    # remove '
    s = s.replace("'","")
    # all else -> "_"
    s = re.sub('[^A-Za-z0-9_'+more+']','_',s)
    if lc:
        return s.lower()

    return s
---|
369 | |
---|
370 | |
---|