Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

source: documentViewer/SrvTxtUtils.py

Last change on this file was 628:447251b5af65, checked in by casties, 9 years ago
make sure utf8ify and unicodify output text.
File size: 11.5 KB

Line
1	"""Utility methods for handling XML, reading HTTP, etc"""
2
3	from App.ImageFile import ImageFile
4	from App.Common import rfc1123_date
5
6	import sys
7	import os
8	import stat
9	import urllib
10	from urlparse import urlparse, urlunparse
11	import logging
12	import time
13	import re
14	import datetime
15	try:
16	import httplib2
17	httplib = 'httplib2'
18	except:
19	logging.warn("Unable to import httplib2! Falling back to urllib2!")
20	import urllib2
21	httplib = 'urllib2'
22
23	import xml.etree.ElementTree as ET
24
25	srvTxtUtilsVersion = "1.13.1"
26
# Month names keyed by language code.  Index 0 holds an empty string, the
# actual names start at index 1.
map_months = {
    'en': [
        u"",
        u"January", u"February", u"March", u"April",
        u"May", u"June", u"July", u"August",
        u"September", u"October", u"November", u"December",
    ],
    'de': [
        u"",
        u"Januar", u"Februar", u"M\u00e4rz", u"April",
        u"Mai", u"Juni", u"Juli", u"August",
        u"September", u"Oktober", u"November", u"Dezember",
    ],
}
53
# Two-letter weekday abbreviations keyed by language code.  The lists start
# with Monday at index 0.
map_weekdays_short = {
    'en': [u"Mo", u"Tu", u"We", u"Th", u"Fr", u"Sa", u"Su"],
    'de': [u"Mo", u"Di", u"Mi", u"Do", u"Fr", u"Sa", u"So"],
}
72
73
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that fails.

    :param number: value to convert (anything int() accepts)
    :param default: fallback value; it is passed through int() as well
    :returns: int
    """
    try:
        return int(number)
    except Exception:
        # best-effort conversion: every conversion error yields the default,
        # but KeyboardInterrupt and SystemExit are not swallowed
        return int(default)
80
def getAt(array, idx, default=None):
    """Return element idx of array, or default if it cannot be accessed.

    :param array: any object supporting item access (list, tuple, dict, ...)
    :param idx: index or key
    :param default: value returned when array[idx] raises an error
    :returns: array[idx] or default
    """
    try:
        return array[idx]
    except Exception:
        # best-effort access: any lookup error yields the default,
        # but KeyboardInterrupt and SystemExit are not swallowed
        return default
87
def unicodify(s):
    """Decode str (utf-8 or latin-1 representation) into unicode object.

    :param s: byte string, unicode object or any other object
    :returns: unicode object; u"" for every false value (None, "", 0, ...)
    """
    if not s:
        return u""
    if isinstance(s, str):
        try:
            return s.decode('utf-8')
        except UnicodeDecodeError:
            # not valid UTF-8 -> interpret the bytes as latin-1 instead
            return s.decode('latin-1')
    elif isinstance(s, unicode):
        return s
    else:
        # any other object: use its unicode representation
        return unicode(s)
101
def utf8ify(s):
    """Return s as a byte string in utf-8 representation.

    Unicode objects are encoded as utf-8, string objects are assumed to be
    utf-8 already and returned unchanged, other objects are converted
    with str().

    :param s: unicode object, string or any other object
    :returns: string; "" for every false value (None, u"", 0, ...)
    """
    if s:
        if isinstance(s, unicode):
            return s.encode('utf-8')
        if isinstance(s, str):
            return s
        return str(s)

    return ""
113
114
def getTextFromNode(node, recursive=False, length=0):
    """Return the text content of a (etree) node.

    Without recursive the result consists of the node's own text plus the
    text and tail of each direct child.  With recursive=True the children
    are descended at every level, so the text of all subnodes is included.

    :param node: etree element (or None)
    :param recursive: descend subnodes
    :param length: not used by this function; kept in the signature for
        existing callers
    :returns: text string ('' if node is None)
    """
    if node is None:
        return ''

    parts = [node.text or '']
    for child in node:
        if recursive:
            # pass the flag on, otherwise the descent stops one level down
            parts.append(getTextFromNode(child, recursive=True))
        else:
            parts.append(child.text or '')
        if child.tail:
            parts.append(child.tail)

    return ''.join(parts)


# short alias
getText = getTextFromNode
145
def _plaintextPieces(elem, ignoretags):
    """Yield the text fragments of elem and its descendants in document order.

    Text and tail of elements whose tag is in ignoretags are left out;
    their descendants are still visited.
    """
    wanted = elem.tag not in ignoretags
    if wanted and elem.text:
        yield elem.text
    for child in elem:
        for piece in _plaintextPieces(child, ignoretags):
            yield piece
    # the tail follows the closing tag, so it comes after all children
    if wanted and elem.tail:
        yield elem.tail


def getPlaintext(text, length=0, wordwrap=False, ignoretags=()):
    """Return plain text content by filtering out XML tags.

    A string is wrapped in a <div> element and parsed as XML; anything else
    is used as an etree node directly.  If processing fails, a warning is
    logged and the unprocessed input is used instead.

    :param text: string or etree node
    :param length: length of text to return (0=all); longer text is cut
        and gets '...' appended
    :param wordwrap: try not to break the last word (may return shorter string)
    :param ignoretags: tag names whose own text and tail are skipped
    :returns: text string ('' if text is None)
    """
    if text is None:
        return ''

    try:
        if isinstance(text, basestring):
            xmltext = utf8ify("<div>%s</div>"%text)
            dom = ET.fromstring(xmltext)
        else:
            dom = text

        pieces = []
        collected = 0
        for piece in _plaintextPieces(dom, ignoretags):
            pieces.append(piece)
            collected += len(piece)
            if length > 0 and collected > length:
                # already more text than requested
                break

        text = ''.join(pieces)

    except Exception as e:
        logging.warning("getPlaintext: error parsing text! Returning everything. %s"%e)

    if length > 0 and len(text) > length:
        cut = length
        # try to not break words
        if wordwrap and text[length] not in [' ', '.', '?', '!']:
            # search the last blank; without any blank keep the plain cut
            # (rfind() returns -1, which must not be used as slice end)
            blank = text.rfind(' ', 0, length)
            if blank >= 0:
                cut = blank

        return text[:cut] + '...'

    return text
192
193
def serialize(node):
    """Return a string containing an XML snippet of (etree) node.

    :param node: etree element
    :returns: byte string with the UTF-8 serialization of node, without a
        leading XML declaration
    """
    s = ET.tostring(node, 'UTF-8')
    # ET.tostring() with an encoding returns a byte string, so compare with
    # byte literals (these are plain str objects on Python 2)
    if s.startswith(b'<?xml'):
        # snip off XML declaration and the line break following it
        end = s.find(b'?>')
        return s[end + 2:].lstrip()

    return s
203
204
def getMonthName(mon, lang):
    """Return the name of month number mon in language lang.

    :param mon: index into the month list of the language
    :param lang: language code, looked up in lower case in map_months
    :returns: month name
    """
    names = map_months[lang.lower()]
    return names[mon]
208
209
def getWeekdayName(day, lang, short=True):
    """Return the short name of weekday number day in language lang.

    :param day: index into the weekday list of the language
    :param lang: language code, looked up in lower case in map_weekdays_short
    :param short: not evaluated; the short names are returned in any case
    :returns: weekday name
    """
    names = map_weekdays_short[lang.lower()]
    return names[day]
213
214
def getDateString(date=None, lang='en', short=False, withYear=True, monthNames=True, abbrev=False):
    """Return date formatted as string in the style of language lang.

    :param date: object with day, month and year either as attributes or
        as methods
    :param lang: 'en', 'de' or 'iso' (case-insensitive)
    :param short: numeric format (always including the year) instead of the
        format with month name
    :param withYear: append the year to the format with month name
    :param monthNames: not evaluated
    :param abbrev: not evaluated
    :returns: date string; None if date is None or lang is not supported
    """
    if date is None:
        return None

    # the date parts may be methods or plain data members
    if callable(date.day):
        day, month, year = date.day(), date.month(), date.year()
    else:
        day, month, year = date.day, date.month, date.year

    language = lang.lower()

    if language == 'iso':
        return date.isoformat()

    if language == 'en':
        if short:
            return "%s/%s/%s"%(year,month,day)

        result = "%s %s"%(getMonthName(month, lang), day)
        if withYear:
            result += ", %s"%year
        return result

    if language == 'de':
        if short:
            return "%s.%s.%s"%(day,month,year)

        result = "%s. %s"%(day, getMonthName(month, lang))
        if withYear:
            result += " %s"%year
        return result

    # unknown language
    return None
252
253
def getDate(date):
    """Return date object from date or datetime date.

    :param date: datetime.datetime, datetime.date or any other object
    :returns: the date part of a datetime.datetime; every other value is
        returned unchanged
    """
    # only datetime objects carry a time that has to be stripped
    return date.date() if isinstance(date, datetime.datetime) else date
261
262
def getDatetime(date):
    """Return datetime object from date or datetime date.

    :param date: datetime.datetime, datetime.date or any other object
    :returns: a datetime.datetime is returned unchanged, a datetime.date is
        returned as datetime at 0:00 of that day, every other value is
        returned unchanged
    """
    if isinstance(date, datetime.datetime):
        # datetime is a subclass of date: check it first so that an
        # existing time is not replaced by 0:00
        return date

    if isinstance(date, datetime.date):
        # add time 0:00
        return datetime.datetime.combine(date, datetime.time())

    return date
270
271
def getHttpData(url, data=None, num_tries=3, timeout=10, username=None, password=None, cache=None, insecure=False, noExceptions=False):
    """Return the response body of a HTTP GET request for url+data.

    Uses httplib2 or urllib2, depending on the module-level httplib setting.

    :param url: URL to fetch
    :param data: query data: a string is appended after '?' as it is;
        dict, list or tuple are urlencoded first
    :param num_tries: maximum number of attempts.  The httplib2 branch stops
        after the first HttpLib2Error; the urllib2 branch stops after a
        HTTPError and tries again after a URLError.
    :param timeout: timeout in seconds
    :param username: user name for authentication (httplib2 branch only)
    :param password: password for authentication (httplib2 branch only)
    :param cache: cache argument for httplib2.Http (httplib2 branch only)
    :param insecure: not evaluated
    :param noExceptions: return None instead of raising IOError on failure
    :returns: response body
    :raises IOError: if no data could be fetched and noExceptions is false
    """
    # we do GET (by appending data to url)
    if isinstance(data, (str, unicode)):
        # if data is string then append
        url = "%s?%s"%(url,data)
    elif isinstance(data, (dict, list, tuple)):
        # urlencode
        url = "%s?%s"%(url,urllib.urlencode(data))

    errmsg = None
    if httplib == 'httplib2':
        # use httplib2
        for cnt in range(num_tries):
            try:
                logging.debug("getHttp(lib2)Data(#%s %ss) url=%s"%(cnt+1,timeout,url))
                # I would prefer at least disable_ssl_certificate_validation=insecure
                # but python < 2.7.9 doesn't do SNI :-(
                # NOTE(review): certificate validation is always switched off here
                h = httplib2.Http(cache=cache, timeout=float(timeout), disable_ssl_certificate_validation=True)
                if username:
                    h.add_credentials(username, password)

                _resp, body = h.request(url)
                return body

            except httplib2.HttpLib2Error as e:
                logging.error("getHttp(lib2)Data: HTTP error(%s): %s"%(getattr(e, 'code','?'),e))
                errmsg = str(e)
                # stop trying
                break

    else:
        # use urllib2
        for cnt in range(num_tries):
            try:
                logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
                if sys.version_info < (2, 6):
                    # set timeout on socket -- ugly :-(
                    import socket
                    socket.setdefaulttimeout(float(timeout))
                    response = urllib2.urlopen(url)
                else:
                    # timeout as parameter
                    response = urllib2.urlopen(url,timeout=float(timeout))

                try:
                    return response.read()
                finally:
                    # close the connection even if reading fails
                    response.close()

            except urllib2.HTTPError as e:
                logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
                errmsg = str(e)
                # stop trying
                break
            except urllib2.URLError as e:
                logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
                errmsg = str(e)
                # no break: try again

    if noExceptions:
        return None

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
338
339
def refreshingImageFileIndexHtml(self, REQUEST, RESPONSE):
    """index_html method for App.ImageFile that re-reads the file info on every request.

    Sets size, lmt and lmh of the object from the current state of the file
    at self.path and then hands over to ImageFile.index_html.
    """
    info = os.stat(self.path)
    modified = float(info[stat.ST_MTIME])
    self.size = info[stat.ST_SIZE]
    # fall back to the current time if the modification time is 0
    self.lmt = modified or time.time()
    self.lmh = rfc1123_date(self.lmt)
    # call original method
    return ImageFile.index_html(self, REQUEST, RESPONSE)
348
349
def shortenString(s, l, ellipsis='...'):
    """Return s shortened to length l (or l-1) by replacing its middle with ellipsis.

    :param s: string to shorten
    :param l: maximum length of the result
    :param ellipsis: replacement for the omitted characters
    :returns: s itself if it is not longer than l.  Otherwise the head and
        tail of s joined by ellipsis; if l leaves no room for the ellipsis
        plus one character on each side, just the first l characters of s.
    """
    if len(s) <= l:
        return s

    room = l - len(ellipsis)
    if room < 2:
        # A head/tail length of 0 would make s[-0:] the whole string (and a
        # negative one even more), so fall back to a plain cut.
        return s[:max(l, 0)]

    half = room // 2
    return "%s%s%s"%(s[:half],ellipsis,s[-half:])
357
358
def sqlName(s, lc=True, more=''):
    """Return restricted ASCII-only version of s.

    Single quotes are removed; every other character that is not an ASCII
    letter, a digit, an underscore or listed in more becomes "_".

    :param s: string or any other object (converted with str()); None gives ""
    :param lc: convert the result to lower case
    :param more: additional allowed characters; inserted into a regular
        expression character class as it is
    :returns: string
    """
    if s is None:
        return ""

    # make string object
    text = s if isinstance(s, basestring) else str(s)
    # remove ' first, all else -> "_"
    cleaned = re.sub('[^A-Za-z0-9_'+more+']', '_', text.replace("'", ""))
    return cleaned.lower() if lc else cleaned
376
377
def sslifyUrl(url, app=None, force=False):
    """Return URL with http or https scheme.

    Looks at app.REQUEST['URL'] to find the scheme of the current page.
    Changes only schemeless (starting with //) URLs unless force=True.
    URLs without a host part (relative paths, mailto: etc.) are never changed.

    :param url: URL to adapt
    :param app: object with a REQUEST attribute holding the current page
        URL under the key 'URL' (optional)
    :param force: also replace an existing scheme: by the scheme of the
        current page, or by https if there is no current page
    :returns: URL string
    """
    thatUrl = urlparse(url)
    if not thatUrl.netloc:
        # no host part: prepending or replacing a scheme would only
        # produce a broken URL like "http:/path"
        return url

    if hasattr(app, 'REQUEST'):
        # get current page URL
        thisUrl = urlparse(app.REQUEST['URL'])
        if thatUrl.scheme == '':
            # schemeless URL -> use this scheme
            return "%s:%s"%(thisUrl.scheme, url)

        if force and thisUrl.scheme != thatUrl.scheme:
            # use this scheme
            return urlunparse((thisUrl.scheme,)+thatUrl[1:])

        # keep scheme
        return url

    # no current page URL
    if force:
        # use https for force
        return urlunparse(('https',)+thatUrl[1:])

    return url

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: documentViewer/SrvTxtUtils.py

Download in other formats: