Context Navigation

source: documentViewer/SrvTxtUtils.py @ 623:6012fe93f78c

Last change on this file since 623:6012fe93f78c was 623:6012fe93f78c, checked in by casties, 9 years ago
better scheme-less URL code.
File size: 11.4 KB

Line
1	"""Utility methods for handling XML, reading HTTP, etc"""
2
3	from App.ImageFile import ImageFile
4	from App.Common import rfc1123_date
5
6	import sys
7	import os
8	import stat
9	import urllib
10	from urlparse import urlparse, urlunparse
11	import logging
12	import time
13	import re
14	import datetime
15	try:
16	import httplib2
17	httplib = 'httplib2'
18	except:
19	logging.warn("Unable to import httplib2! Falling back to urllib2!")
20	import urllib2
21	httplib = 'urllib2'
22
23	import xml.etree.ElementTree as ET
24
# version string of this utility module
srvTxtUtilsVersion = "1.13"

# Month names per language code. Index 0 holds an empty placeholder so that
# the month numbers 1..12 can be used directly as list indices.
map_months = {
    'en': [
        u"",
        u"January", u"February", u"March", u"April", u"May", u"June",
        u"July", u"August", u"September", u"October", u"November", u"December",
    ],
    'de': [
        u"",
        u"Januar", u"Februar", u"M\u00e4rz", u"April", u"Mai", u"Juni",
        u"Juli", u"August", u"September", u"Oktober", u"November", u"Dezember",
    ],
}
53
# Two-letter weekday abbreviations per language code; the lists start with
# Monday at index 0.
map_weekdays_short = {
    'en': [u"Mo", u"Tu", u"We", u"Th", u"Fr", u"Sa", u"Su"],
    'de': [u"Mo", u"Di", u"Mi", u"Do", u"Fr", u"Sa", u"So"],
}
72
73
def getInt(number, default=0):
    """Return number converted to int, or int(default) if that fails.

    :param number: value to convert (anything int() accepts)
    :param default: fallback value, itself passed through int()
    :returns: int
    :raises: whatever int(default) raises if default is not convertible
    """
    try:
        return int(number)
    except Exception:
        # best-effort conversion: any conversion problem selects the default,
        # but KeyboardInterrupt/SystemExit are no longer swallowed
        return int(default)
80
def getAt(array, idx, default=None):
    """Return array[idx], or default if the lookup fails.

    :param array: any object supporting subscription (list, tuple, dict, ...)
    :param idx: index or key
    :param default: value returned when the lookup raises
    :returns: the element or default
    """
    try:
        return array[idx]
    except Exception:
        # best-effort lookup (missing index/key, array is None, ...);
        # KeyboardInterrupt/SystemExit are no longer swallowed
        return default
87
def unicodify(s):
    """Decode a byte string (UTF-8 or Latin-1 representation) into a unicode object.

    :param s: byte string, unicode object or any other value
    :returns: u"" for any false value; the decoded text for byte strings
        (UTF-8 first, Latin-1 if the bytes are not valid UTF-8); every other
        value unchanged
    """
    if not s:
        return u""

    # ``bytes`` is the same type as ``str`` on Python 2
    if isinstance(s, bytes):
        try:
            return s.decode('utf-8')
        except UnicodeDecodeError:
            # not valid UTF-8: Latin-1 can decode every byte sequence
            return s.decode('latin-1')

    return s
99
def utf8ify(s):
    """Encode a unicode object into a UTF-8 byte string.

    Byte strings are assumed to be UTF-8 already and are returned unchanged.

    :param s: unicode object or byte string
    :returns: byte string (empty for any false value)
    """
    if not s:
        return b""

    # ``bytes`` is the same type as ``str`` on Python 2
    if isinstance(s, bytes):
        return s

    return s.encode('utf-8')
109
110
def getTextFromNode(node, recursive=False, length=0):
    """Return the text content of an (etree) node.

    :param node: ElementTree element (or None)
    :param recursive: if true the text of all descendants is collected;
        otherwise only the node's own text plus text and tail of its direct
        children
    :param length: not used by this function; kept so existing calls that
        pass it keep working
    :returns: text string ('' if node is None)
    """
    if node is None:
        return ''

    parts = [node.text or '']
    for child in node:
        if recursive:
            # hand the flag down, otherwise the descent stops one level below
            parts.append(getTextFromNode(child, recursive=True))
        else:
            parts.append(child.text or '')

        # the tail is the text between this child and its next sibling
        if child.tail:
            parts.append(child.tail)

    return ''.join(parts)


# short alias for getTextFromNode
getText = getTextFromNode
141
def _plaintextFragments(elem, ignoretags):
    """Yield the text fragments of elem and its descendants in document order.

    Own text and tail of elements whose tag is in ignoretags are left out;
    their child elements are still visited.
    """
    skipped = elem.tag in ignoretags
    if elem.text and not skipped:
        yield elem.text

    for child in elem:
        for fragment in _plaintextFragments(child, ignoretags):
            yield fragment

    # the tail follows the closing tag, i.e. it comes after all descendants
    if elem.tail and not skipped:
        yield elem.tail


def getPlaintext(text, length=0, wordwrap=False, ignoretags=()):
    """Return plain text content by filtering out XML tags.

    :param text: string (XML fragment) or etree node
    :param length: length of text to return (0=all); longer text is cut and
        '...' is appended
    :param wordwrap: try not to break the last word (may return shorter string)
    :param ignoretags: tag names whose own text and tail are left out
    :returns: text string ('' if text is None); if the text can not be parsed
        a warning is logged and the unparsed input is used instead
    """
    if text is None:
        return ''

    try:
        # (bytes, unicode) is what ``basestring`` covers on Python 2
        if isinstance(text, (bytes, type(u""))):
            # wrap in a root element so that fragments can be parsed;
            # the parser is always fed UTF-8 encoded bytes
            if isinstance(text, bytes):
                xmltext = b"<div>" + text + b"</div>"
            else:
                xmltext = (u"<div>%s</div>" % text).encode('utf-8')
            dom = ET.fromstring(xmltext)
        else:
            dom = text

        fragments = []
        collected = 0
        for fragment in _plaintextFragments(dom, ignoretags):
            fragments.append(fragment)
            collected += len(fragment)
            if length > 0 and collected > length:
                # enough text for the requested length
                break

        text = ''.join(fragments)

    except Exception as e:
        logging.warning("getPlaintext: error parsing text! Returning everything. %s" % e)

    if length > 0 and len(text) > length:
        cut = length
        # try to not break words
        if wordwrap and text[cut] not in (' ', '.', '?', '!'):
            # search the last blank; keep the hard cut if there is none
            blank = text.rfind(' ', 0, cut)
            if blank > 0:
                cut = blank

        return text[:cut] + '...'

    return text
188
189
def serialize(node):
    """Return a byte string containing the XML snippet of an (etree) node.

    The node is serialized as UTF-8; a leading XML declaration is removed.

    :param node: ElementTree element
    :returns: UTF-8 encoded XML without XML declaration
    """
    xml = ET.tostring(node, 'UTF-8')
    # snip off XML declaration (byte literals: tostring returns a byte string)
    if xml.startswith(b'<?xml'):
        end = xml.find(b'?>')
        if end >= 0:
            xml = xml[end + 2:]
            # the declaration is followed by a line break
            if xml[:1] == b'\n':
                xml = xml[1:]

    return xml
199
200
def getMonthName(mon, lang):
    """Return the name of month number mon in the language lang.

    :param mon: index into the month list of the language (1 = January)
    :param lang: language code, looked up case-insensitively in map_months
    :returns: month name from map_months
    """
    names = map_months[lang.lower()]
    return names[mon]
204
205
def getWeekdayName(day, lang, short=True):
    """Return the name of weekday number day in the language lang.

    :param day: index into the weekday list of the language (0 = Monday)
    :param lang: language code, looked up case-insensitively in
        map_weekdays_short
    :param short: not evaluated; the short names are returned in any case
    :returns: weekday name from map_weekdays_short
    """
    names = map_weekdays_short[lang.lower()]
    return names[day]
209
210
def getDateString(date=None, lang='en', short=False, withYear=True, monthNames=True, abbrev=False):
    """Return formatted date string.

    :param date: object with day, month and year either as attributes or as
        methods; None yields None
    :param lang: 'en', 'de' or 'iso' (case-insensitive)
    :param short: numeric form ('en': year/month/day, 'de': day.month.year)
        instead of the form with the month name
    :param withYear: append the year to the long form
    :param monthNames: not evaluated by this function
    :param abbrev: not evaluated by this function
    :returns: date string, or None for an unknown lang
    """
    if date is None:
        return None

    if callable(date.day):
        # day/month/year are methods
        day, month, year = date.day(), date.month(), date.year()
    else:
        # day/month/year are plain attributes
        day, month, year = date.day, date.month, date.year

    language = lang.lower()

    if language == 'en':
        if short:
            return "%s/%s/%s"%(year,month,day)

        result = "%s %s"%(getMonthName(month, lang), day)
        if withYear:
            result += ", %s"%year
        return result

    if language == 'de':
        if short:
            return "%s.%s.%s"%(day,month,year)

        result = "%s. %s"%(day, getMonthName(month, lang))
        if withYear:
            result += " %s"%year
        return result

    if language == 'iso':
        return date.isoformat()

    # unknown language
    return None
248
249
def getDate(date):
    """Return a date object for a date or datetime value.

    :param date: datetime.datetime, datetime.date or any other value
    :returns: the date part of a datetime; every other value unchanged
    """
    # only datetime objects carry a time that has to be stripped
    return date.date() if isinstance(date, datetime.datetime) else date
257
258
def getDatetime(date):
    """Return a datetime object for a date or datetime value.

    :param date: datetime.datetime, datetime.date or any other value
    :returns: datetime objects unchanged; plain dates with time 0:00 added;
        every other value unchanged
    """
    # datetime is a subclass of date, so it has to be tested first --
    # combine() would otherwise throw away its time of day
    if isinstance(date, datetime.datetime):
        return date

    if isinstance(date, datetime.date):
        # add time 0:00
        return datetime.datetime.combine(date, datetime.time())

    return date
266
267
def getHttpData(url, data=None, num_tries=3, timeout=10, username=None, password=None, cache=None, insecure=False, noExceptions=False):
    """Return the response body of a HTTP GET request for url+data.

    Uses httplib2 or urllib2, depending on which one the module could import.

    :param url: URL to fetch
    :param data: query to append to the URL after a '?': a string is appended
        as it is; dict, list or tuple are urlencoded first
    :param num_tries: maximum number of attempts
    :param timeout: timeout handed to the HTTP library (converted to float)
    :param username: if set, credentials for the request (httplib2 only)
    :param password: password belonging to username (httplib2 only)
    :param cache: cache argument for httplib2.Http (httplib2 only)
    :param insecure: not evaluated; certificate validation is always
        switched off for httplib2 (see comment in the code)
    :param noExceptions: return None instead of raising when fetching fails
    :returns: response body, or None on failure with noExceptions
    :raises IOError: on failure without noExceptions
    """
    # we do GET (by appending data to url)
    if isinstance(data, basestring):
        # if data is string then append
        url = "%s?%s"%(url,data)
    elif isinstance(data, (dict, list, tuple)):
        # urlencode
        url = "%s?%s"%(url,urllib.urlencode(data))

    errmsg = None
    if httplib == 'httplib2':
        # use httplib2
        for cnt in range(num_tries):
            try:
                logging.debug("getHttp(lib2)Data(#%s %ss) url=%s"%(cnt+1,timeout,url))
                # I would prefer at least disable_ssl_certificate_validation=insecure
                # but python < 2.7.9 doesn't do SNI :-(
                h = httplib2.Http(cache=cache, timeout=float(timeout), disable_ssl_certificate_validation=True)
                if username:
                    h.add_credentials(username, password)

                resp, content = h.request(url)
                return content

            except httplib2.HttpLib2Error as e:
                logging.error("getHttp(lib2)Data: HTTP error(%s): %s"%(getattr(e, 'code','?'),e))
                errmsg = str(e)
                # stop trying
                break

    else:
        # use urllib2
        for cnt in range(num_tries):
            try:
                logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
                if sys.version_info < (2, 6):
                    # set timeout on socket -- ugly :-(
                    import socket
                    socket.setdefaulttimeout(float(timeout))
                    response = urllib2.urlopen(url)
                else:
                    # timeout as parameter
                    response = urllib2.urlopen(url,timeout=float(timeout))

                try:
                    return response.read()
                finally:
                    # release the connection even if reading fails
                    response.close()

            except urllib2.HTTPError as e:
                logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
                errmsg = str(e)
                # the server answered with an error status: stop trying
                break
            except urllib2.URLError as e:
                logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
                errmsg = str(e)
                # no break: try again until num_tries is used up

    if noExceptions:
        return None

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
334
335
def refreshingImageFileIndexHtml(self, REQUEST, RESPONSE):
    """index_html method for App.ImageFile that updates the file info for each request.

    Reads size and modification time of self.path again, stores them in
    self.size, self.lmt and self.lmh and then calls ImageFile.index_html.

    :returns: result of ImageFile.index_html(self, REQUEST, RESPONSE)
    """
    info = os.stat(self.path)
    self.size = info[stat.ST_SIZE]
    # fall back to the current time if the modification time is 0
    mtime = float(info[stat.ST_MTIME])
    self.lmt = mtime or time.time()
    self.lmh = rfc1123_date(self.lmt)
    # call original method
    return ImageFile.index_html(self, REQUEST, RESPONSE)
344
345
def shortenString(s, l, ellipsis='...'):
    """Return s shortened to length l (or l-1) by replacing its middle with ellipsis.

    :param s: string to shorten
    :param l: maximum length of the result
    :param ellipsis: replacement for the omitted middle part
    :returns: s itself if it is not longer than l; otherwise start and end of
        s joined by ellipsis; if l leaves no room for the ellipsis plus one
        character on each side, just the first l characters of s
    """
    if len(s) <= l:
        return s

    # number of characters kept on each side of the ellipsis
    keep = (l - len(ellipsis)) // 2
    if keep <= 0:
        # s[-0:] would be the whole string, i.e. the result would get longer
        # than s -- cut hard instead
        return s[:max(l, 0)]

    return "%s%s%s"%(s[:keep],ellipsis,s[-keep:])
353
354
def sqlName(s, lc=True, more=''):
    """Return a restricted ASCII-only version of a string.

    Apostrophes are removed; every other character except A-Z, a-z, 0-9,
    '_' and the characters in more is replaced by '_'.

    :param s: string or any other object (converted with str()); None gives ""
    :param lc: return the result in lower case
    :param more: additional allowed characters; inserted verbatim into a
        regular expression character class, so regex syntax applies
    :returns: restricted string
    """
    if s is None:
        return ""

    # (bytes, unicode) is what ``basestring`` covers on Python 2
    if not isinstance(s, (bytes, type(u""))):
        # make string object
        s = str(s)

    disallowed = '[^A-Za-z0-9_'+more+']'
    # remove ' first, all else -> "_"
    cleaned = re.sub(disallowed, '_', s.replace("'",""))
    return cleaned.lower() if lc else cleaned
372
373
def sslifyUrl(url, app=None, force=False):
    """Return url with http or https scheme.

    Looks at app.REQUEST['URL'] to find the scheme of the current page.
    Changes only schemeless (starting with //) URLs unless force=True.
    Relative URLs (neither scheme nor host) are returned unchanged.

    :param url: URL to adapt
    :param app: object with a REQUEST mapping that provides 'URL' (optional)
    :param force: also replace an existing scheme: by the scheme of the
        current page, or by https if app has no REQUEST
    :returns: URL string
    """
    thatUrl = urlparse(url)
    if not thatUrl.scheme and not thatUrl.netloc:
        # relative URL: prepending a scheme would only produce a broken URL
        return url

    if hasattr(app, 'REQUEST'):
        # scheme of the current page
        thisScheme = urlparse(app.REQUEST['URL']).scheme
        if thatUrl.scheme == '':
            # schemeless URL -> use this scheme
            return "%s:%s"%(thisScheme, url)

        if force and thisScheme != thatUrl.scheme:
            # replace the scheme
            return urlunparse((thisScheme,) + tuple(thatUrl[1:]))

        # keep scheme
        return url

    # no current page URL
    if force:
        # use https for force
        return urlunparse(('https',) + tuple(thatUrl[1:]))

    return url

Note: See TracBrowser for help on using the repository browser.

Download in other formats: