"""Utility methods for handling XML, reading HTTP, etc"""
from App.ImageFile import ImageFile
from App.Common import rfc1123_date
import sys
import os
import stat
import urllib
from urlparse import urlparse, urlunparse
import logging
import time
import re
import datetime
try:
import httplib2
httplib = 'httplib2'
except:
logging.warn("Unable to import httplib2! Falling back to urllib2!")
import urllib2
httplib = 'urllib2'
import xml.etree.ElementTree as ET
srvTxtUtilsVersion = "1.13.1"
# Full month names keyed by lower-case language code ('en', 'de').
# Index 0 of every list is an empty placeholder so that month numbers
# 1-12 can be used directly as list indices.
map_months = {'en': [u"",
               u"January",
               u"February",
               u"March",
               u"April",
               u"May",
               u"June",
               u"July",
               u"August",
               u"September",
               u"October",
               u"November",
               u"December"],
              'de': [u"",
               u"Januar",
               u"Februar",
               u"M\u00e4rz",
               u"April",
               u"Mai",
               u"Juni",
               u"Juli",
               u"August",
               u"September",
               u"Oktober",
               u"November",
               u"Dezember"]}
# Two-letter weekday abbreviations keyed by lower-case language code
# ('en', 'de'). Lists start with Monday at index 0 and end with Sunday
# at index 6.
# NOTE(review): this ordering looks like datetime's weekday() numbering --
# confirm against the callers.
map_weekdays_short = {'en': [
                       u"Mo",
                       u"Tu",
                       u"We",
                       u"Th",
                       u"Fr",
                       u"Sa",
                       u"Su",
                       ],
                      'de': [
                       u"Mo",
                       u"Di",
                       u"Mi",
                       u"Do",
                       u"Fr",
                       u"Sa",
                       u"So",
                       ]}
def getInt(number, default=0):
    """Return `number` converted to int, or int(`default`) if that fails.

    :param number: value to convert
    :param default: fallback value (itself passed through int())
    :returns: an int
    """
    try:
        return int(number)
    except Exception:
        # Deliberately broad so that any conversion problem yields the
        # default, but unlike a bare except this no longer swallows
        # KeyboardInterrupt / SystemExit.
        return int(default)
def getAt(array, idx, default=None):
    """Return array[idx], or `default` if the lookup fails.

    :param array: any object supporting item access (list, tuple, dict, ...)
    :param idx: index or key
    :param default: value returned when the lookup raises
    """
    try:
        return array[idx]
    except Exception:
        # Deliberately broad (IndexError, KeyError, TypeError for None, ...)
        # but unlike a bare except this no longer swallows
        # KeyboardInterrupt / SystemExit.
        return default
def unicodify(s):
    """Decode str (utf-8 or latin-1 representation) into a unicode object.

    Falsy input (None, '', 0, ...) yields an empty unicode string.
    Byte strings are decoded as utf-8, falling back to latin-1; unicode
    objects are returned unchanged; anything else goes through unicode().
    """
    if not s:
        return u""
    if isinstance(s, str):
        try:
            return s.decode('utf-8')
        except UnicodeDecodeError:
            # not valid utf-8 -- fall back to latin-1
            return s.decode('latin-1')
    elif isinstance(s, unicode):
        return s
    else:
        return unicode(s)
def utf8ify(s):
    """Return `s` as a utf-8 encoded byte string.

    Unicode objects are encoded; str objects are assumed to be utf-8
    already and are passed through; other objects are converted with
    str(). Falsy input yields the empty string.
    """
    if not s:
        return ""
    if isinstance(s, unicode):
        return s.encode('utf-8')
    # byte strings pass through untouched, everything else is stringified
    return s if isinstance(s, str) else str(s)
def getTextFromNode(node, recursive=False, length=0):
    """Return all text content of a (etree) node.

    Without `recursive` the result is the node's own text plus the text
    and tail of each direct child. With `recursive` the complete subtree
    is walked (the flag is passed on to every level).

    :param node: etree element or None
    :param recursive: descend subnodes
    :param length: accepted for backward compatibility; currently not used
    :returns: text string ('' if node is None)
    """
    if node is None:
        return ''

    parts = [node.text or '']
    for child in node:
        if recursive:
            # pass the flag on so that deeper levels are included as well
            parts.append(getTextFromNode(child, recursive=True))
        else:
            parts.append(child.text or '')

        # text following the child element belongs to this node
        if child.tail:
            parts.append(child.tail)

    return ''.join(parts)


# short alias
getText = getTextFromNode
def getPlaintext(text, length=0, wordwrap=False, ignoretags=()):
    """Return plain text content by filtering out XML tags.

    :param text: string (XML fragment) or etree node
    :param length: length of text to return (0=all); when the text is cut
        an ellipsis ('...') is appended
    :param wordwrap: try not to break the last word (may return shorter string)
    :param ignoretags: tag names whose own text and tail are skipped
    :returns: text string; if the input cannot be parsed the unparsed
        input is used instead (a warning is logged)
    """
    if text is None:
        return ''

    try:
        if isinstance(text, basestring):
            # wrap in a synthetic root element so that fragments with
            # several top-level nodes (or none at all) can be parsed
            xmltext = utf8ify("<div>%s</div>" % text)
            dom = ET.fromstring(xmltext)
        else:
            dom = text

        plaintext = ''
        for elem in dom.iter():
            if elem.tag in ignoretags:
                # ignore tag
                continue

            if elem.text:
                plaintext += elem.text
            if elem.tail:
                plaintext += elem.tail

            # stop collecting as soon as there is enough text
            if length > 0 and len(plaintext) > length:
                break

        text = plaintext

    except Exception as e:
        logging.warning("getPlaintext: error parsing text! Returning everything. %s" % e)

    if length > 0 and len(text) > length:
        # try to not break words
        if wordwrap and text[length] not in (' ', '.', '?', '!'):
            # search the last blank; keep the hard cut if there is none
            # (rfind returns -1, which would chop only the last character)
            cut = text.rfind(' ', 0, length)
            if cut >= 0:
                length = cut

        return text[:length] + '...'

    return text
def serialize(node):
    """Return a byte string containing an XML snippet of (etree) node.

    The node is serialized as UTF-8; a leading XML declaration
    (``<?xml ... ?>``), if the serializer emitted one, is removed.
    """
    s = ET.tostring(node, 'UTF-8')
    # snip off XML declaration (and the line break that follows it)
    if s.startswith(b'<?xml'):
        end = s.find(b'?>')
        if end >= 0:
            return s[end + 2:].lstrip()

    return s
def getMonthName(mon, lang):
    """Look up the name of month number `mon` for language `lang`.

    `lang` is matched case-insensitively against the keys of map_months.
    """
    names = map_months[lang.lower()]
    return names[mon]
def getWeekdayName(day, lang, short=True):
    """Look up the (short) name of weekday number `day` for language `lang`.

    `lang` is matched case-insensitively against the keys of
    map_weekdays_short. The `short` flag is not evaluated; the short
    name is returned in every case.
    """
    names = map_weekdays_short[lang.lower()]
    return names[day]
def getDateString(date=None, lang='en', short=False, withYear=True, monthNames=True, abbrev=False):
    """Format `date` according to the conventions of `lang`.

    Supported values for `lang` (case-insensitive) are 'en', 'de' and
    'iso'. With `short` a purely numeric form is produced, otherwise the
    month is spelled out and the year is appended if `withYear` is set.
    `monthNames` and `abbrev` are not evaluated.

    :returns: the formatted string, or None if `date` is None or `lang`
        is not supported
    """
    if date is None:
        return None

    # date objects may expose day/month/year as methods or as attributes
    if callable(date.day):
        d, m, y = date.day(), date.month(), date.year()
    else:
        d, m, y = date.day, date.month, date.year

    code = lang.lower()

    if code == 'iso':
        return date.isoformat()

    if code == 'en':
        if short:
            return "%s/%s/%s"%(y,m,d)
        result = "%s %s"%(getMonthName(m, lang), d)
        if withYear:
            result += ", %s"%y
        return result

    if code == 'de':
        if short:
            return "%s.%s.%s"%(d,m,y)
        result = "%s. %s"%(d, getMonthName(m, lang))
        if withYear:
            result += " %s"%y
        return result

    # unknown language
    return None
def getDate(date):
    """Reduce a datetime to its date part; pass anything else through."""
    if not isinstance(date, datetime.datetime):
        return date

    # strip time
    return date.date()
def getDatetime(date):
    """Return a datetime object from a date or datetime.

    A datetime is returned unchanged, a plain date is extended with the
    time 0:00, and any other object is passed through.
    """
    # datetime.datetime is a subclass of datetime.date, so it has to be
    # checked first -- combine() would otherwise discard its time of day
    if isinstance(date, datetime.datetime):
        return date

    if isinstance(date, datetime.date):
        # add time 0:00
        return datetime.datetime.combine(date, datetime.time())

    return date
def getHttpData(url, data=None, num_tries=3, timeout=10, username=None, password=None, cache=None, insecure=False, noExceptions=False):
    """Return the response body of a HTTP GET request for url (+data).

    Uses httplib2 or urllib2, depending on the module-level `httplib`
    switch.

    :param url: URL to fetch
    :param data: query data; a string is appended verbatim after '?',
        a dict/list/tuple is urlencoded first. A '?' is always inserted,
        even if url already contains one.
    :param num_tries: upper bound for the number of attempts (see the
        notes in the two branches for when a retry really happens)
    :param timeout: timeout, converted with float() and handed to the
        HTTP library
    :param username: credentials user; only used in the httplib2 branch
    :param password: credentials password; only used in the httplib2 branch
    :param cache: passed to httplib2.Http(cache=...); only used in the
        httplib2 branch
    :param insecure: not evaluated; certificate validation is always
        disabled in the httplib2 branch (see comment there)
    :param noExceptions: return None instead of raising when no data
        could be fetched
    :returns: the response body
    :raises IOError: if no data could be fetched and noExceptions is false
    """
    # we do GET (by appending data to url)
    if isinstance(data, str) or isinstance(data, unicode):
        # if data is string then append
        url = "%s?%s"%(url,data)
    elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
        # urlencode
        url = "%s?%s"%(url,urllib.urlencode(data))

    # message of the last handled error, used for the IOError below
    errmsg = None
    if httplib == 'httplib2':
        # use httplib2
        # NOTE: success returns and HttpLib2Error breaks, so this loop never
        # gets to a second attempt; other exceptions propagate to the caller.
        for cnt in range(num_tries):
            try:
                logging.debug("getHttp(lib2)Data(#%s %ss) url=%s"%(cnt+1,timeout,url))
                # I would prefer at least disable_ssl_certificate_validation=insecure
                # but python < 2.7.9 doesn't do SNI :-(
                h = httplib2.Http(cache=cache, timeout=float(timeout), disable_ssl_certificate_validation=True)
                if username:
                    h.add_credentials(username, password)

                # the `data` parameter is re-used for the response body
                resp, data = h.request(url)
                return data

            except httplib2.HttpLib2Error, e:
                logging.error("getHttp(lib2)Data: HTTP error(%s): %s"%(getattr(e, 'code','?'),e))
                errmsg = str(e)
                # stop trying
                break

    else:
        # use urllib2
        response = None
        for cnt in range(num_tries):
            try:
                logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
                if sys.version_info < (2, 6):
                    # set timeout on socket -- ugly :-(
                    # (this changes the process-wide default socket timeout)
                    import socket
                    socket.setdefaulttimeout(float(timeout))
                    response = urllib2.urlopen(url)
                else:
                    # timeout as parameter
                    response = urllib2.urlopen(url,timeout=float(timeout))
                # check result?
                # the `data` parameter is re-used for the response body
                data = response.read()
                response.close()
                return data

            except urllib2.HTTPError, e:
                # the server answered with an error status: do not retry
                logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
                errmsg = str(e)
                # stop trying
                break
            except urllib2.URLError, e:
                logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
                errmsg = str(e)
                # no break here: the loop goes on with the next attempt
                # until num_tries is used up

    # only reached when no attempt returned data
    if noExceptions:
        return None

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
def refreshingImageFileIndexHtml(self, REQUEST, RESPONSE):
    """index_html replacement for App.ImageFile that re-reads the file info.

    Size and modification time of self.path are read from disk on every
    request before the original ImageFile.index_html is invoked.
    """
    info = os.stat(self.path)

    # fall back to the current time if the file reports mtime 0
    mtime = float(info[stat.ST_MTIME])
    if not mtime:
        mtime = time.time()

    self.size = info[stat.ST_SIZE]
    self.lmt = mtime
    self.lmh = rfc1123_date(self.lmt)

    # delegate to the original method
    return ImageFile.index_html(self, REQUEST, RESPONSE)
def shortenString(s, l, ellipsis='...'):
    """Return s shortened to length l (or l-1) by omitting characters in
    the middle of s and replacing them with ellipsis.

    Strings that already fit are returned unchanged. If l is too small to
    hold the ellipsis plus at least one character on each side, s is
    simply cut off after l characters.

    :param s: string to shorten
    :param l: maximum length of the result
    :param ellipsis: replacement for the omitted middle part
    """
    if len(s) <= l:
        return s

    # number of characters kept on each side of the ellipsis
    side = (l - len(ellipsis)) // 2
    if side < 1:
        # s[-0:] would be the whole string (and a negative count is
        # meaningless), so fall back to plain truncation
        return s[:max(l, 0)]

    return "%s%s%s"%(s[:side],ellipsis,s[-side:])
def sqlName(s, lc=True, more=''):
    """Reduce s to a restricted ASCII-only name.

    Apostrophes are dropped; every other character outside A-Z, a-z,
    0-9, '_' and the characters given in `more` becomes '_'.

    :param s: input; non-strings are converted with str(), None gives ""
    :param lc: lower-case the result
    :param more: additional allowed characters; inserted verbatim into a
        regular expression character class
    """
    if s is None:
        return ""

    # make sure we work on a string object
    text = s if isinstance(s, basestring) else str(s)

    # drop apostrophes, then map everything not allowed to "_"
    stripped = text.replace("'","")
    cleaned = re.sub('[^A-Za-z0-9_'+more+']','_',stripped)

    return cleaned.lower() if lc else cleaned
def sslifyUrl(url, app=None, force=False):
    """Return url with a http or https scheme.

    The scheme of the current page is taken from app.REQUEST['URL'] if
    app has a REQUEST. Only schemeless URLs (starting with //) are
    changed unless force=True. Without a current page URL, force=True
    selects https and everything else is returned as is.
    """
    target = urlparse(url)

    if not hasattr(app, 'REQUEST'):
        # no current page URL: https when forced, otherwise untouched
        if force:
            return urlunparse(('https',)+target[1:])
        return url

    # scheme of the current page
    current = urlparse(app.REQUEST['URL'])

    if target.scheme == '':
        # schemeless URL -> prepend the current scheme
        return "%s:%s"%(current.scheme, url)

    if force and current.scheme != target.scheme:
        # replace the scheme by the current one
        return urlunparse((current.scheme,)+target[1:])

    # keep scheme
    return url