"""Utility methods for handling XML, reading HTTP, etc""" from App.ImageFile import ImageFile from App.Common import rfc1123_date import sys import os import stat import urllib from urlparse import urlparse, urlunparse import logging import time import re import datetime try: import httplib2 httplib = 'httplib2' except: logging.warn("Unable to import httplib2! Falling back to urllib2!") import urllib2 httplib = 'urllib2' import xml.etree.ElementTree as ET srvTxtUtilsVersion = "1.13.1" map_months = {'en': [u"", u"January", u"February", u"March", u"April", u"May", u"June", u"July", u"August", u"September", u"October", u"November", u"December"], 'de': [u"", u"Januar", u"Februar", u"M\u00e4rz", u"April", u"Mai", u"Juni", u"Juli", u"August", u"September", u"Oktober", u"November", u"Dezember"]} map_weekdays_short = {'en': [ u"Mo", u"Tu", u"We", u"Th", u"Fr", u"Sa", u"Su", ], 'de': [ u"Mo", u"Di", u"Mi", u"Do", u"Fr", u"Sa", u"So", ]} def getInt(number, default=0): """returns always an int (0 in case of problems)""" try: return int(number) except: return int(default) def getAt(array, idx, default=None): """returns element idx from array or default (in case of problems)""" try: return array[idx] except: return default def unicodify(s): """decode str (utf-8 or latin-1 representation) into unicode object""" if not s: return u"" if isinstance(s, str): try: return s.decode('utf-8') except: return s.decode('latin-1') elif isinstance(s, unicode): return s else: return unicode(s) def utf8ify(s): """encode unicode object or string into byte string in utf-8 representation. assumes string objects to be utf-8""" if not s: return "" if isinstance(s, unicode): return s.encode('utf-8') elif isinstance(s, str): return s else: return str(s) def getTextFromNode(node, recursive=False, length=0): """Return all text content of a (etree) node. :param recursive: descend subnodes :returns: text string """ if node is None: return '' # ElementTree: text = node.text or '' for e in node: if recursive: text += getText(e) else: text += e.text or '' if e.tail: text += e.tail # 4Suite: #nodelist=node.childNodes #text = "" #for n in nodelist: # if n.nodeType == node.TEXT_NODE: # text = text + n.data return text getText = getTextFromNode def getPlaintext(text, length=0, wordwrap=False, ignoretags=[]): """Return plain text content by filtering out XML tags. :param text: string or etree node :param length: length of text to return (0=all) :param wordwrap: try not to break the last word (may return shorter string) :returns: text string """ if text is None: return '' try: if isinstance(text, basestring): xmltext = utf8ify("
%s
"%text) dom = ET.fromstring(xmltext) else: dom = text plaintext = '' for elem in dom.iter(): if elem.tag in ignoretags: # ignore tag continue if elem.text: plaintext += elem.text if elem.tail: plaintext += elem.tail if length > 0 and len(plaintext) > length: break text = plaintext except Exception, e: logging.warn("getPlaintext: error parsing text! Returning everything. %s"%e) if length > 0 and len(text) > length: # try to not break words if wordwrap and text[length] not in [' ', '.', '?', '!']: # search the last blank length = text.rfind(' ', 0, length) return text[:length] + '...' return text def serialize(node): """returns a string containing an XML snippet of (etree) node""" s = ET.tostring(node, 'UTF-8') # snip off XML declaration if s.startswith('') return s[i+3:] return s def getMonthName(mon, lang): """returns the name of the month mon in the language lang""" return map_months[lang.lower()][mon] def getWeekdayName(day, lang, short=True): """returns the name of the weekday day in the language lang""" return map_weekdays_short[lang.lower()][day] def getDateString(date=None, lang='en', short=False, withYear=True, monthNames=True, abbrev=False): """Return formatted date string.""" if date is None: return None ds = None if callable(date.day): # callable members day = date.day() month = date.month() year = date.year() else: # data members day = date.day month = date.month year = date.year if lang.lower() == 'en': if short: ds = "%s/%s/%s"%(year,month,day) else: ds = "%s %s"%(getMonthName(month, lang), day) if withYear: ds += ", %s"%year elif lang.lower() == 'de': if short: ds = "%s.%s.%s"%(day,month,year) else: ds = "%s. %s"%(day, getMonthName(month, lang)) if withYear: ds += " %s"%year elif lang.lower() == 'iso': ds = date.isoformat() return ds def getDate(date): """return date object from date or datetime date.""" if isinstance(date, datetime.datetime): # strip time return date.date() return date def getDatetime(date): """return datetime object from date or datetime date.""" if isinstance(date, datetime.date): # add time 0:00 return datetime.datetime.combine(date, datetime.time()) return date def getHttpData(url, data=None, num_tries=3, timeout=10, username=None, password=None, cache=None, insecure=False, noExceptions=False): """returns result from url+data HTTP request""" # we do GET (by appending data to url) if isinstance(data, str) or isinstance(data, unicode): # if data is string then append url = "%s?%s"%(url,data) elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple): # urlencode url = "%s?%s"%(url,urllib.urlencode(data)) errmsg = None if httplib == 'httplib2': # use httplib2 for cnt in range(num_tries): try: logging.debug("getHttp(lib2)Data(#%s %ss) url=%s"%(cnt+1,timeout,url)) # I would prefer at least disable_ssl_certificate_validation=insecure # but python < 2.7.9 doesn't do SNI :-( h = httplib2.Http(cache=cache, timeout=float(timeout), disable_ssl_certificate_validation=True) if username: h.add_credentials(username, password) resp, data = h.request(url) return data except httplib2.HttpLib2Error, e: logging.error("getHttp(lib2)Data: HTTP error(%s): %s"%(getattr(e, 'code','?'),e)) errmsg = str(e) # stop trying break else: # use urllib2 response = None for cnt in range(num_tries): try: logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url)) if sys.version_info < (2, 6): # set timeout on socket -- ugly :-( import socket socket.setdefaulttimeout(float(timeout)) response = urllib2.urlopen(url) else: # timeout as parameter response = urllib2.urlopen(url,timeout=float(timeout)) # check result? data = response.read() response.close() return data except urllib2.HTTPError, e: logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) errmsg = str(e) # stop trying break except urllib2.URLError, e: logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) errmsg = str(e) # stop trying #break if noExceptions: return None raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg)) #return None def refreshingImageFileIndexHtml(self, REQUEST, RESPONSE): """index_html method for App.ImageFile that updates the file info for each request.""" stat_info = os.stat(self.path) self.size = stat_info[stat.ST_SIZE] self.lmt = float(stat_info[stat.ST_MTIME]) or time.time() self.lmh = rfc1123_date(self.lmt) # call original method return ImageFile.index_html(self, REQUEST, RESPONSE) def shortenString(s, l, ellipsis='...'): """returns a string of length l (or l-1) by omitting characters in the middle of s, replacing with ellipsis.""" if len(s) <= l: return s l1 = int((l - len(ellipsis)) / 2) return "%s%s%s"%(s[:l1],ellipsis,s[-l1:]) def sqlName(s, lc=True, more=''): """returns restricted ASCII-only version of string""" if s is None: return "" if not isinstance(s, basestring): # make string object s = str(s) # remove ' s = s.replace("'","") # all else -> "_" s = re.sub('[^A-Za-z0-9_'+more+']','_',s) if lc: return s.lower() return s def sslifyUrl(url, app=None, force=False): """returns URL with http or https scheme. Looks at app.REQUEST.URL to find the scheme of the current page. Changes only schemeless (starting with //) URLs unless force=True. """ thatUrl = urlparse(url) if hasattr(app, 'REQUEST'): # get current page URL thisUrl = urlparse(app.REQUEST['URL']) if thatUrl.scheme == '': # schemeless URL -> use this scheme return "%s:%s"%(thisUrl.scheme, url) elif force: # use this scheme if thisUrl.scheme != thatUrl.scheme: return urlunparse((thisUrl.scheme,)+thatUrl[1:]) else: # keep scheme return url else: # keep scheme return url else: # no current page URL if force: # use https for force return urlunparse(('https',)+thatUrl[1:]) return url