Mercurial > hg > documentViewer
changeset 613:c57d80a649ea
CLOSED - # 281: List of thumbnails verschluckt Seite, wenn odd-scan-position gesetzt ist
https://it-dev.mpiwg-berlin.mpg.de/tracs/mpdl-project-software/ticket/281
| author   | casties |
|----------|---------|
| date     | Thu, 17 Oct 2013 16:25:39 +0200 |
| parents  | a79e4e4b3e37 |
| children | d16da6e739ef |
| files    | MpdlXmlTextServer.py MpiwgXmlTextServer.py SrvTxtUtils.py documentViewer.py |
| diffstat | 4 files changed, 221 insertions(+), 78 deletions(-) [+] |
line wrap: on
line diff
--- a/MpdlXmlTextServer.py Thu May 16 18:04:00 2013 +0200 +++ b/MpdlXmlTextServer.py Thu Oct 17 16:25:39 2013 +0200 @@ -9,17 +9,7 @@ import urlparse import base64 -from SrvTxtUtils import getInt, getText, getHttpData - -def serialize(node): - """returns a string containing an XML snippet of node""" - s = ET.tostring(node, 'UTF-8') - # snip off XML declaration - if s.startswith('<?xml'): - i = s.find('?>') - return s[i+3:] - - return s +from SrvTxtUtils import getInt, getText, getHttpData, serialize class MpdlXmlTextServer(SimpleItem):
--- a/MpiwgXmlTextServer.py Thu May 16 18:04:00 2013 +0200 +++ b/MpiwgXmlTextServer.py Thu Oct 17 16:25:39 2013 +0200 @@ -11,7 +11,7 @@ from datetime import datetime -from SrvTxtUtils import getInt, getText, getHttpData +from SrvTxtUtils import getInt, getText, getHttpData, serialize # mapping of fields in the output of /mpiwg-mpdl-cms-web/query/GetDocInfo to documentViewer docinfo textinfoFieldMap = { @@ -23,16 +23,6 @@ 'countTocEntries' : 'numTocEntries' } -def serialize(node): - """returns a string containing an XML snippet of node""" - s = ET.tostring(node, 'UTF-8') - # snip off XML declaration - if s.startswith('<?xml'): - i = s.find('?>') - return s[i+3:] - - return s - class MpiwgXmlTextServer(SimpleItem): """TextServer implementation for MPIWG-XML server"""
--- a/SrvTxtUtils.py Thu May 16 18:04:00 2013 +0200 +++ b/SrvTxtUtils.py Thu Oct 17 16:25:39 2013 +0200 @@ -7,11 +7,49 @@ import os import stat import urllib -import urllib2 import logging +import time +import re +import string +import datetime +try: + import httplib2 + httplib = 'httplib2' +except: + logging.warn("Unable to import httplib2! Falling back to urllib2!") + import urllib2 + httplib = 'urllib2' + +import xml.etree.ElementTree as ET + +srvTxtUtilsVersion = "1.12.1" - -srvTxtUtilsVersion = "1.6" +map_months = {'en': [u"", + u"January", + u"February", + u"March", + u"April", + u"May", + u"June", + u"July", + u"August", + u"September", + u"October", + u"November", + u"December"], + 'de': [u"", + u"Januar", + u"Februar", + u"M\u00e4rz", + u"April", + u"Mai", + u"Juni", + u"Juli", + u"August", + u"September", + u"Oktober", + u"November", + u"Dezember"]} def getInt(number, default=0): """returns always an int (0 in case of problems)""" @@ -37,20 +75,20 @@ except: return s.decode('latin-1') else: - return unicode(s) + return s def utf8ify(s): """encode unicode object or string into byte string in utf-8 representation. 
assumes string objects to be utf-8""" if not s: return "" - if isinstance(s, unicode): + if isinstance(s, str): + return s + else: return s.encode('utf-8') - else: - return str(s) def getText(node, recursive=0): - """returns all text content of a node and its subnodes""" + """returns all text content of a (etree) node and its subnodes""" if node is None: return '' @@ -74,8 +112,74 @@ return text +def serialize(node): + """returns a string containing an XML snippet of (etree) node""" + s = ET.tostring(node, 'UTF-8') + # snip off XML declaration + if s.startswith('<?xml'): + i = s.find('?>') + return s[i+3:] -def getHttpData(url, data=None, num_tries=3, timeout=10, noExceptions=False): + return s + + +def getMonthName(mon, lang): + """returns the name of the month mon in the language lang""" + return map_months[lang][mon] + + +def getDateString(date=None, lang='en', withYear=True, monthNames=True, abbrev=False): + """Return formatted date string.""" + if date is None: + return None + + ds = None + if callable(date.day): + # callable members + day = date.day() + month = date.month() + year = date.year() + else: + # data members + day = date.day + month = date.month + year = date.year + + if lang.lower() == 'en': + ds = "%s %s"%(getMonthName(month, lang), day) + if withYear: + ds += ", %s"%year + + elif lang.lower() == 'de': + ds = "%s. 
%s"%(day, getMonthName(month, lang)) + if withYear: + ds += " %s"%year + + elif lang.lower() == 'iso': + ds = date.isoformat() + + return ds + + +def getDate(date): + """return date object from date or datetime date.""" + if isinstance(date, datetime.datetime): + # strip time + return date.date() + + return date + + +def getDatetime(date): + """return datetime object from date or datetime date.""" + if isinstance(date, datetime.date): + # add time 0:00 + return datetime.datetime.combine(date, datetime.time()) + + return date + + +def getHttpData(url, data=None, num_tries=3, timeout=10, username=None, password=None, cache=None, insecure=False, noExceptions=False): """returns result from url+data HTTP request""" # we do GET (by appending data to url) if isinstance(data, str) or isinstance(data, unicode): @@ -84,37 +188,55 @@ elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple): # urlencode url = "%s?%s"%(url,urllib.urlencode(data)) - - response = None + errmsg = None - for cnt in range(num_tries): - try: - logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url)) - if sys.version_info < (2, 6): - # set timeout on socket -- ugly :-( - import socket - socket.setdefaulttimeout(float(timeout)) - response = urllib2.urlopen(url) - else: - # timeout as parameter - response = urllib2.urlopen(url,timeout=float(timeout)) - # check result? 
- break - except urllib2.HTTPError, e: - logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) - errmsg = str(e) - # stop trying - break - except urllib2.URLError, e: - logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) - errmsg = str(e) - # stop trying - #break + if httplib == 'httplib2': + # use httplib2 + for cnt in range(num_tries): + try: + logging.debug("getHttp(lib2)Data(#%s %ss) url=%s"%(cnt+1,timeout,url)) + h = httplib2.Http(cache=cache, timeout=float(timeout), disable_ssl_certificate_validation=insecure) + if username: + h.add_credentials(username, password) + + resp, data = h.request(url) + return data + + except httplib2.HttpLib2Error, e: + logging.error("getHttp(lib2)Data: HTTP error(%s): %s"%(e.code,e)) + errmsg = str(e) + # stop trying + break + + else: + # use urllib2 + response = None + for cnt in range(num_tries): + try: + logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url)) + if sys.version_info < (2, 6): + # set timeout on socket -- ugly :-( + import socket + socket.setdefaulttimeout(float(timeout)) + response = urllib2.urlopen(url) + else: + # timeout as parameter + response = urllib2.urlopen(url,timeout=float(timeout)) + # check result? 
+ data = response.read() + response.close() + return data - if response is not None: - data = response.read() - response.close() - return data + except urllib2.HTTPError, e: + logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) + errmsg = str(e) + # stop trying + break + except urllib2.URLError, e: + logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) + errmsg = str(e) + # stop trying + #break if noExceptions: return None @@ -133,8 +255,36 @@ return ImageFile.index_html(self, REQUEST, RESPONSE) +def shortenString(s, l, ellipsis='...'): + """returns a string of length l (or l-1) by omitting characters in the middle of s, replacing with ellipsis.""" + if len(s) <= l: + return s + + l1 = int((l - len(ellipsis)) / 2) + return "%s%s%s"%(s[:l1],ellipsis,s[-l1:]) + + +def sqlName(s, lc=True, more=''): + """returns restricted ASCII-only version of string""" + if s is None: + return "" + + if not isinstance(s, basestring): + # make string object + s = str(s) + + # remove ' + s = s.replace("'","") + # all else -> "_" + s = re.sub('[^A-Za-z0-9_'+more+']','_',s) + if lc: + return s.lower() + + return s + + def getBrowserType(self): - """check the browsers request to find out the browser type""" + """(legacy) check the browsers request to find out the browser type""" bt = {} ua = self.REQUEST.get_header("HTTP_USER_AGENT") bt['ua'] = ua @@ -160,4 +310,3 @@ return bt -
--- a/documentViewer.py Thu May 16 18:04:00 2013 +0200 +++ b/documentViewer.py Thu Oct 17 16:25:39 2013 +0200 @@ -4,34 +4,20 @@ from App.ImageFile import ImageFile from AccessControl import ClassSecurityInfo from AccessControl import getSecurityManager -from Globals import package_home import xml.etree.ElementTree as ET import os -import sys import urllib import logging import math import urlparse -import re -import string import json from Products.MetaDataProvider import MetaDataFolder from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml -def serializeNode(node, encoding="utf-8"): - """returns a string containing node as XML""" - s = ET.tostring(node) - - # 4Suite: - # stream = cStringIO.StringIO() - # Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) - # s = stream.getvalue() - # stream.close() - return s def getMDText(node): """returns the @text content from the MetaDataProvider metadata node""" @@ -993,7 +979,23 @@ def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0): - """returns dict with array of page information for one screenfull of thumbnails""" + """Return dict with array of page information for one screenfull of thumbnails. 
+ + :param start: index of current page + :param rows: number of rows in one batch + :param cols: number of columns in one batch + :param pageFlowLtr: do indexes increase from left to right + :param pageZero: is there a zeroth non-visible page + :param minIdx: minimum index to use + :param maxIdx: maximum index to use + :returns: dict with + first: first page index + last: last page index + batches: list of all possible batches(dict: 'start': index, 'end': index) + pages: list for current batch of rows(list of cols(list of pages(dict: 'idx': index))) + nextStart: first index of next batch + prevStart: first index of previous batch + """ logging.debug("getPageBatch start=%s minIdx=%s maxIdx=%s"%(start,minIdx,maxIdx)) batch = {} grpsize = rows * cols @@ -1001,7 +1003,12 @@ maxIdx = start + grpsize np = maxIdx - minIdx + 1 + if pageZero: + # correct number of pages for batching + np += 1 + nb = int(math.ceil(np / float(grpsize))) + # list of all batch start and end points batches = [] if pageZero: @@ -1016,6 +1023,7 @@ batch['batches'] = batches + # list of pages for current screen pages = [] if pageZero and start == minIdx: # correct beginning @@ -1045,15 +1053,21 @@ batch['prevStart'] = None if start + grpsize <= maxIdx: - batch['nextStart'] = start + grpsize + if pageZero and start == minIdx: + # correct nextStart for pageZero + batch['nextStart'] = grpsize + else: + batch['nextStart'] = start + grpsize else: batch['nextStart'] = None batch['pages'] = pages batch['first'] = minIdx batch['last'] = maxIdx + logging.debug("batch: %s"%repr(batch)) return batch + def getBatch(self, start=1, size=10, end=0, data=None, fullData=True): """returns dict with information for one screenfull of data.""" batch = {}