Mercurial > hg > documentViewer

--- a/MpdlXmlTextServer.py	Thu May 16 18:04:00 2013 +0200
+++ b/MpdlXmlTextServer.py	Thu Oct 17 16:25:39 2013 +0200
@@ -9,17 +9,7 @@
 import urlparse
 import base64

-from SrvTxtUtils import getInt, getText, getHttpData
-
-def serialize(node):
-    """returns a string containing an XML snippet of node"""
-    s = ET.tostring(node, 'UTF-8')
-    # snip off XML declaration
-    if s.startswith('<?xml'):
-        i = s.find('?>')
-        return s[i+3:]
-
-    return s
+from SrvTxtUtils import getInt, getText, getHttpData, serialize


 class MpdlXmlTextServer(SimpleItem):
--- a/MpiwgXmlTextServer.py	Thu May 16 18:04:00 2013 +0200
+++ b/MpiwgXmlTextServer.py	Thu Oct 17 16:25:39 2013 +0200
@@ -11,7 +11,7 @@

 from datetime import datetime

-from SrvTxtUtils import getInt, getText, getHttpData
+from SrvTxtUtils import getInt, getText, getHttpData, serialize

 # mapping of fields in the output of /mpiwg-mpdl-cms-web/query/GetDocInfo to documentViewer docinfo
 textinfoFieldMap = {
@@ -23,16 +23,6 @@
                     'countTocEntries' : 'numTocEntries'
                     }

-def serialize(node):
-    """returns a string containing an XML snippet of node"""
-    s = ET.tostring(node, 'UTF-8')
-    # snip off XML declaration
-    if s.startswith('<?xml'):
-        i = s.find('?>')
-        return s[i+3:]
-
-    return s
-

 class MpiwgXmlTextServer(SimpleItem):
     """TextServer implementation for MPIWG-XML server"""
--- a/SrvTxtUtils.py	Thu May 16 18:04:00 2013 +0200
+++ b/SrvTxtUtils.py	Thu Oct 17 16:25:39 2013 +0200
@@ -7,11 +7,49 @@
 import os
 import stat
 import urllib
-import urllib2
 import logging
+import time
+import re
+import string
+import datetime
+try:
+    import httplib2
+    httplib = 'httplib2'
+except:
+    logging.warn("Unable to import httplib2! Falling back to urllib2!")
+    import urllib2
+    httplib = 'urllib2'
+
+import xml.etree.ElementTree as ET
+
+srvTxtUtilsVersion = "1.12.1"

-
-srvTxtUtilsVersion = "1.6"
+map_months = {'en': [u"",  # index 0 is an empty placeholder so that month numbers 1-12 index the names directly
+               u"January",
+               u"February",
+               u"March",
+               u"April",
+               u"May",
+               u"June",
+               u"July",
+               u"August",
+               u"September",
+               u"October",
+               u"November",
+               u"December"],
+              'de': [u"",
+               u"Januar",
+               u"Februar",
+               u"M\u00e4rz",
+               u"April",
+               u"Mai",
+               u"Juni",
+               u"Juli",
+               u"August",
+               u"September",
+               u"Oktober",
+               u"November",
+               u"Dezember"]}

 def getInt(number, default=0):
     """returns always an int (0 in case of problems)"""
@@ -37,20 +75,20 @@
         except:
             return s.decode('latin-1')
     else:
-        return unicode(s)
+        return s

 def utf8ify(s):
     """encode unicode object or string into byte string in utf-8 representation.
        assumes string objects to be utf-8"""
     if not s:
         return ""
-    if isinstance(s, unicode):
+    if isinstance(s, str):
+        return s
+    else:
         return s.encode('utf-8')
-    else:
-        return str(s)

 def getText(node, recursive=0):
-    """returns all text content of a node and its subnodes"""
+    """returns all text content of a (etree) node and its subnodes"""
     if node is None:
         return ''

@@ -74,8 +112,74 @@
     return text


+def serialize(node):
+    """returns a string containing an XML snippet of (etree) node"""
+    s = ET.tostring(node, 'UTF-8')
+    # snip off XML declaration
+    if s.startswith('<?xml'):
+        i = s.find('?>')
+        return s[i+3:]

-def getHttpData(url, data=None, num_tries=3, timeout=10, noExceptions=False):
+    return s
+
+
+def getMonthName(mon, lang):
+    """Return the name of month number mon in language lang, looked up in map_months (index 0 is the empty string)."""
+    return map_months[lang][mon]
+
+
+def getDateString(date=None, lang='en', withYear=True, monthNames=True, abbrev=False):
+    """Return date formatted for lang ('en', 'de' or 'iso', any case); None if date is None or lang is unknown."""
+    # NOTE: monthNames and abbrev are accepted but not used by this implementation
+    if date is None:
+        return None

+    lang = lang.lower()  # normalise once: map_months is keyed by lower-case language codes
+    if callable(date.day):
+        # callable members
+        day = date.day()
+        month = date.month()
+        year = date.year()
+    else:
+        # data members
+        day = date.day
+        month = date.month
+        year = date.year

+    ds = None
+    if lang == 'en':
+        ds = "%s %s"%(getMonthName(month, lang), day)
+        if withYear:
+            ds += ", %s"%year
+    elif lang == 'de':
+        ds = "%s. %s"%(day, getMonthName(month, lang))
+        if withYear:
+            ds += " %s"%year
+    elif lang == 'iso':
+        ds = date.isoformat()

+    return ds
+
+
+def getDate(date):
+    """Return a date object for a date or datetime; any other value is passed through unchanged."""
+    if not isinstance(date, datetime.datetime):
+        # no time part to strip
+        return date

+    return date.date()
+
+
+def getDatetime(date):
+    """Return a datetime object for a date or datetime; a plain date gets time 0:00, a datetime is returned unchanged."""
+    if isinstance(date, datetime.date) and not isinstance(date, datetime.datetime):
+        # plain date: add time 0:00 (datetime is a subclass of date, so it must be excluded explicitly)
+        return datetime.datetime.combine(date, datetime.time())

+    return date
+
+
+def getHttpData(url, data=None, num_tries=3, timeout=10, username=None, password=None, cache=None, insecure=False, noExceptions=False):
     """returns result from url+data HTTP request"""
     # we do GET (by appending data to url)
     if isinstance(data, str) or isinstance(data, unicode):
@@ -84,37 +188,55 @@
     elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
         # urlencode
         url = "%s?%s"%(url,urllib.urlencode(data))
-
-    response = None
+
     errmsg = None
-    for cnt in range(num_tries):
-        try:
-            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
-            if sys.version_info < (2, 6):
-                # set timeout on socket -- ugly :-(
-                import socket
-                socket.setdefaulttimeout(float(timeout))
-                response = urllib2.urlopen(url)
-            else:
-                # timeout as parameter
-                response = urllib2.urlopen(url,timeout=float(timeout))
-            # check result?
-            break
-        except urllib2.HTTPError, e:
-            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
-            errmsg = str(e)
-            # stop trying
-            break
-        except urllib2.URLError, e:
-            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
-            errmsg = str(e)
-            # stop trying
-            #break
+    if httplib == 'httplib2':
+        # use httplib2
+        for cnt in range(num_tries):
+            try:
+                logging.debug("getHttp(lib2)Data(#%s %ss) url=%s"%(cnt+1,timeout,url))
+                h = httplib2.Http(cache=cache, timeout=float(timeout), disable_ssl_certificate_validation=insecure)
+                if username:
+                    h.add_credentials(username, password)

+                resp, data = h.request(url)
+                return data  # NOTE(review): resp.status is not checked here, while the urllib2 branch treats HTTPError as failure - confirm

+            except httplib2.HttpLib2Error, e:
+                # HttpLib2Error has no 'code' attribute (accessing it raised AttributeError here); log the exception class instead
+                logging.error("getHttp(lib2)Data: HTTP error(%s): %s"%(e.__class__.__name__,e))
+                errmsg = str(e)
+                break  # stop trying
+
+    else:
+        # use urllib2
+        response = None
+        for cnt in range(num_tries):
+            try:
+                logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
+                if sys.version_info < (2, 6):
+                    # set timeout on socket -- ugly :-(
+                    import socket
+                    socket.setdefaulttimeout(float(timeout))
+                    response = urllib2.urlopen(url)
+                else:
+                    # timeout as parameter
+                    response = urllib2.urlopen(url,timeout=float(timeout))
+                # check result?
+                data = response.read()
+                response.close()
+                return data

-    if response is not None:
-        data = response.read()
-        response.close()
-        return data
+            except urllib2.HTTPError, e:
+                logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
+                errmsg = str(e)
+                # stop trying
+                break
+            except urllib2.URLError, e:
+                logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
+                errmsg = str(e)
+                # stop trying
+                #break

     if noExceptions:
         return None
@@ -133,8 +255,36 @@
     return ImageFile.index_html(self, REQUEST, RESPONSE)


+def shortenString(s, l, ellipsis='...'):
+    """Return s cut to length l (or l-1) by replacing its middle with ellipsis; if l leaves no room around the ellipsis, return the first l characters."""
+    if len(s) <= l:
+        return s
+    l1 = int((l - len(ellipsis)) / 2)  # number of characters kept on each side of the ellipsis
+    # l1 <= 0 must not reach the slices below: s[-0:] is the whole string, which made the result longer than s
+    return "%s%s%s"%(s[:l1],ellipsis,s[-l1:]) if l1 > 0 else s[:max(l, 0)]
+
+
+def sqlName(s, lc=True, more=''):
+    """Return s reduced to ASCII letters, digits, '_' and the characters in more; lower-cased if lc is true."""
+    if s is None:
+        return ""

+    # anything that is not already a string is converted with str()
+    name = s if isinstance(s, basestring) else str(s)

+    # apostrophes are dropped entirely
+    name = name.replace("'","")
+    # every other character outside the allowed set becomes "_"
+    # (more is inserted verbatim into the regex character class)
+    disallowed = '[^A-Za-z0-9_'+more+']'
+    name = re.sub(disallowed,'_',name)
+    if not lc:
+        return name
+    return name.lower()
+
+
 def getBrowserType(self):
-    """check the browsers request to find out the browser type"""
+    """(legacy) check the browsers request to find out the browser type"""
     bt = {}
     ua = self.REQUEST.get_header("HTTP_USER_AGENT")
     bt['ua'] = ua
@@ -160,4 +310,3 @@

     return bt

-
--- a/documentViewer.py	Thu May 16 18:04:00 2013 +0200
+++ b/documentViewer.py	Thu Oct 17 16:25:39 2013 +0200
@@ -4,34 +4,20 @@
 from App.ImageFile import ImageFile
 from AccessControl import ClassSecurityInfo
 from AccessControl import getSecurityManager
-from Globals import package_home

 import xml.etree.ElementTree as ET

 import os
-import sys
 import urllib
 import logging
 import math
 import urlparse
-import re
-import string
 import json

 from Products.MetaDataProvider import MetaDataFolder

 from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml

-def serializeNode(node, encoding="utf-8"):
-    """returns a string containing node as XML"""
-    s = ET.tostring(node)
-
-    # 4Suite:
-    #    stream = cStringIO.StringIO()
-    #    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
-    #    s = stream.getvalue()
-    #    stream.close()
-    return s

 def getMDText(node):
     """returns the @text content from the MetaDataProvider metadata node"""
@@ -993,7 +979,23 @@


     def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0):
-        """returns dict with array of page information for one screenfull of thumbnails"""
+        """Return dict with array of page information for one screenfull of thumbnails.
+
+        :param start: index of current page
+        :param rows: number of rows in one batch
+        :param cols: number of columns in one batch
+        :param pageFlowLtr: do indexes increase from left to right
+        :param pageZero: is there a zeroth non-visible page
+        :param minIdx: minimum index to use
+        :param maxIdx: maximum index to use
+        :returns: dict with
+            first: first page index
+            last: last page index
+            batches: list of all possible batches(dict: 'start': index, 'end': index)
+            pages: list for current batch of rows(list of cols(list of pages(dict: 'idx': index)))
+            nextStart: first index of next batch
+            prevStart: first index of previous batch
+        """
         logging.debug("getPageBatch start=%s minIdx=%s maxIdx=%s"%(start,minIdx,maxIdx))
         batch = {}
         grpsize = rows * cols
@@ -1001,7 +1003,12 @@
             maxIdx = start + grpsize

         np = maxIdx - minIdx + 1
+        if pageZero:
+            # correct number of pages for batching
+            np += 1
+
         nb = int(math.ceil(np / float(grpsize)))
+
         # list of all batch start and end points
         batches = []
         if pageZero:
@@ -1016,6 +1023,7 @@

         batch['batches'] = batches

+        # list of pages for current screen
         pages = []
         if pageZero and start == minIdx:
             # correct beginning
@@ -1045,15 +1053,21 @@
             batch['prevStart'] = None

         if start + grpsize <= maxIdx:
-            batch['nextStart'] = start + grpsize
+            if pageZero and start == minIdx:
+                # correct nextStart for pageZero
+                batch['nextStart'] = grpsize
+            else:
+                batch['nextStart'] = start + grpsize
         else:
             batch['nextStart'] = None

         batch['pages'] = pages
         batch['first'] = minIdx
         batch['last'] = maxIdx
+        logging.debug("batch: %s"%repr(batch))
         return batch

+
     def getBatch(self, start=1, size=10, end=0, data=None, fullData=True):
         """returns dict with information for one screenfull of data."""
         batch = {}