changeset 41:9a23640f7f49

added getAllProjectsAndTagsAsCSV.
author casties
date Wed, 30 Oct 2013 17:43:03 +0100
parents 1eabc75b9180
children b1f5d42bb93b
files MPIWGThesaurus.py SrvTxtUtils.py zpt/manageMPIWGThesaurus.zpt
diffstat 3 files changed, 328 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/MPIWGThesaurus.py	Wed Oct 30 10:18:08 2013 +0100
+++ b/MPIWGThesaurus.py	Wed Oct 30 17:43:03 2013 +0100
@@ -17,6 +17,7 @@
 from AccessControl import ClassSecurityInfo
 
 #import os
+import re
 import os.path
 import logging
 import time
@@ -27,7 +28,7 @@
 import urllib2
 import urllib
 
-
+from SrvTxtUtils import utf8ify
 
 class projectHolder:
             projectHolder=""
@@ -318,17 +319,52 @@
             retarray.append(project.projectName)
             retarray.append(project.persons)
             
-            
-            
             for tagType in self.tags.keys():
                 tags = project.tags.get(tagType,[''])
                 retarray.append(";".join(tags))
             
-            
             ret+="\t".join(retarray)+"\n"
                 
         return ret
     
+
+    def getAllProjectsAndTagsAsCSV(self,archived=1,RESPONSE=None):
+        """Return all projects (including untagged ones) as a TAB-separated table.
+
+        The first row holds the column headers (fixed columns followed by one
+        column per tag type in self.tags). Each following row describes one
+        project from getProjectsAsList(archived=archived) of the project folder.
+        If RESPONSE is given, download headers are set on it.
+        Rows are joined with newlines and returned as one string.
+        """
+        headers=['projectId','sortingNumber','projectName','scholars','startedAt','completedAt','lastChangeThesaurusAt','lastChangeProjectAt','projectCreatedAt','persons','objects']
+        # tag type names go through utf8ify like all other cells so that
+        # unicode and utf-8 byte strings are never mixed in a join
+        headers.extend(utf8ify(tagType) for tagType in self.tags.keys())
+        retList=["\t".join(headers)]
+        
+        projectTags = self.getProjectsAndTags()
+        projects = self.getMPIWGRoot().getProjectFolder().getProjectsAsList(archived=archived)
+        for proj in projects:
+            projId = proj.getId()
+            retProj=[]
+            retProj.append(utf8ify(projId))
+            retProj.append(utf8ify(proj.getContent('xdata_05')))
+            retProj.append(utf8ify(proj.getLabel()))
+            retProj.append(utf8ify(proj.getContent('xdata_01')))
+            retProj.append(utf8ify(proj.getStartedAt()))
+            retProj.append(utf8ify(proj.getCompletedAt()))
+            # strip ":", "-" and blanks from the thesaurus change date
+            changeDate=self.lastChangeInThesaurus.get(projId,'')
+            retProj.append(re.sub(r"[:\- ]","",str(changeDate)))
+            # NOTE(review): creationTime fills the 'lastChangeProjectAt' column
+            # and 'projectCreatedAt' stays empty -- confirm this mapping is intended
+            retProj.append(utf8ify(getattr(proj,'creationTime','20050101000000')))
+            retProj.append("")#TODO: project created at
+            # element 1 of each person/object entry is used as its display value
+            retProj.append(";".join([utf8ify(person[1]) for person in self.getPersonsFromProject(projId)]))
+            retProj.append(";".join([utf8ify(obj[1]) for obj in self.getObjectsFromProject(projId)]))
+            retProj+=self.getTags(projId,projectTags)
+            retList.append("\t".join(retProj))
+        
+        if RESPONSE:
+            RESPONSE.setHeader('Content-Disposition','attachment; filename="ProjectsAndTags.tsv"')
+            RESPONSE.setHeader('Content-Type', "application/octet-stream")
+      
+        return "\n".join(retList)
+    
+
     def getLogpath(self):
         # Returns the "logpath" attribute of this object if it has one,
         # otherwise self.logpathDefault (which is always evaluated).
         return getattr(self,"logpath",self.logpathDefault) 
     
@@ -803,7 +839,7 @@
         retarray=[]    
         for tagType in self.tags.keys():
                 tags = project.tags.get(tagType,[''])
-                retarray.append(self.utf8ify(";".join(tags)))
+                retarray.append(utf8ify(";".join(tags)))
             
             
         return retarray       
@@ -1529,8 +1565,6 @@
         if tags is None:
             logging.debug("can't find Tags")
             return []
-            
-        
     
         tagsStored=tags.get(tagType,None)
         if tagsStored is None:
@@ -1552,7 +1586,8 @@
         return ret
     
     def getProjectDetails(self,projectId):
-        project=getattr(self.projects,projectId,None)
+        pf = self.getMPIWGRoot().getProjectFolder()
+        project=pf.get(projectId,None)
         if project is None:
             logging.debug("no project!! " +projectId)
             return None
@@ -1652,7 +1687,6 @@
             return
         
         opener2 = urllib2.build_opener()
-        
        
         personIDencoded=urllib.quote(personURI,safe="")
         graphEncoded=urllib.quote(self.additionalNamesGraphURL,safe="")
@@ -1667,8 +1701,6 @@
             logging.error(e.code)
             logging.error(e.read())
             return
-        
-                    
        
         return
     
@@ -1676,12 +1708,6 @@
     def storeChange(self,typeString,projectID,value):
         
         from time import gmtime,strftime
-      
-      
-              
-       
-        
-        
         fl = open(self.getLogpath(),"a")
         person = self.REQUEST.AUTHENTICATED_USER.getId()
         date = strftime("%Y-%m-%d %H:%M:%S", gmtime())
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SrvTxtUtils.py	Wed Oct 30 17:43:03 2013 +0100
@@ -0,0 +1,284 @@
+"""Utility methods for handling XML, reading HTTP, etc"""
+
+from App.ImageFile import ImageFile
+from App.Common import rfc1123_date
+
+import sys
+import os
+import stat
+import urllib
+import logging
+import time
+import re
+import datetime
+# httplib2 is optional: the module-level marker `httplib` records which
+# HTTP library was imported so that getHttpData() can pick the code path.
+try:
+    import httplib2
+    httplib = 'httplib2'
+except ImportError:
+    # only a missing module triggers the fallback
+    logging.warning("Unable to import httplib2! Falling back to urllib2!")
+    import urllib2
+    httplib = 'urllib2'
+
+import xml.etree.ElementTree as ET
+
+srvTxtUtilsVersion = "1.12.0"
+
+# Month names per language code. Index 0 is an empty placeholder so that
+# month numbers 1-12 can be used directly as list indices.
+map_months = {
+    'en': [
+        u"",
+        u"January", u"February", u"March", u"April",
+        u"May", u"June", u"July", u"August",
+        u"September", u"October", u"November", u"December",
+    ],
+    'de': [
+        u"",
+        u"Januar", u"Februar", u"M\u00e4rz", u"April",
+        u"Mai", u"Juni", u"Juli", u"August",
+        u"September", u"Oktober", u"November", u"Dezember",
+    ],
+}
+
+def getInt(number, default=0):
+    """Return number converted to int, or int(default) if the conversion fails.
+
+    number  -- value passed to int()
+    default -- fallback value, itself passed through int() (default 0)
+    """
+    try:
+        return int(number)
+    except (TypeError, ValueError, OverflowError):
+        # only conversion failures select the fallback; KeyboardInterrupt
+        # and SystemExit are no longer swallowed
+        return int(default)
+
+def getAt(array, idx, default=None):
+    """Return array[idx], or default if the lookup fails.
+
+    array   -- any subscriptable object (sequence or mapping); may be None
+    idx     -- index or key
+    default -- value returned when idx is missing or array is not subscriptable
+    """
+    try:
+        return array[idx]
+    except (IndexError, KeyError, TypeError):
+        # missing index/key, or array is not subscriptable (e.g. None)
+        return default
+
+def unicodify(s):
+    """Decode a byte string (utf-8, falling back to latin-1) into a unicode object.
+
+    Any false value (None, "", 0, ...) yields u"". Values that are not
+    str instances are returned unchanged.
+    """
+    if not s:
+        return u""
+    if not isinstance(s, str):
+        return s
+    try:
+        return s.decode('utf-8')
+    except UnicodeDecodeError:
+        # not valid utf-8: latin-1 can decode any byte sequence
+        return s.decode('latin-1')
+
+def utf8ify(s):
+    """Return s as a byte string in utf-8 representation.
+
+    Any false value (None, u"", 0, ...) yields "". str instances are
+    assumed to be utf-8 already and are returned unchanged. Objects with
+    an encode() method (unicode) are encoded as utf-8. Anything else
+    (numbers, date objects, ...) is converted with str() instead of
+    failing with an AttributeError.
+    """
+    if not s:
+        return ""
+    if isinstance(s, str):
+        return s
+    encode = getattr(s, 'encode', None)
+    if encode is None:
+        # not a text object at all
+        return str(s)
+    return encode('utf-8')
+
+def getText(node, recursive=0):
+    """Return the text content of an (ElementTree) node and its subnodes.
+
+    node      -- element or None (None yields '')
+    recursive -- if false, only the node's own text plus the text and tail
+                 of its direct children is returned; if true, the text of
+                 all descendants is included
+    """
+    if node is None:
+        return ''
+    
+    parts = [node.text or '']
+    for e in node:
+        if recursive:
+            # pass the flag on so that every nesting level is collected
+            parts.append(getText(e, recursive))
+        else:
+            parts.append(e.text or '')
+        if e.tail:
+            parts.append(e.tail)
+
+    return ''.join(parts)
+
+
+def serialize(node):
+    """Serialize an (etree) node to a UTF-8 XML snippet without XML declaration."""
+    xml = ET.tostring(node, 'UTF-8')
+    if not xml.startswith('<?xml'):
+        return xml
+
+    # skip the declaration: the closing "?>" plus the character after it
+    end = xml.find('?>')
+    return xml[end+3:]
+
+
+def getMonthName(mon, lang):
+    """Look up the name of month number mon for language lang in map_months."""
+    names = map_months[lang]
+    return names[mon]
+
+
+def getDateString(date=None, lang='en', withYear=True, monthNames=True, abbrev=False):
+    """Return formatted date string.
+
+    date       -- object with day, month and year as attributes or as
+                  callables; None yields None
+    lang       -- 'en' ("October 30, 2013"), 'de' ("30. Oktober 2013") or
+                  'iso' (date.isoformat()); compared case-insensitively.
+                  Any other value yields None.
+    withYear   -- append the year ('en' and 'de' only)
+    monthNames, abbrev -- accepted but currently not used
+    """
+    if date is None:
+        return None
+    
+    ds = None
+    if callable(date.day):
+        # callable members
+        day = date.day()
+        month = date.month()
+        year = date.year()
+    else:
+        # data members
+        day = date.day
+        month = date.month
+        year = date.year
+        
+    # normalize once so the month-name lookup uses the same key as the
+    # branch test (a raw 'EN' would raise KeyError in getMonthName)
+    lang = lang.lower()
+    if lang == 'en':
+        ds = "%s %s"%(getMonthName(month, lang), day)
+        if withYear:
+            ds += ", %s"%year
+            
+    elif lang == 'de':
+        ds = "%s. %s"%(day, getMonthName(month, lang))
+        if withYear:
+            ds += " %s"%year
+
+    elif lang == 'iso':
+        ds = date.isoformat()
+            
+    return ds
+        
+
+def getDate(date):
+    """Return the date part of a datetime; any other value is returned unchanged."""
+    if not isinstance(date, datetime.datetime):
+        return date
+
+    # drop the time of day
+    return date.date()
+
+
+def getDatetime(date):
+    """Return a datetime object for a date or datetime.
+
+    A datetime is returned unchanged, a plain date gets the time 0:00,
+    and any other value is returned as it is.
+    """
+    if isinstance(date, datetime.datetime):
+        # datetime is a subclass of date: return it untouched, otherwise
+        # combine() below would reset its time of day to 0:00
+        return date
+
+    if isinstance(date, datetime.date):
+        # add time 0:00
+        return datetime.datetime.combine(date, datetime.time())
+    
+    return date
+
+
+def getHttpData(url, data=None, num_tries=3, timeout=10, username=None, password=None, cache=None, insecure=False, noExceptions=False):
+    """Fetch url with an HTTP GET request and return the response body.
+
+    url          -- base URL of the request
+    data         -- optional query: a string is appended verbatim after "?",
+                    a dict, list or tuple is urlencoded first
+    num_tries    -- maximum number of attempts
+    timeout      -- timeout per attempt in seconds
+    username, password, cache, insecure
+                 -- only used by the httplib2 code path (credentials, cache
+                    and disabling of SSL certificate validation)
+    noExceptions -- if true, return None instead of raising on failure
+
+    With httplib2 the first HttpLib2Error ends the attempts. With urllib2
+    an HTTPError ends the attempts while a URLError is retried.
+    Raises IOError if no attempt succeeded and noExceptions is false.
+    """
+    # we do GET (by appending data to url)
+    # NOTE(review): "?" is appended even if url already contains a query string
+    if isinstance(data, basestring):
+        # if data is string then append
+        url = "%s?%s"%(url,data)
+    elif isinstance(data, (dict, list, tuple)):
+        # urlencode
+        url = "%s?%s"%(url,urllib.urlencode(data))
+
+    errmsg = None
+    if httplib == 'httplib2':
+        # use httplib2
+        for cnt in range(num_tries):
+            try:
+                logging.debug("getHttp(lib2)Data(#%s %ss) url=%s"%(cnt+1,timeout,url))
+                h = httplib2.Http(cache=cache, timeout=float(timeout), disable_ssl_certificate_validation=insecure)
+                if username:
+                    h.add_credentials(username, password)
+                    
+                # NOTE(review): the response status (resp) is not checked
+                resp, content = h.request(url)
+                return content
+            
+            except httplib2.HttpLib2Error, e:
+                # HttpLib2Error does not define "code"; reading e.code
+                # directly would raise AttributeError inside this handler
+                logging.error("getHttp(lib2)Data: HTTP error(%s): %s"%(getattr(e, 'code', None),e))
+                errmsg = str(e)
+                # stop trying
+                break
+    
+    else:
+        # use urllib2 
+        for cnt in range(num_tries):
+            try:
+                logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
+                if sys.version_info < (2, 6):
+                    # set timeout on socket -- ugly :-(
+                    import socket
+                    socket.setdefaulttimeout(float(timeout))
+                    response = urllib2.urlopen(url)
+                else:
+                    # timeout as parameter
+                    response = urllib2.urlopen(url,timeout=float(timeout))
+                # check result?
+                content = response.read()
+                response.close()
+                return content
+
+            except urllib2.HTTPError, e:
+                logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
+                errmsg = str(e)
+                # stop trying
+                break
+            except urllib2.URLError, e:
+                # no break here: the request is tried again
+                logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
+                errmsg = str(e)
+    
+    if noExceptions:
+        return None
+    
+    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
+
+
+def refreshingImageFileIndexHtml(self, REQUEST, RESPONSE):
+    """index_html replacement for App.ImageFile that re-reads the file info on every request."""
+    info = os.stat(self.path)
+    mtime = float(info[stat.ST_MTIME])
+    self.size = info[stat.ST_SIZE]
+    # a modification time of zero falls back to the current time
+    self.lmt = mtime or time.time()
+    self.lmh = rfc1123_date(self.lmt)
+    # delegate to the original method
+    return ImageFile.index_html(self, REQUEST, RESPONSE)
+
+
+def shortenString(s, l, ellipsis='...'):
+    """Return s shortened to at most l characters.
+
+    Strings of length <= l are returned unchanged. Longer strings keep
+    their beginning and end with ellipsis in the middle (result length l
+    or l-1). If l leaves no room for text around the ellipsis, s is
+    simply cut off after l characters.
+    """
+    if len(s) <= l:
+        return s
+    
+    l1 = int((l - len(ellipsis)) // 2)
+    if l1 <= 0:
+        # s[-0:] would be the whole string and make the result longer
+        # than the input, so fall back to plain truncation
+        return s[:max(int(l), 0)]
+
+    return "%s%s%s"%(s[:l1],ellipsis,s[-l1:])
+
+
+def sqlName(s, lc=True, more=''):
+    """Return a restricted ASCII-only version of s.
+
+    None yields "". Non-string values are converted with str(). Single
+    quotes are dropped and every character other than A-Z, a-z, 0-9, "_"
+    and those listed in more becomes "_". The result is lower-cased
+    unless lc is false.
+    """
+    if s is None:
+        return ""
+    
+    text = s if isinstance(s, basestring) else str(s)
+    pattern = '[^A-Za-z0-9_'+more+']'
+    # quotes are removed before everything else is mapped to "_"
+    cleaned = re.sub(pattern, '_', text.replace("'",""))
+    return cleaned.lower() if lc else cleaned
+
+
--- a/zpt/manageMPIWGThesaurus.zpt	Wed Oct 30 10:18:08 2013 +0100
+++ b/zpt/manageMPIWGThesaurus.zpt	Wed Oct 30 17:43:03 2013 +0100
@@ -10,7 +10,7 @@
 
 <h3>Overview</h3>
 <li><a href="getProjectsAndTagsAsCSV">Uebersicht ueber alle vergebenenen Tags (TAB-getrennte Liste) </a></li>
-<!-- <li><a href="../en/getAllProjectsAndTagsAsCSV">Uebersicht ueber alle Projekte mit deren  Tags (TAB-getrennte Liste) </a></li> -->
+<li><a href="getAllProjectsAndTagsAsCSV">Uebersicht ueber alle Projekte mit deren  Tags (TAB-getrennte Liste) </a></li>
 </ul>
 </body>
 </html>
\ No newline at end of file