Mercurial > hg > MPIWGThesaurus
changeset 41:9a23640f7f49
added getAllProjectsAndTagsAsCSV.
author | casties |
---|---|
date | Wed, 30 Oct 2013 17:43:03 +0100 |
parents | 1eabc75b9180 |
children | b1f5d42bb93b |
files | MPIWGThesaurus.py SrvTxtUtils.py zpt/manageMPIWGThesaurus.zpt |
diffstat | 3 files changed, 328 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- a/MPIWGThesaurus.py Wed Oct 30 10:18:08 2013 +0100 +++ b/MPIWGThesaurus.py Wed Oct 30 17:43:03 2013 +0100 @@ -17,6 +17,7 @@ from AccessControl import ClassSecurityInfo #import os +import re import os.path import logging import time @@ -27,7 +28,7 @@ import urllib2 import urllib - +from SrvTxtUtils import utf8ify class projectHolder: projectHolder="" @@ -318,17 +319,52 @@ retarray.append(project.projectName) retarray.append(project.persons) - - for tagType in self.tags.keys(): tags = project.tags.get(tagType,['']) retarray.append(";".join(tags)) - ret+="\t".join(retarray)+"\n" return ret + + def getAllProjectsAndTagsAsCSV(self,archived=1,RESPONSE=None): + """alle projekte auch die nicht getaggten""" + retList=[] + headers=['projectId','sortingNumber','projectName','scholars','startedAt','completedAt','lastChangeThesaurusAt','lastChangeProjectAt','projectCreatedAt','persons','objects'] + headers.extend(list(self.tags.keys())) + retList.append("\t".join(headers)) + + projectTags = self.getProjectsAndTags() + projects = self.getMPIWGRoot().getProjectFolder().getProjectsAsList(archived=archived) + for proj in projects: + p_name = proj.getLabel() + retProj=[] + #if (not proj.isArchivedProject() and archived==1) or (proj.isArchivedProject() and archived==2): + retProj.append(utf8ify(proj.getId())) + retProj.append(utf8ify(proj.getContent('xdata_05'))) + retProj.append(utf8ify(p_name)) + retProj.append(utf8ify(proj.getContent('xdata_01'))) + retProj.append(utf8ify(proj.getStartedAt())) + retProj.append(utf8ify(proj.getCompletedAt())) + changeDate=self.lastChangeInThesaurus.get(proj.getId(),'') + n = re.sub("[:\- ]","",str(changeDate)) + retProj.append(n) + retProj.append(utf8ify(getattr(proj,'creationTime','20050101000000'))) + retProj.append("")#TODO: project created at + retProj.append(";".join([person[1] for person in self.getPersonsFromProject(proj.getId())])) + #retProj.append(";".join([person[1] for person in 
self.thesaurus.getHistoricalPlacesFromProject(proj.getId())])) + retProj.append(";".join([person[1] for person in self.getObjectsFromProject(proj.getId())])) + retProj+=self.getTags(proj.getId(),projectTags) + retList.append("\t".join(retProj)) + + if RESPONSE: + RESPONSE.setHeader('Content-Disposition','attachment; filename="ProjectsAndTags.tsv"') + RESPONSE.setHeader('Content-Type', "application/octet-stream") + + return "\n".join(retList); + + def getLogpath(self): return getattr(self,"logpath",self.logpathDefault) @@ -803,7 +839,7 @@ retarray=[] for tagType in self.tags.keys(): tags = project.tags.get(tagType,['']) - retarray.append(self.utf8ify(";".join(tags))) + retarray.append(utf8ify(";".join(tags))) return retarray @@ -1529,8 +1565,6 @@ if tags is None: logging.debug("can't find Tags") return [] - - tagsStored=tags.get(tagType,None) if tagsStored is None: @@ -1552,7 +1586,8 @@ return ret def getProjectDetails(self,projectId): - project=getattr(self.projects,projectId,None) + pf = self.getMPIWGRoot().getProjectFolder() + project=pf.get(projectId,None) if project is None: logging.debug("no project!! " +projectId) return None @@ -1652,7 +1687,6 @@ return opener2 = urllib2.build_opener() - personIDencoded=urllib.quote(personURI,safe="") graphEncoded=urllib.quote(self.additionalNamesGraphURL,safe="") @@ -1667,8 +1701,6 @@ logging.error(e.code) logging.error(e.read()) return - - return @@ -1676,12 +1708,6 @@ def storeChange(self,typeString,projectID,value): from time import gmtime,strftime - - - - - - fl = open(self.getLogpath(),"a") person = self.REQUEST.AUTHENTICATED_USER.getId() date = strftime("%Y-%m-%d %H:%M:%S", gmtime())
# SrvTxtUtils.py -- new file in this changeset
# (diff header: --- /dev/null  +++ b/SrvTxtUtils.py  Wed Oct 30 17:43:03 2013 +0100, @@ -0,0 +1,284 @@)
"""Utility methods for handling XML, reading HTTP, etc"""

from App.ImageFile import ImageFile
from App.Common import rfc1123_date

import sys
import os
import stat
import urllib
import logging
import time
import re
import datetime
try:
    import httplib2
    httplib = 'httplib2'
except Exception:
    # Any failure while importing httplib2 triggers the fallback, but
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    logging.warning("Unable to import httplib2! Falling back to urllib2!")
    import urllib2
    httplib = 'urllib2'

import xml.etree.ElementTree as ET

srvTxtUtilsVersion = "1.12.0"

# Month names per language code; index 0 is a placeholder so that the month
# number (1-12) can be used directly as list index.
map_months = {'en': [u"",
                     u"January",
                     u"February",
                     u"March",
                     u"April",
                     u"May",
                     u"June",
                     u"July",
                     u"August",
                     u"September",
                     u"October",
                     u"November",
                     u"December"],
              'de': [u"",
                     u"Januar",
                     u"Februar",
                     u"M\u00e4rz",
                     u"April",
                     u"Mai",
                     u"Juni",
                     u"Juli",
                     u"August",
                     u"September",
                     u"Oktober",
                     u"November",
                     u"Dezember"]}


def getInt(number, default=0):
    """Return number converted to int, or int(default) if the conversion fails."""
    try:
        return int(number)
    except Exception:
        return int(default)


def getAt(array, idx, default=None):
    """Return array[idx], or default if the lookup fails for any reason."""
    try:
        return array[idx]
    except Exception:
        return default


def unicodify(s):
    """Decode a str (utf-8 or latin-1 representation) into a unicode object.

    Falsy input yields u""; anything that is not a str is returned unchanged.
    """
    if not s:
        return u""
    if isinstance(s, str):
        try:
            return s.decode('utf-8')
        except UnicodeError:
            # not valid utf-8: latin-1 can decode every byte sequence
            return s.decode('latin-1')
    else:
        return s


def utf8ify(s):
    """Encode a unicode object or string into a byte string in utf-8 representation.

    Assumes string objects to be utf-8 already. Falsy input yields "".
    """
    if not s:
        return ""
    if isinstance(s, str):
        return s
    else:
        return s.encode('utf-8')


def getText(node, recursive=0):
    """Return the text content of an (etree) node and its subnodes.

    With recursive false only the text and tails of the direct children are
    added to the node's own text; with recursive true the whole subtree is
    collected.
    """
    if node is None:
        return ''

    text = node.text or ''
    for e in node:
        if recursive:
            # pass the flag on, otherwise everything below the
            # grandchildren would be silently dropped
            text += getText(e, recursive)
        else:
            text += e.text or ''
        if e.tail:
            text += e.tail

    return text


def serialize(node):
    """Return a string containing an XML snippet of (etree) node."""
    s = ET.tostring(node, 'UTF-8')
    # snip off XML declaration ("?>" plus the following line break)
    if s.startswith('<?xml'):
        i = s.find('?>')
        return s[i+3:]

    return s


def getMonthName(mon, lang):
    """Return the name of month number mon (1-12) in language lang.

    lang must be a key of map_months ('en' or 'de').
    """
    return map_months[lang][mon]


def getDateString(date=None, lang='en', withYear=True, monthNames=True, abbrev=False):
    """Return formatted date string.

    date may expose day/month/year either as attributes or as methods.
    lang is matched case-insensitively against 'en', 'de' and 'iso'; any
    other value (or date=None) yields None. monthNames and abbrev are
    accepted but not used by this implementation.
    """
    if date is None:
        return None

    ds = None
    if callable(date.day):
        # callable members
        day = date.day()
        month = date.month()
        year = date.year()
    else:
        # data members
        day = date.day
        month = date.month
        year = date.year

    # normalise once, so the month lookup sees the same key as the dispatch
    # below ('EN' used to pass the comparison and then fail in getMonthName)
    lang = lang.lower()
    if lang == 'en':
        ds = "%s %s"%(getMonthName(month, lang), day)
        if withYear:
            ds += ", %s"%year

    elif lang == 'de':
        ds = "%s. %s"%(day, getMonthName(month, lang))
        if withYear:
            ds += " %s"%year

    elif lang == 'iso':
        ds = date.isoformat()

    return ds


def getDate(date):
    """Return date object from date or datetime date."""
    if isinstance(date, datetime.datetime):
        # strip time
        return date.date()

    return date


def getDatetime(date):
    """Return datetime object from date or datetime date.

    A datetime is returned unchanged; a plain date gets the time 0:00.
    """
    if isinstance(date, datetime.datetime):
        # datetime is a subclass of date: test it first so that its time
        # part is not reset to midnight by combine()
        return date

    if isinstance(date, datetime.date):
        # add time 0:00
        return datetime.datetime.combine(date, datetime.time())

    return date


def getHttpData(url, data=None, num_tries=3, timeout=10, username=None, password=None, cache=None, insecure=False, noExceptions=False):
    """Return the body of a HTTP GET request to url (+data).

    data is appended to url as query string: strings verbatim, dicts, lists
    and tuples urlencoded. timeout is in seconds. username, password, cache
    and insecure are only used with httplib2.
    Returns None on failure if noExceptions is true, otherwise raises IOError.
    """
    # we do GET (by appending data to url)
    if isinstance(data, basestring):
        # if data is string then append
        url = "%s?%s"%(url,data)
    elif isinstance(data, (dict, list, tuple)):
        # urlencode
        url = "%s?%s"%(url,urllib.urlencode(data))

    # Exceptions are fetched via sys.exc_info() because "except X, e" and
    # "except X as e" are each a syntax error on some Python versions.
    errmsg = None
    if httplib == 'httplib2':
        # use httplib2
        for cnt in range(num_tries):
            try:
                logging.debug("getHttp(lib2)Data(#%s %ss) url=%s"%(cnt+1,timeout,url))
                h = httplib2.Http(cache=cache, timeout=float(timeout), disable_ssl_certificate_validation=insecure)
                if username:
                    h.add_credentials(username, password)

                resp, content = h.request(url)
                return content

            except httplib2.HttpLib2Error:
                e = sys.exc_info()[1]
                # plain HttpLib2Error instances carry no "code" attribute;
                # reading it unguarded raised AttributeError in this handler
                logging.error("getHttp(lib2)Data: HTTP error(%s): %s"%(getattr(e, 'code', None),e))
                errmsg = str(e)
                # stop trying
                break

    else:
        # use urllib2
        response = None
        for cnt in range(num_tries):
            try:
                logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
                if sys.version_info < (2, 6):
                    # set timeout on socket -- ugly :-(
                    import socket
                    socket.setdefaulttimeout(float(timeout))
                    response = urllib2.urlopen(url)
                else:
                    # timeout as parameter
                    response = urllib2.urlopen(url,timeout=float(timeout))
                # check result?
                content = response.read()
                response.close()
                return content

            except urllib2.HTTPError:
                e = sys.exc_info()[1]
                logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
                errmsg = str(e)
                # stop trying
                break
            except urllib2.URLError:
                e = sys.exc_info()[1]
                logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
                errmsg = str(e)
                # no break: URL errors are retried up to num_tries times

    if noExceptions:
        return None

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))


def refreshingImageFileIndexHtml(self, REQUEST, RESPONSE):
    """index_html method for App.ImageFile that updates the file info for each request."""
    stat_info = os.stat(self.path)
    self.size = stat_info[stat.ST_SIZE]
    self.lmt = float(stat_info[stat.ST_MTIME]) or time.time()
    self.lmh = rfc1123_date(self.lmt)
    # call original method
    return ImageFile.index_html(self, REQUEST, RESPONSE)


def shortenString(s, l, ellipsis='...'):
    """Return a string of length l (or l-1) by omitting characters in the middle of s, replacing with ellipsis.

    Strings that already fit are returned unchanged. If l leaves no room for
    the ellipsis plus one character on each side, s is simply cut to l
    characters.
    """
    if len(s) <= l:
        return s

    l1 = int((l - len(ellipsis)) / 2)
    if l1 <= 0:
        # s[-0:] would be the whole string and negative values slice from
        # the wrong end, so fall back to plain truncation
        return s[:max(int(l), 0)]

    return "%s%s%s"%(s[:l1],ellipsis,s[-l1:])


def sqlName(s, lc=True, more=''):
    """Return restricted ASCII-only version of string.

    Single quotes are removed, every character outside A-Za-z0-9_ (plus the
    characters given in more) becomes "_", and the result is lower-cased
    unless lc is false. None yields "".
    """
    if s is None:
        return ""

    if not isinstance(s, basestring):
        # make string object
        s = str(s)

    # remove '
    s = s.replace("'","")
    # all else -> "_"
    # NOTE(review): more is inserted unescaped into the character class --
    # confirm callers never pass "]", "^", "\" or a range-forming "-"
    s = re.sub('[^A-Za-z0-9_'+more+']','_',s)
    if lc:
        return s.lower()

    return s

--- a/zpt/manageMPIWGThesaurus.zpt Wed Oct 30 10:18:08 2013 +0100 +++ b/zpt/manageMPIWGThesaurus.zpt Wed Oct 30 17:43:03 2013 +0100 @@ -10,7 +10,7 @@ <h3>Overview</h3> <li><a href="getProjectsAndTagsAsCSV">Uebersicht ueber alle vergebenenen Tags (TAB-getrennte Liste) </a></li> -<!-- <li><a href="../en/getAllProjectsAndTagsAsCSV">Uebersicht ueber alle Projekte mit deren Tags (TAB-getrennte Liste) </a></li> --> +<li><a href="getAllProjectsAndTagsAsCSV">Uebersicht ueber alle Projekte mit deren Tags (TAB-getrennte Liste) </a></li> </ul> </body> </html> \ No newline at end of file