--- MPIWGWeb/Attic/MPIWGRoot.py 2008/10/23 10:02:09 1.1.2.21 +++ MPIWGWeb/Attic/MPIWGRoot.py 2012/12/07 10:12:56 1.1.2.42 @@ -21,15 +21,19 @@ from bibliography import * import time import xml.dom.minidom import sys -from Ft.Xml.XPath import Evaluate -from Ft.Xml.XPath.Context import Context -from Ft.Xml.Domlette import NonvalidatingReader,PrettyPrint, Print -from Ft.Xml import EMPTY_NAMESPACE +import transaction + +#from Ft.Xml.XPath import Evaluate +#from Ft.Xml.XPath.Context import Context +#from Ft.Xml.Domlette import NonvalidatingReader,PrettyPrint, Print +#from Ft.Xml import EMPTY_NAMESPACE import copy import updatePersonalWWW import MPIWGStaff from MPIWGHelper import * - +from BeautifulSoup import BeautifulSoup, Comment +from ZODB import FileStorage, DB +from ZEO import ClientStorage def sortWeight(x,y): x1=int(getattr(x[1],'weight','0')) @@ -40,6 +44,7 @@ def sortWeight(x,y): class MPIWGRoot(ZSQLExtendFolder): """Stammordner fuer den Web-Server""" + _v_harvestCache=None meta_type='MPIWGRoot' fieldLabels={'WEB_title':'WEB_Title', @@ -67,6 +72,49 @@ class MPIWGRoot(ZSQLExtendFolder): # types of objects that show up in navigation nav_meta_types = ['MPIWGTemplate','MPIWGLink','MPIWGFolder'] + manage_options = Folder.manage_options+( + {'label':'Update personal homepages','action':'updatePersonalwww_html'}, + {'label':'Reindex catalogs','action':'reindexCatalogs'}, + {'label':'Main config','action':'changeMPIWGRootForm'}, + {'label':'add e-mails','action':'showNewDBEntries'}, + #{'label':'update the institutsbibliography','action':'updateInstitutsbiliography'}, + #{'label':'Edit Historical Persons','action':'editHistoricalPersonsForm'}, + #{'label':'Store Historical Persons','action':'storeHistoricalPersons'}, + ) + + + def getHarvestCachePort(self): + return getattr(self,"harvestPort",29999) + + def getHarvestCacheServer(self): + return getattr(self,"harvestServer","localhost") + + + def getHarvestCache(self): + logging.debug("CACHE:"+repr(self._v_harvestCache)) + if self._v_harvestCache==None: + #storage = FileStorage.FileStorage('/tmp/'+self.getId()+'test-filestorage.fs') + addr = self.getHarvestCacheServer(), self.getHarvestCachePort() + storage = ClientStorage.ClientStorage(addr) + db = DB(storage) + self._v_harvestDV=db + self._v_harvestDV=db + conn = db.open() + dbroot = conn.root() + if not dbroot.has_key('templates'): + from BTrees.OOBTree import OOBTree + dbroot['templates'] = OOBTree() + + self._v_harvestCache = dbroot['templates'] + logging.debug("CACHE2:"+repr(self._v_harvestCache)) + return self._v_harvestCache + + + + def __del__(self): + if self._v_harvestCache!=None: + self._v_harvestDV.close(); + def getGetNeighbourhood(self,obj, wordStr, length=100,tagging=True): """finde umgebung um die worte in wordStr, zurueckgegeben wird eine Array mit den Umgebungen von Fundstellen der Worte alle Tags werden entfernt, die Fundstellen werden mit XX getaggt, die Umgebungen werden @@ -80,6 +128,8 @@ class MPIWGRoot(ZSQLExtendFolder): ret=[] # nimmt das Array auf, dass spaeter zurueckgegeben wird ranges=[] #Array mit tupeln x,y wobei x die Position des Anfang und y des Endes der i-ten Umgebung angiebt + wordStr=wordStr.lstrip().rstrip() + def isInRanges(nr,length): """test ob eine gegeben Position nr schon irgendwo in einer Umgebung ist, gibt den Index des ersten Wertes aus ranges zurueck, -1, wenn kein Treffer @@ -105,11 +155,29 @@ class MPIWGRoot(ZSQLExtendFolder): words=wordStr.split(" ") #if not words is ListType: # words=[words] + + + txtCache = self.en.getHarvestCache(); + txt= txtCache.get(obj.absolute_url(),None) + + if txt==None: - txt=obj.harvest_page() + logging.debug("NO CACHE for: "+obj.absolute_url()) + txt=obj.harvest_page(mode="slim") + + if not txt: return ret - txt=re.sub("<.*?>", "", txt) # loesche alle Tags + + soup = BeautifulSoup(txt) + + comments = soup.findAll(text=lambda text:isinstance(text, Comment)) + [comment.extract() for comment in comments] + + txt = ''.join(soup.findAll(text=True)) + + + #txt=re.sub("<.*?>", "", txt) # loesche alle Tags for word in words: word=re.sub("_"," ",word) # ersetze zurueck "_" durch " " pos=0 @@ -131,6 +199,13 @@ class MPIWGRoot(ZSQLExtendFolder): y=max(ranges[nr][1],y) str=txt[x:y] + if x!=0: #add dots if in the middle of text + str="..."+str + + if y!=len(txt): #add dots if in the middle of text + str=str+"..." + + if nr >=0: # word ist in einer schon gefunden Umgebung ranges[nr]=(x,y) # neue Position der Umgebung @@ -321,6 +396,9 @@ class MPIWGRoot(ZSQLExtendFolder): return base def isNewCapital(self,text=None,reset=None): + + if text: + text=text.upper() if reset: self.REQUEST['capital']="A" return True @@ -384,11 +462,14 @@ class MPIWGRoot(ZSQLExtendFolder): else: return style - def getFeatures(self): - """returns a list of all Features""" + def getFeatures(self, num=None): + """returns a list of the last num Features""" dir = getattr(self, 'features') features = dir.objectItems(['MPIWGFeature']) features.sort(sortWeight) + if num is not None: + # take only the last num elements + features = features[-num:] # return pure list of objects return [f[1] for f in features] @@ -584,15 +665,23 @@ class MPIWGRoot(ZSQLExtendFolder): pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','changeMPIWGRootForm')).__of__(self) return pt() - def changeMPIWGRoot(self,title,disciplineList,themesList,connection_id,lang=None,RESPONSE=None): + def changeMPIWGRoot(self,title,connection_id,coneServiceURL,harvestPort,harvestServer,lang=None,autocommit=None,RESPONSE=None): """change""" self.title=title self.connection_id=connection_id - self.disciplineList=disciplineList - self.themesList=themesList + #self.disciplineList=disciplineList + #self.themesList=themesList + self.coneServiceURL=coneServiceURL + self.harvestServer=harvestServer + try: + self.harvestPort=int(harvestPort) + except: + logging.error("couldn't change port!: no number:"+harvestPort) + if lang is not None: self.lang = lang + self.autocommit = (autocommit == "on") if RESPONSE is not None: RESPONSE.redirect('manage_main') @@ -633,7 +722,49 @@ class MPIWGRoot(ZSQLExtendFolder): return ret + + def getAllProjectsAndTagsAsCSV(self,archived=1,RESPONSE=None): + """alle projekte auch die nicht getaggten""" + retList=[] + headers=['projectId','sortingNumber','projectName','scholars','startedAt','completedAt','lastChangeThesaurusAt','lastChangeProjectAt','projectCreatedAt','persons','places','objects'] + headers.extend(list(self.thesaurus.tags.keys())) + retList.append("\t".join(headers)) + if not hasattr(self,'thesaurus'): + return "NON thesaurus (there have to be a MPIWGthesaurus object, with object ID thesaurus)" + + projectTags = self.thesaurus.getProjectsAndTags() + for project in self.getProjectFields('WEB_title_or_short'): + proj = project[0] + p_name = project[1] + retProj=[] + #if (not proj.isArchivedProject() and archived==1) or (proj.isArchivedProject() and archived==2): + retProj.append(self.utf8ify(proj.getId())) + retProj.append(self.utf8ify(proj.getContent('xdata_05'))) + retProj.append(self.utf8ify(p_name)) + retProj.append(self.utf8ify(proj.getContent('xdata_01'))) + retProj.append(self.utf8ify(proj.getStartedAt())) + retProj.append(self.utf8ify(proj.getCompletedAt())) + changeDate=self.thesaurus.lastChangeInThesaurus.get(proj.getId(),'') + n = re.sub("[:\- ]","",str(changeDate)) + retProj.append(n) + retProj.append(self.utf8ify(getattr(proj,'creationTime','20050101000000'))) + retProj.append("")#TODO: project created at + retProj.append(";".join([person[1] for person in self.thesaurus.getPersonsFromProject(proj.getId())])) + retProj.append(";".join([person[1] for person in self.thesaurus.getHistoricalPlacesFromProject(proj.getId())])) + retProj.append(";".join([person[1] for person in self.thesaurus.getObjectsFromProject(proj.getId())])) + retProj+=self.thesaurus.getTags(proj.getId(),projectTags) + retList.append("\t".join(retProj)) + + if RESPONSE: + + RESPONSE.setHeader('Content-Disposition','attachment; filename="ProjectsAndTags.tsv"') + RESPONSE.setHeader('Content-Type', "application/octet-stream") + + return "\n".join(retList); + + + def getProjectFields(self,fieldName,date=None,folder=None,sort=None): """getListofFieldNames""" ret=[] @@ -678,17 +809,6 @@ class MPIWGRoot(ZSQLExtendFolder): return projects - manage_options = Folder.manage_options+( - {'label':'Update personal homepages','action':'updatePersonalwww_html'}, - {'label':'Reindex catalogs','action':'reindexCatalogs'}, - {'label':'Main config','action':'changeMPIWGRootForm'}, - {'label':'add e-mails','action':'showNewDBEntries'}, - {'label':'update the institutsbibliography','action':'updateInstitutsbiliography'}, - #{'label':'Edit Historical Persons','action':'editHistoricalPersonsForm'}, - #{'label':'Store Historical Persons','action':'storeHistoricalPersons'}, - ) - - def updatePublicationDB(self,personId=None): """updates the publication db, i.e. copy year and type into the main table""" @@ -774,17 +894,20 @@ class MPIWGRoot(ZSQLExtendFolder): result,msg=MPIWGStaff.createNewDBEntry(self,data['publish_the_data'],data['key'],data['last_name'], - data['first_name'],data['title'],data['status'],"", + data['first_name'],data['titles_new'],data['status'],"", "",data['date_from'],data['date_to'], - data['department'],data['home_inst'],data['funded_by'], + data['department'],'',data['funded_by'], data['e_mail2'],data['current_work'],"yes",data['date_stay_at_mpiwg'],data['group'],"no",data['current_work']) return result,msg - def updatePersonEntry(self,data,ignoreEntries=[]): + def updatePersonEntry(self,data,ignoreEntries=None): """update an person entry from data. but ignore all fields in ignore Entries""" - - ignoreEntries.append('current_work') # TODO:updatecurrent work + if ignoreEntries is None: + ignoreEntries = [] + + #ignoreEntries.append('current_work') # TODO:updatecurrent work + logging.debug("updatePersonEntry: data=%s ignoreEntries=%s"%(repr(data),repr(ignoreEntries))) if data['date_to']=="": # wenn date_to leer data['date_to']="date_none" @@ -798,9 +921,9 @@ class MPIWGRoot(ZSQLExtendFolder): columns=data.keys() for x in ignoreEntries: - logging.info("ign rem: %s"%x) + logging.debug("updatePersonEntry: ignoring %s"%x) try: #falls in ignore entries felder sind, die nicht in columns sind, fange den fehler ab - columns.remove(x) + columns.remove(x) except: pass @@ -832,6 +955,8 @@ class MPIWGRoot(ZSQLExtendFolder): resultSet=self.REQUEST.SESSION['personal_www']['resultSet'] news=self.REQUEST.SESSION['personal_www']['news'] conflicts=self.REQUEST.SESSION['personal_www']['conflicts'] + logging.debug("updatePersonalwww_doIt: args=%s\n resultSet=%s\n news=%s\n conflicts=%s"%(args,resultSet,news,conflicts)) + ret="" # generate the new entry @@ -839,8 +964,8 @@ class MPIWGRoot(ZSQLExtendFolder): ret+="

Hinzugefügt

" ret+="

Neueinträge erscheinen erst auf der Homepage, wenn ihnen eine e-mail Adresse zugeordnet wurde.

" ret+="" @@ -866,13 +992,13 @@ class MPIWGRoot(ZSQLExtendFolder): if args[conflict.encode('utf-8')+'_'+cf[0]]=="stored": #use the stored one ignoreEntries.append(cf[0]) #so ignore field cf[0] displayIgnored.append(cf) + if len(displayIgnored)>0: ret+="

%s

"%conflict.encode('utf-8') - ret+="" for iE in displayIgnored: ret+=""%(iE[0].encode('utf-8'),iE[1].encode('utf-8'),iE[2].encode('utf-8')) - ret+="" + ret+="
%s%s%s
" self.updatePersonEntry(resultSet[conflict],ignoreEntries=ignoreEntries) @@ -925,13 +1051,29 @@ class MPIWGRoot(ZSQLExtendFolder): try: self.ProjectCatalog.manage_catalogReindex(self.REQUEST,RESPONSE,self.REQUEST['URL1']) + logger("MPIWG Root (reindexCatalog: projects)",logging.INFO,"DONE") except: - logger("MPIWG Root (updatehomepage)",logging.WARNING," %s %s"%sys.exc_info()[:2]) + logger("MPIWG Root (reindexCatalog: projects)",logging.WARNING," %s %s"%sys.exc_info()[:2]) + + try: + self.MembersCatalog.manage_catalogReindex(self.REQUEST,RESPONSE,self.REQUEST['URL1']) + logger("MPIWG Root (reindexCatalog: members)",logging.INFO,"DONE") + except: + logger("MPIWG Root (reindexCatalog: members)",logging.WARNING," %s %s"%sys.exc_info()[:2]) + + try: + + self.fulltextProjectsMembers.manage_catalogReindex(self.REQUEST,RESPONSE,self.REQUEST['URL1']) + logger("MPIWG Root (reindexCatalog: fulltextProjectsMembers)",logging.INFO,"DONE") + except: + logger("MPIWG Root (reindexCatalog: fulltextProjectsMembers)",logging.WARNING," %s %s"%sys.exc_info()[:2]) + + + + - - if RESPONSE: RESPONSE.redirect('manage_main') @@ -942,11 +1084,11 @@ class MPIWGRoot(ZSQLExtendFolder): #ret=[] def sorter(x,y): - return cmp(x[0],y[0]) + return cmp(x[0].lower(),y[0].lower()) results=self.MembersCatalog({'isPublished':True}) - ret=[(", ".join([proj.lastName, proj.firstName]).decode('utf-8'),proj.getKey) for proj in results] + ret=[(unicodify(", ".join([proj.lastName, proj.firstName])), proj.getKey) for proj in results] ret.sort(sorter) return ret @@ -999,6 +1141,8 @@ class MPIWGRoot(ZSQLExtendFolder): onlyArchived=0: alle Projekte onlyArchived= 1 : nur aktuelle Projekte onlyArchived = 2: nur archivierte Projekte + + department fuer das Tree geholt werden soll """ returnListTmp=[] @@ -1018,7 +1162,7 @@ class MPIWGRoot(ZSQLExtendFolder): if idNr[0]=="x": # kompatibilitaet mit alter Konvention, x vor der Nummer macht project inactive project[0].setActiveFlag(False) - if (not dep) or (idNr[0]==dep): #falls dep gesetzt ist nur dieses hinzufuegen. + if (not dep) or (splittedId[0]==dep): #falls dep gesetzt ist nur dieses hinzufuegen. if (onlyActive==0): returnListTmp.append((depth,nr,title,project[0])) @@ -1080,9 +1224,9 @@ class MPIWGRoot(ZSQLExtendFolder): hashList={} onlyArchived=int(form.get("onlyArchived",0)) onlyActive=int(form.get("onlyActive",0)) + dep=form.get("dep",None) - - fields=self.getTree(onlyArchived=onlyArchived,onlyActive=onlyActive) + fields=self.getTree(dep=dep,onlyArchived=onlyArchived,onlyActive=onlyActive) logging.info("GOT TREE!----------------------------------------------------") for field in form.keys(): @@ -1099,25 +1243,33 @@ class MPIWGRoot(ZSQLExtendFolder): else: fields[nr][3].setActiveFlag(False) - #nummer hat sich geŠndert + #nummer hat sich geaendert entryChanged = False; - + if isinstance(fields[nr][3].xdata_05,list): #for some reasons somtimes the content of the field is a list with one entry. + fields[nr][3].xdata_05=fields[nr][3].xdata_05[0] + if not (fields[nr][3].xdata_05==form[str(nr)+'_number']): logging.info("Changed!Number+++++++++++++++++++++++++++++++++") + logging.info(repr(fields[nr][3].xdata_05)+" ---> "+ repr(form[str(nr)+'_number'])) fields[nr][3].xdata_05=form[str(nr)+'_number'] entryChanged = True #completed har sich geaendert - - if not (fields[nr][3].getCompletedAt()==fields[nr][3].transformDate(form[str(nr)+'_completed'])): + + td = fields[nr][3].transformDate # hole die funktion zum transformieren des datums + + if not (td(fields[nr][3].getCompletedAt())==td(form[str(nr)+'_completed'])): fields[nr][3].setCompletedAt(form[str(nr)+'_completed']) + logging.info(repr(td(fields[nr][3].getCompletedAt()))+" ---> "+ repr(td(form[str(nr)+'_completed']))) logging.info("Changed!Completed+++++++++++++++++++++++++++++++++") entryChanged = True - if not (fields[nr][3].getStartedAt()==fields[nr][3].transformDate(form[str(nr)+'_started'])): + if not (td(fields[nr][3].getStartedAt())==td(form[str(nr)+'_started'])): fields[nr][3].setStartedAt(form[str(nr)+'_started']) + + logging.info(repr(td(fields[nr][3].getStartedAt()))+" ---> "+ repr(td(form[str(nr)+'_started']))) logging.info("Changed!Started+++++++++++++++++++++++++++++++++") entryChanged = True @@ -1193,10 +1345,10 @@ class MPIWGRoot(ZSQLExtendFolder): """give tuple member /projects""" ret=[] members=self.getAllMembers() - logging.error("X %s"%repr(members)) + logging.debug("X %s"%repr(members)) #return str(members) for x in members: - logging.error("X %s"%repr(x)) + #logging.debug("X %s"%repr(x)) projects=self.getProjectsOfMember(key=x[1],date=date) if len(projects)>0: ret.append((x[0],projects)) @@ -1220,11 +1372,12 @@ class MPIWGRoot(ZSQLExtendFolder): ret=[] if key: + logging.debug("MPIWGROOT (getProjectsOfMember):"+key) proj=self.ProjectCatalog({'getPersonKeyList':utf8ify(key)}) else: return ret # key muss definiert sein - + #logging.debug("MPIWGROOT (getProjectsOfMember):"+repr(proj)) if proj: proj2=[] for x in proj: @@ -1465,6 +1618,19 @@ class MPIWGRoot(ZSQLExtendFolder): return "done" + def sortResults(self,results): + """search the catalog and give results back sorted by meta_type""" + ret = {} + logging.debug(results()) + for result in results(): + metaType = result.meta_type + resultList= ret.get(metaType,[]) + resultList.append(result) + ret[metaType]=resultList + + logging.debug(ret) + return ret + def manage_addMPIWGRootForm(self): """form for adding the root""" @@ -1479,4 +1645,4 @@ def manage_addMPIWGRoot(self,id,title,co setattr(ob,'connection_id',connection_id) if RESPONSE is not None: RESPONSE.redirect('manage_main') - \ No newline at end of file +