Mercurial > hg > MPIWGWeb
diff MPIWGRoot_deleted_methods.py @ 33:01b5265264b6
more work on projects.
author | casties |
---|---|
date | Wed, 24 Apr 2013 20:48:45 +0200 |
parents | 1f2760ed3efe |
children | e30a4bd074db |
line wrap: on
line diff
--- a/MPIWGRoot_deleted_methods.py Tue Apr 23 20:54:02 2013 +0200 +++ b/MPIWGRoot_deleted_methods.py Wed Apr 24 20:48:45 2013 +0200 @@ -1,4 +1,4 @@ - def removeStopWords(self,xo): + def removeStopWords(self,xo): """remove stop words from xo""" if not hasattr(self,'_v_stopWords'): self._v_stopWords=self.stopwords_en.data.split("\n") @@ -181,4 +181,124 @@ fh=file("/tmp/harvest_MPIWG/"+name,"w") fh.write(txt) fh.close() - \ No newline at end of file + + + + def generateNameIndex(self): + """erzeuge einen index verwendeter personen""" + import psycopg + o = psycopg.connect('dbname=authorities user=dwinter password=3333',serialize=0) + results={} + print self.fulltext.historicalNames.items() + for nameItem in self.fulltext.historicalNames.items(): #gehe durch alle namen des lexikons + + c = o.cursor() + name=nameItem[0] + print "check",name + c.execute("select lastname,firstname from persons where lower(lastname) = '%s'"%quote(name)) + tmpres=c.fetchall() + firstnames=[result[1] for result in tmpres] # find all firstnames + if tmpres: + lastname=tmpres[0][0] + + for found in self.fulltext({'names':name}): + if found.getObject().isActual(): + for nh in found.getObject().getGetNeighbourhood(name, length=50,tagging=False): #hole umgebung + #schaue nun ob der vorname hinter oder vor dem name ist + position=nh.find(lastname) + # vorher + #print "NH",nh + bevorS=nh[0:position].split() + #print "BV",bevorS + if len(bevorS)>1: + try: + bevor=[bevorS[-1],bevorS[-2]] + except: + bevor=[bevorS[0]] + else: + bevor=[] + #nachher + behindS= re.split("[,|;| ]",nh[position:]) + #print "BH",behindS + if len(behindS)>2: + try: + behind=behindS[1:3] + except: + behind=[bevorS[1]] + else: + behind=[] + for firstname in firstnames: + if firstname in bevor+behind: #Namen wie mit Adelspraedikaten werden so erstmal nich gefunden + id="%s,%s"%(lastname,firstname) + if not results.has_key(id): + results[id]=[] + objId=found.getObject().getId() + if not (objId in results[id]): + print "d %s for %s"%(id,objId) + results[id].append(objId) + self.nameIndex=results + return results + + def editNameIndexHTML(self): + """edit the name index""" + if not hasattr(self,'nameIndexEdited'): # falls editierter index noch nicht existiert, kopiere automatisch erstellten + self.nameIndexEdited=copy.copy(self.nameIndex) + print "huh" + #self.nameIndexEdited=copy.copy(self.nameIndex) + #print self.nameIndexEdited + pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','editHistoricalNames.zpt')).__of__(self) + return pt() + + def getNamesInProject(self,projectId): + """get all names ofnameIndexEdited which are references in projec with projectId""" + + ret=[] + for name in self.nameIndexEdited.keys(): + if projectId in self.nameIndexEdited[name]: + ret.append(name) + + return ret + + def editNameIndex(self,RESPONSE=None,name=None,occurrances=None,submit=None): + """edit the index""" + nI=self.nameIndexEdited # mI introduced to make sure that changes to nameIndexEdited are know to ZODB + if submit=="delete": + + + dh=getattr(self,'deletedHistoricalNames',{}) + + if type(dh) is ListType: + dh={} + if not dh.has_key(name): + dh[name]=occurrances.split("\n") + else: + dh[name]+=occurrances.split("\n") + + self.deletedHistoricalNames=dh + + del self.nameIndexEdited[name] + + + elif (submit=="change"): + + nI[name]=occurrances.split("\n")[0:] + + elif (submit=="add"): + if not nI.has_key(name): + nI[name]=occurrances.split("\n") + else: + nI[name]+=occurrances.split("\n") + + self.nameIndexEdited=nI + + + if RESPONSE is not None: + RESPONSE.redirect('editNameIndexHTML') + + + + def restoreIndex(self): + """restore""" + self.nameIndexEdited=self.nameIndex + return "done" +