# HG changeset patch # User dwinter # Date 1324501876 -3600 # Node ID e21db3150dae86b04320ff8d593aec505e12b2e2 # Parent fcab446bca7915920380d9747983c5bd2cc02398 manage persons diff -r fcab446bca79 -r e21db3150dae MPIWGThesaurus.py --- a/MPIWGThesaurus.py Wed Oct 12 16:15:57 2011 +0200 +++ b/MPIWGThesaurus.py Wed Dec 21 22:11:16 2011 +0100 @@ -62,8 +62,11 @@ TMP_PERSON_NS="http://ontologies.mpiwg-berlin.mpg.de/tempObjects/person/" ONTOLOGY_NS="http://ontologies.mpiwg-berlin.mpg.de/authorities/namedIdentities/" - #personproviderURL="http://127.0.0.1:8280/MetaDataManagerRestlet/person/" - personproviderURL="http://virtuoso.mpiwg-berlin.mpg.de:8080/MetaDataManagerRestlet/person/" + personproviderURL="http://127.0.0.1:8280/MetaDataManagerRestlet/person/" + #personproviderURL="http://virtuoso.mpiwg-berlin.mpg.de:8080/MetaDataManagerRestlet/person/" + #PERSONS_LOOKUP_URL="http://127.0.0.1:8280/MetaDataManagerRestlet/search/persons" + PERSONS_LOOKUP_URL="http://127.0.0.1:8280/MetaDataManagerRestlet/persons" + additionalNamesGraphURL="file://newpersonsFromProjects" #virtuosoServer="http://ontologies.mpiwg-berlin.mpg.de" @@ -75,11 +78,11 @@ #BTrees fuer die Tags - projectPersons= OOBTree() + projectPersons= OOBTree() #project --> personen IDs projectObjects= OOBTree() projectHistoricalPlaces= OOBTree() projectSuggestedTags= OOBTree() - persons2Projects= OOBTree() + persons2Projects= OOBTree() #personenID --> projects objects2Projects=OOBTree() historicalPlaces2Projects=OOBTree() suggestedTags2Projects=OOBTree() @@ -200,12 +203,19 @@ #In der Anzeige soll der Name der zitierten Personen in Klartext angezeigt werden, ausserdem die Varianten, wie sie tatsaechlich #in den Projekten benutzt werden - def addPersonAndFirstNameFromTripleStore(self, personID): mainName, sortName = self.getMainNameFromTripleStore(personID) # hole die hauptbezeichnung aus dem triplestore personNames = [] - for project in self.persons2Projects.get(personID): #hole die personen aus dem projekte + logging.debug("get person:"+ personID) + logging.debug("names:"+repr(mainName)+":"+ repr(sortName)) + + projects=self.persons2Projects.get(personID) + logging.debug(repr(projects)) + if projects is None: + projects=[] + + for project in projects: #hole die personen aus dem projekte logging.debug("Found:project:" + project) namesInProject = self.projectPersons.get(project) for nameInProjectTuple in namesInProject: @@ -266,9 +276,12 @@ # #Hole die Namen, die einer personID zugeordnet sind. def getNamesFromID(self,personID): + personID=personID.rstrip().lstrip() #make sure no spaces + logging.debug("<"+personID+">") retStr="" - retStr+=self.personIDtoNames.get(personID)[0] # hole die Hauptbezeichnung - additionalNames = self.personIDtoNames.get(personID)[1] + + retStr+=self.personIDtoNames.get(personID,[personID])[0] # hole die Hauptbezeichnung, falls keine angeben ist, wird die ID ausgegeben, das is jedoch ein Felher in den Daten!! + additionalNames = self.personIDtoNames.get(personID,['',[]])[1] if len(additionalNames)>0: retStr+=" ("+",".join(additionalNames)+","+")" return retStr @@ -294,6 +307,43 @@ return names,(lastName,firstName) + def callSparqlAll(self,cmdString): + """list of results""" + auth_handler = urllib2.HTTPBasicAuthHandler() + auth_handler.add_password(realm='sparql', + uri=self.virtuosoServer+"/sparql", + user=self.virtuosoDAVUser, + passwd=self.virtuosoDAVPW) + + opener = urllib2.build_opener(auth_handler) + opener.addheaders = [('Content-Type','application/sparql-query')] + + logging.debug(cmdString) + try: + logging.debug(self.virtuosoServer+"/sparql?" + urllib.urlencode({'query':cmdString,'default-graph-uri':self.virtuosoGraph,'named-graph-uri':'','format':'text/csv'})) + #r= opener.open(self.virtuosoServer+"/sparql", urllib.urlencode({'query':cmdString,'default-graph-uri':self.virtuosoGraph,'named-graph-uri':'','format':'text/csv'})) + r= opener.open(self.virtuosoServer+"/sparql", urllib.urlencode({'query':cmdString,'default-graph-uri':'','named-graph-uri':'','format':'text/csv'})) + namesTxt=r.read() + except urllib2.URLError, e: + logging.error(e.code) + logging.error(e.read()) + + + return + logging.debug(namesTxt) + names=namesTxt.split("\n") + if len(names) < 2: #in der ersten Zeile stehen bei der Rückgabe die Spaltennamen, <2 heiss also es gibt keinen Eintrag + return [] + + ret=[] + for name in names[1:]: + line=[] + for entry in name.split("\",\""): + + line.append(entry.replace('"','')); + ret.append(line); + return ret; + def callSparql(self,cmdString): auth_handler = urllib2.HTTPBasicAuthHandler() @@ -731,22 +781,201 @@ setattr(self,"persons2Projects",OOBTree()) projects = self.persons2Projects.get(person,None) - - if projects==None: #person hatte noch keine projekte + logging.debug("found projects:"+ repr(projects)) + if projects is None: #person hatte noch keine projekte projects=OOSet(); - self.addPersonAndFirstNameFromTripleStore(person) - self.generateSortingOrderForPersonIDs(); projects.insert(projectID ) logging.debug("update:"+person) self.persons2Projects.update({person:projects}) - + + self.addPersonAndFirstNameFromTripleStore(person) + self.generateSortingOrderForPersonIDs(); + self.addNameTOPersonIDNames(value,label) retstring = self.getPersonsFromProjectAsHTML(projectID) logging.debug(retstring) transaction.commit() return retstring + + + def rearangePersonIDsHTML(self,REQUEST=None): + """setze neue ID zu personen""" + ret=[] + for personID in self.persons2Projects.keys(): + logging.debug(personID) + + masterID,name=self.findMasterIDAndName(personID.rstrip().lstrip()) + logging.debug(" masterID -appending") + logging.debug(repr(name)) + if len(name)==0: + name=[''] + ret.append((personID,self.personIDtoNames.get(personID,[''])[0],masterID,self.personIDtoNames.get(masterID,name)[0])) + + + if REQUEST: + pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','rearrangePersons.zpt')).__of__(self) + return pt(changeList=ret,lookupUrl=self.PERSONS_LOOKUP_URL); + else: + return ret; + + def rearangePersonIDs(self,REQUEST): + """unify a list of persons""" + + argv=REQUEST.form; + logging.debug(repr(argv)) + changes=argv['changes'] + if isinstance(changes,str): + changes=[changes] + + changeList=self.rearangePersonIDsHTML() + personToMaster={} + logging.debug("changelist:"+repr(changeList)) + #aendere person2project + for change in changes: + changeItem=changeList[int(change)]; + masterID=argv['newID_'+change].lstrip().rstrip() #make sure no spaces + personID=changeItem[0] + + personToMaster[personID]=masterID + masterIDProjects = self.persons2Projects.get(masterID,None); + if masterIDProjects==None: + masterIDProjects=OOSet(); + #ret.append((personID,self.personIDtoNames.get(personID,[''])[0],masterID,self.personIDtoNames.get(masterID,[''])[0])) + + oldProjects= self.persons2Projects.get(personID) + logging.debug("personID:"+repr(personID)) + logging.debug("masterID:"+repr(masterID)) + logging.debug("keys:"+repr(self.persons2Projects.keys())) + logging.debug("oldProjects:"+repr(oldProjects)) + masterIDProjects.update(oldProjects) + self.persons2Projects.update({masterID:masterIDProjects}) + self.persons2Projects.pop(personID) + + self.addPersonAndFirstNameFromTripleStore(masterID) #addpersontotiplestore + + logging.debug("Change:" +personID+":"+ masterID) + + if personID!=masterID: + self.addSameAsRelationToTripleStore(personID,masterID) + + #aendere nun projectperson + logging.debug(personToMaster) + for project in self.projectPersons.keys(): + personsNew=OOSet() + + persons=self.projectPersons.get(project) + for person in persons: + personsNew.insert([personToMaster.get(person[0],person[0]),person[1]]) + logging.debug("REPLACE in:"+project+":" +repr(person)+" by "+ repr(personToMaster.get(person[0],person[0]))) + self.projectPersons.update({project:personsNew}) + + self.generateSortingOrderForPersonIDs(); #now sort the new names + return personToMaster + #fuehre personen zusammen die ueber owl:sameAs verbunden sind + def getUnifyPersonsList(self,REQUEST=None): + """vereinheitlichung der personen auf eine ID aus der GND, wenn moeglich""" + ret=[] + for personID in self.persons2Projects.keys(): + masterID,name=self.findMasterIDAndName(personID) + logging.debug("masterID:"+masterID) + if (masterID is not None) and (masterID is not "") and (not personID==masterID): + #masterIDProjects = self.persons2Projects.get(masterID,None); + ##if masterIDProjects==None: + # masterIDProjects=OOSet(); + logging.debug(" masterID -appending") + logging.debug(repr(name)) + + + ret.append((personID,self.personIDtoNames.get(personID,[''])[0],masterID,self.personIDtoNames.get(masterID,name)[0])) + #masterIDProjects.update(self.persons2Projects.get(personID)); + #self.persons2Projects.update({masterID:masterIDProjects}); + + + if REQUEST: + pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','unifyPersons.zpt')).__of__(self) + return pt(changeList=ret); + + else: + return ret; + + def unifyPersons(self,changes,REQUEST=None): + """unify a list of persons""" + + if isinstance(changes,str): + changes=[changes] + + changeList=self.getUnifyPersonsList(); + personToMaster={} + logging.debug("changelist:"+repr(changeList)) + #aendere person2project + for change in changes: + changeItem=changeList[int(change)]; + masterID=changeItem[2] + personID=changeItem[0] + + personToMaster[personID]=masterID + masterIDProjects = self.persons2Projects.get(masterID,None); + if masterIDProjects==None: + masterIDProjects=OOSet(); + #ret.append((personID,self.personIDtoNames.get(personID,[''])[0],masterID,self.personIDtoNames.get(masterID,[''])[0])) + + oldProjects= self.persons2Projects.get(personID) + logging.debug("personID:"+repr(personID)) + logging.debug("masterID:"+repr(masterID)) + logging.debug("keys:"+repr(self.persons2Projects.keys())) + logging.debug("oldProjects:"+repr(oldProjects)) + masterIDProjects.update(oldProjects) + self.persons2Projects.update({masterID:masterIDProjects}) + self.persons2Projects.pop(personID) + + self.addPersonAndFirstNameFromTripleStore(masterID) #addpersontotiplestore + + logging.debug("Change:" +personID+":"+ masterID) + + #aendere nun projectperson + logging.debug(personToMaster) + for project in self.projectPersons.keys(): + personsNew=OOSet() + + persons=self.projectPersons.get(project) + for person in persons: + personsNew.insert([personToMaster.get(person[0],person[0]),person[1]]) + logging.debug("REPLACE in:"+project+":" +repr(person)+" by "+ repr(personToMaster.get(person[0],person[0]))) + self.projectPersons.update({project:personsNew}) + + self.generateSortingOrderForPersonIDs(); #now sort the new names + return personToMaster + + + + def findMasterIDAndName(self,ressourceID): + queryString="""select * +FROM +FROM +FROM +FROM +where { +?person <%s>. + +?ident ?person. +?gnd crm:P1_is_identified_by ?ident. + +?gnd ?birthDate. +?gnd ?deathDate. +?person ?name. +?person ?lastName. +?person ?firstName. +} +"""%ressourceID + entries = self.callSparqlAll(queryString); + if len(entries)>0: + return entries[0][0],entries[0][5:8] #nur den ersten Treffer und nur die personID + + + return None,None + security.declareProtected('View management screens','getPersonsWithProjectIDs') def getPersonsWithProjectIDs(self,check=False): """holt die getaggted Personen mit Projekten""" @@ -757,8 +986,17 @@ personsList=[x for x in persons.keys()] def sort(x,y): - sortNrx=self.personsIDForSort.index(x) - sortNry=self.personsIDForSort.index(y) + try: + sortNrx=self.personsIDForSort.index(x) + except: + logging.warn("couldn't find personsIDForSort:"+x) + sortNrx=0 + + try: + sortNry=self.personsIDForSort.index(y) + except: + logging.warn("couldn't find personsIDForSort:"+y) + sortNry=0 #logging.debug("INSORT***") #logging.debug((sortNrx,sortNry)) return cmp(sortNrx,sortNry) @@ -776,7 +1014,8 @@ #TODO: person muss duch den namen von provider geholt werden retlist=[] projectsList=persons.get(person) - + if projectsList is None: + projectsList=[] for projectID in list(projectsList): #list notwendig da projectList in der folgenden iteration veraendert wird. if check: #teste ob im Projekt noch ein Verweis auf den Namen steht #sollte eigentlich nicht sein. @@ -1028,6 +1267,31 @@ self.createTempPersonInVirtuoso(projectID, idstring, personName, personComment) #TODO: add suername to the triplestore return self.addPersonToProject(projectID, idstring, personName); + def addSameAsRelationToTripleStore(self,personID,masterID): + cmdString ="insert in GRAPH <"+self.virtuosoGraph+"> {" + cmdString +="<%s> <%s>."%(personID,masterID) + cmdString +="<%s> <%s>."%(masterID,personID) + cmdString+="}" + + auth_handler = urllib2.HTTPBasicAuthHandler() + auth_handler.add_password(realm='sparql', + uri=self.virtuosoServer+"/sparql", + user=self.virtuosoDAVUser, + passwd=self.virtuosoDAVPW) + + opener = urllib2.build_opener(auth_handler) + opener.addheaders = [('Content-Type','application/sparql-query')] + + logging.debug(cmdString) + try: + r= opener.open(self.virtuosoServer+"/sparql", urllib.urlencode({'query':cmdString,'default-graph-uri':self.virtuosoGraph,'named-graph-uri':None})) + logging.debug(r.read()) + except urllib2.URLError, e: + logging.error(e.code) + logging.error(e.read()) + return + + security.declareProtected('View management screens','createTempPersonInVirtuoso') def createTempPersonInVirtuoso(self,projectID,personURI, personName,personComment): """add a new person to the triple store of tempory persons""" diff -r fcab446bca79 -r e21db3150dae zpt/manageMPIWGThesaurus.zpt --- a/zpt/manageMPIWGThesaurus.zpt Wed Oct 12 16:15:57 2011 +0200 +++ b/zpt/manageMPIWGThesaurus.zpt Wed Dec 21 22:11:16 2011 +0100 @@ -5,6 +5,8 @@
  • Zeige alle vorgeschlagenen Tags
  • Hole/Update die Namenseintraege aus dem Triplestore, z.B. falls im Triplestore Namem geaendert worden
  • Hole all Personen mit ProjectIDs, raeumt zu gleich den Cache Persone-->ProjectID auf
  • +
  • Ordne neue IDs vorhandenen Personen zu, insbesondere ersetze temporäre IDs durch GND oder andere,
  • +
  • Ordne GND ids zu, falls bisher IDs aus anderen Kontexten vergeben wurden.
  • \ No newline at end of file diff -r fcab446bca79 -r e21db3150dae zpt/rearrangePersons.zpt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/zpt/rearrangePersons.zpt Wed Dec 21 22:11:16 2011 +0100 @@ -0,0 +1,21 @@ + + +

    Unify persons

    +
    + + + + + + + + +
    Nr.saved IDNameNew IDUnify?
    + +see +look up
    + +
    + + \ No newline at end of file diff -r fcab446bca79 -r e21db3150dae zpt/unifyPersons.zpt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/zpt/unifyPersons.zpt Wed Dec 21 22:11:16 2011 +0100 @@ -0,0 +1,20 @@ + + +

    Unify persons

    +
    + + + + + + + +
    Nr.ID not from GNDNameGND idGND NameUnify?
    + + + +
    + +
    + + \ No newline at end of file