Mercurial > hg > MPIWGThesaurus
changeset 7:e21db3150dae
manage persons
author | dwinter |
---|---|
date | Wed, 21 Dec 2011 22:11:16 +0100 |
parents | fcab446bca79 |
children | 3a5a7c2552c8 |
files | MPIWGThesaurus.py zpt/manageMPIWGThesaurus.zpt zpt/rearrangePersons.zpt zpt/unifyPersons.zpt |
diffstat | 4 files changed, 323 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
--- a/MPIWGThesaurus.py Wed Oct 12 16:15:57 2011 +0200 +++ b/MPIWGThesaurus.py Wed Dec 21 22:11:16 2011 +0100 @@ -62,8 +62,11 @@ TMP_PERSON_NS="http://ontologies.mpiwg-berlin.mpg.de/tempObjects/person/" ONTOLOGY_NS="http://ontologies.mpiwg-berlin.mpg.de/authorities/namedIdentities/" - #personproviderURL="http://127.0.0.1:8280/MetaDataManagerRestlet/person/" - personproviderURL="http://virtuoso.mpiwg-berlin.mpg.de:8080/MetaDataManagerRestlet/person/" + personproviderURL="http://127.0.0.1:8280/MetaDataManagerRestlet/person/" + #personproviderURL="http://virtuoso.mpiwg-berlin.mpg.de:8080/MetaDataManagerRestlet/person/" + #PERSONS_LOOKUP_URL="http://127.0.0.1:8280/MetaDataManagerRestlet/search/persons" + PERSONS_LOOKUP_URL="http://127.0.0.1:8280/MetaDataManagerRestlet/persons" + additionalNamesGraphURL="file://newpersonsFromProjects" #virtuosoServer="http://ontologies.mpiwg-berlin.mpg.de" @@ -75,11 +78,11 @@ #BTrees fuer die Tags - projectPersons= OOBTree() + projectPersons= OOBTree() #project --> personen IDs projectObjects= OOBTree() projectHistoricalPlaces= OOBTree() projectSuggestedTags= OOBTree() - persons2Projects= OOBTree() + persons2Projects= OOBTree() #personenID --> projects objects2Projects=OOBTree() historicalPlaces2Projects=OOBTree() suggestedTags2Projects=OOBTree() @@ -200,12 +203,19 @@ #In der Anzeige soll der Name der zitierten Personen in Klartext angezeigt werden, ausserdem die Varianten, wie sie tatsaechlich #in den Projekten benutzt werden - def addPersonAndFirstNameFromTripleStore(self, personID): mainName, sortName = self.getMainNameFromTripleStore(personID) # hole die hauptbezeichnung aus dem triplestore personNames = [] - for project in self.persons2Projects.get(personID): #hole die personen aus dem projekte + logging.debug("get person:"+ personID) + logging.debug("names:"+repr(mainName)+":"+ repr(sortName)) + + projects=self.persons2Projects.get(personID) + logging.debug(repr(projects)) + if projects is None: + projects=[] + + for project in projects: #hole die personen aus dem projekte logging.debug("Found:project:" + project) namesInProject = self.projectPersons.get(project) for nameInProjectTuple in namesInProject: @@ -266,9 +276,12 @@ # #Hole die Namen, die einer personID zugeordnet sind. def getNamesFromID(self,personID): + personID=personID.rstrip().lstrip() #make sure no spaces + logging.debug("<"+personID+">") retStr="" - retStr+=self.personIDtoNames.get(personID)[0] # hole die Hauptbezeichnung - additionalNames = self.personIDtoNames.get(personID)[1] + + retStr+=self.personIDtoNames.get(personID,[personID])[0] # hole die Hauptbezeichnung, falls keine angeben ist, wird die ID ausgegeben, das is jedoch ein Felher in den Daten!! + additionalNames = self.personIDtoNames.get(personID,['',[]])[1] if len(additionalNames)>0: retStr+=" ("+",".join(additionalNames)+","+")" return retStr @@ -294,6 +307,43 @@ return names,(lastName,firstName) + def callSparqlAll(self,cmdString): + """list of results""" + auth_handler = urllib2.HTTPBasicAuthHandler() + auth_handler.add_password(realm='sparql', + uri=self.virtuosoServer+"/sparql", + user=self.virtuosoDAVUser, + passwd=self.virtuosoDAVPW) + + opener = urllib2.build_opener(auth_handler) + opener.addheaders = [('Content-Type','application/sparql-query')] + + logging.debug(cmdString) + try: + logging.debug(self.virtuosoServer+"/sparql?" + urllib.urlencode({'query':cmdString,'default-graph-uri':self.virtuosoGraph,'named-graph-uri':'','format':'text/csv'})) + #r= opener.open(self.virtuosoServer+"/sparql", urllib.urlencode({'query':cmdString,'default-graph-uri':self.virtuosoGraph,'named-graph-uri':'','format':'text/csv'})) + r= opener.open(self.virtuosoServer+"/sparql", urllib.urlencode({'query':cmdString,'default-graph-uri':'','named-graph-uri':'','format':'text/csv'})) + namesTxt=r.read() + except urllib2.URLError, e: + logging.error(e.code) + logging.error(e.read()) + + + return + logging.debug(namesTxt) + names=namesTxt.split("\n") + if len(names) < 2: #in der ersten Zeile stehen bei der Rückgabe die Spaltennamen, <2 heiss also es gibt keinen Eintrag + return [] + + ret=[] + for name in names[1:]: + line=[] + for entry in name.split("\",\""): + + line.append(entry.replace('"','')); + ret.append(line); + return ret; + def callSparql(self,cmdString): auth_handler = urllib2.HTTPBasicAuthHandler() @@ -731,22 +781,201 @@ setattr(self,"persons2Projects",OOBTree()) projects = self.persons2Projects.get(person,None) - - if projects==None: #person hatte noch keine projekte + logging.debug("found projects:"+ repr(projects)) + if projects is None: #person hatte noch keine projekte projects=OOSet(); - self.addPersonAndFirstNameFromTripleStore(person) - self.generateSortingOrderForPersonIDs(); projects.insert(projectID ) logging.debug("update:"+person) self.persons2Projects.update({person:projects}) - + + self.addPersonAndFirstNameFromTripleStore(person) + self.generateSortingOrderForPersonIDs(); + self.addNameTOPersonIDNames(value,label) retstring = self.getPersonsFromProjectAsHTML(projectID) logging.debug(retstring) transaction.commit() return retstring + + + def rearangePersonIDsHTML(self,REQUEST=None): + """setze neue ID zu personen""" + ret=[] + for personID in self.persons2Projects.keys(): + logging.debug(personID) + + masterID,name=self.findMasterIDAndName(personID.rstrip().lstrip()) + logging.debug(" masterID -appending") + logging.debug(repr(name)) + if len(name)==0: + name=[''] + ret.append((personID,self.personIDtoNames.get(personID,[''])[0],masterID,self.personIDtoNames.get(masterID,name)[0])) + + + if REQUEST: + pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','rearrangePersons.zpt')).__of__(self) + return pt(changeList=ret,lookupUrl=self.PERSONS_LOOKUP_URL); + else: + return ret; + + def rearangePersonIDs(self,REQUEST): + """unify a list of persons""" + + argv=REQUEST.form; + logging.debug(repr(argv)) + changes=argv['changes'] + if isinstance(changes,str): + changes=[changes] + + changeList=self.rearangePersonIDsHTML() + personToMaster={} + logging.debug("changelist:"+repr(changeList)) + #aendere person2project + for change in changes: + changeItem=changeList[int(change)]; + masterID=argv['newID_'+change].lstrip().rstrip() #make sure no spaces + personID=changeItem[0] + + personToMaster[personID]=masterID + masterIDProjects = self.persons2Projects.get(masterID,None); + if masterIDProjects==None: + masterIDProjects=OOSet(); + #ret.append((personID,self.personIDtoNames.get(personID,[''])[0],masterID,self.personIDtoNames.get(masterID,[''])[0])) + + oldProjects= self.persons2Projects.get(personID) + logging.debug("personID:"+repr(personID)) + logging.debug("masterID:"+repr(masterID)) + logging.debug("keys:"+repr(self.persons2Projects.keys())) + logging.debug("oldProjects:"+repr(oldProjects)) + masterIDProjects.update(oldProjects) + self.persons2Projects.update({masterID:masterIDProjects}) + self.persons2Projects.pop(personID) + + self.addPersonAndFirstNameFromTripleStore(masterID) #addpersontotiplestore + + logging.debug("Change:" +personID+":"+ masterID) + + if personID!=masterID: + self.addSameAsRelationToTripleStore(personID,masterID) + + #aendere nun projectperson + logging.debug(personToMaster) + for project in self.projectPersons.keys(): + personsNew=OOSet() + + persons=self.projectPersons.get(project) + for person in persons: + personsNew.insert([personToMaster.get(person[0],person[0]),person[1]]) + logging.debug("REPLACE in:"+project+":" +repr(person)+" by "+ repr(personToMaster.get(person[0],person[0]))) + self.projectPersons.update({project:personsNew}) + + self.generateSortingOrderForPersonIDs(); #now sort the new names + return personToMaster + #fuehre personen zusammen die ueber owl:sameAs verbunden sind + def getUnifyPersonsList(self,REQUEST=None): + """vereinheitlichung der personen auf eine ID aus der GND, wenn moeglich""" + ret=[] + for personID in self.persons2Projects.keys(): + masterID,name=self.findMasterIDAndName(personID) + logging.debug("masterID:"+masterID) + if (masterID is not None) and (masterID is not "") and (not personID==masterID): + #masterIDProjects = self.persons2Projects.get(masterID,None); + ##if masterIDProjects==None: + # masterIDProjects=OOSet(); + logging.debug(" masterID -appending") + logging.debug(repr(name)) + + + ret.append((personID,self.personIDtoNames.get(personID,[''])[0],masterID,self.personIDtoNames.get(masterID,name)[0])) + #masterIDProjects.update(self.persons2Projects.get(personID)); + #self.persons2Projects.update({masterID:masterIDProjects}); + + + if REQUEST: + pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','unifyPersons.zpt')).__of__(self) + return pt(changeList=ret); + + else: + return ret; + + def unifyPersons(self,changes,REQUEST=None): + """unify a list of persons""" + + if isinstance(changes,str): + changes=[changes] + + changeList=self.getUnifyPersonsList(); + personToMaster={} + logging.debug("changelist:"+repr(changeList)) + #aendere person2project + for change in changes: + changeItem=changeList[int(change)]; + masterID=changeItem[2] + personID=changeItem[0] + + personToMaster[personID]=masterID + masterIDProjects = self.persons2Projects.get(masterID,None); + if masterIDProjects==None: + masterIDProjects=OOSet(); + #ret.append((personID,self.personIDtoNames.get(personID,[''])[0],masterID,self.personIDtoNames.get(masterID,[''])[0])) + + oldProjects= self.persons2Projects.get(personID) + logging.debug("personID:"+repr(personID)) + logging.debug("masterID:"+repr(masterID)) + logging.debug("keys:"+repr(self.persons2Projects.keys())) + logging.debug("oldProjects:"+repr(oldProjects)) + masterIDProjects.update(oldProjects) + self.persons2Projects.update({masterID:masterIDProjects}) + self.persons2Projects.pop(personID) + + self.addPersonAndFirstNameFromTripleStore(masterID) #addpersontotiplestore + + logging.debug("Change:" +personID+":"+ masterID) + + #aendere nun projectperson + logging.debug(personToMaster) + for project in self.projectPersons.keys(): + personsNew=OOSet() + + persons=self.projectPersons.get(project) + for person in persons: + personsNew.insert([personToMaster.get(person[0],person[0]),person[1]]) + logging.debug("REPLACE in:"+project+":" +repr(person)+" by "+ repr(personToMaster.get(person[0],person[0]))) + self.projectPersons.update({project:personsNew}) + + self.generateSortingOrderForPersonIDs(); #now sort the new names + return personToMaster + + + + def findMasterIDAndName(self,ressourceID): + queryString="""select * +FROM <file://mpiwg_persons_dnb.rdf> +FROM <file://mpiwg_persons_2.rdf> +FROM <file:///GND.rdf> +FROM <http://identifiedNames> +where { +?person <http://www.w3.org/2002/07/owl#sameAs> <%s>. + +?ident <http://ontologies.mpiwg-berlin.mpg.de/authorities/namedIdentities/identifies_NamedEntity> ?person. +?gnd crm:P1_is_identified_by ?ident. + +?gnd <http://RDVocab.info/ElementsGr2/dateOfBirth> ?birthDate. +?gnd <http://RDVocab.info/ElementsGr2/dateOfDeath> ?deathDate. +?person <http://xmlns.com/foaf/0.1/name> ?name. +?person <http://xmlns.com/foaf/0.1/lastName> ?lastName. +?person <http://xmlns.com/foaf/0.1/firstName> ?firstName. +} +"""%ressourceID + entries = self.callSparqlAll(queryString); + if len(entries)>0: + return entries[0][0],entries[0][5:8] #nur den ersten Treffer und nur die personID + + + return None,None + security.declareProtected('View management screens','getPersonsWithProjectIDs') def getPersonsWithProjectIDs(self,check=False): """holt die getaggted Personen mit Projekten""" @@ -757,8 +986,17 @@ personsList=[x for x in persons.keys()] def sort(x,y): - sortNrx=self.personsIDForSort.index(x) - sortNry=self.personsIDForSort.index(y) + try: + sortNrx=self.personsIDForSort.index(x) + except: + logging.warn("couldn't find personsIDForSort:"+x) + sortNrx=0 + + try: + sortNry=self.personsIDForSort.index(y) + except: + logging.warn("couldn't find personsIDForSort:"+y) + sortNry=0 #logging.debug("INSORT***") #logging.debug((sortNrx,sortNry)) return cmp(sortNrx,sortNry) @@ -776,7 +1014,8 @@ #TODO: person muss duch den namen von provider geholt werden retlist=[] projectsList=persons.get(person) - + if projectsList is None: + projectsList=[] for projectID in list(projectsList): #list notwendig da projectList in der folgenden iteration veraendert wird. if check: #teste ob im Projekt noch ein Verweis auf den Namen steht #sollte eigentlich nicht sein. @@ -1028,6 +1267,31 @@ self.createTempPersonInVirtuoso(projectID, idstring, personName, personComment) #TODO: add suername to the triplestore return self.addPersonToProject(projectID, idstring, personName); + def addSameAsRelationToTripleStore(self,personID,masterID): + cmdString ="insert in GRAPH <"+self.virtuosoGraph+"> {" + cmdString +="<%s> <http://www.w3.org/2002/07/owl#sameAs> <%s>."%(personID,masterID) + cmdString +="<%s> <http://www.w3.org/2002/07/owl#sameAs> <%s>."%(masterID,personID) + cmdString+="}" + + auth_handler = urllib2.HTTPBasicAuthHandler() + auth_handler.add_password(realm='sparql', + uri=self.virtuosoServer+"/sparql", + user=self.virtuosoDAVUser, + passwd=self.virtuosoDAVPW) + + opener = urllib2.build_opener(auth_handler) + opener.addheaders = [('Content-Type','application/sparql-query')] + + logging.debug(cmdString) + try: + r= opener.open(self.virtuosoServer+"/sparql", urllib.urlencode({'query':cmdString,'default-graph-uri':self.virtuosoGraph,'named-graph-uri':None})) + logging.debug(r.read()) + except urllib2.URLError, e: + logging.error(e.code) + logging.error(e.read()) + return + + security.declareProtected('View management screens','createTempPersonInVirtuoso') def createTempPersonInVirtuoso(self,projectID,personURI, personName,personComment): """add a new person to the triple store of tempory persons"""
--- a/zpt/manageMPIWGThesaurus.zpt Wed Oct 12 16:15:57 2011 +0200 +++ b/zpt/manageMPIWGThesaurus.zpt Wed Dec 21 22:11:16 2011 +0100 @@ -5,6 +5,8 @@ <li><a href="listAllSuggestedTags">Zeige alle vorgeschlagenen Tags</a></li> <li><a href="collectPersonNamesToIds">Hole/Update die Namenseintraege aus dem Triplestore, z.B. falls im Triplestore Namem geaendert worden</a></li> <li><a href="getPersonsWithProjectIDs?check=bool:True">Hole all Personen mit ProjectIDs, raeumt zu gleich den Cache Persone-->ProjectID auf</a></li> +<li><a href="rearangePersonIDsHTML">Ordne neue IDs vorhandenen Personen zu, insbesondere ersetze temporäre IDs durch GND oder andere,</a></li> +<li><a href="getUnifyPersonsList">Ordne GND ids zu, falls bisher IDs aus anderen Kontexten vergeben wurden.</a></li> </ul> </body> </html> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/zpt/rearrangePersons.zpt Wed Dec 21 22:11:16 2011 +0100 @@ -0,0 +1,21 @@ +<html> +<body tal:define="global i python:-1"> +<h1>Unify persons</h1> +<form method="post" action="rearangePersonIDs"> +<table> +<tr><td>Nr.</td><td>saved ID</td><td>Name</td><td>New ID</td><td>Unify?</td> +</tr> +<tr tal:repeat="item python:options['changeList']" > +<td tal:define="global i python:i+1" tal:content="python:i"/> +<td><a tal:attributes="href python:item[0]" tal:content="python:item[0]"/></td> +<td tal:content="python:item[1]"/> +<td><a tal:attributes="href python:item[2]">see</a><input size="100" tal:attributes="name python:'newID_'+str(i); +value python:item[2]"/></td> +<td><input type="checkbox" name="changes" tal:attributes="value python:i"/> +<td><a tal:attributes="href python:options['lookupUrl']">look up</a></td> +</tr> +</table> +<input type="submit"/> +</form> +</body> +</html> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/zpt/unifyPersons.zpt Wed Dec 21 22:11:16 2011 +0100 @@ -0,0 +1,20 @@ +<html> +<body tal:define="global i python:-1"> +<h1>Unify persons</h1> +<form method="post" action="unifyPersons"> +<table> +<tr><td>Nr.</td><td>ID not from GND</td><td>Name</td><td>GND id</td><td>GND Name</td><td>Unify?</td> +</tr> +<tr tal:repeat="item python:options['changeList']" > +<td tal:define="global i python:i+1" tal:content="python:i"/> +<td><a tal:attributes="href python:item[0]" tal:content="python:item[0]"/></td> +<td tal:content="python:item[1]"/> +<td><a tal:attributes="href python:item[2]" tal:content="python:item[2]"/></td> +<td tal:content="python:item[3]"/> +<td><input type="checkbox" name="changes" tal:attributes="value python:i"/> +</tr> +</table> +<input type="submit"/> +</form> +</body> +</html> \ No newline at end of file