changeset 7:e21db3150dae

manage persons
author dwinter
date Wed, 21 Dec 2011 22:11:16 +0100
parents fcab446bca79
children 3a5a7c2552c8
files MPIWGThesaurus.py zpt/manageMPIWGThesaurus.zpt zpt/rearrangePersons.zpt zpt/unifyPersons.zpt
diffstat 4 files changed, 323 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/MPIWGThesaurus.py	Wed Oct 12 16:15:57 2011 +0200
+++ b/MPIWGThesaurus.py	Wed Dec 21 22:11:16 2011 +0100
@@ -62,8 +62,11 @@
    
     TMP_PERSON_NS="http://ontologies.mpiwg-berlin.mpg.de/tempObjects/person/"
     ONTOLOGY_NS="http://ontologies.mpiwg-berlin.mpg.de/authorities/namedIdentities/"
-    #personproviderURL="http://127.0.0.1:8280/MetaDataManagerRestlet/person/"
-    personproviderURL="http://virtuoso.mpiwg-berlin.mpg.de:8080/MetaDataManagerRestlet/person/"
+    personproviderURL="http://127.0.0.1:8280/MetaDataManagerRestlet/person/"
+    #personproviderURL="http://virtuoso.mpiwg-berlin.mpg.de:8080/MetaDataManagerRestlet/person/"
+    #PERSONS_LOOKUP_URL="http://127.0.0.1:8280/MetaDataManagerRestlet/search/persons"
+    PERSONS_LOOKUP_URL="http://127.0.0.1:8280/MetaDataManagerRestlet/persons"
+    #PERSONS_LOOKUP_URL is handed to rearrangePersons.zpt as lookupUrl. NOTE(review): both URLs now point at 127.0.0.1 - confirm this is intended for deployment
     additionalNamesGraphURL="file://newpersonsFromProjects"
     
     #virtuosoServer="http://ontologies.mpiwg-berlin.mpg.de"
@@ -75,11 +78,11 @@
     
     #BTrees fuer die Tags
     
-    projectPersons= OOBTree()
+    projectPersons= OOBTree() #project --> entries whose [0] is the person ID (presumably (person ID, name) pairs)
     projectObjects= OOBTree()
     projectHistoricalPlaces= OOBTree()
     projectSuggestedTags= OOBTree()
-    persons2Projects= OOBTree()
+    persons2Projects= OOBTree() #person ID --> projects
     objects2Projects=OOBTree()
     historicalPlaces2Projects=OOBTree()
     suggestedTags2Projects=OOBTree()
@@ -200,12 +203,19 @@
     #In der Anzeige soll der Name der zitierten Personen in Klartext angezeigt werden, ausserdem die Varianten, wie sie tatsaechlich
     #in den Projekten benutzt werden
     
-    
 
     def addPersonAndFirstNameFromTripleStore(self, personID):
         mainName, sortName = self.getMainNameFromTripleStore(personID) # hole die hauptbezeichnung aus dem triplestore
         personNames = []
-        for project in self.persons2Projects.get(personID): #hole die personen aus dem projekte
+        logging.debug("get person:"+ personID)
+        logging.debug("names:"+repr(mainName)+":"+ repr(sortName))
+        
+        projects=self.persons2Projects.get(personID)
+        logging.debug(repr(projects))
+        if projects is None:
+            projects=[]
+            
+        for project in projects: #hole die personen aus dem projekte
             logging.debug("Found:project:" + project)
             namesInProject = self.projectPersons.get(project)
             for nameInProjectTuple in namesInProject:
@@ -266,9 +276,12 @@
 #    
     #Hole die Namen, die einer personID zugeordnet sind.
     def getNamesFromID(self,personID):
+        personID=personID.rstrip().lstrip() #strip surrounding whitespace from the ID
+        logging.debug("<"+personID+">")
         retStr=""
-        retStr+=self.personIDtoNames.get(personID)[0] # hole die Hauptbezeichnung
-        additionalNames = self.personIDtoNames.get(personID)[1]
+        #entries are read as [main name, list of additional names]; NOTE(review): the join below always leaves a trailing "," before ")"
+        retStr+=self.personIDtoNames.get(personID,[personID])[0] # fetch the main name; if none is stored the ID itself is returned, which however indicates an error in the data!!
+        additionalNames = self.personIDtoNames.get(personID,['',[]])[1]
         if len(additionalNames)>0:
             retStr+=" ("+",".join(additionalNames)+","+")"
         return retStr
@@ -294,6 +307,43 @@
         return names,(lastName,firstName)
     
     
+    def callSparqlAll(self,cmdString):
+        """Send cmdString to the SPARQL endpoint and return the CSV answer as a list of rows (lists of strings); [] if the request fails."""
+        auth_handler = urllib2.HTTPBasicAuthHandler()
+        auth_handler.add_password(realm='sparql',
+                          uri=self.virtuosoServer+"/sparql",
+                          user=self.virtuosoDAVUser,
+                          passwd=self.virtuosoDAVPW)
+                          
+        opener = urllib2.build_opener(auth_handler)
+        opener.addheaders = [('Content-Type','application/sparql-query')]
+        
+        logging.debug(cmdString)
+        #no default graph is sent, so the query has to pick its graphs itself (e.g. FROM); encoded once so the log shows what is really requested
+        params=urllib.urlencode({'query':cmdString,'default-graph-uri':'','named-graph-uri':'','format':'text/csv'})
+        try:
+            logging.debug(self.virtuosoServer+"/sparql?" + params)
+            r= opener.open(self.virtuosoServer+"/sparql", params)
+            namesTxt=r.read()
+        except urllib2.URLError, e:
+            #only HTTPError carries .code and .read(); a plain URLError (e.g. connection refused) has neither
+            logging.error(getattr(e,"code",e))
+            if hasattr(e,"read"):
+                logging.error(e.read())
+            return [] #callers take len() of the result, so never hand back None
+        logging.debug(namesTxt)
+        names=namesTxt.split("\n")
+        if len(names) < 2: #the first line of the answer holds the column names, so <2 means there is no entry
+            return []
+        
+        ret=[]
+        for name in names[1:]:
+            #naive CSV parsing: split at "," between quoted fields, then drop all remaining quotes
+            #NOTE(review): a trailing newline in the answer yields a final row [''] - callers appear to compensate for that
+            line=[entry.replace('"','') for entry in name.split("\",\"")]
+            ret.append(line)
+        return ret
+         
     def callSparql(self,cmdString):
 
         auth_handler = urllib2.HTTPBasicAuthHandler()
@@ -731,22 +781,201 @@
                 setattr(self,"persons2Projects",OOBTree())
          
             projects = self.persons2Projects.get(person,None)
-                   
-            if projects==None: #person hatte noch keine projekte
+            logging.debug("found projects:"+ repr(projects))
+            if projects is None: #person hatte noch keine projekte
                 projects=OOSet();
-                self.addPersonAndFirstNameFromTripleStore(person)
-                self.generateSortingOrderForPersonIDs();
                 
             projects.insert(projectID )
             logging.debug("update:"+person)
             self.persons2Projects.update({person:projects})
-        
+            
+            self.addPersonAndFirstNameFromTripleStore(person)
+            self.generateSortingOrderForPersonIDs();
+               
         self.addNameTOPersonIDNames(value,label)
         retstring = self.getPersonsFromProjectAsHTML(projectID)
         logging.debug(retstring)
         transaction.commit()
         return retstring
     
+    
+    
+    def rearangePersonIDsHTML(self,REQUEST=None):
+        """Assign new IDs to persons: build (personID, name, proposed masterID, master name) rows for all tagged persons; renders rearrangePersons.zpt if REQUEST is given, else returns the rows."""
+        ret=[]
+        for personID in self.persons2Projects.keys():
+            logging.debug(personID)
+            #the triple store is asked with the stripped ID, the stored (possibly unstripped) key stays in the row
+            masterID,name=self.findMasterIDAndName(personID.rstrip().lstrip())
+            logging.debug("        masterID -appending")
+            logging.debug(repr(name))
+            if not name: #findMasterIDAndName answers (None,None) without a hit; this also covers an empty name list
+                name=['']
+            ret.append((personID,self.personIDtoNames.get(personID,[''])[0],masterID,self.personIDtoNames.get(masterID,name)[0]))
+            
+        #the position in ret is the row number the form posts back in "changes" (resolved in rearangePersonIDs)
+        if REQUEST:
+            pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','rearrangePersons.zpt')).__of__(self)
+            return pt(changeList=ret,lookupUrl=self.PERSONS_LOOKUP_URL)
+        else:
+            return ret
+        
+    def rearangePersonIDs(self,REQUEST):
+        """Replace the IDs of the persons ticked in the rearrangePersons.zpt form by the IDs entered there; returns {old personID: new masterID}."""
+        
+        argv=REQUEST.form
+        logging.debug(repr(argv))
+        changes=argv.get('changes',[]) #the key is absent when no checkbox was ticked
+        if isinstance(changes,str): #a single value is wrapped so that the loop below always sees a list
+            changes=[changes]
+        
+        changeList=self.rearangePersonIDsHTML() #rebuilt so that the posted row numbers can be resolved
+        personToMaster={}
+        logging.debug("changelist:"+repr(changeList))
+        #first re-key persons2Projects
+        for change in changes:
+            changeItem=changeList[int(change)]
+            masterID=argv['newID_'+change].lstrip().rstrip() #make sure no spaces
+            personID=changeItem[0]
+            
+            personToMaster[personID]=masterID
+            masterIDProjects = self.persons2Projects.get(masterID,None)
+            if masterIDProjects is None:
+                masterIDProjects=OOSet()
+                
+            oldProjects= self.persons2Projects.get(personID)
+            logging.debug("personID:"+repr(personID))
+            logging.debug("masterID:"+repr(masterID))
+            logging.debug("keys:"+repr(self.persons2Projects.keys()))
+            logging.debug("oldProjects:"+repr(oldProjects))
+            masterIDProjects.update(oldProjects)
+            self.persons2Projects.update({masterID:masterIDProjects})
+            if personID!=masterID: #with identical IDs the pop would delete the entry that was just written
+                self.persons2Projects.pop(personID)
+            
+            self.addPersonAndFirstNameFromTripleStore(masterID) #fetch the names for the new ID from the triple store
+            
+            logging.debug("Change:" +personID+":"+ masterID)
+            
+            if personID!=masterID:
+                self.addSameAsRelationToTripleStore(personID,masterID)
+            
+        #now replace the IDs in projectPersons
+        logging.debug(personToMaster)
+        for project in self.projectPersons.keys():
+            personsNew=OOSet()
+            
+            persons=self.projectPersons.get(project)
+            for person in persons:
+                personsNew.insert([personToMaster.get(person[0],person[0]),person[1]])
+                logging.debug("REPLACE in:"+project+":" +repr(person)+" by "+ repr(personToMaster.get(person[0],person[0])))
+            self.projectPersons.update({project:personsNew})
+            
+        self.generateSortingOrderForPersonIDs() #now sort the new names
+        return personToMaster
+    #merge persons that are connected through owl:sameAs
+    def getUnifyPersonsList(self,REQUEST=None):
+        """Unify persons onto one ID from the GND where possible.
+
+        Builds (personID, name, masterID, master name) rows for every person whose master ID differs
+        from the stored ID; renders unifyPersons.zpt with them if REQUEST is given, else returns the rows.
+        """
+        ret=[]
+        for personID in self.persons2Projects.keys():
+            masterID,name=self.findMasterIDAndName(personID)
+            logging.debug("masterID:"+str(masterID)) #str(): masterID is None if the lookup found nothing
+            #"!=" instead of "is not": identity comparison with a string literal is not reliable
+            if (masterID is not None) and (masterID != "") and (not personID==masterID):
+                logging.debug("        masterID -appending")
+                logging.debug(repr(name))
+
+                #name from the triple store is only the fallback when masterID has no entry in personIDtoNames
+                ret.append((personID,self.personIDtoNames.get(personID,[''])[0],masterID,self.personIDtoNames.get(masterID,name)[0]))
+                
+        #the position in ret is the row number unifyPersons.zpt posts back in "changes"
+        if REQUEST:
+            pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','unifyPersons.zpt')).__of__(self)
+            return pt(changeList=ret)
+        
+        else:
+            return ret
+    
+    def unifyPersons(self,changes,REQUEST=None):
+        """Unify the persons ticked in unifyPersons.zpt: changes holds row numbers of getUnifyPersonsList(); returns {old personID: masterID}."""
+        
+        if isinstance(changes,str):
+            changes=[changes]
+        
+        changeList=self.getUnifyPersonsList();
+        personToMaster={}
+        logging.debug("changelist:"+repr(changeList))
+        #first update persons2Projects
+        for change in changes:
+            changeItem=changeList[int(change)];
+            masterID=changeItem[2]
+            personID=changeItem[0]
+            
+            personToMaster[personID]=masterID
+            masterIDProjects = self.persons2Projects.get(masterID,None);
+            if masterIDProjects==None:
+                masterIDProjects=OOSet();
+                #the master ID has no projects of its own yet, so start with an empty set
+                
+            oldProjects= self.persons2Projects.get(personID)
+            logging.debug("personID:"+repr(personID))
+            logging.debug("masterID:"+repr(masterID))
+            logging.debug("keys:"+repr(self.persons2Projects.keys()))
+            logging.debug("oldProjects:"+repr(oldProjects))
+            masterIDProjects.update(oldProjects)
+            self.persons2Projects.update({masterID:masterIDProjects})
+            self.persons2Projects.pop(personID) #getUnifyPersonsList only lists rows with personID != masterID, so this never removes the merged entry
+            
+            self.addPersonAndFirstNameFromTripleStore(masterID) #fetch the names of the master ID from the triple store
+            
+            logging.debug("Change:" +personID+":"+ masterID)
+            
+        #now replace the IDs in projectPersons
+        logging.debug(personToMaster)
+        for project in self.projectPersons.keys():
+            personsNew=OOSet()
+            
+            persons=self.projectPersons.get(project)
+            for person in persons:
+                personsNew.insert([personToMaster.get(person[0],person[0]),person[1]])
+                logging.debug("REPLACE in:"+project+":" +repr(person)+" by "+ repr(personToMaster.get(person[0],person[0])))
+            self.projectPersons.update({project:personsNew})
+            
+        self.generateSortingOrderForPersonIDs(); #now sort the new names
+        return personToMaster
+        
+        
+    
+    #Ask the triple store for the person that is owl:sameAs ressourceID; returns (master ID, [name, lastName, firstName]) or (None,None).
+    #NOTE(review): documented with comments on purpose - Zope 2 publishes methods that have a docstring; confirm before adding one.
+    def findMasterIDAndName(self,ressourceID):
+        queryString="""select * 
+FROM <file://mpiwg_persons_dnb.rdf>  
+FROM <file://mpiwg_persons_2.rdf> 
+FROM <file:///GND.rdf> 
+FROM <http://identifiedNames>
+where {
+?person <http://www.w3.org/2002/07/owl#sameAs> <%s>.
+
+?ident <http://ontologies.mpiwg-berlin.mpg.de/authorities/namedIdentities/identifies_NamedEntity> ?person. 
+?gnd crm:P1_is_identified_by ?ident.
+
+?gnd <http://RDVocab.info/ElementsGr2/dateOfBirth> ?birthDate.
+?gnd <http://RDVocab.info/ElementsGr2/dateOfDeath> ?deathDate.
+?person <http://xmlns.com/foaf/0.1/name> ?name.
+?person <http://xmlns.com/foaf/0.1/lastName> ?lastName.
+?person <http://xmlns.com/foaf/0.1/firstName> ?firstName.
+} 
+"""%ressourceID #NOTE(review): crm: is used without a PREFIX declaration - presumably predefined on the server, confirm
+        entries = self.callSparqlAll(queryString)
+        if entries: #also false for None, which callSparqlAll may hand back after a failed request
+            return entries[0][0],entries[0][5:8] #first hit only: column 0 is ?person, 5-7 are name/lastName/firstName (assumes select * keeps the order of first appearance - TODO confirm)
+        return None,None
+        
     security.declareProtected('View management screens','getPersonsWithProjectIDs') 
     def getPersonsWithProjectIDs(self,check=False):
         """holt die getaggted Personen mit Projekten"""
@@ -757,8 +986,17 @@
         personsList=[x for x in persons.keys()]
         
         def sort(x,y):
-            sortNrx=self.personsIDForSort.index(x)
-            sortNry=self.personsIDForSort.index(y)
+            try:
+                sortNrx=self.personsIDForSort.index(x)
+            except:
+                logging.warn("couldn't find personsIDForSort:"+x)
+                sortNrx=0 #IDs missing from personsIDForSort are ranked like position 0
+            #NOTE(review): the bare except also hides errors other than a failed index() lookup (presumably ValueError of a list) - consider narrowing
+            try:
+                sortNry=self.personsIDForSort.index(y)
+            except:
+                logging.warn("couldn't find personsIDForSort:"+y)
+                sortNry=0
             #logging.debug("INSORT***")
             #logging.debug((sortNrx,sortNry))
             return cmp(sortNrx,sortNry)
@@ -776,7 +1014,8 @@
             #TODO: person muss duch den namen von provider geholt werden
             retlist=[]
             projectsList=persons.get(person)
-            
+            if projectsList is None:
+                projectsList=[]
             for projectID in list(projectsList): #list notwendig da projectList in der folgenden iteration veraendert wird.
                 if check: #teste ob im Projekt noch ein Verweis auf den Namen steht
                     #sollte eigentlich nicht sein.
@@ -1028,6 +1267,31 @@
         self.createTempPersonInVirtuoso(projectID, idstring, personName, personComment) #TODO: add suername to the triplestore
         return self.addPersonToProject(projectID, idstring, personName);
        
+    #Write owl:sameAs in both directions between personID and masterID into self.virtuosoGraph; no docstring on purpose (NOTE(review): Zope 2 publishes methods that have one).
+    def addSameAsRelationToTripleStore(self,personID,masterID):
+        if [c for c in personID+masterID if c in "<>"]: #masterID may come straight from a web form (rearangePersonIDs): nothing may break out of the <...> brackets
+            logging.error("illegal character in person ID:"+repr((personID,masterID)))
+            return
+        cmdString ="insert in GRAPH <"+self.virtuosoGraph+">  {"
+        cmdString +="<%s> <http://www.w3.org/2002/07/owl#sameAs> <%s>."%(personID,masterID)
+        cmdString +="<%s> <http://www.w3.org/2002/07/owl#sameAs> <%s>."%(masterID,personID)
+        cmdString+="}"
+        auth_handler = urllib2.HTTPBasicAuthHandler()
+        auth_handler.add_password(realm='sparql',
+                          uri=self.virtuosoServer+"/sparql",
+                          user=self.virtuosoDAVUser,
+                          passwd=self.virtuosoDAVPW)
+        opener = urllib2.build_opener(auth_handler)
+        opener.addheaders = [('Content-Type','application/sparql-query')]
+        logging.debug(cmdString)
+        try:
+            r= opener.open(self.virtuosoServer+"/sparql", urllib.urlencode({'query':cmdString,'default-graph-uri':self.virtuosoGraph,'named-graph-uri':''})) #'' not None: urlencode would send the literal text "None"
+            logging.debug(r.read())
+        except urllib2.URLError, e:
+            logging.error(getattr(e,"code",e)) #only HTTPError has .code and .read(), a plain URLError has neither
+            if hasattr(e,"read"):
+                logging.error(e.read())
+        
     security.declareProtected('View management screens','createTempPersonInVirtuoso')     
     def createTempPersonInVirtuoso(self,projectID,personURI, personName,personComment):
         """add a new person to the triple store of tempory persons"""
--- a/zpt/manageMPIWGThesaurus.zpt	Wed Oct 12 16:15:57 2011 +0200
+++ b/zpt/manageMPIWGThesaurus.zpt	Wed Dec 21 22:11:16 2011 +0100
@@ -5,6 +5,8 @@
 <li><a href="listAllSuggestedTags">Zeige alle vorgeschlagenen Tags</a></li>
 <li><a href="collectPersonNamesToIds">Hole/Update die Namenseintraege aus dem Triplestore, z.B. falls im Triplestore Namem geaendert worden</a></li>
 <li><a href="getPersonsWithProjectIDs?check=bool:True">Hole all Personen mit ProjectIDs, raeumt zu gleich den Cache Persone-->ProjectID auf</a></li>
+<li><a href="rearangePersonIDsHTML">Ordne neue IDs vorhandenen Personen zu, insbesondere ersetze temporäre IDs durch GND oder andere,</a></li>
+<li><a href="getUnifyPersonsList">Ordne GND ids zu, falls bisher IDs aus anderen Kontexten vergeben wurden.</a></li>
 </ul>
 </body>
 </html>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/zpt/rearrangePersons.zpt	Wed Dec 21 22:11:16 2011 +0100
@@ -0,0 +1,21 @@
+<html>
+<body tal:define="global i python:-1">
+<h1>Unify persons</h1>
+<form method="post" action="rearangePersonIDs">
+<table>
+<tr><td>Nr.</td><td>saved ID</td><td>Name</td><td>New ID</td><td>Unify?</td>
+</tr>
+<tr tal:repeat="item python:options['changeList']" >
+<td tal:define="global i python:i+1" tal:content="python:i"/>
+<td><a tal:attributes="href python:item[0]" tal:content="python:item[0]"/></td>
+<td tal:content="python:item[1]"/>
+<td><a tal:attributes="href python:item[2]">see</a><input size="100" tal:attributes="name python:'newID_'+str(i);
+value python:item[2]"/></td>
+<td><input type="checkbox" name="changes" tal:attributes="value python:i"/></td>
+<td><a tal:attributes="href python:options['lookupUrl']">look up</a></td>
+</tr>
+</table>
+<input type="submit"/>
+</form>
+</body>
+</html>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/zpt/unifyPersons.zpt	Wed Dec 21 22:11:16 2011 +0100
@@ -0,0 +1,20 @@
+<html>
+<body tal:define="global i python:-1">
+<h1>Unify persons</h1>
+<form method="post" action="unifyPersons">
+<table>
+<tr><td>Nr.</td><td>ID not from GND</td><td>Name</td><td>GND id</td><td>GND Name</td><td>Unify?</td>
+</tr>
+<tr tal:repeat="item python:options['changeList']" >
+<td tal:define="global i python:i+1" tal:content="python:i"/>
+<td><a tal:attributes="href python:item[0]" tal:content="python:item[0]"/></td>
+<td tal:content="python:item[1]"/>
+<td><a tal:attributes="href python:item[2]" tal:content="python:item[2]"/></td>
+<td tal:content="python:item[3]"/>
+<td><input type="checkbox" name="changes" tal:attributes="value python:i"/></td>
+</tr>
+</table>
+<input type="submit"/>
+</form>
+</body>
+</html>
\ No newline at end of file