diff migrateThesaurus.py @ 45:efdbe9eb2403

thesaurus jetzt in datenbank
author dwinter
date Mon, 25 Nov 2013 08:12:11 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/migrateThesaurus.py	Mon Nov 25 08:12:11 2013 +0100
@@ -0,0 +1,178 @@
+import xml.etree.ElementTree as etree
+import web
+
+import urllib2
+import logging
+import urllib
+
+# NOTE(review): credentials are hard-coded below; keep them out of version control.
+virtuosoServer="http://virtuoso.mpiwg-berlin.mpg.de:8890"
+virtuosoDAV="/DAV/home/websiteuser/"
+virtuosoDAVUser="websiteuser"
+virtuosoDAVPW="w3s45us3"
+virtuosoGraph="file://newpersonsFromProjects"
+
+def callSparql(cmdString):
+    """Send a SPARQL query to the Virtuoso endpoint; return the first result row.
+
+    Returns the first CSV line after the header with double quotes removed
+    (possibly ''), or None if the request fails or there is no such line.
+    """
+    print cmdString
+    logging.debug(cmdString)
+    auth_handler = urllib2.HTTPBasicAuthHandler()
+    auth_handler.add_password(realm='sparql', uri=virtuosoServer + "/sparql",
+                              user=virtuosoDAVUser, passwd=virtuosoDAVPW)
+    opener = urllib2.build_opener(auth_handler)
+    opener.addheaders = [('Content-Type', 'application/sparql-query')]
+    # The request uses an empty default graph; virtuosoGraph is not applied.
+    params = urllib.urlencode({'query': cmdString, 'default-graph-uri': '',
+                               'named-graph-uri': '', 'format': 'text/csv'})
+    logging.debug(virtuosoServer + "/sparql?" + params)
+    try:
+        r = opener.open(virtuosoServer + "/sparql", params)
+        namesTxt = r.read()
+        r.close()
+    except urllib2.HTTPError as e:  # only HTTPError has a status code and body
+        logging.error(e.code)
+        logging.error(e.read())
+        return None
+    except urllib2.URLError as e:  # e.g. connection failure: only a reason
+        logging.error(e.reason)
+        return None
+    logging.debug(namesTxt)
+    names = namesTxt.split("\n")
+    if len(names) < 2:  # line 0 holds the column names: no second line, no row
+        return None
+    return names[1].replace('"', '')  # only the first hit is used
+
+# Parse the project/tag export (XML in the FileMaker fmpdsoresult namespace).
+with open("/usr/local/testzope13/Products/MPIWGThesaurus/examples/ProjectsAndTags.xml") as fl:
+    dom = etree.parse(fl)
+
+ns = {'fm':'http://www.filemaker.com/fmpdsoresult'}
+
+tagListShort=['spaces','approaches','disciplines','periods','transfers','technologies','objects']
+# Element tags as ElementTree reports them: "{namespace}localname".
+tagList=["{http://www.filemaker.com/fmpdsoresult}"+x for x in tagListShort]
+
+# NOTE(review): the database password is hard-coded here; keep it out of version control.
+db =web.database(dbn="postgres", user="dwinter", pw="weikiki7",db="personalwww", host="localhost")
+
+# Link each project (one fm:ROW element) to its tags.  Tag rows and
+# project/tag link rows are looked up first and only inserted when they
+# do not exist yet.
+for row in dom.findall(".//fm:ROW", ns):
+    tags = {}
+    # Reset per row: a row without a projectId element must not silently
+    # inherit the id of the previous row.
+    projectID = None
+    for ch in list(row):
+        print ch.tag
+        if ch.tag in tagList:
+            # Tag names are ';'-separated; an element without text has none.
+            if ch.text is not None:
+                tags[ch.tag] = ch.text.split(";")
+            else:
+                tags[ch.tag] = []
+        if ch.tag == '{http://www.filemaker.com/fmpdsoresult}projectId':
+            projectID = ch.text
+
+    if projectID is None:
+        logging.warning("skipping row without projectId")
+        continue
+
+    for tagTypeLong in tags:
+        tagType = tagTypeLong.replace('{http://www.filemaker.com/fmpdsoresult}', '')
+        if tagType == "objects":
+            tagType = "object"  # the "objects" element maps to tag type "object"
+
+        for tagName in tags[tagTypeLong]:
+            # Look the tag up, creating it first if necessary.
+            tagVars = {'tt': tagType, 'tn': tagName}
+            tagSelect = "select id from thesaurus_tags where tag_type = $tt and tag_name= $tn"
+            res = db.query(tagSelect, vars=tagVars)
+            if len(res) == 0:
+                db.insert('thesaurus_tags', tag_type=tagType, tag_name=tagName)
+                res = db.query(tagSelect, vars=tagVars)
+
+            # Now link the project to the tag, unless the link already exists.
+            tagID = res[0].id
+            linkVars = {'pi': projectID, 'ti': tagID}
+            qsSelect = "select id from thesaurus_projects_tags where project_id = $pi and tag_id= $ti"
+            tag_ids = db.query(qsSelect, vars=linkVars)
+            if len(tag_ids) == 0:
+                qs = "insert into thesaurus_projects_tags (project_id,tag_id) values ($pi,$ti)"
+                db.query(qs, vars=linkVars)
+                print qs
+           
+    
+               
+# Persons: every person becomes a tag of type "person", and each project
+# listed for that person in the JSON file below is linked to that tag.
+# Input file taken from http://www.mpiwg-berlin.mpg.de:28080/www/en/research/thesaurus/getPersonsWithProjectIDsJSON
+
+import json
+
+with open('/usr/local/testzope13/Products/MPIWGThesaurus/examples/getPersonsWithProjectIDsJSON') as fl:
+    # Used below as: person identifier -> list of entries whose first item is
+    # the project id.
+    personsProjects = json.load(fl)
+
+tagType = "person"
+
+for tagName, projects in personsProjects.items():
+    # Look the person tag up, creating it first if necessary.
+    tagVars = {'tt': tagType, 'tn': tagName}
+    tagSelect = "select id from thesaurus_tags where tag_type = $tt and tag_name= $tn"
+    res = db.query(tagSelect, vars=tagVars)
+    if len(res) == 0:
+        db.insert('thesaurus_tags', tag_type=tagType, tag_name=tagName)
+        res = db.query(tagSelect, vars=tagVars)
+    tagID = res[0].id
+
+    # Now link each of the person's projects to the tag, unless already linked.
+    for proj in projects:
+        projectID = proj[0]
+        linkVars = {'pi': projectID, 'ti': tagID}
+        qsSelect = "select id from thesaurus_projects_tags where project_id = $pi and tag_id= $ti"
+        tag_ids = db.query(qsSelect, vars=linkVars)
+        if len(tag_ids) == 0:
+            qs = "insert into thesaurus_projects_tags (project_id,tag_id) values ($pi,$ti)"
+            db.query(qs, vars=linkVars)
+            print qs
+
+# Finally add labels: the foaf:name found for a person becomes the label of
+# that person's tag.
+for tagName in personsProjects:
+    personID = tagName  # the tag name doubles as the person's URI in the query
+
+    # Only foaf:name is needed for the label.
+    cmdString = """select * where { <%s> <http://xmlns.com/foaf/0.1/name> ?name}""" % personID
+    names = callSparql(cmdString)
+    # callSparql returns None when the request fails or yields no row, and
+    # may return ''; neither can be decoded and stored as a label.
+    if not names:
+        continue
+
+    tagSelect = "select id from thesaurus_tags where tag_type = $tt and tag_name= $tn"
+    res = db.query(tagSelect, vars={'tt': tagType, 'tn': tagName})
+    if len(res) == 0:
+        logging.warning("no thesaurus tag found for %s", tagName)
+        continue
+
+    qs = "update thesaurus_tags set tag_label=$tl where id=$ti"
+    print names
+    # NOTE(review): the reply is decoded as latin-1; confirm the endpoint's encoding.
+    db.query(qs, vars={'tl': names.decode('latin-1'), 'ti': res[0].id})
+        
+     
+        
+    
+    
+
+
+       
+
+       
+    
\ No newline at end of file