Mercurial > hg > MPIWGThesaurus
diff migrateThesaurus.py @ 45:efdbe9eb2403
thesaurus jetzt in datenbank
author | dwinter |
---|---|
date | Mon, 25 Nov 2013 08:12:11 +0100 |
parents | |
children |
line wrap: on
line diff
# migrateThesaurus.py -- added as a new file in hg changeset 45:efdbe9eb2403
# (--- /dev/null  +++ b/migrateThesaurus.py, Mon Nov 25 08:12:11 2013 +0100)
"""One-off migration of the MPIWG thesaurus into the PostgreSQL database.

The script runs three passes:

1. Read the FileMaker XML export ``ProjectsAndTags.xml`` and, for every ROW,
   make sure each tag exists in ``thesaurus_tags`` and is linked to the row's
   project in ``thesaurus_projects_tags``.
2. Read the JSON dump ``getPersonsWithProjectIDsJSON`` and do the same for
   tags of type ``person``.
3. Ask the Virtuoso SPARQL endpoint for the ``foaf:name`` of every person and
   store it as ``tag_label`` of the corresponding tag.

Written for Python 2 (``urllib2``, byte-string ``decode``).
"""
import json
import logging
import urllib
import urllib2
import xml.etree.ElementTree as etree

import web

# NOTE(review): credentials are hard-coded in this file (here and in the
# web.database() call below); consider moving them out of version control.
virtuosoServer = "http://virtuoso.mpiwg-berlin.mpg.de:8890"
virtuosoDAV = "/DAV/home/websiteuser/"
virtuosoDAVUser = "websiteuser"
virtuosoDAVPW = "w3s45us3"
# Currently NOT sent with the request: callSparql() passes an empty
# default-graph-uri.
virtuosoGraph = "file://newpersonsFromProjects"


def callSparql(cmdString):
    """Run a SPARQL query against the Virtuoso endpoint and return one value.

    The query is POSTed to ``virtuosoServer + "/sparql"`` asking for
    ``text/csv`` output.

    :param cmdString: the SPARQL query text.
    :returns: the second line of the CSV response (the first result row) with
        all double quotes removed; ``''`` if that line is empty; ``None`` if
        the response has fewer than two lines or the request failed.
    """
    print(cmdString)
    auth_handler = urllib2.HTTPBasicAuthHandler()
    auth_handler.add_password(realm='sparql',
                              uri=virtuosoServer + "/sparql",
                              user=virtuosoDAVUser,
                              passwd=virtuosoDAVPW)

    opener = urllib2.build_opener(auth_handler)
    opener.addheaders = [('Content-Type', 'application/sparql-query')]

    endpoint = virtuosoServer + "/sparql"
    # Build the parameters once so that the debug log shows exactly what is
    # sent (the default graph is deliberately left empty).
    params = urllib.urlencode({'query': cmdString,
                               'default-graph-uri': '',
                               'named-graph-uri': '',
                               'format': 'text/csv'})

    logging.debug(cmdString)
    logging.debug(endpoint + "?" + params)
    try:
        r = opener.open(endpoint, params)
        try:
            namesTxt = r.read()
        finally:
            r.close()
    except urllib2.HTTPError as e:
        # Only HTTPError carries a status code and a readable body.
        logging.error(e.code)
        logging.error(e.read())
        return None
    except urllib2.URLError as e:
        # Connection-level failure: there is no code/body, only a reason.
        logging.error(e.reason)
        return None

    logging.debug(namesTxt)
    names = namesTxt.split("\n")
    # The first line of the response holds the column names, so fewer than
    # two lines means there is no entry.
    if len(names) < 2:
        return None

    return names[1].replace('"', '')  # take only the first hit


def _getOrCreateTagId(tagType, tagName):
    """Return the id of the (tagType, tagName) row in thesaurus_tags, inserting it if missing."""
    select = "select id from thesaurus_tags where tag_type = $tt and tag_name= $tn"
    params = {'tt': tagType, 'tn': tagName}
    res = db.query(select, vars=params)
    if len(res) == 0:
        db.insert('thesaurus_tags', tag_type=tagType, tag_name=tagName)
        res = db.query(select, vars=params)
    return res[0].id


def _linkProjectToTag(projectID, tagID):
    """Insert the (projectID, tagID) pair into thesaurus_projects_tags unless it already exists."""
    qsSelect = "select id from thesaurus_projects_tags where project_id = $pi and tag_id= $ti"
    params = {'pi': projectID, 'ti': tagID}
    if len(db.query(qsSelect, vars=params)) == 0:
        qs = "insert into thesaurus_projects_tags (project_id,tag_id) values ($pi,$ti)"
        db.query(qs, vars=params)
        print(qs)


# --- pass 1: projects and their tags from the FileMaker export -------------

with open("/usr/local/testzope13/Products/MPIWGThesaurus/examples/ProjectsAndTags.xml") as fl:
    dom = etree.parse(fl)

ns = {'fm': 'http://www.filemaker.com/fmpdsoresult'}

tagListShort = ['spaces', 'approaches', 'disciplines', 'periods', 'transfers', 'technologies', 'objects']

tagList = ["{http://www.filemaker.com/fmpdsoresult}" + x for x in tagListShort]

db = web.database(dbn="postgres", user="dwinter", pw="weikiki7", db="personalwww", host="localhost")

for row in dom.findall(".//fm:ROW", ns):
    tags = {}
    # Reset per row so that a ROW without a projectId can never inherit the
    # id of the previous ROW.
    projectID = None
    for ch in list(row):
        print(ch.tag)
        if ch.tag in tagList:
            tags[ch.tag] = ch.text.split(";") if ch.text is not None else []
        if ch.tag == '{http://www.filemaker.com/fmpdsoresult}projectId':
            projectID = ch.text

    if projectID is None:
        logging.warning("ROW without projectId skipped")
        continue

    for tagTypeLong in tags:
        tagType = tagTypeLong.replace('{http://www.filemaker.com/fmpdsoresult}', '')
        if tagType == "objects":
            tagType = "object"

        for tagName in tags[tagTypeLong]:
            # A trailing or doubled ';' yields empty names; do not create
            # tags for them.
            if not tagName.strip():
                continue
            # Look up the tag or create it, then register the project.
            _linkProjectToTag(projectID, _getOrCreateTagId(tagType, tagName))


# --- pass 2: persons / person-tagged projects --------------------------------
# File taken from
# http://www.mpiwg-berlin.mpg.de:28080/www/en/research/thesaurus/getPersonsWithProjectIDsJSON

with open('/usr/local/testzope13/Products/MPIWGThesaurus/examples/getPersonsWithProjectIDsJSON') as fl:
    personsProjects = json.load(fl)

tagType = "person"

personTagIds = {}  # person tag name -> id in thesaurus_tags, reused in pass 3
for tagName, projects in personsProjects.items():
    tagID = _getOrCreateTagId(tagType, tagName)
    personTagIds[tagName] = tagID

    # Now register the projects; the first element of each entry is the
    # project id.
    for proj in projects:
        projectID = proj[0]
        _linkProjectToTag(projectID, tagID)


# --- pass 3: finally add labels ---------------------------------------------

for tagName in personsProjects:
    personID = tagName  # the tag name is used as the person's URI in the query

    cmdString = """select * where { <%s> <http://xmlns.com/foaf/0.1/name> ?name}""" % personID
    names = callSparql(cmdString)

    # callSparql() returns None (no result / request failed) or '' (empty
    # row); neither can be used as a label.
    if not names:
        continue

    qs = "update thesaurus_tags set tag_label=$tl where id=$ti"
    print(names)
    # NOTE(review): the response bytes are decoded as latin-1 -- confirm this
    # matches the encoding the endpoint actually returns.
    db.query(qs, vars={'tl': names.decode('latin-1'), 'ti': personTagIds[tagName]})