Mercurial > hg > MPIWGThesaurus
view migrateThesaurus.py @ 47:b5d89c03f958
fix unicode problem with thes_quote.
author | casties |
---|---|
date | Thu, 28 Nov 2013 17:17:23 +0100 |
parents | efdbe9eb2403 |
children |
line wrap: on
line source
"""Migrate project tags and person tags into the thesaurus database tables.

Python 2 script (it relies on ``urllib2``).  It runs three phases:

1. Read ROW elements from an XML file in the ``fmpdsoresult`` namespace and
   store every tag of every project in ``thesaurus_tags`` /
   ``thesaurus_projects_tags``.
2. Read a JSON file mapping person identifiers to project entries and store
   the persons as tags of type ``person`` linked to their projects.
3. Look up each person's ``foaf:name`` via SPARQL and store it as the tag
   label.

Run it as a script; importing the module does not start the migration.
"""
import json
import logging
import urllib
import urllib2
import xml.etree.ElementTree as etree

import web

# SECURITY NOTE(review): service and database credentials are hard-coded in
# this file; consider moving them to configuration outside version control.
virtuosoServer = "http://virtuoso.mpiwg-berlin.mpg.de:8890"
virtuosoDAV = "/DAV/home/websiteuser/"
virtuosoDAVUser = "websiteuser"
virtuosoDAVPW = "w3s45us3"
virtuosoGraph = "file://newpersonsFromProjects"

ns = {'fm': 'http://www.filemaker.com/fmpdsoresult'}
tagListShort = ['spaces', 'approaches', 'disciplines', 'periods', 'transfers', 'technologies', 'objects']
# Fully qualified element names of the children that carry tag lists.
tagList = ["{http://www.filemaker.com/fmpdsoresult}" + x for x in tagListShort]

projectsAndTagsFile = "/usr/local/testzope13/Products/MPIWGThesaurus/examples/ProjectsAndTags.xml"
# File taken from
# http://www.mpiwg-berlin.mpg.de:28080/www/en/research/thesaurus/getPersonsWithProjectIDsJSON
personsWithProjectsFile = '/usr/local/testzope13/Products/MPIWGThesaurus/examples/getPersonsWithProjectIDsJSON'


def callSparql(cmdString):
    """Run a SPARQL query against the Virtuoso endpoint and return one value.

    The query is POSTed to ``virtuosoServer + "/sparql"`` asking for
    ``text/csv`` output.

    Args:
        cmdString: the SPARQL query text.

    Returns:
        The second line of the CSV response (the first result row) with all
        double quotes removed.  This is the empty string when the response
        consists of the header line only.  ``None`` is returned when the
        request fails or the response has fewer than two lines.
    """
    print(cmdString)
    endpoint = virtuosoServer + "/sparql"

    auth_handler = urllib2.HTTPBasicAuthHandler()
    auth_handler.add_password(realm='sparql',
                              uri=endpoint,
                              user=virtuosoDAVUser,
                              passwd=virtuosoDAVPW)
    opener = urllib2.build_opener(auth_handler)
    opener.addheaders = [('Content-Type', 'application/sparql-query')]

    logging.debug(cmdString)
    # NOTE(review): the request sends an empty default-graph-uri, so
    # virtuosoGraph is not used by the query -- confirm this is intended.
    params = urllib.urlencode({'query': cmdString,
                               'default-graph-uri': '',
                               'named-graph-uri': '',
                               'format': 'text/csv'})
    # Log exactly the parameters that are sent.
    logging.debug(endpoint + "?" + params)

    try:
        r = opener.open(endpoint, params)
        try:
            namesTxt = r.read()
        finally:
            r.close()
    except urllib2.HTTPError as e:
        # Only HTTPError carries a status code and a response body.
        logging.error(e.code)
        logging.error(e.read())
        return None
    except urllib2.URLError as e:
        # Connection-level failure: there is no status code or body to report.
        logging.error(e.reason)
        return None

    logging.debug(namesTxt)
    names = namesTxt.split("\n")
    # The first line of the response holds the column names, so fewer than
    # two lines means there is no entry.
    if len(names) < 2:
        return None
    # Only the first hit is used; rstrip tolerates CRLF line endings.
    return names[1].rstrip("\r").replace('"', '')


def _getOrCreateTagId(db, tagType, tagName):
    """Return the id of the thesaurus_tags row, inserting the row if missing."""
    select = "select id from thesaurus_tags where tag_type = $tt and tag_name= $tn"
    params = {'tt': tagType, 'tn': tagName}
    res = db.query(select, vars=params)
    # len() is used on purpose: it is how the result size was already tested
    # with this database layer.
    if len(res) == 0:
        db.insert('thesaurus_tags', tag_type=tagType, tag_name=tagName)
        res = db.query(select, vars=params)
    return res[0].id


def _linkProjectToTag(db, projectID, tagID):
    """Insert the (project, tag) pair into thesaurus_projects_tags if absent."""
    qsSelect = "select id from thesaurus_projects_tags where project_id = $pi and tag_id= $ti"
    params = {'pi': projectID, 'ti': tagID}
    if len(db.query(qsSelect, vars=params)) == 0:
        qs = "insert into thesaurus_projects_tags (project_id,tag_id) values ($pi,$ti)"
        db.query(qs, vars=params)
        print(qs)


def _readRow(row):
    """Return ``(projectID, tags)`` for one ROW element.

    ``tags`` maps the qualified element name of each tag-list child to the
    list obtained by splitting its text on ``;`` (empty list for no text).
    ``projectID`` is ``None`` when the row has no projectId child.
    """
    projectID = None
    tags = {}
    for ch in row:
        print(ch.tag)
        if ch.tag in tagList:
            tags[ch.tag] = ch.text.split(";") if ch.text is not None else []
        if ch.tag == '{http://www.filemaker.com/fmpdsoresult}projectId':
            projectID = ch.text
    return projectID, tags


def _migrateProjectTags(db, xmlPath):
    """Store the tags of every project ROW found in the XML file."""
    with open(xmlPath, "rb") as fl:
        dom = etree.parse(fl)

    for row in dom.findall(".//fm:ROW", ns):
        projectID, tags = _readRow(row)
        if not projectID:
            # Without an id the tags cannot be attached to a project; never
            # fall back to the id of a previously processed row.
            logging.warning("skipping ROW without projectId (tags: %r)", tags)
            continue

        for tagTypeLong, tagNames in tags.items():
            tagType = tagTypeLong.replace('{http://www.filemaker.com/fmpdsoresult}', '')
            if tagType == "objects":
                tagType = "object"
            for tagName in tagNames:
                # Look up the tag or create it, then register the project.
                tagID = _getOrCreateTagId(db, tagType, tagName)
                _linkProjectToTag(db, projectID, tagID)


def _migratePersonTags(db, jsonPath):
    """Store persons as tags linked to their projects, then add their labels."""
    with open(jsonPath) as fl:
        personsProjects = json.load(fl)

    tagType = "person"
    tagIDs = {}
    for tagName in personsProjects:
        tagID = _getOrCreateTagId(db, tagType, tagName)
        tagIDs[tagName] = tagID
        for proj in personsProjects[tagName]:
            # The first item of each project entry is used as the project id.
            _linkProjectToTag(db, proj[0], tagID)

    # Finally add the labels.
    for personID in personsProjects:
        cmdString = """select * where { <%s> <http://xmlns.com/foaf/0.1/name> ?name}""" % personID
        names = callSparql(cmdString)
        # callSparql yields None on failure and '' when nothing was found.
        if not names:
            continue
        qs = "update thesaurus_tags set tag_label=$tl where id=$ti"
        print(names)
        # NOTE(review): the response bytes are decoded as latin-1 -- confirm
        # this matches the encoding the endpoint actually returns.
        db.query(qs, vars={'tl': names.decode('latin-1'), 'ti': tagIDs[personID]})


def main():
    """Run all migration phases against the thesaurus database."""
    db = web.database(dbn="postgres", user="dwinter", pw="weikiki7", db="personalwww", host="localhost")
    _migrateProjectTags(db, projectsAndTagsFile)
    _migratePersonTags(db, personsWithProjectsFile)


if __name__ == "__main__":
    main()