view migrateThesaurus.py @ 47:b5d89c03f958

fix unicode problem with thes_quote.
author casties
date Thu, 28 Nov 2013 17:17:23 +0100
parents efdbe9eb2403
children
line wrap: on
line source

import xml.etree.ElementTree as etree
import web

import urllib2
import logging
import urllib

# Connection settings for the Virtuoso triple store.
# callSparql() uses the server URL and the DAV user/password for HTTP basic
# auth; virtuosoGraph only appears in its debug log output and virtuosoDAV is
# not referenced by the code visible in this file.
# NOTE(review): the password is stored here in plain text -- consider moving
# it out of the source file.
virtuosoServer = 'http://virtuoso.mpiwg-berlin.mpg.de:8890'
virtuosoDAV = '/DAV/home/websiteuser/'
virtuosoDAVUser = 'websiteuser'
virtuosoDAVPW = 'w3s45us3'
virtuosoGraph = 'file://newpersonsFromProjects'
   

def callSparql(cmdString):
    """Run a SPARQL query against the Virtuoso endpoint and return the first hit.

    The query is sent form-encoded as the request body to
    ``virtuosoServer + "/sparql"`` using HTTP basic auth (realm ``sparql``)
    and asks for the result as ``text/csv``.

    Args:
        cmdString: the SPARQL query text.

    Returns:
        The second line of the response (the first data row, the first line
        being the column names) with all double quotes removed.  This is an
        empty string when the response is just the header line followed by a
        newline.  ``None`` is returned when the request fails or when the
        response has fewer than two lines.
    """
    # A parenthesised single-argument print behaves the same as the
    # Python 2 print statement used elsewhere in this file.
    print(cmdString)

    endpoint = virtuosoServer + "/sparql"

    auth_handler = urllib2.HTTPBasicAuthHandler()
    auth_handler.add_password(realm='sparql',
                              uri=endpoint,
                              user=virtuosoDAVUser,
                              passwd=virtuosoDAVPW)

    opener = urllib2.build_opener(auth_handler)
    opener.addheaders = [('Content-Type', 'application/sparql-query')]

    logging.debug(cmdString)
    # NOTE(review): the logged URL names virtuosoGraph as default graph while
    # the request below sends an empty default-graph-uri -- confirm which one
    # is intended.
    logging.debug(virtuosoServer+"/sparql?" + urllib.urlencode({'query':cmdString,'default-graph-uri':virtuosoGraph,'named-graph-uri':'','format':'text/csv'}))
    params = urllib.urlencode({'query':cmdString,'default-graph-uri':'','named-graph-uri':'','format':'text/csv'})

    try:
        r = opener.open(endpoint, params)
        try:
            namesTxt = r.read()
        finally:
            r.close()
    except urllib2.URLError as e:
        # Only HTTPError (a subclass of URLError) has a status code and a
        # readable body; a plain URLError, e.g. a refused connection, only
        # carries .reason.  Accessing .code on it would raise AttributeError.
        if isinstance(e, urllib2.HTTPError):
            logging.error(e.code)
            logging.error(e.read())
        else:
            logging.error(e.reason)
        return None

    logging.debug(namesTxt)
    names = namesTxt.split("\n")
    # the first line of the response holds the column names, so fewer than
    # two lines means there is no entry
    if len(names) < 2:
        return None

    # we only take the first hit
    return names[1].replace('"', '')
         
    


# Import the tags of all projects from the FileMaker XML export into the
# tables thesaurus_tags (one row per tag type/name) and
# thesaurus_projects_tags (one row per project/tag pair).  Existing rows are
# looked up first, so nothing is inserted twice.

# the with-block closes the export file as soon as it has been parsed
with open("/usr/local/testzope13/Products/MPIWGThesaurus/examples/ProjectsAndTags.xml") as fl:
    dom = etree.parse(fl)

ns = {'fm':'http://www.filemaker.com/fmpdsoresult'}

# ElementTree reports tags in the form "{namespace-uri}localname"
fmNamespace = "{http://www.filemaker.com/fmpdsoresult}"

tagListShort=['spaces','approaches','disciplines','periods','transfers','technologies','objects']

tagList=[fmNamespace+x for x in tagListShort]

# NOTE(review): the database password is stored here in plain text --
# consider moving it out of the source file.
db =web.database(dbn="postgres", user="dwinter", pw="weikiki7",db="personalwww", host="localhost")

qsSelectTag = "select id from thesaurus_tags where tag_type = $tt and tag_name= $tn"
qsSelect = "select id from thesaurus_projects_tags where project_id = $pi and tag_id= $ti"
qs="insert into thesaurus_projects_tags (project_id,tag_id) values ($pi,$ti)"

for row in dom.findall(".//fm:ROW",ns):
    tags={}
    # Reset for every row: otherwise a row without a projectId element would
    # silently reuse the id of the previous row (or raise a NameError if it
    # is the first row).
    projectID = None

    # iterate the element directly; Element.getchildren() is deprecated
    for ch in row:
        print(ch.tag)
        if ch.tag in tagList:
            # several tags of one type are separated by ";"
            if ch.text is not None:
                tags[ch.tag] = ch.text.split(";")
            else:
                tags[ch.tag] = []
        if ch.tag == fmNamespace+'projectId':
            projectID=ch.text

    if projectID is None:
        # no (or an empty) projectId: there is nothing the tags could be
        # attached to
        logging.warning("skipping ROW without projectId")
        continue

    for tagTypeLong, tagNames in tags.items():
        # the tag type is the element name without the namespace prefix;
        # the type "objects" is stored as "object"
        tagType=tagTypeLong.replace(fmNamespace,'')
        if tagType == "objects":
            tagType ="object"

        for tagName in tagNames:
            # look up the tag or create it
            res = db.query(qsSelectTag,vars={'tt':tagType,'tn':tagName})
            if len(res)==0:
                db.insert('thesaurus_tags',tag_type=tagType,tag_name=tagName)
                res = db.query(qsSelectTag,vars={'tt':tagType,'tn':tagName})

            # now register the project for this tag
            tagID=res[0].id

            tag_ids = db.query(qsSelect,vars={'pi':projectID,'ti':tagID})

            if len(tag_ids) == 0:
                db.query(qs,vars={'pi':projectID,'ti':tagID})
                print(qs)
           
    
               
# persons: objects tagged with persons
# input file taken from http://www.mpiwg-berlin.mpg.de:28080/www/en/research/thesaurus/getPersonsWithProjectIDsJSON
import json

# Import the person tags: personsProjects maps a tag name to a sequence of
# entries whose first element is used as the project id.  Every tag name is
# stored with tag type "person" in thesaurus_tags and linked to its projects
# in thesaurus_projects_tags; existing rows are looked up first, so nothing
# is inserted twice.

# the with-block closes the JSON file as soon as it has been loaded
with open('/usr/local/testzope13/Products/MPIWGThesaurus/examples/getPersonsWithProjectIDsJSON') as fl:
    personsProjects = json.load(fl)

tagType="person"

qsSelectTag = "select id from thesaurus_tags where tag_type = $tt and tag_name= $tn"
qsSelect = "select id from thesaurus_projects_tags where project_id = $pi and tag_id= $ti"
qs="insert into thesaurus_projects_tags (project_id,tag_id) values ($pi,$ti)"

for tagName, projects in personsProjects.items():
    # look up the tag or create it
    res = db.query(qsSelectTag,vars={'tt':tagType,'tn':tagName})
    if len(res)==0:
        db.insert('thesaurus_tags',tag_type=tagType,tag_name=tagName)
        res = db.query(qsSelectTag,vars={'tt':tagType,'tn':tagName})

    # now register the projects for this tag
    tagID=res[0].id

    for proj in projects:
        projectID=proj[0]

        tag_ids = db.query(qsSelect,vars={'pi':projectID,'ti':tagID})

        if len(tag_ids) == 0:
            db.query(qs,vars={'pi':projectID,'ti':tagID})
            print(qs)

# finally add labels:
# for every person tag the foaf:name of the person (the tag name is used as
# the person's URI) is fetched via callSparql and stored as tag_label.

for tagName in personsProjects.keys():
    res = db.query("select id from thesaurus_tags where tag_type = $tt and tag_name= $tn",vars={'tt':tagType,'tn':tagName})

    personID=tagName

    cmdString ="""select * where { <%s> <http://xmlns.com/foaf/0.1/name> ?name}"""%personID

    names= callSparql(cmdString)

    # NOTE(review): lastName and firstName are fetched but not used by the
    # code visible here; if nothing further down relies on them, these two
    # requests per person can be dropped.
    cmdString ="""select * where { <%s> <http://xmlns.com/foaf/0.1/lastName> ?name}"""%personID

    lastName= callSparql(cmdString)

    cmdString ="""select * where { <%s> <http://xmlns.com/foaf/0.1/firstName> ?name}"""%personID

    firstName= callSparql(cmdString)

    # callSparql returns None when the request failed or no result line came
    # back and '' for an empty first row; comparing against '' only would let
    # None through and fail on names.decode().  A tag without a database row
    # has nothing to update either.
    if names and len(res) > 0:
        qs = "update thesaurus_tags set tag_label=$tl where id=$ti"
        print(names)
        # NOTE(review): decoding as latin-1 presumes the endpoint answers in
        # that encoding -- confirm.
        db.query(qs,vars={'tl':names.decode('latin-1'), 'ti':res[0].id})