comparison migrateThesaurus.py @ 45:efdbe9eb2403

thesaurus jetzt in datenbank
author dwinter
date Mon, 25 Nov 2013 08:12:11 +0100
parents
children
comparison
equal deleted inserted replaced
44:a30307377ed9 45:efdbe9eb2403
1 import xml.etree.ElementTree as etree
2 import web
3
4 import urllib2
5 import logging
6 import urllib
7
# Connection settings for the Virtuoso server that answers the SPARQL queries.
# NOTE(review): the WebDAV user name and password are hard-coded in the
# source; consider moving them to configuration that is not kept under
# version control.
virtuosoServer = "http://virtuoso.mpiwg-berlin.mpg.de:8890"
virtuosoDAV = "/DAV/home/websiteuser/"
virtuosoDAVUser = "websiteuser"
virtuosoDAVPW = "w3s45us3"
virtuosoGraph = "file://newpersonsFromProjects"
13
14
def callSparql(cmdString):
    """Run a SPARQL query against the Virtuoso endpoint and return the first hit.

    The query is POSTed to ``virtuosoServer + "/sparql"`` using HTTP basic
    authentication (``virtuosoDAVUser`` / ``virtuosoDAVPW``) and the result
    is requested as ``text/csv``.

    Args:
        cmdString: the SPARQL query text.

    Returns:
        The second line of the CSV response (the first line holds the column
        names) with all double quotes removed.  This is the empty string when
        the header line is followed only by a line break, and ``None`` when
        the request fails or the response has fewer than two lines.
    """
    print(cmdString)
    endpoint = virtuosoServer + "/sparql"

    auth_handler = urllib2.HTTPBasicAuthHandler()
    auth_handler.add_password(realm='sparql',
                              uri=endpoint,
                              user=virtuosoDAVUser,
                              passwd=virtuosoDAVPW)

    opener = urllib2.build_opener(auth_handler)
    opener.addheaders = [('Content-Type', 'application/sparql-query')]

    logging.debug(cmdString)
    # The logged URL names virtuosoGraph as the default graph, whereas the
    # request below is sent with an empty default-graph-uri.
    logging.debug(endpoint + "?" + urllib.urlencode({'query':cmdString,'default-graph-uri':virtuosoGraph,'named-graph-uri':'','format':'text/csv'}))
    postData = urllib.urlencode({'query':cmdString,'default-graph-uri':'','named-graph-uri':'','format':'text/csv'})

    try:
        r = opener.open(endpoint, postData)
        try:
            namesTxt = r.read()
        finally:
            # Always release the connection, even if reading fails.
            r.close()
    except urllib2.HTTPError as e:
        # Only HTTPError carries a status code and a readable response body.
        logging.error(e.code)
        logging.error(e.read())
        return None
    except urllib2.URLError as e:
        # Connection-level failures (DNS, refused connection, ...) only
        # provide a reason; they have neither .code nor .read().
        logging.error(e.reason)
        return None

    logging.debug(namesTxt)
    names = namesTxt.split("\n")
    # The first line of the response holds the column names, so fewer than
    # two lines means there is no entry.
    if len(names) < 2:
        return None

    # Only the first hit is used.  A trailing carriage return is removed in
    # case the server terminates CSV lines with CRLF.
    return names[1].replace('"', '').rstrip("\r")
45
46
47
48
# --- Step 1: project tags from the FileMaker XML export ----------------------
# Parse the export and close the file as soon as the tree has been built.
# The file is opened in binary mode so the XML parser handles the encoding.
with open("/usr/local/testzope13/Products/MPIWGThesaurus/examples/ProjectsAndTags.xml", "rb") as fl:
    dom = etree.parse(fl)

ns = {'fm': 'http://www.filemaker.com/fmpdsoresult'}

tagListShort = ['spaces', 'approaches', 'disciplines', 'periods', 'transfers', 'technologies', 'objects']

# Fully qualified element names of the tag columns.
tagList = ["{http://www.filemaker.com/fmpdsoresult}" + x for x in tagListShort]

# NOTE(review): the database credentials are hard-coded in the source.
db = web.database(dbn="postgres", user="dwinter", pw="weikiki7", db="personalwww", host="localhost")

for row in dom.findall(".//fm:ROW", ns):
    tags = {}
    # Reset per row so that a row without a projectId element cannot reuse
    # the ID of the previous row.
    projectID = None

    for ch in row:
        print(ch.tag)
        if ch.tag in tagList:
            if ch.text is not None:
                # Tags are separated by ";".  Blank entries (for example
                # after a trailing ";") are dropped instead of being stored
                # as tags with an empty name.
                tags[ch.tag] = [t for t in ch.text.split(";") if t.strip()]
            else:
                tags[ch.tag] = []
        if ch.tag == '{http://www.filemaker.com/fmpdsoresult}projectId':
            projectID = ch.text

    if projectID is None:
        logging.warning("row without projectId skipped")
        continue

    for tagTypeLong, tagNames in tags.items():
        # Strip the namespace; the tag type for "objects" is stored as "object".
        tagType = tagTypeLong.replace('{http://www.filemaker.com/fmpdsoresult}', '')
        if tagType == "objects":
            tagType = "object"

        for tagName in tagNames:
            # Look the tag up, creating it first if it does not exist yet.
            res = db.query("select id from thesaurus_tags where tag_type = $tt and tag_name= $tn", vars={'tt': tagType, 'tn': tagName})
            if len(res) == 0:
                db.insert('thesaurus_tags', tag_type=tagType, tag_name=tagName)
                res = db.query("select id from thesaurus_tags where tag_type = $tt and tag_name= $tn", vars={'tt': tagType, 'tn': tagName})

            # Now link the project to the tag, unless the link already exists.
            tagID = res[0].id
            qsSelect = "select id from thesaurus_projects_tags where project_id = $pi and tag_id= $ti"

            tag_ids = db.query(qsSelect, vars={'pi': projectID, 'ti': tagID})

            if len(tag_ids) == 0:
                qs = "insert into thesaurus_projects_tags (project_id,tag_id) values ($pi,$ti)"
                db.query(qs, vars={'pi': projectID, 'ti': tagID})
                print(qs)
108
109
110
# --- Step 2: persons as tags on projects --------------------------------------
# The input file was fetched from
# http://www.mpiwg-berlin.mpg.de:28080/www/en/research/thesaurus/getPersonsWithProjectIDsJSON
import json

# Load the mapping and close the file right away.
with open('/usr/local/testzope13/Products/MPIWGThesaurus/examples/getPersonsWithProjectIDsJSON') as fl:
    personsProjects = json.load(fl)

tagType = "person"

for tagName, projects in personsProjects.items():
    # Look the person tag up, creating it first if it does not exist yet.
    res = db.query("select id from thesaurus_tags where tag_type = $tt and tag_name= $tn", vars={'tt': tagType, 'tn': tagName})
    if len(res) == 0:
        db.insert('thesaurus_tags', tag_type=tagType, tag_name=tagName)
        res = db.query("select id from thesaurus_tags where tag_type = $tt and tag_name= $tn", vars={'tt': tagType, 'tn': tagName})

    # Now link every project of this person to the tag.
    tagID = res[0].id

    for proj in projects:
        # The first element of each project entry is used as the project ID.
        projectID = proj[0]

        qsSelect = "select id from thesaurus_projects_tags where project_id = $pi and tag_id= $ti"

        tag_ids = db.query(qsSelect, vars={'pi': projectID, 'ti': tagID})

        # Insert the link only if it is not present yet.
        if len(tag_ids) == 0:
            qs = "insert into thesaurus_projects_tags (project_id,tag_id) values ($pi,$ti)"
            db.query(qs, vars={'pi': projectID, 'ti': tagID})
            print(qs)
144
# --- Step 3: finally add labels to the person tags ----------------------------
for tagName in personsProjects.keys():
    res = db.query("select id from thesaurus_tags where tag_type = $tt and tag_name= $tn", vars={'tt': tagType, 'tn': tagName})
    if len(res) == 0:
        # Without a tag row there is nothing to label.
        logging.warning("no thesaurus tag found for person %s", tagName)
        continue

    # The tag name of a person tag is used as the subject URI of the query.
    personID = tagName

    # Only foaf:name is needed for the label.
    cmdString = """select * where { <%s> <http://xmlns.com/foaf/0.1/name> ?name}""" % personID
    names = callSparql(cmdString)

    # callSparql returns None when the request fails and '' when there is no
    # hit; in both cases the label is left untouched.
    if names:
        qs = "update thesaurus_tags set tag_label=$tl where id=$ti"
        print(names)
        # NOTE(review): the response bytes are interpreted as latin-1 --
        # confirm the encoding the endpoint actually uses.
        db.query(qs, vars={'tl': names.decode('latin-1'), 'ti': res[0].id})
168
169
170
171
172
173
174
175
176
177
178