Mercurial > hg > MPIWGThesaurus
comparison migrateThesaurus.py @ 45:efdbe9eb2403
thesaurus jetzt in datenbank
author | dwinter |
---|---|
date | Mon, 25 Nov 2013 08:12:11 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
44:a30307377ed9 | 45:efdbe9eb2403 |
---|---|
1 import xml.etree.ElementTree as etree | |
2 import web | |
3 | |
4 import urllib2 | |
5 import logging | |
6 import urllib | |
7 | |
# Connection settings for the Virtuoso triple store.
# Base URL of the server; callSparql() appends "/sparql" to it.
virtuosoServer = "http://virtuoso.mpiwg-berlin.mpg.de:8890"
# Path on the server; not referenced by the code visible in this file
# (presumably a WebDAV home directory -- TODO confirm).
virtuosoDAV = "/DAV/home/websiteuser/"
# Basic-auth user and password handed to the SPARQL endpoint.
# NOTE(review): the password is stored in clear text in the source; consider
# moving it to configuration outside version control.
virtuosoDAVUser = "websiteuser"
virtuosoDAVPW = "w3s45us3"
# Graph URI; the active request in callSparql() does not send it.
virtuosoGraph = "file://newpersonsFromProjects"
13 | |
14 | |
def callSparql(cmdString):
    """Run a SPARQL query against the Virtuoso endpoint and return the first hit.

    The query is sent form-encoded to ``virtuosoServer + "/sparql"`` through an
    opener carrying HTTP basic-auth credentials for the realm ``sparql``. It
    requests ``text/csv`` output with empty ``default-graph-uri`` and
    ``named-graph-uri`` parameters.

    :param cmdString: SPARQL query text.
    :returns: the second line of the CSV response (the first line holds the
        column names) with every double quote and a trailing carriage return
        removed. This is the empty string when the header line is followed
        only by a line break. ``None`` is returned when the request fails or
        the response has fewer than two lines.
    """
    print(cmdString)
    sparqlURL = virtuosoServer + "/sparql"

    auth_handler = urllib2.HTTPBasicAuthHandler()
    auth_handler.add_password(realm='sparql',
                              uri=sparqlURL,
                              user=virtuosoDAVUser,
                              passwd=virtuosoDAVPW)

    opener = urllib2.build_opener(auth_handler)
    opener.addheaders = [('Content-Type', 'application/sparql-query')]

    params = urllib.urlencode({'query': cmdString,
                               'default-graph-uri': '',
                               'named-graph-uri': '',
                               'format': 'text/csv'})

    logging.debug(cmdString)
    # Log exactly the parameters that are sent, so the log matches the request.
    logging.debug(sparqlURL + "?" + params)
    try:
        r = opener.open(sparqlURL, params)
        try:
            namesTxt = r.read()
        finally:
            # Release the connection even if reading fails.
            r.close()
    except urllib2.HTTPError as e:
        # HTTPError is the URLError subclass that carries a status code and
        # a readable response body.
        logging.error(e.code)
        logging.error(e.read())
        return None
    except urllib2.URLError as e:
        # A plain URLError (e.g. connection refused) only has .reason;
        # accessing .code / .read() here would raise AttributeError.
        logging.error(e.reason)
        return None

    logging.debug(namesTxt)
    names = namesTxt.split("\n")
    # The first line of the response holds the column names, so fewer than
    # two lines means there is no entry.
    if len(names) < 2:
        return None

    # Only the first hit is used; tolerate CRLF line endings in the CSV.
    return names[1].rstrip("\r").replace('"', '')
45 | |
46 | |
47 | |
48 | |
# Pass 1: read the FileMaker XML export and store, for every ROW, its tags in
# thesaurus_tags and the project/tag links in thesaurus_projects_tags.

# Close the export file as soon as it has been parsed.
with open("/usr/local/testzope13/Products/MPIWGThesaurus/examples/ProjectsAndTags.xml") as fl:
    dom = etree.parse(fl)

ns = {'fm': 'http://www.filemaker.com/fmpdsoresult'}

tagListShort = ['spaces', 'approaches', 'disciplines', 'periods', 'transfers', 'technologies', 'objects']

# Fully qualified element names of the tag columns.
tagList = ["{http://www.filemaker.com/fmpdsoresult}" + x for x in tagListShort]

# NOTE(review): database credentials are hard-coded in the source; consider
# moving them to configuration outside version control.
db = web.database(dbn="postgres", user="dwinter", pw="weikiki7", db="personalwww", host="localhost")

for row in dom.findall(".//fm:ROW", ns):
    tags = {}
    # Reset per row so a ROW without projectId cannot reuse the previous id.
    projectID = None
    for ch in row:
        print(ch.tag)
        if ch.tag in tagList:
            # Tag columns hold a ';'-separated list; an empty element has none.
            tags[ch.tag] = ch.text.split(";") if ch.text is not None else []
        if ch.tag == '{http://www.filemaker.com/fmpdsoresult}projectId':
            projectID = ch.text

    if projectID is None:
        logging.warning("ROW without projectId skipped")
        continue

    for tagTypeLong, tagNames in tags.items():
        # Strip the namespace; the type "objects" is stored as "object".
        tagType = tagTypeLong.replace('{http://www.filemaker.com/fmpdsoresult}', '')
        if tagType == "objects":
            tagType = "object"

        for tagName in tagNames:
            # A trailing or doubled ';' yields an empty entry; it is not a tag.
            if not tagName.strip():
                continue

            # Look up the tag or create it.
            res = db.query("select id from thesaurus_tags where tag_type = $tt and tag_name= $tn", vars={'tt': tagType, 'tn': tagName})
            if len(res) == 0:
                db.insert('thesaurus_tags', tag_type=tagType, tag_name=tagName)
                res = db.query("select id from thesaurus_tags where tag_type = $tt and tag_name= $tn", vars={'tt': tagType, 'tn': tagName})

            # Now register the project for this tag, unless already linked.
            tagID = res[0].id
            qsSelect = "select id from thesaurus_projects_tags where project_id = $pi and tag_id= $ti"

            tag_ids = db.query(qsSelect, vars={'pi': projectID, 'ti': tagID})

            if len(tag_ids) == 0:
                qs = "insert into thesaurus_projects_tags (project_id,tag_id) values ($pi,$ti)"
                db.query(qs, vars={'pi': projectID, 'ti': tagID})
                print(qs)
108 | |
109 | |
110 | |
# Pass 2: persons -- objects tagged with persons.
# The input file was fetched from
# http://www.mpiwg-berlin.mpg.de:28080/www/en/research/thesaurus/getPersonsWithProjectIDsJSON
import json

# Close the JSON file as soon as it has been loaded.
with open('/usr/local/testzope13/Products/MPIWGThesaurus/examples/getPersonsWithProjectIDsJSON') as fl:
    personsProjects = json.load(fl)

tagType = "person"

# Each key is stored as a tag name of type "person"; each value is a list
# whose entries carry the project id at index 0.
for tagName, projects in personsProjects.items():
    # Look up the person tag or create it.
    res = db.query("select id from thesaurus_tags where tag_type = $tt and tag_name= $tn", vars={'tt': tagType, 'tn': tagName})
    if len(res) == 0:
        db.insert('thesaurus_tags', tag_type=tagType, tag_name=tagName)
        res = db.query("select id from thesaurus_tags where tag_type = $tt and tag_name= $tn", vars={'tt': tagType, 'tn': tagName})

    # Now register every project for this tag, unless already linked.
    tagID = res[0].id

    for proj in projects:
        projectID = proj[0]

        qsSelect = "select id from thesaurus_projects_tags where project_id = $pi and tag_id= $ti"

        tag_ids = db.query(qsSelect, vars={'pi': projectID, 'ti': tagID})

        if len(tag_ids) == 0:
            qs = "insert into thesaurus_projects_tags (project_id,tag_id) values ($pi,$ti)"
            db.query(qs, vars={'pi': projectID, 'ti': tagID})
            print(qs)
144 | |
# Pass 3: finally add labels -- for every person tag, fetch the foaf:name of
# the person from the triple store and store it as tag_label.

for tagName in personsProjects:
    res = db.query("select id from thesaurus_tags where tag_type = $tt and tag_name= $tn", vars={'tt': tagType, 'tn': tagName})
    if len(res) == 0:
        # No tag row to label; nothing to update.
        continue

    # The tag name is used as the subject URI of the person.
    personID = tagName

    cmdString = """select * where { <%s> <http://xmlns.com/foaf/0.1/name> ?name}""" % personID

    names = callSparql(cmdString)

    # callSparql() returns None when the request fails or yields no row, and
    # may return an empty string; neither is a usable label.
    if not names:
        continue

    qs = "update thesaurus_tags set tag_label=$tl where id=$ti"
    print(names)
    db.query(qs, vars={'tl': names.decode('latin-1'), 'ti': res[0].id})
168 | |
169 | |
170 | |
171 | |
172 | |
173 | |
174 | |
175 | |
176 | |
177 | |
178 |