annotate migrateThesaurus.py @ 50:8bfd713e5d99

closed: small bug in export (https://otrs.mpiwg-berlin.mpg.de/otrs/index.pl?Action=AgentTicketZoom;TicketID=38)
author dwinter
date Thu, 05 Dec 2013 13:24:43 +0100
parents efdbe9eb2403
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
45
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
1 import xml.etree.ElementTree as etree
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
2 import web
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
3
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
4 import urllib2
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
5 import logging
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
6 import urllib
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
7
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
8 virtuosoServer="http://virtuoso.mpiwg-berlin.mpg.de:8890"
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
9 virtuosoDAV="/DAV/home/websiteuser/"
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
10 virtuosoDAVUser="websiteuser"
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
11 virtuosoDAVPW="w3s45us3"
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
12 virtuosoGraph="file://newpersonsFromProjects"
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
13
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
14
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
15 def callSparql(cmdString):
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
16
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
17 print cmdString
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
18 auth_handler = urllib2.HTTPBasicAuthHandler()
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
19 auth_handler.add_password(realm='sparql',
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
20 uri=virtuosoServer+"/sparql",
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
21 user=virtuosoDAVUser,
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
22 passwd=virtuosoDAVPW)
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
23
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
24 opener = urllib2.build_opener(auth_handler)
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
25 opener.addheaders = [('Content-Type','application/sparql-query')]
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
26
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
27 logging.debug(cmdString)
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
28 try:
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
29 logging.debug(virtuosoServer+"/sparql?" + urllib.urlencode({'query':cmdString,'default-graph-uri':virtuosoGraph,'named-graph-uri':'','format':'text/csv'}))
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
30 #r= opener.open(virtuosoServer+"/sparql", urllib.urlencode({'query':cmdString,'default-graph-uri':virtuosoGraph,'named-graph-uri':'','format':'text/csv'}))
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
31 r= opener.open(virtuosoServer+"/sparql", urllib.urlencode({'query':cmdString,'default-graph-uri':'','named-graph-uri':'','format':'text/csv'}))
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
32 namesTxt=r.read()
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
33 except urllib2.URLError, e:
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
34 logging.error(e.code)
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
35 logging.error(e.read())
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
36
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
37
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
38 return
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
39 logging.debug(namesTxt)
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
40 names=namesTxt.split("\n")
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
41 if len(names) < 2: #in der ersten Zeile stehen bei der Rueckgabe die Spaltennamen, <2 heiss also es gibt keinen Eintrag
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
42 return
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
43
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
44 return names[1].replace('"','') # wir nehmen nur den ersten treffer
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
45
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
46
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
47
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
48
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
49 fl = file("/usr/local/testzope13/Products/MPIWGThesaurus/examples/ProjectsAndTags.xml")
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
50 dom = etree.parse(fl)
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
51
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
52 ns = {'fm':'http://www.filemaker.com/fmpdsoresult'}
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
53
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
54 tagListShort=['spaces','approaches','disciplines','periods','transfers','technologies','objects']
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
55
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
56 tagList=["{http://www.filemaker.com/fmpdsoresult}"+x for x in tagListShort]
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
57
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
58 db =web.database(dbn="postgres", user="dwinter", pw="weikiki7",db="personalwww", host="localhost")
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
59
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
60
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
61
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
62 for row in dom.findall(".//fm:ROW",ns):
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
63 tags={}
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
64 chds = row.getchildren();
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
65 for ch in chds:
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
66 print ch.tag
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
67 if ch.tag in tagList:
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
68
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
69
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
70 if ch.text is not None:
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
71 tags[ch.tag] = ch.text.split(";")
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
72 else:
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
73 tags[ch.tag] = []
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
74 if ch.tag == '{http://www.filemaker.com/fmpdsoresult}projectId':
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
75 projectID=ch.text
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
76
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
77
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
78
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
79 for tagTypeLong in tags.keys():
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
80
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
81
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
82 for tagName in tags[tagTypeLong]:
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
83 #suche nach tag order lege an
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
84
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
85 tagType=tagTypeLong.replace('{http://www.filemaker.com/fmpdsoresult}','')
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
86 if tagType == "objects":
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
87 tagType ="object"
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
88
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
89 res = db.query("select id from thesaurus_tags where tag_type = $tt and tag_name= $tn",vars={'tt':tagType,'tn':tagName})
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
90 if len(res)==0:
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
91 db.insert('thesaurus_tags',tag_type=tagType,tag_name=tagName);
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
92 res = db.query("select id from thesaurus_tags where tag_type = $tt and tag_name= $tn",vars={'tt':tagType,'tn':tagName})
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
93
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
94
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
95 #trage jetzt das projekt ein
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
96 tagID=res[0].id
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
97 qsSelect = "select id from thesaurus_projects_tags where project_id = $pi and tag_id= $ti"
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
98
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
99 tag_ids = db.query(qsSelect,vars={'pi':projectID,'ti':tagID})
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
100
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
101 if len (tag_ids) == 0:
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
102
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
103 qs="insert into thesaurus_projects_tags (project_id,tag_id) values ($pi,$ti)"
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
104
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
105 db.query(qs,vars={'pi':projectID,'ti':tagID})
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
106
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
107 print qs
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
108
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
109
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
110
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
111 #personen personen getaggte objekte
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
112 # file von http://www.mpiwg-berlin.mpg.de:28080/www/en/research/thesaurus/getPersonsWithProjectIDsJSON
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
113 import json
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
114
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
115 fl = file('/usr/local/testzope13/Products/MPIWGThesaurus/examples/getPersonsWithProjectIDsJSON')
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
116 personsProjects = json.load(fl)
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
117
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
118 tagType="person"
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
119
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
120 for tagName in personsProjects.keys():
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
121 res = db.query("select id from thesaurus_tags where tag_type = $tt and tag_name= $tn",vars={'tt':tagType,'tn':tagName})
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
122 if len(res)==0:
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
123 db.insert('thesaurus_tags',tag_type=tagType,tag_name=tagName);
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
124 res = db.query("select id from thesaurus_tags where tag_type = $tt and tag_name= $tn",vars={'tt':tagType,'tn':tagName})
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
125
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
126
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
127 #trage jetzt das projekt ein
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
128 tagID=res[0].id
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
129
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
130 for proj in personsProjects.get(tagName):
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
131 projectID=proj[0]
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
132
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
133 qsSelect = "select id from thesaurus_projects_tags where project_id = $pi and tag_id= $ti"
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
134
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
135 tag_ids = db.query(qsSelect,vars={'pi':projectID,'ti':tagID})
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
136
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
137 if len (tag_ids) == 0:
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
138
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
139 qs="insert into thesaurus_projects_tags (project_id,tag_id) values ($pi,$ti)"
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
140
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
141 db.query(qs,vars={'pi':projectID,'ti':tagID})
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
142
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
143 print qs
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
144
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
145 #finally ad labels:
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
146
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
147 for tagName in personsProjects.keys():
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
148 res = db.query("select id from thesaurus_tags where tag_type = $tt and tag_name= $tn",vars={'tt':tagType,'tn':tagName})
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
149
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
150 personID=tagName
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
151
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
152 cmdString ="""select * where { <%s> <http://xmlns.com/foaf/0.1/name> ?name}"""%personID
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
153
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
154 names= callSparql(cmdString)
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
155
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
156 cmdString ="""select * where { <%s> <http://xmlns.com/foaf/0.1/lastName> ?name}"""%personID
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
157
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
158 lastName= callSparql(cmdString)
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
159
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
160 cmdString ="""select * where { <%s> <http://xmlns.com/foaf/0.1/firstName> ?name}"""%personID
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
161
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
162 firstName= callSparql(cmdString)
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
163
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
164 if names != '':
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
165 qs = "update thesaurus_tags set tag_label=$tl where id=$ti"
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
166 print names
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
167 db.query(qs,vars={'tl':names.decode('latin-1'), 'ti':res[0].id})
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
168
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
169
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
170
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
171
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
172
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
173
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
174
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
175
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
176
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
177
efdbe9eb2403 thesaurus jetzt in datenbank
dwinter
parents:
diff changeset
178