comparison scripts/migration/scrub_duplicate_emails.sql @ 10:a50cf11e5178

Rewrite LGDataverse completely upgrading to dataverse4.0
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Tue, 08 Sep 2015 17:00:21 +0200
parents
children
comparison
equal deleted inserted replaced
9:5926d6419569 10:a50cf11e5178
1 --------------------
2 --REFERENCE QUERIES
3 --------------------
4 /*
5 -- Query to list all user acocunts with duplicate e-mails
6 select id, username, email from vdcuser
7 where email in (
8 select email from vdcuser
9 group by email
10 having count(*) > 1
11 )
12 order by email
13
14 -- Query to list all e-mails that have are duplicated (total = # of actual users, without duplicates)
15 select email, count(*) from vdcuser
16 group by email
17 having count(*) > 1
18 order by count(*) desc
19
20 -- Query to list all e-mails that have are duplicated and reference to original account (account with lowest id)
21 select u1.id, u1.username, u1.active,u1.email, u2.id, u2.username, u2.active
22 from vdcuser u1, vdcuser u2
23 where 1=1
24 and u1.id != u2.id
25 and u1.email = u2.email
26 and u1.email in (
27 select email from vdcuser
28 group by email
29 having count(*) > 1
30 )
31 and u2.id in (
32 select min(id) from vdcuser
33 group by email
34 having count(*) > 1
35 )
36 order by u1.email
37
38 -- Delete query, to be run after all the updates
39 delete from vdcuser where id in (
40 select u1.id
41 from vdcuser u1, vdcuser u2
42 where 1=1
43 and u1.id != u2.id
44 and u1.email = u2.email
45 and u1.email in (
46 select email from vdcuser
47 group by email
48 having count(*) > 1
49 )
50 and u2.id in (
51 select min(id) from vdcuser
52 group by email
53 having count(*) > 1
54 )
55 )
56
57 */
58 --------------------
59 --UPDATE QUERIES
60 --------------------
61 -- these queries will update the foreign key references in all (relevant) tables to the orignal account
62 --
63 -- Generated by
64 /*
65 SELECT tc.table_schema, tc.constraint_name, tc.table_name, kcu.column_name, ccu.table_name AS foreign_table_name,ccu.column_name AS foreign_column_name,
66 'update ' || tc.table_name || ' ref set ' || kcu.column_name || ' = u2.id
67 from vdcuser u1, vdcuser u2
68 where ref.' || kcu.column_name || ' = u1.id
69 and u1.id != u2.id
70 and u1.email = u2.email
71 and u1.email in (
72 select email from vdcuser
73 group by email
74 having count(*) > 1
75 )
76 and u2.id in (
77 select min(id) from vdcuser
78 group by email
79 having count(*) > 1
80 );' as query
81 FROM information_schema.table_constraints tc
82 JOIN information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name
83 JOIN information_schema.constraint_column_usage ccu ON ccu.constraint_name = tc.constraint_name
84 WHERE constraint_type = 'FOREIGN KEY' AND ccu.table_name='vdcuser';
85 */
86 --
87 --
88 -- if any of the below fail because of duplicate constraints, you will need to first delete the duplicates
89 -- here is a sample query for deleting the duplicate entries from studyfile_vdcuser (the most likey to fail))
90 /*
91 delete from studyfile_vdcuser
92 where allowedusers_id || '_' || studyfiles_id in (
93 select u1.id || '_' || fu1.studyfiles_id
94 from vdcuser u1, vdcuser u2, studyfile_vdcuser fu1, studyfile_vdcuser fu2
95 where 1=1
96 and fu1.studyfiles_id = fu2.studyfiles_id
97 and fu1.allowedusers_id = u1.id
98 and fu2.allowedusers_id = u2.id
99 and u1.id != u2.id
100 and u1.email = u2.email
101 and u1.email in (
102 select email from vdcuser
103 group by email
104 having count(*) > 1
105 )
106 and u2.id in (
107 select min(id) from vdcuser -- also may need to run with max(id) or some other combinations!
108 group by email
109 having count(*) > 1
110 )
111 )
112 */
113
114
115 update flagged_study_comments ref set user_id = u2.id
116 from vdcuser u1, vdcuser u2
117 where ref.user_id = u1.id
118 and u1.id != u2.id
119 and u1.email = u2.email
120 and u1.email in (
121 select email from vdcuser
122 group by email
123 having count(*) > 1
124 )
125 and u2.id in (
126 select min(id) from vdcuser
127 group by email
128 having count(*) > 1
129 );
130
131 update guestbookresponse ref set vdcuser_id = u2.id
132 from vdcuser u1, vdcuser u2
133 where ref.vdcuser_id = u1.id
134 and u1.id != u2.id
135 and u1.email = u2.email
136 and u1.email in (
137 select email from vdcuser
138 group by email
139 having count(*) > 1
140 )
141 and u2.id in (
142 select min(id) from vdcuser
143 group by email
144 having count(*) > 1
145 );
146
147 update harvestingdataverse_vdcuser ref set allowedfileusers_id = u2.id
148 from vdcuser u1, vdcuser u2
149 where ref.allowedfileusers_id = u1.id
150 and u1.id != u2.id
151 and u1.email = u2.email
152 and u1.email in (
153 select email from vdcuser
154 group by email
155 having count(*) > 1
156 )
157 and u2.id in (
158 select min(id) from vdcuser
159 group by email
160 having count(*) > 1
161 );
162
163 update networkrolerequest ref set vdcuser_id = u2.id
164 from vdcuser u1, vdcuser u2
165 where ref.vdcuser_id = u1.id
166 and u1.id != u2.id
167 and u1.email = u2.email
168 and u1.email in (
169 select email from vdcuser
170 group by email
171 having count(*) > 1
172 )
173 and u2.id in (
174 select min(id) from vdcuser
175 group by email
176 having count(*) > 1
177 );
178
179 update rolerequest ref set vdcuser_id = u2.id
180 from vdcuser u1, vdcuser u2
181 where ref.vdcuser_id = u1.id
182 and u1.id != u2.id
183 and u1.email = u2.email
184 and u1.email in (
185 select email from vdcuser
186 group by email
187 having count(*) > 1
188 )
189 and u2.id in (
190 select min(id) from vdcuser
191 group by email
192 having count(*) > 1
193 );
194
195 update studyaccessrequest ref set vdcuser_id = u2.id
196 from vdcuser u1, vdcuser u2
197 where ref.vdcuser_id = u1.id
198 and u1.id != u2.id
199 and u1.email = u2.email
200 and u1.email in (
201 select email from vdcuser
202 group by email
203 having count(*) > 1
204 )
205 and u2.id in (
206 select min(id) from vdcuser
207 group by email
208 having count(*) > 1
209 );
210
211 update studycomment ref set commentcreator_id = u2.id
212 from vdcuser u1, vdcuser u2
213 where ref.commentcreator_id = u1.id
214 and u1.id != u2.id
215 and u1.email = u2.email
216 and u1.email in (
217 select email from vdcuser
218 group by email
219 having count(*) > 1
220 )
221 and u2.id in (
222 select min(id) from vdcuser
223 group by email
224 having count(*) > 1
225 );
226
227 update study ref set creator_id = u2.id
228 from vdcuser u1, vdcuser u2
229 where ref.creator_id = u1.id
230 and u1.id != u2.id
231 and u1.email = u2.email
232 and u1.email in (
233 select email from vdcuser
234 group by email
235 having count(*) > 1
236 )
237 and u2.id in (
238 select min(id) from vdcuser
239 group by email
240 having count(*) > 1
241 );
242
243 update studyfile_vdcuser ref set allowedusers_id = u2.id
244 from vdcuser u1, vdcuser u2
245 where ref.allowedusers_id = u1.id
246 and u1.id != u2.id
247 and u1.email = u2.email
248 and u1.email in (
249 select email from vdcuser
250 group by email
251 having count(*) > 1
252 )
253 and u2.id in (
254 select min(id) from vdcuser
255 group by email
256 having count(*) > 1
257 );
258
259 update study ref set lastupdater_id = u2.id
260 from vdcuser u1, vdcuser u2
261 where ref.lastupdater_id = u1.id
262 and u1.id != u2.id
263 and u1.email = u2.email
264 and u1.email in (
265 select email from vdcuser
266 group by email
267 having count(*) > 1
268 )
269 and u2.id in (
270 select min(id) from vdcuser
271 group by email
272 having count(*) > 1
273 );
274
275 update studylock ref set user_id = u2.id
276 from vdcuser u1, vdcuser u2
277 where ref.user_id = u1.id
278 and u1.id != u2.id
279 and u1.email = u2.email
280 and u1.email in (
281 select email from vdcuser
282 group by email
283 having count(*) > 1
284 )
285 and u2.id in (
286 select min(id) from vdcuser
287 group by email
288 having count(*) > 1
289 );
290
291 update studyrequest ref set vdcuser_id = u2.id
292 from vdcuser u1, vdcuser u2
293 where ref.vdcuser_id = u1.id
294 and u1.id != u2.id
295 and u1.email = u2.email
296 and u1.email in (
297 select email from vdcuser
298 group by email
299 having count(*) > 1
300 )
301 and u2.id in (
302 select min(id) from vdcuser
303 group by email
304 having count(*) > 1
305 );
306
307 update study ref set reviewer_id = u2.id
308 from vdcuser u1, vdcuser u2
309 where ref.reviewer_id = u1.id
310 and u1.id != u2.id
311 and u1.email = u2.email
312 and u1.email in (
313 select email from vdcuser
314 group by email
315 having count(*) > 1
316 )
317 and u2.id in (
318 select min(id) from vdcuser
319 group by email
320 having count(*) > 1
321 );
322
323 update study_vdcuser ref set allowedusers_id = u2.id
324 from vdcuser u1, vdcuser u2
325 where ref.allowedusers_id = u1.id
326 and u1.id != u2.id
327 and u1.email = u2.email
328 and u1.email in (
329 select email from vdcuser
330 group by email
331 having count(*) > 1
332 )
333 and u2.id in (
334 select min(id) from vdcuser
335 group by email
336 having count(*) > 1
337 );
338
339 update vdc ref set creator_id = u2.id
340 from vdcuser u1, vdcuser u2
341 where ref.creator_id = u1.id
342 and u1.id != u2.id
343 and u1.email = u2.email
344 and u1.email in (
345 select email from vdcuser
346 group by email
347 having count(*) > 1
348 )
349 and u2.id in (
350 select min(id) from vdcuser
351 group by email
352 having count(*) > 1
353 );
354
355 update vdc_fileuser ref set allowedfileusers_id = u2.id
356 from vdcuser u1, vdcuser u2
357 where ref.allowedfileusers_id = u1.id
358 and u1.id != u2.id
359 and u1.email = u2.email
360 and u1.email in (
361 select email from vdcuser
362 group by email
363 having count(*) > 1
364 )
365 and u2.id in (
366 select min(id) from vdcuser
367 group by email
368 having count(*) > 1
369 );
370
371 update vdcnetwork ref set creator_id = u2.id
372 from vdcuser u1, vdcuser u2
373 where ref.creator_id = u1.id
374 and u1.id != u2.id
375 and u1.email = u2.email
376 and u1.email in (
377 select email from vdcuser
378 group by email
379 having count(*) > 1
380 )
381 and u2.id in (
382 select min(id) from vdcuser
383 group by email
384 having count(*) > 1
385 );
386
387 update vdcnetwork ref set defaultnetworkadmin_id = u2.id
388 from vdcuser u1, vdcuser u2
389 where ref.defaultnetworkadmin_id = u1.id
390 and u1.id != u2.id
391 and u1.email = u2.email
392 and u1.email in (
393 select email from vdcuser
394 group by email
395 having count(*) > 1
396 )
397 and u2.id in (
398 select min(id) from vdcuser
399 group by email
400 having count(*) > 1
401 );
402
403 update vdc ref set reviewer_id = u2.id
404 from vdcuser u1, vdcuser u2
405 where ref.reviewer_id = u1.id
406 and u1.id != u2.id
407 and u1.email = u2.email
408 and u1.email in (
409 select email from vdcuser
410 group by email
411 having count(*) > 1
412 )
413 and u2.id in (
414 select min(id) from vdcuser
415 group by email
416 having count(*) > 1
417 );
418
419 update vdcrole ref set vdcuser_id = u2.id
420 from vdcuser u1, vdcuser u2
421 where ref.vdcuser_id = u1.id
422 and u1.id != u2.id
423 and u1.email = u2.email
424 and u1.email in (
425 select email from vdcuser
426 group by email
427 having count(*) > 1
428 )
429 and u2.id in (
430 select min(id) from vdcuser
431 group by email
432 having count(*) > 1
433 );
434
435 update vdcuser_studycomment ref set vdcuser_id = u2.id
436 from vdcuser u1, vdcuser u2
437 where ref.vdcuser_id = u1.id
438 and u1.id != u2.id
439 and u1.email = u2.email
440 and u1.email in (
441 select email from vdcuser
442 group by email
443 having count(*) > 1
444 )
445 and u2.id in (
446 select min(id) from vdcuser
447 group by email
448 having count(*) > 1
449 );
450
451 update vdcuser_usergroup ref set users_id = u2.id
452 from vdcuser u1, vdcuser u2
453 where ref.users_id = u1.id
454 and u1.id != u2.id
455 and u1.email = u2.email
456 and u1.email in (
457 select email from vdcuser
458 group by email
459 having count(*) > 1
460 )
461 and u2.id in (
462 select min(id) from vdcuser
463 group by email
464 having count(*) > 1
465 );
466
467 update vdc_fileuser ref set allowedfileusers_id = u2.id
468 from vdcuser u1, vdcuser u2
469 where ref.allowedfileusers_id = u1.id
470 and u1.id != u2.id
471 and u1.email = u2.email
472 and u1.email in (
473 select email from vdcuser
474 group by email
475 having count(*) > 1
476 )
477 and u2.id in (
478 select min(id) from vdcuser
479 group by email
480 having count(*) > 1
481 );
482
483 update versioncontributor ref set contributor_id = u2.id
484 from vdcuser u1, vdcuser u2
485 where ref.contributor_id = u1.id
486 and u1.id != u2.id
487 and u1.email = u2.email
488 and u1.email in (
489 select email from vdcuser
490 group by email
491 having count(*) > 1
492 )
493 and u2.id in (
494 select min(id) from vdcuser
495 group by email
496 having count(*) > 1
497 );