Mercurial > hg > LGDataverses
comparison scripts/migration/scrub_duplicate_emails.sql @ 10:a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
| author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
|---|---|
| date | Tue, 08 Sep 2015 17:00:21 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 9:5926d6419569 | 10:a50cf11e5178 |
|---|---|
| 1 -------------------- | |
| 2 --REFERENCE QUERIES | |
| 3 -------------------- | |
| 4 /* | |
| 5 -- Query to list all user acocunts with duplicate e-mails | |
| 6 select id, username, email from vdcuser | |
| 7 where email in ( | |
| 8 select email from vdcuser | |
| 9 group by email | |
| 10 having count(*) > 1 | |
| 11 ) | |
| 12 order by email | |
| 13 | |
| 14 -- Query to list all e-mails that have are duplicated (total = # of actual users, without duplicates) | |
| 15 select email, count(*) from vdcuser | |
| 16 group by email | |
| 17 having count(*) > 1 | |
| 18 order by count(*) desc | |
| 19 | |
| 20 -- Query to list all e-mails that have are duplicated and reference to original account (account with lowest id) | |
| 21 select u1.id, u1.username, u1.active,u1.email, u2.id, u2.username, u2.active | |
| 22 from vdcuser u1, vdcuser u2 | |
| 23 where 1=1 | |
| 24 and u1.id != u2.id | |
| 25 and u1.email = u2.email | |
| 26 and u1.email in ( | |
| 27 select email from vdcuser | |
| 28 group by email | |
| 29 having count(*) > 1 | |
| 30 ) | |
| 31 and u2.id in ( | |
| 32 select min(id) from vdcuser | |
| 33 group by email | |
| 34 having count(*) > 1 | |
| 35 ) | |
| 36 order by u1.email | |
| 37 | |
| 38 -- Delete query, to be run after all the updates | |
| 39 delete from vdcuser where id in ( | |
| 40 select u1.id | |
| 41 from vdcuser u1, vdcuser u2 | |
| 42 where 1=1 | |
| 43 and u1.id != u2.id | |
| 44 and u1.email = u2.email | |
| 45 and u1.email in ( | |
| 46 select email from vdcuser | |
| 47 group by email | |
| 48 having count(*) > 1 | |
| 49 ) | |
| 50 and u2.id in ( | |
| 51 select min(id) from vdcuser | |
| 52 group by email | |
| 53 having count(*) > 1 | |
| 54 ) | |
| 55 ) | |
| 56 | |
| 57 */ | |
| 58 -------------------- | |
| 59 --UPDATE QUERIES | |
| 60 -------------------- | |
| 61 -- these queries will update the foreign key references in all (relevant) tables to the orignal account | |
| 62 -- | |
| 63 -- Generated by | |
| 64 /* | |
| 65 SELECT tc.table_schema, tc.constraint_name, tc.table_name, kcu.column_name, ccu.table_name AS foreign_table_name,ccu.column_name AS foreign_column_name, | |
| 66 'update ' || tc.table_name || ' ref set ' || kcu.column_name || ' = u2.id | |
| 67 from vdcuser u1, vdcuser u2 | |
| 68 where ref.' || kcu.column_name || ' = u1.id | |
| 69 and u1.id != u2.id | |
| 70 and u1.email = u2.email | |
| 71 and u1.email in ( | |
| 72 select email from vdcuser | |
| 73 group by email | |
| 74 having count(*) > 1 | |
| 75 ) | |
| 76 and u2.id in ( | |
| 77 select min(id) from vdcuser | |
| 78 group by email | |
| 79 having count(*) > 1 | |
| 80 );' as query | |
| 81 FROM information_schema.table_constraints tc | |
| 82 JOIN information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name | |
| 83 JOIN information_schema.constraint_column_usage ccu ON ccu.constraint_name = tc.constraint_name | |
| 84 WHERE constraint_type = 'FOREIGN KEY' AND ccu.table_name='vdcuser'; | |
| 85 */ | |
| 86 -- | |
| 87 -- | |
| 88 -- if any of the below fail because of duplicate constraints, you will need to first delete the duplicates | |
| 89 -- here is a sample query for deleting the duplicate entries from studyfile_vdcuser (the most likey to fail)) | |
| 90 /* | |
| 91 delete from studyfile_vdcuser | |
| 92 where allowedusers_id || '_' || studyfiles_id in ( | |
| 93 select u1.id || '_' || fu1.studyfiles_id | |
| 94 from vdcuser u1, vdcuser u2, studyfile_vdcuser fu1, studyfile_vdcuser fu2 | |
| 95 where 1=1 | |
| 96 and fu1.studyfiles_id = fu2.studyfiles_id | |
| 97 and fu1.allowedusers_id = u1.id | |
| 98 and fu2.allowedusers_id = u2.id | |
| 99 and u1.id != u2.id | |
| 100 and u1.email = u2.email | |
| 101 and u1.email in ( | |
| 102 select email from vdcuser | |
| 103 group by email | |
| 104 having count(*) > 1 | |
| 105 ) | |
| 106 and u2.id in ( | |
| 107 select min(id) from vdcuser -- also may need to run with max(id) or some other combinations! | |
| 108 group by email | |
| 109 having count(*) > 1 | |
| 110 ) | |
| 111 ) | |
| 112 */ | |
| 113 | |
| 114 | |
| 115 update flagged_study_comments ref set user_id = u2.id | |
| 116 from vdcuser u1, vdcuser u2 | |
| 117 where ref.user_id = u1.id | |
| 118 and u1.id != u2.id | |
| 119 and u1.email = u2.email | |
| 120 and u1.email in ( | |
| 121 select email from vdcuser | |
| 122 group by email | |
| 123 having count(*) > 1 | |
| 124 ) | |
| 125 and u2.id in ( | |
| 126 select min(id) from vdcuser | |
| 127 group by email | |
| 128 having count(*) > 1 | |
| 129 ); | |
| 130 | |
| 131 update guestbookresponse ref set vdcuser_id = u2.id | |
| 132 from vdcuser u1, vdcuser u2 | |
| 133 where ref.vdcuser_id = u1.id | |
| 134 and u1.id != u2.id | |
| 135 and u1.email = u2.email | |
| 136 and u1.email in ( | |
| 137 select email from vdcuser | |
| 138 group by email | |
| 139 having count(*) > 1 | |
| 140 ) | |
| 141 and u2.id in ( | |
| 142 select min(id) from vdcuser | |
| 143 group by email | |
| 144 having count(*) > 1 | |
| 145 ); | |
| 146 | |
| 147 update harvestingdataverse_vdcuser ref set allowedfileusers_id = u2.id | |
| 148 from vdcuser u1, vdcuser u2 | |
| 149 where ref.allowedfileusers_id = u1.id | |
| 150 and u1.id != u2.id | |
| 151 and u1.email = u2.email | |
| 152 and u1.email in ( | |
| 153 select email from vdcuser | |
| 154 group by email | |
| 155 having count(*) > 1 | |
| 156 ) | |
| 157 and u2.id in ( | |
| 158 select min(id) from vdcuser | |
| 159 group by email | |
| 160 having count(*) > 1 | |
| 161 ); | |
| 162 | |
| 163 update networkrolerequest ref set vdcuser_id = u2.id | |
| 164 from vdcuser u1, vdcuser u2 | |
| 165 where ref.vdcuser_id = u1.id | |
| 166 and u1.id != u2.id | |
| 167 and u1.email = u2.email | |
| 168 and u1.email in ( | |
| 169 select email from vdcuser | |
| 170 group by email | |
| 171 having count(*) > 1 | |
| 172 ) | |
| 173 and u2.id in ( | |
| 174 select min(id) from vdcuser | |
| 175 group by email | |
| 176 having count(*) > 1 | |
| 177 ); | |
| 178 | |
| 179 update rolerequest ref set vdcuser_id = u2.id | |
| 180 from vdcuser u1, vdcuser u2 | |
| 181 where ref.vdcuser_id = u1.id | |
| 182 and u1.id != u2.id | |
| 183 and u1.email = u2.email | |
| 184 and u1.email in ( | |
| 185 select email from vdcuser | |
| 186 group by email | |
| 187 having count(*) > 1 | |
| 188 ) | |
| 189 and u2.id in ( | |
| 190 select min(id) from vdcuser | |
| 191 group by email | |
| 192 having count(*) > 1 | |
| 193 ); | |
| 194 | |
| 195 update studyaccessrequest ref set vdcuser_id = u2.id | |
| 196 from vdcuser u1, vdcuser u2 | |
| 197 where ref.vdcuser_id = u1.id | |
| 198 and u1.id != u2.id | |
| 199 and u1.email = u2.email | |
| 200 and u1.email in ( | |
| 201 select email from vdcuser | |
| 202 group by email | |
| 203 having count(*) > 1 | |
| 204 ) | |
| 205 and u2.id in ( | |
| 206 select min(id) from vdcuser | |
| 207 group by email | |
| 208 having count(*) > 1 | |
| 209 ); | |
| 210 | |
| 211 update studycomment ref set commentcreator_id = u2.id | |
| 212 from vdcuser u1, vdcuser u2 | |
| 213 where ref.commentcreator_id = u1.id | |
| 214 and u1.id != u2.id | |
| 215 and u1.email = u2.email | |
| 216 and u1.email in ( | |
| 217 select email from vdcuser | |
| 218 group by email | |
| 219 having count(*) > 1 | |
| 220 ) | |
| 221 and u2.id in ( | |
| 222 select min(id) from vdcuser | |
| 223 group by email | |
| 224 having count(*) > 1 | |
| 225 ); | |
| 226 | |
| 227 update study ref set creator_id = u2.id | |
| 228 from vdcuser u1, vdcuser u2 | |
| 229 where ref.creator_id = u1.id | |
| 230 and u1.id != u2.id | |
| 231 and u1.email = u2.email | |
| 232 and u1.email in ( | |
| 233 select email from vdcuser | |
| 234 group by email | |
| 235 having count(*) > 1 | |
| 236 ) | |
| 237 and u2.id in ( | |
| 238 select min(id) from vdcuser | |
| 239 group by email | |
| 240 having count(*) > 1 | |
| 241 ); | |
| 242 | |
| 243 update studyfile_vdcuser ref set allowedusers_id = u2.id | |
| 244 from vdcuser u1, vdcuser u2 | |
| 245 where ref.allowedusers_id = u1.id | |
| 246 and u1.id != u2.id | |
| 247 and u1.email = u2.email | |
| 248 and u1.email in ( | |
| 249 select email from vdcuser | |
| 250 group by email | |
| 251 having count(*) > 1 | |
| 252 ) | |
| 253 and u2.id in ( | |
| 254 select min(id) from vdcuser | |
| 255 group by email | |
| 256 having count(*) > 1 | |
| 257 ); | |
| 258 | |
| 259 update study ref set lastupdater_id = u2.id | |
| 260 from vdcuser u1, vdcuser u2 | |
| 261 where ref.lastupdater_id = u1.id | |
| 262 and u1.id != u2.id | |
| 263 and u1.email = u2.email | |
| 264 and u1.email in ( | |
| 265 select email from vdcuser | |
| 266 group by email | |
| 267 having count(*) > 1 | |
| 268 ) | |
| 269 and u2.id in ( | |
| 270 select min(id) from vdcuser | |
| 271 group by email | |
| 272 having count(*) > 1 | |
| 273 ); | |
| 274 | |
| 275 update studylock ref set user_id = u2.id | |
| 276 from vdcuser u1, vdcuser u2 | |
| 277 where ref.user_id = u1.id | |
| 278 and u1.id != u2.id | |
| 279 and u1.email = u2.email | |
| 280 and u1.email in ( | |
| 281 select email from vdcuser | |
| 282 group by email | |
| 283 having count(*) > 1 | |
| 284 ) | |
| 285 and u2.id in ( | |
| 286 select min(id) from vdcuser | |
| 287 group by email | |
| 288 having count(*) > 1 | |
| 289 ); | |
| 290 | |
| 291 update studyrequest ref set vdcuser_id = u2.id | |
| 292 from vdcuser u1, vdcuser u2 | |
| 293 where ref.vdcuser_id = u1.id | |
| 294 and u1.id != u2.id | |
| 295 and u1.email = u2.email | |
| 296 and u1.email in ( | |
| 297 select email from vdcuser | |
| 298 group by email | |
| 299 having count(*) > 1 | |
| 300 ) | |
| 301 and u2.id in ( | |
| 302 select min(id) from vdcuser | |
| 303 group by email | |
| 304 having count(*) > 1 | |
| 305 ); | |
| 306 | |
| 307 update study ref set reviewer_id = u2.id | |
| 308 from vdcuser u1, vdcuser u2 | |
| 309 where ref.reviewer_id = u1.id | |
| 310 and u1.id != u2.id | |
| 311 and u1.email = u2.email | |
| 312 and u1.email in ( | |
| 313 select email from vdcuser | |
| 314 group by email | |
| 315 having count(*) > 1 | |
| 316 ) | |
| 317 and u2.id in ( | |
| 318 select min(id) from vdcuser | |
| 319 group by email | |
| 320 having count(*) > 1 | |
| 321 ); | |
| 322 | |
| 323 update study_vdcuser ref set allowedusers_id = u2.id | |
| 324 from vdcuser u1, vdcuser u2 | |
| 325 where ref.allowedusers_id = u1.id | |
| 326 and u1.id != u2.id | |
| 327 and u1.email = u2.email | |
| 328 and u1.email in ( | |
| 329 select email from vdcuser | |
| 330 group by email | |
| 331 having count(*) > 1 | |
| 332 ) | |
| 333 and u2.id in ( | |
| 334 select min(id) from vdcuser | |
| 335 group by email | |
| 336 having count(*) > 1 | |
| 337 ); | |
| 338 | |
| 339 update vdc ref set creator_id = u2.id | |
| 340 from vdcuser u1, vdcuser u2 | |
| 341 where ref.creator_id = u1.id | |
| 342 and u1.id != u2.id | |
| 343 and u1.email = u2.email | |
| 344 and u1.email in ( | |
| 345 select email from vdcuser | |
| 346 group by email | |
| 347 having count(*) > 1 | |
| 348 ) | |
| 349 and u2.id in ( | |
| 350 select min(id) from vdcuser | |
| 351 group by email | |
| 352 having count(*) > 1 | |
| 353 ); | |
| 354 | |
| 355 update vdc_fileuser ref set allowedfileusers_id = u2.id | |
| 356 from vdcuser u1, vdcuser u2 | |
| 357 where ref.allowedfileusers_id = u1.id | |
| 358 and u1.id != u2.id | |
| 359 and u1.email = u2.email | |
| 360 and u1.email in ( | |
| 361 select email from vdcuser | |
| 362 group by email | |
| 363 having count(*) > 1 | |
| 364 ) | |
| 365 and u2.id in ( | |
| 366 select min(id) from vdcuser | |
| 367 group by email | |
| 368 having count(*) > 1 | |
| 369 ); | |
| 370 | |
| 371 update vdcnetwork ref set creator_id = u2.id | |
| 372 from vdcuser u1, vdcuser u2 | |
| 373 where ref.creator_id = u1.id | |
| 374 and u1.id != u2.id | |
| 375 and u1.email = u2.email | |
| 376 and u1.email in ( | |
| 377 select email from vdcuser | |
| 378 group by email | |
| 379 having count(*) > 1 | |
| 380 ) | |
| 381 and u2.id in ( | |
| 382 select min(id) from vdcuser | |
| 383 group by email | |
| 384 having count(*) > 1 | |
| 385 ); | |
| 386 | |
| 387 update vdcnetwork ref set defaultnetworkadmin_id = u2.id | |
| 388 from vdcuser u1, vdcuser u2 | |
| 389 where ref.defaultnetworkadmin_id = u1.id | |
| 390 and u1.id != u2.id | |
| 391 and u1.email = u2.email | |
| 392 and u1.email in ( | |
| 393 select email from vdcuser | |
| 394 group by email | |
| 395 having count(*) > 1 | |
| 396 ) | |
| 397 and u2.id in ( | |
| 398 select min(id) from vdcuser | |
| 399 group by email | |
| 400 having count(*) > 1 | |
| 401 ); | |
| 402 | |
| 403 update vdc ref set reviewer_id = u2.id | |
| 404 from vdcuser u1, vdcuser u2 | |
| 405 where ref.reviewer_id = u1.id | |
| 406 and u1.id != u2.id | |
| 407 and u1.email = u2.email | |
| 408 and u1.email in ( | |
| 409 select email from vdcuser | |
| 410 group by email | |
| 411 having count(*) > 1 | |
| 412 ) | |
| 413 and u2.id in ( | |
| 414 select min(id) from vdcuser | |
| 415 group by email | |
| 416 having count(*) > 1 | |
| 417 ); | |
| 418 | |
| 419 update vdcrole ref set vdcuser_id = u2.id | |
| 420 from vdcuser u1, vdcuser u2 | |
| 421 where ref.vdcuser_id = u1.id | |
| 422 and u1.id != u2.id | |
| 423 and u1.email = u2.email | |
| 424 and u1.email in ( | |
| 425 select email from vdcuser | |
| 426 group by email | |
| 427 having count(*) > 1 | |
| 428 ) | |
| 429 and u2.id in ( | |
| 430 select min(id) from vdcuser | |
| 431 group by email | |
| 432 having count(*) > 1 | |
| 433 ); | |
| 434 | |
| 435 update vdcuser_studycomment ref set vdcuser_id = u2.id | |
| 436 from vdcuser u1, vdcuser u2 | |
| 437 where ref.vdcuser_id = u1.id | |
| 438 and u1.id != u2.id | |
| 439 and u1.email = u2.email | |
| 440 and u1.email in ( | |
| 441 select email from vdcuser | |
| 442 group by email | |
| 443 having count(*) > 1 | |
| 444 ) | |
| 445 and u2.id in ( | |
| 446 select min(id) from vdcuser | |
| 447 group by email | |
| 448 having count(*) > 1 | |
| 449 ); | |
| 450 | |
| 451 update vdcuser_usergroup ref set users_id = u2.id | |
| 452 from vdcuser u1, vdcuser u2 | |
| 453 where ref.users_id = u1.id | |
| 454 and u1.id != u2.id | |
| 455 and u1.email = u2.email | |
| 456 and u1.email in ( | |
| 457 select email from vdcuser | |
| 458 group by email | |
| 459 having count(*) > 1 | |
| 460 ) | |
| 461 and u2.id in ( | |
| 462 select min(id) from vdcuser | |
| 463 group by email | |
| 464 having count(*) > 1 | |
| 465 ); | |
| 466 | |
| 467 update vdc_fileuser ref set allowedfileusers_id = u2.id | |
| 468 from vdcuser u1, vdcuser u2 | |
| 469 where ref.allowedfileusers_id = u1.id | |
| 470 and u1.id != u2.id | |
| 471 and u1.email = u2.email | |
| 472 and u1.email in ( | |
| 473 select email from vdcuser | |
| 474 group by email | |
| 475 having count(*) > 1 | |
| 476 ) | |
| 477 and u2.id in ( | |
| 478 select min(id) from vdcuser | |
| 479 group by email | |
| 480 having count(*) > 1 | |
| 481 ); | |
| 482 | |
| 483 update versioncontributor ref set contributor_id = u2.id | |
| 484 from vdcuser u1, vdcuser u2 | |
| 485 where ref.contributor_id = u1.id | |
| 486 and u1.id != u2.id | |
| 487 and u1.email = u2.email | |
| 488 and u1.email in ( | |
| 489 select email from vdcuser | |
| 490 group by email | |
| 491 having count(*) > 1 | |
| 492 ) | |
| 493 and u2.id in ( | |
| 494 select min(id) from vdcuser | |
| 495 group by email | |
| 496 having count(*) > 1 | |
| 497 ); |
