Mercurial > hg > LGDataverses
view scripts/migration/scrub_duplicate_emails.sql @ 12:c2e2d794847f
new: add config.properties file for gazetteer
| author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
|---|---|
| date | Wed, 30 Sep 2015 13:43:54 +0200 |
| parents | a50cf11e5178 |
| children |
line wrap: on
line source
-------------------- --REFERENCE QUERIES -------------------- /* -- Query to list all user acocunts with duplicate e-mails select id, username, email from vdcuser where email in ( select email from vdcuser group by email having count(*) > 1 ) order by email -- Query to list all e-mails that have are duplicated (total = # of actual users, without duplicates) select email, count(*) from vdcuser group by email having count(*) > 1 order by count(*) desc -- Query to list all e-mails that have are duplicated and reference to original account (account with lowest id) select u1.id, u1.username, u1.active,u1.email, u2.id, u2.username, u2.active from vdcuser u1, vdcuser u2 where 1=1 and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ) order by u1.email -- Delete query, to be run after all the updates delete from vdcuser where id in ( select u1.id from vdcuser u1, vdcuser u2 where 1=1 and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ) ) */ -------------------- --UPDATE QUERIES -------------------- -- these queries will update the foreign key references in all (relevant) tables to the orignal account -- -- Generated by /* SELECT tc.table_schema, tc.constraint_name, tc.table_name, kcu.column_name, ccu.table_name AS foreign_table_name,ccu.column_name AS foreign_column_name, 'update ' || tc.table_name || ' ref set ' || kcu.column_name || ' = u2.id from vdcuser u1, vdcuser u2 where ref.' || kcu.column_name || ' = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 );' as query FROM information_schema.table_constraints tc JOIN information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name JOIN information_schema.constraint_column_usage ccu ON ccu.constraint_name = tc.constraint_name WHERE constraint_type = 'FOREIGN KEY' AND ccu.table_name='vdcuser'; */ -- -- -- if any of the below fail because of duplicate constraints, you will need to first delete the duplicates -- here is a sample query for deleting the duplicate entries from studyfile_vdcuser (the most likey to fail)) /* delete from studyfile_vdcuser where allowedusers_id || '_' || studyfiles_id in ( select u1.id || '_' || fu1.studyfiles_id from vdcuser u1, vdcuser u2, studyfile_vdcuser fu1, studyfile_vdcuser fu2 where 1=1 and fu1.studyfiles_id = fu2.studyfiles_id and fu1.allowedusers_id = u1.id and fu2.allowedusers_id = u2.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser -- also may need to run with max(id) or some other combinations! group by email having count(*) > 1 ) ) */ update flagged_study_comments ref set user_id = u2.id from vdcuser u1, vdcuser u2 where ref.user_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update guestbookresponse ref set vdcuser_id = u2.id from vdcuser u1, vdcuser u2 where ref.vdcuser_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update harvestingdataverse_vdcuser ref set allowedfileusers_id = u2.id from vdcuser u1, vdcuser u2 where ref.allowedfileusers_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update networkrolerequest ref set vdcuser_id = u2.id from vdcuser u1, vdcuser u2 where ref.vdcuser_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update rolerequest ref set vdcuser_id = u2.id from vdcuser u1, vdcuser u2 where ref.vdcuser_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update studyaccessrequest ref set vdcuser_id = u2.id from vdcuser u1, vdcuser u2 where ref.vdcuser_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update studycomment ref set commentcreator_id = u2.id from vdcuser u1, vdcuser u2 where ref.commentcreator_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update study ref set creator_id = u2.id from vdcuser u1, vdcuser u2 where ref.creator_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update studyfile_vdcuser ref set allowedusers_id = u2.id from vdcuser u1, vdcuser u2 where ref.allowedusers_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update study ref set lastupdater_id = u2.id from vdcuser u1, vdcuser u2 where ref.lastupdater_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update studylock ref set user_id = u2.id from vdcuser u1, vdcuser u2 where ref.user_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update studyrequest ref set vdcuser_id = u2.id from vdcuser u1, vdcuser u2 where ref.vdcuser_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update study ref set reviewer_id = u2.id from vdcuser u1, vdcuser u2 where ref.reviewer_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update study_vdcuser ref set allowedusers_id = u2.id from vdcuser u1, vdcuser u2 where ref.allowedusers_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update vdc ref set creator_id = u2.id from vdcuser u1, vdcuser u2 where ref.creator_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update vdc_fileuser ref set allowedfileusers_id = u2.id from vdcuser u1, vdcuser u2 where ref.allowedfileusers_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update vdcnetwork ref set creator_id = u2.id from vdcuser u1, vdcuser u2 where ref.creator_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update vdcnetwork ref set defaultnetworkadmin_id = u2.id from vdcuser u1, vdcuser u2 where ref.defaultnetworkadmin_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update vdc ref set reviewer_id = u2.id from vdcuser u1, vdcuser u2 where ref.reviewer_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update vdcrole ref set vdcuser_id = u2.id from vdcuser u1, vdcuser u2 where ref.vdcuser_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update vdcuser_studycomment ref set vdcuser_id = u2.id from vdcuser u1, vdcuser u2 where ref.vdcuser_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update vdcuser_usergroup ref set users_id = u2.id from vdcuser u1, vdcuser u2 where ref.users_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update vdc_fileuser ref set allowedfileusers_id = u2.id from vdcuser u1, vdcuser u2 where ref.allowedfileusers_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 ); update versioncontributor ref set contributor_id = u2.id from vdcuser u1, vdcuser u2 where ref.contributor_id = u1.id and u1.id != u2.id and u1.email = u2.email and u1.email in ( select email from vdcuser group by email having count(*) > 1 ) and u2.id in ( select min(id) from vdcuser group by email having count(*) > 1 );
