From 8965dedfd70e3b9602de7fe0aac72d89e22fff0f Mon Sep 17 00:00:00 2001 From: luisramos0 Date: Tue, 5 Mar 2019 17:04:41 +0000 Subject: [PATCH 1/5] Add sanitize and truncate tasks to support loading live data into other environments safely --- db/sanitize_data.rb | 43 ------------------ lib/tasks/data/sanitize_data.rake | 56 +++++++++++++++++++++++ lib/tasks/data/truncate_data.rake | 75 +++++++++++++++++++++++++++++++ 3 files changed, 131 insertions(+), 43 deletions(-) delete mode 100644 db/sanitize_data.rb create mode 100644 lib/tasks/data/sanitize_data.rake create mode 100644 lib/tasks/data/truncate_data.rake diff --git a/db/sanitize_data.rb b/db/sanitize_data.rb deleted file mode 100644 index 54d9c7ef48..0000000000 --- a/db/sanitize_data.rb +++ /dev/null @@ -1,43 +0,0 @@ - -def update_address(address, user) - unless address.nil? - address.firstname = user[:first_name] - address.lastname = user[:last_name] - address.phone = user[:phone] - address.save! - end -end - -def sanitize_data - canned_users = [ { :first_name => "Bob", :last_name => "jones", :email => "bob@jones.com", :phone => "0123456789" }, - { :first_name => "cindy", :last_name => "rest", :email => "cindy@gmail.com", :phone => "0123456789" }, - { :first_name => "Pete", :last_name => "smith", :email => "pete@gmail.com", :phone => "0123456789" }, - { :first_name => "Tony", :last_name => "ballantyne", :email => "tony@gmail.com", :phone => "0123456789" }, - { :first_name => "Ben", :last_name => "raven", :email => "ben@gmail.com", :phone => "0123456789" }, - { :first_name => "Robyn", :last_name => "monster", :email => "robyn@gmail.com", :phone => "0123456789" }, - { :first_name => "Nako", :last_name => "tolkein", :email => "nako@gmail.com", :phone => "0123456789" }, - { :first_name => "Helen", :last_name => "mitcham", :email => "helen@gmail.com", :phone => "0123456789" }, - { :first_name => "Emma", :last_name => "low", :email => "emma@gmail.com", :phone => "0123456789" }, - { :first_name => "Mandy", 
:last_name => "Trust", :email => "Mandy@trust.com", :phone => "0123456789" } ] - - Spree::Order.all.each_with_index do |order, index| - canned_user = canned_users[index%canned_users.size] - puts "updating order #{order.id} with #{canned_user[:first_name]}" - - order.email = canned_user[:email] - - update_address(order.bill_address, canned_user) - update_address(order.ship_address, canned_user) - order.save! - end - - Spree::User.all.each_with_index do |user, index| - unless user.email == "admin@openfoodweb.org" - canned_user = canned_users[index%canned_users.size] - puts "updating user #{user.id} with #{canned_user[:first_name]}" - - user.email = "#{canned_user[:email]}#{index}" - user.save! - end - end -end \ No newline at end of file diff --git a/lib/tasks/data/sanitize_data.rake b/lib/tasks/data/sanitize_data.rake new file mode 100644 index 0000000000..31369665d9 --- /dev/null +++ b/lib/tasks/data/sanitize_data.rake @@ -0,0 +1,56 @@ +require 'highline' + +namespace :ofn do + namespace :data do + desc 'Sanitize data' + task sanitize: :environment do + guard_and_warn + + Spree::User.update_all("email = concat(id, '_ofn_user@example.com'), + login = concat(id, '_ofn_user@example.com'), + unconfirmed_email = concat(id, '_ofn_user@example.com')") + Spree::Customer.update_all("email = concat(id, '_ofn_customer@example.com'), + name = concat('Customer Number ', id)") + Spree::Order.update_all("email = concat(id, '_ofn_order@example.com')") + Spree::Address.update_all(" + firstname = concat('Ms. 
Number', id), lastname = 'Jones', phone = '01234567890', + alternative_phone = '01234567890', address1 = 'Dummy address', + address2 = 'Dummy address continuation', city = 'Dummy City', zipcode = '0000', + company = null, latitude = null, longitude = null") + Spree::TokenizedPermission.update_all("token = null") + + # Sanitize payments related entities + Spree::PaymentMethod.update_all("name = concat('Dummy Payment Method', id), + description = name") + Spree::CreditCard.update_all(" + month = 12, year = 2020, start_month = 12, start_year = 2000, + cc_type = 'VISA', first_name = 'Dummy', last_name = 'Dummy', last_digits = '2543'") + Spree::Payment.update_all("response_code = null, avs_response = null, + cvv_response_code = null, identifier = null, + cvv_response_message = null") + Spree::PaypalExpressCheckout.update_all("token = null") + StripeAccount.delete_all + ActiveRecord::Base.connection.execute("delete from spree_paypal_accounts") + + # Update environment in mail methods and payment methods + ActiveRecord::Base.connection.execute("update spree_mail_methods set environment = '#{Rails.env}'") + Spree::PaymentMethod.update_all("environment = '#{Rails.env}'") + + # Delete all preferences that may contain sensitive information + Spree::Preference + .where("key like '%gateway%' OR key like '%billing_integration%' OR key like '%s3%'") + .delete_all + end + + def guard_and_warn + if Rails.env.production? + Rails.logger.info("This task cannot be executed in production") + exit + end + + message = "\n <%= color('This will permanently change DB contents', :yellow) %>, + are you sure you want to proceed? 
(y/N)" + exit unless HighLine.new.agree(message) { |q| q.default = "n" } + end + end +end diff --git a/lib/tasks/data/truncate_data.rake b/lib/tasks/data/truncate_data.rake new file mode 100644 index 0000000000..586a4b8cd7 --- /dev/null +++ b/lib/tasks/data/truncate_data.rake @@ -0,0 +1,75 @@ +# This task can be used to significantly reduce the size of a database +# This is used for example when loading live data into a staging server +# This way the staging server is not overloaded with too much data +namespace :ofn do + namespace :data do + desc 'Truncate data' + task truncate: :environment do + guard_and_warn + + date = 3.months.ago + + where_ocs_to_delete = "where orders_close_at < '#{date}'" + where_oc_id_in_ocs_to_delete = " + where order_cycle_id in (select id from order_cycles #{where_ocs_to_delete} )" + where_order_id_in_orders_to_delete = " + where order_id in (select id from spree_orders #{where_oc_id_in_ocs_to_delete})" + + sql_delete_from " + spree_inventory_units #{where_order_id_in_orders_to_delete}" + sql_delete_from " + spree_adjustments where source_type = 'Spree::Order' + and source_id in (select id from spree_orders #{where_oc_id_in_ocs_to_delete})" + sql_delete_from " + spree_adjustments where source_type = 'Spree::Shipment' + and source_id in (select id from spree_shipments #{where_order_id_in_orders_to_delete})" + sql_delete_from " + spree_adjustments where source_type = 'Spree::Payment' + and source_id in (select id from spree_payments #{where_order_id_in_orders_to_delete})" + sql_delete_from " + spree_adjustments where source_type = 'Spree::LineItem' + and source_id in (select id from spree_line_items #{where_order_id_in_orders_to_delete})" + sql_delete_from "spree_line_items #{where_order_id_in_orders_to_delete}" + sql_delete_from "spree_payments #{where_order_id_in_orders_to_delete}" + sql_delete_from "spree_shipments #{where_order_id_in_orders_to_delete}" + sql_delete_from "billable_periods" + sql_delete_from "account_invoices" + 
Spree::ReturnAuthorization.delete_all + + sql_delete_from "coordinator_fees #{where_oc_id_in_ocs_to_delete}" + sql_delete_from " + exchange_variants where exchange_id + in (select id from exchanges #{where_oc_id_in_ocs_to_delete})" + sql_delete_from " + exchange_fees where exchange_id + in (select id from exchanges #{where_oc_id_in_ocs_to_delete})" + sql_delete_from "exchanges #{where_oc_id_in_ocs_to_delete}" + sql_delete_from "proxy_orders #{where_oc_id_in_ocs_to_delete}" + + sql_delete_from "spree_orders #{where_oc_id_in_ocs_to_delete}" + sql_delete_from "order_cycle_schedules #{where_oc_id_in_ocs_to_delete}" + sql_delete_from "order_cycles #{where_ocs_to_delete}" + + # Truncating addresses like this takes many hours to run on top of 300k records + # The sanitize task will work well with the ful range of 300k addresses + # sql_delete_from "spree_addresses where + # id not in (select address_id from enterprise_groups) and + # id not in (select address_id from enterprises) and + # id not in (select bill_address_id from spree_orders where bill_address_id is not null) and + # id not in (select ship_address_id from spree_orders where ship_address_id is not null) and + # id not in (select address_id from spree_shipments where address_id is not null) and + # id not in (select address_id from spree_shipments where address_id is not null) and + # id not in (select bill_address_id from customers where bill_address_id is not null) and + # id not in (select ship_address_id from customers where ship_address_id is not null)" + + Spree::TokenizedPermission.where("created_at < '#{date}'").delete_all + Spree::StateChange.delete_all + Spree::LogEntry.delete_all + sql_delete_from "sessions" + end + + def sql_delete_from(sql) + ActiveRecord::Base.connection.execute("delete from #{sql}") + end + end +end From b148b9ae469b5b2dc34fb966464cc8fbbf38ad01 Mon Sep 17 00:00:00 2001 From: luisramos0 Date: Mon, 18 Mar 2019 12:04:37 +0000 Subject: [PATCH 2/5] Remove delete addresses SQL from 
truncate task --- lib/tasks/data/truncate_data.rake | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/lib/tasks/data/truncate_data.rake b/lib/tasks/data/truncate_data.rake index 586a4b8cd7..3eadc422b2 100644 --- a/lib/tasks/data/truncate_data.rake +++ b/lib/tasks/data/truncate_data.rake @@ -50,18 +50,6 @@ namespace :ofn do sql_delete_from "order_cycle_schedules #{where_oc_id_in_ocs_to_delete}" sql_delete_from "order_cycles #{where_ocs_to_delete}" - # Truncating addresses like this takes many hours to run on top of 300k records - # The sanitize task will work well with the ful range of 300k addresses - # sql_delete_from "spree_addresses where - # id not in (select address_id from enterprise_groups) and - # id not in (select address_id from enterprises) and - # id not in (select bill_address_id from spree_orders where bill_address_id is not null) and - # id not in (select ship_address_id from spree_orders where ship_address_id is not null) and - # id not in (select address_id from spree_shipments where address_id is not null) and - # id not in (select address_id from spree_shipments where address_id is not null) and - # id not in (select bill_address_id from customers where bill_address_id is not null) and - # id not in (select ship_address_id from customers where ship_address_id is not null)" - Spree::TokenizedPermission.where("created_at < '#{date}'").delete_all Spree::StateChange.delete_all Spree::LogEntry.delete_all From fefced5400dca762c9c22df15c0f728c8536d857 Mon Sep 17 00:00:00 2001 From: luisramos0 Date: Wed, 20 Mar 2019 11:49:55 +0000 Subject: [PATCH 3/5] Improve sanitize data: keep customers connected to users and let addresses' city and zipcode remain as they are --- lib/tasks/data/sanitize_data.rake | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/tasks/data/sanitize_data.rake b/lib/tasks/data/sanitize_data.rake index 31369665d9..a08e24d6d1 100644 --- a/lib/tasks/data/sanitize_data.rake +++ 
b/lib/tasks/data/sanitize_data.rake @@ -9,13 +9,18 @@ namespace :ofn do Spree::User.update_all("email = concat(id, '_ofn_user@example.com'), login = concat(id, '_ofn_user@example.com'), unconfirmed_email = concat(id, '_ofn_user@example.com')") - Spree::Customer.update_all("email = concat(id, '_ofn_customer@example.com'), - name = concat('Customer Number ', id)") + Customer.where("user_id IS NULL") + .update_all("email = concat(id, '_ofn_customer@example.com'), + name = concat('Customer Number ', id, ' (without connected User)')") + Customer.where("user_id IS NOT NULL") + .update_all("email = concat(user_id, '_ofn_user@example.com'), + name = concat('Customer Number ', id, ' - User ', user_id)") + Spree::Order.update_all("email = concat(id, '_ofn_order@example.com')") Spree::Address.update_all(" firstname = concat('Ms. Number', id), lastname = 'Jones', phone = '01234567890', alternative_phone = '01234567890', address1 = 'Dummy address', - address2 = 'Dummy address continuation', city = 'Dummy City', zipcode = '0000', + address2 = 'Dummy address continuation', company = null, latitude = null, longitude = null") Spree::TokenizedPermission.update_all("token = null") From 38416569799091764eeb4e017d5f7465b97842c0 Mon Sep 17 00:00:00 2001 From: luisramos0 Date: Wed, 20 Mar 2019 11:52:37 +0000 Subject: [PATCH 4/5] Rename data sanitize task to data anonymize --- lib/tasks/data/{sanitize_data.rake => anonymize_data.rake} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename lib/tasks/data/{sanitize_data.rake => anonymize_data.rake} (96%) diff --git a/lib/tasks/data/sanitize_data.rake b/lib/tasks/data/anonymize_data.rake similarity index 96% rename from lib/tasks/data/sanitize_data.rake rename to lib/tasks/data/anonymize_data.rake index a08e24d6d1..9d0de691bb 100644 --- a/lib/tasks/data/sanitize_data.rake +++ b/lib/tasks/data/anonymize_data.rake @@ -2,8 +2,8 @@ require 'highline' namespace :ofn do namespace :data do - desc 'Sanitize data' - task sanitize: 
:environment do + desc 'Anonymize data' + task anonymize: :environment do guard_and_warn Spree::User.update_all("email = concat(id, '_ofn_user@example.com'), @@ -24,7 +24,7 @@ namespace :ofn do company = null, latitude = null, longitude = null") Spree::TokenizedPermission.update_all("token = null") - # Sanitize payments related entities + # Anonymize payments related entities Spree::PaymentMethod.update_all("name = concat('Dummy Payment Method', id), description = name") Spree::CreditCard.update_all(" From 61433ee39594b9b04afb330fd5e244555f289e08 Mon Sep 17 00:00:00 2001 From: luisramos0 Date: Thu, 21 Mar 2019 11:07:32 +0000 Subject: [PATCH 5/5] Make anonymize and truncate tasks respect rubocop rules --- lib/tasks/data/anonymize_data.rake | 68 +++++++++++++++++----------- lib/tasks/data/truncate_data.rake | 73 ++++++++++++++++++------------ 2 files changed, 86 insertions(+), 55 deletions(-) diff --git a/lib/tasks/data/anonymize_data.rake b/lib/tasks/data/anonymize_data.rake index 9d0de691bb..f494de99fc 100644 --- a/lib/tasks/data/anonymize_data.rake +++ b/lib/tasks/data/anonymize_data.rake @@ -6,40 +6,20 @@ namespace :ofn do task anonymize: :environment do guard_and_warn - Spree::User.update_all("email = concat(id, '_ofn_user@example.com'), - login = concat(id, '_ofn_user@example.com'), - unconfirmed_email = concat(id, '_ofn_user@example.com')") - Customer.where("user_id IS NULL") - .update_all("email = concat(id, '_ofn_customer@example.com'), - name = concat('Customer Number ', id, ' (without connected User)')") - Customer.where("user_id IS NOT NULL") - .update_all("email = concat(user_id, '_ofn_user@example.com'), - name = concat('Customer Number ', id, ' - User ', user_id)") + anonymize_users_data - Spree::Order.update_all("email = concat(id, '_ofn_order@example.com')") Spree::Address.update_all(" firstname = concat('Ms.
Number', id), lastname = 'Jones', phone = '01234567890', alternative_phone = '01234567890', address1 = 'Dummy address', address2 = 'Dummy address continuation', company = null, latitude = null, longitude = null") + + anonymize_payments_data + anonymize_payments_accounts + Spree::TokenizedPermission.update_all("token = null") - - # Anonymize payments related entities - Spree::PaymentMethod.update_all("name = concat('Dummy Payment Method', id), - description = name") - Spree::CreditCard.update_all(" - month = 12, year = 2020, start_month = 12, start_year = 2000, - cc_type = 'VISA', first_name = 'Dummy', last_name = 'Dummy', last_digits = '2543'") - Spree::Payment.update_all("response_code = null, avs_response = null, - cvv_response_code = null, identifier = null, - cvv_response_message = null") - Spree::PaypalExpressCheckout.update_all("token = null") - StripeAccount.delete_all - ActiveRecord::Base.connection.execute("delete from spree_paypal_accounts") - - # Update environment in mail methods and payment methods - ActiveRecord::Base.connection.execute("update spree_mail_methods set environment = '#{Rails.env}'") - Spree::PaymentMethod.update_all("environment = '#{Rails.env}'") + ActiveRecord::Base.connection.execute("update spree_mail_methods + set environment = '#{Rails.env}'") # Delete all preferences that may contain sensitive information Spree::Preference @@ -57,5 +37,39 @@ namespace :ofn do are you sure you want to proceed? 
(y/N)" exit unless HighLine.new.agree(message) { |q| q.default = "n" } end + + private + + def anonymize_users_data + Spree::User.update_all("email = concat(id, '_ofn_user@example.com'), + login = concat(id, '_ofn_user@example.com'), + unconfirmed_email = concat(id, '_ofn_user@example.com')") + Customer.where("user_id IS NULL") + .update_all("email = concat(id, '_ofn_customer@example.com'), + name = concat('Customer Number ', id, ' (without connected User)')") + Customer.where("user_id IS NOT NULL") + .update_all("email = concat(user_id, '_ofn_user@example.com'), + name = concat('Customer Number ', id, ' - User ', user_id)") + + Spree::Order.update_all("email = concat(id, '_ofn_order@example.com')") + end + + def anonymize_payments_data + Spree::PaymentMethod.update_all("name = concat('Dummy Payment Method', id), + description = name, + environment = '#{Rails.env}'") + Spree::Payment.update_all("response_code = null, avs_response = null, + cvv_response_code = null, identifier = null, + cvv_response_message = null") + Spree::CreditCard.update_all(" + month = 12, year = 2020, start_month = 12, start_year = 2000, + cc_type = 'VISA', first_name = 'Dummy', last_name = 'Dummy', last_digits = '2543'") + end + + def anonymize_payments_accounts + Spree::PaypalExpressCheckout.update_all("token = null") + StripeAccount.delete_all + ActiveRecord::Base.connection.execute("delete from spree_paypal_accounts") + end end end diff --git a/lib/tasks/data/truncate_data.rake b/lib/tasks/data/truncate_data.rake index 3eadc422b2..3b1096f33e 100644 --- a/lib/tasks/data/truncate_data.rake +++ b/lib/tasks/data/truncate_data.rake @@ -7,28 +7,11 @@ namespace :ofn do task truncate: :environment do guard_and_warn - date = 3.months.ago - - where_ocs_to_delete = "where orders_close_at < '#{date}'" - where_oc_id_in_ocs_to_delete = " - where order_cycle_id in (select id from order_cycles #{where_ocs_to_delete} )" - where_order_id_in_orders_to_delete = " - where order_id in (select id from 
spree_orders #{where_oc_id_in_ocs_to_delete})" - sql_delete_from " spree_inventory_units #{where_order_id_in_orders_to_delete}" - sql_delete_from " - spree_adjustments where source_type = 'Spree::Order' - and source_id in (select id from spree_orders #{where_oc_id_in_ocs_to_delete})" - sql_delete_from " - spree_adjustments where source_type = 'Spree::Shipment' - and source_id in (select id from spree_shipments #{where_order_id_in_orders_to_delete})" - sql_delete_from " - spree_adjustments where source_type = 'Spree::Payment' - and source_id in (select id from spree_payments #{where_order_id_in_orders_to_delete})" - sql_delete_from " - spree_adjustments where source_type = 'Spree::LineItem' - and source_id in (select id from spree_line_items #{where_order_id_in_orders_to_delete})" + + truncate_adjustments + sql_delete_from "spree_line_items #{where_order_id_in_orders_to_delete}" sql_delete_from "spree_payments #{where_order_id_in_orders_to_delete}" sql_delete_from "spree_shipments #{where_order_id_in_orders_to_delete}" @@ -36,14 +19,8 @@ namespace :ofn do sql_delete_from "account_invoices" Spree::ReturnAuthorization.delete_all - sql_delete_from "coordinator_fees #{where_oc_id_in_ocs_to_delete}" - sql_delete_from " - exchange_variants where exchange_id - in (select id from exchanges #{where_oc_id_in_ocs_to_delete})" - sql_delete_from " - exchange_fees where exchange_id - in (select id from exchanges #{where_oc_id_in_ocs_to_delete})" - sql_delete_from "exchanges #{where_oc_id_in_ocs_to_delete}" + truncate_order_cycle_data + sql_delete_from "proxy_orders #{where_oc_id_in_ocs_to_delete}" sql_delete_from "spree_orders #{where_oc_id_in_ocs_to_delete}" @@ -59,5 +36,45 @@ namespace :ofn do def sql_delete_from(sql) ActiveRecord::Base.connection.execute("delete from #{sql}") end + + private + + def date + 3.months.ago + end + + def where_ocs_to_delete + "where orders_close_at < '#{date}'" + end + + def where_oc_id_in_ocs_to_delete + "where order_cycle_id in (select id from 
order_cycles #{where_ocs_to_delete} )" + end + + def where_order_id_in_orders_to_delete + "where order_id in (select id from spree_orders #{where_oc_id_in_ocs_to_delete})" + end + + def truncate_adjustments + sql_delete_from "spree_adjustments where source_type = 'Spree::Order' + and source_id in (select id from spree_orders #{where_oc_id_in_ocs_to_delete})" + sql_delete_from "spree_adjustments where source_type = 'Spree::Shipment' + and source_id in (select id from spree_shipments #{where_order_id_in_orders_to_delete})" + sql_delete_from "spree_adjustments where source_type = 'Spree::Payment' + and source_id in (select id from spree_payments #{where_order_id_in_orders_to_delete})" + sql_delete_from "spree_adjustments where source_type = 'Spree::LineItem' + and source_id in (select id from spree_line_items #{where_order_id_in_orders_to_delete})" + end + + def truncate_order_cycle_data + sql_delete_from "coordinator_fees #{where_oc_id_in_ocs_to_delete}" + sql_delete_from " + exchange_variants where exchange_id + in (select id from exchanges #{where_oc_id_in_ocs_to_delete})" + sql_delete_from " + exchange_fees where exchange_id + in (select id from exchanges #{where_oc_id_in_ocs_to_delete})" + sql_delete_from "exchanges #{where_oc_id_in_ocs_to_delete}" + end end end