From 076efd653d04cc6701b6c0da326eb0242a6de5d4 Mon Sep 17 00:00:00 2001
From: Maikel Linke
Date: Thu, 2 Jun 2022 12:13:34 +1000
Subject: [PATCH] Correct checksum of big files stored on AWS S3

---
Note: the migration body was revised after this patch was generated; the
blob id on its `index` line below still refers to the earlier content.

 ...0602013938_compute_checksum_for_big_files.rb | 31 +++++++++++++++++++++++++++++++
 db/schema.rb                                    |  2 +-
 2 files changed, 32 insertions(+), 1 deletion(-)
 create mode 100644 db/migrate/20220602013938_compute_checksum_for_big_files.rb

diff --git a/db/migrate/20220602013938_compute_checksum_for_big_files.rb b/db/migrate/20220602013938_compute_checksum_for_big_files.rb
new file mode 100644
index 0000000000..697f731e5d
--- /dev/null
+++ b/db/migrate/20220602013938_compute_checksum_for_big_files.rb
@@ -0,0 +1,31 @@
+# When migrating to Active Storage, we used Amazon's ETag for the blob
+# checksum. But big files have been uploaded in chunks and then the checksum
+# differs. We need to recalculate the checksum for large files.
+class ComputeChecksumForBigFiles < ActiveRecord::Migration[6.1]
+  # Recomputes the base64 MD5 checksum of each affected blob from the content
+  # it downloads. Uses `update!` so that a failed save aborts the migration
+  # instead of silently leaving the old checksum in place.
+  def up
+    blobs_with_incorrect_checksum.find_each do |blob|
+      blob.update!(checksum: streamed_md5(blob))
+    end
+  end
+
+  # Blobs stored on the "amazon" service with at least 20,000,000 bytes.
+  def blobs_with_incorrect_checksum
+    ActiveStorage::Blob.
+      where(service_name: "amazon").
+      where("byte_size >= 20000000")
+  end
+
+  private
+
+  # Feeds the download to the digest chunk by chunk so that a big file is
+  # never held in memory as a whole. The result is the same as
+  # Digest::MD5.base64digest(blob.download).
+  def streamed_md5(blob)
+    digest = Digest::MD5.new
+    blob.download { |chunk| digest << chunk }
+    digest.base64digest
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 0ad1f12db3..010059c3d6 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema.define(version: 2022_04_10_162955) do
+ActiveRecord::Schema.define(version: 2022_06_02_013938) do
 
   # These are extensions that must be enabled in order to support this database
   enable_extension "plpgsql"