From 92677385fa7db8e8a5386f5432e836bc089b2468 Mon Sep 17 00:00:00 2001 From: Maikel Linke Date: Mon, 2 May 2022 15:24:35 +1000 Subject: [PATCH] Correct checksum in Paperclip migration task Active Storage needs a checksum for each file and AWS S3 provides this checksum as "ETag". They are both MD5 but AWS stores it as hexdigest and Active Storage as base64digest. We need to convert it from one to the other to get a valid checksum for Active Storage. Where the migration task has already run (only staging servers), delete all Active Storage data first and then run the task again: bundle exec rake db:migrate:down VERSION=20220316055458 bundle exec rake db:migrate bundle exec rake from_paperclip_to_active_storage:copy_content_config bundle exec rake from_paperclip_to_active_storage:migrate --- lib/tasks/from_paperclip_to_active_storage.rake | 10 ++++++++-- .../from_paperclip_to_active_storage_rake_spec.rb | 11 +++++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/lib/tasks/from_paperclip_to_active_storage.rake b/lib/tasks/from_paperclip_to_active_storage.rake index 8859c7bed4..a9ec0a3fa4 100644 --- a/lib/tasks/from_paperclip_to_active_storage.rake +++ b/lib/tasks/from_paperclip_to_active_storage.rake @@ -74,13 +74,15 @@ namespace :from_paperclip_to_active_storage do # stored on AWS S3. Getting the checksum requires a HEAD request. # In my tests, I could process 100 records per minute this way. 
def storage_record_for(name, paperclip) + checksum = hex_to_base64_digest(paperclip.s3_object.etag) + blob = ActiveStorage::Blob.new( key: paperclip.path(:original), filename: paperclip.original_filename, content_type: paperclip.content_type, metadata: {}, byte_size: paperclip.size, - checksum: paperclip.s3_object.etag, + checksum: checksum, created_at: paperclip.updated_at, ) ActiveStorage::Attachment.new( @@ -93,7 +95,7 @@ namespace :from_paperclip_to_active_storage do def migrate_content_config_file(name) paperclip = ContentConfig.public_send(name) - return if ContentConfig.public_send("#{name}_blob_id") + return if ContentConfig.public_send("#{name}_blob") return if paperclip.path.blank? || !paperclip.exists? blob = ActiveStorage::Blob.create_and_upload!( @@ -127,4 +129,8 @@ namespace :from_paperclip_to_active_storage do left_outer_joins("#{attachment}_attachment".to_sym). where(active_storage_attachments: { id: nil }) end + + def hex_to_base64_digest(hexdigest) + [[hexdigest].pack("H*")].pack("m0") + end end diff --git a/spec/lib/tasks/from_paperclip_to_active_storage_rake_spec.rb b/spec/lib/tasks/from_paperclip_to_active_storage_rake_spec.rb index e13cfc2a18..9ada032c88 100644 --- a/spec/lib/tasks/from_paperclip_to_active_storage_rake_spec.rb +++ b/spec/lib/tasks/from_paperclip_to_active_storage_rake_spec.rb @@ -55,7 +55,7 @@ describe "from_paperclip_to_active_storage.rake" do stub_request(:head, /amazonaws/).to_return( status: 200, body: "", headers: { - "ETag" => "md5sum000test000example" + "ETag" => "87b0a401e077485a078c0a15ceb7eb39" } ) stub_request(:put, /amazonaws/).to_return(status: 200, body: "", headers: {}) @@ -70,7 +70,14 @@ describe "from_paperclip_to_active_storage.rake" do image.reload.active_storage_attachment.attached? 
}.to(true) - expect(image.attachment_blob.checksum).to eq "md5sum000test000example" + # The checksum can be computed with Active Storage: + # + # ActiveStorage::Blob.build_after_unfurling( + # io: file, identify: false, + # filename: "logo-black.png", + # content_type: "image/png", + # ).checksum + expect(image.attachment_blob.checksum).to eq "h7CkAeB3SFoHjAoVzrfrOQ==" end end