Correct checksum in Paperclip migration task

Active Storage needs a checksum for each file and AWS S3 provides this
checksum as "ETag". They are both MD5 but AWS stores it as hexdigest and
Active Storage as base64digest. We need to convert it from one to the
other to get a valid checksum for Active Storage.

Where the migration task has already run (only staging servers), delete all
Active Storage data first and then run the task again:

  bundle exec rake db:migrate:down VERSION=20220316055458
  bundle exec rake db:migrate

  bundle exec rake from_paperclip_to_active_storage:copy_content_config
  bundle exec rake from_paperclip_to_active_storage:migrate
This commit is contained in:
Maikel Linke
2022-05-02 15:24:35 +10:00
parent c00a35b5a4
commit 92677385fa
2 changed files with 17 additions and 4 deletions

View File

@@ -74,13 +74,15 @@ namespace :from_paperclip_to_active_storage do
# stored on AWS S3. Getting the checksum requires a HEAD request.
# In my tests, I could process 100 records per minute this way.
def storage_record_for(name, paperclip)
checksum = hex_to_base64_digest(paperclip.s3_object.etag)
blob = ActiveStorage::Blob.new(
key: paperclip.path(:original),
filename: paperclip.original_filename,
content_type: paperclip.content_type,
metadata: {},
byte_size: paperclip.size,
checksum: paperclip.s3_object.etag,
checksum: checksum,
created_at: paperclip.updated_at,
)
ActiveStorage::Attachment.new(
@@ -93,7 +95,7 @@ namespace :from_paperclip_to_active_storage do
def migrate_content_config_file(name)
paperclip = ContentConfig.public_send(name)
return if ContentConfig.public_send("#{name}_blob_id")
return if ContentConfig.public_send("#{name}_blob")
return if paperclip.path.blank? || !paperclip.exists?
blob = ActiveStorage::Blob.create_and_upload!(
@@ -127,4 +129,8 @@ namespace :from_paperclip_to_active_storage do
left_outer_joins("#{attachment}_attachment".to_sym).
where(active_storage_attachments: { id: nil })
end
# Converts an MD5 hex digest (the form AWS S3 reports as "ETag") into the
# Base64-encoded digest that Active Storage stores as a blob checksum.
#
# The ETag header is an HTTP quoted string, so some clients hand it over
# still wrapped in double quotes. Those quotes are dropped first because
# `pack("H*")` does not reject non-hex characters and would silently yield
# a wrong checksum instead of failing.
#
# @param hexdigest [String] hex MD5 digest, optionally wrapped in double quotes
# @return [String] strict Base64 (no line breaks) of the raw digest bytes
def hex_to_base64_digest(hexdigest)
  raw_digest = [hexdigest.delete('"')].pack("H*")
  [raw_digest].pack("m0")
end
end

View File

@@ -55,7 +55,7 @@ describe "from_paperclip_to_active_storage.rake" do
stub_request(:head, /amazonaws/).to_return(
status: 200, body: "",
headers: {
"ETag" => "md5sum000test000example"
"ETag" => "87b0a401e077485a078c0a15ceb7eb39"
}
)
stub_request(:put, /amazonaws/).to_return(status: 200, body: "", headers: {})
@@ -70,7 +70,14 @@ describe "from_paperclip_to_active_storage.rake" do
image.reload.active_storage_attachment.attached?
}.to(true)
expect(image.attachment_blob.checksum).to eq "md5sum000test000example"
# The checksum can be computed with Active Storage:
#
# ActiveStorage::Blob.build_after_unfurling(
# io: file, identify: false,
# filename: "logo-black.png",
# content_type: "image/png",
# ).checksum
expect(image.attachment_blob.checksum).to eq "h7CkAeB3SFoHjAoVzrfrOQ=="
end
end