Sanitize non-parsable BOM characters inserted by Excel

This commit is contained in:
Matt-Yorkley
2019-02-15 14:59:15 +00:00
parent 6f2b894cfe
commit 9705c249ac
3 changed files with 40 additions and 1 deletions

View File

@@ -84,7 +84,8 @@ module Admin
directory = 'tmp/product_import'
Dir.mkdir(directory) unless File.exist?(directory)
File.open(Rails.root.join(directory, filename + extension), 'wb') do |f|
f.write(upload.read)
data = UploadSanitizer.new(upload.read).call
f.write(data)
f.path
end
end

View File

@@ -0,0 +1,18 @@
# Re-tags uploaded file contents as UTF-8 and strips byte order mark (BOM)
# characters from them.
#
# Takes the raw contents of the upload as a String (for example the result of
# `upload.read`), not a File object.
class UploadSanitizer
  # U+FEFF, which is the byte sequence EF BB BF when encoded as UTF-8.
  BOM = "\uFEFF".freeze

  # @param upload [String] raw contents of the uploaded file
  def initialize(upload)
    @data = upload
  end

  # @return [String] a new UTF-8 tagged string with every BOM character
  #   removed; the string given to the constructor is left untouched
  def call
    strip_bom_character(utf8_copy)
  end

  private

  # Works on a copy: force_encoding mutates its receiver, which would raise on
  # a frozen string and would otherwise change the caller's object.
  def utf8_copy
    @data.dup.force_encoding(Encoding::UTF_8)
  end

  # Removes every occurrence of the BOM, not only a leading one.
  def strip_bom_character(text)
    text.gsub(BOM, '')
  end
end

View File

@@ -0,0 +1,20 @@
require 'spec_helper'
require 'tempfile'

describe UploadSanitizer do
  describe "#call" do
    # Raw bytes of a file that starts with a UTF-8 BOM followed by "Test".
    # The bytes go through a real file on disk, but a throwaway one:
    # Tempfile.create picks a unique path and removes the file when the block
    # ends, so nothing is left behind and parallel runs cannot collide.
    let(:upload) do
      Tempfile.create(['unsanitized', '.csv']) do |f|
        f.binmode
        f << "\xEF\xBB\xBF"
        f << "Test"
        f.flush
        # Read back as binary so the sanitizer receives untagged bytes.
        File.binread(f.path)
      end
    end

    let(:service) { UploadSanitizer.new(upload) }

    it "sanitizes the uploaded file" do
      sanitized_upload = service.call

      expect(sanitized_upload.encoding.name).to eq "UTF-8"
      expect(sanitized_upload.to_s).to eq "Test"
    end
  end
end