diff --git a/db/migrate/20241023054951_sanitize_html_attributes.rb b/db/migrate/20241023054951_sanitize_html_attributes.rb
new file mode 100644
index 0000000000..376ceeea4e
--- /dev/null
+++ b/db/migrate/20241023054951_sanitize_html_attributes.rb
@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+
+# Data migration that re-sanitises HTML already stored in custom tab content,
+# enterprise group long descriptions and product descriptions.
+class SanitizeHtmlAttributes < ActiveRecord::Migration[7.0]
+  # Minimal models scoped to this migration; they add no validations or
+  # callbacks of their own.
+  class CustomTab < ApplicationRecord
+  end
+
+  class EnterpriseGroup < ApplicationRecord
+  end
+
+  class SpreeProduct < ApplicationRecord
+  end
+
+  # This is a copy from our application code at the time of writing.
+  # We prefer to keep migrations isolated and not affected by changing
+  # application code in the future.
+  # If we need to change the sanitizer in the future we may need a new
+  # migration (not change the old one) to sanitise the data properly.
+  class HtmlSanitizer
+    ALLOWED_TAGS = %w[h1 h2 h3 h4 div p br b i u a strong em del pre blockquote ul ol li hr
+                      figure].freeze
+    ALLOWED_ATTRIBUTES = %w[href target].freeze
+    ALLOWED_TRIX_DATA_ATTRIBUTES = %w[data-trix-attachment].freeze
+
+    # Returns html stripped down to the allowed tags and attributes.
+    def self.sanitize(html)
+      @sanitizer ||= Rails::HTML5::SafeListSanitizer.new
+      @sanitizer.sanitize(
+        html, tags: ALLOWED_TAGS, attributes: (ALLOWED_ATTRIBUTES + ALLOWED_TRIX_DATA_ATTRIBUTES)
+      )
+    end
+
+    # Forces target="_blank" on every link, then sanitises the result.
+    def self.sanitize_and_enforce_link_target_blank(html)
+      sanitize(enforce_link_target_blank(html))
+    end
+
+    # Returns html with target="_blank" set on every link (a element),
+    # or nil when html is nil.
+    def self.enforce_link_target_blank(html)
+      return if html.nil?
+
+      Nokogiri::HTML::DocumentFragment.parse(html).tap do |document|
+        document.css("a").each { |link| link["target"] = "_blank" }
+      end.to_s
+    end
+  end
+
+  def up
+    sanitize_column(CustomTab, :content) { |html| HtmlSanitizer.sanitize(html) }
+    sanitize_column(EnterpriseGroup, :long_description) do |html|
+      HtmlSanitizer.sanitize_and_enforce_link_target_blank(html)
+    end
+    sanitize_column(SpreeProduct, :description) { |html| HtmlSanitizer.sanitize(html) }
+  end
+
+  private
+
+  # Sanitises one column of every row of model whose value is neither NULL
+  # nor empty. The block receives the stored HTML and returns the clean HTML.
+  def sanitize_column(model, column)
+    model.where.not(column => [nil, ""]).find_each do |row|
+      stored = row[column]
+      sane = yield(stored)
+      # Nothing to write when the stored value is already clean.
+      next if sane == stored
+
+      row.update_column(column, sane)
+    end
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index ced400f493..856aa02bbf 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema[7.0].define(version: 2024_10_02_014059) do
+ActiveRecord::Schema[7.0].define(version: 2024_10_23_054951) do
   # These are extensions that must be enabled in order to support this database
   enable_extension "pg_stat_statements"
   enable_extension "plpgsql"
diff --git a/spec/migrations/20241023054951_sanitize_html_attributes_spec.rb b/spec/migrations/20241023054951_sanitize_html_attributes_spec.rb
new file mode 100644
index 0000000000..d183cc4d1d
--- /dev/null
+++ b/spec/migrations/20241023054951_sanitize_html_attributes_spec.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require_relative '../../db/migrate/20241023054951_sanitize_html_attributes'
+
+RSpec.describe SanitizeHtmlAttributes do
+  describe "#up" do
+    # Let's hack some bad data:
+    let!(:tab) {
+      create(:custom_tab).tap do |row|
+        row.update_columns(content: bad_html)
+      end
+    }
+    let!(:enterprise_group) {
+      create(:enterprise_group).tap do |row|
+        row.update_columns(long_description: bad_html)
+      end
+    }
+    let!(:product) {
+      create(:product).tap do |row|
+        row.update_columns(description: bad_html)
+      end
+    }
+    # NOTE(review): the HTML fixtures below contain no markup in this copy of
+    # the patch, so sanitising bad_html cannot produce good_html. Presumably
+    # the tags were lost in transit; restore them from the original commit.
+    let(:bad_html) {
+      <<~HTML.squish
+        Fred Farmer is a certified
+        organic
+        ...
+      HTML
+    }
+    let(:good_html) {
+      <<~HTML.squish
+        Fred Farmer is a certified
+        organic
+        alert("Gotcha!")...
+      HTML
+    }
+    let(:good_html_external_link) {
+      <<~HTML.squish
+        Fred Farmer is a certified
+        organic
+        alert("Gotcha!")...
+      HTML
+    }
+
+    it "sanitises HTML attributes" do
+      expect { subject.up }.to change {
+        tab.reload.attributes["content"]
+      }
+        .from(bad_html).to(good_html)
+        .and change {
+          enterprise_group.reload.attributes["long_description"]
+        }
+        .from(bad_html).to(good_html_external_link)
+        .and change {
+          product.reload.attributes["description"]
+        }
+        .from(bad_html).to(good_html)
+    end
+  end
+end