From 169e1cf288717d4973cfba2dbe6c95d8c9d8ac2d Mon Sep 17 00:00:00 2001 From: Maikel Linke Date: Thu, 24 Oct 2024 08:23:52 +1100 Subject: [PATCH 1/2] Sanitise HTML attributes in the database --- ...20241023054951_sanitize_html_attributes.rb | 58 ++++++++++++++++++ db/schema.rb | 2 +- ...023054951_sanitize_html_attributes_spec.rb | 61 +++++++++++++++++++ 3 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 db/migrate/20241023054951_sanitize_html_attributes.rb create mode 100644 spec/migrations/20241023054951_sanitize_html_attributes_spec.rb diff --git a/db/migrate/20241023054951_sanitize_html_attributes.rb b/db/migrate/20241023054951_sanitize_html_attributes.rb new file mode 100644 index 0000000000..376ceeea4e --- /dev/null +++ b/db/migrate/20241023054951_sanitize_html_attributes.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +class SanitizeHtmlAttributes < ActiveRecord::Migration[7.0] + class CustomTab < ApplicationRecord + end + + class EnterpriseGroup < ApplicationRecord + end + + class SpreeProduct < ApplicationRecord + end + + # This is a copy from our application code at the time of writing. + # We prefer to keep migrations isolated and not affected by changing + # application code in the future. + # If we need to change the sanitizer in the future we may need a new + # migration (not change the old one) to sanitise the data properly. + class HtmlSanitizer + ALLOWED_TAGS = %w[h1 h2 h3 h4 div p br b i u a strong em del pre blockquote ul ol li hr + figure].freeze + ALLOWED_ATTRIBUTES = %w[href target].freeze + ALLOWED_TRIX_DATA_ATTRIBUTES = %w[data-trix-attachment].freeze + + def self.sanitize(html) + @sanitizer ||= Rails::HTML5::SafeListSanitizer.new + @sanitizer.sanitize( + html, tags: ALLOWED_TAGS, attributes: (ALLOWED_ATTRIBUTES + ALLOWED_TRIX_DATA_ATTRIBUTES) + ) + end + + def self.sanitize_and_enforce_link_target_blank(html) + sanitize(enforce_link_target_blank(html)) + end + + def self.enforce_link_target_blank(html) + return if html.nil? + + Nokogiri::HTML::DocumentFragment.parse(html).tap do |document| + document.css("a").each { |link| link["target"] = "_blank" } + end.to_s + end + end + + def up + CustomTab.where.not(content: [nil, ""]).find_each do |row| + sane = HtmlSanitizer.sanitize(row.content) + row.update_column(:content, sane) + end + EnterpriseGroup.where.not(long_description: [nil, ""]).find_each do |row| + sane = HtmlSanitizer.sanitize_and_enforce_link_target_blank(row.long_description) + row.update_column(:long_description, sane) + end + SpreeProduct.where.not(description: [nil, ""]).find_each do |row| + sane = HtmlSanitizer.sanitize(row.description) + row.update_column(:description, sane) + end + end +end diff --git a/db/schema.rb b/db/schema.rb index ced400f493..856aa02bbf 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.0].define(version: 2024_10_02_014059) do +ActiveRecord::Schema[7.0].define(version: 2024_10_23_054951) do # These are extensions that must be enabled in order to support this database enable_extension "pg_stat_statements" enable_extension "plpgsql" diff --git a/spec/migrations/20241023054951_sanitize_html_attributes_spec.rb b/spec/migrations/20241023054951_sanitize_html_attributes_spec.rb new file mode 100644 index 0000000000..d183cc4d1d --- /dev/null +++ b/spec/migrations/20241023054951_sanitize_html_attributes_spec.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +require 'spec_helper' +require_relative '../../db/migrate/20241023054951_sanitize_html_attributes' + +RSpec.describe SanitizeHtmlAttributes do + describe "#up" do + # Let's hack some bad data: + let!(:tab) { + create(:custom_tab).tap do |row| + row.update_columns(content: bad_html) + end + } + let!(:enterprise_group) { + create(:enterprise_group).tap do |row| + row.update_columns(long_description: bad_html) + end + } + let!(:product) { + create(:product).tap do |row| + row.update_columns(description: bad_html) + end + } + let(:bad_html) { + <<~HTML.squish +

Fred Farmer is a certified + organic + ...

+ HTML + } + let(:good_html) { + <<~HTML.squish +

Fred Farmer is a certified + organic + alert("Gotcha!")...

+ HTML + } + let(:good_html_external_link) { + <<~HTML.squish +

Fred Farmer is a certified + organic + alert("Gotcha!")...

+ HTML + } + + it "sanitises HTML attributes" do + expect { subject.up }.to change { + tab.reload.attributes["content"] + } + .from(bad_html).to(good_html) + .and change { + enterprise_group.reload.attributes["long_description"] + } + .from(bad_html).to(good_html_external_link) + .and change { + product.reload.attributes["description"] + } + .from(bad_html).to(good_html) + end + end +end From d2e50876683100d978f75997e62413326a2d99bd Mon Sep 17 00:00:00 2001 From: Maikel Linke Date: Thu, 24 Oct 2024 08:46:33 +1100 Subject: [PATCH 2/2] Remove redundant HTML sanitisation We don't need to run the sanitiser each time we read an attribute. It's a waste of time. --- app/models/custom_tab.rb | 5 ----- app/models/enterprise_group.rb | 5 ----- app/models/spree/product.rb | 5 ----- spec/models/custom_tab_spec.rb | 5 ----- spec/models/enterprise_group_spec.rb | 5 ----- spec/models/spree/product_spec.rb | 5 ----- 6 files changed, 30 deletions(-) diff --git a/app/models/custom_tab.rb b/app/models/custom_tab.rb index d679f254d7..e8934afd36 100644 --- a/app/models/custom_tab.rb +++ b/app/models/custom_tab.rb @@ -5,11 +5,6 @@ class CustomTab < ApplicationRecord validates :title, presence: true, length: { maximum: 20 } - # Remove any unsupported HTML. - def content - HtmlSanitizer.sanitize(super) - end - # Remove any unsupported HTML. def content=(html) super(HtmlSanitizer.sanitize(html)) diff --git a/app/models/enterprise_group.rb b/app/models/enterprise_group.rb index 608cf8c879..5e59488f55 100644 --- a/app/models/enterprise_group.rb +++ b/app/models/enterprise_group.rb @@ -74,11 +74,6 @@ class EnterpriseGroup < ApplicationRecord permalink end - # Remove any unsupported HTML. - def long_description - HtmlSanitizer.sanitize_and_enforce_link_target_blank(super) - end - # Remove any unsupported HTML. def long_description=(html) super(HtmlSanitizer.sanitize_and_enforce_link_target_blank(html)) diff --git a/app/models/spree/product.rb b/app/models/spree/product.rb index 57f765413c..bb1f2d2f59 100755 --- a/app/models/spree/product.rb +++ b/app/models/spree/product.rb @@ -279,11 +279,6 @@ module Spree end # rubocop:enable Metrics/AbcSize - # Remove any unsupported HTML. - def description - HtmlSanitizer.sanitize(super) - end - # Remove any unsupported HTML. def description=(html) super(HtmlSanitizer.sanitize(html)) diff --git a/spec/models/custom_tab_spec.rb b/spec/models/custom_tab_spec.rb index 52fa70c38a..7ee37614cf 100644 --- a/spec/models/custom_tab_spec.rb +++ b/spec/models/custom_tab_spec.rb @@ -18,10 +18,5 @@ RSpec.describe CustomTab do subject.content = "Hello dearest monster." expect(subject.content).to eq "Hello alert dearest monster." end - - it "sanitises existing HTML in content" do - subject[:content] = "Hello dearest monster." - expect(subject.content).to eq "Hello alert dearest monster." - end end end diff --git a/spec/models/enterprise_group_spec.rb b/spec/models/enterprise_group_spec.rb index aa2c10f2ed..7e7c5fd95c 100644 --- a/spec/models/enterprise_group_spec.rb +++ b/spec/models/enterprise_group_spec.rb @@ -124,10 +124,5 @@ RSpec.describe EnterpriseGroup do subject.long_description = "Hello dearest monster." expect(subject.long_description).to eq "Hello alert dearest monster." end - - it "sanitises existing HTML in long_description" do - subject[:long_description] = "Hello dearest monster." - expect(subject.long_description).to eq "Hello alert dearest monster." - end end end diff --git a/spec/models/spree/product_spec.rb b/spec/models/spree/product_spec.rb index df80c8f052..b51a4736f1 100644 --- a/spec/models/spree/product_spec.rb +++ b/spec/models/spree/product_spec.rb @@ -707,11 +707,6 @@ module Spree subject.description = "Hello dearest monster." expect(subject.description).to eq "Hello alert dearest monster." end - - it "sanitises existing HTML in description" do - subject[:description] = "Hello dearest monster." - expect(subject.description).to eq "Hello alert dearest monster." - end end end