Sanitise HTML attributes in the database

This commit is contained in:
Maikel Linke
2024-10-24 08:23:52 +11:00
parent a123369f8d
commit 169e1cf288
3 changed files with 120 additions and 1 deletions

View File

@@ -0,0 +1,58 @@
# frozen_string_literal: true
# Data migration that re-sanitises HTML already stored in the database:
# custom tab content, enterprise group long descriptions and product
# descriptions are passed through the sanitizer copy below and written back.
#
# Only `up` is defined; the stripped markup is not kept anywhere, so the
# original values cannot be restored from this migration.
class SanitizeHtmlAttributes < ActiveRecord::Migration[7.0]
  # Bare stand-in models declared inside the migration so that it does not
  # use the application's model classes.
  # NOTE(review): table names are inferred by Rails from the class names
  # (presumably custom_tabs, enterprise_groups, spree_products) - confirm.
  class CustomTab < ApplicationRecord
  end

  class EnterpriseGroup < ApplicationRecord
  end

  class SpreeProduct < ApplicationRecord
  end

  # This is a copy from our application code at the time of writing.
  # We prefer to keep migrations isolated and not affected by changing
  # application code in the future.
  # If we need to change the sanitizer in the future we may need a new
  # migration (not change the old one) to sanitise the data properly.
  class HtmlSanitizer
    ALLOWED_TAGS = %w[h1 h2 h3 h4 div p br b i u a strong em del pre blockquote ul ol li hr
                      figure].freeze
    ALLOWED_ATTRIBUTES = %w[href target].freeze
    ALLOWED_TRIX_DATA_ATTRIBUTES = %w[data-trix-attachment].freeze

    # Sanitises +html+ with the allow-lists of tags and attributes above.
    # The sanitizer instance is memoised on the class and reused between calls.
    def self.sanitize(html)
      @sanitizer ||= Rails::HTML5::SafeListSanitizer.new
      @sanitizer.sanitize(
        html, tags: ALLOWED_TAGS, attributes: (ALLOWED_ATTRIBUTES + ALLOWED_TRIX_DATA_ATTRIBUTES)
      )
    end

    # Sets target="_blank" on every link first, then sanitises the result.
    def self.sanitize_and_enforce_link_target_blank(html)
      sanitize(enforce_link_target_blank(html))
    end

    # Returns +html+ re-serialised with target="_blank" set on every <a>
    # element, or nil when +html+ is nil.
    def self.enforce_link_target_blank(html)
      return if html.nil?

      Nokogiri::HTML::DocumentFragment.parse(html).tap do |document|
        document.css("a").each { |link| link["target"] = "_blank" }
      end.to_s
    end
  end

  # Sanitises every non-blank HTML value in the three affected columns.
  # Enterprise group descriptions additionally get target="_blank" on links.
  def up
    sanitize_column(CustomTab, :content) { |html| HtmlSanitizer.sanitize(html) }
    sanitize_column(EnterpriseGroup, :long_description) do |html|
      HtmlSanitizer.sanitize_and_enforce_link_target_blank(html)
    end
    sanitize_column(SpreeProduct, :description) { |html| HtmlSanitizer.sanitize(html) }
  end

  private

  # Runs the given block over +column+ of every +model+ row whose value is
  # neither NULL nor the empty string, and stores the block's result.
  #
  # Rows are loaded with find_each and written with update_column (a direct
  # column write; per the Rails docs it skips validations and callbacks).
  # Rows whose sanitised value equals the stored value are left untouched so
  # that already-clean data does not cause a needless UPDATE.
  def sanitize_column(model, column)
    model.where.not(column => [nil, ""]).find_each do |row|
      stored = row[column]
      sane = yield(stored)
      next if sane == stored

      row.update_column(column, sane)
    end
  end
end

View File

@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.0].define(version: 2024_10_02_014059) do
ActiveRecord::Schema[7.0].define(version: 2024_10_23_054951) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_stat_statements"
enable_extension "plpgsql"

View File

@@ -0,0 +1,61 @@
# frozen_string_literal: true
require 'spec_helper'
require_relative '../../db/migrate/20241023054951_sanitize_html_attributes'
# Verifies that the migration cleans HTML that is already stored in the
# three affected columns.
RSpec.describe SanitizeHtmlAttributes do
  describe "#up" do
    # Markup with an attribute and a <script> element that the expected
    # results below no longer contain.
    let(:bad_html) do
      <<~HTML.squish
        <p data-controller="load->payMe">Fred Farmer is a certified
        <a href="https://example.net/">organic</a>
        <script>alert("Gotcha!")</script>...</p>
      HTML
    end

    # Expected result for custom tabs and products.
    let(:good_html) do
      <<~HTML.squish
        <p>Fred Farmer is a certified
        <a href="https://example.net/">organic</a>
        alert("Gotcha!")...</p>
      HTML
    end

    # Expected result for enterprise groups: same as above, but the link
    # also carries target="_blank".
    let(:good_html_external_link) do
      <<~HTML.squish
        <p>Fred Farmer is a certified
        <a href="https://example.net/" target="_blank">organic</a>
        alert("Gotcha!")...</p>
      HTML
    end

    # Let's hack some bad data: records are created through the factories,
    # then the column is overwritten directly with update_columns.
    let!(:tab) do
      create(:custom_tab).tap { |record| record.update_columns(content: bad_html) }
    end

    let!(:enterprise_group) do
      create(:enterprise_group).tap { |record| record.update_columns(long_description: bad_html) }
    end

    let!(:product) do
      create(:product).tap { |record| record.update_columns(description: bad_html) }
    end

    it "sanitises HTML attributes" do
      # Values are read via #attributes after a reload, i.e. from the
      # reloaded record's attribute hash rather than a model reader method.
      expect { subject.up }
        .to change { tab.reload.attributes["content"] }
        .from(bad_html).to(good_html)
        .and change { enterprise_group.reload.attributes["long_description"] }
        .from(bad_html).to(good_html_external_link)
        .and change { product.reload.attributes["description"] }
        .from(bad_html).to(good_html)
    end
  end
end