From a41c5f609af9fb64d83945949e332042d621d94e Mon Sep 17 00:00:00 2001 From: Ryan Grove Date: Thu, 26 Dec 2024 17:20:01 -0800 Subject: [PATCH] Adopt Standard Ruby style https://github.com/standardrb/standard --- .rubocop.yml | 30 +- .standard.yml | 1 + Gemfile | 4 + README.md | 128 ++--- Rakefile | 18 +- lib/sanitize.rb | 72 +-- lib/sanitize/config.rb | 24 +- lib/sanitize/config/basic.rb | 28 +- lib/sanitize/config/default.rb | 88 +-- lib/sanitize/config/relaxed.rb | 60 +- lib/sanitize/config/restricted.rb | 2 +- lib/sanitize/css.rb | 612 ++++++++++----------- lib/sanitize/transformers/clean_cdata.rb | 18 +- lib/sanitize/transformers/clean_comment.rb | 18 +- lib/sanitize/transformers/clean_css.rb | 112 ++-- lib/sanitize/transformers/clean_doctype.rb | 28 +- lib/sanitize/transformers/clean_element.rb | 433 ++++++++------- lib/sanitize/version.rb | 2 +- sanitize.gemspec | 36 +- test/common.rb | 4 +- test/test_clean_comment.rb | 48 +- test/test_clean_css.rb | 24 +- test/test_clean_doctype.rb | 38 +- test/test_clean_element.rb | 528 +++++++++--------- test/test_config.rb | 40 +- test/test_malicious_css.rb | 36 +- test/test_malicious_html.rb | 196 +++---- test/test_parser.rb | 48 +- test/test_sanitize.rb | 136 ++--- test/test_sanitize_css.rb | 225 ++++---- test/test_transformers.rb | 161 +++--- 31 files changed, 1597 insertions(+), 1601 deletions(-) create mode 100644 .standard.yml diff --git a/.rubocop.yml b/.rubocop.yml index b9e8f6e..3c31320 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -1,15 +1,29 @@ +inherit_mode: + merge: + - Exclude + +require: + - standard + - standard-custom + - standard-performance + - rubocop-performance + - rubocop-minitest + - rubocop-packaging + - rubocop-rake + +inherit_gem: + standard: config/base.yml + standard-custom: config/base.yml + standard-performance: config/base.yml + inherit_from: .rubocop_todo.yml AllCops: Exclude: - - '.*/**/*' - - 'benchmark/**/*' - - 'test/**/*' - - 'tmp/**/*' - TargetRubyVersion: 3.1.0 - -Metrics: - Enabled: false + - ".*/**/*" + - "tmp/**/*" + SuggestExtensions: false + TargetRubyVersion: 3.1 Style/Documentation: Enabled: false diff --git a/.standard.yml b/.standard.yml new file mode 100644 index 0000000..72b2693 --- /dev/null +++ b/.standard.yml @@ -0,0 +1 @@ +ruby_version: 3.1 diff --git a/Gemfile b/Gemfile index cafd43e..421eef3 100644 --- a/Gemfile +++ b/Gemfile @@ -7,4 +7,8 @@ group :development do gem "bundler", "~> 2.6.2" gem "minitest", "5.25.4" gem "rake", "13.2.1" + gem "rubocop-minitest", "0.36.0" + gem "rubocop-packaging", "0.5.2" + gem "rubocop-rake", "0.6.0" + gem "standard", "1.43.0" end diff --git a/README.md b/README.md index 9cffb53..a944e9f 100644 --- a/README.md +++ b/README.md @@ -71,14 +71,14 @@ If you don't specify any configuration options, Sanitize will use its strictest ```ruby html = 'foo' Sanitize.fragment(html) -# => 'foo' +# => "foo" ``` To keep certain elements, add them to the element allowlist. ```ruby -Sanitize.fragment(html, :elements => ['b']) -# => 'foo' +Sanitize.fragment(html, elements: ['b']) +# => "foo" ``` ### HTML Documents @@ -94,14 +94,10 @@ html = %[ ] Sanitize.document(html, - :allow_doctype => true, - :elements => ['html'] + allow_doctype: true, + elements: ['html'] ) -# => %[ -# foo -# -# -# ] +# => "foo\n \n" ``` ### CSS in HTML @@ -119,11 +115,11 @@ html = %[ ] Sanitize.fragment(html, - :elements => ['div', 'style'], - :attributes => {'div' => ['style']}, + elements: ['div', 'style'], + attributes: {'div' => ['style']}, - :css => { - :properties => ['width'] + css: { + properties: ['width'] } ) #=> %[ @@ -156,7 +152,6 @@ Sanitize::CSS.stylesheet(css, Sanitize::Config::RELAXED) # => %[ # # -# # a { text-decoration: none; } # # a:hover { @@ -173,7 +168,6 @@ Sanitize::CSS.properties(%[ # # text-decoration: underline; # ] - ``` ## Configuration @@ -186,7 +180,7 @@ Allows only very simple inline markup. No links, images, or block elements. ```ruby Sanitize.fragment(html, Sanitize::Config::RESTRICTED) -# => 'foo' +# => "foo" ``` ### Sanitize::Config::BASIC @@ -215,14 +209,14 @@ If the built-in modes don't meet your needs, you can easily specify a custom con ```ruby Sanitize.fragment(html, - :elements => ['a', 'span'], + elements: ['a', 'span'], - :attributes => { - 'a' => ['href', 'title'], + attributes: { + 'a' => ['href', 'title'], 'span' => ['class'] }, - :protocols => { + protocols: { 'a' => {'href' => ['http', 'https', 'mailto']} } ) @@ -236,8 +230,8 @@ The built-in configs are deeply frozen to prevent people from modifying them (ei # Create a customized copy of the Basic config, adding
and to the # existing allowlisted elements. Sanitize.fragment(html, Sanitize::Config.merge(Sanitize::Config::BASIC, - :elements => Sanitize::Config::BASIC[:elements] + ['div', 'table'], - :remove_contents => true + elements: Sanitize::Config::BASIC[:elements] + ['div', 'table'], + remove_contents: true )) ``` @@ -246,8 +240,8 @@ The example above adds the `
` and `
` elements to a copy of the exist ```ruby # Overwrite :elements instead of creating a copy with new entries. Sanitize.fragment(html, Sanitize::Config.merge(Sanitize::Config::BASIC, - :elements => ['div', 'table'], - :remove_contents => true + elements: ['div', 'table'], + remove_contents: true )) ``` @@ -258,7 +252,7 @@ Sanitize.fragment(html, Sanitize::Config.merge(Sanitize::Config::BASIC, Attributes to add to specific elements. If the attribute already exists, it will be replaced with the value specified here. Specify all element names and attributes in lowercase. ```ruby -:add_attributes => { +add_attributes: { 'a' => {'rel' => 'nofollow'} } ``` @@ -276,10 +270,10 @@ Whether or not to allow well-formed HTML doctype declarations such as " { - 'a' => ['href', 'title'], +attributes: { + 'a' => ['href', 'title'], 'blockquote' => ['cite'], - 'img' => ['alt', 'src', 'title'] + 'img' => ['alt', 'src', 'title'] } ``` @@ -287,9 +281,9 @@ If you'd like to allow certain attributes on all elements, use the symbol `:all` ```ruby # Allow the class attribute on all elements. -:attributes => { +attributes: { :all => ['class'], - 'a' => ['href', 'title'] + 'a' => ['href', 'title'] } ``` @@ -297,7 +291,7 @@ To allow arbitrary HTML5 `data-*` attributes, use the symbol `:data` in place of ```ruby # Allow arbitrary HTML5 data-* attributes on
elements. -:attributes => { +attributes: { 'div' => [:data] } ``` @@ -353,7 +347,7 @@ If you'd like to allow the use of relative URLs which don't have a protocol, inc Array of HTML element names to allow. Specify all names in lowercase. Any elements not in this array will be removed. ```ruby -:elements => %w[ +elements: %w[ a abbr b blockquote br cite code dd dfn dl dt em i kbd li mark ol p pre q s samp small strike strong sub sup time u ul var ] @@ -373,10 +367,10 @@ Array of HTML element names to allow. Specify all names in lowercase. Any elemen #### :parser_options (Hash) -[Parsing options](https://github.com/rubys/nokogumbo/tree/master#parsing-options) to be supplied to `nokogumbo`. +[Parsing options](https://nokogiri.org/tutorials/parsing_an_html5_document.html?h=parsing+options#parsing-options) to be supplied to Nokogiri. ```ruby -:parser_options => { +parser_options: { max_errors: -1, max_tree_depth: -1 } @@ -387,16 +381,16 @@ Array of HTML element names to allow. Specify all names in lowercase. Any elemen URL protocols to allow in specific attributes. If an attribute is listed here and contains a protocol other than those specified (or if it contains no protocol at all), it will be removed. ```ruby -:protocols => { - 'a' => {'href' => ['ftp', 'http', 'https', 'mailto']}, - 'img' => {'src' => ['http', 'https']} +protocols: { + 'a' => {'href' => ['ftp', 'http', 'https', 'mailto']}, + 'img' => {'src' => ['http', 'https']} } ``` If you'd like to allow the use of relative URLs which don't have a protocol, include the symbol `:relative` in the protocol array: ```ruby -:protocols => { +protocols: { 'a' => {'href' => ['http', 'https', :relative]} } ``` @@ -407,7 +401,7 @@ If this is `true`, Sanitize will remove the contents of any non-allowlisted elem If this is an Array or Set of element names, then only the contents of the specified elements (when filtered) will be removed, and the contents of all other filtered elements will be left behind. -The default value is `%w[iframe math noembed noframes noscript plaintext script style svg xmp]`. +The default value can be seen in the [default config](lib/sanitize/config/default.rb). #### :transformers (Array or callable) @@ -420,20 +414,14 @@ Hash of element names which, when removed, should have their contents surrounded Each element name is a key pointing to another Hash, which provides the specific whitespace that should be inserted `:before` and `:after` the removed element's position. The `:after` value will only be inserted if the removed element has children, in which case it will be inserted after those children. ```ruby -:whitespace_elements => { - 'br' => { :before => "\n", :after => "" }, - 'div' => { :before => "\n", :after => "\n" }, - 'p' => { :before => "\n", :after => "\n" } +whitespace_elements: { + 'br' => { before: "\n", after: "" }, + 'div' => { before: "\n", after: "\n" }, + 'p' => { before: "\n", after: "\n" } } ``` -The default elements with whitespace added before and after are: - -``` -address article aside blockquote br dd div dl dt -footer h1 h2 h3 h4 h5 h6 header hgroup hr li nav -ol p pre section ul -``` +The default elements with whitespace added before and after can be seen in [the default config](lib/sanitize/config/default.rb). ## Transformers @@ -442,7 +430,7 @@ Transformers allow you to filter and modify HTML nodes using your own custom log To use one or more transformers, pass them to the `:transformers` config setting. You may pass a single transformer or an array of transformers. ```ruby -Sanitize.fragment(html, :transformers => [ +Sanitize.fragment(html, transformers: [ transformer_one, transformer_two ]) @@ -493,7 +481,7 @@ transformer = lambda do |env| end # Prints "header", "span", "strong", "p", "footer". -Sanitize.fragment(html, :transformers => transformer) +Sanitize.fragment(html, transformers: transformer) ``` Transformers have a tremendous amount of power, including the power to completely bypass Sanitize's built-in filtering. Be careful! Your safety is in your own hands. @@ -503,20 +491,22 @@ Transformers have a tremendous amount of power, including the power to completel The following example demonstrates how to remove image elements unless they use a relative URL or are hosted on a specific domain. It assumes that the `` element and its `src` attribute are already allowlisted. ```ruby -require 'uri' +require "uri" image_allowlist_transformer = lambda do |env| # Ignore everything except elements. - return unless env[:node_name] == 'img' + return unless env[:node_name] == "img" - node = env[:node] - image_uri = URI.parse(node['src']) + node = env[:node] + image_uri = URI.parse(node["src"]) # Only allow relative URLs or URLs with the example.com domain. The # image_uri.host.nil? check ensures that protocol-relative URLs like - # "//evil.com/foo.jpg". - unless image_uri.host == 'example.com' || (image_uri.host.nil? && image_uri.relative?) - node.unlink # `Nokogiri::XML::Node#unlink` removes a node from the document + # "//evil.com/foo.jpg" are not allowed. + unless image_uri.host == "example.com" + unless image_uri.host.nil? && image_uri.relative? + node.unlink # `Nokogiri::XML::Node#unlink` removes a node from the document + end end end ``` @@ -527,40 +517,40 @@ The following example demonstrates how to create a transformer that will safely ```ruby youtube_transformer = lambda do |env| - node = env[:node] + node = env[:node] node_name = env[:node_name] # Don't continue if this node is already allowlisted or is not an element. return if env[:is_allowlisted] || !node.element? # Don't continue unless the node is an iframe. - return unless node_name == 'iframe' + return unless node_name == "iframe" # Verify that the video URL is actually a valid YouTube video URL. - return unless node['src'] =~ %r|\A(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/| + return unless %r{\A(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/}.match?(node["src"]) # We're now certain that this is a YouTube embed, but we still need to run # it through a special Sanitize step to ensure that no unwanted elements or # attributes that don't belong in a YouTube embed can sneak in. Sanitize.node!(node, { - :elements => %w[iframe], + elements: %w[iframe], - :attributes => { - 'iframe' => %w[allowfullscreen frameborder height src width] + attributes: { + "iframe" => %w[allowfullscreen frameborder height src width] } }) # Now that we're sure that this is a valid YouTube embed and that there are # no unwanted elements or attributes hidden inside it, we can tell Sanitize # to allowlist the current node. - {:node_allowlist => [node]} + {node_allowlist: [node]} end html = %[ -] +].strip -Sanitize.fragment(html, :transformers => youtube_transformer) +Sanitize.fragment(html, transformers: youtube_transformer) # => '' ``` diff --git a/Rakefile b/Rakefile index 4e056ba..69a1849 100644 --- a/Rakefile +++ b/Rakefile @@ -1,20 +1,24 @@ # frozen_string_literal: true -require 'bundler' -require 'rake/clean' -require 'rake/testtask' +require "bundler" +require "rake/clean" +require "rake/testtask" +require "standard/rake" Bundler::GemHelper.install_tasks Rake::TestTask.new task default: [:test] + +desc "Run unit tests" task test: :set_rubyopts +desc "Enable warnings" task :set_rubyopts do - ENV['RUBYOPT'] ||= '' - ENV['RUBYOPT'] += ' -w' + ENV["RUBYOPT"] ||= "" + ENV["RUBYOPT"] += " -w" - if RUBY_ENGINE == 'ruby' - ENV['RUBYOPT'] += ' --enable-frozen-string-literal --debug=frozen-string-literal' + if RUBY_ENGINE == "ruby" + ENV["RUBYOPT"] += " --enable-frozen-string-literal --debug=frozen-string-literal" end end diff --git a/lib/sanitize.rb b/lib/sanitize.rb index 3a3e46d..6f55464 100644 --- a/lib/sanitize.rb +++ b/lib/sanitize.rb @@ -1,20 +1,20 @@ # frozen_string_literal: true -require 'nokogiri' -require 'set' - -require_relative 'sanitize/version' -require_relative 'sanitize/config' -require_relative 'sanitize/config/default' -require_relative 'sanitize/config/restricted' -require_relative 'sanitize/config/basic' -require_relative 'sanitize/config/relaxed' -require_relative 'sanitize/css' -require_relative 'sanitize/transformers/clean_cdata' -require_relative 'sanitize/transformers/clean_comment' -require_relative 'sanitize/transformers/clean_css' -require_relative 'sanitize/transformers/clean_doctype' -require_relative 'sanitize/transformers/clean_element' +require "nokogiri" +require "set" + +require_relative "sanitize/version" +require_relative "sanitize/config" +require_relative "sanitize/config/default" +require_relative "sanitize/config/restricted" +require_relative "sanitize/config/basic" +require_relative "sanitize/config/relaxed" +require_relative "sanitize/css" +require_relative "sanitize/transformers/clean_cdata" +require_relative "sanitize/transformers/clean_comment" +require_relative "sanitize/transformers/clean_css" +require_relative "sanitize/transformers/clean_doctype" +require_relative "sanitize/transformers/clean_element" class Sanitize attr_reader :config @@ -33,12 +33,12 @@ class Sanitize # - https://infra.spec.whatwg.org/#noncharacter REGEX_HTML_NON_CHARACTERS = /[\ufdd0-\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}]+/u - # Matches an attribute value that could be treated by a browser as a URL - # with a protocol prefix, such as "http:" or "javascript:". Any string of zero - # or more characters followed by a colon is considered a match, even if the - # colon is encoded as an entity and even if it's an incomplete entity (which - # IE6 and Opera will still parse). - REGEX_PROTOCOL = /\A\s*([^\/#]*?)(?:\:|�*58|�*3a)/i + # Matches an attribute value that could be treated by a browser as a URL with + # a protocol prefix, such as "http:" or "javascript:". Any string of zero or + # more characters followed by a colon is considered a match, even if the colon + # is encoded as an entity and even if it's an incomplete entity (which IE6 and + # Opera will still parse). + REGEX_PROTOCOL = /\A\s*([^\/#]*?)(?::|�*58|�*3a)/i # Matches one or more characters that should be stripped from HTML before # parsing. This is a combination of `REGEX_HTML_CONTROL_CHARACTERS` and @@ -99,12 +99,12 @@ def initialize(config = {}) @transformers << Transformers::CleanElement.new(@config) @transformers << Transformers::CleanComment unless @config[:allow_comments] - if @config[:elements].include?('style') + if @config[:elements].include?("style") scss = Sanitize::CSS.new(config) @transformers << Transformers::CSS::CleanElement.new(scss) end - if @config[:attributes].values.any? {|attr| attr.include?('style') } + if @config[:attributes].values.any? { |attr| attr.include?("style") } scss ||= Sanitize::CSS.new(config) @transformers << Transformers::CSS::CleanAttribute.new(scss) end @@ -112,7 +112,7 @@ def initialize(config = {}) @transformers << Transformers::CleanDoctype @transformers << Transformers::CleanCDATA - @transformer_config = { config: @config } + @transformer_config = {config: @config} end # Returns a sanitized copy of the given _html_ document. @@ -121,7 +121,7 @@ def initialize(config = {}) # error will be raised. If this is undesirable, you should probably use # {#fragment} instead. def document(html) - return '' unless html + return "" unless html doc = Nokogiri::HTML5.parse(preprocess(html), **@config[:parser_options]) node!(doc) @@ -133,7 +133,7 @@ def document(html) # Returns a sanitized copy of the given _html_ fragment. def fragment(html) - return '' unless html + return "" unless html frag = Nokogiri::HTML5.fragment(preprocess(html), **@config[:parser_options]) node!(frag) @@ -152,7 +152,7 @@ def node!(node) raise ArgumentError unless node.is_a?(Nokogiri::XML::Node) if node.is_a?(Nokogiri::XML::Document) - unless @config[:elements].include?('html') + unless @config[:elements].include?("html") raise Error, 'When sanitizing a document, "" must be allowlisted.' end end @@ -175,13 +175,13 @@ def node!(node) def preprocess(html) html = html.to_s.dup - unless html.encoding.name == 'UTF-8' - html.encode!('UTF-8', - :invalid => :replace, - :undef => :replace) + unless html.encoding.name == "UTF-8" + html.encode!("UTF-8", + invalid: :replace, + undef: :replace) end - html.gsub!(REGEX_UNSUITABLE_CHARS, '') + html.gsub!(REGEX_UNSUITABLE_CHARS, "") html end @@ -225,17 +225,17 @@ def traverse(node, &block) child = node.child - while child do + while child prev = child.previous_sibling traverse(child, &block) - if child.parent == node - child = child.next_sibling + child = if child.parent == node + child.next_sibling else # The child was unlinked or reparented, so traverse the previous node's # next sibling, or the parent's first child if there is no previous # node. - child = prev ? prev.next_sibling : node.child + prev ? prev.next_sibling : node.child end end end diff --git a/lib/sanitize/config.rb b/lib/sanitize/config.rb index 3755617..beb7d90 100644 --- a/lib/sanitize/config.rb +++ b/lib/sanitize/config.rb @@ -1,16 +1,15 @@ # frozen_string_literal: true -require 'set' +require "set" class Sanitize module Config - # Deeply freezes and returns the given configuration Hash. def self.freeze_config(config) if Hash === config - config.each_value {|c| freeze_config(c) } + config.each_value { |c| freeze_config(c) } elsif Array === config || Set === config - config.each {|c| freeze_config(c) } + config.each { |c| freeze_config(c) } end config.freeze @@ -22,11 +21,11 @@ def self.freeze_config(config) # This is the safest way to use a built-in Sanitize config as the basis for # your own custom config. def self.merge(config, other_config = {}) - raise ArgumentError, 'config must be a Hash' unless Hash === config - raise ArgumentError, 'other_config must be a Hash' unless Hash === other_config + raise ArgumentError, "config must be a Hash" unless Hash === config + raise ArgumentError, "other_config must be a Hash" unless Hash === other_config merged = {} - keys = Set.new(config.keys + other_config.keys) + keys = Set.new(config.keys + other_config.keys) keys.each do |key| oldval = config[key] @@ -34,12 +33,12 @@ def self.merge(config, other_config = {}) if other_config.has_key?(key) newval = other_config[key] - if Hash === oldval && Hash === newval - merged[key] = oldval.empty? ? newval.dup : merge(oldval, newval) + merged[key] = if Hash === oldval && Hash === newval + oldval.empty? ? newval.dup : merge(oldval, newval) elsif Array === newval && key != :transformers - merged[key] = Set.new(newval) + Set.new(newval) else - merged[key] = can_dupe?(newval) ? newval.dup : newval + can_dupe?(newval) ? newval.dup : newval end else merged[key] = can_dupe?(oldval) ? oldval.dup : oldval @@ -52,9 +51,8 @@ def self.merge(config, other_config = {}) # Returns `true` if `dup` may be safely called on _value_, `false` # otherwise. def self.can_dupe?(value) - !(true == value || false == value || value.nil? || Method === value || Numeric === value || Symbol === value) + !(value == true || value == false || value.nil? || Method === value || Numeric === value || Symbol === value) end private_class_method :can_dupe? - end end diff --git a/lib/sanitize/config/basic.rb b/lib/sanitize/config/basic.rb index 0309f50..a069182 100644 --- a/lib/sanitize/config/basic.rb +++ b/lib/sanitize/config/basic.rb @@ -3,28 +3,28 @@ class Sanitize module Config BASIC = freeze_config( - :elements => RESTRICTED[:elements] + %w[ + elements: RESTRICTED[:elements] + %w[ a abbr blockquote br cite code dd dfn dl dt kbd li mark ol p pre q s samp small strike sub sup time ul var ], - :attributes => { - 'a' => %w[href], - 'abbr' => %w[title], - 'blockquote' => %w[cite], - 'dfn' => %w[title], - 'q' => %w[cite], - 'time' => %w[datetime pubdate] + attributes: { + "a" => %w[href], + "abbr" => %w[title], + "blockquote" => %w[cite], + "dfn" => %w[title], + "q" => %w[cite], + "time" => %w[datetime pubdate] }, - :add_attributes => { - 'a' => {'rel' => 'nofollow'} + add_attributes: { + "a" => {"rel" => "nofollow"} }, - :protocols => { - 'a' => {'href' => ['ftp', 'http', 'https', 'mailto', :relative]}, - 'blockquote' => {'cite' => ['http', 'https', :relative]}, - 'q' => {'cite' => ['http', 'https', :relative]} + protocols: { + "a" => {"href" => ["ftp", "http", "https", "mailto", :relative]}, + "blockquote" => {"cite" => ["http", "https", :relative]}, + "q" => {"cite" => ["http", "https", :relative]} } ) end diff --git a/lib/sanitize/config/default.rb b/lib/sanitize/config/default.rb index c86910d..74c27d4 100644 --- a/lib/sanitize/config/default.rb +++ b/lib/sanitize/config/default.rb @@ -5,51 +5,51 @@ module Config DEFAULT = freeze_config( # HTML attributes to add to specific elements. By default, no attributes # are added. - :add_attributes => {}, + add_attributes: {}, # Whether or not to allow HTML comments. Allowing comments is strongly # discouraged, since IE allows script execution within conditional # comments. - :allow_comments => false, + allow_comments: false, # Whether or not to allow well-formed HTML doctype declarations such as # "" when sanitizing a document. This setting is ignored # when sanitizing fragments. - :allow_doctype => false, + allow_doctype: false, # HTML attributes to allow in specific elements. By default, no attributes # are allowed. Use the symbol :data to indicate that arbitrary HTML5 # data-* attributes should be allowed. - :attributes => {}, + attributes: {}, # CSS sanitization settings. - :css => { + css: { # Whether or not to allow CSS comments. - :allow_comments => false, + allow_comments: false, # Whether or not to allow browser compatibility hacks such as the IE * # and _ hacks. These are generally harmless, but technically result in # invalid CSS. - :allow_hacks => false, + allow_hacks: false, # CSS at-rules to allow that may not have associated blocks (e.g. # "import"). # # https://developer.mozilla.org/en-US/docs/Web/CSS/At-rule - :at_rules => [], + at_rules: [], # CSS at-rules to allow whose blocks may contain properties (e.g. # "font-face"). - :at_rules_with_properties => [], + at_rules_with_properties: [], # CSS at-rules to allow whose blocks may contain styles (e.g. "media"). - :at_rules_with_styles => [], + at_rules_with_styles: [], # CSS properties to allow. - :properties => [], + properties: [], # URL protocols to allow in CSS URLs. - :protocols => [] + protocols: [] }, # HTML elements to allow. By default, no elements are allowed (which means @@ -59,16 +59,16 @@ module Config # elements (elements in the MathML or SVG namespaces). Do not add `math` # or `svg` to this list! If you do, you may create a security # vulnerability in your application. - :elements => [], + elements: [], # HTML parsing options to pass to Nokogumbo. # https://github.com/rubys/nokogumbo/tree/v2.0.1#parsing-options - :parser_options => {}, + parser_options: {}, # URL handling protocols to allow in specific attributes. By default, no # protocols are allowed. Use :relative in place of a protocol if you want # to allow relative URLs sans protocol. - :protocols => {}, + protocols: {}, # If this is true, Sanitize will remove the contents of any filtered # elements in addition to the elements themselves. By default, Sanitize @@ -78,45 +78,45 @@ module Config # If this is an Array or Set of element names, then only the contents of # the specified elements (when filtered) will be removed, and the contents # of all other filtered elements will be left behind. - :remove_contents => %w[ + remove_contents: %w[ iframe math noembed noframes noscript plaintext script style svg xmp ], # Transformers allow you to filter or alter nodes using custom logic. See # README.md for details and examples. - :transformers => [], + transformers: [], # Elements which, when removed, should have their contents surrounded by # values specified with `before` and `after` keys to preserve readability. # For example, `foo
bar
baz` will become 'foo bar baz' when the #
is removed. - :whitespace_elements => { - 'address' => { :before => ' ', :after => ' ' }, - 'article' => { :before => ' ', :after => ' ' }, - 'aside' => { :before => ' ', :after => ' ' }, - 'blockquote' => { :before => ' ', :after => ' ' }, - 'br' => { :before => ' ', :after => ' ' }, - 'dd' => { :before => ' ', :after => ' ' }, - 'div' => { :before => ' ', :after => ' ' }, - 'dl' => { :before => ' ', :after => ' ' }, - 'dt' => { :before => ' ', :after => ' ' }, - 'footer' => { :before => ' ', :after => ' ' }, - 'h1' => { :before => ' ', :after => ' ' }, - 'h2' => { :before => ' ', :after => ' ' }, - 'h3' => { :before => ' ', :after => ' ' }, - 'h4' => { :before => ' ', :after => ' ' }, - 'h5' => { :before => ' ', :after => ' ' }, - 'h6' => { :before => ' ', :after => ' ' }, - 'header' => { :before => ' ', :after => ' ' }, - 'hgroup' => { :before => ' ', :after => ' ' }, - 'hr' => { :before => ' ', :after => ' ' }, - 'li' => { :before => ' ', :after => ' ' }, - 'nav' => { :before => ' ', :after => ' ' }, - 'ol' => { :before => ' ', :after => ' ' }, - 'p' => { :before => ' ', :after => ' ' }, - 'pre' => { :before => ' ', :after => ' ' }, - 'section' => { :before => ' ', :after => ' ' }, - 'ul' => { :before => ' ', :after => ' ' } + whitespace_elements: { + "address" => {before: " ", after: " "}, + "article" => {before: " ", after: " "}, + "aside" => {before: " ", after: " "}, + "blockquote" => {before: " ", after: " "}, + "br" => {before: " ", after: " "}, + "dd" => {before: " ", after: " "}, + "div" => {before: " ", after: " "}, + "dl" => {before: " ", after: " "}, + "dt" => {before: " ", after: " "}, + "footer" => {before: " ", after: " "}, + "h1" => {before: " ", after: " "}, + "h2" => {before: " ", after: " "}, + "h3" => {before: " ", after: " "}, + "h4" => {before: " ", after: " "}, + "h5" => {before: " ", after: " "}, + "h6" => {before: " ", after: " "}, + "header" => {before: " ", after: " "}, + "hgroup" => {before: " ", after: " "}, + "hr" => {before: " ", after: " "}, + "li" => {before: " ", after: " "}, + "nav" => {before: " ", after: " "}, + "ol" => {before: " ", after: " "}, + "p" => {before: " ", after: " "}, + "pre" => {before: " ", after: " "}, + "section" => {before: " ", after: " "}, + "ul" => {before: " ", after: " "} } ) end diff --git a/lib/sanitize/config/relaxed.rb b/lib/sanitize/config/relaxed.rb index 42b64bb..fb3df5d 100644 --- a/lib/sanitize/config/relaxed.rb +++ b/lib/sanitize/config/relaxed.rb @@ -3,44 +3,42 @@ class Sanitize module Config RELAXED = freeze_config( - :elements => BASIC[:elements] + %w[ + elements: BASIC[:elements] + %w[ address article aside bdi bdo body caption col colgroup data del div figcaption figure footer h1 h2 h3 h4 h5 h6 head header hgroup hr html img ins main nav rp rt ruby section span style summary table tbody td tfoot th thead title tr wbr ], - :allow_doctype => true, + allow_doctype: true, - :attributes => merge(BASIC[:attributes], - :all => %w[class dir hidden id lang style tabindex title translate], - 'a' => %w[href hreflang name rel], - 'col' => %w[span width], - 'colgroup' => %w[span width], - 'data' => %w[value], - 'del' => %w[cite datetime], - 'img' => %w[align alt border height src srcset width], - 'ins' => %w[cite datetime], - 'li' => %w[value], - 'ol' => %w[reversed start type], - 'style' => %w[media scoped type], - 'table' => %w[align bgcolor border cellpadding cellspacing frame rules sortable summary width], - 'td' => %w[abbr align axis colspan headers rowspan valign width], - 'th' => %w[abbr align axis colspan headers rowspan scope sorted valign width], - 'ul' => %w[type] - ), + attributes: merge(BASIC[:attributes], + :all => %w[class dir hidden id lang style tabindex title translate], + "a" => %w[href hreflang name rel], + "col" => %w[span width], + "colgroup" => %w[span width], + "data" => %w[value], + "del" => %w[cite datetime], + "img" => %w[align alt border height src srcset width], + "ins" => %w[cite datetime], + "li" => %w[value], + "ol" => %w[reversed start type], + "style" => %w[media scoped type], + "table" => %w[align bgcolor border cellpadding cellspacing frame rules sortable summary width], + "td" => %w[abbr align axis colspan headers rowspan valign width], + "th" => %w[abbr align axis colspan headers rowspan scope sorted valign width], + "ul" => %w[type]), - :protocols => merge(BASIC[:protocols], - 'del' => {'cite' => ['http', 'https', :relative]}, - 'img' => {'src' => ['http', 'https', :relative]}, - 'ins' => {'cite' => ['http', 'https', :relative]} - ), + protocols: merge(BASIC[:protocols], + "del" => {"cite" => ["http", "https", :relative]}, + "img" => {"src" => ["http", "https", :relative]}, + "ins" => {"cite" => ["http", "https", :relative]}), - :css => { - :allow_comments => true, - :allow_hacks => true, + css: { + allow_comments: true, + allow_hacks: true, - :at_rules_with_properties => %w[ + at_rules_with_properties: %w[ bottom-center bottom-left bottom-left-corner @@ -61,7 +59,7 @@ module Config top-right-corner ], - :at_rules_with_styles => %w[ + at_rules_with_styles: %w[ -moz-keyframes -o-keyframes -webkit-keyframes @@ -71,9 +69,9 @@ module Config supports ], - :protocols => ['http', 'https', :relative], + protocols: ["http", "https", :relative], - :properties => %w[ + properties: %w[ -moz-appearance -moz-background-inline-policy -moz-box-sizing diff --git a/lib/sanitize/config/restricted.rb b/lib/sanitize/config/restricted.rb index 8dba67a..f4632c4 100644 --- a/lib/sanitize/config/restricted.rb +++ b/lib/sanitize/config/restricted.rb @@ -3,7 +3,7 @@ class Sanitize module Config RESTRICTED = freeze_config( - :elements => %w[b em i strong u] + elements: %w[b em i strong u] ) end end diff --git a/lib/sanitize/css.rb b/lib/sanitize/css.rb index fc886ef..aa52e68 100644 --- a/lib/sanitize/css.rb +++ b/lib/sanitize/css.rb @@ -1,331 +1,333 @@ # frozen_string_literal: true -require 'crass' -require 'set' - -class Sanitize; class CSS - attr_reader :config - - # -- Class Methods ----------------------------------------------------------- - - # Sanitizes inline CSS style properties. - # - # This is most useful for sanitizing non-stylesheet fragments of CSS like you - # would find in the `style` attribute of an HTML element. To sanitize a full - # CSS stylesheet, use {.stylesheet}. - # - # @example - # Sanitize::CSS.properties("background: url(foo.png); color: #fff;") - # - # @return [String] Sanitized CSS properties. - def self.properties(css, config = {}) - self.new(config).properties(css) - end +require "crass" +require "set" + +class Sanitize + class CSS + attr_reader :config + + # -- Class Methods --------------------------------------------------------- + + # Sanitizes inline CSS style properties. + # + # This is most useful for sanitizing non-stylesheet fragments of CSS like + # you would find in the `style` attribute of an HTML element. To sanitize a + # full CSS stylesheet, use {.stylesheet}. + # + # @example + # Sanitize::CSS.properties("background: url(foo.png); color: #fff;") + # + # @return [String] Sanitized CSS properties. + def self.properties(css, config = {}) + new(config).properties(css) + end - # Sanitizes a full CSS stylesheet. - # - # A stylesheet may include selectors, at-rules, and comments. To sanitize only - # inline style properties such as the contents of an HTML `style` attribute, - # use {.properties}. - # - # @example - # css = %[ - # .foo { - # background: url(foo.png); - # color: #fff; - # } - # - # #bar { - # font: 42pt 'Comic Sans MS'; - # } - # ] - # - # Sanitize::CSS.stylesheet(css, Sanitize::Config::RELAXED) - # - # @return [String] Sanitized CSS stylesheet. - def self.stylesheet(css, config = {}) - self.new(config).stylesheet(css) - end + # Sanitizes a full CSS stylesheet. + # + # A stylesheet may include selectors, at-rules, and comments. To sanitize + # only inline style properties such as the contents of an HTML `style` + # attribute, use {.properties}. + # + # @example + # css = %[ + # .foo { + # background: url(foo.png); + # color: #fff; + # } + # + # #bar { + # font: 42pt 'Comic Sans MS'; + # } + # ] + # + # Sanitize::CSS.stylesheet(css, Sanitize::Config::RELAXED) + # + # @return [String] Sanitized CSS stylesheet. + def self.stylesheet(css, config = {}) + new(config).stylesheet(css) + end - # Sanitizes the given Crass CSS parse tree and all its children, modifying it - # in place. - # - # @example - # css = %[ - # .foo { - # background: url(foo.png); - # color: #fff; - # } - # - # #bar { - # font: 42pt 'Comic Sans MS'; - # } - # ] - # - # tree = Crass.parse(css) - # Sanitize::CSS.tree!(tree, Sanitize::Config::RELAXED) - # - # @return [Array] Sanitized Crass CSS parse tree. - def self.tree!(tree, config = {}) - self.new(config).tree!(tree) - end + # Sanitizes the given Crass CSS parse tree and all its children, modifying + # it in place. + # + # @example + # css = %[ + # .foo { + # background: url(foo.png); + # color: #fff; + # } + # + # #bar { + # font: 42pt 'Comic Sans MS'; + # } + # ] + # + # tree = Crass.parse(css) + # Sanitize::CSS.tree!(tree, Sanitize::Config::RELAXED) + # + # @return [Array] Sanitized Crass CSS parse tree. + def self.tree!(tree, config = {}) + new(config).tree!(tree) + end - # -- Instance Methods -------------------------------------------------------- + # -- Instance Methods ------------------------------------------------------ - # Returns a new Sanitize::CSS object initialized with the settings in - # _config_. - def initialize(config = {}) - @config = Config.merge(Config::DEFAULT[:css], config[:css] || config) + # Returns a new Sanitize::CSS object initialized with the settings in + # _config_. + def initialize(config = {}) + @config = Config.merge(Config::DEFAULT[:css], config[:css] || config) - @at_rules = Set.new(@config[:at_rules]) - @at_rules_with_properties = Set.new(@config[:at_rules_with_properties]) - @at_rules_with_styles = Set.new(@config[:at_rules_with_styles]) - @import_url_validator = @config[:import_url_validator] - end + @at_rules = Set.new(@config[:at_rules]) + @at_rules_with_properties = Set.new(@config[:at_rules_with_properties]) + @at_rules_with_styles = Set.new(@config[:at_rules_with_styles]) + @import_url_validator = @config[:import_url_validator] + end - # Sanitizes inline CSS style properties. - # - # This is most useful for sanitizing non-stylesheet fragments of CSS like you - # would find in the `style` attribute of an HTML element. To sanitize a full - # CSS stylesheet, use {#stylesheet}. - # - # @example - # scss = Sanitize::CSS.new(Sanitize::Config::RELAXED) - # scss.properties("background: url(foo.png); color: #fff;") - # - # @return [String] Sanitized CSS properties. - def properties(css) - tree = Crass.parse_properties(css, - :preserve_comments => @config[:allow_comments], - :preserve_hacks => @config[:allow_hacks]) - - tree!(tree) - Crass::Parser.stringify(tree) - end + # Sanitizes inline CSS style properties. + # + # This is most useful for sanitizing non-stylesheet fragments of CSS like + # you would find in the `style` attribute of an HTML element. To sanitize a + # full CSS stylesheet, use {#stylesheet}. + # + # @example + # scss = Sanitize::CSS.new(Sanitize::Config::RELAXED) + # scss.properties("background: url(foo.png); color: #fff;") + # + # @return [String] Sanitized CSS properties. + def properties(css) + tree = Crass.parse_properties(css, + preserve_comments: @config[:allow_comments], + preserve_hacks: @config[:allow_hacks]) + + tree!(tree) + Crass::Parser.stringify(tree) + end - # Sanitizes a full CSS stylesheet. - # - # A stylesheet may include selectors, at-rules, and comments. To sanitize only - # inline style properties such as the contents of an HTML `style` attribute, - # use {#properties}. - # - # @example - # css = %[ - # .foo { - # background: url(foo.png); - # color: #fff; - # } - # - # #bar { - # font: 42pt 'Comic Sans MS'; - # } - # ] - # - # scss = Sanitize::CSS.new(Sanitize::Config::RELAXED) - # scss.stylesheet(css) - # - # @return [String] Sanitized CSS stylesheet. - def stylesheet(css) - tree = Crass.parse(css, - :preserve_comments => @config[:allow_comments], - :preserve_hacks => @config[:allow_hacks]) - - tree!(tree) - Crass::Parser.stringify(tree) - end + # Sanitizes a full CSS stylesheet. + # + # A stylesheet may include selectors, at-rules, and comments. To sanitize + # only inline style properties such as the contents of an HTML `style` + # attribute, use {#properties}. + # + # @example + # css = %[ + # .foo { + # background: url(foo.png); + # color: #fff; + # } + # + # #bar { + # font: 42pt 'Comic Sans MS'; + # } + # ] + # + # scss = Sanitize::CSS.new(Sanitize::Config::RELAXED) + # scss.stylesheet(css) + # + # @return [String] Sanitized CSS stylesheet. + def stylesheet(css) + tree = Crass.parse(css, + preserve_comments: @config[:allow_comments], + preserve_hacks: @config[:allow_hacks]) + + tree!(tree) + Crass::Parser.stringify(tree) + end + + # Sanitizes the given Crass CSS parse tree and all its children, modifying + # it in place. + # + # @example + # css = %[ + # .foo { + # background: url(foo.png); + # color: #fff; + # } + # + # #bar { + # font: 42pt 'Comic Sans MS'; + # } + # ] + # + # scss = Sanitize::CSS.new(Sanitize::Config::RELAXED) + # tree = Crass.parse(css) + # + # scss.tree!(tree) + # + # @return [Array] Sanitized Crass CSS parse tree. + def tree!(tree) + preceded_by_property = false + + tree.map! do |node| + next nil if node.nil? + + case node[:node] + when :at_rule + preceded_by_property = false + next at_rule!(node) + + when :comment + next node if @config[:allow_comments] + + when :property + prop = property!(node) + preceded_by_property = !prop.nil? + next prop + + when :semicolon + # Only preserve the semicolon if it was preceded by an allowlisted + # property. Otherwise, omit it in order to prevent redundant + # semicolons. + if preceded_by_property + preceded_by_property = false + next node + end - # Sanitizes the given Crass CSS parse tree and all its children, modifying it - # in place. - # - # @example - # css = %[ - # .foo { - # background: url(foo.png); - # color: #fff; - # } - # - # #bar { - # font: 42pt 'Comic Sans MS'; - # } - # ] - # - # scss = Sanitize::CSS.new(Sanitize::Config::RELAXED) - # tree = Crass.parse(css) - # - # scss.tree!(tree) - # - # @return [Array] Sanitized Crass CSS parse tree. - def tree!(tree) - preceded_by_property = false - - tree.map! do |node| - next nil if node.nil? - - case node[:node] - when :at_rule - preceded_by_property = false - next at_rule!(node) - - when :comment - next node if @config[:allow_comments] - - when :property - prop = property!(node) - preceded_by_property = !prop.nil? - next prop - - when :semicolon - # Only preserve the semicolon if it was preceded by an allowlisted - # property. Otherwise, omit it in order to prevent redundant semicolons. - if preceded_by_property + when :style_rule preceded_by_property = false + tree!(node[:children]) next node - end - when :style_rule - preceded_by_property = false - tree!(node[:children]) - next node + when :whitespace + next node + end - when :whitespace - next node + nil end - nil + tree end - tree - end + # -- Protected Instance Methods -------------------------------------------- + protected - # -- Protected Instance Methods ---------------------------------------------- - protected + # Sanitizes a CSS at-rule node. Returns the sanitized node, or `nil` if the + # current config doesn't allow this at-rule. + def at_rule!(rule) + name = rule[:name].downcase - # Sanitizes a CSS at-rule node. Returns the sanitized node, or `nil` if the - # current config doesn't allow this at-rule. - def at_rule!(rule) - name = rule[:name].downcase + if @at_rules_with_styles.include?(name) + styles = Crass::Parser.parse_rules(rule[:block], + preserve_comments: @config[:allow_comments], + preserve_hacks: @config[:allow_hacks]) - if @at_rules_with_styles.include?(name) - styles = Crass::Parser.parse_rules(rule[:block], - :preserve_comments => @config[:allow_comments], - :preserve_hacks => @config[:allow_hacks]) + rule[:block] = tree!(styles) - rule[:block] = tree!(styles) + elsif @at_rules_with_properties.include?(name) + props = Crass::Parser.parse_properties(rule[:block], + preserve_comments: @config[:allow_comments], + preserve_hacks: @config[:allow_hacks]) - elsif @at_rules_with_properties.include?(name) - props = Crass::Parser.parse_properties(rule[:block], - :preserve_comments => @config[:allow_comments], - :preserve_hacks => @config[:allow_hacks]) + rule[:block] = tree!(props) - rule[:block] = tree!(props) + elsif @at_rules.include?(name) + return nil if name == "import" && !import_url_allowed?(rule) + return nil if rule.has_key?(:block) + else + return nil + end - elsif @at_rules.include?(name) - return nil if name == "import" && !import_url_allowed?(rule) - return nil if rule.has_key?(:block) - else - return nil + rule end - rule - end + # Returns `true` if the given CSS function name is an image-related function + # that may contain image URLs that need to be validated. + def image_function?(name) + ["image", "image-set", "-webkit-image-set"].include?(name) + end - # Returns `true` if the given CSS function name is an image-related function - # that may contain image URLs that need to be validated. - def image_function?(name) - ['image', 'image-set', '-webkit-image-set'].include?(name) - end + # Passes the URL value of an @import rule to a block to ensure + # it's an allowed URL + def import_url_allowed?(rule) + return true unless @import_url_validator - # Passes the URL value of an @import rule to a block to ensure - # it's an allowed URL - def import_url_allowed?(rule) - return true unless @import_url_validator + url_token = rule[:tokens].detect { |t| t[:node] == :url || t[:node] == :string } - url_token = rule[:tokens].detect { |t| t[:node] == :url || t[:node] == :string } + # don't allow @imports with no URL value + return false unless url_token && (import_url = url_token[:value]) - # don't allow @imports with no URL value - return false unless url_token && (import_url = url_token[:value]) + @import_url_validator.call(import_url) + end - @import_url_validator.call(import_url) - end + # Sanitizes a CSS property node. Returns the sanitized node, or `nil` if the + # current config doesn't allow this property. + def property!(prop) + name = prop[:name].downcase - # Sanitizes a CSS property node. Returns the sanitized node, or `nil` if the - # current config doesn't allow this property. - def property!(prop) - name = prop[:name].downcase + # Preserve IE * and _ hacks if desired. + if @config[:allow_hacks] + name.slice!(0) if /\A[*_]/.match?(name) + end - # Preserve IE * and _ hacks if desired. - if @config[:allow_hacks] - name.slice!(0) if name =~ /\A[*_]/ - end + return nil unless @config[:properties].include?(name) - return nil unless @config[:properties].include?(name) + nodes = prop[:children].dup + combined_value = +"" - nodes = prop[:children].dup - combined_value = String.new + nodes.each do |child| + value = child[:value] - nodes.each do |child| - value = child[:value] + case child[:node] + when :ident + combined_value << value.downcase if String === value - case child[:node] - when :ident - combined_value << value.downcase if String === value + when :function + if child.key?(:name) + name = child[:name].downcase - when :function - if child.key?(:name) - name = child[:name].downcase + if name == "url" + return nil unless valid_url?(child) + end - if name == 'url' - return nil unless valid_url?(child) + if image_function?(name) + return nil unless valid_image?(child) + end + + combined_value << name + return nil if name == "expression" || combined_value == "expression" end - if image_function?(name) - return nil unless valid_image?(child) + if Array === value + nodes.concat(value) + elsif String === value + lowercase_value = value.downcase + combined_value << lowercase_value + return nil if lowercase_value == "expression" || combined_value == "expression" end - combined_value << name - return nil if name == 'expression' || combined_value == 'expression' - end + when :url + return nil unless valid_url?(child) - if Array === value - nodes.concat(value) - elsif String === value - lowercase_value = value.downcase - combined_value << lowercase_value - return nil if lowercase_value == 'expression' || combined_value == 'expression' + when :bad_url + return nil end - - when :url - return nil unless valid_url?(child) - - when :bad_url - return nil end - end - prop - end + prop + end - # Returns `true` if the given node (which may be of type `:url` or - # `:function`, since the CSS syntax can produce both) uses an allowlisted - # protocol. - def valid_url?(node) - type = node[:node] + # Returns `true` if the given node (which may be of type `:url` or + # `:function`, since the CSS syntax can produce both) uses an allowlisted + # protocol. + def valid_url?(node) + type = node[:node] - if type == :function - return false unless node.key?(:name) && node[:name].downcase == 'url' - return false unless Array === node[:value] + if type == :function + return false unless node.key?(:name) && node[:name].downcase == "url" + return false unless Array === node[:value] - # A URL function's `:value` should be an array containing no more than one - # `:string` node and any number of `:whitespace` nodes. - # - # If it contains more than one `:string` node, or if it contains any other - # nodes except `:whitespace` nodes, it's not valid. - url_string_node = nil + # A URL function's `:value` should be an array containing no more than + # one `:string` node and any number of `:whitespace` nodes. + # + # If it contains more than one `:string` node, or if it contains any + # other nodes except `:whitespace` nodes, it's not valid. + url_string_node = nil - node[:value].each do |token| - return false unless Hash === token + node[:value].each do |token| + return false unless Hash === token - case token[:node] + case token[:node] when :string return false unless url_string_node.nil? url_string_node = token @@ -335,47 +337,45 @@ def valid_url?(node) else return false + end end - end - return false if url_string_node.nil? - url = url_string_node[:value] - elsif type == :url - url = node[:value] - else - return false - end + return false if url_string_node.nil? + url = url_string_node[:value] + elsif type == :url + url = node[:value] + else + return false + end - if url =~ Sanitize::REGEX_PROTOCOL - return @config[:protocols].include?($1.downcase) - else - return @config[:protocols].include?(:relative) + if url =~ Sanitize::REGEX_PROTOCOL + @config[:protocols].include?($1.downcase) + else + @config[:protocols].include?(:relative) + end end - false - end - - # Returns `true` if the given node is an image-related function and contains - # only strings that use an allowlisted protocol. - def valid_image?(node) - return false unless node[:node] == :function - return false unless node.key?(:name) && image_function?(node[:name].downcase) - return false unless Array === node[:value] + # Returns `true` if the given node is an image-related function and contains + # only strings that use an allowlisted protocol. + def valid_image?(node) + return false unless node[:node] == :function + return false unless node.key?(:name) && image_function?(node[:name].downcase) + return false unless Array === node[:value] - node[:value].each do |token| + node[:value].each do |token| return false unless Hash === token case token[:node] - when :string - if token[:value] =~ Sanitize::REGEX_PROTOCOL - return false unless @config[:protocols].include?($1.downcase) - else - return false unless @config[:protocols].include?(:relative) - end + when :string + if token[:value] =~ Sanitize::REGEX_PROTOCOL + return false unless @config[:protocols].include?($1.downcase) else - next + return false unless @config[:protocols].include?(:relative) + end + else + next end end + end end - -end; end +end diff --git a/lib/sanitize/transformers/clean_cdata.rb b/lib/sanitize/transformers/clean_cdata.rb index c1bdb0f..5315de2 100644 --- a/lib/sanitize/transformers/clean_cdata.rb +++ b/lib/sanitize/transformers/clean_cdata.rb @@ -1,13 +1,13 @@ # frozen_string_literal: true -class Sanitize; module Transformers - - CleanCDATA = lambda do |env| - node = env[:node] - - if node.type == Nokogiri::XML::Node::CDATA_SECTION_NODE - node.replace(Nokogiri::XML::Text.new(node.text, node.document)) +class Sanitize + module Transformers + CleanCDATA = lambda do |env| + node = env[:node] + + if node.type == Nokogiri::XML::Node::CDATA_SECTION_NODE + node.replace(Nokogiri::XML::Text.new(node.text, node.document)) + end end end - -end; end +end diff --git a/lib/sanitize/transformers/clean_comment.rb b/lib/sanitize/transformers/clean_comment.rb index 9fa2196..ac7c660 100644 --- a/lib/sanitize/transformers/clean_comment.rb +++ b/lib/sanitize/transformers/clean_comment.rb @@ -1,13 +1,13 @@ # frozen_string_literal: true -class Sanitize; module Transformers - - CleanComment = lambda do |env| - node = env[:node] - - if node.type == Nokogiri::XML::Node::COMMENT_NODE - node.unlink unless env[:is_allowlisted] +class Sanitize + module Transformers + CleanComment = lambda do |env| + node = env[:node] + + if node.type == Nokogiri::XML::Node::COMMENT_NODE + node.unlink unless env[:is_allowlisted] + end end end - -end; end +end diff --git a/lib/sanitize/transformers/clean_css.rb b/lib/sanitize/transformers/clean_css.rb index aaf280d..ed8d8aa 100644 --- a/lib/sanitize/transformers/clean_css.rb +++ b/lib/sanitize/transformers/clean_css.rb @@ -1,60 +1,62 @@ # frozen_string_literal: true -class Sanitize; module Transformers; module CSS - -# Enforces a CSS allowlist on the contents of `style` attributes. -class CleanAttribute - def initialize(sanitizer_or_config) - if Sanitize::CSS === sanitizer_or_config - @scss = sanitizer_or_config - else - @scss = Sanitize::CSS.new(sanitizer_or_config) - end - end - - def call(env) - node = env[:node] - - return unless node.type == Nokogiri::XML::Node::ELEMENT_NODE && - node.key?('style') && !env[:is_allowlisted] - - attr = node.attribute('style') - css = @scss.properties(attr.value) - - if css.strip.empty? - attr.unlink - else - attr.value = css +class Sanitize + module Transformers + module CSS + # Enforces a CSS allowlist on the contents of `style` attributes. + class CleanAttribute + def initialize(sanitizer_or_config) + @scss = if Sanitize::CSS === sanitizer_or_config + sanitizer_or_config + else + Sanitize::CSS.new(sanitizer_or_config) + end + end + + def call(env) + node = env[:node] + + return unless node.type == Nokogiri::XML::Node::ELEMENT_NODE && + node.key?("style") && !env[:is_allowlisted] + + attr = node.attribute("style") + css = @scss.properties(attr.value) + + if css.strip.empty? + attr.unlink + else + attr.value = css + end + end + end + + # Enforces a CSS allowlist on the contents of `')).must_equal '' + it "should remove the ")).must_equal "" end end diff --git a/test/test_clean_doctype.rb b/test/test_clean_doctype.rb index 92781c5..8ebb535 100644 --- a/test/test_clean_doctype.rb +++ b/test/test_clean_doctype.rb @@ -1,23 +1,23 @@ # frozen_string_literal: true -require_relative 'common' +require_relative "common" -describe 'Sanitize::Transformers::CleanDoctype' do +describe "Sanitize::Transformers::CleanDoctype" do make_my_diffs_pretty! parallelize_me! - describe 'when :allow_doctype is false' do + describe "when :allow_doctype is false" do before do - @s = Sanitize.new(:allow_doctype => false, :elements => ['html']) + @s = Sanitize.new(allow_doctype: false, elements: ["html"]) end - it 'should remove doctype declarations' do - _(@s.document('foo')).must_equal "foo" - _(@s.fragment('foo')).must_equal 'foo' + it "should remove doctype declarations" do + _(@s.document("foo")).must_equal "foo" + _(@s.fragment("foo")).must_equal "foo" end - it 'should not allow doctype definitions in fragments' do - _(@s.fragment('foo')) + it "should not allow doctype definitions in fragments" do + _(@s.fragment("foo")) .must_equal "foo" _(@s.fragment('foo')) @@ -28,13 +28,13 @@ end end - describe 'when :allow_doctype is true' do + describe "when :allow_doctype is true" do before do - @s = Sanitize.new(:allow_doctype => true, :elements => ['html']) + @s = Sanitize.new(allow_doctype: true, elements: ["html"]) end - it 'should allow doctype declarations in documents' do - _(@s.document('foo')) + it "should allow doctype declarations in documents" do + _(@s.document("foo")) .must_equal "foo" _(@s.document('foo')) @@ -44,22 +44,22 @@ .must_equal "foo" end - it 'should not allow obviously invalid doctype declarations in documents' do - _(@s.document('foo')) + it "should not allow obviously invalid doctype declarations in documents" do + _(@s.document("foo")) .must_equal "foo" - _(@s.document('foo')) + _(@s.document("foo")) .must_equal "foo" _(@s.document('foo')) .must_equal "foo" - _(@s.document('foo')) + _(@s.document("foo")) .must_equal "foo" end - it 'should not allow doctype definitions in fragments' do - _(@s.fragment('foo')) + it "should not allow doctype definitions in fragments" do + _(@s.fragment("foo")) .must_equal "foo" _(@s.fragment('foo')) diff --git a/test/test_clean_element.rb b/test/test_clean_element.rb index 34b80cf..39f15cd 100644 --- a/test/test_clean_element.rb +++ b/test/test_clean_element.rb @@ -1,246 +1,246 @@ # frozen_string_literal: true -require_relative 'common' +require_relative "common" -describe 'Sanitize::Transformers::CleanElement' do +describe "Sanitize::Transformers::CleanElement" do make_my_diffs_pretty! parallelize_me! strings = { - :basic => { - :html => 'Lorem ipsum dolor sit
amet ', - :default => 'Lorem ipsum dolor sit amet ', - :restricted => 'Lorem ipsum dolor sit amet ', - :basic => 'Lorem ipsum dolor sit
amet ', - :relaxed => 'Lorem ipsum dolor sit
amet ' + basic: { + html: 'Lorem ipsum dolor sit
amet ', + default: "Lorem ipsum dolor sit amet ", + restricted: "Lorem ipsum dolor sit amet ", + basic: 'Lorem ipsum dolor sit
amet ', + relaxed: 'Lorem ipsum dolor sit
amet ' }, - :malformed => { - :html => 'Lorem dolor sit
amet ', - :default => 'Lorem ipsum dolor sit amet <script>alert("hello world");', - :restricted => 'Lorem ipsum dolor sit amet <script>alert("hello world");', - :basic => 'Lorem ipsum dolor sit
amet <script>alert("hello world");', - :relaxed => 'Lorem ipsum dolor sit
amet <script>alert("hello world");' + malicious: { + html: 'Lorem ipsum dolor sit
amet <script>alert("hello world");', + default: 'Lorem ipsum dolor sit amet <script>alert("hello world");', + restricted: 'Lorem ipsum dolor sit amet <script>alert("hello world");', + basic: 'Lorem ipsum dolor sit
amet <script>alert("hello world");', + relaxed: 'Lorem ipsum dolor sit
amet <script>alert("hello world");' } } protocols = { - 'protocol-based JS injection: simple, no spaces' => { - :html => 'foo', - :default => 'foo', - :restricted => 'foo', - :basic => 'foo', - :relaxed => 'foo' + "protocol-based JS injection: simple, no spaces" => { + html: 'foo', + default: "foo", + restricted: "foo", + basic: 'foo', + relaxed: "foo" }, - 'protocol-based JS injection: simple, spaces before' => { - :html => 'foo', - :default => 'foo', - :restricted => 'foo', - :basic => 'foo', - :relaxed => 'foo' + "protocol-based JS injection: simple, spaces before" => { + html: 'foo', + default: "foo", + restricted: "foo", + basic: 'foo', + relaxed: "foo" }, - 'protocol-based JS injection: simple, spaces after' => { - :html => 'foo', - :default => 'foo', - :restricted => 'foo', - :basic => 'foo', - :relaxed => 'foo' + "protocol-based JS injection: simple, spaces after" => { + html: 'foo', + default: "foo", + restricted: "foo", + basic: 'foo', + relaxed: "foo" }, - 'protocol-based JS injection: simple, spaces before and after' => { - :html => 'foo', - :default => 'foo', - :restricted => 'foo', - :basic => 'foo', - :relaxed => 'foo' + "protocol-based JS injection: simple, spaces before and after" => { + html: 'foo', + default: "foo", + restricted: "foo", + basic: 'foo', + relaxed: "foo" }, - 'protocol-based JS injection: preceding colon' => { - :html => 'foo', - :default => 'foo', - :restricted => 'foo', - :basic => 'foo', - :relaxed => 'foo' + "protocol-based JS injection: preceding colon" => { + html: 'foo', + default: "foo", + restricted: "foo", + basic: 'foo', + relaxed: "foo" }, - 'protocol-based JS injection: UTF-8 encoding' => { - :html => 'foo', - :default => 'foo', - :restricted => 'foo', - :basic => 'foo', - :relaxed => 'foo' + "protocol-based JS injection: UTF-8 encoding" => { + html: 'foo', + default: "foo", + restricted: "foo", + basic: 'foo', + relaxed: "foo" }, - 'protocol-based JS injection: long UTF-8 encoding' => { - :html => 'foo', - :default => 'foo', - :restricted => 'foo', - :basic => 'foo', - :relaxed => 'foo' + "protocol-based JS injection: long UTF-8 encoding" => { + html: 'foo', + default: "foo", + restricted: "foo", + basic: 'foo', + relaxed: "foo" }, - 'protocol-based JS injection: long UTF-8 encoding without semicolons' => { - :html => 'foo', - :default => 'foo', - :restricted => 'foo', - :basic => 'foo', - :relaxed => 'foo' + "protocol-based JS injection: long UTF-8 encoding without semicolons" => { + html: "foo", + default: "foo", + restricted: "foo", + basic: 'foo', + relaxed: "foo" }, - 'protocol-based JS injection: hex encoding' => { - :html => 'foo', - :default => 'foo', - :restricted => 'foo', - :basic => 'foo', - :relaxed => 'foo' + "protocol-based JS injection: hex encoding" => { + html: 'foo', + default: "foo", + restricted: "foo", + basic: 'foo', + relaxed: "foo" }, - 'protocol-based JS injection: long hex encoding' => { - :html => 'foo', - :default => 'foo', - :restricted => 'foo', - :basic => 'foo', - :relaxed => 'foo' + "protocol-based JS injection: long hex encoding" => { + html: 'foo', + default: "foo", + restricted: "foo", + basic: 'foo', + relaxed: "foo" }, - 'protocol-based JS injection: hex encoding without semicolons' => { - :html => 'foo', - :default => 'foo', - :restricted => 'foo', - :basic => 'foo', - :relaxed => 'foo' + "protocol-based JS injection: hex encoding without semicolons" => { + html: "foo", + default: "foo", + restricted: "foo", + basic: 'foo', + relaxed: "foo" }, - 'protocol-based JS injection: null char' => { - :html => "", - :default => '', - :restricted => '', - :basic => '', - :relaxed => '' + "protocol-based JS injection: null char" => { + html: "", + default: "", + restricted: "", + basic: "", + relaxed: "" }, - 'protocol-based JS injection: invalid URL char' => { - :html => '', - :default => '', - :restricted => '', - :basic => '', - :relaxed => '' + "protocol-based JS injection: invalid URL char" => { + html: '', + default: "", + restricted: "", + basic: "", + relaxed: "" }, - 'protocol-based JS injection: spaces and entities' => { - :html => '', - :default => '', - :restricted => '', - :basic => '', - :relaxed => '' + "protocol-based JS injection: spaces and entities" => { + html: '', + default: "", + restricted: "", + basic: "", + relaxed: "" }, - 'protocol whitespace' => { - :html => '', - :default => '', - :restricted => '', - :basic => '', - :relaxed => '' + "protocol whitespace" => { + html: '', + default: "", + restricted: "", + basic: '', + relaxed: '' } } - describe 'Default config' do - it 'should remove non-allowlisted elements, leaving safe contents behind' do + describe "Default config" do + it "should remove non-allowlisted elements, leaving safe contents behind" do _(Sanitize.fragment('foo bar baz quux')) - .must_equal 'foo bar baz quux' + .must_equal "foo bar baz quux" _(Sanitize.fragment('')) - .must_equal '' + .must_equal "" _(Sanitize.fragment('<')) .must_equal '< script <>> alert("");' end - it 'should surround the contents of :whitespace_elements with space characters when removing the element' do - _(Sanitize.fragment('foo
bar
baz')) - .must_equal 'foo bar baz' + it "should surround the contents of :whitespace_elements with space characters when removing the element" do + _(Sanitize.fragment("foo
bar
baz")) + .must_equal "foo bar baz" - _(Sanitize.fragment('foo
bar
baz')) - .must_equal 'foo bar baz' + _(Sanitize.fragment("foo
bar
baz")) + .must_equal "foo bar baz" - _(Sanitize.fragment('foo
bar
baz')) - .must_equal 'foo bar baz' + _(Sanitize.fragment("foo
bar
baz")) + .must_equal "foo bar baz" end - it 'should not choke on several instances of the same element in a row' do + it "should not choke on several instances of the same element in a row" do _(Sanitize.fragment('')) - .must_equal '' + .must_equal "" end - it 'should not preserve the content of removed `iframe` elements' do - _(Sanitize.fragment('')) - .must_equal '' + it "should not preserve the content of removed `iframe` elements" do + _(Sanitize.fragment("")) + .must_equal "" end - it 'should not preserve the content of removed `math` elements' do - _(Sanitize.fragment('hello! ')) - .must_equal '' + it "should not preserve the content of removed `math` elements" do + _(Sanitize.fragment("hello! ")) + .must_equal "" end - it 'should not preserve the content of removed `noembed` elements' do - _(Sanitize.fragment('hello! <script>alert(0)</script>')) - .must_equal '' + it "should not preserve the content of removed `noembed` elements" do + _(Sanitize.fragment("hello! <script>alert(0)</script>")) + .must_equal "" end - it 'should not preserve the content of removed `noframes` elements' do - _(Sanitize.fragment('hello! <script>alert(0)</script>')) - .must_equal '' + it "should not preserve the content of removed `noframes` elements" do + _(Sanitize.fragment("hello! <script>alert(0)</script>")) + .must_equal "" end - it 'should not preserve the content of removed `noscript` elements' do - _(Sanitize.fragment('')) - .must_equal '' + it "should not preserve the content of removed `noscript` elements" do + _(Sanitize.fragment("")) + .must_equal "" end - it 'should not preserve the content of removed `plaintext` elements' do - _(Sanitize.fragment('hello! <script>alert(0)</script>')) - .must_equal '' + it "should not preserve the content of removed `plaintext` elements" do + _(Sanitize.fragment("<plaintext>hello! <script>alert(0)</script>")) + .must_equal "" end - it 'should not preserve the content of removed `script` elements' do - _(Sanitize.fragment('<script>hello! <script>alert(0)</script></script>')) - .must_equal '' + it "should not preserve the content of removed `script` elements" do + _(Sanitize.fragment("<script>hello! <script>alert(0)</script></script>")) + .must_equal "" end - it 'should not preserve the content of removed `style` elements' do - _(Sanitize.fragment('<style>hello! <script>alert(0)</script></style>')) - .must_equal '' + it "should not preserve the content of removed `style` elements" do + _(Sanitize.fragment("<style>hello! <script>alert(0)</script></style>")) + .must_equal "" end - it 'should not preserve the content of removed `svg` elements' do - _(Sanitize.fragment('<svg>hello! <script>alert(0)</script></svg>')) - .must_equal '' + it "should not preserve the content of removed `svg` elements" do + _(Sanitize.fragment("<svg>hello! <script>alert(0)</script></svg>")) + .must_equal "" end - it 'should not preserve the content of removed `xmp` elements' do - _(Sanitize.fragment('<xmp>hello! <script>alert(0)</script></xmp>')) - .must_equal '' + it "should not preserve the content of removed `xmp` elements" do + _(Sanitize.fragment("<xmp>hello! <script>alert(0)</script></xmp>")) + .must_equal "" end strings.each do |name, data| @@ -256,7 +256,7 @@ end end - describe 'Restricted config' do + describe "Restricted config" do before do @s = Sanitize.new(Sanitize::Config::RESTRICTED) end @@ -274,17 +274,17 @@ end end - describe 'Basic config' do + describe "Basic config" do before do @s = Sanitize.new(Sanitize::Config::BASIC) end - it 'should not choke on valueless attributes' do - _(@s.fragment('foo <a href>foo</a> bar')) + it "should not choke on valueless attributes" do + _(@s.fragment("foo <a href>foo</a> bar")) .must_equal 'foo <a href="" rel="nofollow">foo</a> bar' end - it 'should downcase attribute names' do + it "should downcase attribute names" do _(@s.fragment('<a HREF="javascript:alert(\'foo\')">bar</a>')) .must_equal '<a rel="nofollow">bar</a>' end @@ -302,12 +302,12 @@ end end - describe 'Relaxed config' do + describe "Relaxed config" do before do @s = Sanitize.new(Sanitize::Config::RELAXED) end - it 'should encode special chars in attribute values' do + it "should encode special chars in attribute values" do _(@s.fragment('<a href="http://example.com" title="<b>&eacute;xamples</b> & things">foo</a>')) .must_equal '<a href="http://example.com" title="<b>éxamples</b> &amp; things">foo</a>' end @@ -325,25 +325,25 @@ end end - describe 'Custom configs' do - it 'should allow attributes on all elements if allowlisted under :all' do + describe "Custom configs" do + it "should allow attributes on all elements if allowlisted under :all" do input = '<p class="foo">bar</p>' - _(Sanitize.fragment(input)).must_equal ' bar ' + _(Sanitize.fragment(input)).must_equal " bar " _(Sanitize.fragment(input, { - :elements => ['p'], - :attributes => {:all => ['class']} + elements: ["p"], + attributes: {all: ["class"]} })).must_equal input _(Sanitize.fragment(input, { - :elements => ['p'], - :attributes => {'div' => ['class']} - })).must_equal '<p>bar</p>' + elements: ["p"], + attributes: {"div" => ["class"]} + })).must_equal "<p>bar</p>" _(Sanitize.fragment(input, { - :elements => ['p'], - :attributes => {'p' => ['title'], :all => ['class']} + elements: ["p"], + attributes: {"p" => ["title"], :all => ["class"]} })).must_equal input end @@ -351,203 +351,187 @@ input = '<a href="/foo/bar">Link</a>' _(Sanitize.fragment(input, - :elements => ['a'], - :attributes => {'a' => ['href']}, - :protocols => {'a' => {'href' => ['http']}} - )).must_equal '<a>Link</a>' + elements: ["a"], + attributes: {"a" => ["href"]}, + protocols: {"a" => {"href" => ["http"]}})).must_equal "<a>Link</a>" end - it 'should allow relative URLs containing colons when the colon is not in the first path segment' do + it "should allow relative URLs containing colons when the colon is not in the first path segment" do input = '<a href="/wiki/Special:Random">Random Page</a>' _(Sanitize.fragment(input, { - :elements => ['a'], - :attributes => {'a' => ['href']}, - :protocols => {'a' => {'href' => [:relative]}} + elements: ["a"], + attributes: {"a" => ["href"]}, + protocols: {"a" => {"href" => [:relative]}} })).must_equal input end - it 'should allow relative URLs containing colons when the colon is part of an anchor' do + it "should allow relative URLs containing colons when the colon is part of an anchor" do input = '<a href="#fn:1">Footnote 1</a>' _(Sanitize.fragment(input, { - :elements => ['a'], - :attributes => {'a' => ['href']}, - :protocols => {'a' => {'href' => [:relative]}} + elements: ["a"], + attributes: {"a" => ["href"]}, + protocols: {"a" => {"href" => [:relative]}} })).must_equal input input = '<a href="somepage#fn:1">Footnote 1</a>' _(Sanitize.fragment(input, { - :elements => ['a'], - :attributes => {'a' => ['href']}, - :protocols => {'a' => {'href' => [:relative]}} + elements: ["a"], + attributes: {"a" => ["href"]}, + protocols: {"a" => {"href" => [:relative]}} })).must_equal input end - it 'should remove the contents of filtered nodes when :remove_contents is true' do - _(Sanitize.fragment('foo bar <div>baz<span>quux</span></div>', - :remove_contents => true - )).must_equal 'foo bar ' + it "should remove the contents of filtered nodes when :remove_contents is true" do + _(Sanitize.fragment("foo bar <div>baz<span>quux</span></div>", + remove_contents: true)).must_equal "foo bar " end - it 'should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as strings' do + it "should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as strings" do _(Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>', - :remove_contents => ['script', 'span'] - )).must_equal 'foo bar baz hi ' + remove_contents: ["script", "span"])).must_equal "foo bar baz hi " _(Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>', - :remove_contents => Set.new(['script', 'span']) - )).must_equal 'foo bar baz hi ' + remove_contents: Set.new(["script", "span"]))).must_equal "foo bar baz hi " end - it 'should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as symbols' do + it "should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as symbols" do _(Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>', - :remove_contents => [:script, :span] - )).must_equal 'foo bar baz hi ' + remove_contents: [:script, :span])).must_equal "foo bar baz hi " _(Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>', - :remove_contents => Set.new([:script, :span]) - )).must_equal 'foo bar baz hi ' + remove_contents: Set.new([:script, :span]))).must_equal "foo bar baz hi " end - it 'should remove the contents of allowlisted iframes' do - _(Sanitize.fragment('<iframe>hi <script>hello</script></iframe>', - :elements => ['iframe'] - )).must_equal '<iframe></iframe>' + it "should remove the contents of allowlisted iframes" do + _(Sanitize.fragment("<iframe>hi <script>hello</script></iframe>", + elements: ["iframe"])).must_equal "<iframe></iframe>" end - it 'should not allow arbitrary HTML5 data attributes by default' do + it "should not allow arbitrary HTML5 data attributes by default" do _(Sanitize.fragment('<b data-foo="bar"></b>', - :elements => ['b'] - )).must_equal '<b></b>' + elements: ["b"])).must_equal "<b></b>" _(Sanitize.fragment('<b class="foo" data-foo="bar"></b>', - :attributes => {'b' => ['class']}, - :elements => ['b'] - )).must_equal '<b class="foo"></b>' + attributes: {"b" => ["class"]}, + elements: ["b"])).must_equal '<b class="foo"></b>' end - it 'should allow arbitrary HTML5 data attributes when the :attributes config includes :data' do + it "should allow arbitrary HTML5 data attributes when the :attributes config includes :data" do s = Sanitize.new( - :attributes => {'b' => [:data]}, - :elements => ['b'] + attributes: {"b" => [:data]}, + elements: ["b"] ) _(s.fragment('<b data-foo="valid" data-bar="valid"></b>')) .must_equal '<b data-foo="valid" data-bar="valid"></b>' _(s.fragment('<b data-="invalid"></b>')) - .must_equal '<b></b>' + .must_equal "<b></b>" _(s.fragment('<b data-="invalid"></b>')) - .must_equal '<b></b>' + .must_equal "<b></b>" _(s.fragment('<b data-xml="invalid"></b>')) - .must_equal '<b></b>' + .must_equal "<b></b>" _(s.fragment('<b data-xmlfoo="invalid"></b>')) - .must_equal '<b></b>' + .must_equal "<b></b>" _(s.fragment('<b data-f:oo="valid"></b>')) - .must_equal '<b></b>' + .must_equal "<b></b>" _(s.fragment('<b data-f/oo="partial"></b>')) .must_equal '<b data-f=""></b>' # Nokogiri quirk; not ideal, but harmless _(s.fragment('<b data-éfoo="valid"></b>')) - .must_equal '<b></b>' # Another annoying Nokogiri quirk. + .must_equal "<b></b>" # Another annoying Nokogiri quirk. end - it 'should replace whitespace_elements with configured :before and :after values' do + it "should replace whitespace_elements with configured :before and :after values" do s = Sanitize.new( - :whitespace_elements => { - 'p' => { :before => "\n", :after => "\n" }, - 'div' => { :before => "\n", :after => "\n" }, - 'br' => { :before => "\n", :after => "\n" }, + whitespace_elements: { + "p" => {before: "\n", after: "\n"}, + "div" => {before: "\n", after: "\n"}, + "br" => {before: "\n", after: "\n"} } ) - _(s.fragment('<p>foo</p>')).must_equal "\nfoo\n" - _(s.fragment('<p>foo</p><p>bar</p>')).must_equal "\nfoo\n\nbar\n" - _(s.fragment('foo<div>bar</div>baz')).must_equal "foo\nbar\nbaz" - _(s.fragment('foo<br>bar<br>baz')).must_equal "foo\nbar\nbaz" + _(s.fragment("<p>foo</p>")).must_equal "\nfoo\n" + _(s.fragment("<p>foo</p><p>bar</p>")).must_equal "\nfoo\n\nbar\n" + _(s.fragment("foo<div>bar</div>baz")).must_equal "foo\nbar\nbaz" + _(s.fragment("foo<br>bar<br>baz")).must_equal "foo\nbar\nbaz" end - it 'should handle protocols correctly regardless of case' do + it "should handle protocols correctly regardless of case" do input = '<a href="hTTpS://foo.com/">Text</a>' _(Sanitize.fragment(input, { - :elements => ['a'], - :attributes => {'a' => ['href']}, - :protocols => {'a' => {'href' => ['https']}} + elements: ["a"], + attributes: {"a" => ["href"]}, + protocols: {"a" => {"href" => ["https"]}} })).must_equal input input = '<a href="mailto:someone@example.com?Subject=Hello">Text</a>' _(Sanitize.fragment(input, { - :elements => ['a'], - :attributes => {'a' => ['href']}, - :protocols => {'a' => {'href' => ['https']}} + elements: ["a"], + attributes: {"a" => ["href"]}, + protocols: {"a" => {"href" => ["https"]}} })).must_equal "<a>Text</a>" end - it 'should sanitize protocols in data attributes even if data attributes are generically allowed' do + it "should sanitize protocols in data attributes even if data attributes are generically allowed" do input = '<a data-url="mailto:someone@example.com">Text</a>' _(Sanitize.fragment(input, { - :elements => ['a'], - :attributes => {'a' => [:data]}, - :protocols => {'a' => {'data-url' => ['https']}} + elements: ["a"], + attributes: {"a" => [:data]}, + protocols: {"a" => {"data-url" => ["https"]}} })).must_equal "<a>Text</a>" _(Sanitize.fragment(input, { - :elements => ['a'], - :attributes => {'a' => [:data]}, - :protocols => {'a' => {'data-url' => ['mailto']}} + elements: ["a"], + attributes: {"a" => [:data]}, + protocols: {"a" => {"data-url" => ["mailto"]}} })).must_equal input end - it 'should prevent `<meta>` tags from being used to set a non-UTF-8 charset' do + it "should prevent `<meta>` tags from being used to set a non-UTF-8 charset" do _(Sanitize.document('<html><head><meta charset="utf-8"></head><body>Howdy!</body></html>', - :elements => %w[html head meta body], - :attributes => {'meta' => ['charset']} - )).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>" + elements: %w[html head meta body], + attributes: {"meta" => ["charset"]})).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>" _(Sanitize.document('<html><meta charset="utf-8">Howdy!</html>', - :elements => %w[html meta], - :attributes => {'meta' => ['charset']} - )).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>" + elements: %w[html meta], + attributes: {"meta" => ["charset"]})).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>" _(Sanitize.document('<html><meta charset="us-ascii">Howdy!</html>', - :elements => %w[html meta], - :attributes => {'meta' => ['charset']} - )).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>" + elements: %w[html meta], + attributes: {"meta" => ["charset"]})).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>" _(Sanitize.document('<html><meta http-equiv="content-type" content=" text/html; charset=us-ascii">Howdy!</html>', - :elements => %w[html meta], - :attributes => {'meta' => %w[content http-equiv]} - )).must_equal "<html><meta http-equiv=\"content-type\" content=\" text/html;charset=utf-8\">Howdy!</html>" + elements: %w[html meta], + attributes: {"meta" => %w[content http-equiv]})).must_equal "<html><meta http-equiv=\"content-type\" content=\" text/html;charset=utf-8\">Howdy!</html>" _(Sanitize.document('<html><meta http-equiv="Content-Type" content="text/plain;charset = us-ascii">Howdy!</html>', - :elements => %w[html meta], - :attributes => {'meta' => %w[content http-equiv]} - )).must_equal "<html><meta http-equiv=\"Content-Type\" content=\"text/plain;charset=utf-8\">Howdy!</html>" + elements: %w[html meta], + attributes: {"meta" => %w[content http-equiv]})).must_equal "<html><meta http-equiv=\"Content-Type\" content=\"text/plain;charset=utf-8\">Howdy!</html>" end - it 'should not modify `<meta>` tags that already set a UTF-8 charset' do + it "should not modify `<meta>` tags that already set a UTF-8 charset" do _(Sanitize.document('<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>Howdy!</body></html>', - :elements => %w[html head meta body], - :attributes => {'meta' => %w[content http-equiv]} - )).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>" + elements: %w[html head meta body], + attributes: {"meta" => %w[content http-equiv]})).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>" end - it 'always removes `<noscript>` elements even if `noscript` is in the allowlist' do + it "always removes `<noscript>` elements even if `noscript` is in the allowlist" do assert_equal( - '', - Sanitize.fragment('<noscript>foo</noscript>', elements: ['noscript']) + "", + Sanitize.fragment("<noscript>foo</noscript>", elements: ["noscript"]) ) end - end end diff --git a/test/test_config.rb b/test/test_config.rb index 2c50684..7a2ec5d 100644 --- a/test/test_config.rb +++ b/test/test_config.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true -require_relative 'common' +require_relative "common" -describe 'Config' do +describe "Config" do make_my_diffs_pretty! parallelize_me! @@ -10,22 +10,22 @@ def verify_deeply_frozen(config) _(config).must_be :frozen? if Hash === config - config.each_value {|v| verify_deeply_frozen(v) } + config.each_value { |v| verify_deeply_frozen(v) } elsif Set === config || Array === config - config.each {|v| verify_deeply_frozen(v) } + config.each { |v| verify_deeply_frozen(v) } end end - it 'built-in configs should be deeply frozen' do + it "built-in configs should be deeply frozen" do verify_deeply_frozen Sanitize::Config::DEFAULT verify_deeply_frozen Sanitize::Config::BASIC verify_deeply_frozen Sanitize::Config::RELAXED verify_deeply_frozen Sanitize::Config::RESTRICTED end - describe '.freeze_config' do - it 'should deeply freeze and return a configuration Hash' do - a = {:one => {:one_one => [0, '1', :a], :one_two => false, :one_three => Set.new([:a, :b, :c])}} + describe ".freeze_config" do + it "should deeply freeze and return a configuration Hash" do + a = {one: {one_one: [0, "1", :a], one_two: false, one_three: Set.new([:a, :b, :c])}} b = Sanitize::Config.freeze_config(a) _(b).must_be_same_as a @@ -33,11 +33,11 @@ def verify_deeply_frozen(config) end end - describe '.merge' do - it 'should deeply merge a configuration Hash' do + describe ".merge" do + it "should deeply merge a configuration Hash" do # Freeze to ensure that we get an error if either Hash is modified. - a = Sanitize::Config.freeze_config({:one => {:one_one => [0, '1', :a], :one_two => false, :one_three => Set.new([:a, :b, :c])}}) - b = Sanitize::Config.freeze_config({:one => {:one_two => true, :one_three => 3}, :two => 2}) + a = Sanitize::Config.freeze_config({one: {one_one: [0, "1", :a], one_two: false, one_three: Set.new([:a, :b, :c])}}) + b = Sanitize::Config.freeze_config({one: {one_two: true, one_three: 3}, two: 2}) c = Sanitize::Config.merge(a, b) @@ -45,22 +45,22 @@ def verify_deeply_frozen(config) _(c).wont_be_same_as b _(c).must_equal( - :one => { - :one_one => [0, '1', :a], - :one_two => true, - :one_three => 3 + one: { + one_one: [0, "1", :a], + one_two: true, + one_three: 3 }, - :two => 2 + two: 2 ) _(c[:one]).wont_be_same_as a[:one] _(c[:one][:one_one]).wont_be_same_as a[:one][:one_one] end - it 'should raise an ArgumentError if either argument is not a Hash' do - _(proc { Sanitize::Config.merge('foo', {}) }).must_raise ArgumentError - _(proc { Sanitize::Config.merge({}, 'foo') }).must_raise ArgumentError + it "should raise an ArgumentError if either argument is not a Hash" do + _(proc { Sanitize::Config.merge("foo", {}) }).must_raise ArgumentError + _(proc { Sanitize::Config.merge({}, "foo") }).must_raise ArgumentError end end end diff --git a/test/test_malicious_css.rb b/test/test_malicious_css.rb index a79cbfd..d9507e6 100644 --- a/test/test_malicious_css.rb +++ b/test/test_malicious_css.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative 'common' +require_relative "common" # Miscellaneous attempts to sneak maliciously crafted CSS past Sanitize. Some of # these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat @@ -8,7 +8,7 @@ # # https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet -describe 'Malicious CSS' do +describe "Malicious CSS" do make_my_diffs_pretty! parallelize_me! @@ -16,37 +16,37 @@ @s = Sanitize::CSS.new(Sanitize::Config::RELAXED) end - it 'should not be possible to inject an expression by munging it with a comment' do - _(@s.properties(%[width:expr/*XSS*/ession(alert('XSS'))])). - must_equal '' + it "should not be possible to inject an expression by munging it with a comment" do + _(@s.properties(%[width:expr/*XSS*/ession(alert('XSS'))])) + .must_equal "" - _(@s.properties(%[width:ex/*XSS*//*/*/pression(alert("XSS"))])). - must_equal '' + _(@s.properties(%[width:ex/*XSS*//*/*/pression(alert("XSS"))])) + .must_equal "" end - it 'should not be possible to inject an expression by munging it with a newline' do - _(@s.properties(%[width:\nexpression(alert('XSS'));])). - must_equal '' + it "should not be possible to inject an expression by munging it with a newline" do + _(@s.properties(%[width:\nexpression(alert('XSS'));])) + .must_equal "" end - it 'should not allow the javascript protocol' do - _(@s.properties(%[background-image:url("javascript:alert('XSS')");])). - must_equal '' + it "should not allow the javascript protocol" do + _(@s.properties(%[background-image:url("javascript:alert('XSS')");])) + .must_equal "" _(Sanitize.fragment(%[<div style="background-image: url(&#1;javascript:alert('XSS'))">], - Sanitize::Config::RELAXED)).must_equal '<div></div>' + Sanitize::Config::RELAXED)).must_equal "<div></div>" end - it 'should not allow behaviors' do - _(@s.properties(%[behavior: url(xss.htc);])).must_equal '' + it "should not allow behaviors" do + _(@s.properties(%[behavior: url(xss.htc);])).must_equal "" end - describe 'sanitization bypass via CSS at-rule in HTML <style> element' do + describe "sanitization bypass via CSS at-rule in HTML <style> element" do before do @s = Sanitize.new(Sanitize::Config::RELAXED) end - it 'is not possible to prematurely end a <style> element' do + it "is not possible to prematurely end a <style> element" do assert_equal( %[<style>@media<\\/style><iframe srcdoc='<script>alert(document.domain)<\\/script>'>{}</style>], @s.fragment(%[<style>@media</sty/**/le><iframe srcdoc='<script>alert(document.domain)</script>'></style>]) diff --git a/test/test_malicious_html.rb b/test/test_malicious_html.rb index d53c7fd..36486be 100644 --- a/test/test_malicious_html.rb +++ b/test/test_malicious_html.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative 'common' +require_relative "common" # Miscellaneous attempts to sneak maliciously crafted HTML past Sanitize. Many # of these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat @@ -8,7 +8,7 @@ # # https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet -describe 'Malicious HTML' do +describe "Malicious HTML" do make_my_diffs_pretty! parallelize_me! @@ -16,114 +16,114 @@ @s = Sanitize.new(Sanitize::Config::RELAXED) end - describe 'comments' do - it 'should not allow script injection via conditional comments' do - _(@s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->])). - must_equal '' + describe "comments" do + it "should not allow script injection via conditional comments" do + _(@s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->])) + .must_equal "" end end - describe 'interpolation (ERB, PHP, etc.)' do - it 'should escape ERB-style tags' do - _(@s.fragment('<% naughty_ruby_code %>')). - must_equal '&lt;% naughty_ruby_code %&gt;' + describe "interpolation (ERB, PHP, etc.)" do + it "should escape ERB-style tags" do + _(@s.fragment("<% naughty_ruby_code %>")) + .must_equal "&lt;% naughty_ruby_code %&gt;" - _(@s.fragment('<%= naughty_ruby_code %>')). - must_equal '&lt;%= naughty_ruby_code %&gt;' + _(@s.fragment("<%= naughty_ruby_code %>")) + .must_equal "&lt;%= naughty_ruby_code %&gt;" end - it 'should remove PHP-style tags' do - _(@s.fragment('<? naughtyPHPCode(); ?>')). - must_equal '' + it "should remove PHP-style tags" do + _(@s.fragment("<? naughtyPHPCode(); ?>")) + .must_equal "" - _(@s.fragment('<?= naughtyPHPCode(); ?>')). - must_equal '' + _(@s.fragment("<?= naughtyPHPCode(); ?>")) + .must_equal "" end end - describe '<body>' do - it 'should not be possible to inject JS via a malformed event attribute' do - _(@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>')). - must_equal "<html><head></head><body></body></html>" + describe "<body>" do + it "should not be possible to inject JS via a malformed event attribute" do + _(@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>')) + .must_equal "<html><head></head><body></body></html>" end end - describe '<iframe>' do - it 'should not be possible to inject an iframe using an improperly closed tag' do - _(@s.fragment(%[<iframe src=http://ha.ckers.org/scriptlet.html <])). - must_equal '' + describe "<iframe>" do + it "should not be possible to inject an iframe using an improperly closed tag" do + _(@s.fragment(%(<iframe src=http://ha.ckers.org/scriptlet.html <))) + .must_equal "" end end - describe '<img>' do - it 'should not be possible to inject JS via an unquoted <img> src attribute' do - _(@s.fragment("<img src=javascript:alert('XSS')>")).must_equal '<img>' + describe "<img>" do + it "should not be possible to inject JS via an unquoted <img> src attribute" do + _(@s.fragment("<img src=javascript:alert('XSS')>")).must_equal "<img>" end - it 'should not be possible to inject JS using grave accents as <img> src delimiters' do - _(@s.fragment("<img src=`javascript:alert('XSS')`>")).must_equal '<img>' + it "should not be possible to inject JS using grave accents as <img> src delimiters" do + _(@s.fragment("<img src=`javascript:alert('XSS')`>")).must_equal "<img>" end - it 'should not be possible to inject <script> via a malformed <img> tag' do - _(@s.fragment('<img """><script>alert("XSS")</script>">')). - must_equal '<img>"&gt;' + it "should not be possible to inject <script> via a malformed <img> tag" do + _(@s.fragment('<img """><script>alert("XSS")</script>">')) + .must_equal '<img>"&gt;' end - it 'should not be possible to inject protocol-based JS' do - _(@s.fragment('<img src=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>')). - must_equal '<img>' + it "should not be possible to inject protocol-based JS" do + _(@s.fragment("<img src=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>")) + .must_equal "<img>" - _(@s.fragment('<img src=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>')). - must_equal '<img>' + _(@s.fragment("<img src=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>")) + .must_equal "<img>" - _(@s.fragment('<img src=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>')). - must_equal '<img>' + _(@s.fragment("<img src=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>")) + .must_equal "<img>" # Encoded tab character. - _(@s.fragment(%[<img src="jav&#x09;ascript:alert('XSS');">])). - must_equal '<img>' + _(@s.fragment(%[<img src="jav&#x09;ascript:alert('XSS');">])) + .must_equal "<img>" # Encoded newline. - _(@s.fragment(%[<img src="jav&#x0A;ascript:alert('XSS');">])). - must_equal '<img>' + _(@s.fragment(%[<img src="jav&#x0A;ascript:alert('XSS');">])) + .must_equal "<img>" # Encoded carriage return. - _(@s.fragment(%[<img src="jav&#x0D;ascript:alert('XSS');">])). - must_equal '<img>' + _(@s.fragment(%[<img src="jav&#x0D;ascript:alert('XSS');">])) + .must_equal "<img>" # Null byte. - _(@s.fragment(%[<img src=java\0script:alert("XSS")>])). - must_equal '<img>' + _(@s.fragment(%[<img src=java\0script:alert("XSS")>])) + .must_equal "<img>" # Spaces plus meta char. - _(@s.fragment(%[<img src=" &#14; javascript:alert('XSS');">])). - must_equal '<img>' + _(@s.fragment(%[<img src=" &#14; javascript:alert('XSS');">])) + .must_equal "<img>" # Mixed spaces and tabs. - _(@s.fragment(%[<img src="j\na v\tascript://alert('XSS');">])). - must_equal '<img>' + _(@s.fragment(%[<img src="j\na v\tascript://alert('XSS');">])) + .must_equal "<img>" end - it 'should not be possible to inject protocol-based JS via whitespace' do - _(@s.fragment(%[<img src="jav\tascript:alert('XSS');">])). - must_equal '<img>' + it "should not be possible to inject protocol-based JS via whitespace" do + _(@s.fragment(%[<img src="jav\tascript:alert('XSS');">])) + .must_equal "<img>" end - it 'should not be possible to inject JS using a half-open <img> tag' do - _(@s.fragment(%[<img src="javascript:alert('XSS')"])). - must_equal '' + it "should not be possible to inject JS using a half-open <img> tag" do + _(@s.fragment(%[<img src="javascript:alert('XSS')"])) + .must_equal "" end end - describe '<script>' do - it 'should not be possible to inject <script> using a malformed non-alphanumeric tag name' do - _(@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>])). - must_equal '' + describe "<script>" do + it "should not be possible to inject <script> using a malformed non-alphanumeric tag name" do + _(@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>])) + .must_equal "" end - it 'should not be possible to inject <script> via extraneous open brackets' do - _(@s.fragment(%[<<script>alert("XSS");//<</script>])). - must_equal '&lt;' + it "should not be possible to inject <script> via extraneous open brackets" do + _(@s.fragment(%[<<script>alert("XSS");//<</script>])) + .must_equal "&lt;" end end @@ -135,29 +135,29 @@ # # The relevant libxml2 code is here: # <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588> - describe 'unsafe libxml2 server-side includes in attributes' do + describe "unsafe libxml2 server-side includes in attributes" do using_unpatched_libxml2 = Nokogiri::VersionInfo.instance.libxml2_using_system? tag_configs = [ { - tag_name: 'a', - escaped_attrs: %w[ action href src name ], + tag_name: "a", + escaped_attrs: %w[action href src name], unescaped_attrs: [] }, { - tag_name: 'div', - escaped_attrs: %w[ action href src ], - unescaped_attrs: %w[ name ] + tag_name: "div", + escaped_attrs: %w[action href src], + unescaped_attrs: %w[name] } ] before do @s = Sanitize.new({ - elements: %w[ a div ], + elements: %w[a div], attributes: { - all: %w[ action href src name ] + all: %w[action href src name] } }) end @@ -168,13 +168,13 @@ tag_config[:escaped_attrs].each do |attr_name| input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>] - it 'should escape unsafe characters in attributes' do + it "should escape unsafe characters in attributes" do skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2 # This uses Nokogumbo's HTML-compliant serializer rather than # libxml2's. - _(@s.fragment(input)). - must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>]) + _(@s.fragment(input)) + .must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>]) # This uses the not-quite-standards-compliant libxml2 serializer via # Nokogiri, so the output may be a little different as of Nokogiri @@ -182,11 +182,11 @@ # https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d fragment = Nokogiri::HTML.fragment(input) @s.node!(fragment) - _(fragment.to_html). - must_equal(%[<#{tag_name} #{attr_name}="examp&lt;!--%22%20onmouseover=alert(1)&gt;--&gt;le.com">foo</#{tag_name}>]) + _(fragment.to_html) + .must_equal(%[<#{tag_name} #{attr_name}="examp&lt;!--%22%20onmouseover=alert(1)&gt;--&gt;le.com">foo</#{tag_name}>]) end - it 'should round-trip to the same output' do + it "should round-trip to the same output" do output = @s.fragment(input) _(@s.fragment(output)).must_equal(output) end @@ -195,13 +195,13 @@ tag_config[:unescaped_attrs].each do |attr_name| input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>] - it 'should not escape characters unnecessarily' do + it "should not escape characters unnecessarily" do skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2 # This uses Nokogumbo's HTML-compliant serializer rather than # libxml2's. - _(@s.fragment(input)). - must_equal(%[<#{tag_name} #{attr_name}="examp<!--&quot; onmouseover=alert(1)>-->le.com">foo</#{tag_name}>]) + _(@s.fragment(input)) + .must_equal(%[<#{tag_name} #{attr_name}="examp<!--&quot; onmouseover=alert(1)>-->le.com">foo</#{tag_name}>]) # This uses the not-quite-standards-compliant libxml2 serializer via # Nokogiri, so the output may be a little different as of Nokogiri @@ -209,11 +209,11 @@ # https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d fragment = Nokogiri::HTML.fragment(input) @s.node!(fragment) - _(fragment.to_html). - must_equal(%[<#{tag_name} #{attr_name}='examp&lt;!--" onmouseover=alert(1)&gt;--&gt;le.com'>foo</#{tag_name}>]) + _(fragment.to_html) + .must_equal(%[<#{tag_name} #{attr_name}='examp&lt;!--" onmouseover=alert(1)&gt;--&gt;le.com'>foo</#{tag_name}>]) end - it 'should round-trip to the same output' do + it "should round-trip to the same output" do output = @s.fragment(input) _(@s.fragment(output)).must_equal(output) end @@ -222,14 +222,14 @@ end # https://github.com/rgrove/sanitize/security/advisories/GHSA-p4x4-rw2p-8j8m - describe 'foreign content bypass in relaxed config' do - it 'prevents a sanitization bypass via carefully crafted foreign content' do + describe "foreign content bypass in relaxed config" do + it "prevents a sanitization bypass via carefully crafted foreign content" do %w[iframe noembed noframes noscript plaintext script style xmp].each do |tag_name| - _(@s.fragment(%[<math><#{tag_name}>/*&lt;/#{tag_name}&gt;&lt;img src onerror=alert(1)>*/])). - must_equal '' + _(@s.fragment(%[<math><#{tag_name}>/*&lt;/#{tag_name}&gt;&lt;img src onerror=alert(1)>*/])) + .must_equal "" - _(@s.fragment(%[<svg><#{tag_name}>/*&lt;/#{tag_name}&gt;&lt;img src onerror=alert(1)>*/])). - must_equal '' + _(@s.fragment(%[<svg><#{tag_name}>/*&lt;/#{tag_name}&gt;&lt;img src onerror=alert(1)>*/])) + .must_equal "" end end end @@ -241,7 +241,7 @@ # # Do not use the custom configs you see in these tests! If you do, you may be # creating XSS vulnerabilities in your application. - describe 'foreign content bypass in unsafe custom config that allows MathML or SVG' do + describe "foreign content bypass in unsafe custom config that allows MathML or SVG" do unescaped_content_elements = %w[ noembed noframes @@ -306,33 +306,33 @@ removed_elements.each do |name| it "removes `<#{name}>` elements in a MathML namespace" do assert_equal( - '<math></math>', + "<math></math>", @s.fragment("<math><#{name}>&lt;img src=x onerror=alert(1)&gt;</#{name}>") ) end it "removes `<#{name}>` elements in an SVG namespace" do assert_equal( - '<svg></svg>', + "<svg></svg>", @s.fragment("<svg><#{name}>&lt;img src=x onerror=alert(1)&gt;</#{name}>") ) end end end - describe 'sanitization bypass by exploiting scripting-disabled <noscript> behavior' do + describe "sanitization bypass by exploiting scripting-disabled <noscript> behavior" do before do @s = Sanitize.new( Sanitize::Config.merge( Sanitize::Config::RELAXED, - elements: Sanitize::Config::RELAXED[:elements] + ['noscript'] + elements: Sanitize::Config::RELAXED[:elements] + ["noscript"] ) ) end - it 'is prevented by removing `<noscript>` elements regardless of the allowlist' do + it "is prevented by removing `<noscript>` elements regardless of the allowlist" do assert_equal( - '', + "", @s.fragment(%[<noscript><div id='</noscript>&lt;img src=x onerror=alert(1)&gt; '>]) ) end diff --git a/test/test_parser.rb b/test/test_parser.rb index 1eda0f7..0eeafef 100644 --- a/test/test_parser.rb +++ b/test/test_parser.rb @@ -1,37 +1,37 @@ # frozen_string_literal: true -require_relative 'common' +require_relative "common" -describe 'Parser' do +describe "Parser" do make_my_diffs_pretty! parallelize_me! - it 'should translate valid entities into characters' do + it "should translate valid entities into characters" do _(Sanitize.fragment("&apos;&eacute;&amp;")).must_equal("'é&amp;") end - it 'should translate orphaned ampersands into entities' do - _(Sanitize.fragment('at&t')).must_equal('at&amp;t') + it "should translate orphaned ampersands into entities" do + _(Sanitize.fragment("at&t")).must_equal("at&amp;t") end - it 'should not add newlines after tags when serializing a fragment' do - _(Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", :elements => ['div', 'p'])) + it "should not add newlines after tags when serializing a fragment" do + _(Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", elements: ["div", "p"])) .must_equal "<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>" end - it 'should not have the Nokogiri 1.4.2+ unterminated script/style element bug' do - _(Sanitize.fragment('foo <script>bar')).must_equal 'foo ' - _(Sanitize.fragment('foo <style>bar')).must_equal 'foo ' + it "should not have the Nokogiri 1.4.2+ unterminated script/style element bug" do + _(Sanitize.fragment("foo <script>bar")).must_equal "foo " + _(Sanitize.fragment("foo <style>bar")).must_equal "foo " end it 'ambiguous non-tag brackets like "1 > 2 and 2 < 1" should be parsed correctly' do - _(Sanitize.fragment('1 > 2 and 2 < 1')).must_equal '1 &gt; 2 and 2 &lt; 1' - _(Sanitize.fragment('OMG HAPPY BIRTHDAY! *<:-D')).must_equal 'OMG HAPPY BIRTHDAY! *&lt;:-D' + _(Sanitize.fragment("1 > 2 and 2 < 1")).must_equal "1 &gt; 2 and 2 &lt; 1" + _(Sanitize.fragment("OMG HAPPY BIRTHDAY! *<:-D")).must_equal "OMG HAPPY BIRTHDAY! *&lt;:-D" end - describe 'when siblings are added after a node during traversal' do - it 'the added siblings should be traversed' do - html = %[ + describe "when siblings are added after a node during traversal" do + it "the added siblings should be traversed" do + html = %( <div id="one"> <div id="one_one"> <div id="one_one_one"></div> @@ -43,20 +43,20 @@ <div id="two_two"></div> </div> <div id="three"></div> - ] + ) siblings = [] - Sanitize.fragment(html, :transformers => ->(env) { - name = env[:node].name + Sanitize.fragment(html, transformers: ->(env) { + name = env[:node].name - if name == 'div' - env[:node].add_next_sibling('<b id="added_' + env[:node]['id'] + '">') - elsif name == 'b' - siblings << env[:node][:id] - end + if name == "div" + env[:node].add_next_sibling('<b id="added_' + env[:node]["id"] + '">') + elsif name == "b" + siblings << env[:node][:id] + end - return {:node_allowlist => [env[:node]]} + {node_allowlist: [env[:node]]} }) # All siblings should be traversed, and in the order added. diff --git a/test/test_sanitize.rb b/test/test_sanitize.rb index 6998481..53a4033 100644 --- a/test/test_sanitize.rb +++ b/test/test_sanitize.rb @@ -1,191 +1,191 @@ # frozen_string_literal: true -require_relative 'common' +require_relative "common" -describe 'Sanitize' do - describe 'initializer' do - it 'should not modify a transformers array in the given config' do +describe "Sanitize" do + describe "initializer" do + it "should not modify a transformers array in the given config" do transformers = [ lambda {} ] - Sanitize.new({ :transformers => transformers }) + Sanitize.new({transformers: transformers}) _(transformers.length).must_equal(1) end end - describe 'instance methods' do + describe "instance methods" do before do @s = Sanitize.new end - describe '#document' do + describe "#document" do before do - @s = Sanitize.new(:elements => ['html']) + @s = Sanitize.new(elements: ["html"]) end - it 'should sanitize an HTML document' do + it "should sanitize an HTML document" do _(@s.document('<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>')) .must_equal "<html>Lorem ipsum dolor sit amet </html>" end - it 'should not modify the input string' do - input = '<!DOCTYPE html><b>foo</b>' + it "should not modify the input string" do + input = "<!DOCTYPE html><b>foo</b>" @s.document(input) - _(input).must_equal('<!DOCTYPE html><b>foo</b>') + _(input).must_equal("<!DOCTYPE html><b>foo</b>") end - it 'should not choke on frozen documents' do - _(@s.document('<!doctype html><html><b>foo</b>'.freeze)).must_equal "<html>foo</html>" + it "should not choke on frozen documents" do + _(@s.document("<!doctype html><html><b>foo</b>")).must_equal "<html>foo</html>" end - it 'should normalize newlines' do + it "should normalize newlines" do _(@s.document("a\r\n\n\r\r\r\nz")).must_equal "<html>a\n\n\n\n\nz</html>" end - it 'should strip control characters (except ASCII whitespace)' do + it "should strip control characters (except ASCII whitespace)" do sample_control_chars = "\u0001\u0008\u000b\u000e\u001f\u007f\u009f" whitespace = "\t\n\f\u0020" _(@s.document("a#{sample_control_chars}#{whitespace}z")).must_equal "<html>a#{whitespace}z</html>" end - it 'should strip non-characters' do + it "should strip non-characters" do sample_non_chars = "\ufdd0\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}" _(@s.document("a#{sample_non_chars}z")).must_equal "<html>az</html>" end - describe 'when html body exceeds Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH' do + describe "when html body exceeds Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH" do let(:content) do - content = nest_html_content('<b>foo</b>', Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH) + content = nest_html_content("<b>foo</b>", Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH) "<html>#{content}</html>" end - it 'raises an ArgumentError exception' do + it "raises an ArgumentError exception" do assert_raises ArgumentError do @s.document(content) end end - describe 'and :max_tree_depth of -1 is supplied in :parser_options' do + describe "and :max_tree_depth of -1 is supplied in :parser_options" do before do - @s = Sanitize.new(elements: ['html'], parser_options: { max_tree_depth: -1 }) + @s = Sanitize.new(elements: ["html"], parser_options: {max_tree_depth: -1}) end - it 'does not raise an ArgumentError exception' do - _(@s.document(content)).must_equal '<html>foo</html>' + it "does not raise an ArgumentError exception" do + _(@s.document(content)).must_equal "<html>foo</html>" end end end end - describe '#fragment' do - it 'should sanitize an HTML fragment' do + describe "#fragment" do + it "should sanitize an HTML fragment" do _(@s.fragment('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')) - .must_equal 'Lorem ipsum dolor sit amet ' + .must_equal "Lorem ipsum dolor sit amet " end - it 'should not modify the input string' do - input = '<b>foo</b>' + it "should not modify the input string" do + input = "<b>foo</b>" @s.fragment(input) - _(input).must_equal '<b>foo</b>' + _(input).must_equal "<b>foo</b>" end - it 'should not choke on fragments containing <html> or <body>' do - _(@s.fragment('<html><b>foo</b></html>')).must_equal 'foo' - _(@s.fragment('<body><b>foo</b></body>')).must_equal 'foo' - _(@s.fragment('<html><body><b>foo</b></body></html>')).must_equal 'foo' - _(@s.fragment('<!DOCTYPE html><html><body><b>foo</b></body></html>')).must_equal 'foo' + it "should not choke on fragments containing <html> or <body>" do + _(@s.fragment("<html><b>foo</b></html>")).must_equal "foo" + _(@s.fragment("<body><b>foo</b></body>")).must_equal "foo" + _(@s.fragment("<html><body><b>foo</b></body></html>")).must_equal "foo" + _(@s.fragment("<!DOCTYPE html><html><body><b>foo</b></body></html>")).must_equal "foo" end - it 'should not choke on frozen fragments' do - _(@s.fragment('<b>foo</b>'.freeze)).must_equal 'foo' + it "should not choke on frozen fragments" do + _(@s.fragment("<b>foo</b>")).must_equal "foo" end - it 'should normalize newlines' do + it "should normalize newlines" do _(@s.fragment("a\r\n\n\r\r\r\nz")).must_equal "a\n\n\n\n\nz" end - it 'should strip control characters (except ASCII whitespace)' do + it "should strip control characters (except ASCII whitespace)" do sample_control_chars = "\u0001\u0008\u000b\u000e\u001f\u007f\u009f" whitespace = "\t\n\f\u0020" _(@s.fragment("a#{sample_control_chars}#{whitespace}z")).must_equal "a#{whitespace}z" end - it 'should strip non-characters' do + it "should strip non-characters" do sample_non_chars = "\ufdd0\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}" _(@s.fragment("a#{sample_non_chars}z")).must_equal "az" end - describe 'when html body exceeds Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH' do + describe "when html body exceeds Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH" do let(:content) do - content = nest_html_content('<b>foo</b>', Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH) + content = nest_html_content("<b>foo</b>", Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH) "<body>#{content}</body>" end - it 'raises an ArgumentError exception' do + it "raises an ArgumentError exception" do assert_raises ArgumentError do @s.fragment(content) end end - describe 'and :max_tree_depth of -1 is supplied in :parser_options' do + describe "and :max_tree_depth of -1 is supplied in :parser_options" do before do - @s = Sanitize.new(parser_options: { max_tree_depth: -1 }) + @s = Sanitize.new(parser_options: {max_tree_depth: -1}) end - it 'does not raise an ArgumentError exception' do - _(@s.fragment(content)).must_equal 'foo' + it "does not raise an ArgumentError exception" do + _(@s.fragment(content)).must_equal "foo" end end end end - describe '#node!' do - it 'should sanitize a Nokogiri::XML::Node' do - doc = Nokogiri::HTML5.parse('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>') + describe "#node!" do + it "should sanitize a Nokogiri::XML::Node" do + doc = Nokogiri::HTML5.parse('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>') frag = doc.fragment - doc.xpath('/html/body/node()').each {|node| frag << node } + doc.xpath("/html/body/node()").each { |node| frag << node } @s.node!(frag) - _(frag.to_html).must_equal 'Lorem ipsum dolor sit amet ' + _(frag.to_html).must_equal "Lorem ipsum dolor sit amet " end describe "when the given node is a document and <html> isn't allowlisted" do - it 'should raise a Sanitize::Error' do - doc = Nokogiri::HTML5.parse('foo') + it "should raise a Sanitize::Error" do + doc = Nokogiri::HTML5.parse("foo") _(proc { @s.node!(doc) }).must_raise Sanitize::Error end end end end - describe 'class methods' do - describe '.document' do - it 'should sanitize an HTML document with the given config' do + describe "class methods" do + describe ".document" do + it "should sanitize an HTML document with the given config" do html = '<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>' - _(Sanitize.document(html, :elements => ['html'])) + _(Sanitize.document(html, elements: ["html"])) .must_equal "<html>Lorem ipsum dolor sit amet </html>" end end - describe '.fragment' do - it 'should sanitize an HTML fragment with the given config' do + describe ".fragment" do + it "should sanitize an HTML fragment with the given config" do html = '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>' - _(Sanitize.fragment(html, :elements => ['strong'])) - .must_equal 'Lorem ipsum <strong>dolor</strong> sit amet ' + _(Sanitize.fragment(html, elements: ["strong"])) + .must_equal "Lorem ipsum <strong>dolor</strong> sit amet " end end - describe '.node!' do - it 'should sanitize a Nokogiri::XML::Node with the given config' do + describe ".node!" do + it "should sanitize a Nokogiri::XML::Node with the given config" do doc = Nokogiri::HTML5.parse('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>') frag = doc.fragment - doc.xpath('/html/body/node()').each {|node| frag << node } + doc.xpath("/html/body/node()").each { |node| frag << node } - Sanitize.node!(frag, :elements => ['strong']) - _(frag.to_html).must_equal 'Lorem ipsum <strong>dolor</strong> sit amet ' + Sanitize.node!(frag, elements: ["strong"]) + _(frag.to_html).must_equal "Lorem ipsum <strong>dolor</strong> sit amet " end end end @@ -193,6 +193,6 @@ private def nest_html_content(html_content, depth) - "#{'<span>' * depth}#{html_content}#{'</span>' * depth}" + "#{"<span>" * depth}#{html_content}#{"</span>" * depth}" end end diff --git a/test/test_sanitize_css.rb b/test/test_sanitize_css.rb index cd24bf1..0452a1c 100644 --- a/test/test_sanitize_css.rb +++ b/test/test_sanitize_css.rb @@ -1,28 +1,28 @@ # frozen_string_literal: true -require_relative 'common' +require_relative "common" -describe 'Sanitize::CSS' do +describe "Sanitize::CSS" do make_my_diffs_pretty! parallelize_me! - describe 'instance methods' do + describe "instance methods" do before do @default = Sanitize::CSS.new @relaxed = Sanitize::CSS.new(Sanitize::Config::RELAXED[:css]) - @custom = Sanitize::CSS.new(:properties => %w[background color width]) + @custom = Sanitize::CSS.new(properties: %w[background color width]) end - describe '#properties' do - it 'should sanitize CSS properties' do + describe "#properties" do + it "should sanitize CSS properties" do css = 'background: #fff; width: expression(alert("hi"));' - _(@default.properties(css)).must_equal ' ' - _(@relaxed.properties(css)).must_equal 'background: #fff; ' - _(@custom.properties(css)).must_equal 'background: #fff; ' + _(@default.properties(css)).must_equal " " + _(@relaxed.properties(css)).must_equal "background: #fff; " + _(@custom.properties(css)).must_equal "background: #fff; " end - it 'should allow allowlisted URL protocols' do + it "should allow allowlisted URL protocols" do [ "background: url(relative.jpg)", "background: url('relative.jpg')", @@ -40,13 +40,13 @@ "background: image('https://example.com/https.jpg');", "background: image(rtl 'https://example.com/https.jpg');" ].each do |css| - _(@default.properties(css)).must_equal '' + _(@default.properties(css)).must_equal "" _(@relaxed.properties(css)).must_equal css - _(@custom.properties(css)).must_equal '' + _(@custom.properties(css)).must_equal "" end end - it 'should not allow non-allowlisted URL protocols' do + it "should not allow non-allowlisted URL protocols" do [ "background: url(javascript:alert(0))", "background: url(ja\\56 ascript:alert(0))", @@ -56,21 +56,21 @@ "background: url('javas\\\ncript:alert(0)')", "background: url('java\\0script:foo')" ].each do |css| - _(@default.properties(css)).must_equal '' - _(@relaxed.properties(css)).must_equal '' - _(@custom.properties(css)).must_equal '' + _(@default.properties(css)).must_equal "" + _(@relaxed.properties(css)).must_equal "" + _(@custom.properties(css)).must_equal "" end end - it 'should not allow -moz-binding' do + it "should not allow -moz-binding" do css = "-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')" - _(@default.properties(css)).must_equal '' - _(@relaxed.properties(css)).must_equal '' - _(@custom.properties(css)).must_equal '' + _(@default.properties(css)).must_equal "" + _(@relaxed.properties(css)).must_equal "" + _(@custom.properties(css)).must_equal "" end - it 'should not allow expressions' do + it "should not allow expressions" do [ "width:expression(alert(1))", "width: /**/expression(alert(1)", @@ -79,57 +79,57 @@ "xss:expression(alert(1))", "height: foo(expression(alert(1)));" ].each do |css| - _(@default.properties(css)).must_equal '' - _(@relaxed.properties(css)).must_equal '' - _(@custom.properties(css)).must_equal '' + _(@default.properties(css)).must_equal "" + _(@relaxed.properties(css)).must_equal "" + _(@custom.properties(css)).must_equal "" end end - it 'should not allow behaviors' do + it "should not allow behaviors" do css = "behavior: url(xss.htc);" - _(@default.properties(css)).must_equal '' - _(@relaxed.properties(css)).must_equal '' - _(@custom.properties(css)).must_equal '' + _(@default.properties(css)).must_equal "" + _(@relaxed.properties(css)).must_equal "" + _(@custom.properties(css)).must_equal "" end - describe 'when :allow_comments is true' do - it 'should preserve comments' do - _(@relaxed.properties('color: #fff; /* comment */ width: 100px;')) - .must_equal 'color: #fff; /* comment */ width: 100px;' + describe "when :allow_comments is true" do + it "should preserve comments" do + _(@relaxed.properties("color: #fff; /* comment */ width: 100px;")) + .must_equal "color: #fff; /* comment */ width: 100px;" _(@relaxed.properties("color: #fff; /* \n\ncomment */ width: 100px;")) .must_equal "color: #fff; /* \n\ncomment */ width: 100px;" end end - describe 'when :allow_comments is false' do - it 'should strip comments' do - _(@custom.properties('color: #fff; /* comment */ width: 100px;')) - .must_equal 'color: #fff; width: 100px;' + describe "when :allow_comments is false" do + it "should strip comments" do + _(@custom.properties("color: #fff; /* comment */ width: 100px;")) + .must_equal "color: #fff; width: 100px;" _(@custom.properties("color: #fff; /* \n\ncomment */ width: 100px;")) - .must_equal 'color: #fff; width: 100px;' + .must_equal "color: #fff; width: 100px;" end end - describe 'when :allow_hacks is true' do - it 'should allow common CSS hacks' do - _(@relaxed.properties('_border: 1px solid #fff; *width: 10px')) - .must_equal '_border: 1px solid #fff; *width: 10px' + describe "when :allow_hacks is true" do + it "should allow common CSS hacks" do + _(@relaxed.properties("_border: 1px solid #fff; *width: 10px")) + .must_equal "_border: 1px solid #fff; *width: 10px" end end - describe 'when :allow_hacks is false' do - it 'should not allow common CSS hacks' do - _(@custom.properties('_border: 1px solid #fff; *width: 10px')) - .must_equal ' ' + describe "when :allow_hacks is false" do + it "should not allow common CSS hacks" do + _(@custom.properties("_border: 1px solid #fff; *width: 10px")) + .must_equal " " end end end - describe '#stylesheet' do - it 'should sanitize a CSS stylesheet' do + describe "#stylesheet" do + it "should sanitize a CSS stylesheet" do css = %[ /* Yay CSS! */ .foo { color: #fff; } @@ -141,82 +141,82 @@ } ].strip - _(@default.stylesheet(css).strip).must_equal %[ + _(@default.stylesheet(css).strip).must_equal %( .foo { } #bar { } - ].strip + ).strip _(@relaxed.stylesheet(css)).must_equal css - _(@custom.stylesheet(css).strip).must_equal %[ + _(@custom.stylesheet(css).strip).must_equal %( .foo { color: #fff; } #bar { } - ].strip + ).strip end - describe 'when :allow_comments is true' do - it 'should preserve comments' do - _(@relaxed.stylesheet('.foo { color: #fff; /* comment */ width: 100px; }')) - .must_equal '.foo { color: #fff; /* comment */ width: 100px; }' + describe "when :allow_comments is true" do + it "should preserve comments" do + _(@relaxed.stylesheet(".foo { color: #fff; /* comment */ width: 100px; }")) + .must_equal ".foo { color: #fff; /* comment */ width: 100px; }" _(@relaxed.stylesheet(".foo { color: #fff; /* \n\ncomment */ width: 100px; }")) .must_equal ".foo { color: #fff; /* \n\ncomment */ width: 100px; }" end end - describe 'when :allow_comments is false' do - it 'should strip comments' do - _(@custom.stylesheet('.foo { color: #fff; /* comment */ width: 100px; }')) - .must_equal '.foo { color: #fff; width: 100px; }' + describe "when :allow_comments is false" do + it "should strip comments" do + _(@custom.stylesheet(".foo { color: #fff; /* comment */ width: 100px; }")) + .must_equal ".foo { color: #fff; width: 100px; }" _(@custom.stylesheet(".foo { color: #fff; /* \n\ncomment */ width: 100px; }")) - .must_equal '.foo { color: #fff; width: 100px; }' + .must_equal ".foo { color: #fff; width: 100px; }" end end - describe 'when :allow_hacks is true' do - it 'should allow common CSS hacks' do - _(@relaxed.stylesheet('.foo { _border: 1px solid #fff; *width: 10px }')) - .must_equal '.foo { _border: 1px solid #fff; *width: 10px }' + describe "when :allow_hacks is true" do + it "should allow common CSS hacks" do + _(@relaxed.stylesheet(".foo { _border: 1px solid #fff; *width: 10px }")) + .must_equal ".foo { _border: 1px solid #fff; *width: 10px }" end end - describe 'when :allow_hacks is false' do - it 'should not allow common CSS hacks' do - _(@custom.stylesheet('.foo { _border: 1px solid #fff; *width: 10px }')) - .must_equal '.foo { }' + describe "when :allow_hacks is false" do + it "should not allow common CSS hacks" do + _(@custom.stylesheet(".foo { _border: 1px solid #fff; *width: 10px }")) + .must_equal ".foo { }" end end end - describe '#tree!' do - it 'should sanitize a Crass CSS parse tree' do - tree = Crass.parse(String.new("@import url(foo.css);\n") << - ".foo { background: #fff; font: 16pt 'Comic Sans MS'; }\n" << + describe "#tree!" do + it "should sanitize a Crass CSS parse tree" do + tree = Crass.parse("@import url(foo.css);\n" \ + ".foo { background: #fff; font: 16pt 'Comic Sans MS'; }\n" \ "#bar { top: 125px; background: green; }") _(@custom.tree!(tree)).must_be_same_as tree - _(Crass::Parser.stringify(tree)).must_equal String.new("\n") << - ".foo { background: #fff; }\n" << - "#bar { background: green; }" + _(Crass::Parser.stringify(tree)).must_equal "\n" \ + ".foo { background: #fff; }\n" \ + "#bar { background: green; }" end end end - describe 'class methods' do - describe '.properties' do - it 'should sanitize CSS properties with the given config' do + describe "class methods" do + describe ".properties" do + it "should sanitize CSS properties with the given config" do css = 'background: #fff; width: expression(alert("hi"));' - _(Sanitize::CSS.properties(css)).must_equal ' ' - _(Sanitize::CSS.properties(css, Sanitize::Config::RELAXED[:css])).must_equal 'background: #fff; ' - _(Sanitize::CSS.properties(css, :properties => %w[background color width])).must_equal 'background: #fff; ' + _(Sanitize::CSS.properties(css)).must_equal " " + _(Sanitize::CSS.properties(css, Sanitize::Config::RELAXED[:css])).must_equal "background: #fff; " + _(Sanitize::CSS.properties(css, properties: %w[background color width])).must_equal "background: #fff; " end end - describe '.stylesheet' do - it 'should sanitize a CSS stylesheet with the given config' do + describe ".stylesheet" do + it "should sanitize a CSS stylesheet with the given config" do css = %[ /* Yay CSS! */ .foo { color: #fff; } @@ -228,43 +228,43 @@ } ].strip - _(Sanitize::CSS.stylesheet(css).strip).must_equal %[ + _(Sanitize::CSS.stylesheet(css).strip).must_equal %( .foo { } #bar { } - ].strip + ).strip _(Sanitize::CSS.stylesheet(css, Sanitize::Config::RELAXED[:css])).must_equal css - _(Sanitize::CSS.stylesheet(css, :properties => %w[background color width]).strip).must_equal %[ + _(Sanitize::CSS.stylesheet(css, properties: %w[background color width]).strip).must_equal %( .foo { color: #fff; } #bar { } - ].strip + ).strip end end - describe '.tree!' do - it 'should sanitize a Crass CSS parse tree with the given config' do - tree = Crass.parse(String.new("@import url(foo.css);\n") << - ".foo { background: #fff; font: 16pt 'Comic Sans MS'; }\n" << + describe ".tree!" do + it "should sanitize a Crass CSS parse tree with the given config" do + tree = Crass.parse("@import url(foo.css);\n" \ + ".foo { background: #fff; font: 16pt 'Comic Sans MS'; }\n" \ "#bar { top: 125px; background: green; }") - _(Sanitize::CSS.tree!(tree, :properties => %w[background color width])).must_be_same_as tree + _(Sanitize::CSS.tree!(tree, properties: %w[background color width])).must_be_same_as tree - _(Crass::Parser.stringify(tree)).must_equal String.new("\n") << - ".foo { background: #fff; }\n" << - "#bar { background: green; }" + _(Crass::Parser.stringify(tree)).must_equal "\n" \ + ".foo { background: #fff; }\n" \ + "#bar { background: green; }" end end end - describe 'functionality' do + describe "functionality" do before do @default = Sanitize::CSS.new @relaxed = Sanitize::CSS.new(Sanitize::Config::RELAXED[:css]) end # https://github.com/rgrove/sanitize/issues/121 - it 'should parse the contents of @media rules properly' do + it "should parse the contents of @media rules properly" do css = '@media { p[class="center"] { text-align: center; }}' _(@relaxed.stylesheet(css)).must_equal css @@ -291,7 +291,7 @@ ].strip end - it 'should parse @page rules properly' do + it "should parse @page rules properly" do css = %[ @page { margin: 2cm } /* All margins set to 2cm */ @@ -324,15 +324,15 @@ .foo { color: green; } ].strip - _(@relaxed.stylesheet(css).strip).must_equal %[ + _(@relaxed.stylesheet(css).strip).must_equal %( .foo { color: green; } - ].strip + ).strip end describe "when blockless at-rules are allowlisted" do before do @scss = Sanitize::CSS.new(Sanitize::Config.merge(Sanitize::Config::RELAXED[:css], { - :at_rules => ['charset', 'import'] + at_rules: ["charset", "import"] })) end @@ -351,24 +351,23 @@ end it "should remove them if they have invalid blocks" do - css = %[ + css = %( @charset { color: green } @import { color: green } .foo { color: green; } - ].strip + ).strip - _(@scss.stylesheet(css).strip).must_equal %[ + _(@scss.stylesheet(css).strip).must_equal %( .foo { color: green; } - ].strip + ).strip end end describe "when validating @import rules" do - describe "with no validation proc specified" do before do @scss = Sanitize::CSS.new(Sanitize::Config.merge(Sanitize::Config::RELAXED[:css], { - :at_rules => ['import'] + at_rules: ["import"] })) end @@ -385,10 +384,10 @@ describe "with a validation proc specified" do before do - google_font_validator = Proc.new { |url| url.start_with?("https://fonts.googleapis.com") } + google_font_validator = proc { |url| url.start_with?("https://fonts.googleapis.com") } @scss = Sanitize::CSS.new(Sanitize::Config.merge(Sanitize::Config::RELAXED[:css], { - :at_rules => ['import'], :import_url_validator => google_font_validator + at_rules: ["import"], import_url_validator: google_font_validator })) end @@ -411,9 +410,9 @@ @import url('https://nastysite.com/nasty_hax0r.css'); ].strip - _(@scss.stylesheet(css).strip).must_equal %[ + _(@scss.stylesheet(css).strip).must_equal %( @import 'https://fonts.googleapis.com/css?family=Indie+Flower'; - ].strip + ).strip end it "should not allow a blank url" do @@ -423,9 +422,9 @@ @import url(''); ].strip - _(@scss.stylesheet(css).strip).must_equal %[ + _(@scss.stylesheet(css).strip).must_equal %( @import 'https://fonts.googleapis.com/css?family=Indie+Flower'; - ].strip + ).strip end end end diff --git a/test/test_transformers.rb b/test/test_transformers.rb index af2a80d..8d6d203 100644 --- a/test/test_transformers.rb +++ b/test/test_transformers.rb @@ -1,124 +1,121 @@ # frozen_string_literal: true -require_relative 'common' +require_relative "common" -describe 'Transformers' do +describe "Transformers" do make_my_diffs_pretty! parallelize_me! - it 'should receive a complete env Hash as input' do - Sanitize.fragment('<SPAN>foo</SPAN>', - :foo => :bar, - :transformers => lambda {|env| + it "should receive a complete env Hash as input" do + Sanitize.fragment("<SPAN>foo</SPAN>", + foo: :bar, + transformers: lambda { |env| return unless env[:node].element? _(env[:config][:foo]).must_equal :bar _(env[:is_allowlisted]).must_equal false _(env[:is_whitelisted]).must_equal env[:is_allowlisted] _(env[:node]).must_be_kind_of Nokogiri::XML::Node - _(env[:node_name]).must_equal 'span' + _(env[:node_name]).must_equal "span" _(env[:node_allowlist]).must_be_kind_of Set _(env[:node_allowlist]).must_be_empty _(env[:node_whitelist]).must_equal env[:node_allowlist] - } - ) + }) end - it 'should traverse all node types, including the fragment itself' do + it "should traverse all node types, including the fragment itself" do nodes = [] - Sanitize.fragment('<div>foo</div><!--bar--><script>cdata!</script>', - :transformers => proc {|env| nodes << env[:node_name] } - ) + Sanitize.fragment("<div>foo</div><!--bar--><script>cdata!</script>", + transformers: proc { |env| nodes << env[:node_name] }) _(nodes).must_equal %w[ #document-fragment div text text text comment script text ] end - it 'should perform top-down traversal' do + it "should perform top-down traversal" do nodes = [] - Sanitize.fragment('<div><span><strong>foo</strong></span><b></b></div><p>bar</p>', - :transformers => proc {|env| nodes << env[:node_name] if env[:node].element? } - ) + Sanitize.fragment("<div><span><strong>foo</strong></span><b></b></div><p>bar</p>", + transformers: proc { |env| nodes << env[:node_name] if env[:node].element? }) _(nodes).must_equal %w[div span strong b p] end - it 'should allowlist nodes in the node allowlist' do + it "should allowlist nodes in the node allowlist" do _(Sanitize.fragment('<div class="foo">foo</div><span>bar</span>', - :transformers => [ - proc {|env| - {:node_allowlist => [env[:node]]} if env[:node_name] == 'div' + transformers: [ + proc { |env| + {node_allowlist: [env[:node]]} if env[:node_name] == "div" }, - proc {|env| - _(env[:is_allowlisted]).must_equal false unless env[:node_name] == 'div' - _(env[:is_allowlisted]).must_equal true if env[:node_name] == 'div' - _(env[:node_allowlist]).must_include env[:node] if env[:node_name] == 'div' + proc { |env| + _(env[:is_allowlisted]).must_equal false unless env[:node_name] == "div" + _(env[:is_allowlisted]).must_equal true if env[:node_name] == "div" + _(env[:node_allowlist]).must_include env[:node] if env[:node_name] == "div" _(env[:is_whitelisted]).must_equal env[:is_allowlisted] _(env[:node_whitelist]).must_equal env[:node_allowlist] } - ] - )).must_equal '<div class="foo">foo</div>bar' + ])).must_equal '<div class="foo">foo</div>bar' end - it 'should clear the node allowlist after each fragment' do + it "should clear the node allowlist after each fragment" do called = false - Sanitize.fragment('<div>foo</div>', - :transformers => proc {|env| {:node_allowlist => [env[:node]]}} - ) + Sanitize.fragment("<div>foo</div>", + transformers: proc { |env| {node_allowlist: [env[:node]]} }) - Sanitize.fragment('<div>foo</div>', - :transformers => proc {|env| + Sanitize.fragment("<div>foo</div>", + transformers: proc { |env| called = true _(env[:is_allowlisted]).must_equal false _(env[:is_whitelisted]).must_equal env[:is_allowlisted] _(env[:node_allowlist]).must_be_empty _(env[:node_whitelist]).must_equal env[:node_allowlist] - } - ) + }) _(called).must_equal true end - it 'should accept a method transformer' do - def transformer(env); end - _(Sanitize.fragment('<div>foo</div>', :transformers => method(:transformer))) - .must_equal(' foo ') + it "should accept a method transformer" do + def transformer(env) + end + _(Sanitize.fragment("<div>foo</div>", transformers: method(:transformer))) + .must_equal(" foo ") end - describe 'Image allowlist transformer' do - require 'uri' + describe "Image allowlist transformer" do + require "uri" image_allowlist_transformer = lambda do |env| # Ignore everything except <img> elements. - return unless env[:node_name] == 'img' + return unless env[:node_name] == "img" - node = env[:node] - image_uri = URI.parse(node['src']) + node = env[:node] + image_uri = URI.parse(node["src"]) # Only allow relative URLs or URLs with the example.com domain. The # image_uri.host.nil? check ensures that protocol-relative URLs like - # "//evil.com/foo.jpg". - unless image_uri.host == 'example.com' || (image_uri.host.nil? && image_uri.relative?) - node.unlink # `Nokogiri::XML::Node#unlink` removes a node from the document + # "//evil.com/foo.jpg" are not allowed. + unless image_uri.host == "example.com" + unless image_uri.host.nil? && image_uri.relative? + node.unlink # `Nokogiri::XML::Node#unlink` removes a node from the document + end end end before do @s = Sanitize.new(Sanitize::Config.merge(Sanitize::Config::RELAXED, - :transformers => image_allowlist_transformer)) + transformers: image_allowlist_transformer)) end - it 'should allow images with relative URLs' do + it "should allow images with relative URLs" do input = '<img src="/foo/bar.jpg">' _(@s.fragment(input)).must_equal(input) end - it 'should allow images at the example.com domain' do + it "should allow images at the example.com domain" do input = '<img src="http://example.com/foo/bar.jpg">' _(@s.fragment(input)).must_equal(input) @@ -129,103 +126,103 @@ def transformer(env); end _(@s.fragment(input)).must_equal(input) end - it 'should not allow images at other domains' do + it "should not allow images at other domains" do input = '<img src="http://evil.com/foo/bar.jpg">' - _(@s.fragment(input)).must_equal('') + _(@s.fragment(input)).must_equal("") input = '<img src="https://evil.com/foo/bar.jpg">' - _(@s.fragment(input)).must_equal('') + _(@s.fragment(input)).must_equal("") input = '<img src="//evil.com/foo/bar.jpg">' - _(@s.fragment(input)).must_equal('') + _(@s.fragment(input)).must_equal("") input = '<img src="http://subdomain.example.com/foo/bar.jpg">' - _(@s.fragment(input)).must_equal('') + _(@s.fragment(input)).must_equal("") end end - describe 'YouTube transformer' do + describe "YouTube transformer" do youtube_transformer = lambda do |env| - node = env[:node] + node = env[:node] node_name = env[:node_name] # Don't continue if this node is already allowlisted or is not an element. return if env[:is_allowlisted] || !node.element? # Don't continue unless the node is an iframe. - return unless node_name == 'iframe' + return unless node_name == "iframe" # Verify that the video URL is actually a valid YouTube video URL. - return unless node['src'] =~ %r|\A(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/| + return unless %r{\A(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/}.match?(node["src"]) # We're now certain that this is a YouTube embed, but we still need to run # it through a special Sanitize step to ensure that no unwanted elements or # attributes that don't belong in a YouTube embed can sneak in. Sanitize.node!(node, { - :elements => %w[iframe], + elements: %w[iframe], - :attributes => { - 'iframe' => %w[allowfullscreen frameborder height src width] + attributes: { + "iframe" => %w[allowfullscreen frameborder height src width] } }) # Now that we're sure that this is a valid YouTube embed and that there are # no unwanted elements or attributes hidden inside it, we can tell Sanitize # to allowlist the current node. - {:node_allowlist => [node]} + {node_allowlist: [node]} end - it 'should allow HTTP YouTube video embeds' do + it "should allow HTTP YouTube video embeds" do input = '<iframe width="420" height="315" src="http://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>' - _(Sanitize.fragment(input, :transformers => youtube_transformer)) + _(Sanitize.fragment(input, transformers: youtube_transformer)) .must_equal '<iframe width="420" height="315" src="http://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen=""></iframe>' end - it 'should allow HTTPS YouTube video embeds' do + it "should allow HTTPS YouTube video embeds" do input = '<iframe width="420" height="315" src="https://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>' - _(Sanitize.fragment(input, :transformers => youtube_transformer)) + _(Sanitize.fragment(input, transformers: youtube_transformer)) .must_equal '<iframe width="420" height="315" src="https://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen=""></iframe>' end - it 'should allow protocol-relative YouTube video embeds' do + it "should allow protocol-relative YouTube video embeds" do input = '<iframe width="420" height="315" src="//www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>' - _(Sanitize.fragment(input, :transformers => youtube_transformer)) + _(Sanitize.fragment(input, transformers: youtube_transformer)) .must_equal '<iframe width="420" height="315" src="//www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen=""></iframe>' end - it 'should allow privacy-enhanced YouTube video embeds' do + it "should allow privacy-enhanced YouTube video embeds" do input = '<iframe width="420" height="315" src="https://www.youtube-nocookie.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>' - _(Sanitize.fragment(input, :transformers => youtube_transformer)) + _(Sanitize.fragment(input, transformers: youtube_transformer)) .must_equal '<iframe width="420" height="315" src="https://www.youtube-nocookie.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen=""></iframe>' end - it 'should not allow non-YouTube video embeds' do + it "should not allow non-YouTube video embeds" do input = '<iframe width="420" height="315" src="http://www.fake-youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen></iframe>' - _(Sanitize.fragment(input, :transformers => youtube_transformer)) - .must_equal('') + _(Sanitize.fragment(input, transformers: youtube_transformer)) + .must_equal("") end end - describe 'DOM modification transformer' do + describe "DOM modification transformer" do b_to_strong_tag_transformer = lambda do |env| - node = env[:node] + node = env[:node] node_name = env[:node_name] - if node_name == 'b' - node.name = 'strong' + if node_name == "b" + node.name = "strong" end end - it 'should allow the <b> tag to be changed to a <strong> tag' do - input = '<b>text</b>' + it "should allow the <b> tag to be changed to a <strong> tag" do + input = "<b>text</b>" - _(Sanitize.fragment(input, :elements => ['strong'], :transformers => b_to_strong_tag_transformer)) - .must_equal '<strong>text</strong>' + _(Sanitize.fragment(input, elements: ["strong"], transformers: b_to_strong_tag_transformer)) + .must_equal "<strong>text</strong>" end end end