From adece3ce67e7e5dd05569aa97eeecd13c70fa344 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 11 Jun 2024 09:38:13 -0400 Subject: [PATCH 1/2] Revert "fix: raise CSS::SyntaxError if a pseudo-class is not an XPath Name" This reverts commit d3a60cb5a28e5cda3d1a88fb2b1ae08f2d1bcf91. --- CHANGELOG.md | 3 +- lib/nokogiri/css/parser.rb | 463 ++++++++++++++++----------------- lib/nokogiri/css/parser.y | 14 +- lib/nokogiri/css/tokenizer.rb | 7 +- lib/nokogiri/css/tokenizer.rex | 6 +- test/css/test_tokenizer.rb | 3 +- test/css/test_xpath_visitor.rb | 5 - 7 files changed, 237 insertions(+), 264 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4216a54f6..9fa29e7c2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,9 +15,8 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA ### Fixed -* `Node#clone`, `NodeSet#clone`, and `*::Document#clone` all properly copy the metaclass of the original as expected. Previously, `#clone` had been aliased to `#dup` for these classes (since v1.3.0 in 2009). [#316, #3117] @flavorjones -* CSS queries for pseudo-selectors that cannot be transpiled into XPath queries now raise a more descriptive `Nokogiri::CSS::SyntaxError` when they are parsed. Previously, an invalid XPath query was created and a hard-to-understand XPath error was being raised by the query engine. [#3197] @flavorjones * [CRuby] libgumbo (the HTML5 parser) treats reaching max-depth as EOF. This addresses a class of issues when the parser is interrupted in this way. [#3121] @stevecheckoway +* `Node#clone`, `NodeSet#clone`, and `*::Document#clone` all properly copy the metaclass of the original as expected. Previously, `#clone` had been aliased to `#dup` for these classes (since v1.3.0 in 2009). [#316, #3117] @flavorjones * [CRuby] Update node GC lifecycle to avoid a potential memory leak with fragments in libxml 2.13.0 caused by changes in `xmlAddChild`. [#3156] @flavorjones diff --git a/lib/nokogiri/css/parser.rb b/lib/nokogiri/css/parser.rb index a896b34fb2..dfbfb3c1a0 100644 --- a/lib/nokogiri/css/parser.rb +++ b/lib/nokogiri/css/parser.rb @@ -39,98 +39,96 @@ def unescape_css_string(str) ##### State transition tables begin ### racc_action_table = [ - 27, 11, 64, 38, 36, 61, 62, 40, 60, 27, - 25, 98, 97, 36, 12, 48, 49, 26, 57, 25, - 28, 27, 14, 26, 30, 29, 14, 21, 23, 28, - 30, 73, 26, 36, 29, 14, 21, 23, 27, 30, - -26, 56, 36, 12, 100, 81, 29, 27, 25, 43, - -26, 36, 26, 92, 107, 93, 21, 25, 28, 30, - 95, 26, -26, 29, 14, 21, 23, 28, 30, 27, - 26, 96, 29, 14, 21, 23, 27, 30, 101, 50, - 36, 53, 110, 109, 100, 111, 25, 99, 102, 56, - 103, 104, 36, 97, 29, 14, 28, 61, 65, 26, - 67, 29, 14, 21, 23, 36, 30, 36, 43, 105, - 108, 26, 112, 36, 14, 21, 23, 113, 30, 36, - 100, 43, 115, 43, 26, 36, 26, 118, 21, 43, - 21, 30, 26, 30, 36, 43, 21, 120, 26, 30, - 25, 43, 21, 36, 26, 30, 121, 122, 21, nil, - 43, 30, nil, 26, nil, nil, nil, 21, 23, 43, - 30, nil, 26, 89, 90, nil, 21, nil, nil, 30, - nil, nil, 89, 90, nil, 85, 86, 87, nil, 88, - nil, nil, nil, 84, 85, 86, 87, nil, 88, nil, - 61, 94, 84, 67, 61, 94, nil, 67, 61, 94, - nil, 67, 61, 94, nil, 67, nil, 14, nil, 61, - 94, 14, 67, nil, 4, 14, 5, 10, 4, 14, - 5, 47, nil, nil, nil, 6, 14, 8, 7, 6, - nil, 8, 7, 4, nil, 5, 10, nil, nil, nil, - nil, nil, nil, nil, 6, nil, 8, 7 ] + 27, 11, 38, 99, 36, 12, 40, 26, 48, 25, + 49, 27, 100, 12, 30, 36, 105, 99, -26, 28, + 25, -26, 26, 27, 29, 14, 21, 23, 80, 30, + 28, 36, 72, 26, -26, 29, 14, 21, 23, 27, + 30, 91, 56, 36, 97, 96, 43, 29, 25, 26, + 27, 92, 94, 21, 36, 95, 30, 98, 28, 25, + 101, 26, 102, 29, 14, 21, 23, 96, 30, 28, + 36, 36, 26, 103, 29, 14, 21, 23, 27, 30, + 108, 107, 36, 109, 106, 43, 43, 25, 26, 26, + 27, 110, 21, 21, 111, 30, 30, 28, 99, 50, + 26, 53, 29, 14, 21, 23, 36, 30, 36, 56, + 61, 64, 113, 66, 29, 14, 116, 36, 118, 36, + nil, 43, nil, 43, 26, nil, 26, 14, 21, 23, + 21, 30, 43, 30, 43, 26, nil, 26, 36, 21, + 36, 21, 30, 25, 30, nil, nil, nil, nil, nil, + nil, 61, 62, 43, 60, 43, 26, nil, 26, nil, + 21, 23, 21, 30, 57, 30, 88, 89, 14, nil, + nil, 88, 89, nil, nil, nil, nil, 84, 85, 86, + nil, 87, 84, 85, 86, 83, 87, nil, 61, 93, + 83, 66, 61, 93, nil, 66, 61, 93, nil, 66, + 61, 93, nil, 66, nil, 14, nil, 61, 93, 14, + 66, nil, nil, 14, nil, nil, nil, 14, 4, 5, + 10, nil, nil, nil, 14, 4, 5, 47, 6, nil, + 8, 7, 4, 5, 10, 6, nil, 8, 7, nil, + nil, nil, 6, nil, 8, 7 ] racc_action_check = [ - 3, 1, 27, 11, 3, 27, 27, 14, 27, 9, - 3, 60, 60, 9, 1, 21, 24, 17, 27, 9, - 3, 30, 27, 3, 17, 3, 3, 3, 3, 9, - 3, 30, 9, 16, 9, 9, 9, 9, 12, 9, - 25, 30, 12, 71, 65, 49, 30, 46, 12, 16, - 50, 46, 16, 53, 71, 54, 16, 46, 12, 16, - 58, 12, 65, 12, 12, 12, 12, 46, 12, 26, - 46, 59, 46, 46, 46, 46, 47, 46, 62, 26, - 47, 26, 83, 83, 62, 83, 47, 61, 63, 26, - 64, 66, 28, 67, 26, 26, 47, 28, 28, 47, - 28, 47, 47, 47, 47, 31, 47, 32, 28, 68, - 82, 28, 91, 33, 28, 28, 28, 93, 28, 34, - 94, 31, 98, 32, 31, 35, 32, 101, 31, 33, - 32, 31, 33, 32, 43, 34, 33, 103, 34, 33, - 43, 35, 34, 69, 35, 34, 115, 120, 35, nil, - 43, 35, nil, 43, nil, nil, nil, 43, 43, 69, - 43, nil, 69, 51, 51, nil, 69, nil, nil, 69, - nil, nil, 52, 52, nil, 51, 51, 51, nil, 51, - nil, nil, nil, 51, 52, 52, 52, nil, 52, nil, - 56, 56, 52, 56, 97, 97, nil, 97, 99, 99, - nil, 99, 100, 100, nil, 100, nil, 56, nil, 102, - 102, 97, 102, nil, 0, 99, 0, 0, 20, 100, - 20, 20, nil, nil, nil, 0, 102, 0, 0, 20, - nil, 20, 20, 29, nil, 29, 29, nil, nil, nil, - nil, nil, nil, nil, 29, nil, 29, 29 ] + 3, 1, 11, 64, 3, 70, 14, 17, 21, 3, + 24, 9, 62, 1, 17, 9, 70, 62, 25, 3, + 9, 64, 3, 30, 3, 3, 3, 3, 49, 3, + 9, 16, 30, 9, 50, 9, 9, 9, 9, 12, + 9, 53, 30, 12, 60, 60, 16, 30, 12, 16, + 46, 54, 58, 16, 46, 59, 16, 61, 12, 46, + 63, 12, 65, 12, 12, 12, 12, 66, 12, 46, + 31, 32, 46, 67, 46, 46, 46, 46, 47, 46, + 82, 82, 47, 82, 81, 31, 32, 47, 31, 32, + 26, 90, 31, 32, 92, 31, 32, 47, 93, 26, + 47, 26, 47, 47, 47, 47, 28, 47, 33, 26, + 28, 28, 97, 28, 26, 26, 100, 34, 113, 35, + nil, 28, nil, 33, 28, nil, 33, 28, 28, 28, + 33, 28, 34, 33, 35, 34, nil, 35, 43, 34, + 68, 35, 34, 43, 35, nil, nil, nil, nil, nil, + nil, 27, 27, 43, 27, 68, 43, nil, 68, nil, + 43, 43, 68, 43, 27, 68, 51, 51, 27, nil, + nil, 52, 52, nil, nil, nil, nil, 51, 51, 51, + nil, 51, 52, 52, 52, 51, 52, nil, 56, 56, + 52, 56, 96, 96, nil, 96, 98, 98, nil, 98, + 99, 99, nil, 99, nil, 56, nil, 101, 101, 96, + 101, nil, nil, 98, nil, nil, nil, 99, 0, 0, + 0, nil, nil, nil, 101, 20, 20, 20, 0, nil, + 0, 0, 29, 29, 29, 20, nil, 20, 20, nil, + nil, nil, 29, nil, 29, 29 ] racc_action_pointer = [ - 207, 1, nil, -2, nil, nil, nil, nil, nil, 7, - nil, 3, 36, nil, -5, nil, 27, -8, nil, nil, - 211, 3, nil, nil, -15, 9, 67, -6, 86, 226, - 19, 99, 101, 107, 113, 119, nil, nil, nil, nil, - nil, nil, nil, 128, nil, nil, 45, 74, nil, 33, - 19, 160, 169, 27, 24, nil, 179, nil, 36, 47, - -1, 74, 71, 75, 78, 31, 67, 80, 85, 137, - nil, 30, nil, nil, nil, nil, nil, nil, nil, nil, - nil, nil, 84, 71, nil, nil, nil, nil, nil, nil, - nil, 86, nil, 105, 107, nil, nil, 183, 115, 187, - 191, 113, 198, 130, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, 132, nil, nil, nil, nil, - 133, nil, nil ] + 211, 1, nil, -2, nil, nil, nil, nil, nil, 9, + nil, 2, 37, nil, -5, nil, 25, -17, nil, nil, + 218, -3, nil, nil, -20, -12, 88, 141, 100, 225, + 21, 64, 65, 102, 111, 113, nil, nil, nil, nil, + nil, nil, nil, 132, nil, nil, 48, 76, nil, 17, + 4, 163, 168, 16, 21, nil, 178, nil, 29, 32, + 33, 45, 5, 48, -9, 39, 55, 50, 134, nil, + -7, nil, nil, nil, nil, nil, nil, nil, nil, nil, + nil, 59, 70, nil, nil, nil, nil, nil, nil, nil, + 66, nil, 83, 86, nil, nil, 182, 105, 186, 190, + 103, 197, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, 105, nil, nil, nil, nil, nil ] racc_action_default = [ - -82, -83, -2, -27, -4, -5, -6, -7, -8, -27, - -81, -83, -27, -3, -83, -10, -54, -12, -15, -16, - -20, -83, -22, -23, -83, -25, -27, -83, -27, -82, - -83, -60, -61, -62, -63, -64, -65, -17, 123, -1, - -9, -11, -53, -27, -13, -14, -27, -27, -21, -83, - -32, -69, -69, -83, -83, -33, -83, -34, -83, -83, - -43, -44, -45, -46, -83, -25, -83, -43, -83, -78, - -80, -83, -51, -52, -55, -56, -57, -58, -59, -18, - -19, -24, -83, -83, -70, -71, -72, -73, -74, -75, - -76, -83, -30, -83, -45, -35, -36, -83, -50, -83, - -83, -83, -83, -83, -37, -77, -79, -38, -28, -66, - -67, -68, -29, -31, -39, -83, -40, -41, -48, -42, - -83, -47, -49 ] + -81, -82, -2, -27, -4, -5, -6, -7, -8, -27, + -80, -82, -27, -3, -82, -10, -53, -12, -15, -16, + -20, -82, -22, -23, -82, -25, -27, -82, -27, -81, + -82, -59, -60, -61, -62, -63, -64, -17, 119, -1, + -9, -11, -52, -27, -13, -14, -27, -27, -21, -82, + -32, -68, -68, -82, -82, -33, -82, -34, -82, -82, + -43, -44, -45, -46, -25, -82, -43, -82, -77, -79, + -82, -50, -51, -54, -55, -56, -57, -58, -18, -19, + -24, -82, -82, -69, -70, -71, -72, -73, -74, -75, + -82, -30, -82, -45, -35, -36, -82, -49, -82, -82, + -82, -82, -37, -76, -78, -38, -28, -65, -66, -67, + -29, -31, -39, -82, -40, -41, -48, -42, -47 ] racc_goto_table = [ - 58, 42, 13, 1, 46, 52, 19, 69, 37, 72, - 41, 39, 19, 70, 44, 19, 74, 75, 76, 77, - 78, 45, 69, 82, 91, 54, 51, 59, 70, 55, - nil, nil, 71, nil, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, 79, 80, nil, nil, 19, - 19, nil, nil, nil, 106, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, - 114, nil, 116, 117, nil, 119 ] + 58, 42, 13, 1, 46, 52, 19, 68, 37, 71, + 41, 39, 19, 69, 44, 19, 73, 74, 75, 76, + 77, 45, 68, 81, 90, 54, 51, 59, 69, 55, + nil, nil, 70, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, 78, 79, nil, nil, 19, + 19, nil, nil, 104, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, 112, + nil, 114, 115, nil, 117 ] racc_goto_check = [ 20, 14, 2, 1, 5, 11, 7, 9, 2, 11, @@ -138,9 +136,9 @@ def unescape_css_string(str) 14, 13, 9, 19, 19, 17, 18, 21, 14, 7, nil, nil, 1, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, 2, 2, nil, nil, 7, - 7, nil, nil, nil, 14, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, - 20, nil, 20, 20, nil, 20 ] + 7, nil, nil, 14, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, 20, + nil, 20, 20, nil, 20 ] racc_goto_pointer = [ nil, 3, -1, nil, nil, -16, nil, 3, nil, -21, @@ -150,96 +148,95 @@ def unescape_css_string(str) racc_goto_default = [ nil, nil, nil, 2, 3, 9, 15, 63, 20, 16, nil, 17, 34, 33, 18, 32, 22, 24, nil, nil, - 66, nil, 31, 35, 83, 68 ] + 65, nil, 31, 35, 82, 67 ] racc_reduce_table = [ 0, 0, :racc_error, - 3, 34, :_reduce_1, - 1, 34, :_reduce_2, - 2, 34, :_reduce_3, - 1, 38, :_reduce_4, - 1, 38, :_reduce_5, - 1, 38, :_reduce_6, - 1, 38, :_reduce_7, - 1, 38, :_reduce_8, - 2, 39, :_reduce_9, - 1, 40, :_reduce_10, - 2, 41, :_reduce_11, + 3, 33, :_reduce_1, + 1, 33, :_reduce_2, + 2, 33, :_reduce_3, + 1, 37, :_reduce_4, + 1, 37, :_reduce_5, + 1, 37, :_reduce_6, + 1, 37, :_reduce_7, + 1, 37, :_reduce_8, + 2, 38, :_reduce_9, + 1, 39, :_reduce_10, + 2, 40, :_reduce_11, + 1, 40, :_reduce_none, + 2, 40, :_reduce_13, + 2, 40, :_reduce_14, + 1, 40, :_reduce_15, + 1, 40, :_reduce_none, + 2, 35, :_reduce_17, + 3, 34, :_reduce_18, + 3, 34, :_reduce_19, + 1, 34, :_reduce_none, + 2, 47, :_reduce_21, 1, 41, :_reduce_none, - 2, 41, :_reduce_13, - 2, 41, :_reduce_14, - 1, 41, :_reduce_15, - 1, 41, :_reduce_none, - 2, 36, :_reduce_17, - 3, 35, :_reduce_18, - 3, 35, :_reduce_19, - 1, 35, :_reduce_none, - 2, 48, :_reduce_21, + 1, 41, :_reduce_23, + 3, 48, :_reduce_24, + 1, 48, :_reduce_25, + 1, 49, :_reduce_26, + 0, 49, :_reduce_none, + 4, 45, :_reduce_28, + 4, 45, :_reduce_29, + 3, 45, :_reduce_30, + 3, 50, :_reduce_31, + 1, 50, :_reduce_32, + 1, 50, :_reduce_none, + 2, 43, :_reduce_34, + 3, 43, :_reduce_35, + 3, 43, :_reduce_36, + 3, 43, :_reduce_37, + 3, 43, :_reduce_38, + 3, 52, :_reduce_39, + 3, 52, :_reduce_40, + 3, 52, :_reduce_41, + 3, 52, :_reduce_42, + 1, 52, :_reduce_none, + 1, 52, :_reduce_none, + 1, 52, :_reduce_45, + 1, 52, :_reduce_none, + 4, 53, :_reduce_47, + 3, 53, :_reduce_48, + 2, 53, :_reduce_49, + 2, 44, :_reduce_50, + 2, 44, :_reduce_51, 1, 42, :_reduce_none, - 1, 42, :_reduce_23, - 3, 49, :_reduce_24, - 1, 49, :_reduce_25, - 1, 50, :_reduce_26, - 0, 50, :_reduce_none, - 4, 46, :_reduce_28, - 4, 46, :_reduce_29, - 3, 46, :_reduce_30, - 3, 51, :_reduce_31, - 1, 51, :_reduce_32, - 1, 51, :_reduce_none, - 2, 44, :_reduce_34, - 3, 44, :_reduce_35, - 3, 44, :_reduce_36, - 3, 44, :_reduce_37, - 3, 44, :_reduce_38, - 3, 53, :_reduce_39, - 3, 53, :_reduce_40, - 3, 53, :_reduce_41, - 3, 53, :_reduce_42, - 1, 53, :_reduce_none, - 1, 53, :_reduce_none, - 1, 53, :_reduce_45, - 1, 53, :_reduce_none, - 4, 54, :_reduce_47, - 3, 54, :_reduce_48, - 4, 54, :_reduce_49, - 2, 54, :_reduce_50, - 2, 45, :_reduce_51, - 2, 45, :_reduce_52, - 1, 43, :_reduce_none, - 0, 43, :_reduce_none, - 2, 47, :_reduce_55, - 2, 47, :_reduce_56, - 2, 47, :_reduce_57, - 2, 47, :_reduce_58, - 2, 47, :_reduce_59, - 1, 47, :_reduce_none, - 1, 47, :_reduce_none, - 1, 47, :_reduce_none, - 1, 47, :_reduce_none, - 1, 47, :_reduce_none, - 1, 55, :_reduce_65, - 2, 52, :_reduce_66, - 2, 52, :_reduce_67, - 2, 52, :_reduce_68, - 0, 52, :_reduce_none, - 1, 57, :_reduce_70, - 1, 57, :_reduce_71, - 1, 57, :_reduce_72, - 1, 57, :_reduce_73, - 1, 57, :_reduce_74, - 1, 57, :_reduce_75, - 1, 57, :_reduce_76, - 3, 56, :_reduce_77, - 1, 58, :_reduce_none, - 2, 58, :_reduce_none, - 1, 58, :_reduce_none, - 1, 37, :_reduce_none, - 0, 37, :_reduce_none ] - -racc_reduce_n = 83 - -racc_shift_n = 123 + 0, 42, :_reduce_none, + 2, 46, :_reduce_54, + 2, 46, :_reduce_55, + 2, 46, :_reduce_56, + 2, 46, :_reduce_57, + 2, 46, :_reduce_58, + 1, 46, :_reduce_none, + 1, 46, :_reduce_none, + 1, 46, :_reduce_none, + 1, 46, :_reduce_none, + 1, 46, :_reduce_none, + 1, 54, :_reduce_64, + 2, 51, :_reduce_65, + 2, 51, :_reduce_66, + 2, 51, :_reduce_67, + 0, 51, :_reduce_none, + 1, 56, :_reduce_69, + 1, 56, :_reduce_70, + 1, 56, :_reduce_71, + 1, 56, :_reduce_72, + 1, 56, :_reduce_73, + 1, 56, :_reduce_74, + 1, 56, :_reduce_75, + 3, 55, :_reduce_76, + 1, 57, :_reduce_none, + 2, 57, :_reduce_none, + 1, 57, :_reduce_none, + 1, 36, :_reduce_none, + 0, 36, :_reduce_none ] + +racc_reduce_n = 82 + +racc_shift_n = 119 racc_token_table = { false => 0, @@ -250,33 +247,32 @@ def unescape_css_string(str) :LBRACE => 5, :HASH => 6, :PLUS => 7, - :MINUS => 8, - :GREATER => 9, - :S => 10, - :STRING => 11, - :IDENT => 12, - :COMMA => 13, - :NUMBER => 14, - :PREFIXMATCH => 15, - :SUFFIXMATCH => 16, - :SUBSTRINGMATCH => 17, - :TILDE => 18, - :NOT_EQUAL => 19, - :SLASH => 20, - :DOUBLESLASH => 21, - :NOT => 22, - :EQUAL => 23, - :RPAREN => 24, - :LSQUARE => 25, - :RSQUARE => 26, - :HAS => 27, - "@" => 28, - "." => 29, - "*" => 30, - "|" => 31, - ":" => 32 } - -racc_nt_base = 33 + :GREATER => 8, + :S => 9, + :STRING => 10, + :IDENT => 11, + :COMMA => 12, + :NUMBER => 13, + :PREFIXMATCH => 14, + :SUFFIXMATCH => 15, + :SUBSTRINGMATCH => 16, + :TILDE => 17, + :NOT_EQUAL => 18, + :SLASH => 19, + :DOUBLESLASH => 20, + :NOT => 21, + :EQUAL => 22, + :RPAREN => 23, + :LSQUARE => 24, + :RSQUARE => 25, + :HAS => 26, + "@" => 27, + "." => 28, + "*" => 29, + "|" => 30, + ":" => 31 } + +racc_nt_base = 32 racc_use_result_var = true @@ -306,7 +302,6 @@ def unescape_css_string(str) "LBRACE", "HASH", "PLUS", - "MINUS", "GREATER", "S", "STRING", @@ -605,21 +600,13 @@ def _reduce_47(val, _values, result) end def _reduce_48(val, _values, result) - # n+3 + # n+3, -n+3 if val[0] == 'n' val.unshift("1") result = Node.new(:NTH, val) - else - raise Racc::ParseError, "parse error on IDENT '#{val[0]}'" - end - - result -end - -def _reduce_49(val, _values, result) - # -n+3 - if val[1] == 'n' - val[0] = '-1' + elsif val[0] == '-n' + val[0] = 'n' + val.unshift("-1") result = Node.new(:NTH, val) else raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" @@ -628,7 +615,7 @@ def _reduce_49(val, _values, result) result end -def _reduce_50(val, _values, result) +def _reduce_49(val, _values, result) # 5n, -5n, 10n-1 n = val[1] if n[0, 2] == 'n-' @@ -648,20 +635,26 @@ def _reduce_50(val, _values, result) result end -def _reduce_51(val, _values, result) +def _reduce_50(val, _values, result) result = Node.new(:PSEUDO_CLASS, [val[1]]) result end -def _reduce_52(val, _values, result) +def _reduce_51(val, _values, result) result = Node.new(:PSEUDO_CLASS, [val[1]]) result end +# reduce 52 omitted + # reduce 53 omitted -# reduce 54 omitted +def _reduce_54(val, _values, result) + result = Node.new(:COMBINATOR, val) + + result +end def _reduce_55(val, _values, result) result = Node.new(:COMBINATOR, val) @@ -687,11 +680,7 @@ def _reduce_58(val, _values, result) result end -def _reduce_59(val, _values, result) - result = Node.new(:COMBINATOR, val) - - result -end +# reduce 59 omitted # reduce 60 omitted @@ -701,71 +690,71 @@ def _reduce_59(val, _values, result) # reduce 63 omitted -# reduce 64 omitted - -def _reduce_65(val, _values, result) +def _reduce_64(val, _values, result) result = Node.new(:ID, [unescape_css_identifier(val[0])]) result end -def _reduce_66(val, _values, result) +def _reduce_65(val, _values, result) result = [val[0], unescape_css_identifier(val[1])] result end -def _reduce_67(val, _values, result) +def _reduce_66(val, _values, result) result = [val[0], unescape_css_string(val[1])] result end -def _reduce_68(val, _values, result) +def _reduce_67(val, _values, result) result = [val[0], val[1]] result end -# reduce 69 omitted +# reduce 68 omitted -def _reduce_70(val, _values, result) +def _reduce_69(val, _values, result) result = :equal result end -def _reduce_71(val, _values, result) +def _reduce_70(val, _values, result) result = :prefix_match result end -def _reduce_72(val, _values, result) +def _reduce_71(val, _values, result) result = :suffix_match result end -def _reduce_73(val, _values, result) +def _reduce_72(val, _values, result) result = :substring_match result end -def _reduce_74(val, _values, result) +def _reduce_73(val, _values, result) result = :not_equal result end -def _reduce_75(val, _values, result) +def _reduce_74(val, _values, result) result = :includes result end -def _reduce_76(val, _values, result) +def _reduce_75(val, _values, result) result = :dash_match result end -def _reduce_77(val, _values, result) +def _reduce_76(val, _values, result) result = Node.new(:NOT, [val[1]]) result end +# reduce 77 omitted + # reduce 78 omitted # reduce 79 omitted @@ -774,8 +763,6 @@ def _reduce_77(val, _values, result) # reduce 81 omitted -# reduce 82 omitted - def _reduce_none(val, _values, result) val[0] end diff --git a/lib/nokogiri/css/parser.y b/lib/nokogiri/css/parser.y index e4f0969dfc..0431a479e8 100644 --- a/lib/nokogiri/css/parser.y +++ b/lib/nokogiri/css/parser.y @@ -1,6 +1,6 @@ class Nokogiri::CSS::Parser -token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS MINUS GREATER S STRING IDENT +token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE HAS @@ -143,17 +143,13 @@ rule raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" end } - | IDENT PLUS NUMBER { # n+3 + | IDENT PLUS NUMBER { # n+3, -n+3 if val[0] == 'n' val.unshift("1") result = Node.new(:NTH, val) - else - raise Racc::ParseError, "parse error on IDENT '#{val[0]}'" - end - } - | MINUS IDENT PLUS NUMBER { # -n+3 - if val[1] == 'n' - val[0] = '-1' + elsif val[0] == '-n' + val[0] = 'n' + val.unshift("-1") result = Node.new(:NTH, val) else raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" diff --git a/lib/nokogiri/css/tokenizer.rb b/lib/nokogiri/css/tokenizer.rb index f23c5bbd5e..c548d9fa50 100644 --- a/lib/nokogiri/css/tokenizer.rb +++ b/lib/nokogiri/css/tokenizer.rb @@ -63,10 +63,10 @@ def _next_token when (text = @ss.scan(/has\([\s]*/)) action { [:HAS, text] } - when (text = @ss.scan(/([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*\([\s]*/)) + when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*\([\s]*/)) action { [:FUNCTION, text] } - when (text = @ss.scan(/([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*/)) + when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*/)) action { [:IDENT, text] } when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))+/)) @@ -120,9 +120,6 @@ def _next_token when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/)) action { [:NUMBER, text] } - when (text = @ss.scan(/[\s]*\-[\s]*/)) - action { [:MINUS, text] } - when (text = @ss.scan(/[\s]*\/\/[\s]*/)) action { [:DOUBLESLASH, text] } diff --git a/lib/nokogiri/css/tokenizer.rex b/lib/nokogiri/css/tokenizer.rex index b9f15810c3..52500a5591 100644 --- a/lib/nokogiri/css/tokenizer.rex +++ b/lib/nokogiri/css/tokenizer.rex @@ -14,6 +14,7 @@ macro nmchar ([_A-Za-z0-9-]|{nonascii}|{escape}) nmstart ([_A-Za-z]|{nonascii}|{escape}) name {nmstart}{nmchar}* + ident -?{name} charref {nmchar}+ string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(? Date: Tue, 11 Jun 2024 09:49:30 -0400 Subject: [PATCH 2/2] fix: CSS pseudo-classes that are invalid XPath function names raise This is an alternative to #3197 (which was reverted) in which the exception is raised from the XPathVisitor and not the CSS Parser. Semantically, this is valid CSS, and so the Parser shouldn't raise. But it is invalid XPath, and so it's the responsibility of the Visitor to raise. Closes #3193. --- CHANGELOG.md | 3 ++- lib/nokogiri/css/xpath_visitor.rb | 9 +++++++++ test/css/test_xpath_visitor.rb | 6 ++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fa29e7c2c..e3011cc861 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,8 +15,9 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA ### Fixed -* [CRuby] libgumbo (the HTML5 parser) treats reaching max-depth as EOF. This addresses a class of issues when the parser is interrupted in this way. [#3121] @stevecheckoway * `Node#clone`, `NodeSet#clone`, and `*::Document#clone` all properly copy the metaclass of the original as expected. Previously, `#clone` had been aliased to `#dup` for these classes (since v1.3.0 in 2009). [#316, #3117] @flavorjones +* CSS queries for pseudo-selectors that cannot be translated into XPath expressions now raise a more descriptive `Nokogiri::CSS::SyntaxError` when they are parsed. Previously, an invalid XPath expression was evaluated and a hard-to-understand XPath error was raised by the query engine. [#3193] @flavorjones +* [CRuby] libgumbo (the HTML5 parser) treats reaching max-depth as EOF. This addresses a class of issues when the parser is interrupted in this way. [#3121] @stevecheckoway * [CRuby] Update node GC lifecycle to avoid a potential memory leak with fragments in libxml 2.13.0 caused by changes in `xmlAddChild`. [#3156] @flavorjones diff --git a/lib/nokogiri/css/xpath_visitor.rb b/lib/nokogiri/css/xpath_visitor.rb index 44521f2fbe..b4d33e5113 100644 --- a/lib/nokogiri/css/xpath_visitor.rb +++ b/lib/nokogiri/css/xpath_visitor.rb @@ -128,6 +128,8 @@ def visit_function(node) is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)" ".#{"//" unless is_direct}#{node.value[1].accept(self)}" else + validate_xpath_function_name(node.value.first) + # xpath function call, let's marshal those arguments args = ["."] args += node.value[1..-1].map do |n| @@ -207,6 +209,7 @@ def visit_pseudo_class(node) when "parent" then "node()" when "root" then "not(parent::*)" else + validate_xpath_function_name(node.value.first) "nokogiri:#{node.value.first}(.)" end end @@ -270,6 +273,12 @@ def accept(node) private + def validate_xpath_function_name(name) + if name.start_with?("-") + raise Nokogiri::CSS::SyntaxError, "Invalid XPath function name '#{name}'" + end + end + def html5_element_name_needs_namespace_handling(node) # if this is the wildcard selector "*", use it as normal node.value.first != "*" && diff --git a/test/css/test_xpath_visitor.rb b/test/css/test_xpath_visitor.rb index dec81369c4..aeaf3c1dc3 100644 --- a/test/css/test_xpath_visitor.rb +++ b/test/css/test_xpath_visitor.rb @@ -369,6 +369,12 @@ def assert_xpath(expecteds, asts) assert_xpath("//*[not(@id='foo')]", parser.parse(":not(#foo)")) assert_xpath("//*[count(preceding-sibling::*)=0]", parser.parse(":first-child")) end + + it "raises an exception for pseudo-classes that are not XPath Names" do + # see https://github.com/sparklemotion/nokogiri/issues/3193 + assert_raises(Nokogiri::CSS::SyntaxError) { Nokogiri::CSS.xpath_for("div:-moz-drag-over") } + assert_raises(Nokogiri::CSS::SyntaxError) { Nokogiri::CSS.xpath_for("div:-moz-drag-over()") } + end end describe "combinators" do