From d3a60cb5a28e5cda3d1a88fb2b1ae08f2d1bcf91 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 20 May 2024 16:35:27 -0400 Subject: [PATCH] fix: raise CSS::SyntaxError if a pseudo-class is not an XPath Name Some pseudo-classes cannot be converted into an XPath function name, and libxml2 will raise a Nokogiri::XML::XPath::SyntaxError at query-time: nokogiri/xml/searchable.rb:238:in `evaluate': ERROR: Invalid expression: //*:div[nokogiri:-moz-drag-over(.)] (Nokogiri::XML::XPath::SyntaxError) This change moves the error from query-time to parse-time, in the hopes that this is more rescuable (and the error is more descriptive): nokogiri/css/parser_extras.rb:86:in `on_error': unexpected '-' after ':' (Nokogiri::CSS::SyntaxError) Closes #3193 --- CHANGELOG.md | 3 +- lib/nokogiri/css/parser.rb | 463 +++++++++++++++++---------------- lib/nokogiri/css/parser.y | 14 +- lib/nokogiri/css/tokenizer.rb | 7 +- lib/nokogiri/css/tokenizer.rex | 6 +- test/css/test_tokenizer.rb | 3 +- test/css/test_xpath_visitor.rb | 5 + 7 files changed, 264 insertions(+), 237 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fa29e7c2c..e4216a54f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,8 +15,9 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA ### Fixed -* [CRuby] libgumbo (the HTML5 parser) treats reaching max-depth as EOF. This addresses a class of issues when the parser is interrupted in this way. [#3121] @stevecheckoway * `Node#clone`, `NodeSet#clone`, and `*::Document#clone` all properly copy the metaclass of the original as expected. Previously, `#clone` had been aliased to `#dup` for these classes (since v1.3.0 in 2009). [#316, #3117] @flavorjones +* CSS queries for pseudo-selectors that cannot be transpiled into XPath queries now raise a more descriptive `Nokogiri::CSS::SyntaxError` when they are parsed. Previously, an invalid XPath query was created and a hard-to-understand XPath error was being raised by the query engine. 
[#3197] @flavorjones +* [CRuby] libgumbo (the HTML5 parser) treats reaching max-depth as EOF. This addresses a class of issues when the parser is interrupted in this way. [#3121] @stevecheckoway * [CRuby] Update node GC lifecycle to avoid a potential memory leak with fragments in libxml 2.13.0 caused by changes in `xmlAddChild`. [#3156] @flavorjones diff --git a/lib/nokogiri/css/parser.rb b/lib/nokogiri/css/parser.rb index dfbfb3c1a0..a896b34fb2 100644 --- a/lib/nokogiri/css/parser.rb +++ b/lib/nokogiri/css/parser.rb @@ -39,96 +39,98 @@ def unescape_css_string(str) ##### State transition tables begin ### racc_action_table = [ - 27, 11, 38, 99, 36, 12, 40, 26, 48, 25, - 49, 27, 100, 12, 30, 36, 105, 99, -26, 28, - 25, -26, 26, 27, 29, 14, 21, 23, 80, 30, - 28, 36, 72, 26, -26, 29, 14, 21, 23, 27, - 30, 91, 56, 36, 97, 96, 43, 29, 25, 26, - 27, 92, 94, 21, 36, 95, 30, 98, 28, 25, - 101, 26, 102, 29, 14, 21, 23, 96, 30, 28, - 36, 36, 26, 103, 29, 14, 21, 23, 27, 30, - 108, 107, 36, 109, 106, 43, 43, 25, 26, 26, - 27, 110, 21, 21, 111, 30, 30, 28, 99, 50, - 26, 53, 29, 14, 21, 23, 36, 30, 36, 56, - 61, 64, 113, 66, 29, 14, 116, 36, 118, 36, - nil, 43, nil, 43, 26, nil, 26, 14, 21, 23, - 21, 30, 43, 30, 43, 26, nil, 26, 36, 21, - 36, 21, 30, 25, 30, nil, nil, nil, nil, nil, - nil, 61, 62, 43, 60, 43, 26, nil, 26, nil, - 21, 23, 21, 30, 57, 30, 88, 89, 14, nil, - nil, 88, 89, nil, nil, nil, nil, 84, 85, 86, - nil, 87, 84, 85, 86, 83, 87, nil, 61, 93, - 83, 66, 61, 93, nil, 66, 61, 93, nil, 66, - 61, 93, nil, 66, nil, 14, nil, 61, 93, 14, - 66, nil, nil, 14, nil, nil, nil, 14, 4, 5, - 10, nil, nil, nil, 14, 4, 5, 47, 6, nil, - 8, 7, 4, 5, 10, 6, nil, 8, 7, nil, - nil, nil, 6, nil, 8, 7 ] + 27, 11, 64, 38, 36, 61, 62, 40, 60, 27, + 25, 98, 97, 36, 12, 48, 49, 26, 57, 25, + 28, 27, 14, 26, 30, 29, 14, 21, 23, 28, + 30, 73, 26, 36, 29, 14, 21, 23, 27, 30, + -26, 56, 36, 12, 100, 81, 29, 27, 25, 43, + -26, 36, 26, 92, 107, 93, 21, 25, 28, 30, + 95, 26, -26, 29, 14, 21, 23, 
28, 30, 27, + 26, 96, 29, 14, 21, 23, 27, 30, 101, 50, + 36, 53, 110, 109, 100, 111, 25, 99, 102, 56, + 103, 104, 36, 97, 29, 14, 28, 61, 65, 26, + 67, 29, 14, 21, 23, 36, 30, 36, 43, 105, + 108, 26, 112, 36, 14, 21, 23, 113, 30, 36, + 100, 43, 115, 43, 26, 36, 26, 118, 21, 43, + 21, 30, 26, 30, 36, 43, 21, 120, 26, 30, + 25, 43, 21, 36, 26, 30, 121, 122, 21, nil, + 43, 30, nil, 26, nil, nil, nil, 21, 23, 43, + 30, nil, 26, 89, 90, nil, 21, nil, nil, 30, + nil, nil, 89, 90, nil, 85, 86, 87, nil, 88, + nil, nil, nil, 84, 85, 86, 87, nil, 88, nil, + 61, 94, 84, 67, 61, 94, nil, 67, 61, 94, + nil, 67, 61, 94, nil, 67, nil, 14, nil, 61, + 94, 14, 67, nil, 4, 14, 5, 10, 4, 14, + 5, 47, nil, nil, nil, 6, 14, 8, 7, 6, + nil, 8, 7, 4, nil, 5, 10, nil, nil, nil, + nil, nil, nil, nil, 6, nil, 8, 7 ] racc_action_check = [ - 3, 1, 11, 64, 3, 70, 14, 17, 21, 3, - 24, 9, 62, 1, 17, 9, 70, 62, 25, 3, - 9, 64, 3, 30, 3, 3, 3, 3, 49, 3, - 9, 16, 30, 9, 50, 9, 9, 9, 9, 12, - 9, 53, 30, 12, 60, 60, 16, 30, 12, 16, - 46, 54, 58, 16, 46, 59, 16, 61, 12, 46, - 63, 12, 65, 12, 12, 12, 12, 66, 12, 46, - 31, 32, 46, 67, 46, 46, 46, 46, 47, 46, - 82, 82, 47, 82, 81, 31, 32, 47, 31, 32, - 26, 90, 31, 32, 92, 31, 32, 47, 93, 26, - 47, 26, 47, 47, 47, 47, 28, 47, 33, 26, - 28, 28, 97, 28, 26, 26, 100, 34, 113, 35, - nil, 28, nil, 33, 28, nil, 33, 28, 28, 28, - 33, 28, 34, 33, 35, 34, nil, 35, 43, 34, - 68, 35, 34, 43, 35, nil, nil, nil, nil, nil, - nil, 27, 27, 43, 27, 68, 43, nil, 68, nil, - 43, 43, 68, 43, 27, 68, 51, 51, 27, nil, - nil, 52, 52, nil, nil, nil, nil, 51, 51, 51, - nil, 51, 52, 52, 52, 51, 52, nil, 56, 56, - 52, 56, 96, 96, nil, 96, 98, 98, nil, 98, - 99, 99, nil, 99, nil, 56, nil, 101, 101, 96, - 101, nil, nil, 98, nil, nil, nil, 99, 0, 0, - 0, nil, nil, nil, 101, 20, 20, 20, 0, nil, - 0, 0, 29, 29, 29, 20, nil, 20, 20, nil, - nil, nil, 29, nil, 29, 29 ] + 3, 1, 27, 11, 3, 27, 27, 14, 27, 9, + 3, 60, 60, 9, 1, 21, 24, 17, 27, 9, + 3, 30, 27, 3, 17, 3, 3, 3, 3, 9, + 3, 30, 9, 
16, 9, 9, 9, 9, 12, 9, + 25, 30, 12, 71, 65, 49, 30, 46, 12, 16, + 50, 46, 16, 53, 71, 54, 16, 46, 12, 16, + 58, 12, 65, 12, 12, 12, 12, 46, 12, 26, + 46, 59, 46, 46, 46, 46, 47, 46, 62, 26, + 47, 26, 83, 83, 62, 83, 47, 61, 63, 26, + 64, 66, 28, 67, 26, 26, 47, 28, 28, 47, + 28, 47, 47, 47, 47, 31, 47, 32, 28, 68, + 82, 28, 91, 33, 28, 28, 28, 93, 28, 34, + 94, 31, 98, 32, 31, 35, 32, 101, 31, 33, + 32, 31, 33, 32, 43, 34, 33, 103, 34, 33, + 43, 35, 34, 69, 35, 34, 115, 120, 35, nil, + 43, 35, nil, 43, nil, nil, nil, 43, 43, 69, + 43, nil, 69, 51, 51, nil, 69, nil, nil, 69, + nil, nil, 52, 52, nil, 51, 51, 51, nil, 51, + nil, nil, nil, 51, 52, 52, 52, nil, 52, nil, + 56, 56, 52, 56, 97, 97, nil, 97, 99, 99, + nil, 99, 100, 100, nil, 100, nil, 56, nil, 102, + 102, 97, 102, nil, 0, 99, 0, 0, 20, 100, + 20, 20, nil, nil, nil, 0, 102, 0, 0, 20, + nil, 20, 20, 29, nil, 29, 29, nil, nil, nil, + nil, nil, nil, nil, 29, nil, 29, 29 ] racc_action_pointer = [ - 211, 1, nil, -2, nil, nil, nil, nil, nil, 9, - nil, 2, 37, nil, -5, nil, 25, -17, nil, nil, - 218, -3, nil, nil, -20, -12, 88, 141, 100, 225, - 21, 64, 65, 102, 111, 113, nil, nil, nil, nil, - nil, nil, nil, 132, nil, nil, 48, 76, nil, 17, - 4, 163, 168, 16, 21, nil, 178, nil, 29, 32, - 33, 45, 5, 48, -9, 39, 55, 50, 134, nil, - -7, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, 59, 70, nil, nil, nil, nil, nil, nil, nil, - 66, nil, 83, 86, nil, nil, 182, 105, 186, 190, - 103, 197, nil, nil, nil, nil, nil, nil, nil, nil, - nil, nil, nil, 105, nil, nil, nil, nil, nil ] + 207, 1, nil, -2, nil, nil, nil, nil, nil, 7, + nil, 3, 36, nil, -5, nil, 27, -8, nil, nil, + 211, 3, nil, nil, -15, 9, 67, -6, 86, 226, + 19, 99, 101, 107, 113, 119, nil, nil, nil, nil, + nil, nil, nil, 128, nil, nil, 45, 74, nil, 33, + 19, 160, 169, 27, 24, nil, 179, nil, 36, 47, + -1, 74, 71, 75, 78, 31, 67, 80, 85, 137, + nil, 30, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, 84, 71, nil, nil, nil, nil, nil, nil, + nil, 86, nil, 105, 107, 
nil, nil, 183, 115, 187, + 191, 113, 198, 130, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, 132, nil, nil, nil, nil, + 133, nil, nil ] racc_action_default = [ - -81, -82, -2, -27, -4, -5, -6, -7, -8, -27, - -80, -82, -27, -3, -82, -10, -53, -12, -15, -16, - -20, -82, -22, -23, -82, -25, -27, -82, -27, -81, - -82, -59, -60, -61, -62, -63, -64, -17, 119, -1, - -9, -11, -52, -27, -13, -14, -27, -27, -21, -82, - -32, -68, -68, -82, -82, -33, -82, -34, -82, -82, - -43, -44, -45, -46, -25, -82, -43, -82, -77, -79, - -82, -50, -51, -54, -55, -56, -57, -58, -18, -19, - -24, -82, -82, -69, -70, -71, -72, -73, -74, -75, - -82, -30, -82, -45, -35, -36, -82, -49, -82, -82, - -82, -82, -37, -76, -78, -38, -28, -65, -66, -67, - -29, -31, -39, -82, -40, -41, -48, -42, -47 ] + -82, -83, -2, -27, -4, -5, -6, -7, -8, -27, + -81, -83, -27, -3, -83, -10, -54, -12, -15, -16, + -20, -83, -22, -23, -83, -25, -27, -83, -27, -82, + -83, -60, -61, -62, -63, -64, -65, -17, 123, -1, + -9, -11, -53, -27, -13, -14, -27, -27, -21, -83, + -32, -69, -69, -83, -83, -33, -83, -34, -83, -83, + -43, -44, -45, -46, -83, -25, -83, -43, -83, -78, + -80, -83, -51, -52, -55, -56, -57, -58, -59, -18, + -19, -24, -83, -83, -70, -71, -72, -73, -74, -75, + -76, -83, -30, -83, -45, -35, -36, -83, -50, -83, + -83, -83, -83, -83, -37, -77, -79, -38, -28, -66, + -67, -68, -29, -31, -39, -83, -40, -41, -48, -42, + -83, -47, -49 ] racc_goto_table = [ - 58, 42, 13, 1, 46, 52, 19, 68, 37, 71, - 41, 39, 19, 69, 44, 19, 73, 74, 75, 76, - 77, 45, 68, 81, 90, 54, 51, 59, 69, 55, - nil, nil, 70, nil, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, 78, 79, nil, nil, 19, - 19, nil, nil, 104, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, nil, nil, nil, nil, 112, - nil, 114, 115, nil, 117 ] + 58, 42, 13, 1, 46, 52, 19, 69, 37, 72, + 41, 39, 19, 70, 44, 19, 74, 75, 76, 77, + 78, 45, 69, 82, 91, 54, 51, 59, 70, 55, + nil, nil, 71, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, 79, 
80, nil, nil, 19, + 19, nil, nil, nil, 106, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + 114, nil, 116, 117, nil, 119 ] racc_goto_check = [ 20, 14, 2, 1, 5, 11, 7, 9, 2, 11, @@ -136,9 +138,9 @@ def unescape_css_string(str) 14, 13, 9, 19, 19, 17, 18, 21, 14, 7, nil, nil, 1, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, 2, 2, nil, nil, 7, - 7, nil, nil, 14, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, nil, nil, nil, nil, 20, - nil, 20, 20, nil, 20 ] + 7, nil, nil, nil, 14, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, + 20, nil, 20, 20, nil, 20 ] racc_goto_pointer = [ nil, 3, -1, nil, nil, -16, nil, 3, nil, -21, @@ -148,95 +150,96 @@ def unescape_css_string(str) racc_goto_default = [ nil, nil, nil, 2, 3, 9, 15, 63, 20, 16, nil, 17, 34, 33, 18, 32, 22, 24, nil, nil, - 65, nil, 31, 35, 82, 67 ] + 66, nil, 31, 35, 83, 68 ] racc_reduce_table = [ 0, 0, :racc_error, - 3, 33, :_reduce_1, - 1, 33, :_reduce_2, - 2, 33, :_reduce_3, - 1, 37, :_reduce_4, - 1, 37, :_reduce_5, - 1, 37, :_reduce_6, - 1, 37, :_reduce_7, - 1, 37, :_reduce_8, - 2, 38, :_reduce_9, - 1, 39, :_reduce_10, - 2, 40, :_reduce_11, - 1, 40, :_reduce_none, - 2, 40, :_reduce_13, - 2, 40, :_reduce_14, - 1, 40, :_reduce_15, - 1, 40, :_reduce_none, - 2, 35, :_reduce_17, - 3, 34, :_reduce_18, - 3, 34, :_reduce_19, - 1, 34, :_reduce_none, - 2, 47, :_reduce_21, + 3, 34, :_reduce_1, + 1, 34, :_reduce_2, + 2, 34, :_reduce_3, + 1, 38, :_reduce_4, + 1, 38, :_reduce_5, + 1, 38, :_reduce_6, + 1, 38, :_reduce_7, + 1, 38, :_reduce_8, + 2, 39, :_reduce_9, + 1, 40, :_reduce_10, + 2, 41, :_reduce_11, 1, 41, :_reduce_none, - 1, 41, :_reduce_23, - 3, 48, :_reduce_24, - 1, 48, :_reduce_25, - 1, 49, :_reduce_26, - 0, 49, :_reduce_none, - 4, 45, :_reduce_28, - 4, 45, :_reduce_29, - 3, 45, :_reduce_30, - 3, 50, :_reduce_31, - 1, 50, :_reduce_32, - 1, 50, :_reduce_none, - 2, 43, :_reduce_34, - 3, 43, :_reduce_35, - 3, 43, :_reduce_36, - 3, 43, 
:_reduce_37, - 3, 43, :_reduce_38, - 3, 52, :_reduce_39, - 3, 52, :_reduce_40, - 3, 52, :_reduce_41, - 3, 52, :_reduce_42, - 1, 52, :_reduce_none, - 1, 52, :_reduce_none, - 1, 52, :_reduce_45, - 1, 52, :_reduce_none, - 4, 53, :_reduce_47, - 3, 53, :_reduce_48, - 2, 53, :_reduce_49, - 2, 44, :_reduce_50, - 2, 44, :_reduce_51, + 2, 41, :_reduce_13, + 2, 41, :_reduce_14, + 1, 41, :_reduce_15, + 1, 41, :_reduce_none, + 2, 36, :_reduce_17, + 3, 35, :_reduce_18, + 3, 35, :_reduce_19, + 1, 35, :_reduce_none, + 2, 48, :_reduce_21, 1, 42, :_reduce_none, - 0, 42, :_reduce_none, - 2, 46, :_reduce_54, - 2, 46, :_reduce_55, - 2, 46, :_reduce_56, - 2, 46, :_reduce_57, - 2, 46, :_reduce_58, - 1, 46, :_reduce_none, - 1, 46, :_reduce_none, - 1, 46, :_reduce_none, - 1, 46, :_reduce_none, - 1, 46, :_reduce_none, - 1, 54, :_reduce_64, - 2, 51, :_reduce_65, - 2, 51, :_reduce_66, - 2, 51, :_reduce_67, - 0, 51, :_reduce_none, - 1, 56, :_reduce_69, - 1, 56, :_reduce_70, - 1, 56, :_reduce_71, - 1, 56, :_reduce_72, - 1, 56, :_reduce_73, - 1, 56, :_reduce_74, - 1, 56, :_reduce_75, - 3, 55, :_reduce_76, - 1, 57, :_reduce_none, - 2, 57, :_reduce_none, - 1, 57, :_reduce_none, - 1, 36, :_reduce_none, - 0, 36, :_reduce_none ] - -racc_reduce_n = 82 - -racc_shift_n = 119 + 1, 42, :_reduce_23, + 3, 49, :_reduce_24, + 1, 49, :_reduce_25, + 1, 50, :_reduce_26, + 0, 50, :_reduce_none, + 4, 46, :_reduce_28, + 4, 46, :_reduce_29, + 3, 46, :_reduce_30, + 3, 51, :_reduce_31, + 1, 51, :_reduce_32, + 1, 51, :_reduce_none, + 2, 44, :_reduce_34, + 3, 44, :_reduce_35, + 3, 44, :_reduce_36, + 3, 44, :_reduce_37, + 3, 44, :_reduce_38, + 3, 53, :_reduce_39, + 3, 53, :_reduce_40, + 3, 53, :_reduce_41, + 3, 53, :_reduce_42, + 1, 53, :_reduce_none, + 1, 53, :_reduce_none, + 1, 53, :_reduce_45, + 1, 53, :_reduce_none, + 4, 54, :_reduce_47, + 3, 54, :_reduce_48, + 4, 54, :_reduce_49, + 2, 54, :_reduce_50, + 2, 45, :_reduce_51, + 2, 45, :_reduce_52, + 1, 43, :_reduce_none, + 0, 43, :_reduce_none, + 2, 47, :_reduce_55, + 
2, 47, :_reduce_56, + 2, 47, :_reduce_57, + 2, 47, :_reduce_58, + 2, 47, :_reduce_59, + 1, 47, :_reduce_none, + 1, 47, :_reduce_none, + 1, 47, :_reduce_none, + 1, 47, :_reduce_none, + 1, 47, :_reduce_none, + 1, 55, :_reduce_65, + 2, 52, :_reduce_66, + 2, 52, :_reduce_67, + 2, 52, :_reduce_68, + 0, 52, :_reduce_none, + 1, 57, :_reduce_70, + 1, 57, :_reduce_71, + 1, 57, :_reduce_72, + 1, 57, :_reduce_73, + 1, 57, :_reduce_74, + 1, 57, :_reduce_75, + 1, 57, :_reduce_76, + 3, 56, :_reduce_77, + 1, 58, :_reduce_none, + 2, 58, :_reduce_none, + 1, 58, :_reduce_none, + 1, 37, :_reduce_none, + 0, 37, :_reduce_none ] + +racc_reduce_n = 83 + +racc_shift_n = 123 racc_token_table = { false => 0, @@ -247,32 +250,33 @@ def unescape_css_string(str) :LBRACE => 5, :HASH => 6, :PLUS => 7, - :GREATER => 8, - :S => 9, - :STRING => 10, - :IDENT => 11, - :COMMA => 12, - :NUMBER => 13, - :PREFIXMATCH => 14, - :SUFFIXMATCH => 15, - :SUBSTRINGMATCH => 16, - :TILDE => 17, - :NOT_EQUAL => 18, - :SLASH => 19, - :DOUBLESLASH => 20, - :NOT => 21, - :EQUAL => 22, - :RPAREN => 23, - :LSQUARE => 24, - :RSQUARE => 25, - :HAS => 26, - "@" => 27, - "." => 28, - "*" => 29, - "|" => 30, - ":" => 31 } - -racc_nt_base = 32 + :MINUS => 8, + :GREATER => 9, + :S => 10, + :STRING => 11, + :IDENT => 12, + :COMMA => 13, + :NUMBER => 14, + :PREFIXMATCH => 15, + :SUFFIXMATCH => 16, + :SUBSTRINGMATCH => 17, + :TILDE => 18, + :NOT_EQUAL => 19, + :SLASH => 20, + :DOUBLESLASH => 21, + :NOT => 22, + :EQUAL => 23, + :RPAREN => 24, + :LSQUARE => 25, + :RSQUARE => 26, + :HAS => 27, + "@" => 28, + "." 
=> 29, + "*" => 30, + "|" => 31, + ":" => 32 } + +racc_nt_base = 33 racc_use_result_var = true @@ -302,6 +306,7 @@ def unescape_css_string(str) "LBRACE", "HASH", "PLUS", + "MINUS", "GREATER", "S", "STRING", @@ -600,13 +605,21 @@ def _reduce_47(val, _values, result) end def _reduce_48(val, _values, result) - # n+3, -n+3 + # n+3 if val[0] == 'n' val.unshift("1") result = Node.new(:NTH, val) - elsif val[0] == '-n' - val[0] = 'n' - val.unshift("-1") + else + raise Racc::ParseError, "parse error on IDENT '#{val[0]}'" + end + + result +end + +def _reduce_49(val, _values, result) + # -n+3 + if val[1] == 'n' + val[0] = '-1' result = Node.new(:NTH, val) else raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" @@ -615,7 +628,7 @@ def _reduce_48(val, _values, result) result end -def _reduce_49(val, _values, result) +def _reduce_50(val, _values, result) # 5n, -5n, 10n-1 n = val[1] if n[0, 2] == 'n-' @@ -635,26 +648,20 @@ def _reduce_49(val, _values, result) result end -def _reduce_50(val, _values, result) +def _reduce_51(val, _values, result) result = Node.new(:PSEUDO_CLASS, [val[1]]) result end -def _reduce_51(val, _values, result) +def _reduce_52(val, _values, result) result = Node.new(:PSEUDO_CLASS, [val[1]]) result end -# reduce 52 omitted - # reduce 53 omitted -def _reduce_54(val, _values, result) - result = Node.new(:COMBINATOR, val) - - result -end +# reduce 54 omitted def _reduce_55(val, _values, result) result = Node.new(:COMBINATOR, val) @@ -680,7 +687,11 @@ def _reduce_58(val, _values, result) result end -# reduce 59 omitted +def _reduce_59(val, _values, result) + result = Node.new(:COMBINATOR, val) + + result +end # reduce 60 omitted @@ -690,71 +701,71 @@ def _reduce_58(val, _values, result) # reduce 63 omitted -def _reduce_64(val, _values, result) +# reduce 64 omitted + +def _reduce_65(val, _values, result) result = Node.new(:ID, [unescape_css_identifier(val[0])]) result end -def _reduce_65(val, _values, result) +def _reduce_66(val, _values, result) result 
= [val[0], unescape_css_identifier(val[1])] result end -def _reduce_66(val, _values, result) +def _reduce_67(val, _values, result) result = [val[0], unescape_css_string(val[1])] result end -def _reduce_67(val, _values, result) +def _reduce_68(val, _values, result) result = [val[0], val[1]] result end -# reduce 68 omitted +# reduce 69 omitted -def _reduce_69(val, _values, result) +def _reduce_70(val, _values, result) result = :equal result end -def _reduce_70(val, _values, result) +def _reduce_71(val, _values, result) result = :prefix_match result end -def _reduce_71(val, _values, result) +def _reduce_72(val, _values, result) result = :suffix_match result end -def _reduce_72(val, _values, result) +def _reduce_73(val, _values, result) result = :substring_match result end -def _reduce_73(val, _values, result) +def _reduce_74(val, _values, result) result = :not_equal result end -def _reduce_74(val, _values, result) +def _reduce_75(val, _values, result) result = :includes result end -def _reduce_75(val, _values, result) +def _reduce_76(val, _values, result) result = :dash_match result end -def _reduce_76(val, _values, result) +def _reduce_77(val, _values, result) result = Node.new(:NOT, [val[1]]) result end -# reduce 77 omitted - # reduce 78 omitted # reduce 79 omitted @@ -763,6 +774,8 @@ def _reduce_76(val, _values, result) # reduce 81 omitted +# reduce 82 omitted + def _reduce_none(val, _values, result) val[0] end diff --git a/lib/nokogiri/css/parser.y b/lib/nokogiri/css/parser.y index 0431a479e8..e4f0969dfc 100644 --- a/lib/nokogiri/css/parser.y +++ b/lib/nokogiri/css/parser.y @@ -1,6 +1,6 @@ class Nokogiri::CSS::Parser -token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT +token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS MINUS GREATER S STRING IDENT token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE HAS @@ -143,13 +143,17 @@ rule raise Racc::ParseError, "parse 
error on IDENT '#{val[1]}'" end } - | IDENT PLUS NUMBER { # n+3, -n+3 + | IDENT PLUS NUMBER { # n+3 if val[0] == 'n' val.unshift("1") result = Node.new(:NTH, val) - elsif val[0] == '-n' - val[0] = 'n' - val.unshift("-1") + else + raise Racc::ParseError, "parse error on IDENT '#{val[0]}'" + end + } + | MINUS IDENT PLUS NUMBER { # -n+3 + if val[1] == 'n' + val[0] = '-1' result = Node.new(:NTH, val) else raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" diff --git a/lib/nokogiri/css/tokenizer.rb b/lib/nokogiri/css/tokenizer.rb index c548d9fa50..f23c5bbd5e 100644 --- a/lib/nokogiri/css/tokenizer.rb +++ b/lib/nokogiri/css/tokenizer.rb @@ -63,10 +63,10 @@ def _next_token when (text = @ss.scan(/has\([\s]*/)) action { [:HAS, text] } - when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*\([\s]*/)) + when (text = @ss.scan(/([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*\([\s]*/)) action { [:FUNCTION, text] } - when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*/)) + when (text = @ss.scan(/([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*/)) action { [:IDENT, text] } when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))+/)) @@ -120,6 +120,9 @@ def _next_token when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/)) action { [:NUMBER, text] } + when (text = @ss.scan(/[\s]*\-[\s]*/)) + action { [:MINUS, text] } + when (text = @ss.scan(/[\s]*\/\/[\s]*/)) action { [:DOUBLESLASH, text] } diff --git a/lib/nokogiri/css/tokenizer.rex 
b/lib/nokogiri/css/tokenizer.rex index 52500a5591..b9f15810c3 100644 --- a/lib/nokogiri/css/tokenizer.rex +++ b/lib/nokogiri/css/tokenizer.rex @@ -14,7 +14,6 @@ macro nmchar ([_A-Za-z0-9-]|{nonascii}|{escape}) nmstart ([_A-Za-z]|{nonascii}|{escape}) name {nmstart}{nmchar}* - ident -?{name} charref {nmchar}+ string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?