Skip to content

Commit

Permalink
refactor css/tokenizer.rb
Browse files Browse the repository at this point in the history
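- use parentheses more deliberately in the rexical macros: wrap each alternation in its own group so a macro can be expanded inside another macro (or quantified) without extra grouping at the use site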
- rename the CSS ID token macro from `name` to `charref` to match XPath CharRef
- extract a `name` macro (`{nmstart}{nmchar}*`) that matches XPath Name, and define `ident` in terms of it (`-?{name}`)
  • Loading branch information
flavorjones committed May 24, 2024
1 parent 0aa46e1 commit 3d3238c
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 12 deletions.
8 changes: 4 additions & 4 deletions lib/nokogiri/css/tokenizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,13 @@ def _next_token
when (text = @ss.scan(/has\([\s]*/))
action { [:HAS, text] }

when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*\([\s]*/))
action { [:FUNCTION, text] }

when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*/))
action { [:IDENT, text] }

when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))+/))
action { [:HASH, text] }

when (text = @ss.scan(/[\s]*~=[\s]*/))
Expand Down Expand Up @@ -132,7 +132,7 @@ def _next_token
when (text = @ss.scan(/[\s]+/))
action { [:S, text] }

when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
when (text = @ss.scan(/("([^\n\r\f"]|(\n|\r\n|\r|\f)|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|(\n|\r\n|\r|\f)|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*(?<!\\)(?:\\{2})*')/))
action { [:STRING, text] }

when (text = @ss.scan(/./))
Expand Down
17 changes: 9 additions & 8 deletions lib/nokogiri/css/tokenizer.rex
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,21 @@ module CSS
class Tokenizer

macro
nl \n|\r\n|\r|\f
nl (\n|\r\n|\r|\f)
w [\s]*
nonascii [^\0-\177]
num -?([0-9]+|[0-9]*\.[0-9]+)
unicode \\[0-9A-Fa-f]{1,6}(\r\n|[\s])?

escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
nmstart [_A-Za-z]|{nonascii}|{escape}
ident -?({nmstart})({nmchar})*
name ({nmchar})+
escape ({unicode}|\\[^\n\r\f0-9A-Fa-f])
nmchar ([_A-Za-z0-9-]|{nonascii}|{escape})
nmstart ([_A-Za-z]|{nonascii}|{escape})
name {nmstart}{nmchar}*
ident -?{name}
charref {nmchar}+
string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*"
string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*'
string {string1}|{string2}
string ({string1}|{string2})
rule
Expand All @@ -26,7 +27,7 @@ rule
has\({w} { [:HAS, text] }
{ident}\({w} { [:FUNCTION, text] }
{ident} { [:IDENT, text] }
\#{name} { [:HASH, text] }
\#{charref} { [:HASH, text] }
{w}~={w} { [:INCLUDES, text] }
{w}\|={w} { [:DASHMATCH, text] }
{w}\^={w} { [:PREFIXMATCH, text] }
Expand Down

0 comments on commit 3d3238c

Please sign in to comment.