Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't output extra whitespace in YAML multiline #993

Merged
merged 1 commit into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lexers/embedded/yaml.xml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
<bygroups>
<token type="Punctuation"/>
<token type="LiteralStringDoc"/>
<token type="TextWhitespace"/>
<token type="Ignore"/>
</bygroups>
</rule>
<rule pattern="(false|False|FALSE|true|True|TRUE|null|Off|off|yes|Yes|YES|OFF|On|ON|no|No|on|NO|n|N|Y|y)\b">
Expand Down
40 changes: 20 additions & 20 deletions lexers/testdata/yaml.expected
Original file line number Diff line number Diff line change
Expand Up @@ -191,69 +191,69 @@
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|"},
{"type":"LiteralStringDoc","value":"\n This entire block of text will be the value of the 'literal_block' key,\n with line breaks being preserved.\n\n The literal continues until de-dented, and the leading indentation is\n stripped.\n\n Any lines that are 'more-indented' keep the rest of their indentation -\n these lines will be indented by 4 spaces."},
{"type":"TextWhitespace","value":" \n"},
{"type":"TextWhitespace","value":"\n"},
{"type":"NameTag","value":"folded_style"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"\u003e"},
{"type":"LiteralStringDoc","value":"\n This entire block of text will be the value of 'folded_style', but this\n time, all newlines will be replaced with a single space.\n\n Blank lines, like above, are converted to a newline character.\n\n 'More-indented' lines keep their newlines, too -\n this text will appear over two lines."},
{"type":"TextWhitespace","value":" \n"},
{"type":"TextWhitespace","value":"\n"},
{"type":"NameTag","value":"literal_block_with_strip_chomping"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|-"},
{"type":"LiteralStringDoc","value":"\n This entire block of text will be the value of the 'literal_block' key,\n with line breaks being preserved and the strip chomping indicator.\n\n The literal continues until de-dented, and the leading indentation is\n stripped.\n\n Any lines that are 'more-indented' keep the rest of their indentation -\n these lines will be indented by 4 spaces."},
{"type":"TextWhitespace","value":" \n"},
{"type":"TextWhitespace","value":"\n"},
{"type":"NameTag","value":"literal_block_with_keep_chomping"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|+"},
{"type":"LiteralStringDoc","value":"\n This entire block of text will be the value of the 'literal_block' key,\n with line breaks being preserved and the keep chomping indicator.\n\n The literal continues until de-dented, and the leading indentation is\n stripped."},
{"type":"TextWhitespace","value":" \n\n"},
{"type":"TextWhitespace","value":"\n\n"},
{"type":"NameTag","value":"a"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|"},
{"type":"LiteralStringDoc","value":"\n multiline literal\n line 2"},
{"type":"TextWhitespace","value":" \n"},
{"type":"TextWhitespace","value":"\n"},
{"type":"NameTag","value":"b"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"\u003e"},
{"type":"LiteralStringDoc","value":"\n multiline: folded\n line 2"},
{"type":"TextWhitespace","value":" \n"},
{"type":"TextWhitespace","value":"\n"},
{"type":"NameTag","value":"c"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|-"},
{"type":"LiteralStringDoc","value":"\n multiline # literal strip\n line 2"},
{"type":"TextWhitespace","value":" \n"},
{"type":"TextWhitespace","value":"\n"},
{"type":"NameTag","value":"d"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"\u003e-"},
{"type":"LiteralStringDoc","value":"\n multiline folded strip\n line 2: test\n\n # not a comment\n indented by 1"},
{"type":"TextWhitespace","value":" \n"},
{"type":"TextWhitespace","value":"\n"},
{"type":"NameTag","value":"e"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|+"},
{"type":"LiteralStringDoc","value":"\n multiline literal keep\n line: 2"},
{"type":"TextWhitespace","value":" \n"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Comment","value":"# this is a comment"},
{"type":"TextWhitespace","value":"\n"},
{"type":"NameTag","value":"f"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"\u003e+"},
{"type":"LiteralStringDoc","value":"\n multiline folded keep one space\n line 2"},
{"type":"TextWhitespace","value":" \n"},
{"type":"TextWhitespace","value":"\n"},
{"type":"NameTag","value":"g"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|"},
{"type":"LiteralStringDoc","value":"\n multiline literal with only one line"},
{"type":"TextWhitespace","value":" \n"},
{"type":"TextWhitespace","value":"\n"},
{"type":"NameTag","value":"h"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
Expand All @@ -267,45 +267,45 @@
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|"},
{"type":"LiteralStringDoc","value":"\n multiline literal\n line 2"},
{"type":"TextWhitespace","value":" \n "},
{"type":"TextWhitespace","value":"\n "},
{"type":"NameTag","value":"b"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"\u003e"},
{"type":"LiteralStringDoc","value":"\n multiline: folded\n line 2"},
{"type":"TextWhitespace","value":" \n "},
{"type":"TextWhitespace","value":"\n "},
{"type":"NameTag","value":"c"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|-"},
{"type":"LiteralStringDoc","value":"\n multiline # literal strip\n line 2 6 leading spaces"},
{"type":"TextWhitespace","value":" \n\n "},
{"type":"TextWhitespace","value":"\n\n "},
{"type":"NameTag","value":"d"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"\u003e-"},
{"type":"LiteralStringDoc","value":"\n multiline folded strip\n line 2: test\n # not a comment"},
{"type":"TextWhitespace","value":" \n "},
{"type":"TextWhitespace","value":"\n "},
{"type":"NameTag","value":"e"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|+"},
{"type":"LiteralStringDoc","value":"\n multiline literal keep\n line: 2"},
{"type":"TextWhitespace","value":" \n "},
{"type":"TextWhitespace","value":"\n "},
{"type":"Comment","value":"# this is a comment"},
{"type":"TextWhitespace","value":"\n "},
{"type":"NameTag","value":"f"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"\u003e+"},
{"type":"LiteralStringDoc","value":"\n multiline folded keep\n line 2"},
{"type":"TextWhitespace","value":" \n "},
{"type":"TextWhitespace","value":"\n "},
{"type":"NameTag","value":"g"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|"},
{"type":"LiteralStringDoc","value":"\n multiline literal with only one line"},
{"type":"TextWhitespace","value":" \n "},
{"type":"TextWhitespace","value":"\n "},
{"type":"NameTag","value":"h"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
Expand Down Expand Up @@ -355,7 +355,7 @@
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|"},
{"type":"LiteralStringDoc","value":"\n This is a key\n that has multiple lines"},
{"type":"TextWhitespace","value":" \n"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Literal","value":"and this is its value"},
Expand Down Expand Up @@ -622,7 +622,7 @@
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"|"},
{"type":"LiteralStringDoc","value":"\n R0lGODlhDAAMAIQAAP//9/X17unp5WZmZgAAAOfn515eXvPz7Y6OjuDg4J+fn5\n OTk6enp56enmlpaWNjY6Ojo4SEhP/++f/++f/++f/++f/++f/++f/++f/++f/+\n +f/++f/++f/++f/++f/++SH+Dk1hZGUgd2l0aCBHSU1QACwAAAAADAAMAAAFLC\n AgjoEwnuNAFOhpEMTRiggcz4BNJHrv/zCFcLiwMWYNG84BwwEeECcgggoBADs="},
{"type":"TextWhitespace","value":" \n\n"},
{"type":"TextWhitespace","value":"\n\n"},
{"type":"Comment","value":"# YAML also has a set type, which looks like this:"},
{"type":"TextWhitespace","value":"\n"},
{"type":"NameTag","value":"set"},
Expand Down
6 changes: 6 additions & 0 deletions regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,9 @@ func (l *LexerState) Iterator() Token { // nolint: gocognit
for len(l.iteratorStack) > 0 {
n := len(l.iteratorStack) - 1
t := l.iteratorStack[n]()
if t.Type == Ignore {
continue
}
if t == EOF {
l.iteratorStack = l.iteratorStack[:n]
continue
Expand Down Expand Up @@ -243,6 +246,9 @@ func (l *LexerState) Iterator() Token { // nolint: gocognit
for len(l.iteratorStack) > 0 {
n := len(l.iteratorStack) - 1
t := l.iteratorStack[n]()
if t.Type == Ignore {
continue
}
if t == EOF {
l.iteratorStack = l.iteratorStack[:n]
continue
Expand Down
11 changes: 11 additions & 0 deletions regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,14 @@ func TestByGroupNames(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, []Token{{Error, `abc=123`}}, it.Tokens())
}

func TestIgnoreToken(t *testing.T) {
l := Coalesce(mustNewLexer(t, &Config{EnsureNL: true}, Rules{ // nolint: forbidigo
"root": {
{`(\s*)(\w+)(?:\1)(\n)`, ByGroups(Ignore, Keyword, Whitespace), nil},
},
}))
it, err := l.Tokenise(nil, ` hello `)
assert.NoError(t, err)
assert.Equal(t, []Token{{Keyword, "hello"}, {TextWhitespace, "\n"}}, it.Tokens())
}
Loading