Skip to content

Commit d67cb15

Browse files
committed
Fix bug with parsing unfinished quoted strings
1 parent f6f2be3 commit d67cb15

File tree

4 files changed

+249
-190
lines changed

4 files changed

+249
-190
lines changed

selector/lexer/token.go

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -106,15 +106,6 @@ func (t Token) IsKind(kind ...TokenKind) bool {
106106
return slices.Contains(kind, t.Kind)
107107
}
108108

109-
type PositionalError struct {
110-
Pos int
111-
Err error
112-
}
113-
114-
func (e *PositionalError) Error() string {
115-
return fmt.Sprintf("%v. Position %d.", e.Pos, e.Err)
116-
}
117-
118109
type UnexpectedTokenError struct {
119110
Pos int
120111
Token rune
@@ -123,3 +114,11 @@ type UnexpectedTokenError struct {
123114
// Error implements the error interface. The message names the offending
// rune (e.Token, rendered as a string) and the position stored in e.Pos.
func (e *UnexpectedTokenError) Error() string {
124115
return fmt.Sprintf("failed to tokenize: unexpected token: %s at position %d.", string(e.Token), e.Pos)
125116
}
117+
118+
// UnexpectedEOFError reports that the tokenizer ran out of input while a
// token was still open. In this commit it is returned when a quoted string
// has no closing quote; Pos is the position the scan had reached at that point.
type UnexpectedEOFError struct {
119+
Pos int
120+
}
121+
122+
// Error implements the error interface, reporting the position stored in e.Pos.
func (e *UnexpectedEOFError) Error() string {
123+
return fmt.Sprintf("failed to tokenize: unexpected EOF at position %d.", e.Pos)
124+
}

selector/lexer/tokenize.go

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -160,25 +160,35 @@ func (p *Tokenizer) parseCurRune() (Token, error) {
160160
pos := p.i
161161
buf := make([]rune, 0)
162162
pos++
163-
var escaped bool
163+
foundCloseRune := false
164164
for pos < p.srcLen {
165-
if p.src[pos] == p.src[p.i] && !escaped {
165+
if p.src[pos] == p.src[p.i] {
166+
foundCloseRune = true
166167
break
167168
}
168-
if escaped {
169-
escaped = false
170-
buf = append(buf, rune(p.src[pos]))
171-
pos++
172-
continue
173-
}
174169
if p.src[pos] == '\\' {
175170
pos++
176-
escaped = true
171+
buf = append(buf, rune(p.src[pos]))
172+
pos++
177173
continue
178174
}
179175
buf = append(buf, rune(p.src[pos]))
180176
pos++
181177
}
178+
if !foundCloseRune {
179+
// We didn't find a closing quote.
180+
if pos < p.srcLen {
181+
// This shouldn't be possible.
182+
return Token{}, &UnexpectedTokenError{
183+
Pos: p.i,
184+
Token: rune(p.src[pos]),
185+
}
186+
}
187+
// This can happen if the selector ends before the closing quote.
188+
return Token{}, &UnexpectedEOFError{
189+
Pos: pos,
190+
}
191+
}
182192
res := NewToken(String, string(buf), p.i, pos+1-p.i)
183193
return res, nil
184194
default:

selector/lexer/tokenize_test.go

Lines changed: 113 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,83 +1,135 @@
1-
package lexer
1+
package lexer_test
22

3-
import "testing"
3+
import (
4+
"errors"
5+
"testing"
46

5-
func TestTokenizer_Parse(t *testing.T) {
6-
type testCase struct {
7-
in string
8-
out []TokenKind
7+
"github.com/tomwright/dasel/v3/selector/lexer"
8+
)
9+
10+
// testCase is a happy-path tokenizer case: in is the selector text to
// tokenize and out is the expected sequence of token kinds, in order.
type testCase struct {
11+
in string
12+
out []lexer.TokenKind
13+
}
14+
15+
// run tokenizes tc.in and checks the result against tc.out. It aborts the
// test if Tokenize returns an error or if the number of tokens differs from
// len(tc.out); otherwise it compares token kinds pairwise. Only the Kind of
// each token is checked.
func (tc testCase) run(t *testing.T) {
16+
tok := lexer.NewTokenizer(tc.in)
17+
tokens, err := tok.Tokenize()
18+
if err != nil {
19+
t.Fatalf("unexpected error: %v", err)
20+
}
21+
if len(tokens) != len(tc.out) {
22+
t.Fatalf("unexpected number of tokens: %d", len(tokens))
23+
}
24+
for i := range tokens {
25+
if tokens[i].Kind != tc.out[i] {
26+
t.Errorf("unexpected token kind at position %d: exp %v, got %v", i, tc.out[i], tokens[i].Kind)
27+
// Report only the first mismatch.
return
28+
}
929
}
30+
}
31+
32+
// errTestCase is a failure-path tokenizer case: in is the selector text to
// tokenize and match decides whether the error returned by Tokenize is the
// expected one.
type errTestCase struct {
33+
in string
34+
match func(error) bool
35+
}
1036

11-
runTest := func(tc testCase) func(t *testing.T) {
12-
return func(t *testing.T) {
13-
tok := NewTokenizer(tc.in)
14-
tokens, err := tok.Tokenize()
15-
if err != nil {
16-
t.Fatalf("unexpected error: %v", err)
17-
}
18-
if len(tokens) != len(tc.out) {
19-
t.Fatalf("unexpected number of tokens: %d", len(tokens))
20-
}
21-
for i := range tokens {
22-
if tokens[i].Kind != tc.out[i] {
23-
t.Errorf("unexpected token kind at position %d: exp %v, got %v", i, tc.out[i], tokens[i].Kind)
24-
return
25-
}
26-
}
37+
// run tokenizes tc.in and reports a test error if tc.match rejects the
// returned error, or if Tokenize returned a non-nil token slice. Both checks
// use t.Errorf, so both are always evaluated.
func (tc errTestCase) run(t *testing.T) {
38+
tok := lexer.NewTokenizer(tc.in)
39+
tokens, err := tok.Tokenize()
40+
if !tc.match(err) {
41+
t.Errorf("unexpected error, got %v", err)
42+
}
43+
// A failed Tokenize call is expected to return no tokens at all.
if tokens != nil {
44+
t.Errorf("unexpected tokens: %v", tokens)
45+
}
46+
}
47+
48+
// matchUnexpectedError returns a predicate that reports whether an error is
// (or wraps) a *lexer.UnexpectedTokenError whose Token equals r and whose Pos
// equals p. errors.As returns false for a nil error, so nil never matches.
func matchUnexpectedError(r rune, p int) func(error) bool {
49+
return func(err error) bool {
50+
var e *lexer.UnexpectedTokenError
51+
if !errors.As(err, &e) {
52+
return false
2753
}
54+
55+
return e.Token == r && e.Pos == p
2856
}
57+
}
58+
59+
// matchUnexpectedEOFError returns a predicate that reports whether an error
// is (or wraps) a *lexer.UnexpectedEOFError whose Pos equals p. errors.As
// returns false for a nil error, so nil never matches.
func matchUnexpectedEOFError(p int) func(error) bool {
60+
return func(err error) bool {
61+
var e *lexer.UnexpectedEOFError
62+
if !errors.As(err, &e) {
63+
return false
64+
}
2965

30-
t.Run("variables", runTest(testCase{
66+
return e.Pos == p
67+
}
68+
}
69+
70+
func TestTokenizer_Parse(t *testing.T) {
71+
t.Run("variables", testCase{
3172
in: "$foo $bar123 $baz $",
32-
out: []TokenKind{
33-
Variable,
34-
Variable,
35-
Variable,
36-
Dollar,
73+
out: []lexer.TokenKind{
74+
lexer.Variable,
75+
lexer.Variable,
76+
lexer.Variable,
77+
lexer.Dollar,
3778
},
38-
}))
79+
}.run)
3980

40-
t.Run("if", runTest(testCase{
81+
t.Run("if", testCase{
4182
in: `if elseif else`,
42-
out: []TokenKind{
43-
If,
44-
ElseIf,
45-
Else,
83+
out: []lexer.TokenKind{
84+
lexer.If,
85+
lexer.ElseIf,
86+
lexer.Else,
4687
},
47-
}))
88+
}.run)
4889

49-
t.Run("regex", runTest(testCase{
90+
t.Run("regex", testCase{
5091
in: `r/asd/ r/hello there/`,
51-
out: []TokenKind{
52-
RegexPattern,
53-
RegexPattern,
92+
out: []lexer.TokenKind{
93+
lexer.RegexPattern,
94+
lexer.RegexPattern,
5495
},
55-
}))
96+
}.run)
5697

57-
t.Run("sort by", runTest(testCase{
98+
t.Run("sort by", testCase{
5899
in: `sortBy(foo, asc)`,
59-
out: []TokenKind{
60-
SortBy,
61-
OpenParen,
62-
Symbol,
63-
Comma,
64-
Asc,
65-
CloseParen,
100+
out: []lexer.TokenKind{
101+
lexer.SortBy,
102+
lexer.OpenParen,
103+
lexer.Symbol,
104+
lexer.Comma,
105+
lexer.Asc,
106+
lexer.CloseParen,
66107
},
67-
}))
108+
}.run)
68109

69-
t.Run("everything", runTest(testCase{
110+
t.Run("everything", testCase{
70111
in: "foo.bar.baz[1] != 42.123 || foo.bar.baz['hello'] == 42 && x == 'a\\'b' + false true . .... asd... $name null",
71-
out: []TokenKind{
72-
Symbol, Dot, Symbol, Dot, Symbol, OpenBracket, Number, CloseBracket, NotEqual, Number,
73-
Or,
74-
Symbol, Dot, Symbol, Dot, Symbol, OpenBracket, String, CloseBracket, Equal, Number,
75-
And,
76-
Symbol, Equal, String,
77-
Plus, Bool, Bool,
78-
Dot, Spread, Dot,
79-
Symbol, Spread,
80-
Variable, Null,
112+
out: []lexer.TokenKind{
113+
lexer.Symbol, lexer.Dot, lexer.Symbol, lexer.Dot, lexer.Symbol, lexer.OpenBracket, lexer.Number, lexer.CloseBracket, lexer.NotEqual, lexer.Number,
114+
lexer.Or,
115+
lexer.Symbol, lexer.Dot, lexer.Symbol, lexer.Dot, lexer.Symbol, lexer.OpenBracket, lexer.String, lexer.CloseBracket, lexer.Equal, lexer.Number,
116+
lexer.And,
117+
lexer.Symbol, lexer.Equal, lexer.String,
118+
lexer.Plus, lexer.Bool, lexer.Bool,
119+
lexer.Dot, lexer.Spread, lexer.Dot,
120+
lexer.Symbol, lexer.Spread,
121+
lexer.Variable, lexer.Null,
81122
},
82-
}))
123+
}.run)
124+
125+
t.Run("unhappy", func(t *testing.T) {
126+
t.Run("unfinished double quote", errTestCase{
127+
in: `"hello`,
128+
match: matchUnexpectedEOFError(6),
129+
}.run)
130+
t.Run("unfinished single quote", errTestCase{
131+
in: `'hello`,
132+
match: matchUnexpectedEOFError(6),
133+
}.run)
134+
})
83135
}

0 commit comments

Comments
 (0)