Skip to content

Commit 80a6c50

Browse files
committed
improve spaces check
1 parent 591d543 commit 80a6c50

File tree

8 files changed

+40
-10
lines changed

8 files changed

+40
-10
lines changed

.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ jobs:
66
strategy:
77
fail-fast: false
88
matrix:
9-
go-version: [1.15.x]
9+
go-version: [1.20.x]
1010
platform: [ubuntu-latest]
1111
runs-on: ${{ matrix.platform }}
1212
steps:

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
module github.com/wmentor/tokens
22

3-
go 1.15
3+
go 1.20
44

5-
require github.com/wmentor/tbuf v1.0.0 // indirect
5+
require github.com/wmentor/tbuf v1.0.1

go.sum

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,6 @@
1-
github.com/wmentor/tbuf v1.0.0 h1:KHfiIdOTWor7a/5dSoLovxgGuipLAAU1X4+U3jzdIZ4=
2-
github.com/wmentor/tbuf v1.0.0/go.mod h1:YvYY3BMph/UVPSIMbQoraxgr7+7DCAvYSSJHZk2gsBQ=
1+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
2+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
3+
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
4+
github.com/wmentor/tbuf v1.0.1 h1:IonHpWwR0Wyh3Jfu0AbGSqzVDzUZ1zU61ML5F1CdBno=
5+
github.com/wmentor/tbuf v1.0.1/go.mod h1:1lO+hvrkqqjEcR74vrNfBL3jg0NnpGHDWHeFxRsk7js=
6+
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=

runes/const.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// Copyright (c) 2023, Mikhail Kirillov <mikkirillov@yandex.ru>
2+
13
package runes
24

35
const (
@@ -47,4 +49,8 @@ const (
4749
TRADE rune = '™'
4850
UML rune = '¨'
4951
YEN rune = '¥'
52+
ZWSP rune = '\u200B'
53+
ZWNBSP rune = '\uFEFF'
54+
ZWJ rune = '\u200D'
55+
ZWNJ rune = '\u200C'
5056
)

tokenizer.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// Copyright (c) 2023, Mikhail Kirillov <mikkirillov@yandex.ru>
2+
13
package tokens
24

35
import (
@@ -7,6 +9,7 @@ import (
79
"unicode"
810

911
buffer "github.com/wmentor/tbuf"
12+
1013
"github.com/wmentor/tokens/runes"
1114
)
1215

@@ -176,7 +179,7 @@ func (t *Tokenizer) onRune(r rune) {
176179

177180
func (t *Tokenizer) state0(r rune) {
178181
switch {
179-
case unicode.IsSpace(r):
182+
case isSpace(r):
180183
t.mode = 0
181184

182185
case t.isAlNum(r):
@@ -235,7 +238,7 @@ func (t *Tokenizer) state1(r rune) {
235238
case t.isAlNum(r):
236239
t.mkr1.WriteRune(r)
237240

238-
case unicode.IsSpace(r):
241+
case isSpace(r):
239242
t.onToken(t.mkr1.String())
240243
t.mode = 0
241244

@@ -268,7 +271,7 @@ func (t *Tokenizer) state2(r rune) {
268271
t.onToken(string(r))
269272
t.mode = 0
270273

271-
case unicode.IsSpace(r):
274+
case isSpace(r):
272275
t.onToken(t.mkr1.String())
273276
t.onToken(string(t.prevRune))
274277
t.mode = 0
@@ -305,7 +308,7 @@ func (t *Tokenizer) state4(r rune) {
305308
}
306309

307310
func (t *Tokenizer) state5(r rune) {
308-
if unicode.IsSpace(r) {
311+
if isSpace(r) {
309312
t.onToken(t.mkr1.String())
310313
t.mode = 0
311314
} else {
@@ -323,7 +326,7 @@ func (t *Tokenizer) state6(r rune) {
323326
t.mkr1.Reset()
324327
t.mkr1.WriteRune('#')
325328

326-
case unicode.IsSpace(r):
329+
case isSpace(r):
327330
t.onToken(t.mkr1.String())
328331
t.mode = 0
329332

tokenizer_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// Copyright (c) 2023, Mikhail Kirillov <mikkirillov@yandex.ru>
2+
13
package tokens_test
24

35
import (

util.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright (c) 2023, Mikhail Kirillov <mikkirillov@yandex.ru>
2+
3+
package tokens
4+
5+
import (
6+
"unicode"
7+
8+
"github.com/wmentor/tokens/runes"
9+
)
10+
11+
// isSpace reports whether r should be treated as a separator by the tokenizer.
// It returns true when unicode.IsSpace(r) is true, or when r is one of the
// zero-width runes declared in the runes package: ZWSP (U+200B),
// ZWNBSP (U+FEFF), ZWJ (U+200D) or ZWNJ (U+200C).
//
// NOTE(review): ZWJ and ZWNJ are joiner controls that can appear inside a
// single word or emoji sequence in some scripts; treating them as spaces
// will end the current token at that point — confirm this is intended.
func isSpace(r rune) bool {
12+
return unicode.IsSpace(r) || r == runes.ZWSP || r == runes.ZWNBSP || r == runes.ZWJ || r == runes.ZWNJ
13+
}

vars.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// Copyright (c) 2023, Mikhail Kirillov <mikkirillov@yandex.ru>
2+
13
package tokens
24

35
import (

0 commit comments

Comments
 (0)