Skip to content

Commit 200e590

Browse files
eskip: improve lexer performance 2 (#2870)
* eskip: add BenchmarkParse

  Add a benchmark for parsing 10000 routes.

  Signed-off-by: Alexander Yastrebov <[email protected]>

* eskip: improve lexer performance 2

  * use plain ascii instead of unicode package
  * use loop for scanSymbol
  * call scan functions directly instead of selectScanner to aid inlining

  ```
  goos: linux
  goarch: amd64
  pkg: github.com/zalando/skipper/eskip
                    │    HEAD~1     │                HEAD                 │
                    │    sec/op     │    sec/op     vs base               │
  ParsePredicates-8   9.637µ ± 11%    8.894µ ± 4%    -7.71% (p=0.001 n=10)
  Parse-8             329.1m ±  4%    272.7m ± 2%   -17.15% (p=0.000 n=10)
  geomean             1.781m          1.557m        -12.56%

                    │    HEAD~1     │                HEAD                 │
                    │     B/op      │     B/op      vs base               │
  ParsePredicates-8   2.008Ki ± 0%    2.008Ki ± 0%        ~ (p=1.000 n=10)
  Parse-8             49.94Mi ± 0%    49.94Mi ± 0%        ~ (p=0.926 n=10)
  geomean             320.4Ki         320.4Ki        -0.00%

                    │    HEAD~1     │                HEAD                 │
                    │   allocs/op   │   allocs/op   vs base               │
  ParsePredicates-8   33.00 ± 0%      33.00 ± 0%          ~ (p=1.000 n=10) ¹
  Parse-8             1.100M ± 0%     1.100M ± 0%         ~ (p=0.367 n=10)
  geomean             6.025k          6.025k         +0.00%
  ¹ all samples are equal
  ```

  See previous #2755

  Signed-off-by: Alexander Yastrebov <[email protected]>

---------

Signed-off-by: Alexander Yastrebov <[email protected]>
1 parent e63fe2c commit 200e590

File tree

3 files changed

+72
-41
lines changed

3 files changed

+72
-41
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,5 @@ opentracingplugin/build
2323
build/
2424
skptesting/lorem.html
2525
.vscode/*
26+
*.test
27+

eskip/eskip_test.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package eskip
33
import (
44
"reflect"
55
"regexp"
6+
"strings"
67
"testing"
78

89
"github.com/google/go-cmp/cmp"
@@ -849,6 +850,39 @@ func BenchmarkParsePredicates(b *testing.B) {
849850
}
850851
}
851852

853+
func BenchmarkParse(b *testing.B) {
854+
doc := strings.Repeat(`xxxx_xx__xxxxx__xxx_xxxxxxxx_xxxxxxxxxx_xxxxxxx_xxxxxxx_xxxxxxx_xxxxx__xxx__40_0:
855+
Path("/xxxxxxxxx/:xxxxxxxx_xx/xxxxxxxx-xxxxxxxxxx-xxxxxxxxx")
856+
&& Host("^(xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx-xxxx-18[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?|xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx-xxxx-19[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?|xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx-xxxx-20[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?|xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx-xxxx-21[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?|xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?|xxxxxxxxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx[.]xxxxxxxxxxx[.]xxx[.]?(:[0-9]+)?)$")
857+
&& Host("^(xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx-xxxx-21[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?)$")
858+
&& Weight(4)
859+
&& Method("GET")
860+
&& JWTPayloadAllKV("xxxxx://xxxxxxxx.xxxxxxx.xxx/xxxxx", "xxxxx")
861+
&& Header("X-Xxxxxxxxx-Xxxxx", "xxxxx")
862+
-> disableAccessLog(2, 3, 40, 500)
863+
-> fifo(1000, 100, "10s")
864+
-> apiUsageMonitoring("{\"xxx_xx\":\"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx\",\"xxxxxxxxxxx_xx\":\"xxx-xxxxxxxx-xxxxxxxxxx\",\"xxxx_xxxxxxxxx\":[\"/xxxxxxxxx/{xxxxxxxx_xx}/xxxxxxxx-xxxxxxxxxx\",\"/xxxxxxxxx/{xxxxxxxx_xx}/xxxxxxxx-xxxxxxxxxx-xxxxxxx\",\"/xxxxxxxxx/{xxxxxxxx_xx}/xxxxxxxx-xxxxxxxxxx-xxxxxxxxx\"]}")
865+
-> oauthTokeninfoAnyKV("xxxxx", "/xxxxxxxxx")
866+
-> unverifiedAuditLog("xxxxx://xxxxxxxx.xxxxxxx.xxx/xxxxxxx-xx")
867+
-> oauthTokeninfoAllScope("xxx")
868+
-> flowId("reuse")
869+
-> forwardToken("X-XxxxxXxxx-Xxxxxxx", "xxx", "xxxxx", "xxxxx")
870+
-> stateBagToTag("xxxx-xxxx", "xxxxxx.xxx")
871+
-> <powerOfRandomNChoices, "http://1.2.1.1:8080", "http://1.2.1.2:8080", "http://1.2.1.3:8080", "http://1.2.1.4:8080", "http://1.2.1.5:8080">;
872+
`, 10_000)
873+
874+
_, err := Parse(doc)
875+
if err != nil {
876+
b.Fatal(err)
877+
}
878+
879+
b.ReportAllocs()
880+
b.ResetTimer()
881+
for i := 0; i < b.N; i++ {
882+
_, _ = Parse(doc)
883+
}
884+
}
885+
852886
var stringSink string
853887

854888
func BenchmarkRouteString(b *testing.B) {

eskip/lexer.go

Lines changed: 36 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"errors"
55
"fmt"
66
"strings"
7-
"unicode"
87
)
98

109
type token struct {
@@ -14,14 +13,6 @@ type token struct {
1413

1514
type charPredicate func(byte) bool
1615

17-
type scanner interface {
18-
scan(string) (token, string, error)
19-
}
20-
21-
type scannerFunc func(string) (token, string, error)
22-
23-
func (sf scannerFunc) scan(code string) (token, string, error) { return sf(code) }
24-
2516
type eskipLex struct {
2617
code string
2718
lastToken string
@@ -78,11 +69,11 @@ func (l *eskipLex) init(code string) {
7869

7970
func isNewline(c byte) bool { return c == newlineChar }
8071
func isUnderscore(c byte) bool { return c == underscore }
81-
func isAlpha(c byte) bool { return unicode.IsLetter(rune(c)) }
82-
func isDigit(c byte) bool { return unicode.IsDigit(rune(c)) }
83-
func isSymbolChar(c byte) bool { return isUnderscore(c) || isAlpha(c) || isDigit(c) }
72+
func isAlpha(c byte) bool { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') }
73+
func isDigit(c byte) bool { return c >= '0' && c <= '9' }
74+
func isSymbolChar(c byte) bool { return isAlpha(c) || isDigit(c) || isUnderscore(c) }
8475
func isDecimalChar(c byte) bool { return c == decimalChar }
85-
func isNumberChar(c byte) bool { return isDecimalChar(c) || isDigit(c) }
76+
func isNumberChar(c byte) bool { return isDigit(c) || isDecimalChar(c) }
8677

8778
func scanWhile(code string, p charPredicate) (string, string) {
8879
for i := 0; i < len(code); i++ {
@@ -277,74 +268,78 @@ func scanNumber(code string) (t token, rest string, err error) {
277268

278269
func scanSymbol(code string) (t token, rest string, err error) {
279270
t.id = symbol
280-
t.val, rest = scanWhile(code, isSymbolChar)
271+
for i := 0; i < len(code); i++ {
272+
if !isSymbolChar(code[i]) {
273+
t.val, rest = code[0:i], code[i:]
274+
return
275+
}
276+
}
277+
t.val, rest = code, ""
281278
return
282279
}
283280

284-
func selectScanner(code string) scanner {
281+
func scan(code string) (token, string, error) {
285282
switch code[0] {
286283
case ',':
287-
return commaToken
284+
return commaToken.scan(code)
288285
case ')':
289-
return closeparenToken
286+
return closeparenToken.scan(code)
290287
case '(':
291-
return openparenToken
288+
return openparenToken.scan(code)
292289
case ':':
293-
return colonToken
290+
return colonToken.scan(code)
294291
case ';':
295-
return semicolonToken
292+
return semicolonToken.scan(code)
296293
case '>':
297-
return closearrowToken
294+
return closearrowToken.scan(code)
298295
case '*':
299-
return anyToken
296+
return anyToken.scan(code)
300297
case '&':
301298
if len(code) >= 2 && code[1] == '&' {
302-
return andToken
299+
return andToken.scan(code)
303300
}
304301
case '-':
305302
if len(code) >= 2 && code[1] == '>' {
306-
return arrowToken
303+
return arrowToken.scan(code)
307304
}
308305
case '/':
309-
return scannerFunc(scanRegexpOrComment)
306+
return scanRegexpOrComment(code)
310307
case '"':
311-
return scannerFunc(scanDoubleQuote)
308+
return scanDoubleQuote(code)
312309
case '`':
313-
return scannerFunc(scanBacktick)
310+
return scanBacktick(code)
314311
case '<':
315312
for _, tok := range openarrowPrefixedTokens {
316313
if strings.HasPrefix(code, tok.val) {
317-
return tok
314+
return tok.scan(code)
318315
}
319316
}
320-
return openarrowToken
317+
return openarrowToken.scan(code)
321318
}
322319

323320
if isNumberChar(code[0]) {
324-
return scannerFunc(scanNumber)
321+
return scanNumber(code)
325322
}
326323

327324
if isAlpha(code[0]) || isUnderscore(code[0]) {
328-
return scannerFunc(scanSymbol)
325+
return scanSymbol(code)
329326
}
330327

331-
return nil
328+
return token{}, "", unexpectedToken
332329
}
333330

334-
func (l *eskipLex) next() (t token, err error) {
331+
func (l *eskipLex) next() (token, error) {
335332
l.code = scanWhitespace(l.code)
336333
if len(l.code) == 0 {
337-
err = eof
338-
return
334+
return token{}, eof
339335
}
340336

341-
s := selectScanner(l.code)
342-
if s == nil {
343-
err = unexpectedToken
344-
return
337+
t, rest, err := scan(l.code)
338+
if err == unexpectedToken {
339+
return token{}, err
345340
}
341+
l.code = rest
346342

347-
t, l.code, err = s.scan(l.code)
348343
if err == void {
349344
return l.next()
350345
}
@@ -353,7 +348,7 @@ func (l *eskipLex) next() (t token, err error) {
353348
l.lastToken = t.val
354349
}
355350

356-
return
351+
return t, err
357352
}
358353

359354
func (l *eskipLex) Lex(lval *eskipSymType) int {

0 commit comments

Comments
 (0)