Skip to content

Commit 18a78cb

Browse files
committed
chore: adds memoize implementation for regexes.
Currently we create and allocate memory for every regex we compile. However, there are cases where the same regex is compiled over and over, e.g. corazawaf/coraza-caddy#76. Here we implement the memoize pattern so that compiled regexes can be reused, reducing memory consumption.
1 parent 493534d commit 18a78cb

File tree

12 files changed

+287
-15
lines changed

12 files changed

+287
-15
lines changed

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ require (
2424
github.com/petar-dambovaliev/aho-corasick v0.0.0-20211021192214-5ab2d9280aa9
2525
github.com/tidwall/gjson v1.14.4
2626
golang.org/x/net v0.11.0
27+
golang.org/x/sync v0.1.0
2728
rsc.io/binaryregexp v0.2.0
2829
)
2930

go.sum

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ golang.org/x/net v0.11.0/go.mod h1:2L/ixqYpgIVXmeoSA/4Lu7BzTG4KIyPIryS4IsOd1oQ=
3737
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
3838
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
3939
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
40+
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
4041
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
4142
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
4243
golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=

internal/corazawaf/rule.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"github.com/corazawaf/coraza/v3/experimental/plugins/macro"
1616
"github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes"
1717
"github.com/corazawaf/coraza/v3/internal/corazarules"
18+
"github.com/corazawaf/coraza/v3/internal/memoize"
1819
"github.com/corazawaf/coraza/v3/types"
1920
"github.com/corazawaf/coraza/v3/types/variables"
2021
)
@@ -456,7 +457,12 @@ func (r *Rule) AddVariable(v variables.RuleVariable, key string, iscount bool) e
456457
var re *regexp.Regexp
457458
if len(key) > 2 && key[0] == '/' && key[len(key)-1] == '/' {
458459
key = key[1 : len(key)-1]
459-
re = regexp.MustCompile(key)
460+
461+
if vare, err := memoize.Do(key, func() (interface{}, error) { return regexp.Compile(key) }); err != nil {
462+
panic(err)
463+
} else {
464+
re = vare.(*regexp.Regexp)
465+
}
460466
}
461467

462468
if multiphaseEvaluation {
@@ -521,7 +527,11 @@ func (r *Rule) AddVariableNegation(v variables.RuleVariable, key string) error {
521527
var re *regexp.Regexp
522528
if len(key) > 2 && key[0] == '/' && key[len(key)-1] == '/' {
523529
key = key[1 : len(key)-1]
524-
re = regexp.MustCompile(key)
530+
if vare, err := memoize.Do(key, func() (interface{}, error) { return regexp.Compile(key) }); err != nil {
531+
panic(err)
532+
} else {
533+
re = vare.(*regexp.Regexp)
534+
}
525535
}
526536
// Prevent SIGSEGV
527537
if r == nil {

internal/memoize/cache.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//go:build !tinygo
5+
6+
// Heavily inspired by https://github.com/patrickmn/go-cache/blob/master/cache.go
7+
8+
package memoize
9+
10+
import (
11+
"sync"
12+
)
13+
14+
// cache is a string-keyed store of arbitrary values whose map is guarded by a
// read/write mutex.
type cache struct {
	// mu guards entries: writers take the exclusive lock, readers the shared one.
	mu sync.RWMutex
	// entries holds the stored values by key.
	entries map[string]interface{}
}
18+
19+
func newCache() *cache {
20+
return &cache{
21+
entries: make(map[string]interface{}),
22+
}
23+
}
24+
25+
func (c *cache) set(key string, value interface{}) {
26+
c.mu.Lock()
27+
c.entries[key] = value
28+
c.mu.Unlock()
29+
}
30+
31+
func (c *cache) get(key string) (interface{}, bool) {
32+
c.mu.RLock()
33+
item, found := c.entries[key]
34+
if !found {
35+
c.mu.RUnlock()
36+
return nil, false
37+
}
38+
c.mu.RUnlock()
39+
return item, true
40+
}

internal/memoize/cache_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//go:build !tinygo
5+
6+
package memoize
7+
8+
import "testing"
9+
10+
func TestCache(t *testing.T) {
11+
tc := newCache()
12+
13+
_, found := tc.get("key1")
14+
if want, have := false, found; want != have {
15+
t.Fatalf("unexpected value, want %t, have %t", want, have)
16+
}
17+
18+
tc.set("key1", 1)
19+
20+
item, found := tc.get("key1")
21+
if want, have := true, found; want != have {
22+
t.Fatalf("unexpected value, want %t, have %t", want, have)
23+
}
24+
25+
if want, have := 1, item.(int); want != have {
26+
t.Fatalf("unexpected value, want %d, have %d", want, have)
27+
}
28+
}

internal/memoize/memoize.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//go:build !tinygo
5+
6+
// https://github.com/kofalt/go-memoize/blob/master/memoize.go
7+
8+
package memoize
9+
10+
import (
11+
"golang.org/x/sync/singleflight"
12+
)
13+
14+
// doer is the package-level memoizing function behind Do. It is built once
// from a fresh cache and a fresh singleflight group, so every caller of Do
// shares the same cache and the same key space.
var doer = makeDoer(newCache(), &singleflight.Group{})
15+
16+
// Do executes and returns the results of the given function, unless there was a cached
17+
// value of the same key. Only one execution is in-flight for a given key at a time.
18+
// The boolean return value indicates whether v was previously stored.
19+
func Do(key string, fn func() (interface{}, error)) (interface{}, error) {
20+
value, err, _ := doer(key, fn)
21+
return value, err
22+
}
23+
24+
// makeDoer returns a function that executes and returns the results of the given function
25+
func makeDoer(cache *cache, group *singleflight.Group) func(string, func() (interface{}, error)) (interface{}, error, bool) {
26+
return func(key string, fn func() (interface{}, error)) (interface{}, error, bool) {
27+
// Check cache
28+
value, found := cache.get(key)
29+
if found {
30+
return value, nil, true
31+
}
32+
33+
// Combine memoized function with a cache store
34+
value, err, _ := group.Do(key, func() (interface{}, error) {
35+
data, innerErr := fn()
36+
if innerErr == nil {
37+
cache.set(key, data)
38+
}
39+
40+
return data, innerErr
41+
})
42+
43+
return value, err, false
44+
}
45+
}

internal/memoize/memoize_test.go

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//go:build !tinygo
5+
6+
// https://github.com/kofalt/go-memoize/blob/master/memoize.go
7+
8+
package memoize
9+
10+
import (
11+
"errors"
12+
"testing"
13+
14+
"golang.org/x/sync/singleflight"
15+
)
16+
17+
func TestSuccessCall(t *testing.T) {
18+
do := makeDoer(newCache(), &singleflight.Group{})
19+
20+
expensiveCalls := 0
21+
22+
// Function tracks how many times its been called
23+
expensive := func() (interface{}, error) {
24+
expensiveCalls++
25+
return expensiveCalls, nil
26+
}
27+
28+
// First call SHOULD NOT be cached
29+
result, err, cached := do("key1", expensive)
30+
if err != nil {
31+
t.Fatalf("unexpected error: %s", err.Error())
32+
}
33+
34+
if want, have := 1, result.(int); want != have {
35+
t.Fatalf("unexpected value, want %d, have %d", want, have)
36+
}
37+
38+
if want, have := false, cached; want != have {
39+
t.Fatalf("unexpected caching, want %t, have %t", want, have)
40+
}
41+
42+
// Second call on same key SHOULD be cached
43+
result, err, cached = do("key1", expensive)
44+
if err != nil {
45+
t.Fatalf("unexpected error: %s", err.Error())
46+
}
47+
48+
if want, have := 1, result.(int); want != have {
49+
t.Fatalf("unexpected value, want %d, have %d", want, have)
50+
}
51+
52+
if want, have := true, cached; want != have {
53+
t.Fatalf("unexpected caching, want %t, have %t", want, have)
54+
}
55+
56+
// First call on a new key SHOULD NOT be cached
57+
result, err, cached = do("key2", expensive)
58+
if err != nil {
59+
t.Fatalf("unexpected error: %s", err.Error())
60+
}
61+
62+
if want, have := 2, result.(int); want != have {
63+
t.Fatalf("unexpected value, want %d, have %d", want, have)
64+
}
65+
66+
if want, have := false, cached; want != have {
67+
t.Fatalf("unexpected caching, want %t, have %t", want, have)
68+
}
69+
}
70+
71+
func TestFailedCall(t *testing.T) {
72+
do := makeDoer(newCache(), &singleflight.Group{})
73+
74+
calls := 0
75+
76+
// This function will fail IFF it has not been called before.
77+
twoForTheMoney := func() (interface{}, error) {
78+
calls++
79+
80+
if calls == 1 {
81+
return calls, errors.New("Try again")
82+
} else {
83+
return calls, nil
84+
}
85+
}
86+
87+
// First call should fail, and not be cached
88+
result, err, cached := do("key1", twoForTheMoney)
89+
if err == nil {
90+
t.Fatalf("expected error")
91+
}
92+
93+
if want, have := 1, result.(int); want != have {
94+
t.Fatalf("unexpected value, want %d, have %d", want, have)
95+
}
96+
97+
if want, have := false, cached; want != have {
98+
t.Fatalf("unexpected caching, want %t, have %t", want, have)
99+
}
100+
101+
// Second call should succeed, and not be cached
102+
result, err, cached = do("key1", twoForTheMoney)
103+
if err != nil {
104+
t.Fatalf("unexpected error: %s", err.Error())
105+
}
106+
107+
if want, have := 2, result.(int); want != have {
108+
t.Fatalf("unexpected value, want %d, have %d", want, have)
109+
}
110+
111+
if want, have := false, cached; want != have {
112+
t.Fatalf("unexpected caching, want %t, have %t", want, have)
113+
}
114+
115+
// Third call should succeed, and be cached
116+
result, err, cached = do("key1", twoForTheMoney)
117+
if err != nil {
118+
t.Fatalf("unexpected error: %s", err.Error())
119+
}
120+
121+
if want, have := 2, result.(int); want != have {
122+
t.Fatalf("unexpected value, want %d, have %d", want, have)
123+
}
124+
125+
if want, have := true, cached; want != have {
126+
t.Fatalf("unexpected caching, want %t, have %t", want, have)
127+
}
128+
}

internal/memoize/noop.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//go:build tinygo
5+
6+
package memoize
7+
8+
// Do is the variant of the memoizer compiled under the tinygo build tag. It
// performs no caching: the key is ignored and fn is invoked on every call,
// with its results returned unchanged.
func Do(_ string, fn func() (interface{}, error)) (interface{}, error) {
	value, err := fn()
	return value, err
}

internal/operators/restpath.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"strings"
1212

1313
"github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes"
14+
"github.com/corazawaf/coraza/v3/internal/memoize"
1415
)
1516

1617
// rePathTokenRe matches a `{...}` placeholder: an opening brace, one or more
// characters other than `}`, and a closing brace. Its single capture group
// holds the text between the braces.
var rePathTokenRe = regexp.MustCompile(`\{([^\}]+)\}`)
@@ -30,11 +31,12 @@ func newRESTPath(options plugintypes.OperatorOptions) (plugintypes.Operator, err
3031
for _, token := range rePathTokenRe.FindAllStringSubmatch(data, -1) {
3132
data = strings.Replace(data, token[0], fmt.Sprintf("(?P<%s>.*)", token[1]), 1)
3233
}
33-
re, err := regexp.Compile(data)
34+
35+
re, err := memoize.Do(data, func() (interface{}, error) { return regexp.Compile(data) })
3436
if err != nil {
3537
return nil, err
3638
}
37-
return &restpath{re: re}, nil
39+
return &restpath{re: re.(*regexp.Regexp)}, nil
3840
}
3941

4042
func (o *restpath) Evaluate(tx plugintypes.TransactionState, value string) bool {

internal/operators/rx.go

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"rsc.io/binaryregexp"
1515

1616
"github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes"
17+
"github.com/corazawaf/coraza/v3/internal/memoize"
1718
)
1819

1920
type rx struct {
@@ -35,15 +36,14 @@ func newRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) {
3536
return newBinaryRX(options)
3637
}
3738

38-
re, err := regexp.Compile(data)
39+
re, err := memoize.Do(data, func() (interface{}, error) { return regexp.Compile(data) })
3940
if err != nil {
4041
return nil, err
4142
}
42-
return &rx{re: re}, nil
43+
return &rx{re: re.(*regexp.Regexp)}, nil
4344
}
4445

4546
func (o *rx) Evaluate(tx plugintypes.TransactionState, value string) bool {
46-
4747
if tx.Capturing() {
4848
match := o.re.FindStringSubmatch(value)
4949
if len(match) == 0 {
@@ -72,15 +72,14 @@ var _ plugintypes.Operator = (*binaryRX)(nil)
7272
func newBinaryRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) {
7373
data := options.Arguments
7474

75-
re, err := binaryregexp.Compile(data)
75+
re, err := memoize.Do(data, func() (interface{}, error) { return binaryregexp.Compile(data) })
7676
if err != nil {
7777
return nil, err
7878
}
79-
return &binaryRX{re: re}, nil
79+
return &binaryRX{re: re.(*binaryregexp.Regexp)}, nil
8080
}
8181

8282
func (o *binaryRX) Evaluate(tx plugintypes.TransactionState, value string) bool {
83-
8483
if tx.Capturing() {
8584
match := o.re.FindStringSubmatch(value)
8685
if len(match) == 0 {

0 commit comments

Comments
 (0)