Skip to content

Commit 42afb3a

Browse files
committed
feat: match prefix or suffix
1 parent 32c3e96 commit 42afb3a

4 files changed

Lines changed: 39 additions & 12 deletions

File tree

pkg/detectors/detectors.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"crypto/rand"
66
"errors"
7+
"fmt"
78
"math/big"
89
"net/url"
910
"strings"
@@ -233,6 +234,27 @@ func PrefixRegex(keywords []string) string {
233234
return pre + middle + post
234235
}
235236

237+
// PrefixOrSuffixRegex builds a regular expression that matches pattern when
// one of keywords appears shortly before it or shortly after it.
//
// keywords are joined with "|" and matched case-insensitively. Both the
// keywords and pattern are inserted verbatim, so they may contain regex syntax.
// pattern is embedded twice (once per alternative), which means every capture
// group in pattern appears twice in the result and only the copy belonging to
// the alternative that matched is populated.
//
// The gap allowed between keyword and pattern is the same on both sides:
// either at most 100 characters on a single line, or text spanning between 1
// and 15 line-break sequences.
func PrefixOrSuffixRegex(keywords []string, pattern string) string {
	// The multi-line alternative must contain at least one line break
	// ({1,15}, not {0,15}); with zero repetitions it would reduce to a bare
	// `.*?` and silently defeat the 100-character same-line limit.
	const gap = `(?:.{0,100}?|.*?(?:[\r\n]{1,2}.*?){1,15})`

	keyword := `(?i:` + strings.Join(keywords, "|") + `)`
	prefixPat := keyword + gap + pattern
	suffixPat := pattern + gap + keyword

	return fmt.Sprintf(`(?:%s|%s)`, prefixPat, suffixPat)
}
243+
244+
// FirstNonEmptyMatch returns the value of the first non-empty capture group in
// matches, or "" when there is none.
//
// Element 0 of matches is treated as the entire matched string and is never
// returned; only the elements after it are inspected.
func FirstNonEmptyMatch(matches []string) string {
	// Starting at 1 skips the whole-match entry and also covers nil or
	// single-element input, for which the loop body never runs.
	for i := 1; i < len(matches); i++ {
		if group := matches[i]; group != "" {
			return group
		}
	}
	return ""
}
257+
236258
// KeyIsRandom is a low-cost check that 'keys' include a number, to reduce false positives (FPs).
237259
// Golang doesn't support regex lookaheads, so must be done in separate calls.
238260
// TODO improve checks. Shannon entropy did not work well.

pkg/detectors/github/v1/github_old.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ func (Scanner) Version() int { return 1 }
3636
func (Scanner) CloudEndpoint() string { return "https://api.github.com" }
3737

3838
var (
39-
keyPat = regexp.MustCompile(`(?:(?i:github|token)|(?-i:GH|gh|HUB|[Hh]ub|PAT|[Pp]at|OCTO|[Oo]cto))[^\.].{0,40}[ =:'"]+([a-f0-9]{40})\b`)
39+
keyPat = regexp.MustCompile(detectors.PrefixOrSuffixRegex([]string{`(?:(?i:github|token)|(?-i:GH|gh|HUB|[Hh]ub|PAT|[Pp]at|OCTO|[Oo]cto))`}, `\b([a-f0-9]{40})\b`))
4040
)
4141

4242
// Keywords are used for efficiently pre-filtering chunks.
@@ -51,7 +51,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
5151

5252
uniqueMatches := make(map[string]struct{})
5353
for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) {
54-
m := match[1]
54+
m := detectors.FirstNonEmptyMatch(match)
5555
// Ignore low-entropy matches.
5656
if detectors.StringShannonEntropy(m) < 3 {
5757
continue

pkg/detectors/snykkey/snykkey.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ type Scanner struct {
2222
// Ensure the Scanner satisfies the interface at compile time.
2323
var _ detectors.Detector = (*Scanner)(nil)
2424

25-
var keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"snyk"}) + `\b([0-9a-z]{8}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{12})\b`)
25+
var keyPat = regexp.MustCompile(detectors.PrefixOrSuffixRegex([]string{"snyk"}, `\b([0-9a-z]{8}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{12})\b`))
2626

2727
// Keywords are used for efficiently pre-filtering chunks.
2828
// Use identifiers in the secret preferably, or the provider name.
@@ -36,7 +36,11 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
3636

3737
tokens := make(map[string]struct{})
3838
for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) {
39-
tokens[match[1]] = struct{}{}
39+
m := detectors.FirstNonEmptyMatch(match)
40+
if detectors.StringShannonEntropy(m) < 3 {
41+
continue
42+
}
43+
tokens[m] = struct{}{}
4044
}
4145

4246
for token := range tokens {

pkg/detectors/snykkey/snykkey_test.go

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,15 @@ set PATH=%PATH%;C:\Program Files\nodejs\;C:\Program Files\Git\cmd`,
2525
want: []string{"885953dc-2469-443c-983d-5243d2d54116"},
2626
},
2727
// https://docs.snyk.io/snyk-api/get-a-projects-sbom-document-endpoint#how-to-generate-the-sbom-for-a-project
28-
// {
29-
// name: "curl example",
30-
// `curl --get \
31-
// -H "Authorization: token ccc9ae71-913f-46bd-9d23-03356323400a" \
32-
// --data-urlencode "version=2023-03-20" \
33-
// --data-urlencode "format=cyclonedx1.4%2Bjson" \
34-
// https://api.snyk.io/rest/orgs/1234/projects/1234/sbom`,
35-
// },
28+
{
29+
name: "suffix example",
30+
input: `curl --get \
31+
-H "Authorization: token ccc9ae71-913f-46bd-9d23-03356323400a" \
32+
--data-urlencode "version=2023-03-20" \
33+
--data-urlencode "format=cyclonedx1.4%2Bjson" \
34+
https://api.snyk.io/rest/orgs/1234/projects/1234/sbom`,
35+
want: []string{"ccc9ae71-913f-46bd-9d23-03356323400a"},
36+
},
3637
}
3738

3839
for _, test := range tests {

0 commit comments

Comments
 (0)