Skip to content

Commit d526837

Browse files
rgmz (Richard Gomez)
authored and committed
feat(gitparse): track commit refs
1 parent 3fb6641 commit d526837

File tree

11 files changed

+493
-378
lines changed

11 files changed

+493
-378
lines changed

hack/snifftest/main.go

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -197,15 +197,16 @@ func main() {
197197
SkipBinaries: true,
198198
SkipArchives: false,
199199
Concurrency: runtime.NumCPU(),
200-
SourceMetadataFunc: func(file, email, commit, timestamp, repository string, line int64) *source_metadatapb.MetaData {
200+
SourceMetadataFunc: func(repository, commit, ref, email, timestamp, file string, line int64) *source_metadatapb.MetaData {
201201
return &source_metadatapb.MetaData{
202202
Data: &source_metadatapb.MetaData_Git{
203203
Git: &source_metadatapb.Git{
204+
Repository: repository,
204205
Commit: commit,
205-
File: file,
206+
CommitRef: ref,
206207
Email: email,
207-
Repository: repository,
208208
Timestamp: timestamp,
209+
File: file,
209210
},
210211
},
211212
}

pkg/gitparse/gitparse.go

Lines changed: 33 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,7 @@ func (d *Diff) finalize() error { return d.contentWriter.CloseForWriting() }
109109
// Commit contains commit header info and diffs.
110110
type Commit struct {
111111
Hash string
112+
SourceRef string
112113
Author string
113114
Committer string
114115
Date time.Time
@@ -230,13 +231,16 @@ func (c *Parser) RepoPath(ctx context.Context, source string, head string, abbre
230231
"--date=format:%a %b %d %H:%M:%S %Y %z",
231232
"--pretty=fuller", // https://git-scm.com/docs/git-log#_pretty_formats
232233
"--notes", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---notesltrefgt
234+
"--source", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---source
233235
}
234236
if abbreviatedLog {
237+
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---diff-filterACDMRTUXB82308203
235238
args = append(args, "--diff-filter=AM")
236239
}
237240
if head != "" {
238241
args = append(args, head)
239242
} else {
243+
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---all
240244
args = append(args, "--all")
241245
}
242246
for _, glob := range excludedGlobs {
@@ -323,10 +327,9 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
323327
outReader := bufio.NewReader(stdOut)
324328
var (
325329
currentCommit *Commit
326-
327-
totalLogSize int
330+
totalLogSize int
331+
latestState = Initial
328332
)
329-
var latestState = Initial
330333

331334
diff := func(c *Commit, opts ...diffOption) *Diff {
332335
opts = append(opts, withCustomContentWriter(bufferwriter.New()))
@@ -386,10 +389,18 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
386389
// Create a new currentDiff and currentCommit
387390
currentCommit = &Commit{Message: strings.Builder{}}
388391
currentDiff = diff(currentCommit)
389-
// Check that the commit line contains a hash and set it.
390-
if len(line) >= 47 {
391-
currentCommit.Hash = string(line[7:47])
392+
393+
hash, ref := parseCommitLine(line)
394+
if hash == nil || ref == nil {
395+
ctx.Logger().Error(
396+
fmt.Errorf(`expected line to match 'commit <hash> <ref>', got "%s"`, line),
397+
"Failed to parse CommitLine")
398+
latestState = ParseFailure
399+
continue
392400
}
401+
402+
currentCommit.Hash = string(hash)
403+
currentCommit.SourceRef = string(ref)
393404
case isMergeLine(isStaged, latestState, line):
394405
latestState = MergeLine
395406
case isAuthorLine(isStaged, latestState, line):
@@ -605,6 +616,22 @@ func isCommitLine(isStaged bool, latestState ParseState, line []byte) bool {
605616
return false
606617
}
607618

619+
// parseCommitLine extracts the commit hash and, when present, the source ref
// from a commit header line. The recognised shapes are:
//
//	commit e5575cd6f2d21d3a1a604287c7bf4a7eab2266e0\n
//	commit 2dbbb28727c7c2954438666dafba57bb8c714d3b refs/heads/fix/github-enterprise-gist\n
//
// The line terminator is optional: any trailing "\n" or "\r\n" is ignored
// before slicing, so a final line without a newline keeps its full ref and a
// CRLF-terminated line does not leak a "\r" into the ref.
//
// hash is nil when the line (without its terminator) is too short to hold the
// "commit " prefix plus a 40-character hash. ref is nil when nothing follows
// the single separator byte after the hash. Both results alias line; they are
// not copies. The "commit " prefix itself is not validated here.
func parseCommitLine(line []byte) (hash []byte, ref []byte) {
	const (
		hashStart = 7  // len("commit ")
		hashEnd   = 47 // hashStart + 40-character hash
		refStart  = 48 // hashEnd + one separator byte
	)

	// Drop the line terminator up front so the length checks below measure
	// content only, rather than assuming the last byte is always "\n".
	end := len(line)
	for end > 0 && (line[end-1] == '\n' || line[end-1] == '\r') {
		end--
	}

	if end >= hashEnd {
		hash = line[hashStart:hashEnd]
	}

	// Only report a ref when at least one byte follows the separator; an empty
	// ref is treated the same as a missing one.
	if end > refStart {
		ref = line[refStart:end]
	}

	return hash, ref
}
634+
608635
// Author: Bill Rich <[email protected]>
609636
func isAuthorLine(isStaged bool, latestState ParseState, line []byte) bool {
610637
if isStaged || !(latestState == CommitLine || latestState == MergeLine) {

pkg/gitparse/gitparse_test.go

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -703,6 +703,23 @@ func TestLineChecksNoStaged(t *testing.T) {
703703
}
704704
}
705705

706+
func Test_parseCommitLine(t *testing.T) {
707+
cases := map[string][]string{
708+
"commit 198c63cb8212a99cc4352bc72f25e5444a786291 refs/heads/main\n": {"198c63cb8212a99cc4352bc72f25e5444a786291", "refs/heads/main"},
709+
"commit e76dfb98ab9001daa869191b6aebe8cf4cd3b22a refs/remotes/origin/debug/aws-logging\n": {"e76dfb98ab9001daa869191b6aebe8cf4cd3b22a", "refs/remotes/origin/debug/aws-logging"},
710+
}
711+
712+
for line, expected := range cases {
713+
hash, ref := parseCommitLine([]byte(line))
714+
if string(hash) != expected[0] {
715+
t.Errorf("Expected: %s, Got: %s", expected[0], hash)
716+
}
717+
if string(ref) != expected[1] {
718+
t.Errorf("Expected: %s, Got: %s", expected[1], ref)
719+
}
720+
}
721+
}
722+
706723
func TestBinaryPathParse(t *testing.T) {
707724
cases := map[string]string{
708725
"Binary files a/trufflehog_3.42.0_linux_arm64.tar.gz and /dev/null differ\n": "",

pkg/output/plain.go

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,33 @@ func (p *PlainPrinter) Print(_ context.Context, r *detectors.ResultWithMetadata)
8787

8888
for _, data := range meta {
8989
for k, v := range data {
90+
// Only print Git commit refs when they're associated with a pull/merge request.
91+
// Otherwise, this information is not useful.
92+
if strings.EqualFold(k, "Commit_ref") {
93+
ref, ok := v.(string)
94+
if !ok {
95+
continue
96+
}
97+
98+
var prNum string
99+
if strings.HasPrefix(ref, "refs/heads/trufflehog/pull/") {
100+
prNum = ref[27:]
101+
} else if strings.HasPrefix(ref, "refs/heads/trufflehog/merge-requests/") {
102+
prNum = ref[37:]
103+
}
104+
105+
if prNum == "" {
106+
continue
107+
}
108+
109+
k = "Pull Request"
110+
if strings.HasSuffix(prNum, "/head") {
111+
v = prNum[:len(prNum)-5]
112+
} else if strings.HasSuffix(prNum, "/merge") {
113+
v = prNum[:len(prNum)-6]
114+
}
115+
}
116+
90117
aggregateDataKeys = append(aggregateDataKeys, k)
91118
aggregateData[k] = v
92119
}

0 commit comments

Comments
 (0)