Skip to content

Commit c9a7acd

Browse files
rgmzRichard Gomez
authored and committed
feat(gitparse): track commit refs
1 parent c179f00 commit c9a7acd

File tree

11 files changed

+505
-384
lines changed

11 files changed

+505
-384
lines changed

hack/snifftest/main.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,15 +197,16 @@ func main() {
197197
SkipBinaries: true,
198198
SkipArchives: false,
199199
Concurrency: runtime.NumCPU(),
200-
SourceMetadataFunc: func(file, email, commit, timestamp, repository string, line int64) *source_metadatapb.MetaData {
200+
SourceMetadataFunc: func(repository, commit, ref, email, timestamp, file string, line int64) *source_metadatapb.MetaData {
201201
return &source_metadatapb.MetaData{
202202
Data: &source_metadatapb.MetaData_Git{
203203
Git: &source_metadatapb.Git{
204+
Repository: repository,
204205
Commit: commit,
205-
File: file,
206+
CommitRef: ref,
206207
Email: email,
207-
Repository: repository,
208208
Timestamp: timestamp,
209+
File: file,
209210
},
210211
},
211212
}

pkg/gitparse/gitparse.go

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,12 @@ func (d *Diff) finalize() error {
106106

107107
// Commit contains commit header info and diffs.
108108
type Commit struct {
109-
Hash string
110-
Author string
111-
Date time.Time
112-
Message strings.Builder
113-
Size int // in bytes
109+
SourceRef string
110+
Hash string
111+
Author string
112+
Date time.Time
113+
Message strings.Builder
114+
Size int // in bytes
114115

115116
hasDiffs bool
116117
}
@@ -209,13 +210,22 @@ func NewParser(options ...Option) *Parser {
209210
// RepoPath parses the output of the `git log` command for the `source` path.
210211
// The Diff chan will return diffs in the order they are parsed from the log.
211212
func (c *Parser) RepoPath(ctx context.Context, source string, head string, abbreviatedLog bool, excludedGlobs []string, isBare bool) (chan *Diff, error) {
212-
args := []string{"-C", source, "log", "-p", "--full-history", "--date=format:%a %b %d %H:%M:%S %Y %z"}
213+
args := []string{
214+
"-C", source,
215+
"log",
216+
"-p", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---patch
217+
"--full-history", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---full-history
218+
"--date=format:%a %b %d %H:%M:%S %Y %z", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---dateltformatgt
219+
"--source", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---source
220+
}
213221
if abbreviatedLog {
222+
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---diff-filterACDMRTUXB82308203
214223
args = append(args, "--diff-filter=AM")
215224
}
216225
if head != "" {
217226
args = append(args, head)
218227
} else {
228+
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---all
219229
args = append(args, "--all")
220230
}
221231
for _, glob := range excludedGlobs {
@@ -302,10 +312,9 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
302312
outReader := bufio.NewReader(stdOut)
303313
var (
304314
currentCommit *Commit
305-
306-
totalLogSize int
315+
totalLogSize int
316+
latestState = Initial
307317
)
308-
var latestState = Initial
309318

310319
diff := func(c *Commit, opts ...diffOption) *Diff {
311320
opts = append(opts, withCustomContentWriter(bufferwriter.New(ctx)))
@@ -365,10 +374,18 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
365374
// Create a new currentDiff and currentCommit
366375
currentCommit = &Commit{Message: strings.Builder{}}
367376
currentDiff = diff(currentCommit)
368-
// Check that the commit line contains a hash and set it.
369-
if len(line) >= 47 {
370-
currentCommit.Hash = string(line[7:47])
377+
378+
hash, ref := parseCommitLine(line)
379+
if hash == nil || ref == nil {
380+
ctx.Logger().Error(
381+
fmt.Errorf(`expected line to match 'commit <hash> <ref>', got "%s"`, line),
382+
"Failed to parse CommitLine")
383+
latestState = ParseFailure
384+
continue
371385
}
386+
387+
currentCommit.Hash = string(hash)
388+
currentCommit.SourceRef = string(ref)
372389
case isMergeLine(isStaged, latestState, line):
373390
latestState = MergeLine
374391
case isAuthorLine(isStaged, latestState, line):
@@ -566,6 +583,22 @@ func isCommitLine(isStaged bool, latestState ParseState, line []byte) bool {
566583
return false
567584
}
568585

586+
// parseCommitLine extracts the commit hash and, when present, the source ref
// from a commit header line produced by `git log --source`.
//
// Expected shapes (the trailing line terminator is optional):
//
//	commit e5575cd6f2d21d3a1a604287c7bf4a7eab2266e0\n
//	commit 2dbbb28727c7c2954438666dafba57bb8c714d3b refs/heads/fix/github-enterprise-gist\n
//
// hash is nil when the line is too short to hold the "commit " prefix plus a
// 40-character hash. ref is nil when nothing follows the hash. Both returned
// slices alias line; callers that keep them past the lifetime of line's
// backing buffer must copy them.
func parseCommitLine(line []byte) (hash []byte, ref []byte) {
	const (
		hashStart = len("commit ")
		hashEnd   = hashStart + 40
	)

	// Strip the line terminator before slicing so that a missing "\n" does not
	// cost the ref its last byte and a "\r\n" ending does not leak "\r" into it.
	end := len(line)
	for end > 0 && (line[end-1] == '\n' || line[end-1] == '\r') {
		end--
	}
	line = line[:end]

	if len(line) < hashEnd {
		return nil, nil
	}
	hash = line[hashStart:hashEnd]

	// A single separator byte sits between the hash and the ref; skip it.
	if len(line) > hashEnd {
		ref = line[hashEnd+1:]
	}

	return hash, ref
}
601+
569602
// Author: Bill Rich <[email protected]>
570603
func isAuthorLine(isStaged bool, latestState ParseState, line []byte) bool {
571604
if isStaged || !(latestState == CommitLine || latestState == MergeLine) {

pkg/gitparse/gitparse_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,23 @@ func TestLineChecksNoStaged(t *testing.T) {
588588
}
589589
}
590590

591+
func Test_parseCommitLine(t *testing.T) {
592+
cases := map[string][]string{
593+
"commit 198c63cb8212a99cc4352bc72f25e5444a786291 refs/heads/main\n": {"198c63cb8212a99cc4352bc72f25e5444a786291", "refs/heads/main"},
594+
"commit e76dfb98ab9001daa869191b6aebe8cf4cd3b22a refs/remotes/origin/debug/aws-logging\n": {"e76dfb98ab9001daa869191b6aebe8cf4cd3b22a", "refs/remotes/origin/debug/aws-logging"},
595+
}
596+
597+
for line, expected := range cases {
598+
hash, ref := parseCommitLine([]byte(line))
599+
if string(hash) != expected[0] {
600+
t.Errorf("Expected: %s, Got: %s", expected[0], hash)
601+
}
602+
if string(ref) != expected[1] {
603+
t.Errorf("Expected: %s, Got: %s", expected[1], ref)
604+
}
605+
}
606+
}
607+
591608
func TestBinaryPathParse(t *testing.T) {
592609
cases := map[string]string{
593610
"Binary files a/trufflehog_3.42.0_linux_arm64.tar.gz and /dev/null differ\n": "",

pkg/output/plain.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,33 @@ func (p *PlainPrinter) Print(_ context.Context, r *detectors.ResultWithMetadata)
8989

9090
for _, data := range meta {
9191
for k, v := range data {
92+
// Only print Git commit refs when they're associated with a pull/merge request.
93+
// Otherwise, this information is not useful.
94+
if strings.EqualFold(k, "Commit_ref") {
95+
ref, ok := v.(string)
96+
if !ok {
97+
continue
98+
}
99+
100+
var prNum string
101+
if strings.HasPrefix(ref, "refs/heads/trufflehog/pull/") {
102+
prNum = ref[27:]
103+
} else if strings.HasPrefix(ref, "refs/heads/trufflehog/merge-requests/") {
104+
prNum = ref[37:]
105+
}
106+
107+
if prNum == "" {
108+
continue
109+
}
110+
111+
k = "Pull Request"
112+
if strings.HasSuffix(prNum, "/head") {
113+
v = prNum[:len(prNum)-5]
114+
} else if strings.HasSuffix(prNum, "/merge") {
115+
v = prNum[:len(prNum)-6]
116+
}
117+
}
118+
92119
aggregateDataKeys = append(aggregateDataKeys, k)
93120
aggregateData[k] = v
94121
}

0 commit comments

Comments
 (0)