Skip to content

Commit 9324aa6

Browse files
committed
feat(gitparse): track ref sources
'Hidden' refs, such as 'refs/pull/1004/head', may cause confusion if reported upon. GitHub, for example, will display a banner saying that the commit doesn't belong to the repository. This parses the output of 'git log --source' and converts it to a human-readable format, IF the ref is 'hidden'.
1 parent 1744fdc commit 9324aa6

File tree

12 files changed

+667
-506
lines changed

12 files changed

+667
-506
lines changed

hack/snifftest/main.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -197,15 +197,16 @@ func main() {
197197
SkipBinaries: true,
198198
SkipArchives: false,
199199
Concurrency: runtime.NumCPU(),
200-
SourceMetadataFunc: func(file, email, commit, timestamp, repository string, line int64) *source_metadatapb.MetaData {
200+
SourceMetadataFunc: func(repository, commit, commitSource, email, timestamp, file string, line int64) *source_metadatapb.MetaData {
201201
return &source_metadatapb.MetaData{
202202
Data: &source_metadatapb.MetaData_Git{
203203
Git: &source_metadatapb.Git{
204-
Commit: commit,
205-
File: file,
206-
Email: email,
207-
Repository: repository,
208-
Timestamp: timestamp,
204+
Repository: repository,
205+
Commit: commit,
206+
CommitSource: commitSource,
207+
Email: email,
208+
Timestamp: timestamp,
209+
File: file,
209210
},
210211
},
211212
}

pkg/gitparse/gitparse.go

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,9 @@ func (d *Diff) finalize() error { return d.contentWriter.CloseForWriting() }
108108

109109
// Commit contains commit header info and diffs.
110110
type Commit struct {
111-
Hash string
111+
Hash string
112+
// The source of a commit, if it doesn't exist in the repository's history.
113+
Source string
112114
Author string
113115
Committer string
114116
Date time.Time
@@ -232,19 +234,23 @@ func (c *Parser) RepoPath(
232234
) (chan *Diff, error) {
233235
args := []string{
234236
"-C", source,
237+
"--no-replace-objects",
235238
"log",
236239
"--patch", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---patch
237240
"--full-history",
238241
"--date=format:%a %b %d %H:%M:%S %Y %z",
239242
"--pretty=fuller", // https://git-scm.com/docs/git-log#_pretty_formats
240243
"--notes", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---notesltrefgt
244+
"--source", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---source
241245
}
242246
if abbreviatedLog {
247+
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---diff-filterACDMRTUXB82308203
243248
args = append(args, "--diff-filter=AM")
244249
}
245250
if head != "" {
246251
args = append(args, head)
247252
} else {
253+
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---all
248254
args = append(args, "--all")
249255
}
250256
args = append(args, additionalArgs...) // These need to come before --
@@ -334,10 +340,9 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
334340
outReader := bufio.NewReader(stdOut)
335341
var (
336342
currentCommit *Commit
337-
338-
totalLogSize int
343+
totalLogSize int
344+
latestState = Initial
339345
)
340-
var latestState = Initial
341346

342347
diff := func(c *Commit, opts ...diffOption) *Diff {
343348
opts = append(opts, withCustomContentWriter(bufferwriter.New()))
@@ -397,10 +402,18 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
397402
// Create a new currentDiff and currentCommit
398403
currentCommit = &Commit{Message: strings.Builder{}}
399404
currentDiff = diff(currentCommit)
400-
// Check that the commit line contains a hash and set it.
401-
if len(line) >= 47 {
402-
currentCommit.Hash = string(line[7:47])
405+
406+
hash, ref := parseCommitLine(line)
407+
if hash == nil || ref == nil {
408+
ctx.Logger().Error(
409+
fmt.Errorf(`expected line to match 'commit <hash> <ref>', got %q`, line),
410+
"Failed to parse CommitLine")
411+
latestState = ParseFailure
412+
continue
403413
}
414+
415+
currentCommit.Hash = string(hash)
416+
currentCommit.Source = parseSourceRef(ref)
404417
case isMergeLine(isStaged, latestState, line):
405418
latestState = MergeLine
406419
case isAuthorLine(isStaged, latestState, line):
@@ -616,6 +629,47 @@ func isCommitLine(isStaged bool, latestState ParseState, line []byte) bool {
616629
return false
617630
}
618631

632+
// parseCommitLine extracts the commit hash and the source ref from a commit
// header line produced by `git log --source`, e.g.
//
//	commit 2dbbb28727c7c2954438666dafba57bb8c714d3b refs/heads/fix/github-enterprise-gist\n
//
// hash is nil when the line is too short to hold `commit ` followed by a
// 40-character hash. ref is nil when nothing follows the hash and the single
// byte after it (that byte is skipped without being inspected). Both results
// are sub-slices of line, not copies.
func parseCommitLine(line []byte) (hash []byte, ref []byte) {
	const (
		hashStart = len("commit ")
		hashEnd   = hashStart + 40
		refStart  = hashEnd + 1
	)

	// `commit e5575cd6f2d21d3a1a604287c7bf4a7eab2266e0\n`
	if len(line) >= hashEnd {
		hash = line[hashStart:hashEnd]
	}

	// Strip the line terminator explicitly instead of dropping the last byte:
	// the final line of the input may have no trailing newline, and CRLF
	// output would otherwise leave a stray '\r' on the ref.
	if len(line) > refStart {
		ref = bytes.TrimRight(line[refStart:], "\r\n")
	}

	return hash, ref
}
647+
648+
// ParseCommitSource s
649+
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---source
650+
func parseSourceRef(ref []byte) string {
651+
// We don't care about 'normal' refs.
652+
if bytes.HasPrefix(ref, []byte("refs/heads/")) || bytes.HasPrefix(ref, []byte("refs/tags/")) {
653+
return ""
654+
}
655+
656+
// Handle GitHub pull requests.
657+
// e.g., `refs/pull/238/head` or `refs/pull/1234/merge`
658+
if after, ok := bytes.CutPrefix(ref, []byte("refs/pull/")); ok {
659+
prNumber := after[:bytes.Index(after, []byte("/"))]
660+
return "Pull request #" + string(prNumber)
661+
}
662+
663+
// Handle GitLab merge requests
664+
// e.g., `refs/merge-requests/238/head` or `refs/merge-requests/1234/merge`
665+
if after, ok := bytes.CutPrefix(ref, []byte("refs/merge-requests/")); ok {
666+
mrNumber := after[:bytes.Index(after, []byte("/"))]
667+
return "Merge request #" + string(mrNumber)
668+
}
669+
670+
return fmt.Sprintf("%s (hidden ref)", string(ref))
671+
}
672+
619673
// Author: Bill Rich <[email protected]>
620674
func isAuthorLine(isStaged bool, latestState ParseState, line []byte) bool {
621675
if isStaged || !(latestState == CommitLine || latestState == MergeLine) {

pkg/gitparse/gitparse_test.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -708,6 +708,45 @@ func TestLineChecksNoStaged(t *testing.T) {
708708
}
709709
}
710710

711+
func Test_parseCommitLine(t *testing.T) {
712+
cases := map[string][]string{
713+
"commit 198c63cb8212a99cc4352bc72f25e5444a786291 refs/heads/main\n": {"198c63cb8212a99cc4352bc72f25e5444a786291", "refs/heads/main"},
714+
"commit e76dfb98ab9001daa869191b6aebe8cf4cd3b22a refs/remotes/origin/debug/aws-logging\n": {"e76dfb98ab9001daa869191b6aebe8cf4cd3b22a", "refs/remotes/origin/debug/aws-logging"},
715+
}
716+
717+
for line, expected := range cases {
718+
hash, ref := parseCommitLine([]byte(line))
719+
if string(hash) != expected[0] {
720+
t.Errorf("Expected: %s, Got: %s", expected[0], hash)
721+
}
722+
if string(ref) != expected[1] {
723+
t.Errorf("Expected: %s, Got: %s", expected[1], ref)
724+
}
725+
}
726+
}
727+
728+
func Test_parseSourceRef(t *testing.T) {
729+
cases := map[string]string{
730+
"refs/heads/master": "",
731+
"refs/tags/v3.0.5": "",
732+
// refs/merge-requests/33/head
733+
"refs/heads/thog/mr/33/head": "Merge request #33",
734+
// refs/merge-requests/19/merge
735+
"refs/heads/thog/mr/19/merge": "Merge request #19",
736+
// refs/pull/980/head
737+
"refs/heads/thog/pr/980/head": "Pull request #980",
738+
// refs/pull/1644/merge
739+
"refs/heads/thog/pr/1644/merge": "Pull request #1644",
740+
}
741+
742+
for line, expected := range cases {
743+
source := parseSourceRef([]byte(line))
744+
if source != expected {
745+
t.Errorf("Expected: %s, Got: %s", expected, source)
746+
}
747+
}
748+
}
749+
711750
func TestBinaryPathParse(t *testing.T) {
712751
cases := map[string]string{
713752
"Binary files a/trufflehog_3.42.0_linux_arm64.tar.gz and /dev/null differ\n": "",

0 commit comments

Comments
 (0)