Skip to content

Commit 73c5795

Browse files
committed
feat(gitparse): track ref sources
'Hidden' refs, such as 'refs/pull/1004/head', may cause confusion if reported upon. GitHub, for example, will display a banner saying that the commit doesn't belong to the repository. This commit parses the output of 'git log --source' and converts the ref to a human-readable format, but only if the ref is 'hidden'.
1 parent f1950e9 commit 73c5795

File tree

12 files changed

+667
-506
lines changed

12 files changed

+667
-506
lines changed

hack/snifftest/main.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -197,15 +197,16 @@ func main() {
197197
SkipBinaries: true,
198198
SkipArchives: false,
199199
Concurrency: runtime.NumCPU(),
200-
SourceMetadataFunc: func(file, email, commit, timestamp, repository string, line int64) *source_metadatapb.MetaData {
200+
SourceMetadataFunc: func(repository, commit, commitSource, email, timestamp, file string, line int64) *source_metadatapb.MetaData {
201201
return &source_metadatapb.MetaData{
202202
Data: &source_metadatapb.MetaData_Git{
203203
Git: &source_metadatapb.Git{
204-
Commit: commit,
205-
File: file,
206-
Email: email,
207-
Repository: repository,
208-
Timestamp: timestamp,
204+
Repository: repository,
205+
Commit: commit,
206+
CommitSource: commitSource,
207+
Email: email,
208+
Timestamp: timestamp,
209+
File: file,
209210
},
210211
},
211212
}

pkg/gitparse/gitparse.go

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,9 @@ func (d *Diff) finalize() error { return d.contentWriter.CloseForWriting() }
108108

109109
// Commit contains commit header info and diffs.
110110
type Commit struct {
111-
Hash string
111+
Hash string
112+
// The source of a commit, if it doesn't exist in the repository's history.
113+
Source string
112114
Author string
113115
Committer string
114116
Date time.Time
@@ -232,19 +234,23 @@ func (c *Parser) RepoPath(
232234
) (chan *Diff, error) {
233235
args := []string{
234236
"-C", source,
237+
"--no-replace-objects",
235238
"log",
236239
"--patch", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---patch
237240
"--full-history",
238241
"--date=format:%a %b %d %H:%M:%S %Y %z",
239242
"--pretty=fuller", // https://git-scm.com/docs/git-log#_pretty_formats
240243
"--notes", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---notesltrefgt
244+
"--source", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---source
241245
}
242246
if abbreviatedLog {
247+
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---diff-filterACDMRTUXB82308203
243248
args = append(args, "--diff-filter=AM")
244249
}
245250
if head != "" {
246251
args = append(args, head)
247252
} else {
253+
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---all
248254
args = append(args, "--all")
249255
}
250256
args = append(args, additionalArgs...) // These need to come before --
@@ -334,10 +340,9 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
334340
outReader := bufio.NewReader(stdOut)
335341
var (
336342
currentCommit *Commit
337-
338-
totalLogSize int
343+
totalLogSize int
344+
latestState = Initial
339345
)
340-
var latestState = Initial
341346

342347
diff := func(c *Commit, opts ...diffOption) *Diff {
343348
opts = append(opts, withCustomContentWriter(bufferwriter.New()))
@@ -397,10 +402,18 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
397402
// Create a new currentDiff and currentCommit
398403
currentCommit = &Commit{Message: strings.Builder{}}
399404
currentDiff = diff(currentCommit)
400-
// Check that the commit line contains a hash and set it.
401-
if len(line) >= 47 {
402-
currentCommit.Hash = string(line[7:47])
405+
406+
hash, ref := parseCommitLine(line)
407+
if hash == nil || ref == nil {
408+
ctx.Logger().Error(
409+
fmt.Errorf(`expected line to match 'commit <hash> <ref>', got %q`, line),
410+
"Failed to parse CommitLine")
411+
latestState = ParseFailure
412+
continue
403413
}
414+
415+
currentCommit.Hash = string(hash)
416+
currentCommit.Source = parseSourceRef(ref)
404417
case isMergeLine(isStaged, latestState, line):
405418
latestState = MergeLine
406419
case isAuthorLine(isStaged, latestState, line):
@@ -616,6 +629,47 @@ func isCommitLine(isStaged bool, latestState ParseState, line []byte) bool {
616629
return false
617630
}
618631

632+
// parseCommitLine extracts the commit hash and, when present, the source ref
// from a `git log --source` commit header line.
//
// The line is expected to look like one of:
//
//	commit e5575cd6f2d21d3a1a604287c7bf4a7eab2266e0\n
//	commit 2dbbb28727c7c2954438666dafba57bb8c714d3b refs/heads/fix/github-enterprise-gist\n
//
// Extraction is purely positional: the "commit " prefix and the separator
// byte after the hash are skipped, not validated.
//
// hash is nil when the line is too short to hold a 40-character hash. ref is
// nil when nothing follows the hash and its separator byte. Both results are
// sub-slices of line, not copies.
func parseCommitLine(line []byte) (hash []byte, ref []byte) {
	const (
		hashStart = len("commit ")
		hashEnd   = hashStart + 40
		refStart  = hashEnd + 1 // skip the single separator byte after the hash
	)

	if len(line) >= hashEnd {
		hash = line[hashStart:hashEnd]
	}

	if len(line) > refStart {
		refEnd := len(line)
		// Strip the line terminator only if it is actually there; dropping the
		// last byte unconditionally would truncate the ref of an unterminated
		// line (e.g. the final line of the output).
		if line[refEnd-1] == '\n' {
			refEnd--
		}
		ref = line[refStart:refEnd]
	}

	return hash, ref
}
647+
648+
// ParseCommitSource s
649+
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---source
650+
func parseSourceRef(ref []byte) string {
651+
// We don't care about 'normal' refs.
652+
if bytes.HasPrefix(ref, []byte("refs/heads/")) || bytes.HasPrefix(ref, []byte("refs/tags/")) {
653+
return ""
654+
}
655+
656+
// Handle GitHub pull requests.
657+
// e.g., `refs/pull/238/head` or `refs/pull/1234/merge`
658+
if after, ok := bytes.CutPrefix(ref, []byte("refs/pull/")); ok {
659+
prNumber := after[:bytes.Index(after, []byte("/"))]
660+
return "Pull request #" + string(prNumber)
661+
}
662+
663+
// Handle GitLab merge requests
664+
// e.g., `refs/merge-requests/238/head` or `refs/merge-requests/1234/merge`
665+
if after, ok := bytes.CutPrefix(ref, []byte("refs/merge-requests/")); ok {
666+
mrNumber := after[:bytes.Index(after, []byte("/"))]
667+
return "Merge request #" + string(mrNumber)
668+
}
669+
670+
return fmt.Sprintf("%s (hidden ref)", string(ref))
671+
}
672+
619673
// Author: Bill Rich <[email protected]>
620674
func isAuthorLine(isStaged bool, latestState ParseState, line []byte) bool {
621675
if isStaged || !(latestState == CommitLine || latestState == MergeLine) {

pkg/gitparse/gitparse_test.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,45 @@ func TestLineChecksNoStaged(t *testing.T) {
704704
}
705705
}
706706

707+
func Test_parseCommitLine(t *testing.T) {
708+
cases := map[string][]string{
709+
"commit 198c63cb8212a99cc4352bc72f25e5444a786291 refs/heads/main\n": {"198c63cb8212a99cc4352bc72f25e5444a786291", "refs/heads/main"},
710+
"commit e76dfb98ab9001daa869191b6aebe8cf4cd3b22a refs/remotes/origin/debug/aws-logging\n": {"e76dfb98ab9001daa869191b6aebe8cf4cd3b22a", "refs/remotes/origin/debug/aws-logging"},
711+
}
712+
713+
for line, expected := range cases {
714+
hash, ref := parseCommitLine([]byte(line))
715+
if string(hash) != expected[0] {
716+
t.Errorf("Expected: %s, Got: %s", expected[0], hash)
717+
}
718+
if string(ref) != expected[1] {
719+
t.Errorf("Expected: %s, Got: %s", expected[1], ref)
720+
}
721+
}
722+
}
723+
724+
func Test_parseSourceRef(t *testing.T) {
725+
cases := map[string]string{
726+
"refs/heads/master": "",
727+
"refs/tags/v3.0.5": "",
728+
// refs/merge-requests/33/head
729+
"refs/heads/thog/mr/33/head": "Merge request #33",
730+
// refs/merge-requests/19/merge
731+
"refs/heads/thog/mr/19/merge": "Merge request #19",
732+
// refs/pull/980/head
733+
"refs/heads/thog/pr/980/head": "Pull request #980",
734+
// refs/pull/1644/merge
735+
"refs/heads/thog/pr/1644/merge": "Pull request #1644",
736+
}
737+
738+
for line, expected := range cases {
739+
source := parseSourceRef([]byte(line))
740+
if source != expected {
741+
t.Errorf("Expected: %s, Got: %s", expected, source)
742+
}
743+
}
744+
}
745+
707746
func TestBinaryPathParse(t *testing.T) {
708747
cases := map[string]string{
709748
"Binary files a/trufflehog_3.42.0_linux_arm64.tar.gz and /dev/null differ\n": "",

0 commit comments

Comments
 (0)