Skip to content

Commit bef8f77

Browse files
committed
feat(gitparse): track ref sources
'Hidden' refs, such as 'refs/pull/1004/head', may cause confusion if reported upon. GitHub, for example, will display a banner saying that the commit doesn't belong to the repository. This change parses the output of 'git log --source' and converts it to a human-readable format, IF the ref is 'hidden'.
1 parent 58289a6 commit bef8f77

File tree

12 files changed

+628
-488
lines changed

12 files changed

+628
-488
lines changed

hack/snifftest/main.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -197,15 +197,16 @@ func main() {
197197
SkipBinaries: true,
198198
SkipArchives: false,
199199
Concurrency: runtime.NumCPU(),
200-
SourceMetadataFunc: func(file, email, commit, timestamp, repository string, line int64) *source_metadatapb.MetaData {
200+
SourceMetadataFunc: func(repository, commit, commitSource, email, timestamp, file string, line int64) *source_metadatapb.MetaData {
201201
return &source_metadatapb.MetaData{
202202
Data: &source_metadatapb.MetaData_Git{
203203
Git: &source_metadatapb.Git{
204-
Commit: commit,
205-
File: file,
206-
Email: email,
207-
Repository: repository,
208-
Timestamp: timestamp,
204+
Repository: repository,
205+
Commit: commit,
206+
CommitSource: commitSource,
207+
Email: email,
208+
Timestamp: timestamp,
209+
File: file,
209210
},
210211
},
211212
}

pkg/gitparse/gitparse.go

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,9 @@ func (d *Diff) finalize() error { return d.contentWriter.CloseForWriting() }
108108

109109
// Commit contains commit header info and diffs.
110110
type Commit struct {
111-
Hash string
111+
Hash string
112+
// The source of a commit, if it doesn't exist in the repository's history.
113+
Source string
112114
Author string
113115
Committer string
114116
Date time.Time
@@ -231,19 +233,23 @@ func (c *Parser) RepoPath(
231233
) (chan *Diff, error) {
232234
args := []string{
233235
"-C", source,
236+
"--no-replace-objects",
234237
"log",
235238
"--patch", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---patch
236239
"--full-history",
237240
"--date=format:%a %b %d %H:%M:%S %Y %z",
238241
"--pretty=fuller", // https://git-scm.com/docs/git-log#_pretty_formats
239242
"--notes", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---notesltrefgt
243+
"--source", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---source
240244
}
241245
if abbreviatedLog {
246+
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---diff-filterACDMRTUXB82308203
242247
args = append(args, "--diff-filter=AM")
243248
}
244249
if head != "" {
245250
args = append(args, head)
246251
} else {
252+
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---all
247253
args = append(args, "--all")
248254
}
249255
for _, glob := range excludedGlobs {
@@ -332,10 +338,9 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
332338
outReader := bufio.NewReader(stdOut)
333339
var (
334340
currentCommit *Commit
335-
336-
totalLogSize int
341+
totalLogSize int
342+
latestState = Initial
337343
)
338-
var latestState = Initial
339344

340345
diff := func(c *Commit, opts ...diffOption) *Diff {
341346
opts = append(opts, withCustomContentWriter(bufferwriter.New()))
@@ -395,10 +400,18 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
395400
// Create a new currentDiff and currentCommit
396401
currentCommit = &Commit{Message: strings.Builder{}}
397402
currentDiff = diff(currentCommit)
398-
// Check that the commit line contains a hash and set it.
399-
if len(line) >= 47 {
400-
currentCommit.Hash = string(line[7:47])
403+
404+
hash, ref := parseCommitLine(line)
405+
if hash == nil || ref == nil {
406+
ctx.Logger().Error(
407+
fmt.Errorf(`expected line to match 'commit <hash> <ref>', got "%s"`, line),
408+
"Failed to parse CommitLine")
409+
latestState = ParseFailure
410+
continue
401411
}
412+
413+
currentCommit.Hash = string(hash)
414+
currentCommit.Source = parseSourceRef(ref)
402415
case isMergeLine(isStaged, latestState, line):
403416
latestState = MergeLine
404417
case isAuthorLine(isStaged, latestState, line):
@@ -614,6 +627,47 @@ func isCommitLine(isStaged bool, latestState ParseState, line []byte) bool {
614627
return false
615628
}
616629

630+
// parseCommitLine splits a commit header line, as printed by
// `git log --source`, into the commit hash and the ref the commit was
// reached from. Example input:
//
//	commit 2dbbb28727c7c2954438666dafba57bb8c714d3b refs/heads/fix/github-enterprise-gist\n
//
// The leading 7-byte "commit " prefix is skipped without being validated;
// callers are expected to have checked it already. Any trailing "\n" or
// "\r\n" is ignored, so the line may or may not carry its terminator.
//
// hash is nil when the line is too short to contain a 40-character hash.
// ref is nil when nothing follows the hash and its one-byte separator.
// Both results are sub-slices of line and share its backing array.
func parseCommitLine(line []byte) (hash []byte, ref []byte) {
	const (
		hashStart = len("commit ")
		hashEnd   = hashStart + 40
		// One separator byte sits between the hash and the ref.
		refStart = hashEnd + 1
	)

	// Strip the line terminator up front instead of blindly dropping the last
	// byte, which would truncate the ref on a final line that has no newline.
	line = bytes.TrimRight(line, "\r\n")

	if len(line) >= hashEnd {
		hash = line[hashStart:hashEnd]
	}
	// Strictly greater: an empty ref is reported as nil, not as "".
	if len(line) > refStart {
		ref = line[refStart:]
	}

	return hash, ref
}
645+
646+
// parseSourceRef turns the ref reported by `git log --source` into a
// human-readable description of where a commit came from.
// https://git-scm.com/docs/git-log#Documentation/git-log.txt---source
//
// It returns:
//   - "" for an empty ref and for ordinary branches and tags
//     (`refs/heads/...`, `refs/tags/...`);
//   - "Pull request #N" for `refs/pull/N/...` (e.g. `refs/pull/238/head`);
//   - "Merge request #N" for `refs/merge-requests/N/...`
//     (e.g. `refs/merge-requests/1234/merge`);
//   - "<ref> (hidden ref)" for anything else, including pull/merge-request
//     refs that do not have the expected `<number>/<suffix>` shape.
func parseSourceRef(ref []byte) string {
	// Nothing to describe.
	if len(ref) == 0 {
		return ""
	}

	// We don't care about 'normal' refs.
	if bytes.HasPrefix(ref, []byte("refs/heads/")) || bytes.HasPrefix(ref, []byte("refs/tags/")) {
		return ""
	}

	numbered := []struct {
		prefix string
		label  string
	}{
		{prefix: "refs/pull/", label: "Pull request #"},            // GitHub pull requests
		{prefix: "refs/merge-requests/", label: "Merge request #"}, // GitLab merge requests
	}
	for _, kind := range numbered {
		rest, ok := bytes.CutPrefix(ref, []byte(kind.prefix))
		if !ok {
			continue
		}
		// Only accept `<number>/<suffix>`. A ref without the second slash used
		// to index with -1 and panic; it now falls through to the generic
		// description below.
		if number, _, found := bytes.Cut(rest, []byte("/")); found && len(number) > 0 {
			return kind.label + string(number)
		}
	}

	return string(ref) + " (hidden ref)"
}
670+
617671
// Author: Bill Rich <[email protected]>
618672
func isAuthorLine(isStaged bool, latestState ParseState, line []byte) bool {
619673
if isStaged || !(latestState == CommitLine || latestState == MergeLine) {

pkg/gitparse/gitparse_test.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,45 @@ func TestLineChecksNoStaged(t *testing.T) {
704704
}
705705
}
706706

707+
func Test_parseCommitLine(t *testing.T) {
708+
cases := map[string][]string{
709+
"commit 198c63cb8212a99cc4352bc72f25e5444a786291 refs/heads/main\n": {"198c63cb8212a99cc4352bc72f25e5444a786291", "refs/heads/main"},
710+
"commit e76dfb98ab9001daa869191b6aebe8cf4cd3b22a refs/remotes/origin/debug/aws-logging\n": {"e76dfb98ab9001daa869191b6aebe8cf4cd3b22a", "refs/remotes/origin/debug/aws-logging"},
711+
}
712+
713+
for line, expected := range cases {
714+
hash, ref := parseCommitLine([]byte(line))
715+
if string(hash) != expected[0] {
716+
t.Errorf("Expected: %s, Got: %s", expected[0], hash)
717+
}
718+
if string(ref) != expected[1] {
719+
t.Errorf("Expected: %s, Got: %s", expected[1], ref)
720+
}
721+
}
722+
}
723+
724+
func Test_parseSourceRef(t *testing.T) {
725+
cases := map[string]string{
726+
"refs/heads/master": "",
727+
"refs/tags/v3.0.5": "",
728+
// refs/merge-requests/33/head
729+
"refs/heads/thog/mr/33/head": "Merge request #33",
730+
// refs/merge-requests/19/merge
731+
"refs/heads/thog/mr/19/merge": "Merge request #19",
732+
// refs/pull/980/head
733+
"refs/heads/thog/pr/980/head": "Pull request #980",
734+
// refs/pull/1644/merge
735+
"refs/heads/thog/pr/1644/merge": "Pull request #1644",
736+
}
737+
738+
for line, expected := range cases {
739+
source := parseSourceRef([]byte(line))
740+
if source != expected {
741+
t.Errorf("Expected: %s, Got: %s", expected, source)
742+
}
743+
}
744+
}
745+
707746
func TestBinaryPathParse(t *testing.T) {
708747
cases := map[string]string{
709748
"Binary files a/trufflehog_3.42.0_linux_arm64.tar.gz and /dev/null differ\n": "",

0 commit comments

Comments
 (0)