diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2026-01-12 17:10:21 +0100 |
|---|---|---|
| committer | Aleksandr Nogikh <nogikh@google.com> | 2026-01-13 14:18:14 +0000 |
| commit | 85894026b1b003844e8fda94056d80d88294b0be (patch) | |
| tree | 405e1ed964a1d9ed2df0d813a366ffdde29b4780 /pkg | |
| parent | ce94b10583a205d365175b0f18ed2c9d0765559c (diff) | |
pkg/vcs: return multiple base commit candidates
Return the commits that represent unique sets of branches.
Sort the list topologically, breaking ties by commit date.
Diffstat (limited to 'pkg')
| -rw-r--r-- | pkg/vcs/git.go | 67 | ||||
| -rw-r--r-- | pkg/vcs/git_test.go | 23 |
2 files changed, 62 insertions, 28 deletions
diff --git a/pkg/vcs/git.go b/pkg/vcs/git.go index 6bb826edb..8408715d0 100644 --- a/pkg/vcs/git.go +++ b/pkg/vcs/git.go @@ -232,8 +232,7 @@ func (git *gitRepo) initRepo(reason error) error { } func (git *gitRepo) Contains(commit string) (bool, error) { - _, err := git.Run("merge-base", "--is-ancestor", commit, HEAD) - return err == nil, nil + return git.containedIn(HEAD, commit) } const gitDateFormat = "Mon Jan 2 15:04:05 2006 -0700" @@ -753,9 +752,11 @@ type BaseCommit struct { Branches []string } -// BaseForDiff selects the most recent commit that could have been the base commit +// BaseForDiff returns a list of commits that could have been the base commit // for the specified git patch. -func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCommit, error) { +// The returned list is minimized to only contain the commits that are represented in different +// subsets of branches. +func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*BaseCommit, error) { // We can't just query git log with --find-object=HASH because that will only return // the revisions where the hashed content was introduced or removed, while what we actually // want is the latest revision(s) where the content modified in the diff is still in place. @@ -771,7 +772,7 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCo "--no-renames", // Note that we cannot accelerate it by specifying "--since" "-n", "100", - `--format=%P`, + `--format=%H:%P`, } var fileNames []string nameToHash := map[string]string{} @@ -812,22 +813,25 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCo for s.Scan() { // TODO: we can further reduce the search space by adding "--raw" to args // and only considering the commits that introduce the blobs from the diff. - firstParent, _, _ := strings.Cut(s.Text(), " ") - if firstParent == "" { - continue + commit, parents, _ := strings.Cut(s.Text(), ":") + // Focus on the first parent. + candidate, _, _ := strings.Cut(parents, " ") + if candidate == "" { + // For the first commit, there's no parent. + candidate = commit } // Only focus on branches that are still alive. const cutOffDays = 60 - list, err := git.branchesThatContain(firstParent, time.Now().Add(-time.Hour*24*cutOffDays)) + list, err := git.branchesThatContain(candidate, time.Now().Add(-time.Hour*24*cutOffDays)) if err != nil { return nil, fmt.Errorf("failed to query branches: %w", err) } for _, info := range list { - record(firstParent, info.Branch) + record(candidate, info.Branch) record(info.Commit, info.Branch) } } - var ret *BaseCommit + var ret []*BaseCommit for commit, branches := range commitBranches { tracer.Log("considering %q [%q]", commit, branches) fileHashes, err := git.fileHashes(commit, fileNames) @@ -853,13 +857,39 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCo if err != nil { return nil, fmt.Errorf("failed to extract commit info: %w", err) } - tracer.Log("commit date is %v", info.CommitDate) - // Select the most recent commit. - if ret == nil || ret.CommitDate.Before(info.CommitDate) { - ret = &BaseCommit{Commit: info, Branches: branchList} + tracer.Log("hashes match, commit date is %v, branches %v", info.CommitDate, branchList) + ret = append(ret, &BaseCommit{Commit: info, Branches: branchList}) + } + return git.minimizeBaseCommits(ret) +} + +func (git Git) minimizeBaseCommits(list []*BaseCommit) ([]*BaseCommit, error) { + // We want to preserve commits that are present in different subsets of branches. + // Then, we want to sort them topologically and break ties by date. + lastCommit := map[string]*BaseCommit{} + for _, item := range list { + key := strings.Join(item.Branches, ":") + prev, ok := lastCommit[key] + if !ok { + lastCommit[key] = item + continue + } + isNewer, err := git.containedIn(item.Hash, prev.Hash) + if err != nil { + return nil, fmt.Errorf("topological sort step failed: %w", err) + } + if isNewer { + lastCommit[key] = item } } - return ret, nil + var filtered []*BaseCommit + for _, item := range lastCommit { + filtered = append(filtered, item) + } + sort.Slice(filtered, func(i, j int) bool { + return filtered[i].CommitDate.After(filtered[j].CommitDate) + }) + return filtered, nil } // fileHashes returns the blob SHA hashes for a particular list of files on a particular commit. @@ -924,6 +954,11 @@ func (git Git) verifyHash(hash string) (bool, error) { return true, nil } +func (git Git) containedIn(parent, commit string) (bool, error) { + _, err := git.Run("merge-base", "--is-ancestor", commit, parent) + return err == nil, nil +} + func (git Git) Diff(commitA, commitB string) ([]byte, error) { return git.Run("diff", commitA+".."+commitB) } diff --git a/pkg/vcs/git_test.go b/pkg/vcs/git_test.go index 542b572ac..5b5a3e14b 100644 --- a/pkg/vcs/git_test.go +++ b/pkg/vcs/git_test.go @@ -523,22 +523,21 @@ func TestBaseForDiff(t *testing.T) { require.NoError(t, err) // Create a different change on top of commit2. repo.Git("checkout", "-b", "branch-a") - time.Sleep(time.Second) repo.commitChangeset("patch a.txt", writeFile{"a.txt", "another change to a.txt"}, ) - // Yet the patch could only be applied to commit2 + // Yet the patch could only be applied to commit1 or commit2. base, err := repo.repo.BaseForDiff(diff, &debugtracer.TestTracer{T: t}) require.NoError(t, err) - require.NotNil(t, base) - assert.Equal(t, []string{"branch-a", "master"}, base.Branches) - assert.Equal(t, commit2.Hash, base.Hash) + require.Len(t, base, 1) + assert.Equal(t, []string{"branch-a", "master"}, base[0].Branches) + assert.Equal(t, commit2.Hash, base[0].Hash) }) t.Run("choose latest", func(t *testing.T) { _, err := repo.repo.SwitchCommit(commit2.Hash) require.NoError(t, err) - // Wait a second and add one more commit. - // (Otherwise the test might be flaky). + // Wait a second before adding another commit. + // Git does not remember milliseconds, so otherwise the commit sorting may be flaky. time.Sleep(time.Second) repo.Git("checkout", "-b", "branch-b") commit4 := repo.commitChangeset("unrelated commit", @@ -547,9 +546,10 @@ func TestBaseForDiff(t *testing.T) { // Since the commit did not touch a.txt, it's the expected one. base, err := repo.repo.BaseForDiff(diff, &debugtracer.TestTracer{T: t}) require.NoError(t, err) - require.NotNil(t, base) - assert.Equal(t, []string{"branch-b"}, base.Branches) - assert.Equal(t, commit4.Hash, base.Hash) + require.Len(t, base, 2) + assert.Equal(t, []string{"branch-b"}, base[0].Branches) + assert.Equal(t, commit4.Hash, base[0].Hash) + assert.Equal(t, commit2.Hash, base[1].Hash) }) t.Run("ignore unknown objects", func(t *testing.T) { // It's okay if the diff contains unknown hashes. @@ -564,7 +564,6 @@ index f70f10e..0000000 twoDiffs := append(append([]byte{}, diff...), diff2...) base, err := repo.repo.BaseForDiff(twoDiffs, &debugtracer.TestTracer{T: t}) require.NoError(t, err) - require.NotNil(t, base) - assert.Equal(t, []string{"branch-b"}, base.Branches) + require.Len(t, base, 2) }) } |
