aboutsummaryrefslogtreecommitdiffstats
path: root/pkg
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2026-01-12 17:10:21 +0100
committerAleksandr Nogikh <nogikh@google.com>2026-01-13 14:18:14 +0000
commit85894026b1b003844e8fda94056d80d88294b0be (patch)
tree405e1ed964a1d9ed2df0d813a366ffdde29b4780 /pkg
parentce94b10583a205d365175b0f18ed2c9d0765559c (diff)
pkg/vcs: return multiple base commit candidates
Return the commits that represent unique sets of branches. Sort the list topologically, breaking ties by commit date.
Diffstat (limited to 'pkg')
-rw-r--r--pkg/vcs/git.go67
-rw-r--r--pkg/vcs/git_test.go23
2 files changed, 62 insertions, 28 deletions
diff --git a/pkg/vcs/git.go b/pkg/vcs/git.go
index 6bb826edb..8408715d0 100644
--- a/pkg/vcs/git.go
+++ b/pkg/vcs/git.go
@@ -232,8 +232,7 @@ func (git *gitRepo) initRepo(reason error) error {
}
func (git *gitRepo) Contains(commit string) (bool, error) {
- _, err := git.Run("merge-base", "--is-ancestor", commit, HEAD)
- return err == nil, nil
+ return git.containedIn(HEAD, commit)
}
const gitDateFormat = "Mon Jan 2 15:04:05 2006 -0700"
@@ -753,9 +752,11 @@ type BaseCommit struct {
Branches []string
}
-// BaseForDiff selects the most recent commit that could have been the base commit
+// BaseForDiff returns a list of commits that could have been the base commit
// for the specified git patch.
-func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCommit, error) {
+// The returned list is minimized to only contain the commits that are represented in different
+// subsets of branches.
+func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*BaseCommit, error) {
// We can't just query git log with --find-object=HASH because that will only return
// the revisions where the hashed content was introduced or removed, while what we actually
// want is the latest revision(s) where the content modified in the diff is still in place.
@@ -771,7 +772,7 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCo
"--no-renames",
// Note that we cannot accelerate it by specifying "--since"
"-n", "100",
- `--format=%P`,
+ `--format=%H:%P`,
}
var fileNames []string
nameToHash := map[string]string{}
@@ -812,22 +813,25 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCo
for s.Scan() {
// TODO: we can further reduce the search space by adding "--raw" to args
// and only considering the commits that introduce the blobs from the diff.
- firstParent, _, _ := strings.Cut(s.Text(), " ")
- if firstParent == "" {
- continue
+ commit, parents, _ := strings.Cut(s.Text(), ":")
+ // Focus on the first parent.
+ candidate, _, _ := strings.Cut(parents, " ")
+ if candidate == "" {
+ // For the first commit, there's no parent.
+ candidate = commit
}
// Only focus on branches that are still alive.
const cutOffDays = 60
- list, err := git.branchesThatContain(firstParent, time.Now().Add(-time.Hour*24*cutOffDays))
+ list, err := git.branchesThatContain(candidate, time.Now().Add(-time.Hour*24*cutOffDays))
if err != nil {
return nil, fmt.Errorf("failed to query branches: %w", err)
}
for _, info := range list {
- record(firstParent, info.Branch)
+ record(candidate, info.Branch)
record(info.Commit, info.Branch)
}
}
- var ret *BaseCommit
+ var ret []*BaseCommit
for commit, branches := range commitBranches {
tracer.Log("considering %q [%q]", commit, branches)
fileHashes, err := git.fileHashes(commit, fileNames)
@@ -853,13 +857,39 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCo
if err != nil {
return nil, fmt.Errorf("failed to extract commit info: %w", err)
}
- tracer.Log("commit date is %v", info.CommitDate)
- // Select the most recent commit.
- if ret == nil || ret.CommitDate.Before(info.CommitDate) {
- ret = &BaseCommit{Commit: info, Branches: branchList}
+ tracer.Log("hashes match, commit date is %v, branches %v", info.CommitDate, branchList)
+ ret = append(ret, &BaseCommit{Commit: info, Branches: branchList})
+ }
+ return git.minimizeBaseCommits(ret)
+}
+
+func (git Git) minimizeBaseCommits(list []*BaseCommit) ([]*BaseCommit, error) {
+ // We want to preserve commits that are present in different subsets of branches.
+ // Then, we want to sort them topologically and break ties by date.
+ lastCommit := map[string]*BaseCommit{}
+ for _, item := range list {
+ key := strings.Join(item.Branches, ":")
+ prev, ok := lastCommit[key]
+ if !ok {
+ lastCommit[key] = item
+ continue
+ }
+ isNewer, err := git.containedIn(item.Hash, prev.Hash)
+ if err != nil {
+ return nil, fmt.Errorf("topological sort step failed: %w", err)
+ }
+ if isNewer {
+ lastCommit[key] = item
}
}
- return ret, nil
+ var filtered []*BaseCommit
+ for _, item := range lastCommit {
+ filtered = append(filtered, item)
+ }
+ sort.Slice(filtered, func(i, j int) bool {
+ return filtered[i].CommitDate.After(filtered[j].CommitDate)
+ })
+ return filtered, nil
}
// fileHashes returns the blob SHA hashes for a particular list of files on a particular commit.
@@ -924,6 +954,11 @@ func (git Git) verifyHash(hash string) (bool, error) {
return true, nil
}
+func (git Git) containedIn(parent, commit string) (bool, error) {
+ _, err := git.Run("merge-base", "--is-ancestor", commit, parent)
+ return err == nil, nil
+}
+
func (git Git) Diff(commitA, commitB string) ([]byte, error) {
return git.Run("diff", commitA+".."+commitB)
}
diff --git a/pkg/vcs/git_test.go b/pkg/vcs/git_test.go
index 542b572ac..5b5a3e14b 100644
--- a/pkg/vcs/git_test.go
+++ b/pkg/vcs/git_test.go
@@ -523,22 +523,21 @@ func TestBaseForDiff(t *testing.T) {
require.NoError(t, err)
// Create a different change on top of commit2.
repo.Git("checkout", "-b", "branch-a")
- time.Sleep(time.Second)
repo.commitChangeset("patch a.txt",
writeFile{"a.txt", "another change to a.txt"},
)
- // Yet the patch could only be applied to commit2
+ // Yet the patch could only be applied to commit1 or commit2.
base, err := repo.repo.BaseForDiff(diff, &debugtracer.TestTracer{T: t})
require.NoError(t, err)
- require.NotNil(t, base)
- assert.Equal(t, []string{"branch-a", "master"}, base.Branches)
- assert.Equal(t, commit2.Hash, base.Hash)
+ require.Len(t, base, 1)
+ assert.Equal(t, []string{"branch-a", "master"}, base[0].Branches)
+ assert.Equal(t, commit2.Hash, base[0].Hash)
})
t.Run("choose latest", func(t *testing.T) {
_, err := repo.repo.SwitchCommit(commit2.Hash)
require.NoError(t, err)
- // Wait a second and add one more commit.
- // (Otherwise the test might be flaky).
+ // Wait a second before adding another commit.
+ // Git does not remember milliseconds, so otherwise the commit sorting may be flaky.
time.Sleep(time.Second)
repo.Git("checkout", "-b", "branch-b")
commit4 := repo.commitChangeset("unrelated commit",
@@ -547,9 +546,10 @@ func TestBaseForDiff(t *testing.T) {
// Since the commit did not touch a.txt, it's the expected one.
base, err := repo.repo.BaseForDiff(diff, &debugtracer.TestTracer{T: t})
require.NoError(t, err)
- require.NotNil(t, base)
- assert.Equal(t, []string{"branch-b"}, base.Branches)
- assert.Equal(t, commit4.Hash, base.Hash)
+ require.Len(t, base, 2)
+ assert.Equal(t, []string{"branch-b"}, base[0].Branches)
+ assert.Equal(t, commit4.Hash, base[0].Hash)
+ assert.Equal(t, commit2.Hash, base[1].Hash)
})
t.Run("ignore unknown objects", func(t *testing.T) {
// It's okay if the diff contains unknown hashes.
@@ -564,7 +564,6 @@ index f70f10e..0000000
twoDiffs := append(append([]byte{}, diff...), diff2...)
base, err := repo.repo.BaseForDiff(twoDiffs, &debugtracer.TestTracer{T: t})
require.NoError(t, err)
- require.NotNil(t, base)
- assert.Equal(t, []string{"branch-b"}, base.Branches)
+ require.Len(t, base, 2)
})
}