diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2026-01-12 17:10:21 +0100 |
|---|---|---|
| committer | Aleksandr Nogikh <nogikh@google.com> | 2026-01-13 14:18:14 +0000 |
| commit | 85894026b1b003844e8fda94056d80d88294b0be (patch) | |
| tree | 405e1ed964a1d9ed2df0d813a366ffdde29b4780 /pkg/vcs/git.go | |
| parent | ce94b10583a205d365175b0f18ed2c9d0765559c (diff) | |
pkg/vcs: return multiple base commit candidates
Return the commits that represent unique sets of branches.
Sort the list topologically, breaking ties by commit date.
Diffstat (limited to 'pkg/vcs/git.go')
| -rw-r--r-- | pkg/vcs/git.go | 67 |
1 files changed, 51 insertions, 16 deletions
diff --git a/pkg/vcs/git.go b/pkg/vcs/git.go index 6bb826edb..8408715d0 100644 --- a/pkg/vcs/git.go +++ b/pkg/vcs/git.go @@ -232,8 +232,7 @@ func (git *gitRepo) initRepo(reason error) error { } func (git *gitRepo) Contains(commit string) (bool, error) { - _, err := git.Run("merge-base", "--is-ancestor", commit, HEAD) - return err == nil, nil + return git.containedIn(HEAD, commit) } const gitDateFormat = "Mon Jan 2 15:04:05 2006 -0700" @@ -753,9 +752,11 @@ type BaseCommit struct { Branches []string } -// BaseForDiff selects the most recent commit that could have been the base commit +// BaseForDiff returns a list of commits that could have been the base commit // for the specified git patch. -func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCommit, error) { +// The returned list is minimized to only contain the commits that are represented in different +// subsets of branches. +func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*BaseCommit, error) { // We can't just query git log with --find-object=HASH because that will only return // the revisions where the hashed content was introduced or removed, while what we actually // want is the latest revision(s) where the content modified in the diff is still in place. @@ -771,7 +772,7 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCo "--no-renames", // Note that we cannot accelerate it by specifying "--since" "-n", "100", - `--format=%P`, + `--format=%H:%P`, } var fileNames []string nameToHash := map[string]string{} @@ -812,22 +813,25 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCo for s.Scan() { // TODO: we can further reduce the search space by adding "--raw" to args // and only considering the commits that introduce the blobs from the diff. - firstParent, _, _ := strings.Cut(s.Text(), " ") - if firstParent == "" { - continue + commit, parents, _ := strings.Cut(s.Text(), ":") + // Focus on the first parent. + candidate, _, _ := strings.Cut(parents, " ") + if candidate == "" { + // For the first commit, there's no parent. + candidate = commit } // Only focus on branches that are still alive. const cutOffDays = 60 - list, err := git.branchesThatContain(firstParent, time.Now().Add(-time.Hour*24*cutOffDays)) + list, err := git.branchesThatContain(candidate, time.Now().Add(-time.Hour*24*cutOffDays)) if err != nil { return nil, fmt.Errorf("failed to query branches: %w", err) } for _, info := range list { - record(firstParent, info.Branch) + record(candidate, info.Branch) record(info.Commit, info.Branch) } } - var ret *BaseCommit + var ret []*BaseCommit for commit, branches := range commitBranches { tracer.Log("considering %q [%q]", commit, branches) fileHashes, err := git.fileHashes(commit, fileNames) @@ -853,13 +857,39 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) (*BaseCo if err != nil { return nil, fmt.Errorf("failed to extract commit info: %w", err) } - tracer.Log("commit date is %v", info.CommitDate) - // Select the most recent commit. - if ret == nil || ret.CommitDate.Before(info.CommitDate) { - ret = &BaseCommit{Commit: info, Branches: branchList} + tracer.Log("hashes match, commit date is %v, branches %v", info.CommitDate, branchList) + ret = append(ret, &BaseCommit{Commit: info, Branches: branchList}) + } + return git.minimizeBaseCommits(ret) +} + +func (git Git) minimizeBaseCommits(list []*BaseCommit) ([]*BaseCommit, error) { + // We want to preserve commits that are present in different subsets of branches. + // Then, we want to sort them topologically and break ties by date. + lastCommit := map[string]*BaseCommit{} + for _, item := range list { + key := strings.Join(item.Branches, ":") + prev, ok := lastCommit[key] + if !ok { + lastCommit[key] = item + continue + } + isNewer, err := git.containedIn(item.Hash, prev.Hash) + if err != nil { + return nil, fmt.Errorf("topological sort step failed: %w", err) + } + if isNewer { + lastCommit[key] = item } } - return ret, nil + var filtered []*BaseCommit + for _, item := range lastCommit { + filtered = append(filtered, item) + } + sort.Slice(filtered, func(i, j int) bool { + return filtered[i].CommitDate.After(filtered[j].CommitDate) + }) + return filtered, nil } // fileHashes returns the blob SHA hashes for a particular list of files on a particular commit. @@ -924,6 +954,11 @@ func (git Git) verifyHash(hash string) (bool, error) { return true, nil } +func (git Git) containedIn(parent, commit string) (bool, error) { + _, err := git.Run("merge-base", "--is-ancestor", commit, parent) + return err == nil, nil +} + func (git Git) Diff(commitA, commitB string) ([]byte, error) { return git.Run("diff", commitA+".."+commitB) } |
