diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2026-02-13 14:56:42 +0100 |
|---|---|---|
| committer | Aleksandr Nogikh <nogikh@google.com> | 2026-02-16 13:38:59 +0000 |
| commit | 9a9eeb63872d625a7653bbafdf3fb01fb717bb8d (patch) | |
| tree | 8962766a46552633b699ed109e1d4c4bc27a5f18 /pkg/vcs/git.go | |
| parent | 1141f03b552018b61c969c59b55606a923421c9e (diff) | |
pkg/vcs: consider merge commits in base commit detection
Merge commits are important in three contexts:
1) They may bring together blobs created in different commits.
2) They may create new blob hashes when file changes are merged.
3) They replace blob hashes we were looking for.
A particularly unpleasant case for the previous approach is when a blob
hash disappears after a merge and the tips of the branches can no longer be
trusted, as the files of interest have also been replaced by the merges.
Add a test that sets up this complicated scenario and change the logic to
make the test pass:
1) Add a -m flag to consider merge commits.
2) Increase -n as there are lots of merge commits in the Linux kernel.
3) Consider all parents of merge commits.
Fixes #6777.
Diffstat (limited to 'pkg/vcs/git.go')
| -rw-r--r-- | pkg/vcs/git.go | 34 |
1 files changed, 17 insertions, 17 deletions
diff --git a/pkg/vcs/git.go b/pkg/vcs/git.go index 4ca19ebd7..dd9981873 100644 --- a/pkg/vcs/git.go +++ b/pkg/vcs/git.go @@ -782,8 +782,8 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*Base "log", "--all", "--no-renames", - // Note that we cannot accelerate it by specifying "--since" - "-n", "100", + "-m", + "-n", "500", `--format=%H:%P`, } var fileNames []string @@ -834,21 +834,21 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*Base // TODO: we can further reduce the search space by adding "--raw" to args // and only considering the commits that introduce the blobs from the diff. commit, parents, _ := strings.Cut(s.Text(), ":") - // Focus on the first parent. - candidate, _, _ := strings.Cut(parents, " ") - if candidate == "" { - // For the first commit, there's no parent. - candidate = commit - } - // Only focus on branches that are still alive. - const cutOffDays = 60 - list, err := git.BranchesThatContain(candidate, time.Now().Add(-time.Hour*24*cutOffDays)) - if err != nil { - return nil, fmt.Errorf("failed to query branches: %w", err) - } - for _, info := range list { - record(candidate, info.Branch) - record(info.Commit, info.Branch) + candidates := []string{commit} + if parents != "" { + candidates = append(candidates, strings.Split(parents, " ")...) + } + for _, candidate := range candidates { + // Only focus on branches that are still alive. + const cutOffDays = 60 + list, err := git.BranchesThatContain(candidate, time.Now().Add(-time.Hour*24*cutOffDays)) + if err != nil { + return nil, fmt.Errorf("failed to query branches: %w", err) + } + for _, info := range list { + record(candidate, info.Branch) + record(info.Commit, info.Branch) + } } } var ret []*BaseCommit |
