aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/vcs/git.go
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2026-02-13 14:56:42 +0100
committerAleksandr Nogikh <nogikh@google.com>2026-02-16 13:38:59 +0000
commit9a9eeb63872d625a7653bbafdf3fb01fb717bb8d (patch)
tree8962766a46552633b699ed109e1d4c4bc27a5f18 /pkg/vcs/git.go
parent1141f03b552018b61c969c59b55606a923421c9e (diff)
pkg/vcs: consider merge commits in base commit detection
Merge commits are important in three contexts: 1) They may bring together blobs created in different commits. 2) They may create new blob hashes when file changes are merged. 3) They replace blob hashes we were looking for. A particularly unpleasant case for the previous approach is when a blob hash disappears after a merge and the tips of the branches can no longer be trusted, as the files of interest have also been replaced by the merges. Add a test that reproduces this complicated scenario and change the logic to make the test pass: 1) Add a -m flag to consider merge commits. 2) Increase -n as there are lots of merge commits in the Linux kernel. 3) Consider all parents of merge commits. Fixes #6777.
Diffstat (limited to 'pkg/vcs/git.go')
-rw-r--r--pkg/vcs/git.go34
1 files changed, 17 insertions, 17 deletions
diff --git a/pkg/vcs/git.go b/pkg/vcs/git.go
index 4ca19ebd7..dd9981873 100644
--- a/pkg/vcs/git.go
+++ b/pkg/vcs/git.go
@@ -782,8 +782,8 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*Base
"log",
"--all",
"--no-renames",
- // Note that we cannot accelerate it by specifying "--since"
- "-n", "100",
+ "-m",
+ "-n", "500",
`--format=%H:%P`,
}
var fileNames []string
@@ -834,21 +834,21 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*Base
// TODO: we can further reduce the search space by adding "--raw" to args
// and only considering the commits that introduce the blobs from the diff.
commit, parents, _ := strings.Cut(s.Text(), ":")
- // Focus on the first parent.
- candidate, _, _ := strings.Cut(parents, " ")
- if candidate == "" {
- // For the first commit, there's no parent.
- candidate = commit
- }
- // Only focus on branches that are still alive.
- const cutOffDays = 60
- list, err := git.BranchesThatContain(candidate, time.Now().Add(-time.Hour*24*cutOffDays))
- if err != nil {
- return nil, fmt.Errorf("failed to query branches: %w", err)
- }
- for _, info := range list {
- record(candidate, info.Branch)
- record(info.Commit, info.Branch)
+ candidates := []string{commit}
+ if parents != "" {
+ candidates = append(candidates, strings.Split(parents, " ")...)
+ }
+ for _, candidate := range candidates {
+ // Only focus on branches that are still alive.
+ const cutOffDays = 60
+ list, err := git.BranchesThatContain(candidate, time.Now().Add(-time.Hour*24*cutOffDays))
+ if err != nil {
+ return nil, fmt.Errorf("failed to query branches: %w", err)
+ }
+ for _, info := range list {
+ record(candidate, info.Branch)
+ record(info.Commit, info.Branch)
+ }
}
}
var ret []*BaseCommit