From 9a9eeb63872d625a7653bbafdf3fb01fb717bb8d Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Fri, 13 Feb 2026 14:56:42 +0100 Subject: pkg/vcs: consider merge commits in base commit detection Merge commits are important in three contexts: 1) They may bring together blobs created in different commits. 2) They may create new blob hashes when file changes are merged. 3) They replace blob hashes we were looking for. A particularly unpleasant case for the previous approach is when a blob hash disappears after a merge and tips of the branches can no longer be trusted, as the files of interest have also been replaced by the merges. Add a test that sets up this complicated scenario and change the logic to make the test pass: 1) Add a -m flag to consider merge commits. 2) Increase -n as there are lots of merge commits in the Linux kernel. 3) Consider all parents of merge commits. Fixes #6777. --- pkg/vcs/git.go | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'pkg/vcs/git.go') diff --git a/pkg/vcs/git.go b/pkg/vcs/git.go index 4ca19ebd7..dd9981873 100644 --- a/pkg/vcs/git.go +++ b/pkg/vcs/git.go @@ -782,8 +782,8 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*Base "log", "--all", "--no-renames", - // Note that we cannot accelerate it by specifying "--since" - "-n", "100", + "-m", + "-n", "500", `--format=%H:%P`, } var fileNames []string @@ -834,21 +834,21 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*Base // TODO: we can further reduce the search space by adding "--raw" to args // and only considering the commits that introduce the blobs from the diff. commit, parents, _ := strings.Cut(s.Text(), ":") - // Focus on the first parent. - candidate, _, _ := strings.Cut(parents, " ") - if candidate == "" { - // For the first commit, there's no parent. - candidate = commit - } - // Only focus on branches that are still alive. 
- const cutOffDays = 60 - list, err := git.BranchesThatContain(candidate, time.Now().Add(-time.Hour*24*cutOffDays)) - if err != nil { - return nil, fmt.Errorf("failed to query branches: %w", err) - } - for _, info := range list { - record(candidate, info.Branch) - record(info.Commit, info.Branch) + candidates := []string{commit} + if parents != "" { + candidates = append(candidates, strings.Split(parents, " ")...) + } + for _, candidate := range candidates { + // Only focus on branches that are still alive. + const cutOffDays = 60 + list, err := git.BranchesThatContain(candidate, time.Now().Add(-time.Hour*24*cutOffDays)) + if err != nil { + return nil, fmt.Errorf("failed to query branches: %w", err) + } + for _, info := range list { + record(candidate, info.Branch) + record(info.Commit, info.Branch) + } } } var ret []*BaseCommit -- cgit mrf-deployment