From 20d37d280693b1646e3bd33ab3d8a0a947830a8e Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Thu, 15 Jan 2026 16:27:44 +0100 Subject: pkg/vcs: be more strict in BaseForDiff Do not tolerate unknown blob hashes - it means that we are unable to find the correct base commit given the repository. Explicitly ignore newly added files - we definitely won't find their hashes. Explicitly skip malformed patches that won't have any blob hashes - otherwise we could end up with too many candidates and waste too much time. --- pkg/vcs/git.go | 16 ++++++++++++---- pkg/vcs/git_test.go | 31 +++++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/pkg/vcs/git.go b/pkg/vcs/git.go index 8408715d0..8a51b3cfb 100644 --- a/pkg/vcs/git.go +++ b/pkg/vcs/git.go @@ -776,17 +776,22 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*Base } var fileNames []string nameToHash := map[string]string{} + ignoreFiles := map[string]struct{}{} for _, file := range ParseGitDiff(diff) { if strings.Trim(file.LeftHash, "0") == "" { - // Newly created file are not of any help here. + // Newly created files are not of any help here. + ignoreFiles[file.Name] = struct{}{} + continue + } + if _, ignore := ignoreFiles[file.Name]; ignore { continue } if ok, err := git.verifyHash(file.LeftHash); err != nil { return nil, fmt.Errorf("hash verification failed: %w", err) } else if !ok { - // The object is not known in this repository. - // Ignore it, or otherwise the command will fail. - continue + // The object is not known in this repository, so we won't find the exact base commit. + tracer.Log("unknown object %s, stopping base commit search", file.LeftHash) + return nil, nil } if _, ok := nameToHash[file.Name]; !ok { // If the diff is actually a concatenation of several diffs, we only @@ -797,6 +802,9 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*Base args = append(args, "--find-object="+file.LeftHash) } tracer.Log("extracted %d left blob hashes", len(nameToHash)) + if len(nameToHash) == 0 { + return nil, nil + } output, err := git.Run(args...) if err != nil { return nil, err diff --git a/pkg/vcs/git_test.go b/pkg/vcs/git_test.go index 5b5a3e14b..855587263 100644 --- a/pkg/vcs/git_test.go +++ b/pkg/vcs/git_test.go @@ -551,8 +551,8 @@ func TestBaseForDiff(t *testing.T) { assert.Equal(t, commit4.Hash, base[0].Hash) assert.Equal(t, commit2.Hash, base[1].Hash) }) - t.Run("ignore unknown objects", func(t *testing.T) { - // It's okay if the diff contains unknown hashes. + t.Run("unknown objects", func(t *testing.T) { + // It's not okay if the diff contains unknown hashes. diff2 := ` diff --git a/b.txt b/b.txt deleted file mode 100644 @@ -561,9 +561,36 @@ index f70f10e..0000000 +++ /dev/null @@ -1 +0,0 @@ -A` + twoDiffs := append(append([]byte{}, diff...), diff2...) + base, err := repo.repo.BaseForDiff(twoDiffs, &debugtracer.TestTracer{T: t}) + require.NoError(t, err) + require.Nil(t, base) + }) + t.Run("ignore new files", func(t *testing.T) { + diff2 := ` +diff --git a/a.txt b/a.txt +new file mode 100644 +index 0000000..fa49b07 +--- /dev/null ++++ b/a.txt +@@ -0,0 +1 @@ ++new file +diff --git a/a.txt b/a.txt +index fa49b07..01c887f 100644 +--- a/a.txt ++++ b/a.txt +@@ -1 +1 @@ +-new file ++edit file +` twoDiffs := append(append([]byte{}, diff...), diff2...) base, err := repo.repo.BaseForDiff(twoDiffs, &debugtracer.TestTracer{T: t}) require.NoError(t, err) require.Len(t, base, 2) }) + t.Run("empty patch", func(t *testing.T) { + base, err := repo.repo.BaseForDiff([]byte{}, &debugtracer.TestTracer{T: t}) + require.NoError(t, err) + require.Nil(t, base) + }) } -- cgit mrf-deployment