From 2cf092b8cfccda3ad7fc30eeaf025f97bd331bb8 Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Fri, 27 Feb 2026 12:55:04 +0000 Subject: pkg/vcs: fix repetitive change handling in BaseForDiff The current implementation runs into problems if we modify the same file several times in the patch series, since the sequential blob hashes will not be found in the repository. Fix it by moving the check for already processed files up. --- pkg/vcs/git.go | 12 ++++++------ pkg/vcs/git_test.go | 25 +++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 6 deletions(-) (limited to 'pkg') diff --git a/pkg/vcs/git.go b/pkg/vcs/git.go index f1311e312..efcb64470 100644 --- a/pkg/vcs/git.go +++ b/pkg/vcs/git.go @@ -805,6 +805,10 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*Base if _, ignore := ignoreFiles[file.Name]; ignore { continue } + if _, ok := nameToHash[file.Name]; ok { + // We only care about the first occurrence of a file in the diff series. + continue + } if ok, err := git.verifyHash(file.LeftHash); err != nil { return nil, fmt.Errorf("hash verification failed: %w", err) } else if !ok { @@ -812,12 +816,8 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*Base tracer.Logf("unknown object %s, stopping base commit search", file.LeftHash) return nil, nil } - if _, ok := nameToHash[file.Name]; !ok { - // If the diff is actually a concatenation of several diffs, we only - // want to remember the first left side hash for each file. 
- fileNames = append(fileNames, file.Name) - nameToHash[file.Name] = file.LeftHash - } + fileNames = append(fileNames, file.Name) + nameToHash[file.Name] = file.LeftHash args = append(args, "--find-object="+file.LeftHash) } tracer.Logf("extracted %d left blob hashes", len(nameToHash)) diff --git a/pkg/vcs/git_test.go b/pkg/vcs/git_test.go index f2fd9e059..d867ed692 100644 --- a/pkg/vcs/git_test.go +++ b/pkg/vcs/git_test.go @@ -599,6 +599,31 @@ index fa49b07..01c887f 100644 require.NoError(t, err) require.Nil(t, base) }) + t.Run("multiple modifications", func(t *testing.T) { + map1, _ := repo.repo.fileHashes(commit3.Hash, []string{"a.txt"}) + + twoDiffs := []byte(fmt.Sprintf(`diff --git a/a.txt b/a.txt +index %s..1111111 100644 +--- a/a.txt ++++ b/a.txt +@@ -1 +1 @@ +-update a.txt ++update a.txt again +diff --git a/a.txt b/a.txt +index 1111111..2222222 100644 +--- a/a.txt ++++ b/a.txt +@@ -1 +1 @@ +-update a.txt again ++update a.txt again and again +`, map1["a.txt"])) + + base, err := repo.repo.BaseForDiff(twoDiffs, &debugtracer.TestTracer{T: t}) + require.NoError(t, err) + require.Len(t, base, 1) + + assert.Equal(t, commit3.Hash, base[0].Hash) + }) } func TestBaseForDiffMerge(t *testing.T) { -- cgit mrf-deployment