diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2026-01-15 16:27:44 +0100 |
|---|---|---|
| committer | Aleksandr Nogikh <nogikh@google.com> | 2026-01-16 10:03:21 +0000 |
| commit | 20d37d280693b1646e3bd33ab3d8a0a947830a8e (patch) | |
| tree | 6f93d858125f8c627f26ca54d0e9809d2c2f1cd8 | |
| parent | 0c2e1647a23d32431208ff218f6ee59cd9c216b5 (diff) | |
pkg/vcs: be more strict in BaseForDiff
Do not tolerate unknown blob hashes - it means that we are unable to
find the correct base commit given the repository.
Explicitly ignore newly added files - we definitely won't find their
hashes.
Explicitly skip malformed patches that won't have any blob hashes -
otherwise we could end up with too many candidates and waste too much
time.
| -rw-r--r-- | pkg/vcs/git.go | 16 | ||||
| -rw-r--r-- | pkg/vcs/git_test.go | 31 |
2 files changed, 41 insertions, 6 deletions
diff --git a/pkg/vcs/git.go b/pkg/vcs/git.go index 8408715d0..8a51b3cfb 100644 --- a/pkg/vcs/git.go +++ b/pkg/vcs/git.go @@ -776,17 +776,22 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*Base } var fileNames []string nameToHash := map[string]string{} + ignoreFiles := map[string]struct{}{} for _, file := range ParseGitDiff(diff) { if strings.Trim(file.LeftHash, "0") == "" { - // Newly created file are not of any help here. + // Newly created files are not of any help here. + ignoreFiles[file.Name] = struct{}{} + continue + } + if _, ignore := ignoreFiles[file.Name]; ignore { continue } if ok, err := git.verifyHash(file.LeftHash); err != nil { return nil, fmt.Errorf("hash verification failed: %w", err) } else if !ok { - // The object is not known in this repository. - // Ignore it, or otherwise the command will fail. - continue + // The object is not known in this repository, so we won't find the exact base commit. + tracer.Log("unknown object %s, stopping base commit search", file.LeftHash) + return nil, nil } if _, ok := nameToHash[file.Name]; !ok { // If the diff is actually a concatenation of several diffs, we only @@ -797,6 +802,9 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*Base args = append(args, "--find-object="+file.LeftHash) } tracer.Log("extracted %d left blob hashes", len(nameToHash)) + if len(nameToHash) == 0 { + return nil, nil + } output, err := git.Run(args...) if err != nil { return nil, err diff --git a/pkg/vcs/git_test.go b/pkg/vcs/git_test.go index 5b5a3e14b..855587263 100644 --- a/pkg/vcs/git_test.go +++ b/pkg/vcs/git_test.go @@ -551,8 +551,8 @@ func TestBaseForDiff(t *testing.T) { assert.Equal(t, commit4.Hash, base[0].Hash) assert.Equal(t, commit2.Hash, base[1].Hash) }) - t.Run("ignore unknown objects", func(t *testing.T) { - // It's okay if the diff contains unknown hashes. + t.Run("unknown objects", func(t *testing.T) { + // It's not okay if the diff contains unknown hashes. diff2 := ` diff --git a/b.txt b/b.txt deleted file mode 100644 @@ -564,6 +564,33 @@ index f70f10e..0000000 twoDiffs := append(append([]byte{}, diff...), diff2...) base, err := repo.repo.BaseForDiff(twoDiffs, &debugtracer.TestTracer{T: t}) require.NoError(t, err) + require.Nil(t, base) + }) + t.Run("ignore new files", func(t *testing.T) { + diff2 := ` +diff --git a/a.txt b/a.txt +new file mode 100644 +index 0000000..fa49b07 +--- /dev/null ++++ b/a.txt +@@ -0,0 +1 @@ ++new file +diff --git a/a.txt b/a.txt +index fa49b07..01c887f 100644 +--- a/a.txt ++++ b/a.txt +@@ -1 +1 @@ +-new file ++edit file +` + twoDiffs := append(append([]byte{}, diff...), diff2...) + base, err := repo.repo.BaseForDiff(twoDiffs, &debugtracer.TestTracer{T: t}) + require.NoError(t, err) require.Len(t, base, 2) }) + t.Run("empty patch", func(t *testing.T) { + base, err := repo.repo.BaseForDiff([]byte{}, &debugtracer.TestTracer{T: t}) + require.NoError(t, err) + require.Nil(t, base) + }) } |
