aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2026-01-15 16:27:44 +0100
committerAleksandr Nogikh <nogikh@google.com>2026-01-16 10:03:21 +0000
commit20d37d280693b1646e3bd33ab3d8a0a947830a8e (patch)
tree6f93d858125f8c627f26ca54d0e9809d2c2f1cd8
parent0c2e1647a23d32431208ff218f6ee59cd9c216b5 (diff)
pkg/vcs: be more strict in BaseForDiff
Do not tolerate unknown blob hashes - it means that we are unable to find the correct base commit given the repository. Explicitly ignore newly added files - we definitely won't find their hashes. Explicitly skip malformed patches that won't have any blob hashes - otherwise we could end up with too many candidates and waste too much time.
-rw-r--r--pkg/vcs/git.go16
-rw-r--r--pkg/vcs/git_test.go31
2 files changed, 41 insertions, 6 deletions
diff --git a/pkg/vcs/git.go b/pkg/vcs/git.go
index 8408715d0..8a51b3cfb 100644
--- a/pkg/vcs/git.go
+++ b/pkg/vcs/git.go
@@ -776,17 +776,22 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*Base
}
var fileNames []string
nameToHash := map[string]string{}
+ ignoreFiles := map[string]struct{}{}
for _, file := range ParseGitDiff(diff) {
if strings.Trim(file.LeftHash, "0") == "" {
- // Newly created file are not of any help here.
+ // Newly created files are not of any help here.
+ ignoreFiles[file.Name] = struct{}{}
+ continue
+ }
+ if _, ignore := ignoreFiles[file.Name]; ignore {
continue
}
if ok, err := git.verifyHash(file.LeftHash); err != nil {
return nil, fmt.Errorf("hash verification failed: %w", err)
} else if !ok {
- // The object is not known in this repository.
- // Ignore it, or otherwise the command will fail.
- continue
+ // The object is not known in this repository, so we won't find the exact base commit.
+ tracer.Log("unknown object %s, stopping base commit search", file.LeftHash)
+ return nil, nil
}
if _, ok := nameToHash[file.Name]; !ok {
// If the diff is actually a concatenation of several diffs, we only
@@ -797,6 +802,9 @@ func (git Git) BaseForDiff(diff []byte, tracer debugtracer.DebugTracer) ([]*Base
args = append(args, "--find-object="+file.LeftHash)
}
tracer.Log("extracted %d left blob hashes", len(nameToHash))
+ if len(nameToHash) == 0 {
+ return nil, nil
+ }
output, err := git.Run(args...)
if err != nil {
return nil, err
diff --git a/pkg/vcs/git_test.go b/pkg/vcs/git_test.go
index 5b5a3e14b..855587263 100644
--- a/pkg/vcs/git_test.go
+++ b/pkg/vcs/git_test.go
@@ -551,8 +551,8 @@ func TestBaseForDiff(t *testing.T) {
assert.Equal(t, commit4.Hash, base[0].Hash)
assert.Equal(t, commit2.Hash, base[1].Hash)
})
- t.Run("ignore unknown objects", func(t *testing.T) {
- // It's okay if the diff contains unknown hashes.
+ t.Run("unknown objects", func(t *testing.T) {
+ // It's not okay if the diff contains unknown hashes.
diff2 := `
diff --git a/b.txt b/b.txt
deleted file mode 100644
@@ -564,6 +564,33 @@ index f70f10e..0000000
twoDiffs := append(append([]byte{}, diff...), diff2...)
base, err := repo.repo.BaseForDiff(twoDiffs, &debugtracer.TestTracer{T: t})
require.NoError(t, err)
+ require.Nil(t, base)
+ })
+ t.Run("ignore new files", func(t *testing.T) {
+ diff2 := `
+diff --git a/a.txt b/a.txt
+new file mode 100644
+index 0000000..fa49b07
+--- /dev/null
++++ b/a.txt
+@@ -0,0 +1 @@
++new file
+diff --git a/a.txt b/a.txt
+index fa49b07..01c887f 100644
+--- a/a.txt
++++ b/a.txt
+@@ -1 +1 @@
+-new file
++edit file
+`
+ twoDiffs := append(append([]byte{}, diff...), diff2...)
+ base, err := repo.repo.BaseForDiff(twoDiffs, &debugtracer.TestTracer{T: t})
+ require.NoError(t, err)
require.Len(t, base, 2)
})
+ t.Run("empty patch", func(t *testing.T) {
+ base, err := repo.repo.BaseForDiff([]byte{}, &debugtracer.TestTracer{T: t})
+ require.NoError(t, err)
+ require.Nil(t, base)
+ })
}