diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2026-01-12 22:47:10 +0100 |
|---|---|---|
| committer | Aleksandr Nogikh <nogikh@google.com> | 2026-01-13 14:18:14 +0000 |
| commit | 393201ac23707dff38037f4961f955db7e413b67 (patch) | |
| tree | ba9ad0823a72fcfb92dd90fa6e8a215c39076e4d | |
| parent | 85894026b1b003844e8fda94056d80d88294b0be (diff) | |
syz-cluster: prioritize blob-based base commits
Consider Cc'd mailing lists when selecting the exact base commit.
Among the base commits determined based on blob sha value from the git
patch, first select the ones that match both the trees of the Cc'd
subsystems and their primary branches.
If it gives no exact match, select a base commit that comes from a tree
of a Cc'd subsystem. As fallback, take any subsystem tree.
This should prevent valid, but suprising patch series triage results.
| -rw-r--r-- | syz-cluster/pkg/triage/commit.go | 37 | ||||
| -rw-r--r-- | syz-cluster/pkg/triage/commit_test.go | 74 | ||||
| -rw-r--r-- | syz-cluster/pkg/triage/git.go | 8 | ||||
| -rw-r--r-- | syz-cluster/pkg/triage/tree.go | 11 | ||||
| -rw-r--r-- | syz-cluster/pkg/triage/tree_test.go | 12 | ||||
| -rw-r--r-- | syz-cluster/workflow/triage-step/main.go | 41 |
6 files changed, 145 insertions, 38 deletions
diff --git a/syz-cluster/pkg/triage/commit.go b/syz-cluster/pkg/triage/commit.go index 53229e32c..6d8644623 100644 --- a/syz-cluster/pkg/triage/commit.go +++ b/syz-cluster/pkg/triage/commit.go @@ -80,3 +80,40 @@ func (cs *CommitSelector) Select(series *api.Series, tree *api.Tree, lastBuild * } return SelectResult{Reason: reasonNotApplies}, nil } + +func FromBaseCommits(series *api.Series, baseCommits []*vcs.BaseCommit, trees []*api.Tree) (*api.Tree, string) { + // Technically, any one of baseCommits could be a good match. + // However, the developers have their own expectations regarding + // what tree and what branch are actually preferred there. + // So, among baseCommits, we still give preference to those that + // align with the mailing lists Cc'd by the patch series. + tree, commit := bestCommit(baseCommits, SelectTrees(series, trees)) + if tree != nil { + return tree, commit + } + return bestCommit(baseCommits, trees) +} + +func bestCommit(baseCommits []*vcs.BaseCommit, trees []*api.Tree) (*api.Tree, string) { + retTreeIdx, retSameBranch, retCommit := -1, false, "" + for _, commit := range baseCommits { + for _, commitBranch := range commit.Branches { + treeIdx, branch := FindTree(trees, commitBranch) + if treeIdx < 0 { + continue + } + sameBranch := branch == trees[treeIdx].Branch + // If, for the same tree, we also have matched the branch, even better. + if retTreeIdx < 0 || treeIdx < retTreeIdx || + treeIdx == retTreeIdx && !retSameBranch && sameBranch { + retTreeIdx = treeIdx + retSameBranch = sameBranch + retCommit = commit.Hash + } + } + } + if retTreeIdx < 0 { + return nil, "" + } + return trees[retTreeIdx], retCommit +} diff --git a/syz-cluster/pkg/triage/commit_test.go b/syz-cluster/pkg/triage/commit_test.go index d510d5b9b..80f6b99b2 100644 --- a/syz-cluster/pkg/triage/commit_test.go +++ b/syz-cluster/pkg/triage/commit_test.go @@ -87,6 +87,80 @@ func TestCommitSelector(t *testing.T) { } } +func TestFromBaseCommits(t *testing.T) { + trees := []*api.Tree{ + { + Name: "A", + Branch: "master", + EmailLists: []string{"list_A"}, + }, + { + Name: "B", + Branch: "master", + EmailLists: []string{"list_B"}, + }, + { + Name: "C", + Branch: "main", + EmailLists: []string{"list_C"}, + }, + { + Name: "D", + Branch: "main", + EmailLists: nil, + }, + } + commits := []*vcs.BaseCommit{ + { + Commit: &vcs.Commit{Hash: "first"}, + Branches: []string{"C/main"}, + }, + { + Commit: &vcs.Commit{Hash: "second"}, + Branches: []string{"A/other", "B/other"}, + }, + { + Commit: &vcs.Commit{Hash: "third"}, + Branches: []string{"A/master", "A/other"}, + }, + } + t.Run("best branch", func(t *testing.T) { + tree, commit := FromBaseCommits(&api.Series{ + Cc: []string{"list_A"}, + }, commits, trees) + assert.Equal(t, "A", tree.Name) + assert.Equal(t, "third", commit) + }) + t.Run("best tree", func(t *testing.T) { + // Even though C/main matches perfectly, there's a commit + // in the higher prio tree B. + tree, commit := FromBaseCommits(&api.Series{ + Cc: []string{"list_B", "list_C"}, + }, commits, trees) + assert.Equal(t, "B", tree.Name) + assert.Equal(t, "second", commit) + }) + t.Run("any tree", func(t *testing.T) { + // If no trees matching by Cc'd list are in the base commit list, + // consider all trees. + commits := []*vcs.BaseCommit{ + { + Commit: &vcs.Commit{Hash: "first"}, + Branches: []string{"B/main"}, + }, + { + Commit: &vcs.Commit{Hash: "second"}, + Branches: []string{"C/main"}, + }, + } + tree, commit := FromBaseCommits(&api.Series{ + Cc: []string{"list_A"}, + }, commits, trees) + assert.Equal(t, "B", tree.Name) + assert.Equal(t, "first", commit) + }) +} + func date(date string) time.Time { t, err := time.Parse("2006-Jan-02", date) if err != nil { diff --git a/syz-cluster/pkg/triage/git.go b/syz-cluster/pkg/triage/git.go index fb23a90b3..3119997f6 100644 --- a/syz-cluster/pkg/triage/git.go +++ b/syz-cluster/pkg/triage/git.go @@ -57,10 +57,6 @@ func (ops *GitTreeOps) ApplySeries(commit string, patches [][]byte) error { return nil } -func (ops *GitTreeOps) BaseForDiff(patch []byte, tracer debugtracer.DebugTracer) (*vcs.BaseCommit, error) { - list, err := ops.Git.BaseForDiff(patch, tracer) - if len(list) == 0 || err != nil { - return nil, err - } - return list[0], nil +func (ops *GitTreeOps) BaseForDiff(patch []byte, tracer debugtracer.DebugTracer) ([]*vcs.BaseCommit, error) { + return ops.Git.BaseForDiff(patch, tracer) } diff --git a/syz-cluster/pkg/triage/tree.go b/syz-cluster/pkg/triage/tree.go index 5d75c380a..b9f87b073 100644 --- a/syz-cluster/pkg/triage/tree.go +++ b/syz-cluster/pkg/triage/tree.go @@ -46,11 +46,12 @@ func SelectTrees(series *api.Series, trees []*api.Tree) []*api.Tree { return result } -func TreeFromBranch(trees []*api.Tree, branch string) *api.Tree { - for _, tree := range trees { - if strings.HasPrefix(branch, tree.Name+"/") { - return tree +func FindTree(trees []*api.Tree, branch string) (int, string) { + for idx, tree := range trees { + branchName, ok := strings.CutPrefix(branch, tree.Name+"/") + if ok { + return idx, branchName } } - return nil + return -1, "" } diff --git a/syz-cluster/pkg/triage/tree_test.go b/syz-cluster/pkg/triage/tree_test.go index 92d3ce014..ef39ea72e 100644 --- a/syz-cluster/pkg/triage/tree_test.go +++ b/syz-cluster/pkg/triage/tree_test.go @@ -76,7 +76,13 @@ func TestSelectTrees(t *testing.T) { } func TestTreeFromBranch(t *testing.T) { - treeA, treeB := &api.Tree{Name: "a"}, &api.Tree{Name: "b"} - assert.Equal(t, treeA, TreeFromBranch([]*api.Tree{treeA, treeB}, "a/some_branch")) - assert.Equal(t, treeB, TreeFromBranch([]*api.Tree{treeA, treeB}, "b/some_branch")) + trees := []*api.Tree{{Name: "a"}, {Name: "b"}} + treeIdx, branch := FindTree(trees, "a/some_branch") + assert.Equal(t, 0, treeIdx) + assert.Equal(t, "some_branch", branch) + treeIdx, branch = FindTree(trees, "b/some_branch") + assert.Equal(t, 1, treeIdx) + assert.Equal(t, "some_branch", branch) + treeIdx, _ = FindTree(trees, "c/some_branch") + assert.Equal(t, -1, treeIdx) } diff --git a/syz-cluster/workflow/triage-step/main.go b/syz-cluster/workflow/triage-step/main.go index ff2b51d26..042409f97 100644 --- a/syz-cluster/workflow/triage-step/main.go +++ b/syz-cluster/workflow/triage-step/main.go @@ -79,12 +79,6 @@ func (triager *seriesTriager) GetVerdict(ctx context.Context, sessionID string) if err != nil { return nil, fmt.Errorf("failed to query trees: %w", err) } - selectedTrees := triage.SelectTrees(series, treesResp.Trees) - if len(selectedTrees) == 0 { - return &api.TriageResult{ - SkipReason: "no suitable base kernel trees found", - }, nil - } fuzzConfigs := triage.MergeKernelFuzzConfigs(triage.SelectFuzzConfigs(series, treesResp.FuzzTargets)) if len(fuzzConfigs) == 0 { return &api.TriageResult{ @@ -93,7 +87,7 @@ func (triager *seriesTriager) GetVerdict(ctx context.Context, sessionID string) } ret := &api.TriageResult{} for _, campaign := range fuzzConfigs { - fuzzTask, err := triager.prepareFuzzingTask(ctx, series, selectedTrees, campaign) + fuzzTask, err := triager.prepareFuzzingTask(ctx, series, treesResp.Trees, campaign) var skipErr *SkipTriageError if errors.As(err, &skipErr) { ret.SkipReason = skipErr.Reason.Error() @@ -123,7 +117,7 @@ func (triager *seriesTriager) prepareFuzzingTask(ctx context.Context, series *ap } } if result != nil { - triager.Log("continuing with %+v", result) + triager.Log("continuing with %v in %v", result.Commit, result.Tree.Name) base := api.BuildRequest{ TreeName: result.Tree.Name, TreeURL: result.Tree.URL, @@ -158,31 +152,30 @@ func (triager *seriesTriager) selectFromBlobs(series *api.Series, trees []*api.T diff = append(diff, patch.Body...) diff = append(diff, '\n') } - base, err := triager.ops.BaseForDiff(diff, triager.DebugTracer) + baseList, err := triager.ops.BaseForDiff(diff, triager.DebugTracer) if err != nil { return nil, err - } else if base == nil { + } + tree, commit := triage.FromBaseCommits(series, baseList, trees) + if tree == nil { triager.Log("no candidate base commit is found") return nil, nil } - for _, branch := range base.Branches { - tree := triage.TreeFromBranch(trees, branch) - if tree != nil { - return &SelectResult{ - Tree: tree, - Commit: base.Hash, - Arch: fuzzArch, - }, nil - } - } - triager.Log("cannot identify the tree from %q", base.Branches) - return nil, nil + return &SelectResult{ + Tree: tree, + Commit: commit, + Arch: fuzzArch, + }, nil } func (triager *seriesTriager) selectFromList(ctx context.Context, series *api.Series, trees []*api.Tree, target *triage.MergedFuzzConfig) (*SelectResult, error) { - skipErr := SkipError("empty tree list") - for _, tree := range trees { + selectedTrees := triage.SelectTrees(series, trees) + if len(selectedTrees) == 0 { + return nil, SkipError("no suitable base kernel trees found") + } + var skipErr error + for _, tree := range selectedTrees { triager.Log("considering tree %q", tree.Name) lastBuild, err := triager.client.LastBuild(ctx, &api.LastBuildReq{ Arch: fuzzArch, |
