From ccdcad97ffb81efdf1d1c0a06bae8b57da28991d Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Thu, 19 Feb 2026 15:28:48 +0000 Subject: syz-cluster: deduplicate findings during reporting Now that the same bug could be found on in multiple session tests (e.g. during fuzzing and during repro retesting), ensure that we do not report it twice. Add a test and fix the logic. --- syz-cluster/pkg/controller/api_test.go | 6 +-- syz-cluster/pkg/reporter/dedup_test.go | 89 ++++++++++++++++++++++++++++++++++ syz-cluster/pkg/service/finding.go | 66 ++++++++++++++++++++++--- 3 files changed, 150 insertions(+), 11 deletions(-) create mode 100644 syz-cluster/pkg/reporter/dedup_test.go diff --git a/syz-cluster/pkg/controller/api_test.go b/syz-cluster/pkg/controller/api_test.go index 7818c95f7..297b20b3f 100644 --- a/syz-cluster/pkg/controller/api_test.go +++ b/syz-cluster/pkg/controller/api_test.go @@ -283,12 +283,12 @@ func TestAPIListPreviousFindings(t *testing.T) { finding1, err := client.GetFinding(ctx, list[0]) require.NoError(t, err) - assert.Equal(t, "Crash in foo", finding1.Title) - assert.Equal(t, idsV2.SessionID, finding1.SessionID) + assert.Equal(t, "Crash in bar", finding1.Title) + assert.Equal(t, idsV1.SessionID, finding1.SessionID) finding2, err := client.GetFinding(ctx, list[1]) require.NoError(t, err) - assert.Equal(t, "Crash in bar", finding2.Title) + assert.Equal(t, "Crash in foo", finding2.Title) assert.Equal(t, idsV1.SessionID, finding2.SessionID) list, err = client.ListPreviousFindings(ctx, &api.ListPreviousFindingsReq{ diff --git a/syz-cluster/pkg/reporter/dedup_test.go b/syz-cluster/pkg/reporter/dedup_test.go new file mode 100644 index 000000000..17a375557 --- /dev/null +++ b/syz-cluster/pkg/reporter/dedup_test.go @@ -0,0 +1,89 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package reporter + +import ( + "testing" + + "github.com/google/syzkaller/syz-cluster/pkg/api" + "github.com/google/syzkaller/syz-cluster/pkg/app" + "github.com/google/syzkaller/syz-cluster/pkg/controller" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDeduplicationInReport(t *testing.T) { + env, ctx := app.TestEnvironment(t) + client := controller.TestServer(t, env) + series := controller.DummySeries() + + _, err := client.UploadSeries(ctx, series) + require.NoError(t, err) + + ulSession, err := client.UploadSession(ctx, &api.NewSession{ + ExtID: series.ExtID, + }) + require.NoError(t, err) + sessionID := ulSession.ID + + build := controller.DummyBuild() + ulBuild, err := client.UploadBuild(ctx, &api.UploadBuildReq{ + Build: *build, + }) + require.NoError(t, err) + buildID := ulBuild.ID + + err = client.UploadSessionTest(ctx, &api.SessionTest{ + SessionID: sessionID, + TestName: "step-1", + PatchedBuildID: buildID, + BaseBuildID: buildID, + Result: api.TestPassed, + }) + require.NoError(t, err) + + err = client.UploadFinding(ctx, &api.RawFinding{ + SessionID: sessionID, + TestName: "step-1", + Title: "duplicate-title", + Log: []byte("log1"), + Report: []byte("report1"), + }) + require.NoError(t, err) + + err = client.UploadSessionTest(ctx, &api.SessionTest{ + SessionID: sessionID, + TestName: "step-2", + PatchedBuildID: buildID, + BaseBuildID: buildID, + Result: api.TestPassed, + }) + require.NoError(t, err) + + err = client.UploadFinding(ctx, &api.RawFinding{ + SessionID: sessionID, + TestName: "step-2", + Title: "duplicate-title", + Log: []byte("log2"), + Report: []byte("report2"), + }) + require.NoError(t, err) + + controller.MarkSessionFinished(t, env, sessionID) + + generator := NewGenerator(env) + err = generator.Process(ctx, 1) + require.NoError(t, err) + + reportClient := TestServer(t, env) + nextResp, err := reportClient.GetNextReport(ctx, api.LKMLReporter) + require.NoError(t, err) + require.NotNil(t, nextResp.Report) + + require.Len(t, nextResp.Report.Findings, 1) + f := nextResp.Report.Findings[0] + assert.Equal(t, "duplicate-title", f.Title) + assert.Equal(t, "report1", f.Report) + require.NoError(t, reportClient.ConfirmReport(ctx, nextResp.Report.ID)) +} diff --git a/syz-cluster/pkg/service/finding.go b/syz-cluster/pkg/service/finding.go index bfa7d13f1..d8d7c46ca 100644 --- a/syz-cluster/pkg/service/finding.go +++ b/syz-cluster/pkg/service/finding.go @@ -9,6 +9,7 @@ import ( "errors" "fmt" "slices" + "strings" "time" "cloud.google.com/go/spanner" @@ -118,6 +119,7 @@ func (s *FindingService) List(ctx context.Context, sessionID string, limit int) if err != nil { return nil, fmt.Errorf("failed to query the list: %w", err) } + list = deduplicateFindings(list) tests, err := s.sessionTestRepo.BySession(ctx, sessionID) if err != nil { return nil, fmt.Errorf("failed to query session tests: %w", err) @@ -168,7 +170,7 @@ func (s *FindingService) ListPreviousFindings(ctx context.Context, req *api.List return nil, fmt.Errorf("failed to list all versions: %w", err) } var ret []string - seenTitles := map[string]bool{} + var allFindings []*db.Finding // Prefer newer versions. for _, ver := range slices.Backward(allVersions) { if ver.ID == req.SeriesID { @@ -199,17 +201,17 @@ func (s *FindingService) ListPreviousFindings(ctx context.Context, req *api.List if err != nil { return nil, fmt.Errorf("failed to list findings for session %s: %w", sessionID, err) } - for _, f := range findings { - if !f.InvalidatedAt.IsNull() { + for _, finding := range findings { + if !finding.InvalidatedAt.IsNull() { continue } - if seenTitles[f.Title] { - continue - } - seenTitles[f.Title] = true - ret = append(ret, f.ID) + allFindings = append(allFindings, finding) } } + allFindings = deduplicateFindings(allFindings) + for _, f := range allFindings { + ret = append(ret, f.ID) + } return ret, nil } @@ -280,3 +282,51 @@ func (s *FindingService) matchesPrevFindingsReq( } return true, nil } + +func deduplicateFindings(findings []*db.Finding) []*db.Finding { + grouped := make(map[string][]*db.Finding) + for _, f := range findings { + grouped[f.Title] = append(grouped[f.Title], f) + } + + var ret []*db.Finding + for _, group := range grouped { + best := group[0] + for _, f := range group[1:] { + if isBetterFinding(f, best) { + best = f + } + } + ret = append(ret, best) + } + // Ensure the result is deterministic. + slices.SortFunc(ret, func(a, b *db.Finding) int { + return strings.Compare(a.Title, b.Title) + }) + return ret +} + +func isBetterFinding(newF, oldF *db.Finding) bool { + // 1. Prefer C repro. + if newF.CReproURI != "" && oldF.CReproURI == "" { + return true + } + if newF.CReproURI == "" && oldF.CReproURI != "" { + return false + } + // 2. Prefer Syz repro. + if newF.SyzReproURI != "" && oldF.SyzReproURI == "" { + return true + } + if newF.SyzReproURI == "" && oldF.SyzReproURI != "" { + return false + } + // 3. First reporterd come first. + if newF.CreatedAt.Valid != oldF.CreatedAt.Valid { + return newF.CreatedAt.Valid + } + if newF.CreatedAt.Valid { + return newF.CreatedAt.Time.Before(oldF.CreatedAt.Time) + } + return newF.ID < oldF.ID +} -- cgit mrf-deployment