diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2026-02-19 15:28:48 +0000 |
|---|---|---|
| committer | Aleksandr Nogikh <nogikh@google.com> | 2026-03-04 18:19:12 +0000 |
| commit | ccdcad97ffb81efdf1d1c0a06bae8b57da28991d (patch) | |
| tree | 169f1cf0fcdde48927367cbbeae5b5dae092a68e | |
| parent | d51c35ebcaaadf15489b4adc62c951d5e6900a6a (diff) | |
syz-cluster: deduplicate findings during reporting
Now that the same bug could be found on in multiple session tests (e.g.
during fuzzing and during repro retesting), ensure that we do not report
it twice.
Add a test and fix the logic.
| -rw-r--r-- | syz-cluster/pkg/controller/api_test.go | 6 | ||||
| -rw-r--r-- | syz-cluster/pkg/reporter/dedup_test.go | 89 | ||||
| -rw-r--r-- | syz-cluster/pkg/service/finding.go | 66 |
3 files changed, 150 insertions, 11 deletions
diff --git a/syz-cluster/pkg/controller/api_test.go b/syz-cluster/pkg/controller/api_test.go index 7818c95f7..297b20b3f 100644 --- a/syz-cluster/pkg/controller/api_test.go +++ b/syz-cluster/pkg/controller/api_test.go @@ -283,12 +283,12 @@ func TestAPIListPreviousFindings(t *testing.T) { finding1, err := client.GetFinding(ctx, list[0]) require.NoError(t, err) - assert.Equal(t, "Crash in foo", finding1.Title) - assert.Equal(t, idsV2.SessionID, finding1.SessionID) + assert.Equal(t, "Crash in bar", finding1.Title) + assert.Equal(t, idsV1.SessionID, finding1.SessionID) finding2, err := client.GetFinding(ctx, list[1]) require.NoError(t, err) - assert.Equal(t, "Crash in bar", finding2.Title) + assert.Equal(t, "Crash in foo", finding2.Title) assert.Equal(t, idsV1.SessionID, finding2.SessionID) list, err = client.ListPreviousFindings(ctx, &api.ListPreviousFindingsReq{ diff --git a/syz-cluster/pkg/reporter/dedup_test.go b/syz-cluster/pkg/reporter/dedup_test.go new file mode 100644 index 000000000..17a375557 --- /dev/null +++ b/syz-cluster/pkg/reporter/dedup_test.go @@ -0,0 +1,89 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package reporter + +import ( + "testing" + + "github.com/google/syzkaller/syz-cluster/pkg/api" + "github.com/google/syzkaller/syz-cluster/pkg/app" + "github.com/google/syzkaller/syz-cluster/pkg/controller" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDeduplicationInReport(t *testing.T) { + env, ctx := app.TestEnvironment(t) + client := controller.TestServer(t, env) + series := controller.DummySeries() + + _, err := client.UploadSeries(ctx, series) + require.NoError(t, err) + + ulSession, err := client.UploadSession(ctx, &api.NewSession{ + ExtID: series.ExtID, + }) + require.NoError(t, err) + sessionID := ulSession.ID + + build := controller.DummyBuild() + ulBuild, err := client.UploadBuild(ctx, &api.UploadBuildReq{ + Build: *build, + }) + require.NoError(t, err) + buildID := ulBuild.ID + + err = client.UploadSessionTest(ctx, &api.SessionTest{ + SessionID: sessionID, + TestName: "step-1", + PatchedBuildID: buildID, + BaseBuildID: buildID, + Result: api.TestPassed, + }) + require.NoError(t, err) + + err = client.UploadFinding(ctx, &api.RawFinding{ + SessionID: sessionID, + TestName: "step-1", + Title: "duplicate-title", + Log: []byte("log1"), + Report: []byte("report1"), + }) + require.NoError(t, err) + + err = client.UploadSessionTest(ctx, &api.SessionTest{ + SessionID: sessionID, + TestName: "step-2", + PatchedBuildID: buildID, + BaseBuildID: buildID, + Result: api.TestPassed, + }) + require.NoError(t, err) + + err = client.UploadFinding(ctx, &api.RawFinding{ + SessionID: sessionID, + TestName: "step-2", + Title: "duplicate-title", + Log: []byte("log2"), + Report: []byte("report2"), + }) + require.NoError(t, err) + + controller.MarkSessionFinished(t, env, sessionID) + + generator := NewGenerator(env) + err = generator.Process(ctx, 1) + require.NoError(t, err) + + reportClient := TestServer(t, env) + nextResp, err := reportClient.GetNextReport(ctx, api.LKMLReporter) + require.NoError(t, err) + require.NotNil(t, nextResp.Report) + + require.Len(t, nextResp.Report.Findings, 1) + f := nextResp.Report.Findings[0] + assert.Equal(t, "duplicate-title", f.Title) + assert.Equal(t, "report1", f.Report) + require.NoError(t, reportClient.ConfirmReport(ctx, nextResp.Report.ID)) +} diff --git a/syz-cluster/pkg/service/finding.go b/syz-cluster/pkg/service/finding.go index bfa7d13f1..d8d7c46ca 100644 --- a/syz-cluster/pkg/service/finding.go +++ b/syz-cluster/pkg/service/finding.go @@ -9,6 +9,7 @@ import ( "errors" "fmt" "slices" + "strings" "time" "cloud.google.com/go/spanner" @@ -118,6 +119,7 @@ func (s *FindingService) List(ctx context.Context, sessionID string, limit int) if err != nil { return nil, fmt.Errorf("failed to query the list: %w", err) } + list = deduplicateFindings(list) tests, err := s.sessionTestRepo.BySession(ctx, sessionID) if err != nil { return nil, fmt.Errorf("failed to query session tests: %w", err) @@ -168,7 +170,7 @@ func (s *FindingService) ListPreviousFindings(ctx context.Context, req *api.List return nil, fmt.Errorf("failed to list all versions: %w", err) } var ret []string - seenTitles := map[string]bool{} + var allFindings []*db.Finding // Prefer newer versions. for _, ver := range slices.Backward(allVersions) { if ver.ID == req.SeriesID { @@ -199,17 +201,17 @@ func (s *FindingService) ListPreviousFindings(ctx context.Context, req *api.List if err != nil { return nil, fmt.Errorf("failed to list findings for session %s: %w", sessionID, err) } - for _, f := range findings { - if !f.InvalidatedAt.IsNull() { + for _, finding := range findings { + if !finding.InvalidatedAt.IsNull() { continue } - if seenTitles[f.Title] { - continue - } - seenTitles[f.Title] = true - ret = append(ret, f.ID) + allFindings = append(allFindings, finding) } } + allFindings = deduplicateFindings(allFindings) + for _, f := range allFindings { + ret = append(ret, f.ID) + } return ret, nil } @@ -280,3 +282,51 @@ func (s *FindingService) matchesPrevFindingsReq( } return true, nil } + +func deduplicateFindings(findings []*db.Finding) []*db.Finding { + grouped := make(map[string][]*db.Finding) + for _, f := range findings { + grouped[f.Title] = append(grouped[f.Title], f) + } + + var ret []*db.Finding + for _, group := range grouped { + best := group[0] + for _, f := range group[1:] { + if isBetterFinding(f, best) { + best = f + } + } + ret = append(ret, best) + } + // Ensure the result is deterministic. + slices.SortFunc(ret, func(a, b *db.Finding) int { + return strings.Compare(a.Title, b.Title) + }) + return ret +} + +func isBetterFinding(newF, oldF *db.Finding) bool { + // 1. Prefer C repro. + if newF.CReproURI != "" && oldF.CReproURI == "" { + return true + } + if newF.CReproURI == "" && oldF.CReproURI != "" { + return false + } + // 2. Prefer Syz repro. + if newF.SyzReproURI != "" && oldF.SyzReproURI == "" { + return true + } + if newF.SyzReproURI == "" && oldF.SyzReproURI != "" { + return false + } + // 3. First reporterd come first. + if newF.CreatedAt.Valid != oldF.CreatedAt.Valid { + return newF.CreatedAt.Valid + } + if newF.CreatedAt.Valid { + return newF.CreatedAt.Time.Before(oldF.CreatedAt.Time) + } + return newF.ID < oldF.ID +} |
