diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2025-06-23 15:27:39 +0200 |
|---|---|---|
| committer | Aleksandr Nogikh <nogikh@google.com> | 2025-06-26 12:49:47 +0000 |
| commit | c3f8bb06ed4d6ad6211729efcd0d5aa4a26d5c4d (patch) | |
| tree | f6f96131b5eb73e696dd87cf812b5756e556d106 /pkg | |
| parent | 5fd74b4cc1dd3fa8867f53dbbb67c950188250de (diff) | |
pkg/repro: validate the result
The whole pkg/repro algorithm is very sensitive to random kernel
crashes, yet all other parts of the system rely on pkg/repro reproducers
being reliable enough to draw meaningful conclusions from running them.
A single unrelated kernel crash during repro extraction may divert the
whole process since all the checks we do during the process (e.g. during
minimization or when we drop prog opts) assume that if the kernel didn't
crash, it was due to the fact that the removed part was essential for
reproduction, and not due to the fact that our reproducer is already
broken.
Since such a problem may happen at any moment, let's do a single
validation check at the very end of repro generation. Overall, these
cases are not super frequent, so it's not worth it to re-check every
step.
Calculate the reliability score of the reproducer and use a 15% default
cut-off for flaky results.
Diffstat (limited to 'pkg')
| -rw-r--r-- | pkg/repro/repro.go | 43 | ||||
| -rw-r--r-- | pkg/repro/repro_test.go | 107 |
2 files changed, 139 insertions, 11 deletions
diff --git a/pkg/repro/repro.go b/pkg/repro/repro.go index 25bfbc3dd..b10d29784 100644 --- a/pkg/repro/repro.go +++ b/pkg/repro/repro.go @@ -33,6 +33,9 @@ type Result struct { // Information about the final (non-symbolized) crash that we reproduced. // Can be different from what we started reproducing. Report *report.Report + // A very rough estimate of the probability with which the resulting syz + // reproducer crashes the kernel. + Reliability float64 } type Stats struct { @@ -263,9 +266,49 @@ func (ctx *reproContext) repro() (*Result, error) { } } } + // Validate the resulting reproducer - a random rare kernel crash might have diverted the process. + res.Reliability, err = calculateReliability(func() (bool, error) { + ret, err := ctx.testProg(res.Prog, res.Duration, res.Opts, false) + if err != nil { + return false, err + } + ctx.reproLogf(2, "validation run: crashed=%v", ret.Crashed) + return ret.Crashed, nil + }) + if err != nil { + ctx.reproLogf(2, "could not calculate reliability, err=%v", err) + return nil, err + } + + const minReliability = 0.15 + if res.Reliability < minReliability { + ctx.reproLogf(1, "reproducer is too unreliable: %.2f", res.Reliability) + return nil, err + } + return res, nil } +func calculateReliability(cb func() (bool, error)) (float64, error) { + const ( + maxRuns = 10 + enoughOK = 3 + ) + total := 0 + okCount := 0 + for i := 0; i < maxRuns && okCount < enoughOK; i++ { + total++ + ok, err := cb() + if err != nil { + return 0, err + } + if ok { + okCount++ + } + } + return float64(okCount) / float64(total), nil +} + func (ctx *reproContext) extractProg(entries []*prog.LogEntry) (*Result, error) { ctx.reproLogf(2, "extracting reproducer from %v programs", len(entries)) start := time.Now() diff --git a/pkg/repro/repro_test.go b/pkg/repro/repro_test.go index b9a5cb19f..21e7782f0 100644 --- a/pkg/repro/repro_test.go +++ b/pkg/repro/repro_test.go @@ -8,7 +8,9 @@ import ( "fmt" "math/rand" "regexp" + "sort" "testing" + "time" 
"github.com/google/go-cmp/cmp" "github.com/google/syzkaller/pkg/csource" @@ -19,6 +21,8 @@ import ( "github.com/google/syzkaller/pkg/testutil" "github.com/google/syzkaller/prog" "github.com/google/syzkaller/sys/targets" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func initTest(t *testing.T) (*rand.Rand, int) { @@ -138,13 +142,14 @@ func runTestRepro(t *testing.T, log string, exec execInterface) (*Result, *Stats if err != nil { t.Fatal(err) } - return runInner(context.Background(), []byte(log), Environment{ + env := Environment{ Config: mgrConfig, Features: flatrpc.AllFeatures, Fast: false, Reporter: reporter, logf: t.Logf, - }, exec) + } + return runInner(context.Background(), []byte(log), env, exec) } const testReproLog = ` @@ -165,16 +170,26 @@ getpid() // Only crash if `pause()` is followed by `alarm(0xa)`. var testCrashCondition = regexp.MustCompile(`(?s)pause\(\).*alarm\(0xa\)`) +var ( + expectedReproducer = "pause()\nalarm(0xa)\n" +) + +func fakeCrashResult(title string) *instance.RunResult { + ret := &instance.RunResult{} + if title != "" { + ret.Report = &report.Report{ + Title: title, + } + } + return ret +} + func testExecRunner(log []byte) (*instance.RunResult, error) { crash := testCrashCondition.Match(log) if crash { - ret := &instance.RunResult{} - ret.Report = &report.Report{ - Title: `some crash`, - } - return ret, nil + return fakeCrashResult("crashed"), nil } - return &instance.RunResult{}, nil + return fakeCrashResult(""), nil } // Just a pkg/repro smoke test: check that we can extract a two-call reproducer. 
@@ -186,9 +201,7 @@ func TestPlainRepro(t *testing.T) { if err != nil { t.Fatal(err) } - if diff := cmp.Diff(`pause() -alarm(0xa) -`, string(result.Prog.Serialize())); diff != "" { + if diff := cmp.Diff(expectedReproducer, string(result.Prog.Serialize())); diff != "" { t.Fatal(diff) } } @@ -263,3 +276,75 @@ alarm(0xa) t.Fatal(diff) } } + +func TestFlakyCrashes(t *testing.T) { + t.Parallel() + // A single flaky crash may divert the whole process. + // Let's check if the Reliability score provides a reasonable cut-off for such fake results. + + r := rand.New(testutil.RandSource(t)) + iters := 250 + + success := 0 + for i := 0; i < iters; i++ { + counter, lastFake := 0, 0 + result, _, err := runTestRepro(t, testReproLog, &testExecInterface{ + run: func(log []byte) (*instance.RunResult, error) { + // Throw in a fake crash with 5% probability, + // but not more often than once in 10 consecutive runs. + counter++ + if r.Intn(20) == 0 && counter-lastFake >= 10 { + lastFake = counter + return fakeCrashResult("flaky crash"), nil + } + return testExecRunner(log) + }, + }) + // It should either find nothing (=> validation worked) or find the exact reproducer. + require.NoError(t, err) + if result == nil { + continue + } + success++ + assert.Equal(t, expectedReproducer, string(result.Prog.Serialize()), "reliability: %.2f", result.Reliability) + } + + // There was no deep reasoning behind the success rate. It's not 100% due to flakiness, + // but there should still be some significant number of success cases. 
+ assert.Greater(t, success, iters/3*2, "must succeed >2/3 of cases") +} + +func BenchmarkCalculateReliability(b *testing.B) { + r := rand.New(rand.NewSource(time.Now().UnixNano())) + + for base := 0.0; base < 1.0; base += 0.1 { + b.Run(fmt.Sprintf("p=%.2f", base), func(b *testing.B) { + if b.N == 0 { + return + } + neededRuns := make([]int, 0, b.N) + reliability := make([]float64, 0, b.N) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + runs := 0 + ret, err := calculateReliability(func() (bool, error) { + runs++ + return r.Float64() < base, nil + }) + require.NoError(b, err) + neededRuns = append(neededRuns, runs) + reliability = append(reliability, ret) + } + b.StopTimer() + + sort.Ints(neededRuns) + b.ReportMetric(float64(neededRuns[len(neededRuns)/2]), "runs") + + sort.Float64s(reliability) + b.ReportMetric(reliability[len(reliability)/10], "p10") + b.ReportMetric(reliability[len(reliability)/2], "median") + b.ReportMetric(reliability[len(reliability)*9/10], "p90") + }) + } +} |
