aboutsummaryrefslogtreecommitdiffstats
path: root/pkg
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2025-06-23 15:27:39 +0200
committerAleksandr Nogikh <nogikh@google.com>2025-06-26 12:49:47 +0000
commitc3f8bb06ed4d6ad6211729efcd0d5aa4a26d5c4d (patch)
treef6f96131b5eb73e696dd87cf812b5756e556d106 /pkg
parent5fd74b4cc1dd3fa8867f53dbbb67c950188250de (diff)
pkg/repro: validate the result
The whole pkg/repro algorithm is very sensitive to random kernel crashes, yet all other parts of the system rely on pkg/repro reproducers being reliable enough to draw meaningful conclusions from running them. A single unrelated kernel crash during repro extraction may divert the whole process since all the checks we do during the process (e.g. during minimization or when we drop prog opts) assume that if the kernel didn't crash, it was due to the fact that the removed part was essential for reproduction, and not due to the fact that our reproducer is already broken. Since such a problem may happen at any moment, let's do a single validation check at the very end of repro generation. Overall, these cases are not super frequent, so it's not worth it to re-check every step. Calculate the reliability score of the reproducer and use a 15% default cut-off for flaky results.
Diffstat (limited to 'pkg')
-rw-r--r--pkg/repro/repro.go43
-rw-r--r--pkg/repro/repro_test.go107
2 files changed, 139 insertions, 11 deletions
diff --git a/pkg/repro/repro.go b/pkg/repro/repro.go
index 25bfbc3dd..b10d29784 100644
--- a/pkg/repro/repro.go
+++ b/pkg/repro/repro.go
@@ -33,6 +33,9 @@ type Result struct {
// Information about the final (non-symbolized) crash that we reproduced.
// Can be different from what we started reproducing.
Report *report.Report
+ // A very rough estimate of the probability with which the resulting syz
+ // reproducer crashes the kernel.
+ Reliability float64
}
type Stats struct {
@@ -263,9 +266,49 @@ func (ctx *reproContext) repro() (*Result, error) {
}
}
}
+ // Validate the resulting reproducer - a random rare kernel crash might have diverted the process.
+ res.Reliability, err = calculateReliability(func() (bool, error) {
+ ret, err := ctx.testProg(res.Prog, res.Duration, res.Opts, false)
+ if err != nil {
+ return false, err
+ }
+ ctx.reproLogf(2, "validation run: crashed=%v", ret.Crashed)
+ return ret.Crashed, nil
+ })
+ if err != nil {
+ ctx.reproLogf(2, "could not calculate reliability, err=%v", err)
+ return nil, err
+ }
+
+ const minReliability = 0.15
+ if res.Reliability < minReliability {
+ ctx.reproLogf(1, "reproducer is too unreliable: %.2f", res.Reliability)
+ return nil, err
+ }
+
return res, nil
}
+func calculateReliability(cb func() (bool, error)) (float64, error) {
+ const (
+ maxRuns = 10
+ enoughOK = 3
+ )
+ total := 0
+ okCount := 0
+ for i := 0; i < maxRuns && okCount < enoughOK; i++ {
+ total++
+ ok, err := cb()
+ if err != nil {
+ return 0, err
+ }
+ if ok {
+ okCount++
+ }
+ }
+ return float64(okCount) / float64(total), nil
+}
+
func (ctx *reproContext) extractProg(entries []*prog.LogEntry) (*Result, error) {
ctx.reproLogf(2, "extracting reproducer from %v programs", len(entries))
start := time.Now()
diff --git a/pkg/repro/repro_test.go b/pkg/repro/repro_test.go
index b9a5cb19f..21e7782f0 100644
--- a/pkg/repro/repro_test.go
+++ b/pkg/repro/repro_test.go
@@ -8,7 +8,9 @@ import (
"fmt"
"math/rand"
"regexp"
+ "sort"
"testing"
+ "time"
"github.com/google/go-cmp/cmp"
"github.com/google/syzkaller/pkg/csource"
@@ -19,6 +21,8 @@ import (
"github.com/google/syzkaller/pkg/testutil"
"github.com/google/syzkaller/prog"
"github.com/google/syzkaller/sys/targets"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
)
func initTest(t *testing.T) (*rand.Rand, int) {
@@ -138,13 +142,14 @@ func runTestRepro(t *testing.T, log string, exec execInterface) (*Result, *Stats
if err != nil {
t.Fatal(err)
}
- return runInner(context.Background(), []byte(log), Environment{
+ env := Environment{
Config: mgrConfig,
Features: flatrpc.AllFeatures,
Fast: false,
Reporter: reporter,
logf: t.Logf,
- }, exec)
+ }
+ return runInner(context.Background(), []byte(log), env, exec)
}
const testReproLog = `
@@ -165,16 +170,26 @@ getpid()
// Only crash if `pause()` is followed by `alarm(0xa)`.
var testCrashCondition = regexp.MustCompile(`(?s)pause\(\).*alarm\(0xa\)`)
+var (
+ expectedReproducer = "pause()\nalarm(0xa)\n"
+)
+
+func fakeCrashResult(title string) *instance.RunResult {
+ ret := &instance.RunResult{}
+ if title != "" {
+ ret.Report = &report.Report{
+ Title: title,
+ }
+ }
+ return ret
+}
+
func testExecRunner(log []byte) (*instance.RunResult, error) {
crash := testCrashCondition.Match(log)
if crash {
- ret := &instance.RunResult{}
- ret.Report = &report.Report{
- Title: `some crash`,
- }
- return ret, nil
+ return fakeCrashResult("crashed"), nil
}
- return &instance.RunResult{}, nil
+ return fakeCrashResult(""), nil
}
// Just a pkg/repro smoke test: check that we can extract a two-call reproducer.
@@ -186,9 +201,7 @@ func TestPlainRepro(t *testing.T) {
if err != nil {
t.Fatal(err)
}
- if diff := cmp.Diff(`pause()
-alarm(0xa)
-`, string(result.Prog.Serialize())); diff != "" {
+ if diff := cmp.Diff(expectedReproducer, string(result.Prog.Serialize())); diff != "" {
t.Fatal(diff)
}
}
@@ -263,3 +276,75 @@ alarm(0xa)
t.Fatal(diff)
}
}
+
+func TestFlakyCrashes(t *testing.T) {
+ t.Parallel()
+ // A single flaky crash may divert the whole process.
+ // Let's check if the Reliability score provides a reasonable cut-off for such fake results.
+
+ r := rand.New(testutil.RandSource(t))
+ iters := 250
+
+ success := 0
+ for i := 0; i < iters; i++ {
+ counter, lastFake := 0, 0
+ result, _, err := runTestRepro(t, testReproLog, &testExecInterface{
+ run: func(log []byte) (*instance.RunResult, error) {
+ // Throw in a fake crash with 5% probability,
+ // but not more often than once in 10 consecutive runs.
+ counter++
+ if r.Intn(20) == 0 && counter-lastFake >= 10 {
+ lastFake = counter
+ return fakeCrashResult("flaky crash"), nil
+ }
+ return testExecRunner(log)
+ },
+ })
+ // It should either find nothing (=> validation worked) or find the exact reproducer.
+ require.NoError(t, err)
+ if result == nil {
+ continue
+ }
+ success++
+ assert.Equal(t, expectedReproducer, string(result.Prog.Serialize()), "reliability: %.2f", result.Reliability)
+ }
+
+ // There was no deep reasoning behind the success rate. It's not 100% due to flakiness,
+ // but there should still be some significant number of success cases.
+ assert.Greater(t, success, iters/3*2, "must succeed >2/3 of cases")
+}
+
+func BenchmarkCalculateReliability(b *testing.B) {
+ r := rand.New(rand.NewSource(time.Now().UnixNano()))
+
+ for base := 0.0; base < 1.0; base += 0.1 {
+ b.Run(fmt.Sprintf("p=%.2f", base), func(b *testing.B) {
+ if b.N == 0 {
+ return
+ }
+ neededRuns := make([]int, 0, b.N)
+ reliability := make([]float64, 0, b.N)
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ runs := 0
+ ret, err := calculateReliability(func() (bool, error) {
+ runs++
+ return r.Float64() < base, nil
+ })
+ require.NoError(b, err)
+ neededRuns = append(neededRuns, runs)
+ reliability = append(reliability, ret)
+ }
+ b.StopTimer()
+
+ sort.Ints(neededRuns)
+ b.ReportMetric(float64(neededRuns[len(neededRuns)/2]), "runs")
+
+ sort.Float64s(reliability)
+ b.ReportMetric(reliability[len(reliability)/10], "p10")
+ b.ReportMetric(reliability[len(reliability)/2], "median")
+ b.ReportMetric(reliability[len(reliability)*9/10], "p90")
+ })
+ }
+}