2 files changed, 139 insertions, 11 deletions
diff --git a/pkg/repro/repro.go b/pkg/repro/repro.go
index 25bfbc3dd..b10d29784 100644
--- a/pkg/repro/repro.go
+++ b/pkg/repro/repro.go
@@ -33,6 +33,9 @@ type Result struct {
 	// Information about the final (non-symbolized) crash that we reproduced.
 	// Can be different from what we started reproducing.
 	Report *report.Report
+	// A very rough estimate of the probability with which the resulting syz
+	// reproducer crashes the kernel.
+	Reliability float64
 }
 
 type Stats struct {
@@ -263,9 +266,49 @@ func (ctx *reproContext) repro() (*Result, error) {
 			}
 		}
 	}
+	// Validate the resulting reproducer - a random rare kernel crash might have diverted the process.
+	res.Reliability, err = calculateReliability(func() (bool, error) {
+		ret, err := ctx.testProg(res.Prog, res.Duration, res.Opts, false)
+		if err != nil {
+			return false, err
+		}
+		ctx.reproLogf(2, "validation run: crashed=%v", ret.Crashed)
+		return ret.Crashed, nil
+	})
+	if err != nil {
+		ctx.reproLogf(2, "could not calculate reliability, err=%v", err)
+		return nil, err
+	}
+
+	const minReliability = 0.15
+	if res.Reliability < minReliability {
+		ctx.reproLogf(1, "reproducer is too unreliable: %.2f", res.Reliability)
+		return nil, err
+	}
+
 	return res, nil
 }
 
+func calculateReliability(cb func() (bool, error)) (float64, error) {
+	const (
+		maxRuns  = 10
+		enoughOK = 3
+	)
+	total := 0
+	okCount := 0
+	for i := 0; i < maxRuns && okCount < enoughOK; i++ {
+		total++
+		ok, err := cb()
+		if err != nil {
+			return 0, err
+		}
+		if ok {
+			okCount++
+		}
+	}
+	return float64(okCount) / float64(total), nil
+}
+
 func (ctx *reproContext) extractProg(entries []*prog.LogEntry) (*Result, error) {
 	ctx.reproLogf(2, "extracting reproducer from %v programs", len(entries))
 	start := time.Now()
diff --git a/pkg/repro/repro_test.go b/pkg/repro/repro_test.go
index b9a5cb19f..21e7782f0 100644
--- a/pkg/repro/repro_test.go
+++ b/pkg/repro/repro_test.go
@@ -8,7 +8,9 @@ import (
 	"fmt"
 	"math/rand"
 	"regexp"
+	"sort"
 	"testing"
+	"time"
 
 	"github.com/google/go-cmp/cmp"
 	"github.com/google/syzkaller/pkg/csource"
@@ -19,6 +21,8 @@ import (
 	"github.com/google/syzkaller/pkg/testutil"
 	"github.com/google/syzkaller/prog"
 	"github.com/google/syzkaller/sys/targets"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )
 
 func initTest(t *testing.T) (*rand.Rand, int) {
@@ -138,13 +142,14 @@ func runTestRepro(t *testing.T, log string, exec execInterface) (*Result, *Stats
 	if err != nil {
 		t.Fatal(err)
 	}
-	return runInner(context.Background(), []byte(log), Environment{
+	env := Environment{
 		Config:   mgrConfig,
 		Features: flatrpc.AllFeatures,
 		Fast:     false,
 		Reporter: reporter,
 		logf:     t.Logf,
-	}, exec)
+	}
+	return runInner(context.Background(), []byte(log), env, exec)
 }
 
 const testReproLog = `
@@ -165,16 +170,26 @@ getpid()
 // Only crash if `pause()` is followed by `alarm(0xa)`.
 var testCrashCondition = regexp.MustCompile(`(?s)pause\(\).*alarm\(0xa\)`)
 
+var (
+	expectedReproducer = "pause()\nalarm(0xa)\n"
+)
+
+func fakeCrashResult(title string) *instance.RunResult {
+	ret := &instance.RunResult{}
+	if title != "" {
+		ret.Report = &report.Report{
+			Title: title,
+		}
+	}
+	return ret
+}
+
 func testExecRunner(log []byte) (*instance.RunResult, error) {
 	crash := testCrashCondition.Match(log)
 	if crash {
-		ret := &instance.RunResult{}
-		ret.Report = &report.Report{
-			Title: `some crash`,
-		}
-		return ret, nil
+		return fakeCrashResult("crashed"), nil
 	}
-	return &instance.RunResult{}, nil
+	return fakeCrashResult(""), nil
 }
 
 // Just a pkg/repro smoke test: check that we can extract a two-call reproducer.
@@ -186,9 +201,7 @@ func TestPlainRepro(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	if diff := cmp.Diff(`pause()
-alarm(0xa)
-`, string(result.Prog.Serialize())); diff != "" {
+	if diff := cmp.Diff(expectedReproducer, string(result.Prog.Serialize())); diff != "" {
 		t.Fatal(diff)
 	}
 }
@@ -263,3 +276,75 @@ alarm(0xa)
 		t.Fatal(diff)
 	}
 }
+
+func TestFlakyCrashes(t *testing.T) {
+	t.Parallel()
+	// A single flaky crash may divert the whole process.
+	// Let's check if the Reliability score provides a reasonable cut-off for such fake results.
+
+	r := rand.New(testutil.RandSource(t))
+	iters := 250
+
+	success := 0
+	for i := 0; i < iters; i++ {
+		counter, lastFake := 0, 0
+		result, _, err := runTestRepro(t, testReproLog, &testExecInterface{
+			run: func(log []byte) (*instance.RunResult, error) {
+				// Throw in a fake crash with 5% probability,
+				// but not more often than once in 10 consecutive runs.
+				counter++
+				if r.Intn(20) == 0 && counter-lastFake >= 10 {
+					lastFake = counter
+					return fakeCrashResult("flaky crash"), nil
+				}
+				return testExecRunner(log)
+			},
+		})
+		// It should either find nothing (=> validation worked) or find the exact reproducer.
+		require.NoError(t, err)
+		if result == nil {
+			continue
+		}
+		success++
+		assert.Equal(t, expectedReproducer, string(result.Prog.Serialize()), "reliability: %.2f", result.Reliability)
+	}
+
+	// There was no deep reasoning behind the success rate. It's not 100% due to flakiness,
+	// but there should still be some significant number of success cases.
+	assert.Greater(t, success, iters/3*2, "must succeed >2/3 of cases")
+}
+
+func BenchmarkCalculateReliability(b *testing.B) {
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+
+	for base := 0.0; base < 1.0; base += 0.1 {
+		b.Run(fmt.Sprintf("p=%.2f", base), func(b *testing.B) {
+			if b.N == 0 {
+				return
+			}
+			neededRuns := make([]int, 0, b.N)
+			reliability := make([]float64, 0, b.N)
+
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				runs := 0
+				ret, err := calculateReliability(func() (bool, error) {
+					runs++
+					return r.Float64() < base, nil
+				})
+				require.NoError(b, err)
+				neededRuns = append(neededRuns, runs)
+				reliability = append(reliability, ret)
+			}
+			b.StopTimer()
+
+			sort.Ints(neededRuns)
+			b.ReportMetric(float64(neededRuns[len(neededRuns)/2]), "runs")
+
+			sort.Float64s(reliability)
+			b.ReportMetric(reliability[len(reliability)/10], "p10")
+			b.ReportMetric(reliability[len(reliability)/2], "median")
+			b.ReportMetric(reliability[len(reliability)*9/10], "p90")
+		})
+	}
+}