From e8709b21d7c474a0fb6b8ff13039702865fd83bb Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Mon, 15 Jul 2024 17:31:34 +0200 Subject: pkg/bisect: set a lower bound for BisectBad verdict The "1 crashed, 9 OK" cases are a frequent cause of invalid bisection results on syzbot. Let's define a cutoff for a BisectBad verdict and use it to prevent such obvious outliers. We cannot safely declare such results as BisectGood either, so let's return BisectSkip in this case. --- pkg/bisect/bisect.go | 3 ++- pkg/bisect/bisect_test.go | 28 +++++++++++++++++++--------- 2 files changed, 21 insertions(+), 10 deletions(-) (limited to 'pkg/bisect') diff --git a/pkg/bisect/bisect.go b/pkg/bisect/bisect.go index b75b2d2e0..2654b5dec 100644 --- a/pkg/bisect/bisect.go +++ b/pkg/bisect/bisect.go @@ -830,6 +830,7 @@ func (env *env) bisectionDecision(total, bad, good, infra int) (vcs.BisectResult // Boot errors, image test errors, skipped crashes. skip := total - bad - good - infra + wantBadRuns := max(2, (total-infra)/6) // For 10 runs, require 2 crashes. For 20, require 3. wantGoodRuns := total / 2 wantTotalRuns := total / 2 if env.flaky { @@ -840,7 +841,7 @@ func (env *env) bisectionDecision(total, bad, good, infra int) (vcs.BisectResult // We need a big enough number of good results, otherwise the chance of a false // positive is too high. return vcs.BisectGood, nil - } else if bad > 0 && (good+bad) >= wantTotalRuns { + } else if bad >= wantBadRuns && (good+bad) >= wantTotalRuns { // We need enough (good+bad) results to conclude that the kernel revision itself // is not too broken. 
return vcs.BisectBad, nil diff --git a/pkg/bisect/bisect_test.go b/pkg/bisect/bisect_test.go index d44532fe0..eead42335 100644 --- a/pkg/bisect/bisect_test.go +++ b/pkg/bisect/bisect_test.go @@ -105,7 +105,8 @@ func (env *testEnv) Test(numVMs int, reproSyz, reproOpts, reproC []byte) ([]inst if (env.config == "baseline-repro" || env.config == "new-minimized-config" || env.config == "original config") && introduced && !fixed { if env.test.flaky { - ret = crashErrors(1, numVMs-1, "crash occurs", env.test.reportType) + crashed := max(2, numVMs/6) + ret = crashErrors(crashed, numVMs-crashed, "crash occurs", env.test.reportType) } else { ret = crashErrors(numVMs, 0, "crash occurs", env.test.reportType) } @@ -352,10 +353,10 @@ var bisectionTests = []BisectionTest{ flaky: true, introduced: "605", extraTest: func(t *testing.T, res *Result) { - // False negative probability of each run is ~35%. - // We get three "good" results, so our accumulated confidence is ~27%. - assert.Less(t, res.Confidence, 0.3) - assert.Greater(t, res.Confidence, 0.2) + // False negative probability of each run is ~4%. + // We get three "good" results, so our accumulated confidence is ~85%. + assert.Less(t, res.Confidence, 0.9) + assert.Greater(t, res.Confidence, 0.8) }, }, // Test bisection returns correct cause with different baseline/config combinations. @@ -797,8 +798,8 @@ func TestBisectVerdict(t *testing.T) { { name: "many-total-and-infra", total: 10, - good: 5, - bad: 1, + good: 4, + bad: 2, infra: 2, skip: 2, verdict: vcs.BisectBad, @@ -846,12 +847,21 @@ func TestBisectVerdict(t *testing.T) { name: "flaky-many-skips", flaky: true, total: 20, - good: 9, - bad: 1, + good: 7, + bad: 3, infra: 0, skip: 10, verdict: vcs.BisectBad, }, + { + name: "outlier-bad", + total: 10, + good: 9, + bad: 1, + infra: 0, + skip: 0, + verdict: vcs.BisectSkip, + }, } for _, test := range tests { -- cgit mrf-deployment