diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2024-07-15 17:31:34 +0200 |
|---|---|---|
| committer | Aleksandr Nogikh <nogikh@google.com> | 2024-07-15 15:49:07 +0000 |
| commit | e8709b21d7c474a0fb6b8ff13039702865fd83bb (patch) | |
| tree | 482713d13d70ea1ec6670139c6d3fc8eb4a0c795 /pkg/bisect | |
| parent | efee4ed2240b89b4959ac8a0490a88f26e7ab506 (diff) | |
pkg/bisect: set a lower bound for BisectBad verdict
The "1 crashed, 9 OK" cases are a frequent cause of invalid bisection
results on syzbot.
Let's define a cutoff for a BisectBad verdict and use it to keep such
obvious outliers from being reported as bad. We cannot safely declare such
results as BisectGood either, so let's return BisectSkip in this case.
Diffstat (limited to 'pkg/bisect')
| -rw-r--r-- | pkg/bisect/bisect.go | 3 | ||||
| -rw-r--r-- | pkg/bisect/bisect_test.go | 28 |
2 files changed, 21 insertions, 10 deletions
diff --git a/pkg/bisect/bisect.go b/pkg/bisect/bisect.go index b75b2d2e0..2654b5dec 100644 --- a/pkg/bisect/bisect.go +++ b/pkg/bisect/bisect.go @@ -830,6 +830,7 @@ func (env *env) bisectionDecision(total, bad, good, infra int) (vcs.BisectResult // Boot errors, image test errors, skipped crashes. skip := total - bad - good - infra + wantBadRuns := max(2, (total-infra)/6) // For 10 runs, require 2 crashes. For 20, require 3. wantGoodRuns := total / 2 wantTotalRuns := total / 2 if env.flaky { @@ -840,7 +841,7 @@ func (env *env) bisectionDecision(total, bad, good, infra int) (vcs.BisectResult // We need a big enough number of good results, otherwise the chance of a false // positive is too high. return vcs.BisectGood, nil - } else if bad > 0 && (good+bad) >= wantTotalRuns { + } else if bad >= wantBadRuns && (good+bad) >= wantTotalRuns { // We need enough (good+bad) results to conclude that the kernel revision itself // is not too broken. return vcs.BisectBad, nil diff --git a/pkg/bisect/bisect_test.go b/pkg/bisect/bisect_test.go index d44532fe0..eead42335 100644 --- a/pkg/bisect/bisect_test.go +++ b/pkg/bisect/bisect_test.go @@ -105,7 +105,8 @@ func (env *testEnv) Test(numVMs int, reproSyz, reproOpts, reproC []byte) ([]inst if (env.config == "baseline-repro" || env.config == "new-minimized-config" || env.config == "original config") && introduced && !fixed { if env.test.flaky { - ret = crashErrors(1, numVMs-1, "crash occurs", env.test.reportType) + crashed := max(2, numVMs/6) + ret = crashErrors(crashed, numVMs-crashed, "crash occurs", env.test.reportType) } else { ret = crashErrors(numVMs, 0, "crash occurs", env.test.reportType) } @@ -352,10 +353,10 @@ var bisectionTests = []BisectionTest{ flaky: true, introduced: "605", extraTest: func(t *testing.T, res *Result) { - // False negative probability of each run is ~35%. - // We get three "good" results, so our accumulated confidence is ~27%. 
- assert.Less(t, res.Confidence, 0.3) - assert.Greater(t, res.Confidence, 0.2) + // False negative probability of each run is ~4%. + // We get three "good" results, so our accumulated confidence is ~85%. + assert.Less(t, res.Confidence, 0.9) + assert.Greater(t, res.Confidence, 0.8) }, }, // Test bisection returns correct cause with different baseline/config combinations. @@ -797,8 +798,8 @@ func TestBisectVerdict(t *testing.T) { { name: "many-total-and-infra", total: 10, - good: 5, - bad: 1, + good: 4, + bad: 2, infra: 2, skip: 2, verdict: vcs.BisectBad, @@ -846,12 +847,21 @@ func TestBisectVerdict(t *testing.T) { name: "flaky-many-skips", flaky: true, total: 20, - good: 9, - bad: 1, + good: 7, + bad: 3, infra: 0, skip: 10, verdict: vcs.BisectBad, }, + { + name: "outlier-bad", + total: 10, + good: 9, + bad: 1, + infra: 0, + skip: 0, + verdict: vcs.BisectSkip, + }, } for _, test := range tests { |
