diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2024-07-15 17:31:34 +0200 |
|---|---|---|
| committer | Aleksandr Nogikh <nogikh@google.com> | 2024-07-15 15:49:07 +0000 |
| commit | e8709b21d7c474a0fb6b8ff13039702865fd83bb (patch) | |
| tree | 482713d13d70ea1ec6670139c6d3fc8eb4a0c795 /pkg/bisect/bisect_test.go | |
| parent | efee4ed2240b89b4959ac8a0490a88f26e7ab506 (diff) | |
pkg/bisect: set a lower bound for BisectBad verdict
The "1 crashed, 9 OK" cases are a frequent cause of invalid bisection
results on syzbot.
Let's define a cutoff for a BisectBad verdict and use it to prevent such
obvious outliers. We cannot safely declare such results as BisectGood
either, so let's return BisectSkip in this case.
Diffstat (limited to 'pkg/bisect/bisect_test.go')
| -rw-r--r-- | pkg/bisect/bisect_test.go | 28 |
1 file changed, 19 insertions, 9 deletions
```diff
diff --git a/pkg/bisect/bisect_test.go b/pkg/bisect/bisect_test.go
index d44532fe0..eead42335 100644
--- a/pkg/bisect/bisect_test.go
+++ b/pkg/bisect/bisect_test.go
@@ -105,7 +105,8 @@ func (env *testEnv) Test(numVMs int, reproSyz, reproOpts, reproC []byte) ([]inst
 	if (env.config == "baseline-repro" || env.config == "new-minimized-config" || env.config == "original config") && introduced && !fixed {
 		if env.test.flaky {
-			ret = crashErrors(1, numVMs-1, "crash occurs", env.test.reportType)
+			crashed := max(2, numVMs/6)
+			ret = crashErrors(crashed, numVMs-crashed, "crash occurs", env.test.reportType)
 		} else {
 			ret = crashErrors(numVMs, 0, "crash occurs", env.test.reportType)
 		}
@@ -352,10 +353,10 @@ var bisectionTests = []BisectionTest{
 		flaky: true,
 		introduced: "605",
 		extraTest: func(t *testing.T, res *Result) {
-			// False negative probability of each run is ~35%.
-			// We get three "good" results, so our accumulated confidence is ~27%.
-			assert.Less(t, res.Confidence, 0.3)
-			assert.Greater(t, res.Confidence, 0.2)
+			// False negative probability of each run is ~4%.
+			// We get three "good" results, so our accumulated confidence is ~85%.
+			assert.Less(t, res.Confidence, 0.9)
+			assert.Greater(t, res.Confidence, 0.8)
 		},
 	},
 	// Test bisection returns correct cause with different baseline/config combinations.
@@ -797,8 +798,8 @@ func TestBisectVerdict(t *testing.T) {
 		{
 			name: "many-total-and-infra",
 			total: 10,
-			good: 5,
-			bad: 1,
+			good: 4,
+			bad: 2,
 			infra: 2,
 			skip: 2,
 			verdict: vcs.BisectBad,
@@ -846,12 +847,21 @@ func TestBisectVerdict(t *testing.T) {
 			name: "flaky-many-skips",
 			flaky: true,
 			total: 20,
-			good: 9,
-			bad: 1,
+			good: 7,
+			bad: 3,
 			infra: 0,
 			skip: 10,
 			verdict: vcs.BisectBad,
 		},
+		{
+			name: "outlier-bad",
+			total: 10,
+			good: 9,
+			bad: 1,
+			infra: 0,
+			skip: 0,
+			verdict: vcs.BisectSkip,
+		},
 	}
 	for _, test := range tests {
```
