about | summary | refs | log | tree | commit | diff | stats
path: root/pkg/bisect
diff options
context:
space:
mode:
author: Aleksandr Nogikh <nogikh@google.com> 2024-07-15 17:31:34 +0200
committer: Aleksandr Nogikh <nogikh@google.com> 2024-07-15 15:49:07 +0000
commit: e8709b21d7c474a0fb6b8ff13039702865fd83bb (patch)
tree: 482713d13d70ea1ec6670139c6d3fc8eb4a0c795 /pkg/bisect
parent: efee4ed2240b89b4959ac8a0490a88f26e7ab506 (diff)
pkg/bisect: set a lower bound for BisectBad verdict
The "1 crashed, 9 OK" cases are a frequent cause of invalid bisection results on syzbot. Let's define a cutoff for the BisectBad verdict and use it to filter out such obvious outliers. We cannot safely declare such results BisectGood either, so let's return BisectSkip in this case.
Diffstat (limited to 'pkg/bisect')
-rw-r--r-- pkg/bisect/bisect.go 3
-rw-r--r-- pkg/bisect/bisect_test.go 28
2 files changed, 21 insertions, 10 deletions
diff --git a/pkg/bisect/bisect.go b/pkg/bisect/bisect.go
index b75b2d2e0..2654b5dec 100644
--- a/pkg/bisect/bisect.go
+++ b/pkg/bisect/bisect.go
@@ -830,6 +830,7 @@ func (env *env) bisectionDecision(total, bad, good, infra int) (vcs.BisectResult
// Boot errors, image test errors, skipped crashes.
skip := total - bad - good - infra
+ wantBadRuns := max(2, (total-infra)/6) // For 10 runs, require 2 crashes. For 20, require 3.
wantGoodRuns := total / 2
wantTotalRuns := total / 2
if env.flaky {
@@ -840,7 +841,7 @@ func (env *env) bisectionDecision(total, bad, good, infra int) (vcs.BisectResult
// We need a big enough number of good results, otherwise the chance of a false
// positive is too high.
return vcs.BisectGood, nil
- } else if bad > 0 && (good+bad) >= wantTotalRuns {
+ } else if bad >= wantBadRuns && (good+bad) >= wantTotalRuns {
// We need enough (good+bad) results to conclude that the kernel revision itself
// is not too broken.
return vcs.BisectBad, nil
diff --git a/pkg/bisect/bisect_test.go b/pkg/bisect/bisect_test.go
index d44532fe0..eead42335 100644
--- a/pkg/bisect/bisect_test.go
+++ b/pkg/bisect/bisect_test.go
@@ -105,7 +105,8 @@ func (env *testEnv) Test(numVMs int, reproSyz, reproOpts, reproC []byte) ([]inst
if (env.config == "baseline-repro" || env.config == "new-minimized-config" || env.config == "original config") &&
introduced && !fixed {
if env.test.flaky {
- ret = crashErrors(1, numVMs-1, "crash occurs", env.test.reportType)
+ crashed := max(2, numVMs/6)
+ ret = crashErrors(crashed, numVMs-crashed, "crash occurs", env.test.reportType)
} else {
ret = crashErrors(numVMs, 0, "crash occurs", env.test.reportType)
}
@@ -352,10 +353,10 @@ var bisectionTests = []BisectionTest{
flaky: true,
introduced: "605",
extraTest: func(t *testing.T, res *Result) {
- // False negative probability of each run is ~35%.
- // We get three "good" results, so our accumulated confidence is ~27%.
- assert.Less(t, res.Confidence, 0.3)
- assert.Greater(t, res.Confidence, 0.2)
+ // False negative probability of each run is ~4%.
+ // We get three "good" results, so our accumulated confidence is ~85%.
+ assert.Less(t, res.Confidence, 0.9)
+ assert.Greater(t, res.Confidence, 0.8)
},
},
// Test bisection returns correct cause with different baseline/config combinations.
@@ -797,8 +798,8 @@ func TestBisectVerdict(t *testing.T) {
{
name: "many-total-and-infra",
total: 10,
- good: 5,
- bad: 1,
+ good: 4,
+ bad: 2,
infra: 2,
skip: 2,
verdict: vcs.BisectBad,
@@ -846,12 +847,21 @@ func TestBisectVerdict(t *testing.T) {
name: "flaky-many-skips",
flaky: true,
total: 20,
- good: 9,
- bad: 1,
+ good: 7,
+ bad: 3,
infra: 0,
skip: 10,
verdict: vcs.BisectBad,
},
+ {
+ name: "outlier-bad",
+ total: 10,
+ good: 9,
+ bad: 1,
+ infra: 0,
+ skip: 0,
+ verdict: vcs.BisectSkip,
+ },
}
for _, test := range tests {