pkg/bisect: change verdict calculation rules

1) For the good verdict, demand a certain minimum number of good runs to be present. 2) For the bad verdict, demand that at least 50% of runs did not fail with boot/image test error. Refactor the code and add tests.
author: Aleksandr Nogikh <nogikh@google.com> 2023-06-28 16:20:26 +0200
committer: Aleksandr Nogikh <nogikh@google.com> 2023-06-29 11:42:01 +0000
commit: 01fe857264dc35af4faf4ab53c8c849afdf91968 (patch)
tree: af19606cd5f332230bd4f1fc549fd6fb1ad3a39e /pkg/bisect
parent: 573dd3df4750cfd2b79826f6cbbeade35625417e (diff)
2 files changed, 150 insertions, 20 deletions
diff --git a/pkg/bisect/bisect.go b/pkg/bisect/bisect.go
index 71cab039f..8342275b9 100644
--- a/pkg/bisect/bisect.go
+++ b/pkg/bisect/bisect.go
@@ -613,35 +613,55 @@ func (env *env) test() (*testResult, error) {
 		return res, &InfraError{Title: problem}
 	}
 	bad, good, infra, rep := env.processResults(current, results)
-	res.rep = rep
-	res.verdict = vcs.BisectSkip
-	if infra > len(results)/2 {
-		// More than 1/2 of runs failed with infrastructure error,
-		// no sense in continuing the bisection at the moment.
-		return res, &InfraError{Title: "more than 50% of runs failed with an infra error"}
-	}
-	if bad != 0 {
-		res.verdict = vcs.BisectBad
-		if !env.flaky && bad < good {
-			env.log("reproducer seems to be flaky")
-			env.flaky = true
-		}
-	} else if 2*good > len(results) {
-		// Require at least 50% of successful runs to say it's a good commit.
-		// Otherwise we just have too little data to come to any conclusion.
-		res.verdict = vcs.BisectGood
-		env.log("too many neither good nor bad results, skipping this commit")
-	} else {
+	res.verdict, err = env.bisectionDecision(len(results), bad, good, infra)
+	if err != nil {
+		return nil, err
+	}
+	if !env.flaky && bad > 0 && bad < good {
+		env.log("reproducer seems to be flaky")
+		env.flaky = true
+	}
+	if res.verdict == vcs.BisectSkip {
 		res.rep = &report.Report{
 			Title: fmt.Sprintf("failed testing reproducer on %v", current.Hash),
 		}
+	} else {
+		res.rep = rep
 	}
-	// If most runs failed with a boot/test error, we end up with BisectSkip.
 	// TODO: when we start supporting boot/test error bisection, we need to make
 	// processResults treat that verdit as "good".
 	return res, nil
 }
 
+func (env *env) bisectionDecision(total, bad, good, infra int) (vcs.BisectResult, error) {
+	// Boot errors, image test errors, skipped crashes.
+	skip := total - bad - good - infra
+
+	wantGoodRuns := total / 2
+	wantTotalRuns := total / 2
+	if env.flaky {
+		// The reproducer works less than 50% of time, so we need really many good results.
+		wantGoodRuns = total * 3 / 4
+	}
+	if bad == 0 && good >= wantGoodRuns {
+		// We need a big enough number of good results, otherwise the chance of a false
+		// positive is too high.
+		return vcs.BisectGood, nil
+	} else if bad > 0 && (good+bad) >= wantTotalRuns {
+		// We need enough (good+bad) results to conclude that the kernel revision itself
+		// is not too broken.
+		return vcs.BisectBad, nil
+	} else if infra > skip {
+		// We have been unable to determine a verdict mostly because of infra errors.
+		// Abort the bisection.
+		return vcs.BisectSkip,
+			&InfraError{Title: "unable to determine the verdict because of infra errors"}
+	}
+	env.log("unable to determine the verdict: %d good runs (wanted %d), for bad wanted %d in total, got %d",
+		good, wantGoodRuns, wantTotalRuns, good+bad)
+	return vcs.BisectSkip, nil
+}
+
 func (env *env) processResults(current *vcs.Commit, results []instance.EnvTestResult) (
 	bad, good, infra int, rep *report.Report) {
 	var verdicts []string
diff --git a/pkg/bisect/bisect_test.go b/pkg/bisect/bisect_test.go
index 53426c86e..aeed167aa 100644
--- a/pkg/bisect/bisect_test.go
+++ b/pkg/bisect/bisect_test.go
@@ -17,6 +17,7 @@ import (
 	"github.com/google/syzkaller/pkg/report"
 	"github.com/google/syzkaller/pkg/vcs"
 	"github.com/google/syzkaller/sys/targets"
+	"github.com/stretchr/testify/assert"
 )
 
 // testEnv will implement instance.BuilderTester. This allows us to
@@ -629,3 +630,112 @@ func crashErrors(crashing, nonCrashing int, title string) []instance.EnvTestResu
 	}
 	return ret
 }
+
+func TestBisectVerdict(t *testing.T) {
+	t.Parallel()
+	tests := []struct {
+		name    string
+		flaky   bool
+		total   int
+		good    int
+		bad     int
+		infra   int
+		skip    int
+		verdict vcs.BisectResult
+		abort   bool
+	}{
+		{
+			name:  "bad-but-many-infra",
+			total: 10,
+			bad:   1,
+			infra: 8,
+			skip:  1,
+			abort: true,
+		},
+		{
+			name:    "many-good-and-infra",
+			total:   10,
+			good:    5,
+			infra:   3,
+			skip:    2,
+			verdict: vcs.BisectGood,
+		},
+		{
+			name:    "many-total-and-infra",
+			total:   10,
+			good:    5,
+			bad:     1,
+			infra:   2,
+			skip:    2,
+			verdict: vcs.BisectBad,
+		},
+		{
+			name:    "too-many-skips",
+			total:   10,
+			good:    2,
+			bad:     2,
+			infra:   3,
+			skip:    3,
+			verdict: vcs.BisectSkip,
+		},
+		{
+			name:  "flaky-need-more-good",
+			flaky: true,
+			total: 20,
+			// For flaky bisections, we'd want 15.
+			good:    10,
+			infra:   3,
+			skip:    7,
+			verdict: vcs.BisectSkip,
+		},
+		{
+			name:    "flaky-enough-good",
+			flaky:   true,
+			total:   20,
+			good:    15,
+			infra:   3,
+			skip:    2,
+			verdict: vcs.BisectGood,
+		},
+		{
+			name:  "flaky-too-many-skips",
+			flaky: true,
+			total: 20,
+			// We want (good+bad) take at least 50%.
+			good:    6,
+			bad:     1,
+			infra:   0,
+			skip:    13,
+			verdict: vcs.BisectSkip,
+		},
+		{
+			name:    "flaky-many-skips",
+			flaky:   true,
+			total:   20,
+			good:    9,
+			bad:     1,
+			infra:   0,
+			skip:    10,
+			verdict: vcs.BisectBad,
+		},
+	}
+
+	for _, test := range tests {
+		test := test
+		t.Run(test.name, func(t *testing.T) {
+			sum := test.good + test.bad + test.infra + test.skip
+			assert.Equal(t, test.total, sum)
+			env := &env{
+				cfg: &Config{
+					Trace: &debugtracer.NullTracer{},
+				},
+				flaky: test.flaky,
+			}
+			ret, err := env.bisectionDecision(test.total, test.bad, test.good, test.infra)
+			assert.Equal(t, test.abort, err != nil)
+			if !test.abort {
+				assert.Equal(t, test.verdict, ret)
+			}
+		})
+	}
+}
author	Aleksandr Nogikh <nogikh@google.com>	2023-06-28 16:20:26 +0200
committer	Aleksandr Nogikh <nogikh@google.com>	2023-06-29 11:42:01 +0000
commit	01fe857264dc35af4faf4ab53c8c849afdf91968 (patch)
tree	af19606cd5f332230bd4f1fc549fd6fb1ad3a39e /pkg/bisect
parent	573dd3df4750cfd2b79826f6cbbeade35625417e (diff)