From 01fe857264dc35af4faf4ab53c8c849afdf91968 Mon Sep 17 00:00:00 2001
From: Aleksandr Nogikh <nogikh@google.com>
Date: Wed, 28 Jun 2023 16:20:26 +0200
Subject: pkg/bisect: change verdict calculation rules

1) For the good verdict, demand a certain minimum number of good runs to
   be present.
2) For the bad verdict, demand that at least 50% of runs did not fail
   with boot/image test error.

Refactor the code and add tests.
---
 pkg/bisect/bisect.go | 60 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 40 insertions(+), 20 deletions(-)

(limited to 'pkg/bisect/bisect.go')

diff --git a/pkg/bisect/bisect.go b/pkg/bisect/bisect.go
index 71cab039f..8342275b9 100644
--- a/pkg/bisect/bisect.go
+++ b/pkg/bisect/bisect.go
@@ -613,35 +613,55 @@ func (env *env) test() (*testResult, error) {
 		return res, &InfraError{Title: problem}
 	}
 	bad, good, infra, rep := env.processResults(current, results)
-	res.rep = rep
-	res.verdict = vcs.BisectSkip
-	if infra > len(results)/2 {
-		// More than 1/2 of runs failed with infrastructure error,
-		// no sense in continuing the bisection at the moment.
-		return res, &InfraError{Title: "more than 50% of runs failed with an infra error"}
-	}
-	if bad != 0 {
-		res.verdict = vcs.BisectBad
-		if !env.flaky && bad < good {
-			env.log("reproducer seems to be flaky")
-			env.flaky = true
-		}
-	} else if 2*good > len(results) {
-		// Require at least 50% of successful runs to say it's a good commit.
-		// Otherwise we just have too little data to come to any conclusion.
-		res.verdict = vcs.BisectGood
-		env.log("too many neither good nor bad results, skipping this commit")
-	} else {
+	res.verdict, err = env.bisectionDecision(len(results), bad, good, infra)
+	if err != nil {
+		return nil, err
+	}
+	if !env.flaky && bad > 0 && bad < good {
+		env.log("reproducer seems to be flaky")
+		env.flaky = true
+	}
+	if res.verdict == vcs.BisectSkip {
 		res.rep = &report.Report{
 			Title: fmt.Sprintf("failed testing reproducer on %v", current.Hash),
 		}
+	} else {
+		res.rep = rep
 	}
-	// If most runs failed with a boot/test error, we end up with BisectSkip.
 	// TODO: when we start supporting boot/test error bisection, we need to make
 	// processResults treat that verdit as "good".
 	return res, nil
 }
 
+func (env *env) bisectionDecision(total, bad, good, infra int) (vcs.BisectResult, error) {
+	// Boot errors, image test errors, skipped crashes.
+	skip := total - bad - good - infra
+
+	wantGoodRuns := total / 2
+	wantTotalRuns := total / 2
+	if env.flaky {
+		// The reproducer works less than 50% of time, so we need really many good results.
+		wantGoodRuns = total * 3 / 4
+	}
+	if bad == 0 && good >= wantGoodRuns {
+		// We need a big enough number of good results, otherwise the chance of a false
+		// positive is too high.
+		return vcs.BisectGood, nil
+	} else if bad > 0 && (good+bad) >= wantTotalRuns {
+		// We need enough (good+bad) results to conclude that the kernel revision itself
+		// is not too broken.
+		return vcs.BisectBad, nil
+	} else if infra > skip {
+		// We have been unable to determine a verdict mostly because of infra errors.
+		// Abort the bisection.
+		return vcs.BisectSkip,
+			&InfraError{Title: "unable to determine the verdict because of infra errors"}
+	}
+	env.log("unable to determine the verdict: %d good runs (wanted %d), for bad wanted %d in total, got %d",
+		good, wantGoodRuns, wantTotalRuns, good+bad)
+	return vcs.BisectSkip, nil
+}
+
 func (env *env) processResults(current *vcs.Commit, results []instance.EnvTestResult) (
 	bad, good, infra int, rep *report.Report) {
 	var verdicts []string
-- 
cgit mrf-deployment