From 4c9f39d2f78fe8ca0c9687a484e1c2579b9768f2 Mon Sep 17 00:00:00 2001
From: Aleksandr Nogikh <nogikh@google.com>
Date: Mon, 8 May 2023 14:08:29 +0200
Subject: pkg/bisect: abort on infrastructure errors

There's not much sense to continue the bisection if something in the
testing infrastructure got broken. Indicate it with the InfraError
error.
---
 pkg/bisect/bisect.go      | 42 ++++++++++++++++++++++++----------
 pkg/bisect/bisect_test.go | 58 ++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 80 insertions(+), 20 deletions(-)

(limited to 'pkg')

diff --git a/pkg/bisect/bisect.go b/pkg/bisect/bisect.go
index 294ba7a2a..b4b211af6 100644
--- a/pkg/bisect/bisect.go
+++ b/pkg/bisect/bisect.go
@@ -111,6 +111,14 @@ type Result struct {
 	IsRelease  bool
 }
 
+type InfraError struct {
+	Title string
+}
+
+func (e InfraError) Error() string {
+	return e.Title
+}
+
 // Run does the bisection and returns either the Result,
 // or, if the crash is not reproduced on the start commit, an error.
 func Run(cfg *Config) (*Result, error) {
@@ -577,33 +585,39 @@ func (env *env) test() (*testResult, error) {
 	results, err := env.inst.Test(numTests, cfg.Repro.Syz, cfg.Repro.Opts, cfg.Repro.C)
 	env.testTime += time.Since(testStart)
 	if err != nil {
-		res.rep = &report.Report{
-			Title: fmt.Sprintf("failed testing reproducer on %v: %v", current.Hash, err),
-		}
-		env.log(res.rep.Title)
-		return res, nil
+		problem := fmt.Sprintf("repro testing failure: %v", err)
+		env.log(problem)
+		return res, &InfraError{Title: problem}
 	}
-	bad, good, rep := env.processResults(current, results)
+	bad, good, infra, rep := env.processResults(current, results)
 	res.rep = rep
 	res.verdict = vcs.BisectSkip
+	if infra > len(results)/2 {
+		// More than 1/2 of runs failed with infrastructure error,
+		// no sense in continuing the bisection at the moment.
+		return res, &InfraError{Title: "more than 50% of runs failed with an infra error"}
+	}
 	if bad != 0 {
 		res.verdict = vcs.BisectBad
 		if !env.flaky && bad < good {
 			env.log("reproducer seems to be flaky")
 			env.flaky = true
 		}
-	} else if len(results)-good-bad > len(results)/3*2 {
-		// More than 2/3 of instances failed with infrastructure error,
-		// can't reliably tell that the commit is good.
-		res.verdict = vcs.BisectSkip
 	} else if good != 0 {
 		res.verdict = vcs.BisectGood
+	} else {
+		res.rep = &report.Report{
+			Title: fmt.Sprintf("failed testing reproducer on %v", current.Hash),
+		}
 	}
+	// If all runs failed with a boot/test error, we just end up with BisectSkip.
+	// TODO: when we start supporting boot/test error bisection, we need to make
+	// processResults treat that verdit as "good".
 	return res, nil
 }
 
 func (env *env) processResults(current *vcs.Commit, results []instance.EnvTestResult) (
-	bad, good int, rep *report.Report) {
+	bad, good, infra int, rep *report.Report) {
 	var verdicts []string
 	for i, res := range results {
 		if res.Error == nil {
@@ -613,7 +627,10 @@ func (env *env) processResults(current *vcs.Commit, results []instance.EnvTestRe
 		}
 		switch err := res.Error.(type) {
 		case *instance.TestError:
-			if err.Boot {
+			if err.Infra {
+				infra++
+				verdicts = append(verdicts, fmt.Sprintf("infra problem: %v", err))
+			} else if err.Boot {
 				verdicts = append(verdicts, fmt.Sprintf("boot failed: %v", err))
 			} else {
 				verdicts = append(verdicts, fmt.Sprintf("basic kernel testing failed: %v", err))
@@ -633,6 +650,7 @@ func (env *env) processResults(current *vcs.Commit, results []instance.EnvTestRe
 			}
 			env.saveDebugFile(current.Hash, i, output)
 		default:
+			infra++
 			verdicts = append(verdicts, fmt.Sprintf("failed: %v", err))
 		}
 	}
diff --git a/pkg/bisect/bisect_test.go b/pkg/bisect/bisect_test.go
index d5c369272..4dbed9e34 100644
--- a/pkg/bisect/bisect_test.go
+++ b/pkg/bisect/bisect_test.go
@@ -4,6 +4,7 @@
 package bisect
 
 import (
+	"errors"
 	"fmt"
 	"strconv"
 	"testing"
@@ -51,7 +52,33 @@ func (env *testEnv) Test(numVMs int, reproSyz, reproOpts, reproC []byte) ([]inst
 	commit := env.headCommit()
 	if commit >= env.test.brokenStart && commit <= env.test.brokenEnd ||
 		env.config == "baseline-skip" {
-		return nil, fmt.Errorf("broken build")
+		var ret []instance.EnvTestResult
+		for i := 0; i < numVMs; i++ {
+			ret = append(ret, instance.EnvTestResult{
+				Error: &instance.TestError{
+					Boot:  true,
+					Title: "kernel doesn't boot",
+				},
+			})
+		}
+		return ret, nil
+	}
+	if commit >= env.test.infraErrStart && commit <= env.test.infraErrEnd {
+		var ret []instance.EnvTestResult
+		for i := 0; i < numVMs; i++ {
+			var err error
+			// More than 50% failures.
+			if i*2 <= numVMs {
+				err = &instance.TestError{
+					Infra: true,
+					Title: "failed to create a VM",
+				}
+			}
+			ret = append(ret, instance.EnvTestResult{
+				Error: err,
+			})
+		}
+		return ret, nil
 	}
 	var ret []instance.EnvTestResult
 	if (env.config == "baseline-repro" || env.config == "new-minimized-config" || env.config == "original config") &&
@@ -147,6 +174,9 @@ func testBisection(t *testing.T, baseDir string, test BisectionTest) {
 		if test.expectErr != (err != nil) {
 			t.Fatalf("expected error %v, got %v", test.expectErr, err)
 		}
+		if test.expectErrType != nil && !errors.As(err, &test.expectErrType) {
+			t.Fatalf("expected %#v error, got %#v", test.expectErrType, err)
+		}
 		if err != nil {
 			if res != nil {
 				t.Fatalf("got both result and error: '%v' %+v", err, *res)
@@ -194,16 +224,19 @@ func checkBisectionResult(t *testing.T, test BisectionTest, res *Result) {
 
 type BisectionTest struct {
 	// input environment
-	name        string
-	fix         bool
-	startCommit int
-	brokenStart int
-	brokenEnd   int
+	name          string
+	fix           bool
+	startCommit   int
+	brokenStart   int
+	brokenEnd     int
+	infraErrStart int
+	infraErrEnd   int
 	// Range of commits that result in the same kernel binary signature.
 	sameBinaryStart int
 	sameBinaryEnd   int
 	// expected output
-	expectErr bool
+	expectErr     bool
+	expectErrType any
 	// Expect res.Report != nil.
 	expectRep  bool
 	noopChange bool
@@ -492,6 +525,16 @@ var bisectionTests = []BisectionTest{
 		sameBinaryEnd:   650,
 		noopChange:      true,
 	},
+	{
+		name:          "cause-infra-problems",
+		startCommit:   905,
+		expectRep:     false,
+		expectErr:     true,
+		expectErrType: &InfraError{},
+		infraErrStart: 600,
+		infraErrEnd:   800,
+		culprit:       602,
+	},
 }
 
 func TestBisectionResults(t *testing.T) {
@@ -525,7 +568,6 @@ func checkTest(t *testing.T, test BisectionTest) {
 		(test.commitLen != 0 ||
 			test.expectRep ||
 			test.oldestLatest != 0 ||
-			test.culprit != 0 ||
 			test.resultingConfig != "") {
 		t.Fatalf("expecting non-default values on error")
 	}
-- 
cgit mrf-deployment