diff options
| author | Space Meyer <spm@google.com> | 2023-02-18 18:17:03 +0100 |
|---|---|---|
| committer | Space Meyer <git@the-space.agency> | 2023-05-05 21:46:31 +0200 |
| commit | 90c93c40627cb0ac3c2c7cb99d807fd4c137adcb (patch) | |
| tree | 911075b668bc38c22890ed361de6493f33d3ecc5 /pkg/bisect | |
| parent | 4cec9341d5812957f3f34bafeef1c11036e286c0 (diff) | |
pkg/bisect: mark jobs with untestable history as failed
Background info:
When a bisection is started, we only know the kernel commit on which
syzkaller encountered the crash. Before the actual bisection begins,
bisect() needs to find a good commit. Then we can bisect between them.
For fix bisections bisect() tests HEAD. If HEAD doesn't have the bug
the fixing commit must be somewhere in the middle. For cause bisection
we test a number of old releases, starting with the newest release.
Both this commit range search and the actual bisection later use test()
to build the kernel and run the reproducer.
During actual bisections we invoke test() for every step. If any test()
invocation returns a non nil error, the bisection it was called from
is aborted. Any non-fatal errors should be signaled via the testResult
returned from test(). For this reason a build/boot failure does not
return an error. Instead testResult.verdict will be vcs.BisectSkip.
The Problem:
Given the following call stack:
bisect() -> commitRange() -> commitRangeFor{Fix,Cause}() -> test()
Previously we reported fix bisections, where HEAD was build broken and
cause bisections where all tested releases were build broken as
inconclusive.
This is confusing for users. For fix bisections it looks like the
fixing commit is either the commit from the original crash or HEAD.
For cause bisections it looks like the breaking commit is either the
original commit or the commit of the oldest tested release. Neither
is correct.
For fix bisections we see this, when the HEAD of a tested branch is
build broken. When this happens all attempted fix bisections will get
nonsense results. For cause bisections we see this, when changes to the
bisection compilers or test rootfs cause us to not be able to build or
boot very old kernels.
These inconclusive bisection results will not be retried automatically
and we don't have an easy way to clear them.
The Solution:
For fix bisections: Retry the bisection later. If HEAD is completely
broken we will know, because fuzzing will stop.
For cause bisections: Mark the bisection as failed. The result is
unlikely to change in the future without intervention by the syzbot
admins. Users won't bother looking at those bisections and the
dashboard already has code to mass-retry failed bisections.
- In test(): Populate testResult.rep with a meaningful report before
returning, after build/boot failures.
- In bisect(): Explicitly check the testResult.verdict of the last
commit tested in commitRange(), instead of using testResult.rep==nil
as an oracle for aborting the bisection.
- In bisect(): Don't return an inconclusive result when build/boot
failures prevent us from finding a commit range to bisect between.
Diffstat (limited to 'pkg/bisect')
| -rw-r--r-- | pkg/bisect/bisect.go | 109 | ||||
| -rw-r--r-- | pkg/bisect/bisect_test.go | 48 |
2 files changed, 110 insertions, 47 deletions
diff --git a/pkg/bisect/bisect.go b/pkg/bisect/bisect.go index 5df566581..294ba7a2a 100644 --- a/pkg/bisect/bisect.go +++ b/pkg/bisect/bisect.go @@ -178,12 +178,14 @@ func runImpl(cfg *Config, repo vcs.Repo, inst instance.Env) (*Result, error) { } if len(res.Commits) == 0 { if cfg.Fix { - env.log("the crash still happens on HEAD") + env.log("crash still not fixed on HEAD or HEAD had kernel test errors") } else { - env.log("the crash already happened on the oldest tested release") + env.log("oldest tested release already had the bug or it had kernel test errors") } env.log("commit msg: %v", res.Commit.Title) - env.log("crash: %v\n%s", res.Report.Title, res.Report.Report) + if res.Report != nil { + env.log("crash: %v\n%s", res.Report.Title, res.Report.Report) + } return res, nil } what := "bad" @@ -252,20 +254,11 @@ func (env *env) bisect() (*Result, error) { } } - bad, good, rep1, results1, err := env.commitRange() - if err != nil { - return nil, err - } - if rep1 != nil { - return &Result{Report: rep1, Commit: bad, Config: env.kernelConfig}, - nil // still not fixed/happens on the oldest release - } - if good == nil { - // Special case: all previous releases are build broken. - // It's unclear what's the best way to report this. - // We return 2 commits which means "inconclusive". - return &Result{Commits: []*vcs.Commit{com, bad}, Config: env.kernelConfig}, nil + bad, good, results1, fatalResult, err := env.commitRange() + if fatalResult != nil || err != nil { + return fatalResult, err } + results := map[string]*testResult{cfg.Kernel.Commit: testRes} for _, res := range results1 { results[res.com.Hash] = res @@ -400,60 +393,95 @@ func (env *env) detectNoopChange(results map[string]*testResult, com *vcs.Commit return testRes.kernelSign == parentRes.kernelSign, nil } -func (env *env) commitRange() (*vcs.Commit, *vcs.Commit, *report.Report, []*testResult, error) { +func (env *env) commitRange() (*vcs.Commit, *vcs.Commit, []*testResult, *Result, error) { + rangeFunc := env.commitRangeForCause if env.cfg.Fix { - return env.commitRangeForFix() + rangeFunc = env.commitRangeForFix + } + + bad, good, results1, err := rangeFunc() + if err != nil { + return bad, good, results1, nil, err } - return env.commitRangeForBug() + + fatalResult, err := env.validateCommitRange(bad, good, results1) + return bad, good, results1, fatalResult, err } -func (env *env) commitRangeForFix() (*vcs.Commit, *vcs.Commit, *report.Report, []*testResult, error) { +func (env *env) commitRangeForFix() (*vcs.Commit, *vcs.Commit, []*testResult, error) { env.log("testing current HEAD %v", env.head.Hash) if _, err := env.repo.SwitchCommit(env.head.Hash); err != nil { - return nil, nil, nil, nil, err + return nil, nil, nil, err } res, err := env.test() if err != nil { - return nil, nil, nil, nil, err + return nil, nil, nil, err } if res.verdict != vcs.BisectGood { - return env.head, nil, res.rep, []*testResult{res}, nil + return env.head, nil, []*testResult{res}, nil } - return env.head, env.commit, nil, []*testResult{res}, nil + return env.head, env.commit, []*testResult{res}, nil } -func (env *env) commitRangeForBug() (*vcs.Commit, *vcs.Commit, *report.Report, []*testResult, error) { +func (env *env) commitRangeForCause() (*vcs.Commit, *vcs.Commit, []*testResult, error) { cfg := env.cfg tags, err := env.bisecter.PreviousReleaseTags(cfg.Kernel.Commit, cfg.CompilerType) if err != nil { - return nil, nil, nil, nil, err + return nil, nil, nil, err } if len(tags) == 0 { - return nil, nil, nil, nil, fmt.Errorf("no release tags before this commit") + return nil, nil, nil, fmt.Errorf("no release tags before this commit") } lastBad := env.commit - var lastRep *report.Report var results []*testResult for _, tag := range tags { env.log("testing release %v", tag) com, err := env.repo.SwitchCommit(tag) if err != nil { - return nil, nil, nil, nil, err + return nil, nil, nil, err } res, err := env.test() if err != nil { - return nil, nil, nil, nil, err + return nil, nil, nil, err } results = append(results, res) if res.verdict == vcs.BisectGood { - return lastBad, com, nil, results, nil + return lastBad, com, results, nil } if res.verdict == vcs.BisectBad { lastBad = com - lastRep = res.rep } } - return lastBad, nil, lastRep, results, nil + // All tags were vcs.BisectBad or vcs.BisectSkip. + return lastBad, nil, results, nil +} + +func (env *env) validateCommitRange(bad, good *vcs.Commit, results []*testResult) (*Result, error) { + if len(results) < 1 { + return nil, fmt.Errorf("commitRange returned no results") + } + + finalResult := results[len(results)-1] // HEAD test for fix, oldest tested test for cause bisection + if finalResult.verdict == vcs.BisectBad { + // For cause bisection: Oldest tested release already had the bug. Giving up. + // For fix bisection: Crash still not fixed on HEAD. Leaving Result.Commits empty causes + // syzbot to retry this bisection later. + env.log("crash still not fixed/happens on the oldest tested release") + return &Result{Report: finalResult.rep, Commit: bad, Config: env.kernelConfig}, nil + } + if finalResult.verdict == vcs.BisectSkip { + if env.cfg.Fix { + // HEAD is moving target. Sometimes changes break syzkaller fuzzing. + // Leaving Result.Commits empty so syzbot retries this bisection again later. + env.log("HEAD had kernel build, boot or test errors") + return &Result{Report: finalResult.rep, Commit: bad, Config: env.kernelConfig}, nil + } + // The oldest tested release usually doesn't change. Retrying would give us the same result, + // unless we change the syz-ci setup (e.g. new rootfs, new compilers). + return nil, fmt.Errorf("oldest tested release had kernel build, boot or test errors") + } + + return nil, nil } type testResult struct { @@ -502,6 +530,7 @@ func (env *env) build() (*vcs.Commit, string, error) { // Note: When this function returns an error, the bisection it was called from is aborted. // Hence recoverable errors must be handled and the callers must treat testResult with care. +// e.g. testResult.verdict will be vcs.BisectSkip for a broken build, but err will be nil. func (env *env) test() (*testResult, error) { cfg := env.cfg if cfg.Timeout != 0 && time.Since(env.startTime) > cfg.Timeout { @@ -518,15 +547,20 @@ func (env *env) test() (*testResult, error) { return res, fmt.Errorf("couldn't get repo HEAD: %v", err) } if err != nil { + errInfo := fmt.Sprintf("failed building %v: ", current.Hash) if verr, ok := err.(*osutil.VerboseError); ok { - env.log("%v", verr.Title) + errInfo += verr.Title env.saveDebugFile(current.Hash, 0, verr.Output) } else if verr, ok := err.(*build.KernelError); ok { - env.log("%s", verr.Report) + errInfo += string(verr.Report) env.saveDebugFile(current.Hash, 0, verr.Output) } else { + errInfo += err.Error() env.log("%v", err) } + + env.log("%s", errInfo) + res.rep = &report.Report{Title: errInfo} return res, nil } @@ -543,7 +577,10 @@ func (env *env) test() (*testResult, error) { results, err := env.inst.Test(numTests, cfg.Repro.Syz, cfg.Repro.Opts, cfg.Repro.C) env.testTime += time.Since(testStart) if err != nil { - env.log("failed: %v", err) + res.rep = &report.Report{ + Title: fmt.Sprintf("failed testing reproducer on %v: %v", current.Hash, err), + } + env.log(res.rep.Title) return res, nil } bad, good, rep := env.processResults(current, results) diff --git a/pkg/bisect/bisect_test.go b/pkg/bisect/bisect_test.go index 2d613759b..d5c369272 100644 --- a/pkg/bisect/bisect_test.go +++ b/pkg/bisect/bisect_test.go @@ -203,14 +203,19 @@ type BisectionTest struct { sameBinaryStart int sameBinaryEnd int // expected output - expectErr bool - expectRep bool - noopChange bool - isRelease bool - flaky bool - commitLen int + expectErr bool + // Expect res.Report != nil. + expectRep bool + noopChange bool + isRelease bool + flaky bool + // Expected number of returned commits for inconclusive bisection. + commitLen int + // For cause bisection: Oldest commit returned by bisection. + // For fix bisection: Newest commit returned by bisection. oldestLatest int - // input and output + // For cause bisection: The commit introducing the bug. + // For fix bisection: The commit fixing the bug. culprit int baselineConfig string resultingConfig string @@ -329,7 +334,11 @@ var bisectionTests = []BisectionTest{ startCommit: 802, brokenStart: 100, brokenEnd: 800, - commitLen: 2, + // We mark these as failed, because build/boot failures of ancient releases are unlikely to get fixed + // without manual intervention by syz-ci admins. + commitLen: 0, + expectRep: false, + expectErr: true, }, // Tests that bisection returns the correct fix commit. { @@ -348,16 +357,33 @@ var bisectionTests = []BisectionTest{ startCommit: 905, expectErr: true, }, + // Tests that no commits are returned when HEAD is build broken. + // Fix bisection equivalent of all-releases-broken. + { + name: "fix-HEAD-broken", + fix: true, + startCommit: 400, + brokenStart: 500, + brokenEnd: 1000, + culprit: 1000, + oldestLatest: 905, + // We mark these as re-tryable, because build/boot failures of HEAD will also be caught during regular fuzzing + // and are fixed by kernel devs or syz-ci admins in a timely manner. + commitLen: 0, + expectRep: true, + expectErr: false, + }, // Tests that no commits are returned when crash occurs on HEAD // for fix bisection. { - name: "fix-crashes-HEAD", + name: "fix-HEAD-crashes", fix: true, startCommit: 400, - commitLen: 0, - expectRep: true, culprit: 1000, oldestLatest: 905, + commitLen: 0, + expectRep: true, + expectErr: false, }, // Tests that more than 1 commit is returned when fix bisection is inconclusive. { |
