diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2023-05-08 14:59:26 +0200 |
|---|---|---|
| committer | Aleksandr Nogikh <wp32pw@gmail.com> | 2023-05-09 16:23:28 +0200 |
| commit | e25e3643c228b131336a47ce46e0140001b98bbb (patch) | |
| tree | 0a57de255a5c6d50543a08a7831d39ac6169020e | |
| parent | 3354d81aa507bee152702604b3104e9d712401d2 (diff) | |
dashboard: restart failed cause bisections
If a cause bisection failed due to infrastructure problems, retry it
once 7 days have passed.
| -rw-r--r-- | dashboard/app/entities.go | 43 | ||||
| -rw-r--r-- | dashboard/app/jobs.go | 19 | ||||
| -rw-r--r-- | dashboard/app/jobs_test.go | 71 | ||||
| -rw-r--r-- | dashboard/dashapi/dashapi.go | 9 | ||||
| -rw-r--r-- | syz-ci/jobs.go | 3 |
5 files changed, 118 insertions, 27 deletions
diff --git a/dashboard/app/entities.go b/dashboard/app/entities.go index 05757c1e0..ed36fa4cd 100644 --- a/dashboard/app/entities.go +++ b/dashboard/app/entities.go @@ -93,27 +93,28 @@ type Bug struct { NumRepro int64 // ReproLevel is the best ever found repro level for this bug. // HeadReproLevel is best known repro level that still works on the HEAD commit. - ReproLevel dashapi.ReproLevel - HeadReproLevel dashapi.ReproLevel `datastore:"HeadReproLevel"` - BisectCause BisectStatus - BisectFix BisectStatus - HasReport bool - NeedCommitInfo bool - FirstTime time.Time - LastTime time.Time - LastSavedCrash time.Time - LastReproTime time.Time - FixTime time.Time // when we become aware of the fixing commit - LastActivity time.Time // last time we observed any activity related to the bug - Closed time.Time - SubsystemsTime time.Time // when we have updated subsystems last time - SubsystemsRev int - Reporting []BugReporting - Commits []string // titles of fixing commmits - CommitInfo []Commit // additional info for commits (for historical reasons parallel array to Commits) - HappenedOn []string // list of managers - PatchedOn []string `datastore:",noindex"` // list of managers - UNCC []string // don't CC these emails on this bug + ReproLevel dashapi.ReproLevel + HeadReproLevel dashapi.ReproLevel `datastore:"HeadReproLevel"` + BisectCause BisectStatus + BisectFix BisectStatus + HasReport bool + NeedCommitInfo bool + FirstTime time.Time + LastTime time.Time + LastSavedCrash time.Time + LastReproTime time.Time + LastCauseBisect time.Time + FixTime time.Time // when we become aware of the fixing commit + LastActivity time.Time // last time we observed any activity related to the bug + Closed time.Time + SubsystemsTime time.Time // when we have updated subsystems last time + SubsystemsRev int + Reporting []BugReporting + Commits []string // titles of fixing commmits + CommitInfo []Commit // additional info for commits (for historical reasons parallel array to Commits) + 
HappenedOn []string // list of managers + PatchedOn []string `datastore:",noindex"` // list of managers + UNCC []string // don't CC these emails on this bug // Kcidb publishing status bitmask: // bit 0 - the bug is published // bit 1 - don't want to publish it (syzkaller build/test errors) diff --git a/dashboard/app/jobs.go b/dashboard/app/jobs.go index d3eb08c21..6ca7ddd16 100644 --- a/dashboard/app/jobs.go +++ b/dashboard/app/jobs.go @@ -574,7 +574,12 @@ func findBugsForBisection(c context.Context, managers map[string]bool, if crash == nil { continue } - if jobType == JobBisectFix && timeSince(c, bug.LastTime) < 24*30*time.Hour { + const fixJobRepeat = 24 * 30 * time.Hour + if jobType == JobBisectFix && timeSince(c, bug.LastTime) < fixJobRepeat { + continue + } + const causeJobRepeat = 24 * 7 * time.Hour + if jobType == JobBisectCause && timeSince(c, bug.LastCauseBisect) < causeJobRepeat { continue } return createBisectJobForBug(c, bug, crash, keys[bi], crashKey, jobType) @@ -998,15 +1003,25 @@ func updateBugBisection(c context.Context, job *Job, jobKey *db.Key, req *dashap } if job.Type == JobBisectCause { bug.BisectCause = result + bug.LastCauseBisect = now } else { bug.BisectFix = result } + infraError := (req.Flags & dashapi.BisectResultInfraError) == dashapi.BisectResultInfraError + if infraError { + log.Errorf(c, "bisection of %q failed due to infra errors", job.BugTitle) + } // If the crash still occurs on HEAD, update the bug's LastTime so that it will be // retried after 30 days. - if job.Type == JobBisectFix && req.Error == nil && len(req.Commits) == 0 && len(req.CrashLog) != 0 { + if job.Type == JobBisectFix && (result != BisectError || infraError) && + len(req.Commits) == 0 && len(req.CrashLog) != 0 { bug.BisectFix = BisectNot bug.LastTime = now } + // If the cause bisection failed due to infrastructure problems, also repeat it. 
+ if job.Type == JobBisectCause && infraError { + bug.BisectCause = BisectNot + } if _, err := db.Put(c, bugKey, bug); err != nil { return fmt.Errorf("failed to put bug: %v", err) } diff --git a/dashboard/app/jobs_test.go b/dashboard/app/jobs_test.go index 3a212563f..fe43f905e 100644 --- a/dashboard/app/jobs_test.go +++ b/dashboard/app/jobs_test.go @@ -1165,3 +1165,74 @@ func TestParallelJobs(t *testing.T) { emptyPollResp = client.pollJobs(build.Manager) c.expectEQ(emptyPollResp, &dashapi.JobPollResp{}) } + +// Test that JobBisectCause jobs are re-tried if there were infra problems. +func TestJobCauseRetry(t *testing.T) { + c := NewCtx(t) + defer c.Close() + + client := c.client2 + // Upload a crash report. + build := testBuild(1) + client.UploadBuild(build) + crash := testCrashWithRepro(build, 1) + client.ReportCrash(crash) + client.pollEmailBug() + + // Release the report to the second stage. + c.advanceTime(15 * 24 * time.Hour) + client.pollEmailBug() // "Sending report to the next stage" email. + client.pollEmailBug() // New report. + + // Emulate an infra failure. + resp := client.pollSpecificJobs(build.Manager, dashapi.ManagerJobs{ + BisectCause: true, + }) + client.expectNE(resp.ID, "") + client.expectEQ(resp.Type, dashapi.JobBisectCause) + done := &dashapi.JobDoneReq{ + ID: resp.ID, + Error: []byte("infra problem"), + Flags: dashapi.BisectResultInfraError, + } + client.expectOK(client.JobDone(done)) + c.expectNoEmail() + + // Ensure we don't recreate the job right away. + c.advanceTime(24 * time.Hour) + resp = client.pollSpecificJobs(build.Manager, dashapi.ManagerJobs{ + BisectCause: true, + }) + client.expectEQ(resp.ID, "") + + // Wait the end of the freeze period. 
+ c.advanceTime(7 * 24 * time.Hour) + resp = client.pollSpecificJobs(build.Manager, dashapi.ManagerJobs{ + BisectCause: true, + }) + client.expectNE(resp.ID, "") + client.expectEQ(resp.Type, dashapi.JobBisectCause) + + done = &dashapi.JobDoneReq{ + ID: resp.ID, + Build: *testBuild(2), + Log: []byte("bisect log"), + CrashTitle: "bisect crash title", + CrashLog: []byte("bisect crash log"), + CrashReport: []byte("bisect crash report"), + Commits: []dashapi.Commit{ + { + Hash: "36e65cb4a0448942ec316b24d60446bbd5cc7827", + Title: "kernel: add a bug", + Author: "author@kernel.org", + CC: []string{"user@domain.com"}, + Date: time.Date(2000, 2, 9, 4, 5, 6, 7, time.UTC), + }, + }, + } + done.Build.ID = resp.ID + c.expectOK(client.JobDone(done)) + + msg := c.pollEmailBug() + c.expectTrue(strings.Contains(msg.Body, "syzbot has bisected this issue to:")) +} diff --git a/dashboard/dashapi/dashapi.go b/dashboard/dashapi/dashapi.go index 6419894d2..4d5df7e9a 100644 --- a/dashboard/dashapi/dashapi.go +++ b/dashboard/dashapi/dashapi.go @@ -219,10 +219,11 @@ const ( type JobDoneFlags int64 const ( - BisectResultMerge JobDoneFlags = 1 << iota // bisected to a merge commit - BisectResultNoop // commit does not affect resulting kernel binary - BisectResultRelease // commit is a kernel release - BisectResultIgnore // this particular commit should be ignored, see syz-ci/jobs.go + BisectResultMerge JobDoneFlags = 1 << iota // bisected to a merge commit + BisectResultNoop // commit does not affect resulting kernel binary + BisectResultRelease // commit is a kernel release + BisectResultIgnore // this particular commit should be ignored, see syz-ci/jobs.go + BisectResultInfraError // the bisect failed due to an infrastructure problem ) func (dash *Dashboard) JobPoll(req *JobPollReq) (*JobPollResp, error) { diff --git a/syz-ci/jobs.go b/syz-ci/jobs.go index b717ade4f..b01b11021 100644 --- a/syz-ci/jobs.go +++ b/syz-ci/jobs.go @@ -521,6 +521,9 @@ func (jp *JobProcessor) bisect(job *Job, 
mgrcfg *mgrconfig.Config) error { res, err := bisect.Run(cfg) resp.Log = trace.Bytes() if err != nil { + if _, ok := err.(*bisect.InfraError); ok { + resp.Flags |= dashapi.BisectResultInfraError + } return err } for _, com := range res.Commits { |
