about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Aleksandr Nogikh <nogikh@google.com> 2023-05-08 14:59:26 +0200
committer Aleksandr Nogikh <wp32pw@gmail.com> 2023-05-09 16:23:28 +0200
commit e25e3643c228b131336a47ce46e0140001b98bbb (patch)
tree 0a57de255a5c6d50543a08a7831d39ac6169020e
parent 3354d81aa507bee152702604b3104e9d712401d2 (diff)
dashboard: restart failed cause bisections
If the bisection failed due to infrastructure problems, let's retry it in 7 days.
-rw-r--r-- dashboard/app/entities.go 43
-rw-r--r-- dashboard/app/jobs.go 19
-rw-r--r-- dashboard/app/jobs_test.go 71
-rw-r--r-- dashboard/dashapi/dashapi.go 9
-rw-r--r-- syz-ci/jobs.go 3
5 files changed, 118 insertions, 27 deletions
diff --git a/dashboard/app/entities.go b/dashboard/app/entities.go
index 05757c1e0..ed36fa4cd 100644
--- a/dashboard/app/entities.go
+++ b/dashboard/app/entities.go
@@ -93,27 +93,28 @@ type Bug struct {
NumRepro int64
// ReproLevel is the best ever found repro level for this bug.
// HeadReproLevel is best known repro level that still works on the HEAD commit.
- ReproLevel dashapi.ReproLevel
- HeadReproLevel dashapi.ReproLevel `datastore:"HeadReproLevel"`
- BisectCause BisectStatus
- BisectFix BisectStatus
- HasReport bool
- NeedCommitInfo bool
- FirstTime time.Time
- LastTime time.Time
- LastSavedCrash time.Time
- LastReproTime time.Time
- FixTime time.Time // when we become aware of the fixing commit
- LastActivity time.Time // last time we observed any activity related to the bug
- Closed time.Time
- SubsystemsTime time.Time // when we have updated subsystems last time
- SubsystemsRev int
- Reporting []BugReporting
- Commits []string // titles of fixing commmits
- CommitInfo []Commit // additional info for commits (for historical reasons parallel array to Commits)
- HappenedOn []string // list of managers
- PatchedOn []string `datastore:",noindex"` // list of managers
- UNCC []string // don't CC these emails on this bug
+ ReproLevel dashapi.ReproLevel
+ HeadReproLevel dashapi.ReproLevel `datastore:"HeadReproLevel"`
+ BisectCause BisectStatus
+ BisectFix BisectStatus
+ HasReport bool
+ NeedCommitInfo bool
+ FirstTime time.Time
+ LastTime time.Time
+ LastSavedCrash time.Time
+ LastReproTime time.Time
+ LastCauseBisect time.Time
+ FixTime time.Time // when we become aware of the fixing commit
+ LastActivity time.Time // last time we observed any activity related to the bug
+ Closed time.Time
+ SubsystemsTime time.Time // when we have updated subsystems last time
+ SubsystemsRev int
+ Reporting []BugReporting
+ Commits []string // titles of fixing commmits
+ CommitInfo []Commit // additional info for commits (for historical reasons parallel array to Commits)
+ HappenedOn []string // list of managers
+ PatchedOn []string `datastore:",noindex"` // list of managers
+ UNCC []string // don't CC these emails on this bug
// Kcidb publishing status bitmask:
// bit 0 - the bug is published
// bit 1 - don't want to publish it (syzkaller build/test errors)
diff --git a/dashboard/app/jobs.go b/dashboard/app/jobs.go
index d3eb08c21..6ca7ddd16 100644
--- a/dashboard/app/jobs.go
+++ b/dashboard/app/jobs.go
@@ -574,7 +574,12 @@ func findBugsForBisection(c context.Context, managers map[string]bool,
if crash == nil {
continue
}
- if jobType == JobBisectFix && timeSince(c, bug.LastTime) < 24*30*time.Hour {
+ const fixJobRepeat = 24 * 30 * time.Hour
+ if jobType == JobBisectFix && timeSince(c, bug.LastTime) < fixJobRepeat {
+ continue
+ }
+ const causeJobRepeat = 24 * 7 * time.Hour
+ if jobType == JobBisectCause && timeSince(c, bug.LastCauseBisect) < causeJobRepeat {
continue
}
return createBisectJobForBug(c, bug, crash, keys[bi], crashKey, jobType)
@@ -998,15 +1003,25 @@ func updateBugBisection(c context.Context, job *Job, jobKey *db.Key, req *dashap
}
if job.Type == JobBisectCause {
bug.BisectCause = result
+ bug.LastCauseBisect = now
} else {
bug.BisectFix = result
}
+ infraError := (req.Flags & dashapi.BisectResultInfraError) == dashapi.BisectResultInfraError
+ if infraError {
+ log.Errorf(c, "bisection of %q failed due to infra errors", job.BugTitle)
+ }
// If the crash still occurs on HEAD, update the bug's LastTime so that it will be
// retried after 30 days.
- if job.Type == JobBisectFix && req.Error == nil && len(req.Commits) == 0 && len(req.CrashLog) != 0 {
+ if job.Type == JobBisectFix && (result != BisectError || infraError) &&
+ len(req.Commits) == 0 && len(req.CrashLog) != 0 {
bug.BisectFix = BisectNot
bug.LastTime = now
}
+ // If the cause bisection failed due to infrastructure problems, also repeat it.
+ if job.Type == JobBisectCause && infraError {
+ bug.BisectCause = BisectNot
+ }
if _, err := db.Put(c, bugKey, bug); err != nil {
return fmt.Errorf("failed to put bug: %v", err)
}
diff --git a/dashboard/app/jobs_test.go b/dashboard/app/jobs_test.go
index 3a212563f..fe43f905e 100644
--- a/dashboard/app/jobs_test.go
+++ b/dashboard/app/jobs_test.go
@@ -1165,3 +1165,74 @@ func TestParallelJobs(t *testing.T) {
emptyPollResp = client.pollJobs(build.Manager)
c.expectEQ(emptyPollResp, &dashapi.JobPollResp{})
}
+
+// Test that JobBisectCause jobs are re-tried if there were infra problems.
+func TestJobCauseRetry(t *testing.T) {
+ c := NewCtx(t)
+ defer c.Close()
+
+ client := c.client2
+ // Upload a crash report.
+ build := testBuild(1)
+ client.UploadBuild(build)
+ crash := testCrashWithRepro(build, 1)
+ client.ReportCrash(crash)
+ client.pollEmailBug()
+
+ // Release the report to the second stage.
+ c.advanceTime(15 * 24 * time.Hour)
+ client.pollEmailBug() // "Sending report to the next stage" email.
+ client.pollEmailBug() // New report.
+
+ // Emulate an infra failure.
+ resp := client.pollSpecificJobs(build.Manager, dashapi.ManagerJobs{
+ BisectCause: true,
+ })
+ client.expectNE(resp.ID, "")
+ client.expectEQ(resp.Type, dashapi.JobBisectCause)
+ done := &dashapi.JobDoneReq{
+ ID: resp.ID,
+ Error: []byte("infra problem"),
+ Flags: dashapi.BisectResultInfraError,
+ }
+ client.expectOK(client.JobDone(done))
+ c.expectNoEmail()
+
+ // Ensure we don't recreate the job right away.
+ c.advanceTime(24 * time.Hour)
+ resp = client.pollSpecificJobs(build.Manager, dashapi.ManagerJobs{
+ BisectCause: true,
+ })
+ client.expectEQ(resp.ID, "")
+
+ // Wait the end of the freeze period.
+ c.advanceTime(7 * 24 * time.Hour)
+ resp = client.pollSpecificJobs(build.Manager, dashapi.ManagerJobs{
+ BisectCause: true,
+ })
+ client.expectNE(resp.ID, "")
+ client.expectEQ(resp.Type, dashapi.JobBisectCause)
+
+ done = &dashapi.JobDoneReq{
+ ID: resp.ID,
+ Build: *testBuild(2),
+ Log: []byte("bisect log"),
+ CrashTitle: "bisect crash title",
+ CrashLog: []byte("bisect crash log"),
+ CrashReport: []byte("bisect crash report"),
+ Commits: []dashapi.Commit{
+ {
+ Hash: "36e65cb4a0448942ec316b24d60446bbd5cc7827",
+ Title: "kernel: add a bug",
+ Author: "author@kernel.org",
+ CC: []string{"user@domain.com"},
+ Date: time.Date(2000, 2, 9, 4, 5, 6, 7, time.UTC),
+ },
+ },
+ }
+ done.Build.ID = resp.ID
+ c.expectOK(client.JobDone(done))
+
+ msg := c.pollEmailBug()
+ c.expectTrue(strings.Contains(msg.Body, "syzbot has bisected this issue to:"))
+}
diff --git a/dashboard/dashapi/dashapi.go b/dashboard/dashapi/dashapi.go
index 6419894d2..4d5df7e9a 100644
--- a/dashboard/dashapi/dashapi.go
+++ b/dashboard/dashapi/dashapi.go
@@ -219,10 +219,11 @@ const (
type JobDoneFlags int64
const (
- BisectResultMerge JobDoneFlags = 1 << iota // bisected to a merge commit
- BisectResultNoop // commit does not affect resulting kernel binary
- BisectResultRelease // commit is a kernel release
- BisectResultIgnore // this particular commit should be ignored, see syz-ci/jobs.go
+ BisectResultMerge JobDoneFlags = 1 << iota // bisected to a merge commit
+ BisectResultNoop // commit does not affect resulting kernel binary
+ BisectResultRelease // commit is a kernel release
+ BisectResultIgnore // this particular commit should be ignored, see syz-ci/jobs.go
+ BisectResultInfraError // the bisect failed due to an infrastructure problem
)
func (dash *Dashboard) JobPoll(req *JobPollReq) (*JobPollResp, error) {
diff --git a/syz-ci/jobs.go b/syz-ci/jobs.go
index b717ade4f..b01b11021 100644
--- a/syz-ci/jobs.go
+++ b/syz-ci/jobs.go
@@ -521,6 +521,9 @@ func (jp *JobProcessor) bisect(job *Job, mgrcfg *mgrconfig.Config) error {
res, err := bisect.Run(cfg)
resp.Log = trace.Bytes()
if err != nil {
+ if _, ok := err.(*bisect.InfraError); ok {
+ resp.Flags |= dashapi.BisectResultInfraError
+ }
return err
}
for _, com := range res.Commits {