aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2017-11-30 17:14:18 +0100
committerDmitry Vyukov <dvyukov@google.com>2017-12-01 13:58:11 +0100
commit2fa91450df792689c42bd52f98ffdacee99ace91 (patch)
tree6bac47c9c556725b596af31c0212d57fc6157575
parent5683420f11c9eb812a57f2c5786b38015a652fa0 (diff)
dashboard/app: add manager monitoring
Make it possible to monitor health and operation of all managers from dashboard. 1. Notify dashboard about internal syz-ci errors (currently we don't know when/if they happen). 2. Send statistics from managers to dashboard.
-rw-r--r--dashboard/app/api.go56
-rw-r--r--dashboard/app/entities.go87
-rw-r--r--dashboard/app/handler.go22
-rw-r--r--dashboard/app/main.go174
-rw-r--r--dashboard/app/main.html49
-rw-r--r--dashboard/app/reporting.go3
-rw-r--r--dashboard/dashapi/dashapi.go18
-rw-r--r--syz-ci/jobs.go41
-rw-r--r--syz-ci/manager.go33
-rw-r--r--syz-ci/managercmd.go19
-rw-r--r--syz-ci/testing.go9
-rw-r--r--syz-manager/manager.go39
-rw-r--r--vm/vm.go9
-rw-r--r--vm/vmimpl/vmimpl.go4
14 files changed, 477 insertions, 86 deletions
diff --git a/dashboard/app/api.go b/dashboard/app/api.go
index db265f8b8..ef1f344fd 100644
--- a/dashboard/app/api.go
+++ b/dashboard/app/api.go
@@ -43,6 +43,7 @@ var apiNamespaceHandlers = map[string]APINamespaceHandler{
"report_crash": apiReportCrash,
"report_failed_repro": apiReportFailedRepro,
"need_repro": apiNeedRepro,
+ "manager_stats": apiManagerStats,
}
type JSONHandler func(c context.Context, r *http.Request) (interface{}, error)
@@ -204,6 +205,12 @@ func apiUploadBuild(c context.Context, ns string, r *http.Request) (interface{},
return nil, err
}
}
+ if err := updateManager(c, ns, req.Manager, func(mgr *Manager, stats *ManagerStats) {
+ mgr.CurrentBuild = req.ID
+ mgr.FailedBuildBug = ""
+ }); err != nil {
+ return nil, err
+ }
return nil, nil
}
@@ -374,7 +381,14 @@ func apiReportBuildError(c context.Context, ns string, r *http.Request) (interfa
if err := uploadBuild(c, ns, &req.Build, BuildFailed); err != nil {
return nil, err
}
- if _, err := reportCrash(c, ns, &req.Crash); err != nil {
+ req.Crash.BuildID = req.Build.ID
+ bug, err := reportCrash(c, ns, &req.Crash)
+ if err != nil {
+ return nil, err
+ }
+ if err := updateManager(c, ns, req.Build.Manager, func(mgr *Manager, stats *ManagerStats) {
+ mgr.FailedBuildBug = bugKeyHash(bug.Namespace, bug.Title, bug.Seq)
+ }); err != nil {
return nil, err
}
return nil, nil
@@ -387,10 +401,17 @@ func apiReportCrash(c context.Context, ns string, r *http.Request) (interface{},
if err := json.NewDecoder(r.Body).Decode(req); err != nil {
return nil, fmt.Errorf("failed to unmarshal request: %v", err)
}
- return reportCrash(c, ns, req)
+ bug, err := reportCrash(c, ns, req)
+ if err != nil {
+ return nil, err
+ }
+ resp := &dashapi.ReportCrashResp{
+ NeedRepro: needRepro(bug),
+ }
+ return resp, nil
}
-func reportCrash(c context.Context, ns string, req *dashapi.Crash) (interface{}, error) {
+func reportCrash(c context.Context, ns string, req *dashapi.Crash) (*Bug, error) {
req.Title = limitLength(req.Title, maxTextLen)
req.Maintainers = email.MergeEmailLists(req.Maintainers)
if req.Corrupted {
@@ -488,10 +509,7 @@ func reportCrash(c context.Context, ns string, req *dashapi.Crash) (interface{},
if saveCrash {
purgeOldCrashes(c, bug, bugKey)
}
- resp := &dashapi.ReportCrashResp{
- NeedRepro: needRepro(bug),
- }
- return resp, nil
+ return bug, nil
}
func purgeOldCrashes(c context.Context, bug *Bug, bugKey *datastore.Key) {
@@ -603,6 +621,30 @@ func apiNeedRepro(c context.Context, ns string, r *http.Request) (interface{}, e
return resp, nil
}
+// apiManagerStats handles the "manager_stats" API call.
+// It decodes a dashapi.ManagerStatsReq from the request body and folds it into
+// the manager entity and its stats via updateManager.
+// No response payload is returned (nil on success).
+func apiManagerStats(c context.Context, ns string, r *http.Request) (interface{}, error) {
+ req := new(dashapi.ManagerStatsReq)
+ if err := json.NewDecoder(r.Body).Decode(req); err != nil {
+ return nil, fmt.Errorf("failed to unmarshal request: %v", err)
+ }
+ now := timeNow(c)
+ if err := updateManager(c, ns, req.Name, func(mgr *Manager, stats *ManagerStats) {
+ mgr.LastAlive = now
+ mgr.CurrentUpTime = req.UpTime
+ // Corpus and Cover are reported as current levels: keep the largest value seen.
+ if cur := int64(req.Corpus); cur > stats.MaxCorpus {
+ stats.MaxCorpus = cur
+ }
+ if cur := int64(req.Cover); cur > stats.MaxCover {
+ stats.MaxCover = cur
+ }
+ // FuzzingTime, Crashes and Execs are added to the running totals.
+ stats.TotalFuzzingTime += req.FuzzingTime
+ stats.TotalCrashes += int64(req.Crashes)
+ stats.TotalExecs += int64(req.Execs)
+ }); err != nil {
+ return nil, err
+ }
+ return nil, nil
+}
+
func findBugForCrash(c context.Context, ns, title string) (*Bug, *datastore.Key, error) {
var bugs []*Bug
keys, err := datastore.NewQuery("Bug").
diff --git a/dashboard/app/entities.go b/dashboard/app/entities.go
index 6661b84b7..492ca198e 100644
--- a/dashboard/app/entities.go
+++ b/dashboard/app/entities.go
@@ -24,6 +24,26 @@ const (
maxCrashes = 40
)
+// Manager is the dashboard's record of a single manager.
+// updateManager stores it under the "Manager" kind with the key "<Namespace>-<Name>".
+type Manager struct {
+ Namespace string
+ Name string
+ CurrentBuild string // ID of the last build uploaded for this manager (set in apiUploadBuild)
+ FailedBuildBug string // bugKeyHash of the bug from the last reported build error; cleared by apiUploadBuild
+ LastAlive time.Time // time of the last manager_stats request
+ CurrentUpTime time.Duration // UpTime value from the last manager_stats request
+}
+
+// ManagerStats holds per-day manager runtime stats.
+// Has Manager as parent entity. Keyed by Date.
+// Values are filled in by apiManagerStats from manager_stats requests.
+type ManagerStats struct {
+ Date int // YYYYMMDD
+ MaxCorpus int64 // largest Corpus value reported
+ MaxCover int64 // largest Cover value reported
+ TotalFuzzingTime time.Duration // sum of reported FuzzingTime deltas
+ TotalCrashes int64 // sum of reported Crashes deltas
+ TotalExecs int64 // sum of reported Execs deltas
+}
+
type Build struct {
Namespace string
Manager string
@@ -93,7 +113,7 @@ type ReportingStateEntry struct {
Name string
// Current reporting quota consumption.
Sent int
- Date int
+ Date int // YYYYMMDD
}
// Job represent a single patch testing job for syz-ci.
@@ -163,6 +183,65 @@ const (
BuildJob
)
+// updateManager does transactional compare-and-swap on the manager and its current stats.
+// It loads the Manager entity keyed by "<ns>-<name>" and its ManagerStats child entity
+// keyed by the current date (YYYYMMDD), substituting fresh values when either does not
+// exist yet, calls fn to mutate both in place, and writes both back.
+// The transaction is configured with up to 10 attempts.
+// NOTE(review): fn presumably may run more than once if the transaction is retried,
+// so it should do nothing beyond mutating mgr and stats — confirm.
+func updateManager(c context.Context, ns, name string, fn func(mgr *Manager, stats *ManagerStats)) error {
+ date := timeDate(timeNow(c))
+ tx := func(c context.Context) error {
+ mgr := new(Manager)
+ mgrKey := datastore.NewKey(c, "Manager", fmt.Sprintf("%v-%v", ns, name), 0, nil)
+ if err := datastore.Get(c, mgrKey, mgr); err != nil {
+ if err != datastore.ErrNoSuchEntity {
+ return fmt.Errorf("failed to get manager %v/%v: %v", ns, name, err)
+ }
+ // First time this manager is seen: start from an empty record.
+ mgr = &Manager{
+ Namespace: ns,
+ Name: name,
+ }
+ }
+ stats := new(ManagerStats)
+ statsKey := datastore.NewKey(c, "ManagerStats", "", int64(date), mgrKey)
+ if err := datastore.Get(c, statsKey, stats); err != nil {
+ if err != datastore.ErrNoSuchEntity {
+ return fmt.Errorf("failed to get stats %v/%v/%v: %v", ns, name, date, err)
+ }
+ // No stats stored for this date yet: start from zero values.
+ stats = &ManagerStats{
+ Date: date,
+ }
+ }
+
+ fn(mgr, stats)
+
+ // Both entities are always written back, even if fn changed only one of them.
+ if _, err := datastore.Put(c, mgrKey, mgr); err != nil {
+ return fmt.Errorf("failed to put manager: %v", err)
+ }
+ if _, err := datastore.Put(c, statsKey, stats); err != nil {
+ return fmt.Errorf("failed to put manager stats: %v", err)
+ }
+ return nil
+ }
+ return datastore.RunInTransaction(c, tx, &datastore.TransactionOptions{Attempts: 10})
+}
+
+// loadAllManagers queries all Manager entities and returns them together with
+// their datastore keys (parallel slices). Managers listed in their namespace's
+// DecommissionedManagers config are left out of both slices.
+// NOTE(review): config.Namespaces is indexed by mgr.Namespace without a presence
+// check — confirm a stored manager can never refer to a namespace missing from config.
+func loadAllManagers(c context.Context) ([]*Manager, []*datastore.Key, error) {
+ var managers []*Manager
+ keys, err := datastore.NewQuery("Manager").
+ GetAll(c, &managers)
+ if err != nil {
+ return nil, nil, fmt.Errorf("failed to query managers: %v", err)
+ }
+ var result []*Manager
+ var resultKeys []*datastore.Key
+
+ for i, mgr := range managers {
+ if _, ok := config.Namespaces[mgr.Namespace].DecommissionedManagers[mgr.Name]; ok {
+ continue
+ }
+ result = append(result, mgr)
+ resultKeys = append(resultKeys, keys[i])
+ }
+ return result, resultKeys, nil
+}
+
func buildKey(c context.Context, ns, id string) *datastore.Key {
if ns == "" {
panic("requesting build key outside of namespace")
@@ -237,3 +316,9 @@ func textLink(tag string, id int64) string {
}
return fmt.Sprintf("/text?tag=%v&id=%v", tag, id)
}
+
+// timeDate packs the calendar date of t into one decimal integer of the form YYYYMMDD.
+func timeDate(t time.Time) int {
+	y, m, d := t.Date()
+	packed := y * 10000
+	packed += int(m) * 100
+	packed += d
+	return packed
+}
diff --git a/dashboard/app/handler.go b/dashboard/app/handler.go
index 431261ff2..21fe67e36 100644
--- a/dashboard/app/handler.go
+++ b/dashboard/app/handler.go
@@ -68,6 +68,26 @@ func formatTime(t time.Time) string {
return t.Format("Jan 02 15:04")
}
+// formatClock renders only the time of day of t as "15:04" (hours:minutes).
+// The zero time is rendered as an empty string.
+func formatClock(t time.Time) string {
+	if !t.IsZero() {
+		return t.Format("15:04")
+	}
+	return ""
+}
+
+// formatDuration renders d compactly for the UI:
+// an empty string for zero, "<days>d<hours>h" once at least a whole day
+// is covered, and "<hours>h<minutes>m" otherwise.
+func formatDuration(d time.Duration) string {
+	const day = 24 * time.Hour
+	wholeDays := int(d / day)
+	hourPart := int(d / time.Hour % 24)
+	switch {
+	case d == 0:
+		return ""
+	case wholeDays != 0:
+		return fmt.Sprintf("%vd%vh", wholeDays, hourPart)
+	default:
+		return fmt.Sprintf("%vh%vm", hourPart, int(d/time.Minute%60))
+	}
+}
+
func formatReproLevel(l dashapi.ReproLevel) string {
switch l {
case ReproLevelSyz:
@@ -84,6 +104,8 @@ var (
templateFuncs = template.FuncMap{
"formatTime": formatTime,
+ "formatClock": formatClock,
+ "formatDuration": formatDuration,
"formatReproLevel": formatReproLevel,
}
)
diff --git a/dashboard/app/main.go b/dashboard/app/main.go
index d24b9b6ee..99c31d699 100644
--- a/dashboard/app/main.go
+++ b/dashboard/app/main.go
@@ -29,10 +29,35 @@ func init() {
type uiMain struct {
Header *uiHeader
Log []byte
+ Managers []*uiManager
Jobs []*uiJob
BugGroups []*uiBugGroup
}
+// uiManager is the per-manager row shown in the "Managers" table of the main page.
+// It is filled in by loadManagers from a Manager entity, its current build
+// and its ManagerStats.
+type uiManager struct {
+ Namespace string
+ Name string
+ CurrentBuild *uiBuild // nil when no build is found for Manager.CurrentBuild
+ FailedBuildBugLink string // bugLink of Manager.FailedBuildBug; empty when there is none
+ LastActive time.Time // copy of Manager.LastAlive
+ LastActiveBad bool // set when LastActive is more than 12 hours before the page was built
+ CurrentUpTime time.Duration
+ MaxCorpus int64
+ MaxCover int64
+ TotalFuzzingTime time.Duration
+ TotalCrashes int64
+ TotalExecs int64
+}
+
+// uiBuild is the UI representation of a Build, produced by makeUIBuild.
+// It is embedded in uiCrash and referenced by uiManager.CurrentBuild.
+type uiBuild struct {
+ Time time.Time
+ SyzkallerCommit string
+ KernelRepo string
+ KernelBranch string
+ KernelCommit string
+ KernelConfigLink string // textLink with tag "KernelConfig" for the build's kernel config
+}
+
type uiBugPage struct {
Header *uiHeader
Bug *uiBug
@@ -46,7 +71,6 @@ type uiBugGroup struct {
type uiBug struct {
Namespace string
- ID string
Title string
NumCrashes int64
FirstTime time.Time
@@ -55,29 +79,27 @@ type uiBug struct {
ReportingIndex int
Status string
Link string
+ ExternalLink string
Commits string
PatchedOn []string
MissingOn []string
}
type uiCrash struct {
- Manager string
- Time time.Time
- Maintainers string
- LogLink string
- ReportLink string
- ReproSyzLink string
- ReproCLink string
- SyzkallerCommit string
- KernelRepo string
- KernelBranch string
- KernelCommit string
- KernelConfigLink string
+ Manager string
+ Time time.Time
+ Maintainers string
+ LogLink string
+ ReportLink string
+ ReproSyzLink string
+ ReproCLink string
+ *uiBuild
}
type uiJob struct {
Created time.Time
- Link string
+ BugLink string
+ ExternalLink string
User string
Reporting string
Namespace string
@@ -108,6 +130,10 @@ func handleMain(c context.Context, w http.ResponseWriter, r *http.Request) error
if err != nil {
return err
}
+ managers, err := loadManagers(c)
+ if err != nil {
+ return err
+ }
jobs, err := loadRecentJobs(c)
if err != nil {
return err
@@ -119,6 +145,7 @@ func handleMain(c context.Context, w http.ResponseWriter, r *http.Request) error
data := &uiMain{
Header: h,
Log: errorLog,
+ Managers: managers,
Jobs: jobs,
BugGroups: groups,
}
@@ -227,9 +254,9 @@ func createUIBug(c context.Context, bug *Bug, state *ReportingState, managers []
if status == "" {
status = "???"
}
+ id := bugKeyHash(bug.Namespace, bug.Title, bug.Seq)
uiBug := &uiBug{
Namespace: bug.Namespace,
- ID: bugKeyHash(bug.Namespace, bug.Title, bug.Seq),
Title: bug.displayTitle(),
NumCrashes: bug.NumCrashes,
FirstTime: bug.FirstTime,
@@ -237,7 +264,8 @@ func createUIBug(c context.Context, bug *Bug, state *ReportingState, managers []
ReproLevel: bug.ReproLevel,
ReportingIndex: reportingIdx,
Status: status,
- Link: link,
+ Link: bugLink(id),
+ ExternalLink: link,
PatchedOn: bug.PatchedOn,
}
if len(bug.Commits) != 0 {
@@ -278,24 +306,92 @@ func loadCrashesForBug(c context.Context, bug *Bug) ([]*uiCrash, error) {
builds[crash.BuildID] = build
}
ui := &uiCrash{
- Manager: crash.Manager,
- Time: crash.Time,
- Maintainers: fmt.Sprintf("%q", crash.Maintainers),
- LogLink: textLink("CrashLog", crash.Log),
- ReportLink: textLink("CrashReport", crash.Report),
- ReproSyzLink: textLink("ReproSyz", crash.ReproSyz),
- ReproCLink: textLink("ReproC", crash.ReproC),
- SyzkallerCommit: build.SyzkallerCommit,
- KernelRepo: build.KernelRepo,
- KernelBranch: build.KernelBranch,
- KernelCommit: build.KernelCommit,
- KernelConfigLink: textLink("KernelConfig", build.KernelConfig),
+ Manager: crash.Manager,
+ Time: crash.Time,
+ Maintainers: fmt.Sprintf("%q", crash.Maintainers),
+ LogLink: textLink("CrashLog", crash.Log),
+ ReportLink: textLink("CrashReport", crash.Report),
+ ReproSyzLink: textLink("ReproSyz", crash.ReproSyz),
+ ReproCLink: textLink("ReproC", crash.ReproC),
+ uiBuild: makeUIBuild(build),
}
results = append(results, ui)
}
return results, nil
}
+// makeUIBuild converts a stored Build into the structure rendered by the templates.
+// The kernel config is exposed as a text link rather than as raw data.
+func makeUIBuild(build *Build) *uiBuild {
+	ui := new(uiBuild)
+	ui.Time = build.Time
+	ui.SyzkallerCommit = build.SyzkallerCommit
+	ui.KernelRepo = build.KernelRepo
+	ui.KernelBranch = build.KernelBranch
+	ui.KernelCommit = build.KernelCommit
+	ui.KernelConfigLink = textLink("KernelConfig", build.KernelConfig)
+	return ui
+}
+
+// loadManagers builds the rows of the "Managers" table on the main page.
+// It loads all non-decommissioned managers (loadAllManagers), batch-fetches the
+// current build of each manager that has one and today's ManagerStats of each
+// manager that was alive today, and returns the rows sorted by namespace and name.
+func loadManagers(c context.Context) ([]*uiManager, error) {
+ now := timeNow(c)
+ date := timeDate(now)
+ managers, managerKeys, err := loadAllManagers(c)
+ if err != nil {
+ return nil, err
+ }
+ // Collect the keys to fetch: builds only for managers that have one,
+ // stats only for managers whose LastAlive falls on today's date.
+ var buildKeys []*datastore.Key
+ var statsKeys []*datastore.Key
+ for i, mgr := range managers {
+ if mgr.CurrentBuild != "" {
+ buildKeys = append(buildKeys, buildKey(c, mgr.Namespace, mgr.CurrentBuild))
+ }
+ if timeDate(mgr.LastAlive) == date {
+ statsKeys = append(statsKeys,
+ datastore.NewKey(c, "ManagerStats", "", int64(date), managerKeys[i]))
+ }
+ }
+ builds := make([]*Build, len(buildKeys))
+ if err := datastore.GetMulti(c, buildKeys, builds); err != nil {
+ return nil, err
+ }
+ // Index builds by "<namespace>|<build ID>" for the lookup below.
+ uiBuilds := make(map[string]*uiBuild)
+ for _, build := range builds {
+ uiBuilds[build.Namespace+"|"+build.ID] = makeUIBuild(build)
+ }
+ stats := make([]*ManagerStats, len(statsKeys))
+ if err := datastore.GetMulti(c, statsKeys, stats); err != nil {
+ return nil, err
+ }
+ // Expand stats into a slice parallel to managers. stats holds entries only for
+ // managers alive today, in manager order, so they are consumed front to back;
+ // all other managers get empty stats.
+ var fullStats []*ManagerStats
+ for _, mgr := range managers {
+ if timeDate(mgr.LastAlive) != date {
+ fullStats = append(fullStats, &ManagerStats{})
+ continue
+ }
+ fullStats = append(fullStats, stats[0])
+ stats = stats[1:]
+ }
+ var results []*uiManager
+ for i, mgr := range managers {
+ stats := fullStats[i]
+ results = append(results, &uiManager{
+ Namespace: mgr.Namespace,
+ Name: mgr.Name,
+ CurrentBuild: uiBuilds[mgr.Namespace+"|"+mgr.CurrentBuild],
+ FailedBuildBugLink: bugLink(mgr.FailedBuildBug),
+ LastActive: mgr.LastAlive,
+ LastActiveBad: now.Sub(mgr.LastAlive) > 12*time.Hour,
+ CurrentUpTime: mgr.CurrentUpTime,
+ MaxCorpus: stats.MaxCorpus,
+ MaxCover: stats.MaxCover,
+ TotalFuzzingTime: stats.TotalFuzzingTime,
+ TotalCrashes: stats.TotalCrashes,
+ TotalExecs: stats.TotalExecs,
+ })
+ }
+ sort.Sort(uiManagerSorter(results))
+ return results, nil
+}
+
func loadRecentJobs(c context.Context) ([]*uiJob, error) {
var jobs []*Job
keys, err := datastore.NewQuery("Job").
@@ -309,13 +405,13 @@ func loadRecentJobs(c context.Context) ([]*uiJob, error) {
for i, job := range jobs {
ui := &uiJob{
Created: job.Created,
- Link: job.Link,
+ BugLink: bugLink(keys[i].Parent().StringID()),
+ ExternalLink: job.Link,
User: job.User,
Reporting: job.Reporting,
Namespace: job.Namespace,
Manager: job.Manager,
BugTitle: job.BugTitle,
- BugID: keys[i].Parent().StringID(),
KernelRepo: job.KernelRepo,
KernelBranch: job.KernelBranch,
PatchLink: textLink("Patch", job.Patch),
@@ -376,6 +472,24 @@ func fetchErrorLogs(c context.Context) ([]byte, error) {
return buf.Bytes(), nil
}
+// bugLink returns the dashboard-relative URL of the bug page for the given bug id,
+// or an empty string when id is empty.
+func bugLink(id string) string {
+	if id != "" {
+		return "/bug?id=" + id
+	}
+	return ""
+}
+
+// uiManagerSorter implements sort.Interface, ordering managers by namespace
+// and then by name within a namespace.
+type uiManagerSorter []*uiManager
+
+func (s uiManagerSorter) Len() int { return len(s) }
+
+func (s uiManagerSorter) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+
+func (s uiManagerSorter) Less(i, j int) bool {
+	lhs, rhs := s[i], s[j]
+	if lhs.Namespace == rhs.Namespace {
+		return lhs.Name < rhs.Name
+	}
+	return lhs.Namespace < rhs.Namespace
+}
+
type uiBugSorter []*uiBug
func (a uiBugSorter) Len() int { return len(a) }
diff --git a/dashboard/app/main.html b/dashboard/app/main.html
index ea1eec7c9..a5bad8efa 100644
--- a/dashboard/app/main.html
+++ b/dashboard/app/main.html
@@ -11,11 +11,11 @@
</tr>
{{range $b := $.Bugs}}
<tr>
- <td class="title"><a href="/bug?id={{$b.ID}}">{{$b.Title}}</a></td>
+ <td class="title"><a href="{{$b.Link}}">{{$b.Title}}</a></td>
<td class="count">{{$b.NumCrashes}}</td>
<td class="repro">{{formatReproLevel $b.ReproLevel}}</td>
<td class="time">{{formatTime $b.LastTime}}</td>
- <td class="status">{{if $b.Link}}<a href="{{$b.Link}}">{{$b.Status}}</a>{{else}}{{$b.Status}}{{end}}</td>
+ {{/* The status cell links to the external report, so it is guarded on ExternalLink, not on the internal bug page Link. */}}
+ <td class="status">{{if $b.ExternalLink}}<a href="{{$b.ExternalLink}}">{{$b.Status}}</a>{{else}}{{$b.Status}}{{end}}</td>
<td class="patched" title="{{$b.Commits}}">{{if $b.Commits}}{{len $b.PatchedOn}}/{{len $b.MissingOn}}{{end}}</td>
</tr>
{{end}}
@@ -40,6 +40,49 @@
<br><br>
<table class="list_table">
+ <caption>Managers:</caption>
+ <tr>
+ <th>Name</th>
+ <th>Last Active</th>
+ <th>Current Build</th>
+ <th>Failed Build</th>
+ <th>Today: Uptime</th>
+ <th>Fuzzing Time</th>
+ <th>Corpus</th>
+ <th>Coverage</th>
+ <th>Crashes</th>
+ <th>Execs</th>
+ </tr>
+ {{range $mgr := $.Managers}}
+ <tr>
+ <td>{{$mgr.Namespace}}/{{$mgr.Name}}</td>
+ {{if $mgr.LastActiveBad}}
+ <td style="color:#f00">{{formatTime $mgr.LastActive}}</td>
+ {{else}}
+ <td>{{formatClock $mgr.LastActive}}</td>
+ {{end}}
+ {{if $mgr.CurrentBuild}}
+ <td title="{{$mgr.CurrentBuild.KernelRepo}}/{{$mgr.CurrentBuild.KernelBranch}}/{{$mgr.CurrentBuild.KernelCommit}} (syzkaller {{$mgr.CurrentBuild.SyzkallerCommit}})">{{formatTime $mgr.CurrentBuild.Time}}</td>
+ {{else}}
+ <td></td>
+ {{end}}
+ {{if $mgr.FailedBuildBugLink}}
+ <td><a href="{{$mgr.FailedBuildBugLink}}" style="color:#f00">failed</a></td>
+ {{else}}
+ <td></td>
+ {{end}}
+ <td>{{formatDuration $mgr.CurrentUpTime}}</td>
+ <td>{{formatDuration $mgr.TotalFuzzingTime}}</td>
+ <td>{{$mgr.MaxCorpus}}</td>
+ <td>{{$mgr.MaxCover}}</td>
+ <td>{{$mgr.TotalCrashes}}</td>
+ <td>{{$mgr.TotalExecs}}</td>
+ </tr>
+ {{end}}
+ </table>
+ <br><br>
+
+ <table class="list_table">
<caption>Recent jobs:</caption>
<tr>
<th>Created</th>
@@ -59,7 +102,7 @@
<td class="time">{{formatTime $job.Started}}{{if gt $job.Attempts 1}} ({{$job.Attempts}}){{end}}</td>
<td class="time">{{formatTime $job.Finished}}</td>
<td>{{$job.User}}</td>
- <td class="title"><a href="/bug?id={{$job.BugID}}">{{$job.BugTitle}}</a></td>
+ <td class="title"><a href="{{$job.BugLink}}">{{$job.BugTitle}}</a></td>
<td><a href="{{$job.PatchLink}}">patch</a></td>
<td>{{$job.Namespace}}/{{$job.Reporting}}</td>
<td>{{$job.Manager}}</td>
diff --git a/dashboard/app/reporting.go b/dashboard/app/reporting.go
index 9180c14ad..ad2ad0d1f 100644
--- a/dashboard/app/reporting.go
+++ b/dashboard/app/reporting.go
@@ -577,8 +577,7 @@ func (state *ReportingState) getEntry(now time.Time, namespace, name string) *Re
panic(fmt.Sprintf("requesting reporting state for %v/%v", namespace, name))
}
// Convert time to date of the form 20170125.
- year, month, day := now.Date()
- date := year*10000 + int(month)*100 + day
+ date := timeDate(now)
for i := range state.Entries {
ent := &state.Entries[i]
if ent.Namespace == namespace && ent.Name == name {
diff --git a/dashboard/dashapi/dashapi.go b/dashboard/dashapi/dashapi.go
index 8ebea29ac..4ba797f32 100644
--- a/dashboard/dashapi/dashapi.go
+++ b/dashboard/dashapi/dashapi.go
@@ -16,6 +16,7 @@ import (
"net/url"
"reflect"
"strings"
+ "time"
)
type Dashboard struct {
@@ -249,6 +250,23 @@ type PollResponse struct {
Reports []*BugReport
}
+// ManagerStatsReq is the request payload of the "manager_stats" API call
+// sent by UploadManagerStats.
+type ManagerStatsReq struct {
+ Name string // manager name
+ // Current level:
+ UpTime time.Duration
+ Corpus uint64
+ Cover uint64
+
+ // Delta since last sync:
+ FuzzingTime time.Duration
+ Crashes uint64
+ Execs uint64
+}
+
+// UploadManagerStats sends req to the dashboard as a "manager_stats" query.
+// No reply payload is requested; only the query error is returned.
+func (dash *Dashboard) UploadManagerStats(req *ManagerStatsReq) error {
+ return dash.query("manager_stats", req, nil)
+}
+
type (
BugStatus int
ReproLevel int
diff --git a/syz-ci/jobs.go b/syz-ci/jobs.go
index 604dd1895..5c5b0c5a8 100644
--- a/syz-ci/jobs.go
+++ b/syz-ci/jobs.go
@@ -24,12 +24,14 @@ import (
)
type JobProcessor struct {
+ name string
managers []*Manager
dash *dashapi.Dashboard
}
func newJobProcessor(cfg *Config, managers []*Manager) *JobProcessor {
jp := &JobProcessor{
+ name: fmt.Sprintf("%v-job", cfg.Name),
managers: managers,
}
if cfg.Dashboard_Addr != "" && cfg.Dashboard_Client != "" {
@@ -62,7 +64,7 @@ func (jp *JobProcessor) poll() {
}
req, err := jp.dash.JobPoll(names)
if err != nil {
- Logf(0, "failed to poll jobs: %v", err)
+ jp.Errorf("failed to poll jobs: %v", err)
return
}
if req.ID == "" {
@@ -76,7 +78,7 @@ func (jp *JobProcessor) poll() {
}
}
if mgr == nil {
- Logf(0, "got job for unknown manager: %v", req.Manager)
+ jp.Errorf("got job for unknown manager: %v", req.Manager)
return
}
job := &Job{
@@ -85,11 +87,11 @@ func (jp *JobProcessor) poll() {
}
Logf(0, "starting job %v for manager %v on %v/%v",
req.ID, req.Manager, req.KernelRepo, req.KernelBranch)
- resp := job.process()
+ resp := jp.process(job)
Logf(0, "done job %v: commit %v, crash %q, error: %s",
resp.ID, resp.Build.KernelCommit, resp.CrashTitle, resp.Error)
if err := jp.dash.JobDone(resp); err != nil {
- Logf(0, "failed to mark job as done: %v", err)
+ jp.Errorf("failed to mark job as done: %v", err)
return
}
}
@@ -101,7 +103,7 @@ type Job struct {
mgrcfg *mgrconfig.Config
}
-func (job *Job) process() *dashapi.JobDoneReq {
+func (jp *JobProcessor) process(job *Job) *dashapi.JobDoneReq {
req, mgr := job.req, job.mgr
build := dashapi.Build{
Manager: mgr.name,
@@ -134,6 +136,7 @@ func (job *Job) process() *dashapi.JobDoneReq {
for _, req := range required {
if !req.ok {
job.resp.Error = []byte(req.name + " is empty")
+ jp.Errorf("%s", job.resp.Error)
return job.resp
}
}
@@ -145,20 +148,21 @@ func (job *Job) process() *dashapi.JobDoneReq {
case "gce", "qemu":
default:
job.resp.Error = []byte(fmt.Sprintf("testing is not yet supported for %v machine type.", typ))
+ jp.Errorf("%s", job.resp.Error)
return job.resp
}
- if err := job.buildImage(); err != nil {
+ if err := jp.buildImage(job); err != nil {
job.resp.Error = []byte(err.Error())
return job.resp
}
- if err := job.test(); err != nil {
+ if err := jp.test(job); err != nil {
job.resp.Error = []byte(err.Error())
return job.resp
}
return job.resp
}
-func (job *Job) buildImage() error {
+func (jp *JobProcessor) buildImage(job *Job) error {
kernelBuildSem <- struct{}{}
defer func() { <-kernelBuildSem }()
req, resp, mgr := job.req, job.resp, job.mgr
@@ -242,7 +246,7 @@ func (job *Job) buildImage() error {
return nil
}
-func (job *Job) test() error {
+func (jp *JobProcessor) test(job *Job) error {
req, mgrcfg := job.req, job.mgrcfg
Logf(0, "job: booting VM...")
@@ -305,7 +309,7 @@ func (job *Job) test() error {
" -fault_call=%v -fault_nth=%v -repeat=0 -cover=0 %v",
execprogBin, executorBin, mgrcfg.TargetArch, mgrcfg.Procs, opts.Sandbox,
opts.FaultCall, opts.FaultNth, vmProgFile)
- crashed, err := job.testProgram(inst, cmdSyz, reporter, 7*time.Minute)
+ crashed, err := jp.testProgram(job, inst, cmdSyz, reporter, 7*time.Minute)
if crashed || err != nil {
return err
}
@@ -330,7 +334,7 @@ func (job *Job) test() error {
}
// We should test for longer (e.g. 5 mins), but the problem is that
// reproducer does not print anything, so after 3 mins we detect "no output".
- crashed, err := job.testProgram(inst, vmBin, reporter, time.Minute)
+ crashed, err := jp.testProgram(job, inst, vmBin, reporter, time.Minute)
if crashed || err != nil {
return err
}
@@ -338,8 +342,8 @@ func (job *Job) test() error {
return nil
}
-func (job *Job) testProgram(inst *vm.Instance, command string, reporter report.Reporter,
- testTime time.Duration) (bool, error) {
+func (jp *JobProcessor) testProgram(job *Job, inst *vm.Instance, command string,
+ reporter report.Reporter, testTime time.Duration) (bool, error) {
outc, errc, err := inst.Run(testTime, nil, command)
if err != nil {
return false, fmt.Errorf("failed to run binary in VM: %v", err)
@@ -349,11 +353,18 @@ func (job *Job) testProgram(inst *vm.Instance, command string, reporter report.R
return false, nil
}
if err := reporter.Symbolize(rep); err != nil {
- // TODO(dvyukov): send such errors to dashboard.
- Logf(0, "job: failed to symbolize report: %v", err)
+ jp.Errorf("failed to symbolize report: %v", err)
}
job.resp.CrashTitle = rep.Title
job.resp.CrashReport = rep.Report
job.resp.CrashLog = rep.Output
return true, nil
}
+
+// Errorf logs non-fatal error and sends it to dashboard.
+// msg and args are a fmt-style format and its arguments. The local log line is
+// prefixed with "job: "; the dashboard entry is reported under jp.name.
+// Nothing is sent when no dashboard is configured (jp.dash is nil).
+func (jp *JobProcessor) Errorf(msg string, args ...interface{}) {
+ Logf(0, "job: "+msg, args...)
+ if jp.dash != nil {
+ jp.dash.LogError(jp.name, msg, args...)
+ }
+}
diff --git a/syz-ci/manager.go b/syz-ci/manager.go
index 4e4f03485..9ad2f8e53 100644
--- a/syz-ci/manager.go
+++ b/syz-ci/manager.go
@@ -147,7 +147,7 @@ loop:
rebuildAfter := buildRetryPeriod
commit, err := git.Poll(mgr.kernelDir, mgr.mgrcfg.Repo, mgr.mgrcfg.Branch)
if err != nil {
- Logf(0, "%v: failed to poll: %v", mgr.name, err)
+ mgr.Errorf("failed to poll: %v", err)
} else {
Logf(0, "%v: poll: %v", mgr.name, commit)
if commit != lastCommit &&
@@ -166,7 +166,7 @@ loop:
rebuildAfter = kernelRebuildPeriod
latestInfo = mgr.checkLatest()
if latestInfo == nil {
- Logf(0, "%v: failed to read build info after build", mgr.name)
+ mgr.Errorf("failed to read build info after build")
}
}
<-kernelBuildSem
@@ -239,6 +239,7 @@ func (mgr *Manager) build() error {
}
var tagData []byte
+ tagData = append(tagData, mgr.name...)
tagData = append(tagData, kernelCommit...)
tagData = append(tagData, mgr.compilerID...)
tagData = append(tagData, mgr.configTag...)
@@ -274,7 +275,7 @@ func (mgr *Manager) build() error {
Output: []byte(err.Error()),
}
if err := mgr.reportBuildError(rep, info, tmpDir); err != nil {
- Logf(0, "%v: failed to report image error: %v", mgr.name, err)
+ mgr.Errorf("failed to report image error: %v", err)
}
return fmt.Errorf("kernel build failed: %v", err)
}
@@ -307,7 +308,7 @@ func (mgr *Manager) build() error {
func (mgr *Manager) restartManager() {
if !osutil.FilesExist(mgr.latestDir, imageFiles) {
- Logf(0, "%v: can't start manager, image files missing", mgr.name)
+ mgr.Errorf("can't start manager, image files missing")
return
}
if mgr.cmd != nil {
@@ -315,26 +316,26 @@ func (mgr *Manager) restartManager() {
mgr.cmd = nil
}
if err := osutil.LinkFiles(mgr.latestDir, mgr.currentDir, imageFiles); err != nil {
- Logf(0, "%v: failed to create current image dir: %v", mgr.name, err)
+ mgr.Errorf("failed to create current image dir: %v", err)
return
}
info, err := loadBuildInfo(mgr.currentDir)
if err != nil {
- Logf(0, "%v: failed to load build info: %v", mgr.name, err)
+ mgr.Errorf("failed to load build info: %v", err)
return
}
cfgFile, err := mgr.writeConfig(info)
if err != nil {
- Logf(0, "%v: failed to create manager config: %v", mgr.name, err)
+ mgr.Errorf("failed to create manager config: %v", err)
return
}
if err := mgr.uploadBuild(info, mgr.currentDir); err != nil {
- Logf(0, "%v: failed to upload build: %v", mgr.name, err)
+ mgr.Errorf("failed to upload build: %v", err)
return
}
bin := filepath.FromSlash("syzkaller/current/bin/syz-manager")
logFile := filepath.Join(mgr.currentDir, "manager.log")
- mgr.cmd = NewManagerCmd(mgr.name, logFile, bin, "-config", cfgFile)
+ mgr.cmd = NewManagerCmd(mgr.name, logFile, mgr.Errorf, bin, "-config", cfgFile)
}
func (mgr *Manager) testImage(imageDir string, info *BuildInfo) error {
@@ -361,7 +362,7 @@ func (mgr *Manager) testImage(imageDir string, info *BuildInfo) error {
if rep != nil {
rep.Title = fmt.Sprintf("%v boot error: %v", mgr.mgrcfg.Repo_Alias, rep.Title)
if err := mgr.reportBuildError(rep, info, imageDir); err != nil {
- Logf(0, "%v: failed to report image error: %v", mgr.name, err)
+ mgr.Errorf("failed to report image error: %v", err)
}
return fmt.Errorf("VM boot failed with: %v", rep.Title)
}
@@ -373,7 +374,7 @@ func (mgr *Manager) testImage(imageDir string, info *BuildInfo) error {
if rep != nil {
rep.Title = fmt.Sprintf("%v test error: %v", mgr.mgrcfg.Repo_Alias, rep.Title)
if err := mgr.reportBuildError(rep, info, imageDir); err != nil {
- Logf(0, "%v: failed to report image error: %v", mgr.name, err)
+ mgr.Errorf("failed to report image error: %v", err)
}
return fmt.Errorf("VM testing failed with: %v", rep.Title)
}
@@ -482,7 +483,7 @@ func (mgr *Manager) uploadBuild(info *BuildInfo, imageDir string) error {
commits, err := mgr.pollCommits(info.KernelCommit)
if err != nil {
// This is not critical for operation.
- Logf(0, "%v: failed to poll commits: %v", mgr.name, err)
+ mgr.Errorf("failed to poll commits: %v", err)
}
build.Commits = commits
return mgr.dash.UploadBuild(build)
@@ -533,3 +534,11 @@ func (mgr *Manager) pollCommits(buildCommit string) ([]string, error) {
}
return present, nil
}
+
+// Errorf logs non-fatal error and sends it to dashboard.
+// msg and args are a fmt-style format and its arguments. The local log line is
+// prefixed with the manager name; the dashboard entry is reported under mgr.name.
+// Nothing is sent when no dashboard is configured (mgr.dash is nil).
+// NOTE(review): mgr.name becomes part of the format string passed to Logf —
+// presumably manager names never contain '%'; confirm.
+func (mgr *Manager) Errorf(msg string, args ...interface{}) {
+ Logf(0, mgr.name+": "+msg, args...)
+ if mgr.dash != nil {
+ mgr.dash.LogError(mgr.name, msg, args...)
+ }
+}
diff --git a/syz-ci/managercmd.go b/syz-ci/managercmd.go
index 77339e5c0..143eb8011 100644
--- a/syz-ci/managercmd.go
+++ b/syz-ci/managercmd.go
@@ -19,19 +19,23 @@ import (
type ManagerCmd struct {
name string
log string
+ errorf Errorf
bin string
args []string
closing chan bool
}
+type Errorf func(msg string, args ...interface{})
+
// NewManagerCmd starts new syz-manager process.
// name - name for logging.
// log - manager log file with stdout/stderr.
// bin/args - process binary/args.
-func NewManagerCmd(name, log, bin string, args ...string) *ManagerCmd {
+func NewManagerCmd(name, log string, errorf Errorf, bin string, args ...string) *ManagerCmd {
mc := &ManagerCmd{
name: name,
log: log,
+ errorf: errorf,
bin: bin,
args: args,
closing: make(chan bool),
@@ -48,8 +52,8 @@ func (mc *ManagerCmd) Close() {
func (mc *ManagerCmd) loop() {
const (
- restartPeriod = time.Minute // don't restart crashing manager more frequently than that
- interruptTimeout = time.Minute // give manager that much time to react to SIGINT
+ restartPeriod = 10 * time.Minute // don't restart crashing manager more frequently than that
+ interruptTimeout = time.Minute // give manager that much time to react to SIGINT
)
var (
cmd *exec.Cmd
@@ -73,7 +77,7 @@ func (mc *ManagerCmd) loop() {
os.Rename(mc.log, mc.log+".old")
logfile, err := os.Create(mc.log)
if err != nil {
- Logf(0, "%v: failed to create manager log: %v", mc.name, err)
+ mc.errorf("failed to create manager log: %v", err)
} else {
cmd = osutil.Command(mc.bin, mc.args...)
cmd.Stdout = logfile
@@ -81,7 +85,7 @@ func (mc *ManagerCmd) loop() {
err := cmd.Start()
logfile.Close()
if err != nil {
- Logf(0, "%v: failed to start manager: %v", mc.name, err)
+ mc.errorf("failed to start manager: %v", err)
cmd = nil
} else {
Logf(1, "%v: started manager", mc.name)
@@ -110,7 +114,10 @@ func (mc *ManagerCmd) loop() {
}
case err := <-stopped:
if cmd == nil {
- panic("spurious stop signal")
+ mc.errorf("spurious stop signal: %v", err)
+ }
+ if closing != nil {
+ mc.errorf("manager exited unexpectedly: %v", err)
}
cmd = nil
Logf(1, "%v: manager exited with %v", mc.name, err)
diff --git a/syz-ci/testing.go b/syz-ci/testing.go
index dcee197ac..adf223f04 100644
--- a/syz-ci/testing.go
+++ b/syz-ci/testing.go
@@ -31,12 +31,13 @@ func bootInstance(mgrcfg *mgrconfig.Config) (*vm.Instance, report.Reporter, *rep
}
inst, err := vmPool.Create(0)
if err != nil {
- if bootErr, ok := err.(vm.BootError); ok {
- rep := reporter.Parse(bootErr.Output)
+ if bootErr, ok := err.(vm.BootErrorer); ok {
+ title, output := bootErr.BootError()
+ rep := reporter.Parse(output)
if rep == nil {
rep = &report.Report{
- Title: bootErr.Title,
- Output: bootErr.Output,
+ Title: title,
+ Output: output,
}
}
if err := reporter.Symbolize(rep); err != nil {
diff --git a/syz-manager/manager.go b/syz-manager/manager.go
index 49faa6e83..6be5761ba 100644
--- a/syz-manager/manager.go
+++ b/syz-manager/manager.go
@@ -307,6 +307,10 @@ func RunManager(cfg *mgrconfig.Config, target *prog.Target, syscalls map[int]boo
}()
}
+ if mgr.dash != nil {
+ go mgr.dashboardReporter()
+ }
+
if mgr.cfg.Hub_Client != "" {
go func() {
for {
@@ -1167,3 +1171,38 @@ func (mgr *Manager) checkUsedFiles() {
}
}
}
+
+// dashboardReporter uploads manager statistics to the dashboard once a minute.
+// UpTime, Corpus and Cover are sent as current values; FuzzingTime, Crashes and
+// Execs are sent as deltas since the last successful upload.
+// It never returns and is started in its own goroutine by RunManager
+// when a dashboard is configured.
+func (mgr *Manager) dashboardReporter() {
+	var lastFuzzingTime time.Duration
+	var lastCrashes, lastExecs uint64
+	for {
+		time.Sleep(time.Minute)
+		mgr.mu.Lock()
+		if mgr.firstConnect.IsZero() {
+			// firstConnect is still unset: there is nothing to report yet.
+			mgr.mu.Unlock()
+			continue
+		}
+		crashes := mgr.stats["crashes"]
+		execs := mgr.stats["exec total"]
+		req := &dashapi.ManagerStatsReq{
+			Name:        mgr.cfg.Name,
+			UpTime:      time.Since(mgr.firstConnect),
+			Corpus:      uint64(len(mgr.corpus)),
+			Cover:       uint64(len(mgr.corpusSignal)),
+			FuzzingTime: mgr.fuzzingTime - lastFuzzingTime,
+			Crashes:     crashes - lastCrashes,
+			Execs:       execs - lastExecs,
+		}
+		mgr.mu.Unlock()
+
+		// The upload is done without holding mgr.mu.
+		if err := mgr.dash.UploadManagerStats(req); err != nil {
+			// The baselines are not advanced, so the next request
+			// also covers the deltas that failed to upload.
+			Logf(0, "failed to upload dashboard stats: %v", err)
+			continue
+		}
+		mgr.mu.Lock()
+		lastFuzzingTime += req.FuzzingTime
+		lastCrashes += req.Crashes
+		lastExecs += req.Execs
+		mgr.mu.Unlock()
+	}
+}
diff --git a/vm/vm.go b/vm/vm.go
index 48b68d597..519859a5e 100644
--- a/vm/vm.go
+++ b/vm/vm.go
@@ -37,18 +37,15 @@ type Instance struct {
index int
}
-type (
- Env vmimpl.Env
- BootError vmimpl.BootError
-)
+type Env vmimpl.Env
var (
Shutdown = vmimpl.Shutdown
TimeoutErr = vmimpl.TimeoutErr
)
-func (err BootError) Error() string {
- return fmt.Sprintf("%v\n%s", err.Title, err.Output)
+// BootErrorer is implemented by errors that can report a boot failure as a
+// (title, output) pair. Callers type-assert an error against it
+// (as bootInstance in syz-ci does for the error returned by Pool.Create).
+type BootErrorer interface {
+ BootError() (string, []byte)
}
func Create(typ string, env *Env) (*Pool, error) {
diff --git a/vm/vmimpl/vmimpl.go b/vm/vmimpl/vmimpl.go
index 81f798d26..2e3833d89 100644
--- a/vm/vmimpl/vmimpl.go
+++ b/vm/vmimpl/vmimpl.go
@@ -66,6 +66,10 @@ func (err BootError) Error() string {
return fmt.Sprintf("%v\n%s", err.Title, err.Output)
}
+// BootError returns the error's Title and Output.
+func (err BootError) BootError() (string, []byte) {
+ return err.Title, err.Output
+}
+
// Create creates a VM type that can be used to create individual VMs.
func Create(typ string, env *Env) (Pool, error) {
ctor := ctors[typ]