From 2fa91450df792689c42bd52f98ffdacee99ace91 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 30 Nov 2017 17:14:18 +0100 Subject: dashboard/app: add manager monitoring Make it possible to monitor health and operation of all managers from dashboard. 1. Notify dashboard about internal syz-ci errors (currently we don't know when/if they happen). 2. Send statistics from managers to dashboard. --- dashboard/app/api.go | 56 ++++++++++++-- dashboard/app/entities.go | 87 +++++++++++++++++++++- dashboard/app/handler.go | 22 ++++++ dashboard/app/main.go | 174 +++++++++++++++++++++++++++++++++++-------- dashboard/app/main.html | 49 +++++++++++- dashboard/app/reporting.go | 3 +- dashboard/dashapi/dashapi.go | 18 +++++ 7 files changed, 366 insertions(+), 43 deletions(-) (limited to 'dashboard') diff --git a/dashboard/app/api.go b/dashboard/app/api.go index db265f8b8..ef1f344fd 100644 --- a/dashboard/app/api.go +++ b/dashboard/app/api.go @@ -43,6 +43,7 @@ var apiNamespaceHandlers = map[string]APINamespaceHandler{ "report_crash": apiReportCrash, "report_failed_repro": apiReportFailedRepro, "need_repro": apiNeedRepro, + "manager_stats": apiManagerStats, } type JSONHandler func(c context.Context, r *http.Request) (interface{}, error) @@ -204,6 +205,12 @@ func apiUploadBuild(c context.Context, ns string, r *http.Request) (interface{}, return nil, err } } + if err := updateManager(c, ns, req.Manager, func(mgr *Manager, stats *ManagerStats) { + mgr.CurrentBuild = req.ID + mgr.FailedBuildBug = "" + }); err != nil { + return nil, err + } return nil, nil } @@ -374,7 +381,14 @@ func apiReportBuildError(c context.Context, ns string, r *http.Request) (interfa if err := uploadBuild(c, ns, &req.Build, BuildFailed); err != nil { return nil, err } - if _, err := reportCrash(c, ns, &req.Crash); err != nil { + req.Crash.BuildID = req.Build.ID + bug, err := reportCrash(c, ns, &req.Crash) + if err != nil { + return nil, err + } + if err := updateManager(c, ns, req.Build.Manager, func(mgr *Manager, stats *ManagerStats) { + mgr.FailedBuildBug = bugKeyHash(bug.Namespace, bug.Title, bug.Seq) + }); err != nil { return nil, err } return nil, nil @@ -387,10 +401,17 @@ func apiReportCrash(c context.Context, ns string, r *http.Request) (interface{}, if err := json.NewDecoder(r.Body).Decode(req); err != nil { return nil, fmt.Errorf("failed to unmarshal request: %v", err) } - return reportCrash(c, ns, req) + bug, err := reportCrash(c, ns, req) + if err != nil { + return nil, err + } + resp := &dashapi.ReportCrashResp{ + NeedRepro: needRepro(bug), + } + return resp, nil } -func reportCrash(c context.Context, ns string, req *dashapi.Crash) (interface{}, error) { +func reportCrash(c context.Context, ns string, req *dashapi.Crash) (*Bug, error) { req.Title = limitLength(req.Title, maxTextLen) req.Maintainers = email.MergeEmailLists(req.Maintainers) if req.Corrupted { @@ -488,10 +509,7 @@ func reportCrash(c context.Context, ns string, req *dashapi.Crash) (interface{}, if saveCrash { purgeOldCrashes(c, bug, bugKey) } - resp := &dashapi.ReportCrashResp{ - NeedRepro: needRepro(bug), - } - return resp, nil + return bug, nil } func purgeOldCrashes(c context.Context, bug *Bug, bugKey *datastore.Key) { @@ -603,6 +621,30 @@ func apiNeedRepro(c context.Context, ns string, r *http.Request) (interface{}, e return resp, nil } +func apiManagerStats(c context.Context, ns string, r *http.Request) (interface{}, error) { + req := new(dashapi.ManagerStatsReq) + if err := json.NewDecoder(r.Body).Decode(req); err != nil { + return nil, fmt.Errorf("failed to unmarshal request: %v", err) + } + now := timeNow(c) + if err := updateManager(c, ns, req.Name, func(mgr *Manager, stats *ManagerStats) { + mgr.LastAlive = now + mgr.CurrentUpTime = req.UpTime + if cur := int64(req.Corpus); cur > stats.MaxCorpus { + stats.MaxCorpus = cur + } + if cur := int64(req.Cover); cur > stats.MaxCover { + stats.MaxCover = cur + } + stats.TotalFuzzingTime += req.FuzzingTime + stats.TotalCrashes += int64(req.Crashes) + stats.TotalExecs += int64(req.Execs) + }); err != nil { + return nil, err + } + return nil, nil +} + func findBugForCrash(c context.Context, ns, title string) (*Bug, *datastore.Key, error) { var bugs []*Bug keys, err := datastore.NewQuery("Bug"). diff --git a/dashboard/app/entities.go b/dashboard/app/entities.go index 6661b84b7..492ca198e 100644 --- a/dashboard/app/entities.go +++ b/dashboard/app/entities.go @@ -24,6 +24,26 @@ const ( maxCrashes = 40 ) +type Manager struct { + Namespace string + Name string + CurrentBuild string + FailedBuildBug string + LastAlive time.Time + CurrentUpTime time.Duration +} + +// ManagerStats holds per-day manager runtime stats. +// Has Manager as parent entity. Keyed by Date. +type ManagerStats struct { + Date int // YYYYMMDD + MaxCorpus int64 + MaxCover int64 + TotalFuzzingTime time.Duration + TotalCrashes int64 + TotalExecs int64 +} + type Build struct { Namespace string Manager string @@ -93,7 +113,7 @@ type ReportingStateEntry struct { Name string // Current reporting quota consumption. Sent int - Date int + Date int // YYYYMMDD } // Job represent a single patch testing job for syz-ci. @@ -163,6 +183,65 @@ const ( BuildJob ) +// updateManager does transactional compare-and-swap on the manager and its current stats. +func updateManager(c context.Context, ns, name string, fn func(mgr *Manager, stats *ManagerStats)) error { + date := timeDate(timeNow(c)) + tx := func(c context.Context) error { + mgr := new(Manager) + mgrKey := datastore.NewKey(c, "Manager", fmt.Sprintf("%v-%v", ns, name), 0, nil) + if err := datastore.Get(c, mgrKey, mgr); err != nil { + if err != datastore.ErrNoSuchEntity { + return fmt.Errorf("failed to get manager %v/%v: %v", ns, name, err) + } + mgr = &Manager{ + Namespace: ns, + Name: name, + } + } + stats := new(ManagerStats) + statsKey := datastore.NewKey(c, "ManagerStats", "", int64(date), mgrKey) + if err := datastore.Get(c, statsKey, stats); err != nil { + if err != datastore.ErrNoSuchEntity { + return fmt.Errorf("failed to get stats %v/%v/%v: %v", ns, name, date, err) + } + stats = &ManagerStats{ + Date: date, + } + } + + fn(mgr, stats) + + if _, err := datastore.Put(c, mgrKey, mgr); err != nil { + return fmt.Errorf("failed to put manager: %v", err) + } + if _, err := datastore.Put(c, statsKey, stats); err != nil { + return fmt.Errorf("failed to put manager stats: %v", err) + } + return nil + } + return datastore.RunInTransaction(c, tx, &datastore.TransactionOptions{Attempts: 10}) +} + +func loadAllManagers(c context.Context) ([]*Manager, []*datastore.Key, error) { + var managers []*Manager + keys, err := datastore.NewQuery("Manager"). + GetAll(c, &managers) + if err != nil { + return nil, nil, fmt.Errorf("failed to query managers: %v", err) + } + var result []*Manager + var resultKeys []*datastore.Key + + for i, mgr := range managers { + if _, ok := config.Namespaces[mgr.Namespace].DecommissionedManagers[mgr.Name]; ok { + continue + } + result = append(result, mgr) + resultKeys = append(resultKeys, keys[i]) + } + return result, resultKeys, nil +} + func buildKey(c context.Context, ns, id string) *datastore.Key { if ns == "" { panic("requesting build key outside of namespace") @@ -237,3 +316,9 @@ func textLink(tag string, id int64) string { } return fmt.Sprintf("/text?tag=%v&id=%v", tag, id) } + +// timeDate returns t's date as a single int YYYYMMDD. +func timeDate(t time.Time) int { + year, month, day := t.Date() + return year*10000 + int(month)*100 + day +} diff --git a/dashboard/app/handler.go b/dashboard/app/handler.go index 431261ff2..21fe67e36 100644 --- a/dashboard/app/handler.go +++ b/dashboard/app/handler.go @@ -68,6 +68,26 @@ func formatTime(t time.Time) string { return t.Format("Jan 02 15:04") } +func formatClock(t time.Time) string { + if t.IsZero() { + return "" + } + return t.Format("15:04") +} + +func formatDuration(d time.Duration) string { + if d == 0 { + return "" + } + days := int(d / (24 * time.Hour)) + hours := int(d / time.Hour % 24) + mins := int(d / time.Minute % 60) + if days != 0 { + return fmt.Sprintf("%vd%vh", days, hours) + } + return fmt.Sprintf("%vh%vm", hours, mins) +} + func formatReproLevel(l dashapi.ReproLevel) string { switch l { case ReproLevelSyz: @@ -84,6 +104,8 @@ var ( templateFuncs = template.FuncMap{ "formatTime": formatTime, + "formatClock": formatClock, + "formatDuration": formatDuration, "formatReproLevel": formatReproLevel, } ) diff --git a/dashboard/app/main.go b/dashboard/app/main.go index d24b9b6ee..99c31d699 100644 --- a/dashboard/app/main.go +++ b/dashboard/app/main.go @@ -29,10 +29,35 @@ func init() { type uiMain struct { Header *uiHeader Log []byte + Managers []*uiManager Jobs []*uiJob BugGroups []*uiBugGroup } +type uiManager struct { + Namespace string + Name string + CurrentBuild *uiBuild + FailedBuildBugLink string + LastActive time.Time + LastActiveBad bool + CurrentUpTime time.Duration + MaxCorpus int64 + MaxCover int64 + TotalFuzzingTime time.Duration + TotalCrashes int64 + TotalExecs int64 +} + +type uiBuild struct { + Time time.Time + SyzkallerCommit string + KernelRepo string + KernelBranch string + KernelCommit string + KernelConfigLink string +} + type uiBugPage struct { Header *uiHeader Bug *uiBug @@ -46,7 +71,6 @@ type uiBugGroup struct { type uiBug struct { Namespace string - ID string Title string NumCrashes int64 FirstTime time.Time @@ -55,29 +79,27 @@ type uiBug struct { ReportingIndex int Status string Link string + ExternalLink string Commits string PatchedOn []string MissingOn []string } type uiCrash struct { - Manager string - Time time.Time - Maintainers string - LogLink string - ReportLink string - ReproSyzLink string - ReproCLink string - SyzkallerCommit string - KernelRepo string - KernelBranch string - KernelCommit string - KernelConfigLink string + Manager string + Time time.Time + Maintainers string + LogLink string + ReportLink string + ReproSyzLink string + ReproCLink string + *uiBuild } type uiJob struct { Created time.Time - Link string + BugLink string + ExternalLink string User string Reporting string Namespace string @@ -108,6 +130,10 @@ func handleMain(c context.Context, w http.ResponseWriter, r *http.Request) error if err != nil { return err } + managers, err := loadManagers(c) + if err != nil { + return err + } jobs, err := loadRecentJobs(c) if err != nil { return err @@ -119,6 +145,7 @@ func handleMain(c context.Context, w http.ResponseWriter, r *http.Request) error data := &uiMain{ Header: h, Log: errorLog, + Managers: managers, Jobs: jobs, BugGroups: groups, } @@ -227,9 +254,9 @@ func createUIBug(c context.Context, bug *Bug, state *ReportingState, managers [] if status == "" { status = "???" } + id := bugKeyHash(bug.Namespace, bug.Title, bug.Seq) uiBug := &uiBug{ Namespace: bug.Namespace, - ID: bugKeyHash(bug.Namespace, bug.Title, bug.Seq), Title: bug.displayTitle(), NumCrashes: bug.NumCrashes, FirstTime: bug.FirstTime, @@ -237,7 +264,8 @@ func createUIBug(c context.Context, bug *Bug, state *ReportingState, managers [] ReproLevel: bug.ReproLevel, ReportingIndex: reportingIdx, Status: status, - Link: link, + Link: bugLink(id), + ExternalLink: link, PatchedOn: bug.PatchedOn, } if len(bug.Commits) != 0 { @@ -278,24 +306,92 @@ func loadCrashesForBug(c context.Context, bug *Bug) ([]*uiCrash, error) { builds[crash.BuildID] = build } ui := &uiCrash{ - Manager: crash.Manager, - Time: crash.Time, - Maintainers: fmt.Sprintf("%q", crash.Maintainers), - LogLink: textLink("CrashLog", crash.Log), - ReportLink: textLink("CrashReport", crash.Report), - ReproSyzLink: textLink("ReproSyz", crash.ReproSyz), - ReproCLink: textLink("ReproC", crash.ReproC), - SyzkallerCommit: build.SyzkallerCommit, - KernelRepo: build.KernelRepo, - KernelBranch: build.KernelBranch, - KernelCommit: build.KernelCommit, - KernelConfigLink: textLink("KernelConfig", build.KernelConfig), + Manager: crash.Manager, + Time: crash.Time, + Maintainers: fmt.Sprintf("%q", crash.Maintainers), + LogLink: textLink("CrashLog", crash.Log), + ReportLink: textLink("CrashReport", crash.Report), + ReproSyzLink: textLink("ReproSyz", crash.ReproSyz), + ReproCLink: textLink("ReproC", crash.ReproC), + uiBuild: makeUIBuild(build), } results = append(results, ui) } return results, nil } +func makeUIBuild(build *Build) *uiBuild { + return &uiBuild{ + Time: build.Time, + SyzkallerCommit: build.SyzkallerCommit, + KernelRepo: build.KernelRepo, + KernelBranch: build.KernelBranch, + KernelCommit: build.KernelCommit, + KernelConfigLink: textLink("KernelConfig", build.KernelConfig), + } +} + +func loadManagers(c context.Context) ([]*uiManager, error) { + now := timeNow(c) + date := timeDate(now) + managers, managerKeys, err := loadAllManagers(c) + if err != nil { + return nil, err + } + var buildKeys []*datastore.Key + var statsKeys []*datastore.Key + for i, mgr := range managers { + if mgr.CurrentBuild != "" { + buildKeys = append(buildKeys, buildKey(c, mgr.Namespace, mgr.CurrentBuild)) + } + if timeDate(mgr.LastAlive) == date { + statsKeys = append(statsKeys, + datastore.NewKey(c, "ManagerStats", "", int64(date), managerKeys[i])) + } + } + builds := make([]*Build, len(buildKeys)) + if err := datastore.GetMulti(c, buildKeys, builds); err != nil { + return nil, err + } + uiBuilds := make(map[string]*uiBuild) + for _, build := range builds { + uiBuilds[build.Namespace+"|"+build.ID] = makeUIBuild(build) + } + stats := make([]*ManagerStats, len(statsKeys)) + if err := datastore.GetMulti(c, statsKeys, stats); err != nil { + return nil, err + } + var fullStats []*ManagerStats + for _, mgr := range managers { + if timeDate(mgr.LastAlive) != date { + fullStats = append(fullStats, &ManagerStats{}) + continue + } + fullStats = append(fullStats, stats[0]) + stats = stats[1:] + } + var results []*uiManager + for i, mgr := range managers { + stats := fullStats[i] + results = append(results, &uiManager{ + Namespace: mgr.Namespace, + Name: mgr.Name, + CurrentBuild: uiBuilds[mgr.Namespace+"|"+mgr.CurrentBuild], + FailedBuildBugLink: bugLink(mgr.FailedBuildBug), + LastActive: mgr.LastAlive, + LastActiveBad: now.Sub(mgr.LastAlive) > 12*time.Hour, + CurrentUpTime: mgr.CurrentUpTime, + MaxCorpus: stats.MaxCorpus, + MaxCover: stats.MaxCover, + TotalFuzzingTime: stats.TotalFuzzingTime, + TotalCrashes: stats.TotalCrashes, + TotalExecs: stats.TotalExecs, + }) + } + sort.Sort(uiManagerSorter(results)) + return results, nil +} + func loadRecentJobs(c context.Context) ([]*uiJob, error) { var jobs []*Job keys, err := datastore.NewQuery("Job"). @@ -309,13 +405,13 @@ func loadRecentJobs(c context.Context) ([]*uiJob, error) { for i, job := range jobs { ui := &uiJob{ Created: job.Created, - Link: job.Link, + BugLink: bugLink(keys[i].Parent().StringID()), + ExternalLink: job.Link, User: job.User, Reporting: job.Reporting, Namespace: job.Namespace, Manager: job.Manager, BugTitle: job.BugTitle, - BugID: keys[i].Parent().StringID(), KernelRepo: job.KernelRepo, KernelBranch: job.KernelBranch, PatchLink: textLink("Patch", job.Patch), @@ -376,6 +472,24 @@ func fetchErrorLogs(c context.Context) ([]byte, error) { return buf.Bytes(), nil } +func bugLink(id string) string { + if id == "" { + return "" + } + return "/bug?id=" + id +} + +type uiManagerSorter []*uiManager + +func (a uiManagerSorter) Len() int { return len(a) } +func (a uiManagerSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a uiManagerSorter) Less(i, j int) bool { + if a[i].Namespace != a[j].Namespace { + return a[i].Namespace < a[j].Namespace + } + return a[i].Name < a[j].Name +} + type uiBugSorter []*uiBug func (a uiBugSorter) Len() int { return len(a) } diff --git a/dashboard/app/main.html b/dashboard/app/main.html index ea1eec7c9..a5bad8efa 100644 --- a/dashboard/app/main.html +++ b/dashboard/app/main.html @@ -11,11 +11,11 @@ {{range $b := $.Bugs}} - {{$b.Title}} + {{$b.Title}} {{$b.NumCrashes}} {{formatReproLevel $b.ReproLevel}} {{formatTime $b.LastTime}} - {{if $b.Link}}{{$b.Status}}{{else}}{{$b.Status}}{{end}} + {{if $b.Link}}{{$b.Status}}{{else}}{{$b.Status}}{{end}} {{if $b.Commits}}{{len $b.PatchedOn}}/{{len $b.MissingOn}}{{end}} {{end}} @@ -39,6 +39,49 @@

+ + + + + + + + + + + + + + + {{range $mgr := $.Managers}} + + + {{if $mgr.LastActiveBad}} + + {{else}} + + {{end}} + {{if $mgr.CurrentBuild}} + + {{else}} + + {{end}} + {{if $mgr.FailedBuildBugLink}} + + {{else}} + + {{end}} + + + + + + + + {{end}} +
Managers:
NameLast ActiveCurrent BuildFailed BuildToday: UptimeFuzzing TimeCorpusCoverageCrashesExecs
{{$mgr.Namespace}}/{{$mgr.Name}}{{formatTime $mgr.LastActive}}{{formatClock $mgr.LastActive}}{{formatTime $mgr.CurrentBuild.Time}}failed{{formatDuration $mgr.CurrentUpTime}}{{formatDuration $mgr.TotalFuzzingTime}}{{$mgr.MaxCorpus}}{{$mgr.MaxCover}}{{$mgr.TotalCrashes}}{{$mgr.TotalExecs}}
+

+ @@ -59,7 +102,7 @@ - + diff --git a/dashboard/app/reporting.go b/dashboard/app/reporting.go index 9180c14ad..ad2ad0d1f 100644 --- a/dashboard/app/reporting.go +++ b/dashboard/app/reporting.go @@ -577,8 +577,7 @@ func (state *ReportingState) getEntry(now time.Time, namespace, name string) *Re panic(fmt.Sprintf("requesting reporting state for %v/%v", namespace, name)) } // Convert time to date of the form 20170125. - year, month, day := now.Date() - date := year*10000 + int(month)*100 + day + date := timeDate(now) for i := range state.Entries { ent := &state.Entries[i] if ent.Namespace == namespace && ent.Name == name { diff --git a/dashboard/dashapi/dashapi.go b/dashboard/dashapi/dashapi.go index 8ebea29ac..4ba797f32 100644 --- a/dashboard/dashapi/dashapi.go +++ b/dashboard/dashapi/dashapi.go @@ -16,6 +16,7 @@ import ( "net/url" "reflect" "strings" + "time" ) type Dashboard struct { @@ -249,6 +250,23 @@ type PollResponse struct { Reports []*BugReport } +type ManagerStatsReq struct { + Name string + // Current level: + UpTime time.Duration + Corpus uint64 + Cover uint64 + + // Delta since last sync: + FuzzingTime time.Duration + Crashes uint64 + Execs uint64 +} + +func (dash *Dashboard) UploadManagerStats(req *ManagerStatsReq) error { + return dash.query("manager_stats", req, nil) +} + type ( BugStatus int ReproLevel int -- cgit mrf-deployment
Recent jobs:
{{formatTime $job.Started}}{{if gt $job.Attempts 1}} ({{$job.Attempts}}){{end}} {{formatTime $job.Finished}} {{$job.User}}{{$job.BugTitle}}{{$job.BugTitle}} patch {{$job.Namespace}}/{{$job.Reporting}} {{$job.Manager}}