diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2024-03-29 15:02:10 +0100 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2024-04-09 07:55:50 +0000 |
| commit | 1be1a06281dccada078a2a51e8b483811af8f596 (patch) | |
| tree | 6340df1c2d1704f1784ba63164d3088b7c91ef61 /syz-manager | |
| parent | 73f4b622a34ffc998a542f5e109fb05a1d892272 (diff) | |
all: refactor stats
Add ability for each package to create and export own stats.
Each stat is self-contained, describes how it should be presented,
and there is no need to copy them from one package to another.
Stats also keep historical data and allow building graphs over time.
Diffstat (limited to 'syz-manager')
| -rw-r--r-- | syz-manager/http.go | 137 | ||||
| -rw-r--r-- | syz-manager/hub.go | 33 | ||||
| -rw-r--r-- | syz-manager/manager.go | 132 | ||||
| -rw-r--r-- | syz-manager/rpc.go | 39 | ||||
| -rw-r--r-- | syz-manager/stats.go | 188 |
5 files changed, 201 insertions, 328 deletions
diff --git a/syz-manager/http.go b/syz-manager/http.go index 22af491fd..b14c97fe9 100644 --- a/syz-manager/http.go +++ b/syz-manager/http.go @@ -24,6 +24,7 @@ import ( "github.com/google/syzkaller/pkg/html/pages" "github.com/google/syzkaller/pkg/log" "github.com/google/syzkaller/pkg/osutil" + "github.com/google/syzkaller/pkg/stats" "github.com/google/syzkaller/pkg/vcs" "github.com/google/syzkaller/prog" "github.com/gorilla/handlers" @@ -38,6 +39,7 @@ func (mgr *Manager) initHTTP() { handle("/", mgr.httpSummary) handle("/config", mgr.httpConfig) handle("/expert_mode", mgr.httpExpertMode) + handle("/stats", mgr.httpStats) handle("/metrics", promhttp.HandlerFor(prometheus.DefaultGatherer, promhttp.HandlerOpts{}).ServeHTTP) handle("/syscalls", mgr.httpSyscalls) handle("/corpus", mgr.httpCorpus) @@ -71,10 +73,24 @@ func (mgr *Manager) initHTTP() { func (mgr *Manager) httpSummary(w http.ResponseWriter, r *http.Request) { data := &UISummaryData{ - Name: mgr.cfg.Name, - Expert: mgr.expertMode, - Log: log.CachedLogOutput(), - Stats: mgr.collectStats(), + Name: mgr.cfg.Name, + Revision: prog.GitRevisionBase[:8], + RevisionLink: vcs.LogLink(vcs.SyzkallerRepo, prog.GitRevisionBase), + Expert: mgr.expertMode, + Log: log.CachedLogOutput(), + } + + level := stats.Simple + if mgr.expertMode { + level = stats.All + } + for _, stat := range stats.Collect(level) { + data.Stats = append(data.Stats, UIStat{ + Name: stat.Name, + Value: stat.Value, + Hint: stat.Desc, + Link: stat.Link, + }) } var err error @@ -122,89 +138,19 @@ func (mgr *Manager) httpSyscalls(w http.ResponseWriter, r *http.Request) { executeTemplate(w, syscallsTemplate, data) } -func (mgr *Manager) collectStats() []UIStat { - mgr.mu.Lock() - defer mgr.mu.Unlock() - - configName := mgr.cfg.Name - if configName == "" { - configName = "config" - } - secs := uint64(1) - if !mgr.firstConnect.IsZero() { - secs = uint64(time.Since(mgr.firstConnect).Seconds()) + 1 - } - rawStats := mgr.stats.all() - head := prog.GitRevisionBase 
- stats := []UIStat{ - {Name: "revision", Value: fmt.Sprint(head[:8]), Link: vcs.LogLink(vcs.SyzkallerRepo, head)}, - {Name: "config", Value: configName, Link: "/config"}, - {Name: "uptime", Value: fmt.Sprint(time.Since(mgr.startTime) / 1e9 * 1e9)}, - {Name: "fuzzing time", Value: fmt.Sprint(mgr.fuzzingTime / 60e9 * 60e9)}, - {Name: "corpus", Value: fmt.Sprint(mgr.corpus.Stats().Progs), Link: "/corpus"}, - {Name: "triage queue", Value: fmt.Sprint(mgr.stats.triageQueueLen.get())}, - {Name: "crashes", Value: rateStat(rawStats["crashes"], secs)}, - {Name: "crash types", Value: rateStat(rawStats["crash types"], secs)}, - {Name: "suppressed", Value: rateStat(rawStats["suppressed"], secs)}, - {Name: "signal", Value: fmt.Sprint(rawStats["signal"])}, - {Name: "coverage", Value: fmt.Sprint(rawStats["coverage"]), Link: "/cover"}, - {Name: "exec total", Value: rateStat(rawStats["exec total"], secs)}, - } - if mgr.coverFilter != nil { - stats = append(stats, UIStat{ - Name: "filtered coverage", - Value: fmt.Sprintf("%v / %v (%v%%)", - rawStats["filtered coverage"], len(mgr.coverFilter), - rawStats["filtered coverage"]*100/uint64(len(mgr.coverFilter))), - Link: "/cover?filter=yes", - }) - } else { - delete(rawStats, "filtered coverage") - } - if mgr.checkResult != nil { - stats = append(stats, UIStat{ - Name: "syscalls", - Value: fmt.Sprint(len(mgr.checkResult.EnabledCalls[mgr.cfg.Sandbox])), - Link: "/syscalls", - }) - } - for _, stat := range stats { - delete(rawStats, stat.Name) - } - if mgr.expertMode { - var intStats []UIStat - for k, v := range rawStats { - val := "" - switch { - case k == "fuzzer jobs" || strings.HasPrefix(k, "rpc exchange"): - val = fmt.Sprint(v) - default: - val = rateStat(v, secs) - } - intStats = append(intStats, UIStat{Name: k, Value: val}) - } - sort.Slice(intStats, func(i, j int) bool { - return intStats[i].Name < intStats[j].Name - }) - stats = append(stats, intStats...) 
- } - return stats -} - -func rateStat(v, secs uint64) string { - if x := v / secs; x >= 10 { - return fmt.Sprintf("%v (%v/sec)", v, x) - } - if x := v * 60 / secs; x >= 10 { - return fmt.Sprintf("%v (%v/min)", v, x) +func (mgr *Manager) httpStats(w http.ResponseWriter, r *http.Request) { + data, err := stats.RenderHTML() + if err != nil { + log.Logf(0, "failed to execute template: %v", err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return } - x := v * 60 * 60 / secs - return fmt.Sprintf("%v (%v/hour)", v, x) + w.Write(data) } func (mgr *Manager) httpCrash(w http.ResponseWriter, r *http.Request) { crashID := r.FormValue("id") - crash := readCrash(mgr.cfg.Workdir, crashID, nil, mgr.startTime, true) + crash := readCrash(mgr.cfg.Workdir, crashID, nil, mgr.firstConnect.Load(), true) if crash == nil { http.Error(w, "failed to read crash info", http.StatusInternalServerError) return @@ -621,7 +567,7 @@ func (mgr *Manager) collectCrashes(workdir string) ([]*UICrashType, error) { } var crashTypes []*UICrashType for _, dir := range dirs { - crash := readCrash(workdir, dir, repros, mgr.startTime, false) + crash := readCrash(workdir, dir, repros, mgr.firstConnect.Load(), false) if crash != nil { crashTypes = append(crashTypes, crash) } @@ -632,7 +578,7 @@ func (mgr *Manager) collectCrashes(workdir string) ([]*UICrashType, error) { return crashTypes, nil } -func readCrash(workdir, dir string, repros map[string]bool, start time.Time, full bool) *UICrashType { +func readCrash(workdir, dir string, repros map[string]bool, start int64, full bool) *UICrashType { if len(dir) != 40 { return nil } @@ -691,7 +637,7 @@ func readCrash(workdir, dir string, repros map[string]bool, start time.Time, ful crash.Log = filepath.Join("crashes", dir, "log"+index) if stat, err := os.Stat(filepath.Join(workdir, crash.Log)); err == nil { crash.Time = stat.ModTime() - crash.Active = crash.Time.After(start) + crash.Active = start != 0 && crash.Time.Unix() >= start } tag, _ := 
os.ReadFile(filepath.Join(crashdir, dir, "tag"+index)) crash.Tag = string(tag) @@ -709,7 +655,7 @@ func readCrash(workdir, dir string, repros map[string]bool, start time.Time, ful return &UICrashType{ Description: desc, LastTime: modTime, - Active: modTime.After(start), + Active: start != 0 && modTime.Unix() >= start, ID: dir, Count: len(crashes), Triaged: triaged, @@ -751,11 +697,13 @@ func trimNewLines(data []byte) []byte { } type UISummaryData struct { - Name string - Expert bool - Stats []UIStat - Crashes []*UICrashType - Log string + Name string + Revision string + RevisionLink string + Expert bool + Stats []UIStat + Crashes []*UICrashType + Log string } type UISyscallsData struct { @@ -786,6 +734,7 @@ type UICrash struct { type UIStat struct { Name string Value string + Hint string Link string } @@ -817,14 +766,16 @@ var summaryTemplate = pages.Create(` </head> <body> <b>{{.Name }} syzkaller</b> +<a href='/config'>[config]</a> +<a href='{{.RevisionLink}}'>{{.Revision}}</a> <a class="navigation_tab" href='expert_mode'>{{if .Expert}}disable{{else}}enable{{end}} expert mode</a> <br> <table class="list_table"> - <caption>Stats:</caption> + <caption><a href='/stats'>Stats 📈</a></caption> {{range $s := $.Stats}} <tr> - <td class="stat_name">{{$s.Name}}</td> + <td class="stat_name" title="{{$s.Hint}}">{{$s.Name}}</td> <td class="stat_value"> {{if $s.Link}} <a href="{{$s.Link}}">{{$s.Value}}</a> diff --git a/syz-manager/hub.go b/syz-manager/hub.go index 04904664d..405ab3075 100644 --- a/syz-manager/hub.go +++ b/syz-manager/hub.go @@ -17,6 +17,7 @@ import ( "github.com/google/syzkaller/pkg/report" "github.com/google/syzkaller/pkg/report/crash" "github.com/google/syzkaller/pkg/rpctype" + "github.com/google/syzkaller/pkg/stats" "github.com/google/syzkaller/prog" ) @@ -41,13 +42,20 @@ func (mgr *Manager) hubSyncLoop(keyGet keyGetter) { mgr: mgr, cfg: mgr.cfg, target: mgr.target, - stats: mgr.stats, domain: mgr.cfg.TargetOS + "/" + mgr.cfg.HubDomain, enabledCalls: 
mgr.targetEnabledSyscalls, leak: mgr.checkResult.Features[host.FeatureLeak].Enabled, fresh: mgr.fresh, hubReproQueue: mgr.externalReproQueue, keyGet: keyGet, + + statSendProgAdd: stats.Create("hub send prog add", "", stats.Graph("hub progs")), + statSendProgDel: stats.Create("hub send prog del", "", stats.Graph("hub progs")), + statRecvProg: stats.Create("hub recv prog", "", stats.Graph("hub progs")), + statRecvProgDrop: stats.Create("hub recv prog drop", "", stats.NoGraph), + statSendRepro: stats.Create("hub send repro", "", stats.Graph("hub repros")), + statRecvRepro: stats.Create("hub recv repro", "", stats.Graph("hub repros")), + statRecvReproDrop: stats.Create("hub recv repro drop", "", stats.NoGraph), } if mgr.cfg.Reproduce && mgr.dash != nil { hc.needMoreRepros = mgr.needMoreRepros @@ -59,7 +67,6 @@ type HubConnector struct { mgr HubManagerView cfg *mgrconfig.Config target *prog.Target - stats *Stats domain string enabledCalls map[*prog.Syscall]bool leak bool @@ -69,6 +76,14 @@ type HubConnector struct { hubReproQueue chan *Crash needMoreRepros chan chan bool keyGet keyGetter + + statSendProgAdd *stats.Val + statSendProgDel *stats.Val + statRecvProg *stats.Val + statRecvProgDrop *stats.Val + statSendRepro *stats.Val + statRecvRepro *stats.Val + statRecvReproDrop *stats.Val } // HubManagerView restricts interface between HubConnector and Manager. 
@@ -196,13 +211,13 @@ func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus [][]byte) error { } minimized, smashed, progDropped := hc.processProgs(r.Inputs) reproDropped := hc.processRepros(r.Repros) - hc.stats.hubSendProgAdd.add(len(a.Add)) - hc.stats.hubSendProgDel.add(len(a.Del)) - hc.stats.hubSendRepro.add(len(a.Repros)) - hc.stats.hubRecvProg.add(len(r.Inputs) - progDropped) - hc.stats.hubRecvProgDrop.add(progDropped) - hc.stats.hubRecvRepro.add(len(r.Repros) - reproDropped) - hc.stats.hubRecvReproDrop.add(reproDropped) + hc.statSendProgAdd.Add(len(a.Add)) + hc.statSendProgDel.Add(len(a.Del)) + hc.statSendRepro.Add(len(a.Repros)) + hc.statRecvProg.Add(len(r.Inputs) - progDropped) + hc.statRecvProgDrop.Add(progDropped) + hc.statRecvRepro.Add(len(r.Repros) - reproDropped) + hc.statRecvReproDrop.Add(reproDropped) log.Logf(0, "hub sync: send: add %v, del %v, repros %v;"+ " recv: progs %v (min %v, smash %v), repros %v; more %v", len(a.Add), len(a.Del), len(a.Repros), diff --git a/syz-manager/manager.go b/syz-manager/manager.go index e7519b707..57e656034 100644 --- a/syz-manager/manager.go +++ b/syz-manager/manager.go @@ -36,6 +36,7 @@ import ( crash_pkg "github.com/google/syzkaller/pkg/report/crash" "github.com/google/syzkaller/pkg/repro" "github.com/google/syzkaller/pkg/rpctype" + "github.com/google/syzkaller/pkg/stats" "github.com/google/syzkaller/prog" "github.com/google/syzkaller/sys/targets" "github.com/google/syzkaller/vm" @@ -59,18 +60,13 @@ type Manager struct { corpusDB *db.DB corpusDBMu sync.Mutex // for concurrent operations on corpusDB corpusPreloaded chan bool - startTime time.Time - firstConnect time.Time - fuzzingTime time.Duration - stats *Stats + firstConnect atomic.Int64 // unix time, or 0 if not connected crashTypes map[string]bool vmStop chan bool checkResult *rpctype.CheckArgs fresh bool netCompression bool expertMode bool - numFuzzing uint32 - numReproducing uint32 nextInstanceID atomic.Uint64 dash *dashapi.Dashboard @@ -103,6 +99,8 @@ 
type Manager struct { afterTriageStatSent bool assetStorage *asset.Storage + + Stats } const ( @@ -178,8 +176,6 @@ func RunManager(cfg *mgrconfig.Config) { sysTarget: cfg.SysTarget, reporter: reporter, crashdir: crashdir, - startTime: time.Now(), - stats: &Stats{haveHub: cfg.HubClient != ""}, crashTypes: make(map[string]bool), disabledHashes: make(map[string]struct{}), memoryLeakFrames: make(map[string]bool), @@ -194,9 +190,9 @@ func RunManager(cfg *mgrconfig.Config) { saturatedCalls: make(map[string]bool), } + mgr.initStats() go mgr.preloadCorpus() - mgr.initStats() // Initializes prometheus variables. - mgr.initHTTP() // Creates HTTP server. + mgr.initHTTP() // Creates HTTP server. mgr.collectUsedFiles() go mgr.corpusInputHandler(corpusUpdates) @@ -224,26 +220,17 @@ func RunManager(cfg *mgrconfig.Config) { for lastTime := time.Now(); ; { time.Sleep(10 * time.Second) now := time.Now() - diff := now.Sub(lastTime) + diff := int(now.Sub(lastTime)) lastTime = now - mgr.mu.Lock() - if mgr.firstConnect.IsZero() { - mgr.mu.Unlock() + if mgr.firstConnect.Load() == 0 { continue } - mgr.fuzzingTime += diff * time.Duration(atomic.LoadUint32(&mgr.numFuzzing)) - mgr.mu.Unlock() - executed := mgr.stats.execTotal.get() - crashes := mgr.stats.crashes.get() - corpusCover := mgr.stats.corpusCover.get() - corpusSignal := mgr.stats.corpusSignal.get() - maxSignal := mgr.stats.maxSignal.get() - triageQLen := mgr.stats.triageQueueLen.get() - numReproducing := atomic.LoadUint32(&mgr.numReproducing) - numFuzzing := atomic.LoadUint32(&mgr.numFuzzing) - - log.Logf(0, "VMs %v, executed %v, cover %v, signal %v/%v, crashes %v, repro %v, triageQLen %v", - numFuzzing, executed, corpusCover, corpusSignal, maxSignal, crashes, numReproducing, triageQLen) + mgr.statFuzzingTime.Add(diff * mgr.statNumFuzzing.Val()) + buf := new(bytes.Buffer) + for _, stat := range stats.Collect(stats.Console) { + fmt.Fprintf(buf, "%v=%v ", stat.Name, stat.Value) + } + log.Logf(0, "%s", buf.String()) } }() @@ -275,20 
+262,10 @@ func (mgr *Manager) initBench() { go func() { for { time.Sleep(time.Minute) - vals := mgr.stats.all() - mgr.mu.Lock() - if mgr.firstConnect.IsZero() { - mgr.mu.Unlock() - continue + vals := make(map[string]int) + for _, stat := range stats.Collect(stats.All) { + vals[stat.Name] = stat.V } - mgr.minimizeCorpusUnlocked() - stat := mgr.corpus.Stats() - vals["corpus"] = uint64(stat.Progs) - vals["uptime"] = uint64(time.Since(mgr.firstConnect)) / 1e9 - vals["fuzzing"] = uint64(mgr.fuzzingTime) / 1e9 - vals["candidates"] = uint64(mgr.fuzzer.Load().Stats().Candidates) - mgr.mu.Unlock() - data, err := json.MarshalIndent(vals, "", " ") if err != nil { log.Fatalf("failed to serialize bench data") @@ -361,7 +338,7 @@ func (mgr *Manager) vmLoop() { canRepro := func() bool { return phase >= phaseTriagedHub && len(reproQueue) != 0 && - (int(atomic.LoadUint32(&mgr.numReproducing))+1)*instancesPerRepro <= maxReproVMs + (mgr.statNumReproducing.Val()+1)*instancesPerRepro <= maxReproVMs } if shutdown != nil { @@ -374,7 +351,7 @@ func (mgr *Manager) vmLoop() { crash := reproQueue[last] reproQueue[last] = nil reproQueue = reproQueue[:last] - atomic.AddUint32(&mgr.numReproducing, 1) + mgr.statNumReproducing.Add(1) log.Logf(0, "loop: starting repro of '%v' on instances %+v", crash.Title, vmIndexes) go func() { reproDone <- mgr.runRepro(crash, vmIndexes, instances.Put) @@ -422,7 +399,7 @@ func (mgr *Manager) vmLoop() { } } case res := <-reproDone: - atomic.AddUint32(&mgr.numReproducing, ^uint32(0)) + mgr.statNumReproducing.Add(-1) crepro := false title := "" if res.repro != nil { @@ -820,8 +797,8 @@ func (mgr *Manager) runInstanceInner(index int, instanceName string) (*report.Re // Run the fuzzer binary. 
start := time.Now() - atomic.AddUint32(&mgr.numFuzzing, 1) - defer atomic.AddUint32(&mgr.numFuzzing, ^uint32(0)) + mgr.statNumFuzzing.Add(1) + defer mgr.statNumFuzzing.Add(-1) args := &instance.FuzzerCmdArgs{ Fuzzer: fuzzerBin, @@ -909,14 +886,14 @@ func (mgr *Manager) saveCrash(crash *Crash) bool { // Collect all of them into a single bucket so that it's possible to control and assess them, // e.g. if there are some spikes in suppressed reports. crash.Title = "suppressed report" - mgr.stats.crashSuppressed.inc() + mgr.statSuppressed.Add(1) } - mgr.stats.crashes.inc() + mgr.statCrashes.Add(1) mgr.mu.Lock() if !mgr.crashTypes[crash.Title] { mgr.crashTypes[crash.Title] = true - mgr.stats.crashTypes.inc() + mgr.statCrashTypes.Add(1) } mgr.mu.Unlock() @@ -1223,7 +1200,6 @@ func fullReproLog(stats *repro.Stats) []byte { func (mgr *Manager) corpusInputHandler(updates <-chan corpus.NewItemEvent) { for update := range updates { - mgr.stats.newInputs.inc() mgr.serv.updateFilteredCover(update.NewCover) if update.Exists { @@ -1268,12 +1244,12 @@ func (mgr *Manager) addNewCandidates(candidates []fuzzer.Candidate) { } func (mgr *Manager) minimizeCorpusUnlocked() { - currSize := mgr.corpus.Stats().Progs + currSize := mgr.corpus.StatProgs.Val() if mgr.phase < phaseLoadedCorpus || currSize <= mgr.lastMinCorpus*103/100 { return } mgr.corpus.Minimize(mgr.cfg.Cover) - newSize := mgr.corpus.Stats().Progs + newSize := mgr.corpus.StatProgs.Val() log.Logf(1, "minimized corpus: %v -> %v", currSize, newSize) mgr.lastMinCorpus = newSize @@ -1390,7 +1366,10 @@ func (mgr *Manager) machineChecked(a *rpctype.CheckArgs, enabledSyscalls map[*pr mgr.checkResult = a mgr.targetEnabledSyscalls = enabledSyscalls mgr.target.UpdateGlobs(a.GlobFiles) - mgr.firstConnect = time.Now() + mgr.firstConnect.Store(time.Now().Unix()) + statSyscalls := stats.Create("syscalls", "Number of enabled syscalls", + stats.Simple, stats.NoGraph, stats.Link("/syscalls")) + 
statSyscalls.Add(len(mgr.checkResult.EnabledCalls[mgr.cfg.Sandbox])) rnd := rand.New(rand.NewSource(time.Now().UnixNano())) calls := make(map[*prog.Syscall]bool) @@ -1439,7 +1418,7 @@ func (mgr *Manager) fuzzerSignalRotation(fuzzer *fuzzer.Fuzzer) { // 3000/60000 = 5%. execsBetweenRotates = 60000 ) - var lastExecTotal uint64 + lastExecTotal := 0 lastRotation := time.Now() for { time.Sleep(time.Minute * 5) @@ -1449,7 +1428,7 @@ func (mgr *Manager) fuzzerSignalRotation(fuzzer *fuzzer.Fuzzer) { if phase < phaseTriagedCorpus { continue } - if mgr.stats.execTotal.get()-lastExecTotal < execsBetweenRotates { + if mgr.statExecs.Val()-lastExecTotal < execsBetweenRotates { continue } if time.Since(lastRotation) < timeBetweenRotates { @@ -1457,7 +1436,7 @@ func (mgr *Manager) fuzzerSignalRotation(fuzzer *fuzzer.Fuzzer) { } fuzzer.RotateMaxSignal(rotateSignals) lastRotation = time.Now() - lastExecTotal = mgr.stats.execTotal.get() + lastExecTotal = mgr.statExecs.Val() } } @@ -1471,18 +1450,8 @@ func (mgr *Manager) fuzzerLoop(fuzzer *fuzzer.Fuzzer) { len(newSignal), len(dropSignal)) mgr.serv.distributeSignalDelta(newSignal, dropSignal) - // Collect statistics. - fuzzerStats := fuzzer.Stats() - mgr.stats.setNamed(fuzzerStats.Named) - mgr.stats.corpusCover.set(fuzzerStats.Cover) - mgr.stats.corpusSignal.set(fuzzerStats.Signal) - mgr.stats.maxSignal.set(fuzzerStats.MaxSignal) - mgr.stats.triageQueueLen.set(fuzzerStats.Candidates) - mgr.stats.fuzzerJobs.set(fuzzerStats.RunningJobs) - mgr.stats.rpcTraffic.add(int(mgr.serv.server.TotalBytes.Swap(0))) - // Update the state machine. 
- if fuzzerStats.Candidates == 0 { + if fuzzer.StatCandidates.Val() == 0 { mgr.mu.Lock() if mgr.phase == phaseLoadedCorpus { if mgr.cfg.HubClient != "" { @@ -1562,32 +1531,27 @@ func (mgr *Manager) dashboardReporter() { var lastCrashes, lastSuppressedCrashes, lastExecs uint64 for { time.Sleep(time.Minute) - mgr.mu.Lock() - if mgr.firstConnect.IsZero() { - mgr.mu.Unlock() + if mgr.firstConnect.Load() == 0 { continue } - crashes := mgr.stats.crashes.get() - suppressedCrashes := mgr.stats.crashSuppressed.get() - execs := mgr.stats.execTotal.get() - corpusStat := mgr.corpus.Stats() + mgr.mu.Lock() req := &dashapi.ManagerStatsReq{ Name: mgr.cfg.Name, Addr: webAddr, - UpTime: time.Since(mgr.firstConnect), - Corpus: uint64(corpusStat.Progs), - PCs: mgr.stats.corpusCover.get(), - Cover: mgr.stats.corpusSignal.get(), - CrashTypes: mgr.stats.crashTypes.get(), - FuzzingTime: mgr.fuzzingTime - lastFuzzingTime, - Crashes: crashes - lastCrashes, - SuppressedCrashes: suppressedCrashes - lastSuppressedCrashes, - Execs: execs - lastExecs, + UpTime: time.Duration(mgr.statUptime.Val()) * time.Second, + Corpus: uint64(mgr.corpus.StatProgs.Val()), + PCs: uint64(mgr.corpus.StatCover.Val()), + Cover: uint64(mgr.corpus.StatSignal.Val()), + CrashTypes: uint64(mgr.statCrashTypes.Val()), + FuzzingTime: time.Duration(mgr.statFuzzingTime.Val()) - lastFuzzingTime, + Crashes: uint64(mgr.statCrashes.Val()) - lastCrashes, + SuppressedCrashes: uint64(mgr.statSuppressed.Val()) - lastSuppressedCrashes, + Execs: uint64(mgr.statExecs.Val()) - lastExecs, } if mgr.phase >= phaseTriagedCorpus && !mgr.afterTriageStatSent { mgr.afterTriageStatSent = true - req.TriagedCoverage = mgr.stats.corpusSignal.get() - req.TriagedPCs = mgr.stats.corpusCover.get() + req.TriagedCoverage = uint64(mgr.corpus.StatSignal.Val()) + req.TriagedPCs = uint64(mgr.corpus.StatCover.Val()) } mgr.mu.Unlock() diff --git a/syz-manager/rpc.go b/syz-manager/rpc.go index 185b44d0f..c9fa4d28d 100644 --- a/syz-manager/rpc.go +++ 
b/syz-manager/rpc.go @@ -17,6 +17,7 @@ import ( "github.com/google/syzkaller/pkg/mgrconfig" "github.com/google/syzkaller/pkg/rpctype" "github.com/google/syzkaller/pkg/signal" + "github.com/google/syzkaller/pkg/stats" "github.com/google/syzkaller/prog" ) @@ -28,7 +29,6 @@ type RPCServer struct { port int targetEnabledSyscalls map[*prog.Syscall]bool coverFilter map[uint32]uint32 - stats *Stats canonicalModules *cover.Canonicalizer mu sync.Mutex @@ -40,6 +40,13 @@ type RPCServer struct { // We did not finish these requests because of VM restarts. // They will be eventually given to other VMs. rescuedInputs []*fuzzer.Request + + statVMRestarts *stats.Val + statExchangeCalls *stats.Val + statExchangeProgs *stats.Val + statExchangeServerLatency *stats.Val + statExchangeClientLatency *stats.Val + statCorpusCoverFiltered *stats.Val } type Runner struct { @@ -70,9 +77,19 @@ type RPCManagerView interface { func startRPCServer(mgr *Manager) (*RPCServer, error) { serv := &RPCServer{ - mgr: mgr, - cfg: mgr.cfg, - stats: mgr.stats, + mgr: mgr, + cfg: mgr.cfg, + statVMRestarts: stats.Create("vm restarts", "Total number of VM starts", + stats.Rate{}, stats.NoGraph), + statExchangeCalls: stats.Create("exchange calls", "Number of RPC Exchange calls", + stats.Rate{}), + statExchangeProgs: stats.Create("exchange progs", "Test programs exchanged per RPC call", + stats.Distribution{}), + statExchangeServerLatency: stats.Create("exchange manager latency", + "Manager RPC Exchange call latency (us)", stats.Distribution{}), + statExchangeClientLatency: stats.Create("exchange fuzzer latency", + "End-to-end fuzzer RPC Exchange call latency (us)", stats.Distribution{}), + statCorpusCoverFiltered: stats.Create("filtered coverage", "", stats.NoGraph), } s, err := rpctype.NewRPCServer(mgr.cfg.RPC, "Manager", serv, mgr.netCompression) if err != nil { @@ -87,7 +104,7 @@ func startRPCServer(mgr *Manager) (*RPCServer, error) { func (serv *RPCServer) Connect(a *rpctype.ConnectArgs, r 
*rpctype.ConnectRes) error { log.Logf(1, "fuzzer %v connected", a.Name) - serv.stats.vmRestarts.inc() + serv.statVMRestarts.Add(1) serv.mu.Lock() if serv.canonicalModules == nil { @@ -218,7 +235,7 @@ func (serv *RPCServer) ExchangeInfo(a *rpctype.ExchangeInfoRequest, r *rpctype.E runner.doneRequest(result, fuzzer) } - serv.stats.mergeNamed(a.StatsDelta) + stats.Import(a.StatsDelta) runner.mu.Lock() // Let's transfer new max signal in portions. @@ -234,10 +251,10 @@ func (serv *RPCServer) ExchangeInfo(a *rpctype.ExchangeInfoRequest, r *rpctype.E log.Logf(2, "exchange with %s: %d done, %d new requests, %d new max signal, %d drop signal", a.Name, len(a.Results), len(r.Requests), len(r.NewMaxSignal), len(r.DropMaxSignal)) - serv.stats.rpcExchangeCalls.inc() - serv.stats.rpcExchangeProgs.add(a.NeedProgs) - serv.stats.rpcExchangeClientLatency.add(int(a.Latency)) - serv.stats.rpcExchangeServerLatency.add(int(time.Since(start).Nanoseconds())) + serv.statExchangeCalls.Add(1) + serv.statExchangeProgs.Add(a.NeedProgs) + serv.statExchangeClientLatency.Add(int(a.Latency.Microseconds())) + serv.statExchangeServerLatency.Add(int(time.Since(start).Microseconds())) return nil } @@ -256,7 +273,7 @@ func (serv *RPCServer) updateFilteredCover(pcs []uint32) error { filtered++ } } - serv.stats.corpusCoverFiltered.add(filtered) + serv.statCorpusCoverFiltered.Add(filtered) return nil } diff --git a/syz-manager/stats.go b/syz-manager/stats.go index 88db8ed17..7cb67d5ca 100644 --- a/syz-manager/stats.go +++ b/syz-manager/stats.go @@ -4,145 +4,71 @@ package main import ( - "sync" - "sync/atomic" + "fmt" + "runtime" + "time" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/google/syzkaller/pkg/stats" ) -type Stat uint64 - type Stats struct { - crashes Stat - crashTypes Stat - crashSuppressed Stat - vmRestarts Stat - newInputs Stat - execTotal Stat - rpcTraffic Stat - rpcExchangeCalls Stat - rpcExchangeProgs Stat - 
rpcExchangeServerLatency Stat - rpcExchangeClientLatency Stat - hubSendProgAdd Stat - hubSendProgDel Stat - hubSendRepro Stat - hubRecvProg Stat - hubRecvProgDrop Stat - hubRecvRepro Stat - hubRecvReproDrop Stat - corpusCover Stat - corpusCoverFiltered Stat - corpusSignal Stat - maxSignal Stat - triageQueueLen Stat - fuzzerJobs Stat - - mu sync.Mutex - namedStats map[string]uint64 - haveHub bool + statNumFuzzing *stats.Val + statNumReproducing *stats.Val + statExecs *stats.Val + statCrashes *stats.Val + statCrashTypes *stats.Val + statSuppressed *stats.Val + statUptime *stats.Val + statFuzzingTime *stats.Val } func (mgr *Manager) initStats() { - // Prometheus Instrumentation https://prometheus.io/docs/guides/go-application . - prometheus.Register(promauto.NewGaugeFunc(prometheus.GaugeOpts{ - Name: "syz_exec_total", - Help: "Total executions during current execution of syz-manager", - }, - func() float64 { return float64(mgr.stats.execTotal.get()) }, - )) - prometheus.Register(promauto.NewGaugeFunc(prometheus.GaugeOpts{ - Name: "syz_corpus_cover", - Help: "Corpus coverage during current execution of syz-manager", - }, - func() float64 { return float64(mgr.stats.corpusCover.get()) }, - )) - prometheus.Register(promauto.NewGaugeFunc(prometheus.GaugeOpts{ - Name: "syz_crash_total", - Help: "Count of crashes during current execution of syz-manager", - }, - func() float64 { return float64(mgr.stats.crashes.get()) }, - )) -} + mgr.statNumFuzzing = stats.Create("VMs", "Number of VMs that are currently fuzzing", + stats.Console, stats.NoGraph) + mgr.statNumReproducing = stats.Create("reproducing", "Number of crashes being reproduced", + stats.Console, stats.NoGraph) + mgr.statExecs = stats.Create("exec total", "Total test program executions", + stats.Console, stats.Rate{}, stats.Prometheus("syz_exec_total")) + mgr.statCrashes = stats.Create("crashes", "Total number of VM crashes", + stats.Simple, stats.Prometheus("syz_crash_total")) + mgr.statCrashTypes = 
stats.Create("crash types", "Number of unique crashes types", + stats.Simple, stats.NoGraph) + mgr.statSuppressed = stats.Create("suppressed", "Total number of suppressed VM crashes", + stats.Simple, stats.NoGraph) + mgr.statFuzzingTime = stats.Create("fuzzing", "Total fuzzing time in all VMs (seconds)", + stats.NoGraph, func(v int, period time.Duration) string { return fmt.Sprintf("%v sec", v/1e9) }) -func (stats *Stats) all() map[string]uint64 { - m := map[string]uint64{ - "crashes": stats.crashes.get(), - "crash types": stats.crashTypes.get(), - "suppressed": stats.crashSuppressed.get(), - "vm restarts": stats.vmRestarts.get(), - "new inputs": stats.newInputs.get(), - "exec total": stats.execTotal.get(), - "coverage": stats.corpusCover.get(), - "filtered coverage": stats.corpusCoverFiltered.get(), - "signal": stats.corpusSignal.get(), - "max signal": stats.maxSignal.get(), - "rpc traffic (MB)": stats.rpcTraffic.get() >> 20, - "fuzzer jobs": stats.fuzzerJobs.get(), - } - if exchanges := stats.rpcExchangeCalls.get(); exchanges != 0 { - m["exchange calls"] = exchanges - m["exchange progs"] = uint64(float64(stats.rpcExchangeProgs.get())/float64(exchanges) + 0.5) - m["exchange lat server (us)"] = stats.rpcExchangeServerLatency.get() / exchanges / 1e3 - m["exchange lat client (us)"] = stats.rpcExchangeClientLatency.get() / exchanges / 1e3 - } - if stats.haveHub { - m["hub: send prog add"] = stats.hubSendProgAdd.get() - m["hub: send prog del"] = stats.hubSendProgDel.get() - m["hub: send repro"] = stats.hubSendRepro.get() - m["hub: recv prog"] = stats.hubRecvProg.get() - m["hub: recv prog drop"] = stats.hubRecvProgDrop.get() - m["hub: recv repro"] = stats.hubRecvRepro.get() - m["hub: recv repro drop"] = stats.hubRecvReproDrop.get() - } - stats.mu.Lock() - defer stats.mu.Unlock() - for k, v := range stats.namedStats { - m[k] = v - } - return m -} + mgr.statUptime = stats.Create("uptime", "Total uptime (seconds)", stats.Simple, stats.NoGraph, + func() int { + firstConnect 
:= mgr.firstConnect.Load() + if firstConnect == 0 { + return 0 + } + return int(time.Now().Unix() - firstConnect) + }, func(v int, period time.Duration) string { + return fmt.Sprintf("%v sec", v) + }) -func (stats *Stats) mergeNamed(named map[string]uint64) { - stats.mu.Lock() - defer stats.mu.Unlock() - if stats.namedStats == nil { - stats.namedStats = make(map[string]uint64) - } - for k, v := range named { - switch k { - case "exec total": - stats.execTotal.add(int(v)) - default: - stats.namedStats[k] += v - } - } -} - -func (stats *Stats) setNamed(named map[string]uint64) { - stats.mu.Lock() - defer stats.mu.Unlock() - if stats.namedStats == nil { - stats.namedStats = make(map[string]uint64) - } - for k, v := range named { - stats.namedStats[k] = v - } -} - -func (s *Stat) get() uint64 { - return atomic.LoadUint64((*uint64)(s)) -} - -func (s *Stat) inc() { - s.add(1) -} - -func (s *Stat) add(v int) { - atomic.AddUint64((*uint64)(s), uint64(v)) -} + stats.Create("heap", "Process heap size (bytes)", stats.Graph("memory"), + func() int { + var ms runtime.MemStats + runtime.ReadMemStats(&ms) + return int(ms.Alloc) + }, func(v int, period time.Duration) string { + return fmt.Sprintf("%v MB", v>>20) + }) + stats.Create("VM", "Process VM size (bytes)", stats.Graph("memory"), + func() int { + var ms runtime.MemStats + runtime.ReadMemStats(&ms) + return int(ms.Sys - ms.HeapReleased) + }, func(v int, period time.Duration) string { + return fmt.Sprintf("%v MB", v>>20) + }) -func (s *Stat) set(v int) { - atomic.StoreUint64((*uint64)(s), uint64(v)) + // Stats imported from the fuzzer (names must match the fuzzer names). 
+ stats.Create("executor restarts", "Number of times executor process was restarted", stats.Rate{}) + stats.Create("buffer too small", "Program serialization overflowed exec buffer", stats.NoGraph) + stats.Create("no exec requests", "Number of times fuzzer was stalled with no exec requests", stats.Rate{}) + stats.Create("no exec duration", "Total duration fuzzer was stalled with no exec requests (ns/sec)", stats.Rate{}) } |
