From 1be1a06281dccada078a2a51e8b483811af8f596 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 29 Mar 2024 15:02:10 +0100 Subject: all: refactor stats Add ability for each package to create and export own stats. Each stat is self-contained, describes how it should be presented, and there is not need to copy them from one package to another. Stats also keep historical data and allow building graphs over time. --- pkg/corpus/corpus.go | 30 ++- pkg/corpus/corpus_test.go | 11 +- pkg/fuzzer/cover.go | 20 +- pkg/fuzzer/fuzzer.go | 43 ++-- pkg/fuzzer/fuzzer_test.go | 19 +- pkg/fuzzer/job.go | 43 ++-- pkg/fuzzer/job_test.go | 4 +- pkg/fuzzer/stats.go | 81 ++++---- pkg/html/pages/style.css | 8 +- pkg/ipc/ipc.go | 14 +- pkg/rpctype/rpc.go | 18 +- pkg/stats/set.go | 499 ++++++++++++++++++++++++++++++++++++++++++++++ pkg/stats/set_test.go | 219 ++++++++++++++++++++ syz-fuzzer/fuzzer.go | 2 + syz-fuzzer/proc.go | 6 +- syz-manager/http.go | 137 ++++--------- syz-manager/hub.go | 33 ++- syz-manager/manager.go | 132 +++++------- syz-manager/rpc.go | 39 +++- syz-manager/stats.go | 188 ++++++----------- 20 files changed, 1074 insertions(+), 472 deletions(-) create mode 100644 pkg/stats/set.go create mode 100644 pkg/stats/set_test.go diff --git a/pkg/corpus/corpus.go b/pkg/corpus/corpus.go index 19dfee483..46e388366 100644 --- a/pkg/corpus/corpus.go +++ b/pkg/corpus/corpus.go @@ -10,6 +10,7 @@ import ( "github.com/google/syzkaller/pkg/cover" "github.com/google/syzkaller/pkg/hash" "github.com/google/syzkaller/pkg/signal" + "github.com/google/syzkaller/pkg/stats" "github.com/google/syzkaller/prog" ) @@ -23,6 +24,9 @@ type Corpus struct { cover cover.Cover // total coverage of all items updates chan<- NewItemEvent *ProgramsList + StatProgs *stats.Val + StatSignal *stats.Val + StatCover *stats.Val } func NewCorpus(ctx context.Context) *Corpus { @@ -30,12 +34,19 @@ func NewCorpus(ctx context.Context) *Corpus { } func NewMonitoredCorpus(ctx context.Context, updates chan<- 
NewItemEvent) *Corpus { - return &Corpus{ + corpus := &Corpus{ ctx: ctx, progs: make(map[string]*Item), updates: updates, ProgramsList: &ProgramsList{}, } + corpus.StatProgs = stats.Create("corpus", "Number of test programs in the corpus", stats.Console, + stats.Link("/corpus"), stats.Graph("corpus"), stats.LenOf(&corpus.progs, &corpus.mu)) + corpus.StatSignal = stats.Create("signal", "Fuzzing signal in the corpus", + stats.LenOf(&corpus.signal, &corpus.mu)) + corpus.StatCover = stats.Create("coverage", "Source coverage in the corpus", stats.Console, + stats.Link("/cover"), stats.Prometheus("syz_corpus_cover"), stats.LenOf(&corpus.cover, &corpus.mu)) + return corpus } // It may happen that a single program is relevant because of several @@ -161,23 +172,6 @@ func (corpus *Corpus) Item(sig string) *Item { return corpus.progs[sig] } -// Stats is a snapshot of the relevant current state figures. -type Stats struct { - Progs int - Signal int - Cover int -} - -func (corpus *Corpus) Stats() Stats { - corpus.mu.RLock() - defer corpus.mu.RUnlock() - return Stats{ - Progs: len(corpus.progs), - Signal: len(corpus.signal), - Cover: len(corpus.cover), - } -} - type CallCov struct { Count int Cover cover.Cover diff --git a/pkg/corpus/corpus_test.go b/pkg/corpus/corpus_test.go index 0b5a85505..62aad1e04 100644 --- a/pkg/corpus/corpus_test.go +++ b/pkg/corpus/corpus_test.go @@ -48,12 +48,9 @@ func TestCorpusOperation(t *testing.T) { } // Verify the total signal. - stat := corpus.Stats() - assert.Equal(t, Stats{ - Signal: 5, - Cover: 0, - Progs: 2, - }, stat) + assert.Equal(t, corpus.StatSignal.Val(), 5) + assert.Equal(t, corpus.StatCover.Val(), 0) + assert.Equal(t, corpus.StatProgs.Val(), 2) corpus.Minimize(true) } @@ -77,7 +74,7 @@ func TestCorpusCoverage(t *testing.T) { assert.Equal(t, []uint32{12}, event.NewCover) // Check the total corpus size. 
- assert.Equal(t, corpus.Stats().Cover, 3) + assert.Equal(t, corpus.StatCover.Val(), 3) } func TestCorpusSaveConcurrency(t *testing.T) { diff --git a/pkg/fuzzer/cover.go b/pkg/fuzzer/cover.go index 31d1fee1b..03580128d 100644 --- a/pkg/fuzzer/cover.go +++ b/pkg/fuzzer/cover.go @@ -7,6 +7,7 @@ import ( "sync" "github.com/google/syzkaller/pkg/signal" + "github.com/google/syzkaller/pkg/stats" ) // Cover keeps track of the signal known to the fuzzer. @@ -17,6 +18,13 @@ type Cover struct { dropSignal signal.Signal // the newly dropped max signal } +func newCover() *Cover { + cover := new(Cover) + stats.Create("max signal", "Maximum fuzzing signal (including flakes)", + stats.Graph("signal"), stats.LenOf(&cover.maxSignal, &cover.mu)) + return cover +} + // Signal that should no longer be chased after. // It is not returned in GrabSignalDelta(). func (cover *Cover) AddMaxSignal(sign signal.Signal) { @@ -61,18 +69,6 @@ func (cover *Cover) GrabSignalDelta() (plus, minus signal.Signal) { return } -type CoverStats struct { - MaxSignal int -} - -func (cover *Cover) Stats() CoverStats { - cover.mu.RLock() - defer cover.mu.RUnlock() - return CoverStats{ - MaxSignal: len(cover.maxSignal), - } -} - func (cover *Cover) subtract(delta signal.Signal) { cover.mu.Lock() defer cover.mu.Unlock() diff --git a/pkg/fuzzer/fuzzer.go b/pkg/fuzzer/fuzzer.go index 6a1df6ba6..6ed79aa9a 100644 --- a/pkg/fuzzer/fuzzer.go +++ b/pkg/fuzzer/fuzzer.go @@ -16,10 +16,12 @@ import ( "github.com/google/syzkaller/pkg/ipc" "github.com/google/syzkaller/pkg/rpctype" "github.com/google/syzkaller/pkg/signal" + "github.com/google/syzkaller/pkg/stats" "github.com/google/syzkaller/prog" ) type Fuzzer struct { + Stats Config *Config Cover *Cover @@ -36,9 +38,6 @@ type Fuzzer struct { nextExec *priorityQueue[*Request] nextJobID atomic.Int64 - - runningJobs atomic.Int64 - queuedCandidates atomic.Int64 } func NewFuzzer(ctx context.Context, cfg *Config, rnd *rand.Rand, @@ -49,8 +48,9 @@ func NewFuzzer(ctx 
context.Context, cfg *Config, rnd *rand.Rand, } } f := &Fuzzer{ + Stats: newStats(), Config: cfg, - Cover: &Cover{}, + Cover: newCover(), ctx: ctx, stats: map[string]uint64{}, @@ -94,7 +94,7 @@ type Request struct { SignalFilter signal.Signal // If specified, the resulting signal MAY be a subset of it. // Fields that are only relevant within pkg/fuzzer. flags ProgTypes - stat string + stat *stats.Val resultC chan *Result } @@ -117,10 +117,10 @@ func (fuzzer *Fuzzer) Done(req *Request, res *Result) { if req.resultC != nil { req.resultC <- res } - // Update stats. - fuzzer.mu.Lock() - fuzzer.stats[req.stat]++ - fuzzer.mu.Unlock() + if res.Info != nil { + fuzzer.statExecTime.Add(int(res.Info.Elapsed.Milliseconds())) + } + req.stat.Add(1) } func (fuzzer *Fuzzer) triageProgCall(p *prog.Prog, info *ipc.CallInfo, call int, @@ -140,7 +140,7 @@ func (fuzzer *Fuzzer) triageProgCall(p *prog.Prog, info *ipc.CallInfo, call int, return } fuzzer.Logf(2, "found new signal in call %d in %s", call, p) - fuzzer.startJob(&triageJob{ + fuzzer.startJob(fuzzer.statJobsTriage, &triageJob{ p: p.Clone(), call: call, info: *info, @@ -171,10 +171,8 @@ type Candidate struct { func (fuzzer *Fuzzer) NextInput() *Request { req := fuzzer.nextInput() - if req.stat == statCandidate { - if fuzzer.queuedCandidates.Add(-1) < 0 { - panic("queuedCandidates is out of sync") - } + if req.stat == fuzzer.statExecCandidate { + fuzzer.StatCandidates.Add(-1) } return req } @@ -209,7 +207,7 @@ func (fuzzer *Fuzzer) nextInput() *Request { return genProgRequest(fuzzer, rnd) } -func (fuzzer *Fuzzer) startJob(newJob job) { +func (fuzzer *Fuzzer) startJob(stat *stats.Val, newJob job) { fuzzer.Logf(2, "started %T", newJob) if impl, ok := newJob.(jobSaveID); ok { // E.g. 
for big and slow hint jobs, we would prefer not to serialize them, @@ -217,9 +215,11 @@ func (fuzzer *Fuzzer) startJob(newJob job) { impl.saveID(-fuzzer.nextJobID.Add(1)) } go func() { - fuzzer.runningJobs.Add(1) + stat.Add(1) + fuzzer.statJobs.Add(1) newJob.run(fuzzer) - fuzzer.runningJobs.Add(-1) + fuzzer.statJobs.Add(-1) + stat.Add(-1) }() } @@ -231,9 +231,9 @@ func (fuzzer *Fuzzer) Logf(level int, msg string, args ...interface{}) { } func (fuzzer *Fuzzer) AddCandidates(candidates []Candidate) { - fuzzer.queuedCandidates.Add(int64(len(candidates))) + fuzzer.StatCandidates.Add(len(candidates)) for _, candidate := range candidates { - fuzzer.pushExec(candidateRequest(candidate), priority{candidatePrio}) + fuzzer.pushExec(candidateRequest(fuzzer, candidate), priority{candidatePrio}) } } @@ -248,9 +248,6 @@ func (fuzzer *Fuzzer) nextRand() int64 { } func (fuzzer *Fuzzer) pushExec(req *Request, prio priority) { - if req.stat == "" { - panic("Request.Stat field must be set") - } if req.NeedHints && (req.NeedCover || req.NeedSignal != rpctype.NoSignal) { panic("Request.NeedHints is mutually exclusive with other fields") } @@ -327,7 +324,7 @@ func (fuzzer *Fuzzer) logCurrentStats() { runtime.ReadMemStats(&m) str := fmt.Sprintf("exec queue size: %d, running jobs: %d, heap (MB): %d", - fuzzer.nextExec.Len(), fuzzer.runningJobs.Load(), m.Alloc/1000/1000) + fuzzer.nextExec.Len(), fuzzer.statJobs.Val(), m.Alloc/1000/1000) fuzzer.Logf(0, "%s", str) } } diff --git a/pkg/fuzzer/fuzzer_test.go b/pkg/fuzzer/fuzzer_test.go index 0f7a0ae58..87068064f 100644 --- a/pkg/fuzzer/fuzzer_test.go +++ b/pkg/fuzzer/fuzzer_test.go @@ -159,15 +159,13 @@ func TestRotate(t *testing.T) { }) fuzzer.Cover.AddMaxSignal(fakeSignal(1000)) - stats := fuzzer.Stats() - assert.Equal(t, 1000, stats.MaxSignal) - assert.Equal(t, 100, stats.Signal) + assert.Equal(t, 1000, len(fuzzer.Cover.maxSignal)) + assert.Equal(t, 100, corpusObj.StatSignal.Val()) // Rotate some of the signal. 
fuzzer.RotateMaxSignal(200) - stats = fuzzer.Stats() - assert.Equal(t, 800, stats.MaxSignal) - assert.Equal(t, 100, stats.Signal) + assert.Equal(t, 800, len(fuzzer.Cover.maxSignal)) + assert.Equal(t, 100, corpusObj.StatSignal.Val()) plus, minus := fuzzer.Cover.GrabSignalDelta() assert.Equal(t, 0, plus.Len()) @@ -175,9 +173,8 @@ func TestRotate(t *testing.T) { // Rotate the rest. fuzzer.RotateMaxSignal(1000) - stats = fuzzer.Stats() - assert.Equal(t, 100, stats.MaxSignal) - assert.Equal(t, 100, stats.Signal) + assert.Equal(t, 100, len(fuzzer.Cover.maxSignal)) + assert.Equal(t, 100, corpusObj.StatSignal.Val()) plus, minus = fuzzer.Cover.GrabSignalDelta() assert.Equal(t, 0, plus.Len()) assert.Equal(t, 700, minus.Len()) @@ -230,9 +227,9 @@ func (f *testFuzzer) oneMore() bool { defer f.mu.Unlock() f.iter++ if f.iter%100 == 0 { - stat := f.fuzzer.Stats() f.t.Logf(": corpus %d, signal %d, max signal %d, crash types %d, running jobs %d", - f.iter, stat.Progs, stat.Signal, stat.MaxSignal, len(f.crashes), stat.RunningJobs) + f.iter, f.fuzzer.Config.Corpus.StatProgs.Val(), f.fuzzer.Config.Corpus.StatSignal.Val(), + len(f.fuzzer.Cover.maxSignal), len(f.crashes), f.fuzzer.statJobs.Val()) } return f.iter < f.iterLimit && (f.expectedCrashes == nil || len(f.crashes) != len(f.expectedCrashes)) diff --git a/pkg/fuzzer/job.go b/pkg/fuzzer/job.go index e287b9599..2086016bf 100644 --- a/pkg/fuzzer/job.go +++ b/pkg/fuzzer/job.go @@ -12,6 +12,7 @@ import ( "github.com/google/syzkaller/pkg/ipc" "github.com/google/syzkaller/pkg/rpctype" "github.com/google/syzkaller/pkg/signal" + "github.com/google/syzkaller/pkg/stats" "github.com/google/syzkaller/prog" ) @@ -71,7 +72,7 @@ func genProgRequest(fuzzer *Fuzzer, rnd *rand.Rand) *Request { return &Request{ Prog: p, NeedSignal: rpctype.NewSignal, - stat: statGenerate, + stat: fuzzer.statExecGenerate, } } @@ -90,11 +91,11 @@ func mutateProgRequest(fuzzer *Fuzzer, rnd *rand.Rand) *Request { return &Request{ Prog: newP, NeedSignal: 
rpctype.NewSignal, - stat: statFuzz, + stat: fuzzer.statExecFuzz, } } -func candidateRequest(input Candidate) *Request { +func candidateRequest(fuzzer *Fuzzer, input Candidate) *Request { flags := progCandidate if input.Minimized { flags |= progMinimized @@ -105,7 +106,7 @@ func candidateRequest(input Candidate) *Request { return &Request{ Prog: input.Prog, NeedSignal: rpctype.NewSignal, - stat: statCandidate, + stat: fuzzer.statExecCandidate, flags: flags, } } @@ -131,10 +132,11 @@ func triageJobPrio(flags ProgTypes) jobPriority { } func (job *triageJob) run(fuzzer *Fuzzer) { + fuzzer.statNewInputs.Add(1) callName := fmt.Sprintf("call #%v %v", job.call, job.p.CallName(job.call)) fuzzer.Logf(3, "triaging input for %v (new signal=%v)", callName, job.newSignal.Len()) // Compute input coverage and non-flaky signal for minimization. - info, stop := job.deflake(fuzzer.exec, fuzzer.Config.FetchRawCover) + info, stop := job.deflake(fuzzer.exec, fuzzer.statExecTriage, fuzzer.Config.FetchRawCover) if stop || info.newStableSignal.Empty() { return } @@ -149,7 +151,7 @@ func (job *triageJob) run(fuzzer *Fuzzer) { } fuzzer.Logf(2, "added new input for %v to the corpus: %s", callName, job.p) if job.flags&progSmashed == 0 { - fuzzer.startJob(&smashJob{ + fuzzer.startJob(fuzzer.statJobsSmash, &smashJob{ p: job.p.Clone(), call: job.call, }) @@ -171,7 +173,8 @@ type deflakedCover struct { rawCover []uint32 } -func (job *triageJob) deflake(exec func(job, *Request) *Result, rawCover bool) (info deflakedCover, stop bool) { +func (job *triageJob) deflake(exec func(job, *Request) *Result, stat *stats.Val, rawCover bool) ( + info deflakedCover, stop bool) { // As demonstrated in #4639, programs reproduce with a very high, but not 100% probability. // The triage algorithm must tolerate this, so let's pick the signal that is common // to 3 out of 5 runs. 
@@ -196,7 +199,7 @@ func (job *triageJob) deflake(exec func(job, *Request) *Result, rawCover bool) ( NeedSignal: rpctype.AllSignal, NeedCover: true, NeedRawCover: rawCover, - stat: statTriage, + stat: stat, flags: progInTriage, }) if result.Stop { @@ -236,7 +239,7 @@ func (job *triageJob) minimize(fuzzer *Fuzzer, newSignal signal.Signal) (stop bo Prog: p1, NeedSignal: rpctype.AllSignal, SignalFilter: newSignal, - stat: statMinimize, + stat: fuzzer.statExecMinimize, }) if result.Stop { stop = true @@ -292,7 +295,10 @@ func (job *smashJob) priority() priority { func (job *smashJob) run(fuzzer *Fuzzer) { fuzzer.Logf(2, "smashing the program %s (call=%d):", job.p, job.call) if fuzzer.Config.Comparisons && job.call >= 0 { - fuzzer.startJob(newHintsJob(job.p.Clone(), job.call)) + fuzzer.startJob(fuzzer.statJobsHints, &hintsJob{ + p: job.p.Clone(), + call: job.call, + }) } const iters = 75 @@ -306,7 +312,7 @@ func (job *smashJob) run(fuzzer *Fuzzer) { result := fuzzer.exec(job, &Request{ Prog: p, NeedSignal: rpctype.NewSignal, - stat: statSmash, + stat: fuzzer.statExecSmash, }) if result.Stop { return @@ -314,7 +320,7 @@ func (job *smashJob) run(fuzzer *Fuzzer) { if fuzzer.Config.Collide { result := fuzzer.exec(job, &Request{ Prog: randomCollide(p, rnd), - stat: statCollide, + stat: fuzzer.statExecCollide, }) if result.Stop { return @@ -356,7 +362,7 @@ func (job *smashJob) faultInjection(fuzzer *Fuzzer) { newProg.Calls[job.call].Props.FailNth = nth result := fuzzer.exec(job, &Request{ Prog: job.p, - stat: statSmash, + stat: fuzzer.statExecSmash, }) if result.Stop { return @@ -374,13 +380,6 @@ type hintsJob struct { call int } -func newHintsJob(p *prog.Prog, call int) *hintsJob { - return &hintsJob{ - p: p, - call: call, - } -} - func (job *hintsJob) priority() priority { return priority{smashPrio} } @@ -391,7 +390,7 @@ func (job *hintsJob) run(fuzzer *Fuzzer) { result := fuzzer.exec(job, &Request{ Prog: p, NeedHints: true, - stat: statSeed, + stat: fuzzer.statExecSeed, }) 
if result.Stop || result.Info == nil { return @@ -404,7 +403,7 @@ func (job *hintsJob) run(fuzzer *Fuzzer) { result := fuzzer.exec(job, &Request{ Prog: p, NeedSignal: rpctype.NewSignal, - stat: statHint, + stat: fuzzer.statExecHint, }) return !result.Stop }) diff --git a/pkg/fuzzer/job_test.go b/pkg/fuzzer/job_test.go index d9f7873dc..70efc0bb6 100644 --- a/pkg/fuzzer/job_test.go +++ b/pkg/fuzzer/job_test.go @@ -32,7 +32,7 @@ func TestDeflakeFail(t *testing.T) { run++ // For first, we return 0 and 1. For second, 1 and 2. And so on. return fakeResult(0, []uint32{uint32(run), uint32(run + 1)}, []uint32{10, 20}) - }, false) + }, nil, false) assert.False(t, stop) assert.Equal(t, 5, run) assert.Empty(t, ret.stableSignal.ToRaw()) @@ -69,7 +69,7 @@ func TestDeflakeSuccess(t *testing.T) { // We expect it to have finished earlier. t.Fatal("only 4 runs were expected") return nil - }, false) + }, nil, false) assert.False(t, stop) // Cover is a union of all coverages. assert.ElementsMatch(t, []uint32{10, 20, 30, 40}, ret.cover.Serialize()) diff --git a/pkg/fuzzer/stats.go b/pkg/fuzzer/stats.go index 044febc64..38bef0405 100644 --- a/pkg/fuzzer/stats.go +++ b/pkg/fuzzer/stats.go @@ -3,44 +3,55 @@ package fuzzer -import "github.com/google/syzkaller/pkg/corpus" - -const ( - statGenerate = "exec gen" - statFuzz = "exec fuzz" - statCandidate = "exec candidate" - statTriage = "exec triage" - statMinimize = "exec minimize" - statSmash = "exec smash" - statHint = "exec hints" - statSeed = "exec seeds" - statCollide = "exec collide" - statExecTotal = "exec total" - statBufferTooSmall = "buffer too small" -) +import "github.com/google/syzkaller/pkg/stats" type Stats struct { - CoverStats - corpus.Stats - Candidates int - RunningJobs int - // Let's keep stats in Named as long as the rest of the code does not depend - // on their specific values. 
- Named map[string]uint64 + StatCandidates *stats.Val + statNewInputs *stats.Val + statJobs *stats.Val + statJobsTriage *stats.Val + statJobsSmash *stats.Val + statJobsHints *stats.Val + statExecTime *stats.Val + statExecGenerate *stats.Val + statExecFuzz *stats.Val + statExecCandidate *stats.Val + statExecTriage *stats.Val + statExecMinimize *stats.Val + statExecSmash *stats.Val + statExecHint *stats.Val + statExecSeed *stats.Val + statExecCollide *stats.Val } -func (fuzzer *Fuzzer) Stats() Stats { - ret := Stats{ - CoverStats: fuzzer.Cover.Stats(), - Stats: fuzzer.Config.Corpus.Stats(), - Candidates: int(fuzzer.queuedCandidates.Load()), - RunningJobs: int(fuzzer.runningJobs.Load()), - Named: make(map[string]uint64), - } - fuzzer.mu.Lock() - defer fuzzer.mu.Unlock() - for k, v := range fuzzer.stats { - ret.Named[k] = v +func newStats() Stats { + return Stats{ + StatCandidates: stats.Create("candidates", "Number of candidate programs in triage queue", + stats.Graph("corpus")), + statNewInputs: stats.Create("new inputs", "Potential untriaged corpus candidates", + stats.Graph("corpus")), + statJobs: stats.Create("fuzzer jobs", "Total running fuzzer jobs", stats.NoGraph), + statJobsTriage: stats.Create("triage jobs", "Running triage jobs", stats.StackedGraph("jobs")), + statJobsSmash: stats.Create("smash jobs", "Running smash jobs", stats.StackedGraph("jobs")), + statJobsHints: stats.Create("hints jobs", "Running hints jobs", stats.StackedGraph("jobs")), + statExecTime: stats.Create("prog exec time", "Test program execution time (ms)", stats.Distribution{}), + statExecGenerate: stats.Create("exec gen", "Executions of generated programs", stats.Rate{}, + stats.StackedGraph("exec")), + statExecFuzz: stats.Create("exec fuzz", "Executions of mutated programs", + stats.Rate{}, stats.StackedGraph("exec")), + statExecCandidate: stats.Create("exec candidate", "Executions of candidate programs", + stats.Rate{}, stats.StackedGraph("exec")), + statExecTriage: stats.Create("exec 
triage", "Executions of corpus triage programs", + stats.Rate{}, stats.StackedGraph("exec")), + statExecMinimize: stats.Create("exec minimize", "Executions of programs during minimization", + stats.Rate{}, stats.StackedGraph("exec")), + statExecSmash: stats.Create("exec smash", "Executions of smashed programs", + stats.Rate{}, stats.StackedGraph("exec")), + statExecHint: stats.Create("exec hints", "Executions of programs generated using hints", + stats.Rate{}, stats.StackedGraph("exec")), + statExecSeed: stats.Create("exec seeds", "Executions of programs for hints extraction", + stats.Rate{}, stats.StackedGraph("exec")), + statExecCollide: stats.Create("exec collide", "Executions of programs in collide mode", + stats.Rate{}, stats.StackedGraph("exec")), } - return ret } diff --git a/pkg/html/pages/style.css b/pkg/html/pages/style.css index 156bf61ba..9ec565616 100644 --- a/pkg/html/pages/style.css +++ b/pkg/html/pages/style.css @@ -180,14 +180,14 @@ table td, table th { } .list_table .stat_name { - width: 150pt; - max-width: 150pt; + width: 180pt; + max-width: 180pt; font-family: monospace; } .list_table .stat_value { - width: 120pt; - max-width: 120pt; + width: 200pt; + max-width: 200pt; font-family: monospace; } diff --git a/pkg/ipc/ipc.go b/pkg/ipc/ipc.go index c90e56caf..f0aeb2b41 100644 --- a/pkg/ipc/ipc.go +++ b/pkg/ipc/ipc.go @@ -95,8 +95,9 @@ type CallInfo struct { } type ProgInfo struct { - Calls []CallInfo - Extra CallInfo // stores Signal and Cover collected from background threads + Calls []CallInfo + Extra CallInfo // stores Signal and Cover collected from background threads + Elapsed time.Duration // total execution time of the program } type Env struct { @@ -275,7 +276,9 @@ func (env *Env) Exec(opts *ExecOpts, p *prog.Prog) (output []byte, info *ProgInf return } + start := osutil.MonotonicNano() output, hanged, err0 = env.cmd.exec(opts, progData) + elapsed := osutil.MonotonicNano() - start if err0 != nil { env.cmd.close() env.cmd = nil @@ -283,8 
+286,11 @@ func (env *Env) Exec(opts *ExecOpts, p *prog.Prog) (output []byte, info *ProgInf } info, err0 = env.parseOutput(p, opts) - if info != nil && env.config.Flags&FlagSignal == 0 { - addFallbackSignal(p, info) + if info != nil { + info.Elapsed = elapsed + if env.config.Flags&FlagSignal == 0 { + addFallbackSignal(p, info) + } } if !env.config.UseForkServer { env.cmd.close() diff --git a/pkg/rpctype/rpc.go b/pkg/rpctype/rpc.go index 01f0f1b02..b90e13f08 100644 --- a/pkg/rpctype/rpc.go +++ b/pkg/rpctype/rpc.go @@ -10,18 +10,18 @@ import ( "net" "net/rpc" "os" - "sync/atomic" "time" "github.com/google/syzkaller/pkg/log" + "github.com/google/syzkaller/pkg/stats" ) type RPCServer struct { - TotalBytes atomic.Uint64 // total compressed bytes transmitted - ln net.Listener s *rpc.Server useCompression bool + statSent *stats.Val + statRecv *stats.Val } func NewRPCServer(addr, name string, receiver interface{}, useCompression bool) (*RPCServer, error) { @@ -33,10 +33,18 @@ func NewRPCServer(addr, name string, receiver interface{}, useCompression bool) if err := s.RegisterName(name, receiver); err != nil { return nil, err } + formatMB := func(v int, period time.Duration) string { + const KB, MB = 1 << 10, 1 << 20 + return fmt.Sprintf("%v MB (%v kb/sec)", (v+MB/2)/MB, (v+KB/2)/KB/int(period/time.Second)) + } serv := &RPCServer{ ln: ln, s: s, useCompression: useCompression, + statSent: stats.Create("rpc sent", "Uncompressed outbound RPC traffic", + stats.Graph("RPC traffic"), stats.Rate{}, formatMB), + statRecv: stats.Create("rpc recv", "Uncompressed inbound RPC traffic", + stats.Graph("RPC traffic"), stats.Rate{}, formatMB), } return serv, nil } @@ -178,12 +186,12 @@ func newCountedConn(server *RPCServer, func (cc countedConn) Read(p []byte) (n int, err error) { n, err = cc.ReadWriteCloser.Read(p) - cc.server.TotalBytes.Add(uint64(n)) + cc.server.statRecv.Add(n) return } func (cc countedConn) Write(b []byte) (n int, err error) { n, err = cc.ReadWriteCloser.Write(b) - 
cc.server.TotalBytes.Add(uint64(n)) + cc.server.statSent.Add(n) return } diff --git a/pkg/stats/set.go b/pkg/stats/set.go new file mode 100644 index 000000000..54e44e457 --- /dev/null +++ b/pkg/stats/set.go @@ -0,0 +1,499 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package stats + +import ( + "bytes" + "fmt" + "reflect" + "sort" + "strconv" + "sync" + "sync/atomic" + "time" + + "github.com/bsm/histogram/v3" + "github.com/google/syzkaller/pkg/html/pages" + "github.com/prometheus/client_golang/prometheus" +) + +// This file provides prometheus/streamz style metrics (Val type) for instrumenting code for monitoring. +// It also provides a registry for such metrics (set type) and a global default registry. +// +// Simple uses of metrics: +// +// statFoo := stats.Create("metric name", "metric description") +// statFoo.Add(1) +// +// stats.Create("metric name", "metric description", LenOf(mySlice, rwMutex)) +// +// Metric visualization code uses Collect/RenderHTML functions to obtain values of all registered metrics. + +type UI struct { + Name string + Desc string + Link string + Level Level + Value string + V int +} + +func Create(name, desc string, opts ...any) *Val { + return global.Create(name, desc, opts...) 
+} + +func Collect(level Level) []UI { + return global.Collect(level) +} + +func RenderHTML() ([]byte, error) { + return global.RenderHTML() +} + +func Import(named map[string]uint64) { + global.Import(named) +} + +var global = newSet(256, true) + +type set struct { + mu sync.Mutex + vals map[string]*Val + graphs map[string]*graph + totalTicks int + historySize int + historyTicks int + historyPos int + historyScale int +} + +type graph struct { + level Level + stacked bool + lines map[string]*line +} + +type line struct { + desc string + rate bool + data []float64 + hist []*histogram.Histogram +} + +const ( + tickPeriod = time.Second + histogramBuckets = 255 +) + +func newSet(histSize int, tick bool) *set { + s := &set{ + vals: make(map[string]*Val), + historySize: histSize, + historyScale: 1, + graphs: make(map[string]*graph), + } + if tick { + go func() { + for range time.NewTicker(tickPeriod).C { + s.tick() + } + }() + } + return s +} + +func (s *set) Collect(level Level) []UI { + s.mu.Lock() + defer s.mu.Unlock() + period := time.Duration(s.totalTicks) * tickPeriod + if period == 0 { + period = tickPeriod + } + var res []UI + for _, v := range s.vals { + if v.level < level { + continue + } + val := v.Val() + res = append(res, UI{ + Name: v.name, + Desc: v.desc, + Link: v.link, + Level: v.level, + Value: v.fmt(val, period), + V: val, + }) + } + sort.Slice(res, func(i, j int) bool { + if res[i].Level != res[j].Level { + return res[i].Level > res[j].Level + } + return res[i].Name < res[j].Name + }) + return res +} + +func (s *set) Import(named map[string]uint64) { + s.mu.Lock() + defer s.mu.Unlock() + for name, val := range named { + v := s.vals[name] + if v == nil { + panic(fmt.Sprintf("imported stat %v is missing", name)) + } + v.Add(int(val)) + } +} + +// Additional options for Val metrics. 
+ +// Level controls if the metric should be printed to console in periodic heartbeat logs, +// or showed on the simple web interface, or showed in the expert interface only. +type Level int + +const ( + All Level = iota + Simple + Console +) + +// Link adds a hyperlink to metric name. +type Link string + +// Prometheus exports the metric to Prometheus under the given name. +type Prometheus string + +// Rate says to collect/visualize metric rate per unit of time rather then total value. +type Rate struct{} + +// Distribution says to collect/visualize histogram of individual sample distributions. +type Distribution struct{} + +// Graph allows to combine multiple related metrics on a single graph. +type Graph string + +// StackedGraph is like Graph, but shows metrics on a stacked graph. +type StackedGraph string + +// NoGraph says to not visualize the metric as a graph. +const NoGraph Graph = "" + +// LenOf reads the metric value from the given slice/map/chan. +func LenOf(containerPtr any, mu *sync.RWMutex) func() int { + v := reflect.ValueOf(containerPtr) + _ = v.Elem().Len() // panics if container is not slice/map/chan + return func() int { + mu.RLock() + defer mu.RUnlock() + return v.Elem().Len() + } +} + +// Addittionally a custom 'func() int' can be passed to read the metric value from the function. +// and 'func(int, time.Duration) string' can be passed for custom formatting of the metric value. 
+ +func (s *set) Create(name, desc string, opts ...any) *Val { + v := &Val{ + name: name, + desc: desc, + graph: name, + fmt: func(v int, period time.Duration) string { return strconv.Itoa(v) }, + } + stacked := false + for _, o := range opts { + switch opt := o.(type) { + case Level: + v.level = opt + case Link: + v.link = string(opt) + case Graph: + v.graph = string(opt) + case StackedGraph: + v.graph = string(opt) + stacked = true + case Rate: + v.rate = true + v.fmt = formatRate + case Distribution: + v.hist = true + case func() int: + v.ext = opt + case func(int, time.Duration) string: + v.fmt = opt + case Prometheus: + // Prometheus Instrumentation https://prometheus.io/docs/guides/go-application. + prometheus.Register(prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Name: string(opt), + Help: desc, + }, + func() float64 { return float64(v.Val()) }, + )) + default: + panic(fmt.Sprintf("unknown stats option %#v", o)) + } + } + s.mu.Lock() + defer s.mu.Unlock() + s.vals[name] = v + if v.graph != "" { + if s.graphs[v.graph] == nil { + s.graphs[v.graph] = &graph{ + lines: make(map[string]*line), + } + } + if s.graphs[v.graph].level < v.level { + s.graphs[v.graph].level = v.level + } + s.graphs[v.graph].stacked = stacked + } + return v +} + +type Val struct { + name string + desc string + link string + graph string + level Level + val atomic.Uint64 + ext func() int + fmt func(int, time.Duration) string + rate bool + hist bool + prev int + histMu sync.Mutex + histVal *histogram.Histogram +} + +func (v *Val) Add(val int) { + if v.ext != nil { + panic(fmt.Sprintf("stat %v is in external mode", v.name)) + } + if v.hist { + v.histMu.Lock() + if v.histVal == nil { + v.histVal = histogram.New(histogramBuckets) + } + v.histVal.Add(float64(val)) + v.histMu.Unlock() + return + } + v.val.Add(uint64(val)) +} + +func (v *Val) Val() int { + if v.ext != nil { + return v.ext() + } + if v.hist { + v.histMu.Lock() + defer v.histMu.Unlock() + if v.histVal == nil { + return 0 + } + 
return int(v.histVal.Mean()) + } + return int(v.val.Load()) +} + +func formatRate(v int, period time.Duration) string { + secs := int(period.Seconds()) + if x := v / secs; x >= 10 { + return fmt.Sprintf("%v (%v/sec)", v, x) + } + if x := v * 60 / secs; x >= 10 { + return fmt.Sprintf("%v (%v/min)", v, x) + } + x := v * 60 * 60 / secs + return fmt.Sprintf("%v (%v/hour)", v, x) +} + +func (s *set) tick() { + s.mu.Lock() + defer s.mu.Unlock() + + if s.historyPos == s.historySize { + s.compress() + } + + s.totalTicks++ + s.historyTicks++ + for _, v := range s.vals { + if v.graph == "" { + continue + } + graph := s.graphs[v.graph] + ln := graph.lines[v.name] + if ln == nil { + ln = &line{ + desc: v.desc, + rate: v.rate, + } + if v.hist { + ln.hist = make([]*histogram.Histogram, s.historySize) + } else { + ln.data = make([]float64, s.historySize) + } + graph.lines[v.name] = ln + } + if v.hist { + if s.historyTicks == s.historyScale { + v.histMu.Lock() + ln.hist[s.historyPos] = v.histVal + v.histVal = nil + v.histMu.Unlock() + } + } else { + val := v.Val() + pv := &ln.data[s.historyPos] + if v.rate { + *pv += float64(val-v.prev) / float64(s.historyScale) + v.prev = val + } else { + if *pv < float64(val) { + *pv = float64(val) + } + } + } + } + if s.historyTicks != s.historyScale { + return + } + s.historyTicks = 0 + s.historyPos++ +} + +func (s *set) compress() { + half := s.historySize / 2 + s.historyPos = half + s.historyScale *= 2 + for _, graph := range s.graphs { + for _, line := range graph.lines { + for i := 0; i < half; i++ { + if line.hist != nil { + h1, h2 := line.hist[2*i], line.hist[2*i+1] + line.hist[2*i], line.hist[2*i+1] = nil, nil + line.hist[i] = h1 + if h1 == nil { + line.hist[i] = h2 + } + } else { + v1, v2 := line.data[2*i], line.data[2*i+1] + line.data[2*i], line.data[2*i+1] = 0, 0 + if line.rate { + line.data[i] = (v1 + v2) / 2 + } else { + line.data[i] = v1 + if v2 > v1 { + line.data[i] = v2 + } + } + } + } + } + } +} + +func (s *set) RenderHTML() 
([]byte, error) { + s.mu.Lock() + defer s.mu.Unlock() + type Point struct { + X int + Y []float64 + } + type Graph struct { + ID int + Title string + Stacked bool + Level Level + Lines []string + Points []Point + } + var graphs []Graph + tick := s.historyScale * int(tickPeriod.Seconds()) + for title, graph := range s.graphs { + if len(graph.lines) == 0 { + continue + } + g := Graph{ + ID: len(graphs), + Title: title, + Stacked: graph.stacked, + Level: graph.level, + Points: make([]Point, s.historyPos), + } + for i := 0; i < s.historyPos; i++ { + g.Points[i].X = i * tick + } + for name, ln := range graph.lines { + if ln.hist == nil { + g.Lines = append(g.Lines, name+": "+ln.desc) + for i := 0; i < s.historyPos; i++ { + g.Points[i].Y = append(g.Points[i].Y, ln.data[i]) + } + } else { + for _, percent := range []int{10, 50, 90} { + g.Lines = append(g.Lines, fmt.Sprintf("%v%%", percent)) + for i := 0; i < s.historyPos; i++ { + v := 0.0 + if ln.hist[i] != nil { + v = ln.hist[i].Quantile(float64(percent) / 100) + } + g.Points[i].Y = append(g.Points[i].Y, v) + } + } + } + } + graphs = append(graphs, g) + } + sort.Slice(graphs, func(i, j int) bool { + if graphs[i].Level != graphs[j].Level { + return graphs[i].Level > graphs[j].Level + } + return graphs[i].Title < graphs[j].Title + }) + buf := new(bytes.Buffer) + err := htmlTemplate.Execute(buf, graphs) + return buf.Bytes(), err +} + +var htmlTemplate = pages.Create(` + + + + syzkaller stats + + {{HEAD}} + + +{{range $g := .}} +
+ +{{end}} + + +`) diff --git a/pkg/stats/set_test.go b/pkg/stats/set_test.go new file mode 100644 index 000000000..9a19011bb --- /dev/null +++ b/pkg/stats/set_test.go @@ -0,0 +1,219 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package stats + +import ( + "fmt" + "math/rand" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestSet(t *testing.T) { + a := assert.New(t) + set := newSet(4, false) + a.Empty(set.Collect(All)) + _, err := set.RenderHTML() + a.NoError(err) + + v0 := set.Create("v0", "desc0") + a.Equal(v0.Val(), 0) + v0.Add(1) + a.Equal(v0.Val(), 1) + v0.Add(1) + a.Equal(v0.Val(), 2) + + vv1 := 0 + v1 := set.Create("v1", "desc1", Simple, func() int { return vv1 }) + a.Equal(v1.Val(), 0) + vv1 = 11 + a.Equal(v1.Val(), 11) + a.Panics(func() { v1.Add(1) }) + + v2 := set.Create("v2", "desc2", Console, func(v int, period time.Duration) string { + return fmt.Sprintf("v2 %v %v", v, period) + }) + v2.Add(100) + + v3 := set.Create("v3", "desc3", Link("/v3"), NoGraph, Distribution{}) + a.Equal(v3.Val(), 0) + v3.Add(10) + a.Equal(v3.Val(), 10) + v3.Add(20) + a.Equal(v3.Val(), 15) + v3.Add(20) + a.Equal(v3.Val(), 16) + v3.Add(30) + a.Equal(v3.Val(), 20) + v3.Add(30) + a.Equal(v3.Val(), 22) + v3.Add(30) + v3.Add(30) + a.Equal(v3.Val(), 24) + + v4 := set.Create("v4", "desc4", Rate{}, Graph("graph")) + v4.Add(10) + a.Equal(v4.Val(), 10) + v4.Add(10) + a.Equal(v4.Val(), 20) + + a.Panics(func() { set.Create("v0", "desc0", float64(1)) }) + + ui := set.Collect(All) + a.Equal(len(ui), 5) + a.Equal(ui[0], UI{"v2", "desc2", "", Console, "v2 100 1s", 100}) + a.Equal(ui[1], UI{"v1", "desc1", "", Simple, "11", 11}) + a.Equal(ui[2], UI{"v0", "desc0", "", All, "2", 2}) + a.Equal(ui[3], UI{"v3", "desc3", "/v3", All, "24", 24}) + a.Equal(ui[4], UI{"v4", "desc4", "", All, "20 (20/sec)", 20}) + + ui1 := set.Collect(Simple) 
+ a.Equal(len(ui1), 2) + a.Equal(ui1[0].Name, "v2") + a.Equal(ui1[1].Name, "v1") + + ui2 := set.Collect(Console) + a.Equal(len(ui2), 1) + a.Equal(ui2[0].Name, "v2") + + _, err = set.RenderHTML() + a.NoError(err) +} + +func TestSetRateFormat(t *testing.T) { + a := assert.New(t) + set := newSet(4, false) + v := set.Create("v", "desc", Rate{}) + a.Equal(set.Collect(All)[0].Value, "0 (0/hour)") + v.Add(1) + a.Equal(set.Collect(All)[0].Value, "1 (60/min)") + v.Add(99) + a.Equal(set.Collect(All)[0].Value, "100 (100/sec)") +} + +func TestSetHistoryCounter(t *testing.T) { + a := assert.New(t) + set := newSet(4, false) + v := set.Create("v0", "desc0") + set.tick() + hist := func() []float64 { return set.graphs["v0"].lines["v0"].data[:set.historyPos] } + step := func(n int) []float64 { + v.Add(n) + set.tick() + return hist() + } + a.Equal(hist(), []float64{0}) + v.Add(1) + v.Add(1) + a.Equal(hist(), []float64{0}) + set.tick() + a.Equal(hist(), []float64{0, 2}) + v.Add(3) + a.Equal(hist(), []float64{0, 2}) + set.tick() + a.Equal(hist(), []float64{0, 2, 5}) + a.Equal(step(-1), []float64{0, 2, 5, 4}) + // Compacted, each new history value will require 2 steps. + a.Equal(step(7), []float64{2, 5}) + a.Equal(step(-10), []float64{2, 5, 11}) + a.Equal(step(2), []float64{2, 5, 11}) + a.Equal(step(1), []float64{2, 5, 11, 4}) + // 4 steps for each new value. 
+ a.Equal(step(1), []float64{5, 11}) + a.Equal(step(1), []float64{5, 11}) + a.Equal(step(1), []float64{5, 11}) + a.Equal(step(1), []float64{5, 11, 8}) +} + +func TestSetHistoryRate(t *testing.T) { + a := assert.New(t) + set := newSet(4, false) + v := set.Create("v0", "desc0", Rate{}) + step := func(n int) []float64 { + v.Add(n) + set.tick() + return set.graphs["v0"].lines["v0"].data[:set.historyPos] + } + a.Equal(step(3), []float64{3}) + a.Equal(step(1), []float64{3, 1}) + a.Equal(step(2), []float64{3, 1, 2}) + a.Equal(step(5), []float64{3, 1, 2, 5}) + a.Equal(step(1), []float64{2, 3.5}) + a.Equal(step(2), []float64{2, 3.5, 1.5}) + a.Equal(step(2), []float64{2, 3.5, 1.5}) + a.Equal(step(4), []float64{2, 3.5, 1.5, 3}) + a.Equal(step(1), []float64{2.75, 2.25}) + a.Equal(step(2), []float64{2.75, 2.25}) + a.Equal(step(3), []float64{2.75, 2.25}) + a.Equal(step(4), []float64{2.75, 2.25, 2.5}) +} + +func TestSetHistoryDistribution(t *testing.T) { + a := assert.New(t) + set := newSet(4, false) + v := set.Create("v0", "desc0", Distribution{}) + step := func(n int) [3][]float64 { + v.Add(n) + set.tick() + var history [3][]float64 + for p, percent := range []int{10, 50, 90} { + history[p] = make([]float64, set.historyPos) + for i := 0; i < set.historyPos; i++ { + hist := set.graphs["v0"].lines["v0"].hist[i] + if hist != nil { + history[p][i] = hist.Quantile(float64(percent) / 100) + } + } + } + return history + } + a.Equal(step(3), [3][]float64{{3}, {3}, {3}}) + a.Equal(step(6), [3][]float64{{3, 6}, {3, 6}, {3, 6}}) + a.Equal(step(1), [3][]float64{{3, 6, 1}, {3, 6, 1}, {3, 6, 1}}) + a.Equal(step(2), [3][]float64{{3, 6, 1, 2}, {3, 6, 1, 2}, {3, 6, 1, 2}}) + a.Equal(step(1), [3][]float64{{3, 1}, {3, 1}, {3, 1}}) + a.Equal(step(10), [3][]float64{{3, 1, 1}, {3, 1, 10}, {3, 1, 10}}) +} + +func TestSetStress(t *testing.T) { + set := newSet(4, false) + var stop atomic.Bool + var seq atomic.Uint64 + start := func(f func()) { + go func() { + for !stop.Load() { + f() + } + }() + } + 
for p := 0; p < 2; p++ { + for _, opt := range []any{Link(""), NoGraph, Rate{}, Distribution{}} { + opt := opt + go func() { + v := set.Create(fmt.Sprintf("v%v", seq.Add(1)), "desc", opt) + for p1 := 0; p1 < 2; p1++ { + start(func() { v.Val() }) + start(func() { v.Add(rand.Intn(10000)) }) + } + }() + } + go func() { + var vv atomic.Uint64 + v := set.Create(fmt.Sprintf("v%v", seq.Add(1)), "desc", + func() int { return int(vv.Load()) }) + for p1 := 0; p1 < 2; p1++ { + start(func() { v.Val() }) + start(func() { vv.Store(uint64(rand.Intn(10000))) }) + } + }() + start(func() { set.Collect(All) }) + start(func() { set.RenderHTML() }) + start(func() { set.tick() }) + } + time.Sleep(time.Second) + stop.Store(true) +} diff --git a/syz-fuzzer/fuzzer.go b/syz-fuzzer/fuzzer.go index 4448718a1..8e02c2739 100644 --- a/syz-fuzzer/fuzzer.go +++ b/syz-fuzzer/fuzzer.go @@ -45,6 +45,7 @@ type FuzzerTool struct { bufferTooSmall atomic.Uint64 noExecRequests atomic.Uint64 + noExecDuration atomic.Uint64 resetAccState bool inputs chan executionRequest @@ -414,6 +415,7 @@ func (tool *FuzzerTool) grabStats() map[string]uint64 { } stats["buffer too small"] = tool.bufferTooSmall.Swap(0) stats["no exec requests"] = tool.noExecRequests.Swap(0) + stats["no exec duration"] = tool.noExecDuration.Swap(0) return stats } diff --git a/syz-fuzzer/proc.go b/syz-fuzzer/proc.go index 76e8ec435..4f7a00076 100644 --- a/syz-fuzzer/proc.go +++ b/syz-fuzzer/proc.go @@ -14,6 +14,7 @@ import ( "github.com/google/syzkaller/pkg/ipc" "github.com/google/syzkaller/pkg/log" + "github.com/google/syzkaller/pkg/osutil" "github.com/google/syzkaller/pkg/rpctype" "github.com/google/syzkaller/prog" ) @@ -82,8 +83,11 @@ func (proc *Proc) nextRequest() executionRequest { } // Not having enough inputs to execute is a sign of RPC communication problems. // Let's count and report such situations. 
+ start := osutil.MonotonicNano() + req := <-proc.tool.inputs + proc.tool.noExecDuration.Add(uint64(osutil.MonotonicNano() - start)) proc.tool.noExecRequests.Add(1) - return <-proc.tool.inputs + return req } func (proc *Proc) executeRaw(opts *ipc.ExecOpts, p *prog.Prog) *ipc.ProgInfo { diff --git a/syz-manager/http.go b/syz-manager/http.go index 22af491fd..b14c97fe9 100644 --- a/syz-manager/http.go +++ b/syz-manager/http.go @@ -24,6 +24,7 @@ import ( "github.com/google/syzkaller/pkg/html/pages" "github.com/google/syzkaller/pkg/log" "github.com/google/syzkaller/pkg/osutil" + "github.com/google/syzkaller/pkg/stats" "github.com/google/syzkaller/pkg/vcs" "github.com/google/syzkaller/prog" "github.com/gorilla/handlers" @@ -38,6 +39,7 @@ func (mgr *Manager) initHTTP() { handle("/", mgr.httpSummary) handle("/config", mgr.httpConfig) handle("/expert_mode", mgr.httpExpertMode) + handle("/stats", mgr.httpStats) handle("/metrics", promhttp.HandlerFor(prometheus.DefaultGatherer, promhttp.HandlerOpts{}).ServeHTTP) handle("/syscalls", mgr.httpSyscalls) handle("/corpus", mgr.httpCorpus) @@ -71,10 +73,24 @@ func (mgr *Manager) initHTTP() { func (mgr *Manager) httpSummary(w http.ResponseWriter, r *http.Request) { data := &UISummaryData{ - Name: mgr.cfg.Name, - Expert: mgr.expertMode, - Log: log.CachedLogOutput(), - Stats: mgr.collectStats(), + Name: mgr.cfg.Name, + Revision: prog.GitRevisionBase[:8], + RevisionLink: vcs.LogLink(vcs.SyzkallerRepo, prog.GitRevisionBase), + Expert: mgr.expertMode, + Log: log.CachedLogOutput(), + } + + level := stats.Simple + if mgr.expertMode { + level = stats.All + } + for _, stat := range stats.Collect(level) { + data.Stats = append(data.Stats, UIStat{ + Name: stat.Name, + Value: stat.Value, + Hint: stat.Desc, + Link: stat.Link, + }) } var err error @@ -122,89 +138,19 @@ func (mgr *Manager) httpSyscalls(w http.ResponseWriter, r *http.Request) { executeTemplate(w, syscallsTemplate, data) } -func (mgr *Manager) collectStats() []UIStat { - 
mgr.mu.Lock() - defer mgr.mu.Unlock() - - configName := mgr.cfg.Name - if configName == "" { - configName = "config" - } - secs := uint64(1) - if !mgr.firstConnect.IsZero() { - secs = uint64(time.Since(mgr.firstConnect).Seconds()) + 1 - } - rawStats := mgr.stats.all() - head := prog.GitRevisionBase - stats := []UIStat{ - {Name: "revision", Value: fmt.Sprint(head[:8]), Link: vcs.LogLink(vcs.SyzkallerRepo, head)}, - {Name: "config", Value: configName, Link: "/config"}, - {Name: "uptime", Value: fmt.Sprint(time.Since(mgr.startTime) / 1e9 * 1e9)}, - {Name: "fuzzing time", Value: fmt.Sprint(mgr.fuzzingTime / 60e9 * 60e9)}, - {Name: "corpus", Value: fmt.Sprint(mgr.corpus.Stats().Progs), Link: "/corpus"}, - {Name: "triage queue", Value: fmt.Sprint(mgr.stats.triageQueueLen.get())}, - {Name: "crashes", Value: rateStat(rawStats["crashes"], secs)}, - {Name: "crash types", Value: rateStat(rawStats["crash types"], secs)}, - {Name: "suppressed", Value: rateStat(rawStats["suppressed"], secs)}, - {Name: "signal", Value: fmt.Sprint(rawStats["signal"])}, - {Name: "coverage", Value: fmt.Sprint(rawStats["coverage"]), Link: "/cover"}, - {Name: "exec total", Value: rateStat(rawStats["exec total"], secs)}, - } - if mgr.coverFilter != nil { - stats = append(stats, UIStat{ - Name: "filtered coverage", - Value: fmt.Sprintf("%v / %v (%v%%)", - rawStats["filtered coverage"], len(mgr.coverFilter), - rawStats["filtered coverage"]*100/uint64(len(mgr.coverFilter))), - Link: "/cover?filter=yes", - }) - } else { - delete(rawStats, "filtered coverage") - } - if mgr.checkResult != nil { - stats = append(stats, UIStat{ - Name: "syscalls", - Value: fmt.Sprint(len(mgr.checkResult.EnabledCalls[mgr.cfg.Sandbox])), - Link: "/syscalls", - }) - } - for _, stat := range stats { - delete(rawStats, stat.Name) - } - if mgr.expertMode { - var intStats []UIStat - for k, v := range rawStats { - val := "" - switch { - case k == "fuzzer jobs" || strings.HasPrefix(k, "rpc exchange"): - val = fmt.Sprint(v) - default: - 
val = rateStat(v, secs) - } - intStats = append(intStats, UIStat{Name: k, Value: val}) - } - sort.Slice(intStats, func(i, j int) bool { - return intStats[i].Name < intStats[j].Name - }) - stats = append(stats, intStats...) - } - return stats -} - -func rateStat(v, secs uint64) string { - if x := v / secs; x >= 10 { - return fmt.Sprintf("%v (%v/sec)", v, x) - } - if x := v * 60 / secs; x >= 10 { - return fmt.Sprintf("%v (%v/min)", v, x) +func (mgr *Manager) httpStats(w http.ResponseWriter, r *http.Request) { + data, err := stats.RenderHTML() + if err != nil { + log.Logf(0, "failed to execute template: %v", err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return } - x := v * 60 * 60 / secs - return fmt.Sprintf("%v (%v/hour)", v, x) + w.Write(data) } func (mgr *Manager) httpCrash(w http.ResponseWriter, r *http.Request) { crashID := r.FormValue("id") - crash := readCrash(mgr.cfg.Workdir, crashID, nil, mgr.startTime, true) + crash := readCrash(mgr.cfg.Workdir, crashID, nil, mgr.firstConnect.Load(), true) if crash == nil { http.Error(w, "failed to read crash info", http.StatusInternalServerError) return @@ -621,7 +567,7 @@ func (mgr *Manager) collectCrashes(workdir string) ([]*UICrashType, error) { } var crashTypes []*UICrashType for _, dir := range dirs { - crash := readCrash(workdir, dir, repros, mgr.startTime, false) + crash := readCrash(workdir, dir, repros, mgr.firstConnect.Load(), false) if crash != nil { crashTypes = append(crashTypes, crash) } @@ -632,7 +578,7 @@ func (mgr *Manager) collectCrashes(workdir string) ([]*UICrashType, error) { return crashTypes, nil } -func readCrash(workdir, dir string, repros map[string]bool, start time.Time, full bool) *UICrashType { +func readCrash(workdir, dir string, repros map[string]bool, start int64, full bool) *UICrashType { if len(dir) != 40 { return nil } @@ -691,7 +637,7 @@ func readCrash(workdir, dir string, repros map[string]bool, start time.Time, ful crash.Log = filepath.Join("crashes", dir, 
"log"+index) if stat, err := os.Stat(filepath.Join(workdir, crash.Log)); err == nil { crash.Time = stat.ModTime() - crash.Active = crash.Time.After(start) + crash.Active = start != 0 && crash.Time.Unix() >= start } tag, _ := os.ReadFile(filepath.Join(crashdir, dir, "tag"+index)) crash.Tag = string(tag) @@ -709,7 +655,7 @@ func readCrash(workdir, dir string, repros map[string]bool, start time.Time, ful return &UICrashType{ Description: desc, LastTime: modTime, - Active: modTime.After(start), + Active: start != 0 && modTime.Unix() >= start, ID: dir, Count: len(crashes), Triaged: triaged, @@ -751,11 +697,13 @@ func trimNewLines(data []byte) []byte { } type UISummaryData struct { - Name string - Expert bool - Stats []UIStat - Crashes []*UICrashType - Log string + Name string + Revision string + RevisionLink string + Expert bool + Stats []UIStat + Crashes []*UICrashType + Log string } type UISyscallsData struct { @@ -786,6 +734,7 @@ type UICrash struct { type UIStat struct { Name string Value string + Hint string Link string } @@ -817,14 +766,16 @@ var summaryTemplate = pages.Create(` {{.Name }} syzkaller +[config] +{{.Revision}} {{if .Expert}}disable{{else}}enable{{end}} expert mode
- + {{range $s := $.Stats}} - +
Stats:Stats 📈
{{$s.Name}}{{$s.Name}} {{if $s.Link}} {{$s.Value}} diff --git a/syz-manager/hub.go b/syz-manager/hub.go index 04904664d..405ab3075 100644 --- a/syz-manager/hub.go +++ b/syz-manager/hub.go @@ -17,6 +17,7 @@ import ( "github.com/google/syzkaller/pkg/report" "github.com/google/syzkaller/pkg/report/crash" "github.com/google/syzkaller/pkg/rpctype" + "github.com/google/syzkaller/pkg/stats" "github.com/google/syzkaller/prog" ) @@ -41,13 +42,20 @@ func (mgr *Manager) hubSyncLoop(keyGet keyGetter) { mgr: mgr, cfg: mgr.cfg, target: mgr.target, - stats: mgr.stats, domain: mgr.cfg.TargetOS + "/" + mgr.cfg.HubDomain, enabledCalls: mgr.targetEnabledSyscalls, leak: mgr.checkResult.Features[host.FeatureLeak].Enabled, fresh: mgr.fresh, hubReproQueue: mgr.externalReproQueue, keyGet: keyGet, + + statSendProgAdd: stats.Create("hub send prog add", "", stats.Graph("hub progs")), + statSendProgDel: stats.Create("hub send prog del", "", stats.Graph("hub progs")), + statRecvProg: stats.Create("hub recv prog", "", stats.Graph("hub progs")), + statRecvProgDrop: stats.Create("hub recv prog drop", "", stats.NoGraph), + statSendRepro: stats.Create("hub send repro", "", stats.Graph("hub repros")), + statRecvRepro: stats.Create("hub recv repro", "", stats.Graph("hub repros")), + statRecvReproDrop: stats.Create("hub recv repro drop", "", stats.NoGraph), } if mgr.cfg.Reproduce && mgr.dash != nil { hc.needMoreRepros = mgr.needMoreRepros @@ -59,7 +67,6 @@ type HubConnector struct { mgr HubManagerView cfg *mgrconfig.Config target *prog.Target - stats *Stats domain string enabledCalls map[*prog.Syscall]bool leak bool @@ -69,6 +76,14 @@ type HubConnector struct { hubReproQueue chan *Crash needMoreRepros chan chan bool keyGet keyGetter + + statSendProgAdd *stats.Val + statSendProgDel *stats.Val + statRecvProg *stats.Val + statRecvProgDrop *stats.Val + statSendRepro *stats.Val + statRecvRepro *stats.Val + statRecvReproDrop *stats.Val } // HubManagerView restricts interface between HubConnector and 
Manager. @@ -196,13 +211,13 @@ func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus [][]byte) error { } minimized, smashed, progDropped := hc.processProgs(r.Inputs) reproDropped := hc.processRepros(r.Repros) - hc.stats.hubSendProgAdd.add(len(a.Add)) - hc.stats.hubSendProgDel.add(len(a.Del)) - hc.stats.hubSendRepro.add(len(a.Repros)) - hc.stats.hubRecvProg.add(len(r.Inputs) - progDropped) - hc.stats.hubRecvProgDrop.add(progDropped) - hc.stats.hubRecvRepro.add(len(r.Repros) - reproDropped) - hc.stats.hubRecvReproDrop.add(reproDropped) + hc.statSendProgAdd.Add(len(a.Add)) + hc.statSendProgDel.Add(len(a.Del)) + hc.statSendRepro.Add(len(a.Repros)) + hc.statRecvProg.Add(len(r.Inputs) - progDropped) + hc.statRecvProgDrop.Add(progDropped) + hc.statRecvRepro.Add(len(r.Repros) - reproDropped) + hc.statRecvReproDrop.Add(reproDropped) log.Logf(0, "hub sync: send: add %v, del %v, repros %v;"+ " recv: progs %v (min %v, smash %v), repros %v; more %v", len(a.Add), len(a.Del), len(a.Repros), diff --git a/syz-manager/manager.go b/syz-manager/manager.go index e7519b707..57e656034 100644 --- a/syz-manager/manager.go +++ b/syz-manager/manager.go @@ -36,6 +36,7 @@ import ( crash_pkg "github.com/google/syzkaller/pkg/report/crash" "github.com/google/syzkaller/pkg/repro" "github.com/google/syzkaller/pkg/rpctype" + "github.com/google/syzkaller/pkg/stats" "github.com/google/syzkaller/prog" "github.com/google/syzkaller/sys/targets" "github.com/google/syzkaller/vm" @@ -59,18 +60,13 @@ type Manager struct { corpusDB *db.DB corpusDBMu sync.Mutex // for concurrent operations on corpusDB corpusPreloaded chan bool - startTime time.Time - firstConnect time.Time - fuzzingTime time.Duration - stats *Stats + firstConnect atomic.Int64 // unix time, or 0 if not connected crashTypes map[string]bool vmStop chan bool checkResult *rpctype.CheckArgs fresh bool netCompression bool expertMode bool - numFuzzing uint32 - numReproducing uint32 nextInstanceID atomic.Uint64 dash *dashapi.Dashboard @@ -103,6 
+99,8 @@ type Manager struct { afterTriageStatSent bool assetStorage *asset.Storage + + Stats } const ( @@ -178,8 +176,6 @@ func RunManager(cfg *mgrconfig.Config) { sysTarget: cfg.SysTarget, reporter: reporter, crashdir: crashdir, - startTime: time.Now(), - stats: &Stats{haveHub: cfg.HubClient != ""}, crashTypes: make(map[string]bool), disabledHashes: make(map[string]struct{}), memoryLeakFrames: make(map[string]bool), @@ -194,9 +190,9 @@ func RunManager(cfg *mgrconfig.Config) { saturatedCalls: make(map[string]bool), } + mgr.initStats() go mgr.preloadCorpus() - mgr.initStats() // Initializes prometheus variables. - mgr.initHTTP() // Creates HTTP server. + mgr.initHTTP() // Creates HTTP server. mgr.collectUsedFiles() go mgr.corpusInputHandler(corpusUpdates) @@ -224,26 +220,17 @@ func RunManager(cfg *mgrconfig.Config) { for lastTime := time.Now(); ; { time.Sleep(10 * time.Second) now := time.Now() - diff := now.Sub(lastTime) + diff := int(now.Sub(lastTime)) lastTime = now - mgr.mu.Lock() - if mgr.firstConnect.IsZero() { - mgr.mu.Unlock() + if mgr.firstConnect.Load() == 0 { continue } - mgr.fuzzingTime += diff * time.Duration(atomic.LoadUint32(&mgr.numFuzzing)) - mgr.mu.Unlock() - executed := mgr.stats.execTotal.get() - crashes := mgr.stats.crashes.get() - corpusCover := mgr.stats.corpusCover.get() - corpusSignal := mgr.stats.corpusSignal.get() - maxSignal := mgr.stats.maxSignal.get() - triageQLen := mgr.stats.triageQueueLen.get() - numReproducing := atomic.LoadUint32(&mgr.numReproducing) - numFuzzing := atomic.LoadUint32(&mgr.numFuzzing) - - log.Logf(0, "VMs %v, executed %v, cover %v, signal %v/%v, crashes %v, repro %v, triageQLen %v", - numFuzzing, executed, corpusCover, corpusSignal, maxSignal, crashes, numReproducing, triageQLen) + mgr.statFuzzingTime.Add(diff * mgr.statNumFuzzing.Val()) + buf := new(bytes.Buffer) + for _, stat := range stats.Collect(stats.Console) { + fmt.Fprintf(buf, "%v=%v ", stat.Name, stat.Value) + } + log.Logf(0, "%s", buf.String()) } }() @@ 
-275,20 +262,10 @@ func (mgr *Manager) initBench() { go func() { for { time.Sleep(time.Minute) - vals := mgr.stats.all() - mgr.mu.Lock() - if mgr.firstConnect.IsZero() { - mgr.mu.Unlock() - continue + vals := make(map[string]int) + for _, stat := range stats.Collect(stats.All) { + vals[stat.Name] = stat.V } - mgr.minimizeCorpusUnlocked() - stat := mgr.corpus.Stats() - vals["corpus"] = uint64(stat.Progs) - vals["uptime"] = uint64(time.Since(mgr.firstConnect)) / 1e9 - vals["fuzzing"] = uint64(mgr.fuzzingTime) / 1e9 - vals["candidates"] = uint64(mgr.fuzzer.Load().Stats().Candidates) - mgr.mu.Unlock() - data, err := json.MarshalIndent(vals, "", " ") if err != nil { log.Fatalf("failed to serialize bench data") @@ -361,7 +338,7 @@ func (mgr *Manager) vmLoop() { canRepro := func() bool { return phase >= phaseTriagedHub && len(reproQueue) != 0 && - (int(atomic.LoadUint32(&mgr.numReproducing))+1)*instancesPerRepro <= maxReproVMs + (mgr.statNumReproducing.Val()+1)*instancesPerRepro <= maxReproVMs } if shutdown != nil { @@ -374,7 +351,7 @@ func (mgr *Manager) vmLoop() { crash := reproQueue[last] reproQueue[last] = nil reproQueue = reproQueue[:last] - atomic.AddUint32(&mgr.numReproducing, 1) + mgr.statNumReproducing.Add(1) log.Logf(0, "loop: starting repro of '%v' on instances %+v", crash.Title, vmIndexes) go func() { reproDone <- mgr.runRepro(crash, vmIndexes, instances.Put) @@ -422,7 +399,7 @@ func (mgr *Manager) vmLoop() { } } case res := <-reproDone: - atomic.AddUint32(&mgr.numReproducing, ^uint32(0)) + mgr.statNumReproducing.Add(-1) crepro := false title := "" if res.repro != nil { @@ -820,8 +797,8 @@ func (mgr *Manager) runInstanceInner(index int, instanceName string) (*report.Re // Run the fuzzer binary. 
start := time.Now() - atomic.AddUint32(&mgr.numFuzzing, 1) - defer atomic.AddUint32(&mgr.numFuzzing, ^uint32(0)) + mgr.statNumFuzzing.Add(1) + defer mgr.statNumFuzzing.Add(-1) args := &instance.FuzzerCmdArgs{ Fuzzer: fuzzerBin, @@ -909,14 +886,14 @@ func (mgr *Manager) saveCrash(crash *Crash) bool { // Collect all of them into a single bucket so that it's possible to control and assess them, // e.g. if there are some spikes in suppressed reports. crash.Title = "suppressed report" - mgr.stats.crashSuppressed.inc() + mgr.statSuppressed.Add(1) } - mgr.stats.crashes.inc() + mgr.statCrashes.Add(1) mgr.mu.Lock() if !mgr.crashTypes[crash.Title] { mgr.crashTypes[crash.Title] = true - mgr.stats.crashTypes.inc() + mgr.statCrashTypes.Add(1) } mgr.mu.Unlock() @@ -1223,7 +1200,6 @@ func fullReproLog(stats *repro.Stats) []byte { func (mgr *Manager) corpusInputHandler(updates <-chan corpus.NewItemEvent) { for update := range updates { - mgr.stats.newInputs.inc() mgr.serv.updateFilteredCover(update.NewCover) if update.Exists { @@ -1268,12 +1244,12 @@ func (mgr *Manager) addNewCandidates(candidates []fuzzer.Candidate) { } func (mgr *Manager) minimizeCorpusUnlocked() { - currSize := mgr.corpus.Stats().Progs + currSize := mgr.corpus.StatProgs.Val() if mgr.phase < phaseLoadedCorpus || currSize <= mgr.lastMinCorpus*103/100 { return } mgr.corpus.Minimize(mgr.cfg.Cover) - newSize := mgr.corpus.Stats().Progs + newSize := mgr.corpus.StatProgs.Val() log.Logf(1, "minimized corpus: %v -> %v", currSize, newSize) mgr.lastMinCorpus = newSize @@ -1390,7 +1366,10 @@ func (mgr *Manager) machineChecked(a *rpctype.CheckArgs, enabledSyscalls map[*pr mgr.checkResult = a mgr.targetEnabledSyscalls = enabledSyscalls mgr.target.UpdateGlobs(a.GlobFiles) - mgr.firstConnect = time.Now() + mgr.firstConnect.Store(time.Now().Unix()) + statSyscalls := stats.Create("syscalls", "Number of enabled syscalls", + stats.Simple, stats.NoGraph, stats.Link("/syscalls")) + 
statSyscalls.Add(len(mgr.checkResult.EnabledCalls[mgr.cfg.Sandbox])) rnd := rand.New(rand.NewSource(time.Now().UnixNano())) calls := make(map[*prog.Syscall]bool) @@ -1439,7 +1418,7 @@ func (mgr *Manager) fuzzerSignalRotation(fuzzer *fuzzer.Fuzzer) { // 3000/60000 = 5%. execsBetweenRotates = 60000 ) - var lastExecTotal uint64 + lastExecTotal := 0 lastRotation := time.Now() for { time.Sleep(time.Minute * 5) @@ -1449,7 +1428,7 @@ func (mgr *Manager) fuzzerSignalRotation(fuzzer *fuzzer.Fuzzer) { if phase < phaseTriagedCorpus { continue } - if mgr.stats.execTotal.get()-lastExecTotal < execsBetweenRotates { + if mgr.statExecs.Val()-lastExecTotal < execsBetweenRotates { continue } if time.Since(lastRotation) < timeBetweenRotates { @@ -1457,7 +1436,7 @@ func (mgr *Manager) fuzzerSignalRotation(fuzzer *fuzzer.Fuzzer) { } fuzzer.RotateMaxSignal(rotateSignals) lastRotation = time.Now() - lastExecTotal = mgr.stats.execTotal.get() + lastExecTotal = mgr.statExecs.Val() } } @@ -1471,18 +1450,8 @@ func (mgr *Manager) fuzzerLoop(fuzzer *fuzzer.Fuzzer) { len(newSignal), len(dropSignal)) mgr.serv.distributeSignalDelta(newSignal, dropSignal) - // Collect statistics. - fuzzerStats := fuzzer.Stats() - mgr.stats.setNamed(fuzzerStats.Named) - mgr.stats.corpusCover.set(fuzzerStats.Cover) - mgr.stats.corpusSignal.set(fuzzerStats.Signal) - mgr.stats.maxSignal.set(fuzzerStats.MaxSignal) - mgr.stats.triageQueueLen.set(fuzzerStats.Candidates) - mgr.stats.fuzzerJobs.set(fuzzerStats.RunningJobs) - mgr.stats.rpcTraffic.add(int(mgr.serv.server.TotalBytes.Swap(0))) - // Update the state machine. 
- if fuzzerStats.Candidates == 0 { + if fuzzer.StatCandidates.Val() == 0 { mgr.mu.Lock() if mgr.phase == phaseLoadedCorpus { if mgr.cfg.HubClient != "" { @@ -1562,32 +1531,27 @@ func (mgr *Manager) dashboardReporter() { var lastCrashes, lastSuppressedCrashes, lastExecs uint64 for { time.Sleep(time.Minute) - mgr.mu.Lock() - if mgr.firstConnect.IsZero() { - mgr.mu.Unlock() + if mgr.firstConnect.Load() == 0 { continue } - crashes := mgr.stats.crashes.get() - suppressedCrashes := mgr.stats.crashSuppressed.get() - execs := mgr.stats.execTotal.get() - corpusStat := mgr.corpus.Stats() + mgr.mu.Lock() req := &dashapi.ManagerStatsReq{ Name: mgr.cfg.Name, Addr: webAddr, - UpTime: time.Since(mgr.firstConnect), - Corpus: uint64(corpusStat.Progs), - PCs: mgr.stats.corpusCover.get(), - Cover: mgr.stats.corpusSignal.get(), - CrashTypes: mgr.stats.crashTypes.get(), - FuzzingTime: mgr.fuzzingTime - lastFuzzingTime, - Crashes: crashes - lastCrashes, - SuppressedCrashes: suppressedCrashes - lastSuppressedCrashes, - Execs: execs - lastExecs, + UpTime: time.Duration(mgr.statUptime.Val()) * time.Second, + Corpus: uint64(mgr.corpus.StatProgs.Val()), + PCs: uint64(mgr.corpus.StatCover.Val()), + Cover: uint64(mgr.corpus.StatSignal.Val()), + CrashTypes: uint64(mgr.statCrashTypes.Val()), + FuzzingTime: time.Duration(mgr.statFuzzingTime.Val()) - lastFuzzingTime, + Crashes: uint64(mgr.statCrashes.Val()) - lastCrashes, + SuppressedCrashes: uint64(mgr.statSuppressed.Val()) - lastSuppressedCrashes, + Execs: uint64(mgr.statExecs.Val()) - lastExecs, } if mgr.phase >= phaseTriagedCorpus && !mgr.afterTriageStatSent { mgr.afterTriageStatSent = true - req.TriagedCoverage = mgr.stats.corpusSignal.get() - req.TriagedPCs = mgr.stats.corpusCover.get() + req.TriagedCoverage = uint64(mgr.corpus.StatSignal.Val()) + req.TriagedPCs = uint64(mgr.corpus.StatCover.Val()) } mgr.mu.Unlock() diff --git a/syz-manager/rpc.go b/syz-manager/rpc.go index 185b44d0f..c9fa4d28d 100644 --- a/syz-manager/rpc.go +++ 
b/syz-manager/rpc.go @@ -17,6 +17,7 @@ import ( "github.com/google/syzkaller/pkg/mgrconfig" "github.com/google/syzkaller/pkg/rpctype" "github.com/google/syzkaller/pkg/signal" + "github.com/google/syzkaller/pkg/stats" "github.com/google/syzkaller/prog" ) @@ -28,7 +29,6 @@ type RPCServer struct { port int targetEnabledSyscalls map[*prog.Syscall]bool coverFilter map[uint32]uint32 - stats *Stats canonicalModules *cover.Canonicalizer mu sync.Mutex @@ -40,6 +40,13 @@ type RPCServer struct { // We did not finish these requests because of VM restarts. // They will be eventually given to other VMs. rescuedInputs []*fuzzer.Request + + statVMRestarts *stats.Val + statExchangeCalls *stats.Val + statExchangeProgs *stats.Val + statExchangeServerLatency *stats.Val + statExchangeClientLatency *stats.Val + statCorpusCoverFiltered *stats.Val } type Runner struct { @@ -70,9 +77,19 @@ type RPCManagerView interface { func startRPCServer(mgr *Manager) (*RPCServer, error) { serv := &RPCServer{ - mgr: mgr, - cfg: mgr.cfg, - stats: mgr.stats, + mgr: mgr, + cfg: mgr.cfg, + statVMRestarts: stats.Create("vm restarts", "Total number of VM starts", + stats.Rate{}, stats.NoGraph), + statExchangeCalls: stats.Create("exchange calls", "Number of RPC Exchange calls", + stats.Rate{}), + statExchangeProgs: stats.Create("exchange progs", "Test programs exchanged per RPC call", + stats.Distribution{}), + statExchangeServerLatency: stats.Create("exchange manager latency", + "Manager RPC Exchange call latency (us)", stats.Distribution{}), + statExchangeClientLatency: stats.Create("exchange fuzzer latency", + "End-to-end fuzzer RPC Exchange call latency (us)", stats.Distribution{}), + statCorpusCoverFiltered: stats.Create("filtered coverage", "", stats.NoGraph), } s, err := rpctype.NewRPCServer(mgr.cfg.RPC, "Manager", serv, mgr.netCompression) if err != nil { @@ -87,7 +104,7 @@ func startRPCServer(mgr *Manager) (*RPCServer, error) { func (serv *RPCServer) Connect(a *rpctype.ConnectArgs, r 
*rpctype.ConnectRes) error { log.Logf(1, "fuzzer %v connected", a.Name) - serv.stats.vmRestarts.inc() + serv.statVMRestarts.Add(1) serv.mu.Lock() if serv.canonicalModules == nil { @@ -218,7 +235,7 @@ func (serv *RPCServer) ExchangeInfo(a *rpctype.ExchangeInfoRequest, r *rpctype.E runner.doneRequest(result, fuzzer) } - serv.stats.mergeNamed(a.StatsDelta) + stats.Import(a.StatsDelta) runner.mu.Lock() // Let's transfer new max signal in portions. @@ -234,10 +251,10 @@ func (serv *RPCServer) ExchangeInfo(a *rpctype.ExchangeInfoRequest, r *rpctype.E log.Logf(2, "exchange with %s: %d done, %d new requests, %d new max signal, %d drop signal", a.Name, len(a.Results), len(r.Requests), len(r.NewMaxSignal), len(r.DropMaxSignal)) - serv.stats.rpcExchangeCalls.inc() - serv.stats.rpcExchangeProgs.add(a.NeedProgs) - serv.stats.rpcExchangeClientLatency.add(int(a.Latency)) - serv.stats.rpcExchangeServerLatency.add(int(time.Since(start).Nanoseconds())) + serv.statExchangeCalls.Add(1) + serv.statExchangeProgs.Add(a.NeedProgs) + serv.statExchangeClientLatency.Add(int(a.Latency.Microseconds())) + serv.statExchangeServerLatency.Add(int(time.Since(start).Microseconds())) return nil } @@ -256,7 +273,7 @@ func (serv *RPCServer) updateFilteredCover(pcs []uint32) error { filtered++ } } - serv.stats.corpusCoverFiltered.add(filtered) + serv.statCorpusCoverFiltered.Add(filtered) return nil } diff --git a/syz-manager/stats.go b/syz-manager/stats.go index 88db8ed17..7cb67d5ca 100644 --- a/syz-manager/stats.go +++ b/syz-manager/stats.go @@ -4,145 +4,71 @@ package main import ( - "sync" - "sync/atomic" + "fmt" + "runtime" + "time" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/google/syzkaller/pkg/stats" ) -type Stat uint64 - type Stats struct { - crashes Stat - crashTypes Stat - crashSuppressed Stat - vmRestarts Stat - newInputs Stat - execTotal Stat - rpcTraffic Stat - rpcExchangeCalls Stat - rpcExchangeProgs Stat - 
rpcExchangeServerLatency Stat - rpcExchangeClientLatency Stat - hubSendProgAdd Stat - hubSendProgDel Stat - hubSendRepro Stat - hubRecvProg Stat - hubRecvProgDrop Stat - hubRecvRepro Stat - hubRecvReproDrop Stat - corpusCover Stat - corpusCoverFiltered Stat - corpusSignal Stat - maxSignal Stat - triageQueueLen Stat - fuzzerJobs Stat - - mu sync.Mutex - namedStats map[string]uint64 - haveHub bool + statNumFuzzing *stats.Val + statNumReproducing *stats.Val + statExecs *stats.Val + statCrashes *stats.Val + statCrashTypes *stats.Val + statSuppressed *stats.Val + statUptime *stats.Val + statFuzzingTime *stats.Val } func (mgr *Manager) initStats() { - // Prometheus Instrumentation https://prometheus.io/docs/guides/go-application . - prometheus.Register(promauto.NewGaugeFunc(prometheus.GaugeOpts{ - Name: "syz_exec_total", - Help: "Total executions during current execution of syz-manager", - }, - func() float64 { return float64(mgr.stats.execTotal.get()) }, - )) - prometheus.Register(promauto.NewGaugeFunc(prometheus.GaugeOpts{ - Name: "syz_corpus_cover", - Help: "Corpus coverage during current execution of syz-manager", - }, - func() float64 { return float64(mgr.stats.corpusCover.get()) }, - )) - prometheus.Register(promauto.NewGaugeFunc(prometheus.GaugeOpts{ - Name: "syz_crash_total", - Help: "Count of crashes during current execution of syz-manager", - }, - func() float64 { return float64(mgr.stats.crashes.get()) }, - )) -} + mgr.statNumFuzzing = stats.Create("VMs", "Number of VMs that are currently fuzzing", + stats.Console, stats.NoGraph) + mgr.statNumReproducing = stats.Create("reproducing", "Number of crashes being reproduced", + stats.Console, stats.NoGraph) + mgr.statExecs = stats.Create("exec total", "Total test program executions", + stats.Console, stats.Rate{}, stats.Prometheus("syz_exec_total")) + mgr.statCrashes = stats.Create("crashes", "Total number of VM crashes", + stats.Simple, stats.Prometheus("syz_crash_total")) + mgr.statCrashTypes = 
stats.Create("crash types", "Number of unique crashes types", + stats.Simple, stats.NoGraph) + mgr.statSuppressed = stats.Create("suppressed", "Total number of suppressed VM crashes", + stats.Simple, stats.NoGraph) + mgr.statFuzzingTime = stats.Create("fuzzing", "Total fuzzing time in all VMs (seconds)", + stats.NoGraph, func(v int, period time.Duration) string { return fmt.Sprintf("%v sec", v/1e9) }) -func (stats *Stats) all() map[string]uint64 { - m := map[string]uint64{ - "crashes": stats.crashes.get(), - "crash types": stats.crashTypes.get(), - "suppressed": stats.crashSuppressed.get(), - "vm restarts": stats.vmRestarts.get(), - "new inputs": stats.newInputs.get(), - "exec total": stats.execTotal.get(), - "coverage": stats.corpusCover.get(), - "filtered coverage": stats.corpusCoverFiltered.get(), - "signal": stats.corpusSignal.get(), - "max signal": stats.maxSignal.get(), - "rpc traffic (MB)": stats.rpcTraffic.get() >> 20, - "fuzzer jobs": stats.fuzzerJobs.get(), - } - if exchanges := stats.rpcExchangeCalls.get(); exchanges != 0 { - m["exchange calls"] = exchanges - m["exchange progs"] = uint64(float64(stats.rpcExchangeProgs.get())/float64(exchanges) + 0.5) - m["exchange lat server (us)"] = stats.rpcExchangeServerLatency.get() / exchanges / 1e3 - m["exchange lat client (us)"] = stats.rpcExchangeClientLatency.get() / exchanges / 1e3 - } - if stats.haveHub { - m["hub: send prog add"] = stats.hubSendProgAdd.get() - m["hub: send prog del"] = stats.hubSendProgDel.get() - m["hub: send repro"] = stats.hubSendRepro.get() - m["hub: recv prog"] = stats.hubRecvProg.get() - m["hub: recv prog drop"] = stats.hubRecvProgDrop.get() - m["hub: recv repro"] = stats.hubRecvRepro.get() - m["hub: recv repro drop"] = stats.hubRecvReproDrop.get() - } - stats.mu.Lock() - defer stats.mu.Unlock() - for k, v := range stats.namedStats { - m[k] = v - } - return m -} + mgr.statUptime = stats.Create("uptime", "Total uptime (seconds)", stats.Simple, stats.NoGraph, + func() int { + firstConnect 
:= mgr.firstConnect.Load() + if firstConnect == 0 { + return 0 + } + return int(time.Now().Unix() - firstConnect) + }, func(v int, period time.Duration) string { + return fmt.Sprintf("%v sec", v) + }) -func (stats *Stats) mergeNamed(named map[string]uint64) { - stats.mu.Lock() - defer stats.mu.Unlock() - if stats.namedStats == nil { - stats.namedStats = make(map[string]uint64) - } - for k, v := range named { - switch k { - case "exec total": - stats.execTotal.add(int(v)) - default: - stats.namedStats[k] += v - } - } -} - -func (stats *Stats) setNamed(named map[string]uint64) { - stats.mu.Lock() - defer stats.mu.Unlock() - if stats.namedStats == nil { - stats.namedStats = make(map[string]uint64) - } - for k, v := range named { - stats.namedStats[k] = v - } -} - -func (s *Stat) get() uint64 { - return atomic.LoadUint64((*uint64)(s)) -} - -func (s *Stat) inc() { - s.add(1) -} - -func (s *Stat) add(v int) { - atomic.AddUint64((*uint64)(s), uint64(v)) -} + stats.Create("heap", "Process heap size (bytes)", stats.Graph("memory"), + func() int { + var ms runtime.MemStats + runtime.ReadMemStats(&ms) + return int(ms.Alloc) + }, func(v int, period time.Duration) string { + return fmt.Sprintf("%v MB", v>>20) + }) + stats.Create("VM", "Process VM size (bytes)", stats.Graph("memory"), + func() int { + var ms runtime.MemStats + runtime.ReadMemStats(&ms) + return int(ms.Sys - ms.HeapReleased) + }, func(v int, period time.Duration) string { + return fmt.Sprintf("%v MB", v>>20) + }) -func (s *Stat) set(v int) { - atomic.StoreUint64((*uint64)(s), uint64(v)) + // Stats imported from the fuzzer (names must match the fuzzer names). 
+ stats.Create("executor restarts", "Number of times executor process was restarted", stats.Rate{}) + stats.Create("buffer too small", "Program serialization overflowed exec buffer", stats.NoGraph) + stats.Create("no exec requests", "Number of times fuzzer was stalled with no exec requests", stats.Rate{}) + stats.Create("no exec duration", "Total duration fuzzer was stalled with no exec requests (ns/sec)", stats.Rate{}) } -- cgit mrf-deployment