From 1be1a06281dccada078a2a51e8b483811af8f596 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 29 Mar 2024 15:02:10 +0100 Subject: all: refactor stats Add ability for each package to create and export own stats. Each stat is self-contained, describes how it should be presented, and there is not need to copy them from one package to another. Stats also keep historical data and allow building graphs over time. --- pkg/corpus/corpus.go | 30 ++- pkg/corpus/corpus_test.go | 11 +- pkg/fuzzer/cover.go | 20 +- pkg/fuzzer/fuzzer.go | 43 ++-- pkg/fuzzer/fuzzer_test.go | 19 +- pkg/fuzzer/job.go | 43 ++-- pkg/fuzzer/job_test.go | 4 +- pkg/fuzzer/stats.go | 81 ++++---- pkg/html/pages/style.css | 8 +- pkg/ipc/ipc.go | 14 +- pkg/rpctype/rpc.go | 18 +- pkg/stats/set.go | 499 ++++++++++++++++++++++++++++++++++++++++++++++ pkg/stats/set_test.go | 219 ++++++++++++++++++++ 13 files changed, 866 insertions(+), 143 deletions(-) create mode 100644 pkg/stats/set.go create mode 100644 pkg/stats/set_test.go (limited to 'pkg') diff --git a/pkg/corpus/corpus.go b/pkg/corpus/corpus.go index 19dfee483..46e388366 100644 --- a/pkg/corpus/corpus.go +++ b/pkg/corpus/corpus.go @@ -10,6 +10,7 @@ import ( "github.com/google/syzkaller/pkg/cover" "github.com/google/syzkaller/pkg/hash" "github.com/google/syzkaller/pkg/signal" + "github.com/google/syzkaller/pkg/stats" "github.com/google/syzkaller/prog" ) @@ -23,6 +24,9 @@ type Corpus struct { cover cover.Cover // total coverage of all items updates chan<- NewItemEvent *ProgramsList + StatProgs *stats.Val + StatSignal *stats.Val + StatCover *stats.Val } func NewCorpus(ctx context.Context) *Corpus { @@ -30,12 +34,19 @@ func NewCorpus(ctx context.Context) *Corpus { } func NewMonitoredCorpus(ctx context.Context, updates chan<- NewItemEvent) *Corpus { - return &Corpus{ + corpus := &Corpus{ ctx: ctx, progs: make(map[string]*Item), updates: updates, ProgramsList: &ProgramsList{}, } + corpus.StatProgs = stats.Create("corpus", "Number of test programs in 
the corpus", stats.Console, + stats.Link("/corpus"), stats.Graph("corpus"), stats.LenOf(&corpus.progs, &corpus.mu)) + corpus.StatSignal = stats.Create("signal", "Fuzzing signal in the corpus", + stats.LenOf(&corpus.signal, &corpus.mu)) + corpus.StatCover = stats.Create("coverage", "Source coverage in the corpus", stats.Console, + stats.Link("/cover"), stats.Prometheus("syz_corpus_cover"), stats.LenOf(&corpus.cover, &corpus.mu)) + return corpus } // It may happen that a single program is relevant because of several @@ -161,23 +172,6 @@ func (corpus *Corpus) Item(sig string) *Item { return corpus.progs[sig] } -// Stats is a snapshot of the relevant current state figures. -type Stats struct { - Progs int - Signal int - Cover int -} - -func (corpus *Corpus) Stats() Stats { - corpus.mu.RLock() - defer corpus.mu.RUnlock() - return Stats{ - Progs: len(corpus.progs), - Signal: len(corpus.signal), - Cover: len(corpus.cover), - } -} - type CallCov struct { Count int Cover cover.Cover diff --git a/pkg/corpus/corpus_test.go b/pkg/corpus/corpus_test.go index 0b5a85505..62aad1e04 100644 --- a/pkg/corpus/corpus_test.go +++ b/pkg/corpus/corpus_test.go @@ -48,12 +48,9 @@ func TestCorpusOperation(t *testing.T) { } // Verify the total signal. - stat := corpus.Stats() - assert.Equal(t, Stats{ - Signal: 5, - Cover: 0, - Progs: 2, - }, stat) + assert.Equal(t, corpus.StatSignal.Val(), 5) + assert.Equal(t, corpus.StatCover.Val(), 0) + assert.Equal(t, corpus.StatProgs.Val(), 2) corpus.Minimize(true) } @@ -77,7 +74,7 @@ func TestCorpusCoverage(t *testing.T) { assert.Equal(t, []uint32{12}, event.NewCover) // Check the total corpus size. 
- assert.Equal(t, corpus.Stats().Cover, 3) + assert.Equal(t, corpus.StatCover.Val(), 3) } func TestCorpusSaveConcurrency(t *testing.T) { diff --git a/pkg/fuzzer/cover.go b/pkg/fuzzer/cover.go index 31d1fee1b..03580128d 100644 --- a/pkg/fuzzer/cover.go +++ b/pkg/fuzzer/cover.go @@ -7,6 +7,7 @@ import ( "sync" "github.com/google/syzkaller/pkg/signal" + "github.com/google/syzkaller/pkg/stats" ) // Cover keeps track of the signal known to the fuzzer. @@ -17,6 +18,13 @@ type Cover struct { dropSignal signal.Signal // the newly dropped max signal } +func newCover() *Cover { + cover := new(Cover) + stats.Create("max signal", "Maximum fuzzing signal (including flakes)", + stats.Graph("signal"), stats.LenOf(&cover.maxSignal, &cover.mu)) + return cover +} + // Signal that should no longer be chased after. // It is not returned in GrabSignalDelta(). func (cover *Cover) AddMaxSignal(sign signal.Signal) { @@ -61,18 +69,6 @@ func (cover *Cover) GrabSignalDelta() (plus, minus signal.Signal) { return } -type CoverStats struct { - MaxSignal int -} - -func (cover *Cover) Stats() CoverStats { - cover.mu.RLock() - defer cover.mu.RUnlock() - return CoverStats{ - MaxSignal: len(cover.maxSignal), - } -} - func (cover *Cover) subtract(delta signal.Signal) { cover.mu.Lock() defer cover.mu.Unlock() diff --git a/pkg/fuzzer/fuzzer.go b/pkg/fuzzer/fuzzer.go index 6a1df6ba6..6ed79aa9a 100644 --- a/pkg/fuzzer/fuzzer.go +++ b/pkg/fuzzer/fuzzer.go @@ -16,10 +16,12 @@ import ( "github.com/google/syzkaller/pkg/ipc" "github.com/google/syzkaller/pkg/rpctype" "github.com/google/syzkaller/pkg/signal" + "github.com/google/syzkaller/pkg/stats" "github.com/google/syzkaller/prog" ) type Fuzzer struct { + Stats Config *Config Cover *Cover @@ -36,9 +38,6 @@ type Fuzzer struct { nextExec *priorityQueue[*Request] nextJobID atomic.Int64 - - runningJobs atomic.Int64 - queuedCandidates atomic.Int64 } func NewFuzzer(ctx context.Context, cfg *Config, rnd *rand.Rand, @@ -49,8 +48,9 @@ func NewFuzzer(ctx 
context.Context, cfg *Config, rnd *rand.Rand, } } f := &Fuzzer{ + Stats: newStats(), Config: cfg, - Cover: &Cover{}, + Cover: newCover(), ctx: ctx, stats: map[string]uint64{}, @@ -94,7 +94,7 @@ type Request struct { SignalFilter signal.Signal // If specified, the resulting signal MAY be a subset of it. // Fields that are only relevant within pkg/fuzzer. flags ProgTypes - stat string + stat *stats.Val resultC chan *Result } @@ -117,10 +117,10 @@ func (fuzzer *Fuzzer) Done(req *Request, res *Result) { if req.resultC != nil { req.resultC <- res } - // Update stats. - fuzzer.mu.Lock() - fuzzer.stats[req.stat]++ - fuzzer.mu.Unlock() + if res.Info != nil { + fuzzer.statExecTime.Add(int(res.Info.Elapsed.Milliseconds())) + } + req.stat.Add(1) } func (fuzzer *Fuzzer) triageProgCall(p *prog.Prog, info *ipc.CallInfo, call int, @@ -140,7 +140,7 @@ func (fuzzer *Fuzzer) triageProgCall(p *prog.Prog, info *ipc.CallInfo, call int, return } fuzzer.Logf(2, "found new signal in call %d in %s", call, p) - fuzzer.startJob(&triageJob{ + fuzzer.startJob(fuzzer.statJobsTriage, &triageJob{ p: p.Clone(), call: call, info: *info, @@ -171,10 +171,8 @@ type Candidate struct { func (fuzzer *Fuzzer) NextInput() *Request { req := fuzzer.nextInput() - if req.stat == statCandidate { - if fuzzer.queuedCandidates.Add(-1) < 0 { - panic("queuedCandidates is out of sync") - } + if req.stat == fuzzer.statExecCandidate { + fuzzer.StatCandidates.Add(-1) } return req } @@ -209,7 +207,7 @@ func (fuzzer *Fuzzer) nextInput() *Request { return genProgRequest(fuzzer, rnd) } -func (fuzzer *Fuzzer) startJob(newJob job) { +func (fuzzer *Fuzzer) startJob(stat *stats.Val, newJob job) { fuzzer.Logf(2, "started %T", newJob) if impl, ok := newJob.(jobSaveID); ok { // E.g. 
for big and slow hint jobs, we would prefer not to serialize them, @@ -217,9 +215,11 @@ func (fuzzer *Fuzzer) startJob(newJob job) { impl.saveID(-fuzzer.nextJobID.Add(1)) } go func() { - fuzzer.runningJobs.Add(1) + stat.Add(1) + fuzzer.statJobs.Add(1) newJob.run(fuzzer) - fuzzer.runningJobs.Add(-1) + fuzzer.statJobs.Add(-1) + stat.Add(-1) }() } @@ -231,9 +231,9 @@ func (fuzzer *Fuzzer) Logf(level int, msg string, args ...interface{}) { } func (fuzzer *Fuzzer) AddCandidates(candidates []Candidate) { - fuzzer.queuedCandidates.Add(int64(len(candidates))) + fuzzer.StatCandidates.Add(len(candidates)) for _, candidate := range candidates { - fuzzer.pushExec(candidateRequest(candidate), priority{candidatePrio}) + fuzzer.pushExec(candidateRequest(fuzzer, candidate), priority{candidatePrio}) } } @@ -248,9 +248,6 @@ func (fuzzer *Fuzzer) nextRand() int64 { } func (fuzzer *Fuzzer) pushExec(req *Request, prio priority) { - if req.stat == "" { - panic("Request.Stat field must be set") - } if req.NeedHints && (req.NeedCover || req.NeedSignal != rpctype.NoSignal) { panic("Request.NeedHints is mutually exclusive with other fields") } @@ -327,7 +324,7 @@ func (fuzzer *Fuzzer) logCurrentStats() { runtime.ReadMemStats(&m) str := fmt.Sprintf("exec queue size: %d, running jobs: %d, heap (MB): %d", - fuzzer.nextExec.Len(), fuzzer.runningJobs.Load(), m.Alloc/1000/1000) + fuzzer.nextExec.Len(), fuzzer.statJobs.Val(), m.Alloc/1000/1000) fuzzer.Logf(0, "%s", str) } } diff --git a/pkg/fuzzer/fuzzer_test.go b/pkg/fuzzer/fuzzer_test.go index 0f7a0ae58..87068064f 100644 --- a/pkg/fuzzer/fuzzer_test.go +++ b/pkg/fuzzer/fuzzer_test.go @@ -159,15 +159,13 @@ func TestRotate(t *testing.T) { }) fuzzer.Cover.AddMaxSignal(fakeSignal(1000)) - stats := fuzzer.Stats() - assert.Equal(t, 1000, stats.MaxSignal) - assert.Equal(t, 100, stats.Signal) + assert.Equal(t, 1000, len(fuzzer.Cover.maxSignal)) + assert.Equal(t, 100, corpusObj.StatSignal.Val()) // Rotate some of the signal. 
fuzzer.RotateMaxSignal(200) - stats = fuzzer.Stats() - assert.Equal(t, 800, stats.MaxSignal) - assert.Equal(t, 100, stats.Signal) + assert.Equal(t, 800, len(fuzzer.Cover.maxSignal)) + assert.Equal(t, 100, corpusObj.StatSignal.Val()) plus, minus := fuzzer.Cover.GrabSignalDelta() assert.Equal(t, 0, plus.Len()) @@ -175,9 +173,8 @@ func TestRotate(t *testing.T) { // Rotate the rest. fuzzer.RotateMaxSignal(1000) - stats = fuzzer.Stats() - assert.Equal(t, 100, stats.MaxSignal) - assert.Equal(t, 100, stats.Signal) + assert.Equal(t, 100, len(fuzzer.Cover.maxSignal)) + assert.Equal(t, 100, corpusObj.StatSignal.Val()) plus, minus = fuzzer.Cover.GrabSignalDelta() assert.Equal(t, 0, plus.Len()) assert.Equal(t, 700, minus.Len()) @@ -230,9 +227,9 @@ func (f *testFuzzer) oneMore() bool { defer f.mu.Unlock() f.iter++ if f.iter%100 == 0 { - stat := f.fuzzer.Stats() f.t.Logf(": corpus %d, signal %d, max signal %d, crash types %d, running jobs %d", - f.iter, stat.Progs, stat.Signal, stat.MaxSignal, len(f.crashes), stat.RunningJobs) + f.iter, f.fuzzer.Config.Corpus.StatProgs.Val(), f.fuzzer.Config.Corpus.StatSignal.Val(), + len(f.fuzzer.Cover.maxSignal), len(f.crashes), f.fuzzer.statJobs.Val()) } return f.iter < f.iterLimit && (f.expectedCrashes == nil || len(f.crashes) != len(f.expectedCrashes)) diff --git a/pkg/fuzzer/job.go b/pkg/fuzzer/job.go index e287b9599..2086016bf 100644 --- a/pkg/fuzzer/job.go +++ b/pkg/fuzzer/job.go @@ -12,6 +12,7 @@ import ( "github.com/google/syzkaller/pkg/ipc" "github.com/google/syzkaller/pkg/rpctype" "github.com/google/syzkaller/pkg/signal" + "github.com/google/syzkaller/pkg/stats" "github.com/google/syzkaller/prog" ) @@ -71,7 +72,7 @@ func genProgRequest(fuzzer *Fuzzer, rnd *rand.Rand) *Request { return &Request{ Prog: p, NeedSignal: rpctype.NewSignal, - stat: statGenerate, + stat: fuzzer.statExecGenerate, } } @@ -90,11 +91,11 @@ func mutateProgRequest(fuzzer *Fuzzer, rnd *rand.Rand) *Request { return &Request{ Prog: newP, NeedSignal: 
rpctype.NewSignal, - stat: statFuzz, + stat: fuzzer.statExecFuzz, } } -func candidateRequest(input Candidate) *Request { +func candidateRequest(fuzzer *Fuzzer, input Candidate) *Request { flags := progCandidate if input.Minimized { flags |= progMinimized @@ -105,7 +106,7 @@ func candidateRequest(input Candidate) *Request { return &Request{ Prog: input.Prog, NeedSignal: rpctype.NewSignal, - stat: statCandidate, + stat: fuzzer.statExecCandidate, flags: flags, } } @@ -131,10 +132,11 @@ func triageJobPrio(flags ProgTypes) jobPriority { } func (job *triageJob) run(fuzzer *Fuzzer) { + fuzzer.statNewInputs.Add(1) callName := fmt.Sprintf("call #%v %v", job.call, job.p.CallName(job.call)) fuzzer.Logf(3, "triaging input for %v (new signal=%v)", callName, job.newSignal.Len()) // Compute input coverage and non-flaky signal for minimization. - info, stop := job.deflake(fuzzer.exec, fuzzer.Config.FetchRawCover) + info, stop := job.deflake(fuzzer.exec, fuzzer.statExecTriage, fuzzer.Config.FetchRawCover) if stop || info.newStableSignal.Empty() { return } @@ -149,7 +151,7 @@ func (job *triageJob) run(fuzzer *Fuzzer) { } fuzzer.Logf(2, "added new input for %v to the corpus: %s", callName, job.p) if job.flags&progSmashed == 0 { - fuzzer.startJob(&smashJob{ + fuzzer.startJob(fuzzer.statJobsSmash, &smashJob{ p: job.p.Clone(), call: job.call, }) @@ -171,7 +173,8 @@ type deflakedCover struct { rawCover []uint32 } -func (job *triageJob) deflake(exec func(job, *Request) *Result, rawCover bool) (info deflakedCover, stop bool) { +func (job *triageJob) deflake(exec func(job, *Request) *Result, stat *stats.Val, rawCover bool) ( + info deflakedCover, stop bool) { // As demonstrated in #4639, programs reproduce with a very high, but not 100% probability. // The triage algorithm must tolerate this, so let's pick the signal that is common // to 3 out of 5 runs. 
@@ -196,7 +199,7 @@ func (job *triageJob) deflake(exec func(job, *Request) *Result, rawCover bool) ( NeedSignal: rpctype.AllSignal, NeedCover: true, NeedRawCover: rawCover, - stat: statTriage, + stat: stat, flags: progInTriage, }) if result.Stop { @@ -236,7 +239,7 @@ func (job *triageJob) minimize(fuzzer *Fuzzer, newSignal signal.Signal) (stop bo Prog: p1, NeedSignal: rpctype.AllSignal, SignalFilter: newSignal, - stat: statMinimize, + stat: fuzzer.statExecMinimize, }) if result.Stop { stop = true @@ -292,7 +295,10 @@ func (job *smashJob) priority() priority { func (job *smashJob) run(fuzzer *Fuzzer) { fuzzer.Logf(2, "smashing the program %s (call=%d):", job.p, job.call) if fuzzer.Config.Comparisons && job.call >= 0 { - fuzzer.startJob(newHintsJob(job.p.Clone(), job.call)) + fuzzer.startJob(fuzzer.statJobsHints, &hintsJob{ + p: job.p.Clone(), + call: job.call, + }) } const iters = 75 @@ -306,7 +312,7 @@ func (job *smashJob) run(fuzzer *Fuzzer) { result := fuzzer.exec(job, &Request{ Prog: p, NeedSignal: rpctype.NewSignal, - stat: statSmash, + stat: fuzzer.statExecSmash, }) if result.Stop { return @@ -314,7 +320,7 @@ func (job *smashJob) run(fuzzer *Fuzzer) { if fuzzer.Config.Collide { result := fuzzer.exec(job, &Request{ Prog: randomCollide(p, rnd), - stat: statCollide, + stat: fuzzer.statExecCollide, }) if result.Stop { return @@ -356,7 +362,7 @@ func (job *smashJob) faultInjection(fuzzer *Fuzzer) { newProg.Calls[job.call].Props.FailNth = nth result := fuzzer.exec(job, &Request{ Prog: job.p, - stat: statSmash, + stat: fuzzer.statExecSmash, }) if result.Stop { return @@ -374,13 +380,6 @@ type hintsJob struct { call int } -func newHintsJob(p *prog.Prog, call int) *hintsJob { - return &hintsJob{ - p: p, - call: call, - } -} - func (job *hintsJob) priority() priority { return priority{smashPrio} } @@ -391,7 +390,7 @@ func (job *hintsJob) run(fuzzer *Fuzzer) { result := fuzzer.exec(job, &Request{ Prog: p, NeedHints: true, - stat: statSeed, + stat: fuzzer.statExecSeed, }) 
if result.Stop || result.Info == nil { return @@ -404,7 +403,7 @@ func (job *hintsJob) run(fuzzer *Fuzzer) { result := fuzzer.exec(job, &Request{ Prog: p, NeedSignal: rpctype.NewSignal, - stat: statHint, + stat: fuzzer.statExecHint, }) return !result.Stop }) diff --git a/pkg/fuzzer/job_test.go b/pkg/fuzzer/job_test.go index d9f7873dc..70efc0bb6 100644 --- a/pkg/fuzzer/job_test.go +++ b/pkg/fuzzer/job_test.go @@ -32,7 +32,7 @@ func TestDeflakeFail(t *testing.T) { run++ // For first, we return 0 and 1. For second, 1 and 2. And so on. return fakeResult(0, []uint32{uint32(run), uint32(run + 1)}, []uint32{10, 20}) - }, false) + }, nil, false) assert.False(t, stop) assert.Equal(t, 5, run) assert.Empty(t, ret.stableSignal.ToRaw()) @@ -69,7 +69,7 @@ func TestDeflakeSuccess(t *testing.T) { // We expect it to have finished earlier. t.Fatal("only 4 runs were expected") return nil - }, false) + }, nil, false) assert.False(t, stop) // Cover is a union of all coverages. assert.ElementsMatch(t, []uint32{10, 20, 30, 40}, ret.cover.Serialize()) diff --git a/pkg/fuzzer/stats.go b/pkg/fuzzer/stats.go index 044febc64..38bef0405 100644 --- a/pkg/fuzzer/stats.go +++ b/pkg/fuzzer/stats.go @@ -3,44 +3,55 @@ package fuzzer -import "github.com/google/syzkaller/pkg/corpus" - -const ( - statGenerate = "exec gen" - statFuzz = "exec fuzz" - statCandidate = "exec candidate" - statTriage = "exec triage" - statMinimize = "exec minimize" - statSmash = "exec smash" - statHint = "exec hints" - statSeed = "exec seeds" - statCollide = "exec collide" - statExecTotal = "exec total" - statBufferTooSmall = "buffer too small" -) +import "github.com/google/syzkaller/pkg/stats" type Stats struct { - CoverStats - corpus.Stats - Candidates int - RunningJobs int - // Let's keep stats in Named as long as the rest of the code does not depend - // on their specific values. 
- Named map[string]uint64 + StatCandidates *stats.Val + statNewInputs *stats.Val + statJobs *stats.Val + statJobsTriage *stats.Val + statJobsSmash *stats.Val + statJobsHints *stats.Val + statExecTime *stats.Val + statExecGenerate *stats.Val + statExecFuzz *stats.Val + statExecCandidate *stats.Val + statExecTriage *stats.Val + statExecMinimize *stats.Val + statExecSmash *stats.Val + statExecHint *stats.Val + statExecSeed *stats.Val + statExecCollide *stats.Val } -func (fuzzer *Fuzzer) Stats() Stats { - ret := Stats{ - CoverStats: fuzzer.Cover.Stats(), - Stats: fuzzer.Config.Corpus.Stats(), - Candidates: int(fuzzer.queuedCandidates.Load()), - RunningJobs: int(fuzzer.runningJobs.Load()), - Named: make(map[string]uint64), - } - fuzzer.mu.Lock() - defer fuzzer.mu.Unlock() - for k, v := range fuzzer.stats { - ret.Named[k] = v +func newStats() Stats { + return Stats{ + StatCandidates: stats.Create("candidates", "Number of candidate programs in triage queue", + stats.Graph("corpus")), + statNewInputs: stats.Create("new inputs", "Potential untriaged corpus candidates", + stats.Graph("corpus")), + statJobs: stats.Create("fuzzer jobs", "Total running fuzzer jobs", stats.NoGraph), + statJobsTriage: stats.Create("triage jobs", "Running triage jobs", stats.StackedGraph("jobs")), + statJobsSmash: stats.Create("smash jobs", "Running smash jobs", stats.StackedGraph("jobs")), + statJobsHints: stats.Create("hints jobs", "Running hints jobs", stats.StackedGraph("jobs")), + statExecTime: stats.Create("prog exec time", "Test program execution time (ms)", stats.Distribution{}), + statExecGenerate: stats.Create("exec gen", "Executions of generated programs", stats.Rate{}, + stats.StackedGraph("exec")), + statExecFuzz: stats.Create("exec fuzz", "Executions of mutated programs", + stats.Rate{}, stats.StackedGraph("exec")), + statExecCandidate: stats.Create("exec candidate", "Executions of candidate programs", + stats.Rate{}, stats.StackedGraph("exec")), + statExecTriage: stats.Create("exec 
triage", "Executions of corpus triage programs", + stats.Rate{}, stats.StackedGraph("exec")), + statExecMinimize: stats.Create("exec minimize", "Executions of programs during minimization", + stats.Rate{}, stats.StackedGraph("exec")), + statExecSmash: stats.Create("exec smash", "Executions of smashed programs", + stats.Rate{}, stats.StackedGraph("exec")), + statExecHint: stats.Create("exec hints", "Executions of programs generated using hints", + stats.Rate{}, stats.StackedGraph("exec")), + statExecSeed: stats.Create("exec seeds", "Executions of programs for hints extraction", + stats.Rate{}, stats.StackedGraph("exec")), + statExecCollide: stats.Create("exec collide", "Executions of programs in collide mode", + stats.Rate{}, stats.StackedGraph("exec")), } - return ret } diff --git a/pkg/html/pages/style.css b/pkg/html/pages/style.css index 156bf61ba..9ec565616 100644 --- a/pkg/html/pages/style.css +++ b/pkg/html/pages/style.css @@ -180,14 +180,14 @@ table td, table th { } .list_table .stat_name { - width: 150pt; - max-width: 150pt; + width: 180pt; + max-width: 180pt; font-family: monospace; } .list_table .stat_value { - width: 120pt; - max-width: 120pt; + width: 200pt; + max-width: 200pt; font-family: monospace; } diff --git a/pkg/ipc/ipc.go b/pkg/ipc/ipc.go index c90e56caf..f0aeb2b41 100644 --- a/pkg/ipc/ipc.go +++ b/pkg/ipc/ipc.go @@ -95,8 +95,9 @@ type CallInfo struct { } type ProgInfo struct { - Calls []CallInfo - Extra CallInfo // stores Signal and Cover collected from background threads + Calls []CallInfo + Extra CallInfo // stores Signal and Cover collected from background threads + Elapsed time.Duration // total execution time of the program } type Env struct { @@ -275,7 +276,9 @@ func (env *Env) Exec(opts *ExecOpts, p *prog.Prog) (output []byte, info *ProgInf return } + start := osutil.MonotonicNano() output, hanged, err0 = env.cmd.exec(opts, progData) + elapsed := osutil.MonotonicNano() - start if err0 != nil { env.cmd.close() env.cmd = nil @@ -283,8 
+286,11 @@ func (env *Env) Exec(opts *ExecOpts, p *prog.Prog) (output []byte, info *ProgInf } info, err0 = env.parseOutput(p, opts) - if info != nil && env.config.Flags&FlagSignal == 0 { - addFallbackSignal(p, info) + if info != nil { + info.Elapsed = elapsed + if env.config.Flags&FlagSignal == 0 { + addFallbackSignal(p, info) + } } if !env.config.UseForkServer { env.cmd.close() diff --git a/pkg/rpctype/rpc.go b/pkg/rpctype/rpc.go index 01f0f1b02..b90e13f08 100644 --- a/pkg/rpctype/rpc.go +++ b/pkg/rpctype/rpc.go @@ -10,18 +10,18 @@ import ( "net" "net/rpc" "os" - "sync/atomic" "time" "github.com/google/syzkaller/pkg/log" + "github.com/google/syzkaller/pkg/stats" ) type RPCServer struct { - TotalBytes atomic.Uint64 // total compressed bytes transmitted - ln net.Listener s *rpc.Server useCompression bool + statSent *stats.Val + statRecv *stats.Val } func NewRPCServer(addr, name string, receiver interface{}, useCompression bool) (*RPCServer, error) { @@ -33,10 +33,18 @@ func NewRPCServer(addr, name string, receiver interface{}, useCompression bool) if err := s.RegisterName(name, receiver); err != nil { return nil, err } + formatMB := func(v int, period time.Duration) string { + const KB, MB = 1 << 10, 1 << 20 + return fmt.Sprintf("%v MB (%v kb/sec)", (v+MB/2)/MB, (v+KB/2)/KB/int(period/time.Second)) + } serv := &RPCServer{ ln: ln, s: s, useCompression: useCompression, + statSent: stats.Create("rpc sent", "Uncompressed outbound RPC traffic", + stats.Graph("RPC traffic"), stats.Rate{}, formatMB), + statRecv: stats.Create("rpc recv", "Uncompressed inbound RPC traffic", + stats.Graph("RPC traffic"), stats.Rate{}, formatMB), } return serv, nil } @@ -178,12 +186,12 @@ func newCountedConn(server *RPCServer, func (cc countedConn) Read(p []byte) (n int, err error) { n, err = cc.ReadWriteCloser.Read(p) - cc.server.TotalBytes.Add(uint64(n)) + cc.server.statRecv.Add(n) return } func (cc countedConn) Write(b []byte) (n int, err error) { n, err = cc.ReadWriteCloser.Write(b) - 
cc.server.TotalBytes.Add(uint64(n)) + cc.server.statSent.Add(n) return } diff --git a/pkg/stats/set.go b/pkg/stats/set.go new file mode 100644 index 000000000..54e44e457 --- /dev/null +++ b/pkg/stats/set.go @@ -0,0 +1,499 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package stats + +import ( + "bytes" + "fmt" + "reflect" + "sort" + "strconv" + "sync" + "sync/atomic" + "time" + + "github.com/bsm/histogram/v3" + "github.com/google/syzkaller/pkg/html/pages" + "github.com/prometheus/client_golang/prometheus" +) + +// This file provides prometheus/streamz style metrics (Val type) for instrumenting code for monitoring. +// It also provides a registry for such metrics (set type) and a global default registry. +// +// Simple uses of metrics: +// +// statFoo := stats.Create("metric name", "metric description") +// statFoo.Add(1) +// +// stats.Create("metric name", "metric description", LenOf(mySlice, rwMutex)) +// +// Metric visualization code uses Collect/RenderHTML functions to obtain values of all registered metrics. + +type UI struct { + Name string + Desc string + Link string + Level Level + Value string + V int +} + +func Create(name, desc string, opts ...any) *Val { + return global.Create(name, desc, opts...) 
+} + +func Collect(level Level) []UI { + return global.Collect(level) +} + +func RenderHTML() ([]byte, error) { + return global.RenderHTML() +} + +func Import(named map[string]uint64) { + global.Import(named) +} + +var global = newSet(256, true) + +type set struct { + mu sync.Mutex + vals map[string]*Val + graphs map[string]*graph + totalTicks int + historySize int + historyTicks int + historyPos int + historyScale int +} + +type graph struct { + level Level + stacked bool + lines map[string]*line +} + +type line struct { + desc string + rate bool + data []float64 + hist []*histogram.Histogram +} + +const ( + tickPeriod = time.Second + histogramBuckets = 255 +) + +func newSet(histSize int, tick bool) *set { + s := &set{ + vals: make(map[string]*Val), + historySize: histSize, + historyScale: 1, + graphs: make(map[string]*graph), + } + if tick { + go func() { + for range time.NewTicker(tickPeriod).C { + s.tick() + } + }() + } + return s +} + +func (s *set) Collect(level Level) []UI { + s.mu.Lock() + defer s.mu.Unlock() + period := time.Duration(s.totalTicks) * tickPeriod + if period == 0 { + period = tickPeriod + } + var res []UI + for _, v := range s.vals { + if v.level < level { + continue + } + val := v.Val() + res = append(res, UI{ + Name: v.name, + Desc: v.desc, + Link: v.link, + Level: v.level, + Value: v.fmt(val, period), + V: val, + }) + } + sort.Slice(res, func(i, j int) bool { + if res[i].Level != res[j].Level { + return res[i].Level > res[j].Level + } + return res[i].Name < res[j].Name + }) + return res +} + +func (s *set) Import(named map[string]uint64) { + s.mu.Lock() + defer s.mu.Unlock() + for name, val := range named { + v := s.vals[name] + if v == nil { + panic(fmt.Sprintf("imported stat %v is missing", name)) + } + v.Add(int(val)) + } +} + +// Additional options for Val metrics. 
+
+// Level controls if the metric should be printed to console in periodic heartbeat logs,
+// or showed on the simple web interface, or showed in the expert interface only.
+type Level int
+
+const (
+	All Level = iota
+	Simple
+	Console
+)
+
+// Link adds a hyperlink to metric name.
+type Link string
+
+// Prometheus exports the metric to Prometheus under the given name.
+type Prometheus string
+
+// Rate says to collect/visualize metric rate per unit of time rather than total value.
+type Rate struct{}
+
+// Distribution says to collect/visualize histogram of individual sample distributions.
+type Distribution struct{}
+
+// Graph allows to combine multiple related metrics on a single graph.
+type Graph string
+
+// StackedGraph is like Graph, but shows metrics on a stacked graph.
+type StackedGraph string
+
+// NoGraph says to not visualize the metric as a graph.
+const NoGraph Graph = ""
+
+// LenOf reads the metric value from the given slice/map/chan.
+func LenOf(containerPtr any, mu *sync.RWMutex) func() int {
+	v := reflect.ValueOf(containerPtr)
+	_ = v.Elem().Len() // panics if container is not slice/map/chan
+	return func() int {
+		mu.RLock()
+		defer mu.RUnlock()
+		return v.Elem().Len()
+	}
+}
+
+// Additionally a custom 'func() int' can be passed to read the metric value from the function,
+// and 'func(int, time.Duration) string' can be passed for custom formatting of the metric value. 
+ +func (s *set) Create(name, desc string, opts ...any) *Val { + v := &Val{ + name: name, + desc: desc, + graph: name, + fmt: func(v int, period time.Duration) string { return strconv.Itoa(v) }, + } + stacked := false + for _, o := range opts { + switch opt := o.(type) { + case Level: + v.level = opt + case Link: + v.link = string(opt) + case Graph: + v.graph = string(opt) + case StackedGraph: + v.graph = string(opt) + stacked = true + case Rate: + v.rate = true + v.fmt = formatRate + case Distribution: + v.hist = true + case func() int: + v.ext = opt + case func(int, time.Duration) string: + v.fmt = opt + case Prometheus: + // Prometheus Instrumentation https://prometheus.io/docs/guides/go-application. + prometheus.Register(prometheus.NewGaugeFunc(prometheus.GaugeOpts{ + Name: string(opt), + Help: desc, + }, + func() float64 { return float64(v.Val()) }, + )) + default: + panic(fmt.Sprintf("unknown stats option %#v", o)) + } + } + s.mu.Lock() + defer s.mu.Unlock() + s.vals[name] = v + if v.graph != "" { + if s.graphs[v.graph] == nil { + s.graphs[v.graph] = &graph{ + lines: make(map[string]*line), + } + } + if s.graphs[v.graph].level < v.level { + s.graphs[v.graph].level = v.level + } + s.graphs[v.graph].stacked = stacked + } + return v +} + +type Val struct { + name string + desc string + link string + graph string + level Level + val atomic.Uint64 + ext func() int + fmt func(int, time.Duration) string + rate bool + hist bool + prev int + histMu sync.Mutex + histVal *histogram.Histogram +} + +func (v *Val) Add(val int) { + if v.ext != nil { + panic(fmt.Sprintf("stat %v is in external mode", v.name)) + } + if v.hist { + v.histMu.Lock() + if v.histVal == nil { + v.histVal = histogram.New(histogramBuckets) + } + v.histVal.Add(float64(val)) + v.histMu.Unlock() + return + } + v.val.Add(uint64(val)) +} + +func (v *Val) Val() int { + if v.ext != nil { + return v.ext() + } + if v.hist { + v.histMu.Lock() + defer v.histMu.Unlock() + if v.histVal == nil { + return 0 + } + 
return int(v.histVal.Mean()) + } + return int(v.val.Load()) +} + +func formatRate(v int, period time.Duration) string { + secs := int(period.Seconds()) + if x := v / secs; x >= 10 { + return fmt.Sprintf("%v (%v/sec)", v, x) + } + if x := v * 60 / secs; x >= 10 { + return fmt.Sprintf("%v (%v/min)", v, x) + } + x := v * 60 * 60 / secs + return fmt.Sprintf("%v (%v/hour)", v, x) +} + +func (s *set) tick() { + s.mu.Lock() + defer s.mu.Unlock() + + if s.historyPos == s.historySize { + s.compress() + } + + s.totalTicks++ + s.historyTicks++ + for _, v := range s.vals { + if v.graph == "" { + continue + } + graph := s.graphs[v.graph] + ln := graph.lines[v.name] + if ln == nil { + ln = &line{ + desc: v.desc, + rate: v.rate, + } + if v.hist { + ln.hist = make([]*histogram.Histogram, s.historySize) + } else { + ln.data = make([]float64, s.historySize) + } + graph.lines[v.name] = ln + } + if v.hist { + if s.historyTicks == s.historyScale { + v.histMu.Lock() + ln.hist[s.historyPos] = v.histVal + v.histVal = nil + v.histMu.Unlock() + } + } else { + val := v.Val() + pv := &ln.data[s.historyPos] + if v.rate { + *pv += float64(val-v.prev) / float64(s.historyScale) + v.prev = val + } else { + if *pv < float64(val) { + *pv = float64(val) + } + } + } + } + if s.historyTicks != s.historyScale { + return + } + s.historyTicks = 0 + s.historyPos++ +} + +func (s *set) compress() { + half := s.historySize / 2 + s.historyPos = half + s.historyScale *= 2 + for _, graph := range s.graphs { + for _, line := range graph.lines { + for i := 0; i < half; i++ { + if line.hist != nil { + h1, h2 := line.hist[2*i], line.hist[2*i+1] + line.hist[2*i], line.hist[2*i+1] = nil, nil + line.hist[i] = h1 + if h1 == nil { + line.hist[i] = h2 + } + } else { + v1, v2 := line.data[2*i], line.data[2*i+1] + line.data[2*i], line.data[2*i+1] = 0, 0 + if line.rate { + line.data[i] = (v1 + v2) / 2 + } else { + line.data[i] = v1 + if v2 > v1 { + line.data[i] = v2 + } + } + } + } + } + } +} + +func (s *set) RenderHTML() 
([]byte, error) { + s.mu.Lock() + defer s.mu.Unlock() + type Point struct { + X int + Y []float64 + } + type Graph struct { + ID int + Title string + Stacked bool + Level Level + Lines []string + Points []Point + } + var graphs []Graph + tick := s.historyScale * int(tickPeriod.Seconds()) + for title, graph := range s.graphs { + if len(graph.lines) == 0 { + continue + } + g := Graph{ + ID: len(graphs), + Title: title, + Stacked: graph.stacked, + Level: graph.level, + Points: make([]Point, s.historyPos), + } + for i := 0; i < s.historyPos; i++ { + g.Points[i].X = i * tick + } + for name, ln := range graph.lines { + if ln.hist == nil { + g.Lines = append(g.Lines, name+": "+ln.desc) + for i := 0; i < s.historyPos; i++ { + g.Points[i].Y = append(g.Points[i].Y, ln.data[i]) + } + } else { + for _, percent := range []int{10, 50, 90} { + g.Lines = append(g.Lines, fmt.Sprintf("%v%%", percent)) + for i := 0; i < s.historyPos; i++ { + v := 0.0 + if ln.hist[i] != nil { + v = ln.hist[i].Quantile(float64(percent) / 100) + } + g.Points[i].Y = append(g.Points[i].Y, v) + } + } + } + } + graphs = append(graphs, g) + } + sort.Slice(graphs, func(i, j int) bool { + if graphs[i].Level != graphs[j].Level { + return graphs[i].Level > graphs[j].Level + } + return graphs[i].Title < graphs[j].Title + }) + buf := new(bytes.Buffer) + err := htmlTemplate.Execute(buf, graphs) + return buf.Bytes(), err +} + +var htmlTemplate = pages.Create(` + + + + syzkaller stats + + {{HEAD}} + + +{{range $g := .}} +
+ +{{end}} + + +`) diff --git a/pkg/stats/set_test.go b/pkg/stats/set_test.go new file mode 100644 index 000000000..9a19011bb --- /dev/null +++ b/pkg/stats/set_test.go @@ -0,0 +1,219 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package stats + +import ( + "fmt" + "math/rand" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestSet(t *testing.T) { + a := assert.New(t) + set := newSet(4, false) + a.Empty(set.Collect(All)) + _, err := set.RenderHTML() + a.NoError(err) + + v0 := set.Create("v0", "desc0") + a.Equal(v0.Val(), 0) + v0.Add(1) + a.Equal(v0.Val(), 1) + v0.Add(1) + a.Equal(v0.Val(), 2) + + vv1 := 0 + v1 := set.Create("v1", "desc1", Simple, func() int { return vv1 }) + a.Equal(v1.Val(), 0) + vv1 = 11 + a.Equal(v1.Val(), 11) + a.Panics(func() { v1.Add(1) }) + + v2 := set.Create("v2", "desc2", Console, func(v int, period time.Duration) string { + return fmt.Sprintf("v2 %v %v", v, period) + }) + v2.Add(100) + + v3 := set.Create("v3", "desc3", Link("/v3"), NoGraph, Distribution{}) + a.Equal(v3.Val(), 0) + v3.Add(10) + a.Equal(v3.Val(), 10) + v3.Add(20) + a.Equal(v3.Val(), 15) + v3.Add(20) + a.Equal(v3.Val(), 16) + v3.Add(30) + a.Equal(v3.Val(), 20) + v3.Add(30) + a.Equal(v3.Val(), 22) + v3.Add(30) + v3.Add(30) + a.Equal(v3.Val(), 24) + + v4 := set.Create("v4", "desc4", Rate{}, Graph("graph")) + v4.Add(10) + a.Equal(v4.Val(), 10) + v4.Add(10) + a.Equal(v4.Val(), 20) + + a.Panics(func() { set.Create("v0", "desc0", float64(1)) }) + + ui := set.Collect(All) + a.Equal(len(ui), 5) + a.Equal(ui[0], UI{"v2", "desc2", "", Console, "v2 100 1s", 100}) + a.Equal(ui[1], UI{"v1", "desc1", "", Simple, "11", 11}) + a.Equal(ui[2], UI{"v0", "desc0", "", All, "2", 2}) + a.Equal(ui[3], UI{"v3", "desc3", "/v3", All, "24", 24}) + a.Equal(ui[4], UI{"v4", "desc4", "", All, "20 (20/sec)", 20}) + + ui1 := set.Collect(Simple) 
+ a.Equal(len(ui1), 2) + a.Equal(ui1[0].Name, "v2") + a.Equal(ui1[1].Name, "v1") + + ui2 := set.Collect(Console) + a.Equal(len(ui2), 1) + a.Equal(ui2[0].Name, "v2") + + _, err = set.RenderHTML() + a.NoError(err) +} + +func TestSetRateFormat(t *testing.T) { + a := assert.New(t) + set := newSet(4, false) + v := set.Create("v", "desc", Rate{}) + a.Equal(set.Collect(All)[0].Value, "0 (0/hour)") + v.Add(1) + a.Equal(set.Collect(All)[0].Value, "1 (60/min)") + v.Add(99) + a.Equal(set.Collect(All)[0].Value, "100 (100/sec)") +} + +func TestSetHistoryCounter(t *testing.T) { + a := assert.New(t) + set := newSet(4, false) + v := set.Create("v0", "desc0") + set.tick() + hist := func() []float64 { return set.graphs["v0"].lines["v0"].data[:set.historyPos] } + step := func(n int) []float64 { + v.Add(n) + set.tick() + return hist() + } + a.Equal(hist(), []float64{0}) + v.Add(1) + v.Add(1) + a.Equal(hist(), []float64{0}) + set.tick() + a.Equal(hist(), []float64{0, 2}) + v.Add(3) + a.Equal(hist(), []float64{0, 2}) + set.tick() + a.Equal(hist(), []float64{0, 2, 5}) + a.Equal(step(-1), []float64{0, 2, 5, 4}) + // Compacted, each new history value will require 2 steps. + a.Equal(step(7), []float64{2, 5}) + a.Equal(step(-10), []float64{2, 5, 11}) + a.Equal(step(2), []float64{2, 5, 11}) + a.Equal(step(1), []float64{2, 5, 11, 4}) + // 4 steps for each new value. 
+ a.Equal(step(1), []float64{5, 11}) + a.Equal(step(1), []float64{5, 11}) + a.Equal(step(1), []float64{5, 11}) + a.Equal(step(1), []float64{5, 11, 8}) +} + +func TestSetHistoryRate(t *testing.T) { + a := assert.New(t) + set := newSet(4, false) + v := set.Create("v0", "desc0", Rate{}) + step := func(n int) []float64 { + v.Add(n) + set.tick() + return set.graphs["v0"].lines["v0"].data[:set.historyPos] + } + a.Equal(step(3), []float64{3}) + a.Equal(step(1), []float64{3, 1}) + a.Equal(step(2), []float64{3, 1, 2}) + a.Equal(step(5), []float64{3, 1, 2, 5}) + a.Equal(step(1), []float64{2, 3.5}) + a.Equal(step(2), []float64{2, 3.5, 1.5}) + a.Equal(step(2), []float64{2, 3.5, 1.5}) + a.Equal(step(4), []float64{2, 3.5, 1.5, 3}) + a.Equal(step(1), []float64{2.75, 2.25}) + a.Equal(step(2), []float64{2.75, 2.25}) + a.Equal(step(3), []float64{2.75, 2.25}) + a.Equal(step(4), []float64{2.75, 2.25, 2.5}) +} + +func TestSetHistoryDistribution(t *testing.T) { + a := assert.New(t) + set := newSet(4, false) + v := set.Create("v0", "desc0", Distribution{}) + step := func(n int) [3][]float64 { + v.Add(n) + set.tick() + var history [3][]float64 + for p, percent := range []int{10, 50, 90} { + history[p] = make([]float64, set.historyPos) + for i := 0; i < set.historyPos; i++ { + hist := set.graphs["v0"].lines["v0"].hist[i] + if hist != nil { + history[p][i] = hist.Quantile(float64(percent) / 100) + } + } + } + return history + } + a.Equal(step(3), [3][]float64{{3}, {3}, {3}}) + a.Equal(step(6), [3][]float64{{3, 6}, {3, 6}, {3, 6}}) + a.Equal(step(1), [3][]float64{{3, 6, 1}, {3, 6, 1}, {3, 6, 1}}) + a.Equal(step(2), [3][]float64{{3, 6, 1, 2}, {3, 6, 1, 2}, {3, 6, 1, 2}}) + a.Equal(step(1), [3][]float64{{3, 1}, {3, 1}, {3, 1}}) + a.Equal(step(10), [3][]float64{{3, 1, 1}, {3, 1, 10}, {3, 1, 10}}) +} + +func TestSetStress(t *testing.T) { + set := newSet(4, false) + var stop atomic.Bool + var seq atomic.Uint64 + start := func(f func()) { + go func() { + for !stop.Load() { + f() + } + }() + } + 
for p := 0; p < 2; p++ { + for _, opt := range []any{Link(""), NoGraph, Rate{}, Distribution{}} { + opt := opt + go func() { + v := set.Create(fmt.Sprintf("v%v", seq.Add(1)), "desc", opt) + for p1 := 0; p1 < 2; p1++ { + start(func() { v.Val() }) + start(func() { v.Add(rand.Intn(10000)) }) + } + }() + } + go func() { + var vv atomic.Uint64 + v := set.Create(fmt.Sprintf("v%v", seq.Add(1)), "desc", + func() int { return int(vv.Load()) }) + for p1 := 0; p1 < 2; p1++ { + start(func() { v.Val() }) + start(func() { vv.Store(uint64(rand.Intn(10000))) }) + } + }() + start(func() { set.Collect(All) }) + start(func() { set.RenderHTML() }) + start(func() { set.tick() }) + } + time.Sleep(time.Second) + stop.Store(true) +} -- cgit mrf-deployment