From f44873e33df5bb834265aeced37ef199ce0717c7 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 15 Apr 2024 11:15:51 +0200 Subject: syz-fuzzer: don't sleep after transient executor errors There is a non-zero rate of transient executor errors. Currently we do full GC, free OS memory and sleep for a second after that. This was more meaningful when the fuzzer was in the VM as the fuzzer process consumed lots of memory. Now it consumes only ~20MB, so any OOMs are likely not due to the fuzzer process. So instead sleep briefly and only after several retries (I would assume most errors are fixed after 1 retry). --- syz-fuzzer/fuzzer.go | 2 ++ syz-fuzzer/proc.go | 7 ++++--- syz-manager/stats.go | 6 +++++- tools/syz-execprog/execprog.go | 4 +++- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/syz-fuzzer/fuzzer.go b/syz-fuzzer/fuzzer.go index 2d62d65d5..52e8de1a5 100644 --- a/syz-fuzzer/fuzzer.go +++ b/syz-fuzzer/fuzzer.go @@ -42,6 +42,7 @@ type FuzzerTool struct { logMu sync.Mutex bufferTooSmall atomic.Uint64 + execRetries atomic.Uint64 noExecRequests atomic.Uint64 noExecDuration atomic.Uint64 resetAccState bool @@ -398,6 +399,7 @@ func (tool *FuzzerTool) grabStats() map[string]uint64 { stats["executor restarts"] += atomic.SwapUint64(&proc.env.StatRestarts, 0) } stats["buffer too small"] = tool.bufferTooSmall.Swap(0) + stats["exec retries"] = tool.execRetries.Swap(0) stats["no exec requests"] = tool.noExecRequests.Swap(0) stats["no exec duration"] = tool.noExecDuration.Swap(0) return stats diff --git a/syz-fuzzer/proc.go b/syz-fuzzer/proc.go index 0a94cdc69..e2a308b83 100644 --- a/syz-fuzzer/proc.go +++ b/syz-fuzzer/proc.go @@ -6,7 +6,6 @@ package main import ( "fmt" "math/rand" - "runtime/debug" "time" "github.com/google/syzkaller/pkg/ipc" @@ -110,12 +109,14 @@ func (proc *Proc) executeRaw(opts *ipc.ExecOpts, p *prog.Prog) *ipc.ProgInfo { proc.tool.bufferTooSmall.Add(1) return nil } + proc.tool.execRetries.Add(1) if try > 10 { log.SyzFatalf("executor %v failed 
%v times: %v\n%s", proc.pid, try, err, output) } log.Logf(4, "fuzzer detected executor failure='%v', retrying #%d", err, try+1) - debug.FreeOSMemory() - time.Sleep(time.Second) + if try > 3 { + time.Sleep(100 * time.Millisecond) + } continue } log.Logf(2, "result hanged=%v: %s", hanged, output) diff --git a/syz-manager/stats.go b/syz-manager/stats.go index c3d37d391..b8e53e8f7 100644 --- a/syz-manager/stats.go +++ b/syz-manager/stats.go @@ -76,7 +76,11 @@ func (mgr *Manager) initStats() { }) // Stats imported from the fuzzer (names must match the the fuzzer names). - stats.Create("executor restarts", "Number of times executor process was restarted", stats.Rate{}) + stats.Create("executor restarts", "Number of times executor process was restarted", + stats.Rate{}, stats.Graph("executor")) + stats.Create("exec retries", + "Number of times a test program was restarted because the first run failed", + stats.Rate{}, stats.Graph("executor")) stats.Create("buffer too small", "Program serialization overflowed exec buffer", stats.NoGraph) stats.Create("no exec requests", "Number of times fuzzer was stalled with no exec requests", stats.Rate{}) stats.Create("no exec duration", "Total duration fuzzer was stalled with no exec requests (ns/sec)", stats.Rate{}) diff --git a/tools/syz-execprog/execprog.go b/tools/syz-execprog/execprog.go index b1c355a1b..d2d194da2 100644 --- a/tools/syz-execprog/execprog.go +++ b/tools/syz-execprog/execprog.go @@ -185,7 +185,9 @@ func (ctx *Context) execute(pid int, env *ipc.Env, p *prog.Prog, progIndex int) } // Don't print err/output in this case as it may contain "SYZFAIL" and we want to fail yet. log.Logf(1, "executor failed, retrying") - time.Sleep(time.Second) + if try > 3 { + time.Sleep(100 * time.Millisecond) + } continue } if ctx.config.Flags&ipc.FlagDebug != 0 || err != nil { -- cgit mrf-deployment