From f44873e33df5bb834265aeced37ef199ce0717c7 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Mon, 15 Apr 2024 11:15:51 +0200
Subject: syz-fuzzer: don't sleep after transient executor errors

There is a non-zero rate of transient executor errors.
Currently we do a full GC, free OS memory and sleep for a second after each one.
This was more meaningful when the fuzzer was in the VM as the fuzzer process
consumed lots of memory. Now it consumes only ~20MB, so any OOMs are likely
not due to the fuzzer process.

So instead sleep briefly and only after several retries
(I would assume most errors are fixed after 1 retry).
---
 syz-fuzzer/fuzzer.go           | 2 ++
 syz-fuzzer/proc.go             | 7 ++++---
 syz-manager/stats.go           | 6 +++++-
 tools/syz-execprog/execprog.go | 4 +++-
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/syz-fuzzer/fuzzer.go b/syz-fuzzer/fuzzer.go
index 2d62d65d5..52e8de1a5 100644
--- a/syz-fuzzer/fuzzer.go
+++ b/syz-fuzzer/fuzzer.go
@@ -42,6 +42,7 @@ type FuzzerTool struct {
 	logMu sync.Mutex
 
 	bufferTooSmall atomic.Uint64
+	execRetries    atomic.Uint64
 	noExecRequests atomic.Uint64
 	noExecDuration atomic.Uint64
 	resetAccState  bool
@@ -398,6 +399,7 @@ func (tool *FuzzerTool) grabStats() map[string]uint64 {
 		stats["executor restarts"] += atomic.SwapUint64(&proc.env.StatRestarts, 0)
 	}
 	stats["buffer too small"] = tool.bufferTooSmall.Swap(0)
+	stats["exec retries"] = tool.execRetries.Swap(0)
 	stats["no exec requests"] = tool.noExecRequests.Swap(0)
 	stats["no exec duration"] = tool.noExecDuration.Swap(0)
 	return stats
diff --git a/syz-fuzzer/proc.go b/syz-fuzzer/proc.go
index 0a94cdc69..e2a308b83 100644
--- a/syz-fuzzer/proc.go
+++ b/syz-fuzzer/proc.go
@@ -6,7 +6,6 @@ package main
 import (
 	"fmt"
 	"math/rand"
-	"runtime/debug"
 	"time"
 
 	"github.com/google/syzkaller/pkg/ipc"
@@ -110,12 +109,14 @@ func (proc *Proc) executeRaw(opts *ipc.ExecOpts, p *prog.Prog) *ipc.ProgInfo {
 				proc.tool.bufferTooSmall.Add(1)
 				return nil
 			}
+			proc.tool.execRetries.Add(1)
 			if try > 10 {
 				log.SyzFatalf("executor %v failed %v times: %v\n%s", proc.pid, try, err, output)
 			}
 			log.Logf(4, "fuzzer detected executor failure='%v', retrying #%d", err, try+1)
-			debug.FreeOSMemory()
-			time.Sleep(time.Second)
+			if try > 3 {
+				time.Sleep(100 * time.Millisecond)
+			}
 			continue
 		}
 		log.Logf(2, "result hanged=%v: %s", hanged, output)
diff --git a/syz-manager/stats.go b/syz-manager/stats.go
index c3d37d391..b8e53e8f7 100644
--- a/syz-manager/stats.go
+++ b/syz-manager/stats.go
@@ -76,7 +76,11 @@ func (mgr *Manager) initStats() {
 		})
 
 	// Stats imported from the fuzzer (names must match the the fuzzer names).
-	stats.Create("executor restarts", "Number of times executor process was restarted", stats.Rate{})
+	stats.Create("executor restarts", "Number of times executor process was restarted",
+		stats.Rate{}, stats.Graph("executor"))
+	stats.Create("exec retries",
+		"Number of times a test program was restarted because the first run failed",
+		stats.Rate{}, stats.Graph("executor"))
 	stats.Create("buffer too small", "Program serialization overflowed exec buffer", stats.NoGraph)
 	stats.Create("no exec requests", "Number of times fuzzer was stalled with no exec requests", stats.Rate{})
 	stats.Create("no exec duration", "Total duration fuzzer was stalled with no exec requests (ns/sec)", stats.Rate{})
diff --git a/tools/syz-execprog/execprog.go b/tools/syz-execprog/execprog.go
index b1c355a1b..d2d194da2 100644
--- a/tools/syz-execprog/execprog.go
+++ b/tools/syz-execprog/execprog.go
@@ -185,7 +185,9 @@ func (ctx *Context) execute(pid int, env *ipc.Env, p *prog.Prog, progIndex int)
 			}
 			// Don't print err/output in this case as it may contain "SYZFAIL" and we want to fail yet.
 			log.Logf(1, "executor failed, retrying")
-			time.Sleep(time.Second)
+			if try > 3 {
+				time.Sleep(100 * time.Millisecond)
+			}
 			continue
 		}
 		if ctx.config.Flags&ipc.FlagDebug != 0 || err != nil {
-- 
cgit mrf-deployment