From e16e2c9a4cb6937323e861b646792a6c4c978a3c Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 4 Jun 2024 12:55:41 +0200 Subject: executor: add runner mode Move all syz-fuzzer logic into syz-executor and remove syz-fuzzer. Also restore syz-runtest functionality in the manager. Update #4917 (sets most signal handlers to SIG_IGN) --- executor/common.h | 19 +- executor/conn.h | 192 +++++++++++ executor/cover_filter.h | 8 +- executor/executor.cc | 735 +++++++++++++++++++++-------------------- executor/executor_linux.h | 17 +- executor/executor_runner.h | 801 +++++++++++++++++++++++++++++++++++++++++++++ executor/files.h | 85 +++++ executor/shmem.h | 19 +- executor/style_test.go | 5 +- executor/subprocess.h | 129 ++++++++ executor/test.h | 2 +- 11 files changed, 1617 insertions(+), 395 deletions(-) create mode 100644 executor/conn.h create mode 100644 executor/executor_runner.h create mode 100644 executor/files.h create mode 100644 executor/subprocess.h (limited to 'executor') diff --git a/executor/common.h b/executor/common.h index 3a735a086..243d388c2 100644 --- a/executor/common.h +++ b/executor/common.h @@ -226,7 +226,7 @@ static void use_temporary_dir(void) #endif #if GOOS_netbsd || GOOS_freebsd || GOOS_darwin || GOOS_openbsd || GOOS_test -#if (SYZ_EXECUTOR || SYZ_REPEAT) && SYZ_EXECUTOR_USES_FORK_SERVER && (SYZ_EXECUTOR || SYZ_USE_TMP_DIR) +#if SYZ_EXECUTOR || SYZ_REPEAT && SYZ_USE_TMP_DIR && SYZ_EXECUTOR_USES_FORK_SERVER #include #include #include @@ -594,10 +594,6 @@ static void loop(void) #if SYZ_EXECUTOR || SYZ_REPEAT static void execute_one(void); -#if SYZ_EXECUTOR_USES_FORK_SERVER -#include -#include -#include #if GOOS_linux #define WAIT_FLAGS __WALL @@ -605,9 +601,10 @@ static void execute_one(void); #define WAIT_FLAGS 0 #endif -#if SYZ_EXECUTOR -static void reply_handshake(); -#endif +#if SYZ_EXECUTOR_USES_FORK_SERVER +#include +#include +#include static void loop(void) { @@ -616,7 +613,7 @@ static void loop(void) #endif #if SYZ_EXECUTOR // Tell 
parent that we are ready to serve. - reply_handshake(); + reply_execute(0); #endif int iter = 0; #if SYZ_REPEAT_TIMES @@ -675,7 +672,7 @@ static void loop(void) uint64 start = current_time_ms(); #if SYZ_EXECUTOR uint64 last_executed = start; - uint32 executed_calls = __atomic_load_n(output_data, __ATOMIC_RELAXED); + uint32 executed_calls = output_data->completed.load(std::memory_order_relaxed); #endif for (;;) { sleep_ms(10); @@ -695,7 +692,7 @@ static void loop(void) uint64 min_timeout_ms = program_timeout_ms * 3 / 5; uint64 inactive_timeout_ms = syscall_timeout_ms * 20; uint64 now = current_time_ms(); - uint32 now_executed = __atomic_load_n(output_data, __ATOMIC_RELAXED); + uint32 now_executed = output_data->completed.load(std::memory_order_relaxed); if (executed_calls != now_executed) { executed_calls = now_executed; last_executed = now; diff --git a/executor/conn.h b/executor/conn.h new file mode 100644 index 000000000..19026748d --- /dev/null +++ b/executor/conn.h @@ -0,0 +1,192 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +#include +#include +#include +#include +#include +#include + +#include + +// Connection represents a client TCP connection. +// It connects to the given addr:port and allows to send/receive +// flatbuffers-encoded messages. 
+class Connection +{ +public: + Connection(const char* addr, const char* port) + : fd_(Connect(addr, port)) + { + } + + int FD() const + { + return fd_; + } + + template + void Send(const Msg& msg) + { + typedef typename Msg::TableType Raw; + auto off = Raw::Pack(fbb_, &msg); + fbb_.FinishSizePrefixed(off); + auto data = fbb_.GetBufferSpan(); + Send(data.data(), data.size()); + fbb_.Reset(); + } + + template + void Recv(Msg& msg) + { + typedef typename Msg::TableType Raw; + flatbuffers::uoffset_t size; + Recv(&size, sizeof(size)); + recv_buf_.resize(size); + Recv(recv_buf_.data(), size); + auto raw = flatbuffers::GetRoot(recv_buf_.data()); + raw->UnPackTo(&msg); + } + + void Send(const void* data, size_t size) + { + for (size_t sent = 0; sent < size;) { + ssize_t n = write(fd_, static_cast(data) + sent, size - sent); + if (n > 0) { + sent += n; + continue; + } + if (errno == EINTR) + continue; + if (errno == EAGAIN) { + sleep_ms(1); + continue; + } + failmsg("failed to send rpc", "fd=%d want=%zu sent=%zu n=%zd", fd_, size, sent, n); + } + } + +private: + const int fd_; + std::vector recv_buf_; + flatbuffers::FlatBufferBuilder fbb_; + + void Recv(void* data, size_t size) + { + for (size_t recv = 0; recv < size;) { + ssize_t n = read(fd_, static_cast(data) + recv, size - recv); + if (n > 0) { + recv += n; + continue; + } + if (errno == EINTR) + continue; + if (errno == EAGAIN) { + sleep_ms(1); + continue; + } + failmsg("failed to recv rpc", "fd=%d want=%zu sent=%zu n=%zd", fd_, size, recv, n); + } + } + + static int Connect(const char* addr, const char* ports) + { + int port = atoi(ports); + if (port == 0) + failmsg("failed to parse manager port", "port=%s", ports); + if (!strcmp(addr, "stdin")) + return STDIN_FILENO; + sockaddr_in saddr4 = {}; + saddr4.sin_family = AF_INET; + saddr4.sin_port = htons(port); + if (inet_pton(AF_INET, addr, &saddr4.sin_addr)) + return Connect(&saddr4, &saddr4.sin_addr, port); + sockaddr_in6 saddr6 = {}; + saddr6.sin6_family = AF_INET6; 
+ saddr6.sin6_port = htons(port); + if (inet_pton(AF_INET6, addr, &saddr6.sin6_addr)) + return Connect(&saddr6, &saddr6.sin6_addr, port); + auto* hostent = gethostbyname(addr); + if (!hostent) + failmsg("failed to resolve manager addr", "addr=%s h_errno=%d", addr, h_errno); + for (char** addr = hostent->h_addr_list; *addr; addr++) { + int fd; + if (hostent->h_addrtype == AF_INET) { + memcpy(&saddr4.sin_addr, *addr, std::min(hostent->h_length, sizeof(saddr4.sin_addr))); + fd = Connect(&saddr4, &saddr4.sin_addr, port); + } else if (hostent->h_addrtype == AF_INET6) { + memcpy(&saddr6.sin6_addr, *addr, std::min(hostent->h_length, sizeof(saddr6.sin6_addr))); + fd = Connect(&saddr6, &saddr6.sin6_addr, port); + } else { + failmsg("unknown socket family", "family=%d", hostent->h_addrtype); + } + if (fd != -1) + return fd; + } + failmsg("can't connect to manager", "addr=%s:%s", addr, ports); + } + + template + static int Connect(addr_t* addr, void* ip, int port) + { + auto* saddr = reinterpret_cast(addr); + int fd = socket(saddr->sa_family, SOCK_STREAM, IPPROTO_TCP); + if (fd == -1) + fail("failed to create socket"); + char str[128] = {}; + inet_ntop(saddr->sa_family, ip, str, sizeof(str)); + if (connect(fd, saddr, sizeof(*addr))) { + printf("failed to connect to manager at %s:%d: %s\n", str, port, strerror(errno)); + close(fd); + return -1; + } + return fd; + } + + Connection(const Connection&) = delete; + Connection& operator=(const Connection&) = delete; +}; + +// Select is a wrapper around select system call. 
+class Select +{ +public: + Select() + { + FD_ZERO(&rdset_); + } + + void Arm(int fd) + { + FD_SET(fd, &rdset_); + max_fd_ = std::max(max_fd_, fd); + } + + bool Ready(int fd) const + { + return FD_ISSET(fd, &rdset_); + } + + void Wait(int ms) + { + timespec timeout = {.tv_sec = ms / 1000, .tv_nsec = (ms % 1000) * 1000 * 1000}; + if (pselect(max_fd_ + 1, &rdset_, nullptr, nullptr, &timeout, nullptr) < 0) { + if (errno != EINTR && errno != EAGAIN) + fail("pselect failed"); + } + } + + static void Prepare(int fd) + { + if (fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK)) + fail("fcntl(O_NONBLOCK) failed"); + } + +private: + fd_set rdset_; + int max_fd_ = -1; + + Select(const Select&) = delete; + Select& operator=(const Select&) = delete; +}; diff --git a/executor/cover_filter.h b/executor/cover_filter.h index 672e9fbec..c303d8b23 100644 --- a/executor/cover_filter.h +++ b/executor/cover_filter.h @@ -26,13 +26,15 @@ class CoverFilter { public: - CoverFilter(const char* file, void* preferred = nullptr) - : shmem_(file, preferred, kMemSize), tab_(static_cast(shmem_.Mem())) + CoverFilter() + : shmem_(kMemSize), + tab_(static_cast(shmem_.Mem())) { } CoverFilter(int fd, void* preferred = nullptr) - : shmem_(fd, preferred, kMemSize, false), tab_(static_cast(shmem_.Mem())) + : shmem_(fd, preferred, kMemSize, false), + tab_(static_cast(shmem_.Mem())) { } diff --git a/executor/executor.cc b/executor/executor.cc index ca728a6aa..a4ea17f47 100644 --- a/executor/executor.cc +++ b/executor/executor.cc @@ -3,9 +3,6 @@ // +build -// Currently this is unused (included only to test building). -#include "pkg/flatrpc/flatrpc.h" - #include #include #include @@ -18,12 +15,17 @@ #include #include +#include +#include + #if !GOOS_windows #include #endif #include "defs.h" +#include "pkg/flatrpc/flatrpc.h" + #if defined(__GNUC__) #define SYSCALLAPI #define NORETURN __attribute__((noreturn)) @@ -75,6 +77,7 @@ typedef unsigned char uint8; // Note: zircon max fd is 256. 
// Some common_OS.h files know about this constant for RLIMIT_NOFILE. const int kMaxFd = 250; +const int kFdLimit = 256; const int kMaxThreads = 32; const int kInPipeFd = kMaxFd - 1; // remapped from stdin const int kOutPipeFd = kMaxFd - 2; // remapped from stdout @@ -90,8 +93,8 @@ const int kCoverOptimizedPreMmap = 3; // this many will be mmapped inside main() const int kCoverDefaultCount = 6; // otherwise we only init kcov instances inside main() // Logical error (e.g. invalid input program), use as an assert() alternative. -// If such error happens 10+ times in a row, it will be detected as a bug by syz-fuzzer. -// syz-fuzzer will fail and syz-manager will create a bug for this. +// If such error happens 10+ times in a row, it will be detected as a bug by the runner process. +// The runner will fail and syz-manager will create a bug for this. // Note: err is used for bug deduplication, thus distinction between err (constant message) // and msg (varying part). static NORETURN void fail(const char* err); @@ -118,12 +121,8 @@ void debug_dump_data(const char* data, int length); #endif static void receive_execute(); -static void reply_execute(int status); - -#if SYZ_EXECUTOR_USES_FORK_SERVER +static void reply_execute(uint32 status); static void receive_handshake(); -static void reply_handshake(); -#endif #if SYZ_EXECUTOR_USES_FORK_SERVER // Allocating (and forking) virtual memory for each executed process is expensive, so we only mmap @@ -133,28 +132,133 @@ const int kMaxOutputCoverage = 6 << 20; // coverage is needed in ~ up to 1/3 of const int kMaxOutputSignal = 4 << 20; const int kMinOutput = 256 << 10; // if we don't need to send signal, the output is rather short. const int kInitialOutput = kMinOutput; // the minimal size to be allocated in the parent process +const int kMaxOutput = kMaxOutputComparisons; #else // We don't fork and allocate the memory only once, so prepare for the worst case. 
const int kInitialOutput = 14 << 20; +const int kMaxOutput = kInitialOutput; #endif +// For use with flatrpc bit flags. +template +bool IsSet(T flags, T f) +{ + return (flags & f) != T::NONE; +} + // TODO: allocate a smaller amount of memory in the parent once we merge the patches that enable // prog execution with neither signal nor coverage. Likely 64kb will be enough in that case. +const uint32 kMaxCalls = 64; + +struct alignas(8) OutputData { + std::atomic size; + std::atomic consumed; + std::atomic completed; + struct { + // Call index in the test program (they may be out-of-order if some syscalls block). + int index; + // Offset of the CallInfo object in the output region. + flatbuffers::Offset offset; + } calls[kMaxCalls]; + + void Reset() + { + size.store(0, std::memory_order_relaxed); + consumed.store(0, std::memory_order_relaxed); + completed.store(0, std::memory_order_relaxed); + } +}; + +// ShmemAllocator/ShmemBuilder help to construct flatbuffers ExecResult reply message in shared memory. +// +// To avoid copying the reply (in particular coverage/signal/comparisons which may be large), the child +// process starts forming CallInfo objects as it handles completion of syscalls, then the top-most runner +// process uses these CallInfo to form an array of them, and adds ProgInfo object with a reference to the array. +// In order to make this possible, OutputData object is placed at the beginning of the shared memory region, +// and it records metadata required to start serialization in one process and continue later in another process. +// +// OutputData::size is the size of the whole shmem region that the child uses (it is a different size when coverage/ +// comparisons are requested). Note that flatbuffers serialization happens from the end of the buffer backwards. +// OutputData::consumed records currently consumed amount of memory in the shmem region so that the parent process +// can continue from that point. 
+// OutputData::completed records number of completed calls (entries in OutputData::calls arrays). +// Flatbuffers identifies everything using offsets in the buffer, OutputData::calls::offset records this offset +// for the call object so that we can use it in the parent process to construct the array of calls. +// +// FlatBufferBuilder generally grows the underlying buffer incrementally as necessary and copies data +// (std::vector style). We cannot do this in the shared memory since we have only a single region. +// To allow serialization into the shared memory region, ShmemBuilder passes initial buffer size which is equal +// to the overall shmem region size (minus OutputData header size) to FlatBufferBuilder, and the custom +// ShmemAllocator allocator. As a result, FlatBufferBuilder does exactly one allocation request +// to ShmemAllocator and never reallocates (if we overflow the buffer and FlatBufferBuilder does another request, +// ShmemAllocator will fail). +class ShmemAllocator : public flatbuffers::Allocator +{ +public: + ShmemAllocator(void* buf, size_t size) + : buf_(buf), + size_(size) + { + } + +private: + void* buf_; + size_t size_; + bool allocated_ = false; + + uint8_t* allocate(size_t size) override + { + if (allocated_ || size != size_) + failmsg("bad allocate request", "allocated=%d size=%zu/%zu", allocated_, size_, size); + allocated_ = true; + return static_cast(buf_); + } + + void deallocate(uint8_t* p, size_t size) override + { + if (!allocated_ || buf_ != p || size_ != size) + failmsg("bad deallocate request", "allocated=%d buf=%p/%p size=%zu/%zu", + allocated_, buf_, p, size_, size); + allocated_ = false; + } + + uint8_t* reallocate_downward(uint8_t* old_p, size_t old_size, + size_t new_size, size_t in_use_back, + size_t in_use_front) override + { + fail("can't reallocate"); + } +}; + +class ShmemBuilder : ShmemAllocator, public flatbuffers::FlatBufferBuilder +{ +public: + ShmemBuilder(OutputData* data, size_t size) + : 
ShmemAllocator(data + 1, size - sizeof(*data)), + FlatBufferBuilder(size - sizeof(*data), this) + { + data->size.store(size, std::memory_order_relaxed); + size_t consumed = data->consumed.load(std::memory_order_relaxed); + if (consumed >= size - sizeof(*data)) + failmsg("ShmemBuilder: too large output offset", "size=%zd consumed=%zd", size, consumed); + if (consumed) + FlatBufferBuilder::buf_.make_space(consumed); + } +}; + const int kInFd = 3; const int kOutFd = 4; -static uint32* output_data; -static uint32* output_pos; -static int output_size; -static void mmap_output(int size); -static uint32* write_output(uint32 v); -static uint32* write_output_64(uint64 v); -static void write_completed(uint32 completed); +const int kMaxSignalFd = 5; +const int kCoverFilterFd = 6; +static OutputData* output_data; +static std::optional output_builder; +static uint32 output_size; +static void mmap_output(uint32 size); static uint32 hash(uint32 a); static bool dedup(uint32 sig); -uint64 start_time_ms = 0; - +static uint64 start_time_ms = 0; static bool flag_debug; static bool flag_coverage; static bool flag_sandbox_none; @@ -181,6 +285,10 @@ static bool flag_threaded; // If true, then executor should write the comparisons data to fuzzer. static bool flag_comparisons; +static uint64 request_id; +static uint64 all_call_signal; +static bool all_extra_signal; + // Tunable timeouts, received with execute_req. 
static uint64 syscall_timeout_ms; static uint64 program_timeout_ms; @@ -193,8 +301,8 @@ static bool in_execute_one = false; #define SYZ_EXECUTOR 1 #include "common.h" -const int kMaxInput = 4 << 20; // keep in sync with prog.ExecBufferSize -const int kMaxCommands = 1000; // prog package knows about this constant (prog.execMaxCommands) +const size_t kMaxInput = 4 << 20; // keep in sync with prog.ExecBufferSize +const size_t kMaxCommands = 1000; // prog package knows about this constant (prog.execMaxCommands) const uint64 instr_eof = -1; const uint64 instr_copyin = -2; @@ -294,29 +402,25 @@ struct res_t { static res_t results[kMaxCommands]; const uint64 kInMagic = 0xbadc0ffeebadface; -const uint32 kOutMagic = 0xbadf00d; struct handshake_req { uint64 magic; - uint64 flags; // env flags + rpc::ExecEnv flags; uint64 pid; uint64 sandbox_arg; - uint64 cover_filter_size; - // Followed by uint64[cover_filter_size] filter. -}; - -struct handshake_reply { - uint32 magic; }; struct execute_req { uint64 magic; - uint64 env_flags; + uint64 id; + rpc::ExecEnv env_flags; uint64 exec_flags; uint64 pid; uint64 syscall_timeout_ms; uint64 program_timeout_ms; uint64 slowdown_scale; + uint64 all_call_signal; + bool all_extra_signal; }; struct execute_reply { @@ -325,25 +429,6 @@ struct execute_reply { uint32 status; }; -// call_reply.flags -const uint32 call_flag_executed = 1 << 0; -const uint32 call_flag_finished = 1 << 1; -const uint32 call_flag_blocked = 1 << 2; -const uint32 call_flag_fault_injected = 1 << 3; - -struct call_reply { - execute_reply header; - uint32 magic; - uint32 call_index; - uint32 call_num; - uint32 reserrno; - uint32 flags; - uint32 signal_size; - uint32 cover_size; - uint32 comps_size; - // signal/cover/comps follow -}; - enum { KCOV_CMP_CONST = 1, KCOV_CMP_SIZE1 = 0, @@ -359,11 +444,6 @@ struct kcov_comparison_t { uint64 arg1; uint64 arg2; uint64 pc; - - bool ignore() const; - void write(); - bool operator==(const struct kcov_comparison_t& other) const; - bool 
operator<(const struct kcov_comparison_t& other) const; }; typedef char kcov_comparison_size[sizeof(kcov_comparison_t) == 4 * sizeof(uint64) ? 1 : -1]; @@ -390,8 +470,8 @@ static uint64 swap(uint64 v, uint64 size, uint64 bf); static void copyin(char* addr, uint64 val, uint64 size, uint64 bf, uint64 bf_off, uint64 bf_len); static bool copyout(char* addr, uint64 size, uint64* res); static void setup_control_pipes(); -static void setup_features(char** enable, int n); static bool coverage_filter(uint64 pc); +static std::tuple convert(const kcov_comparison_t& cmp); #include "syscalls.h" @@ -417,10 +497,16 @@ static feature_t features[] = {}; #include "shmem.h" +#include "conn.h" #include "cover_filter.h" +#include "files.h" +#include "subprocess.h" + +#include "executor_runner.h" #include "test.h" +static std::optional max_signal; static std::optional cover_filter; #if SYZ_HAVE_SANDBOX_ANDROID @@ -429,27 +515,15 @@ static uint64 sandbox_arg = 0; int main(int argc, char** argv) { - if (argc == 2 && strcmp(argv[1], "version") == 0) { - puts(GOOS " " GOARCH " " SYZ_REVISION " " GIT_REVISION); - return 0; - } - if (argc >= 2 && strcmp(argv[1], "setup") == 0) { - setup_features(argv + 2, argc - 2); - return 0; + if (argc >= 2 && strcmp(argv[1], "runner") == 0) { + runner(argv, argc); + fail("runner returned"); } if (argc >= 2 && strcmp(argv[1], "leak") == 0) { #if SYZ_HAVE_LEAK_CHECK check_leaks(argv + 2, argc - 2); #else fail("leak checking is not implemented"); -#endif - return 0; - } - if (argc >= 2 && strcmp(argv[1], "setup_kcsan_filterlist") == 0) { -#if SYZ_HAVE_KCSAN - setup_kcsan_filterlist(argv + 2, argc - 2, true); -#else - fail("KCSAN is not implemented"); #endif return 0; } @@ -482,12 +556,24 @@ int main(int argc, char** argv) // For SYZ_EXECUTOR_USES_FORK_SERVER, close(kOutFd) is invoked in the forked child, // after the program has been received. 
+ if (fcntl(kMaxSignalFd, F_GETFD) != -1) { + // Use random addresses for coverage filters to not collide with output_data. + max_signal.emplace(kMaxSignalFd, reinterpret_cast(0x110c230000ull)); + close(kMaxSignalFd); + } + if (fcntl(kCoverFilterFd, F_GETFD) != -1) { + cover_filter.emplace(kCoverFilterFd, reinterpret_cast(0x110f230000ull)); + close(kCoverFilterFd); + } + use_temporary_dir(); install_segv_handler(); setup_control_pipes(); -#if SYZ_EXECUTOR_USES_FORK_SERVER receive_handshake(); -#else +#if !SYZ_EXECUTOR_USES_FORK_SERVER + // We receive/reply handshake when fork server is disabled just to simplify runner logic. + // It's a bit suboptimal, but no fork server is much slower anyway. + reply_execute(0); receive_execute(); #endif if (flag_coverage) { @@ -537,10 +623,6 @@ int main(int argc, char** argv) #if SYZ_EXECUTOR_USES_FORK_SERVER fprintf(stderr, "loop exited with status %d\n", status); - // Other statuses happen when fuzzer processes manages to kill loop, e.g. with: - // ptrace(PTRACE_SEIZE, 1, 0, 0x100040) - if (status != kFailStatus) - status = 0; // If an external sandbox process wraps executor, the out pipe will be closed // before the sandbox process exits this will make ipc package kill the sandbox. // As the result sandbox process will exit with exit status 9 instead of the executor @@ -557,18 +639,18 @@ int main(int argc, char** argv) // This method can be invoked as many times as one likes - MMAP_FIXED can overwrite the previous // mapping without any problems. The only precondition - kOutFd must not be closed. -static void mmap_output(int size) +static void mmap_output(uint32 size) { if (size <= output_size) return; if (size % SYZ_PAGE_SIZE != 0) failmsg("trying to mmap output area that is not divisible by page size", "page=%d,area=%d", SYZ_PAGE_SIZE, size); uint32* mmap_at = NULL; - int fixed_flag = MAP_FIXED; if (output_data == NULL) { if (kAddressSanitizer) { - // Don't use fixed address under ASAN b/c it may overlap with shadow. 
- fixed_flag = 0; + // ASan allows user mappings only at some specific address ranges, + // so we don't randomize. But we also assume 64-bits and that we are running tests. + mmap_at = (uint32*)0x7f0000000000ull; } else { // It's the first time we map output region - generate its location. // The output region is the only thing in executor process for which consistency matters. @@ -587,11 +669,11 @@ static void mmap_output(int size) mmap_at = (uint32*)((char*)(output_data) + output_size); } void* result = mmap(mmap_at, size - output_size, - PROT_READ | PROT_WRITE, MAP_SHARED | fixed_flag, kOutFd, output_size); + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, kOutFd, output_size); if (result == MAP_FAILED || (mmap_at && result != mmap_at)) failmsg("mmap of output file failed", "want %p, got %p", mmap_at, result); if (output_data == NULL) - output_data = static_cast(result); + output_data = static_cast(result); output_size = size; } @@ -609,33 +691,28 @@ void setup_control_pipes() fail("dup2(2, 0) failed"); } -void parse_env_flags(uint64 flags) +void parse_env_flags(rpc::ExecEnv flags) { // Note: Values correspond to ordering in pkg/ipc/ipc.go, e.g. 
FlagSandboxNamespace - flag_debug = flags & (1 << 0); - flag_coverage = flags & (1 << 1); - if (flags & (1 << 2)) - flag_sandbox_setuid = true; - else if (flags & (1 << 3)) - flag_sandbox_namespace = true; - else if (flags & (1 << 4)) - flag_sandbox_android = true; - else - flag_sandbox_none = true; - flag_extra_coverage = flags & (1 << 5); - flag_net_injection = flags & (1 << 6); - flag_net_devices = flags & (1 << 7); - flag_net_reset = flags & (1 << 8); - flag_cgroups = flags & (1 << 9); - flag_close_fds = flags & (1 << 10); - flag_devlink_pci = flags & (1 << 11); - flag_vhci_injection = flags & (1 << 12); - flag_wifi = flags & (1 << 13); - flag_delay_kcov_mmap = flags & (1 << 14); - flag_nic_vf = flags & (1 << 15); + flag_debug = (bool)(flags & rpc::ExecEnv::Debug); + flag_coverage = (bool)(flags & rpc::ExecEnv::Signal); + flag_sandbox_none = (bool)(flags & rpc::ExecEnv::SandboxNone); + flag_sandbox_setuid = (bool)(flags & rpc::ExecEnv::SandboxSetuid); + flag_sandbox_namespace = (bool)(flags & rpc::ExecEnv::SandboxNamespace); + flag_sandbox_android = (bool)(flags & rpc::ExecEnv::SandboxAndroid); + flag_extra_coverage = (bool)(flags & rpc::ExecEnv::ExtraCover); + flag_net_injection = (bool)(flags & rpc::ExecEnv::EnableTun); + flag_net_devices = (bool)(flags & rpc::ExecEnv::EnableNetDev); + flag_net_reset = (bool)(flags & rpc::ExecEnv::EnableNetReset); + flag_cgroups = (bool)(flags & rpc::ExecEnv::EnableCgroups); + flag_close_fds = (bool)(flags & rpc::ExecEnv::EnableCloseFds); + flag_devlink_pci = (bool)(flags & rpc::ExecEnv::EnableDevlinkPCI); + flag_vhci_injection = (bool)(flags & rpc::ExecEnv::EnableVhciInjection); + flag_wifi = (bool)(flags & rpc::ExecEnv::EnableWifi); + flag_delay_kcov_mmap = (bool)(flags & rpc::ExecEnv::DelayKcovMmap); + flag_nic_vf = (bool)(flags & rpc::ExecEnv::EnableNicVF); } -#if SYZ_EXECUTOR_USES_FORK_SERVER void receive_handshake() { handshake_req req = {}; @@ -649,40 +726,22 @@ void receive_handshake() #endif 
parse_env_flags(req.flags); procid = req.pid; - if (!req.cover_filter_size) - return; - // A random address for bitmap. Don't corrupt output_data. - cover_filter.emplace("syz-cover-filer", reinterpret_cast(0x110f230000ull)); - std::vector pcs(req.cover_filter_size); - const ssize_t filter_size = req.cover_filter_size * sizeof(uint64); - n = read(kInPipeFd, &pcs[0], filter_size); - if (n != filter_size) - failmsg("failed to read cover filter", "read=%zu", n); - for (auto pc : pcs) - cover_filter->Insert(pc); - cover_filter->Seal(); } -void reply_handshake() -{ - handshake_reply reply = {}; - reply.magic = kOutMagic; - if (write(kOutPipeFd, &reply, sizeof(reply)) != sizeof(reply)) - fail("control pipe write failed"); -} -#endif - static execute_req last_execute_req; void receive_execute() { execute_req& req = last_execute_req; - if (read(kInPipeFd, &req, sizeof(req)) != (ssize_t)sizeof(req)) - fail("control pipe read failed"); + ssize_t n = read(kInPipeFd, &req, sizeof(req)); + if (n != (ssize_t)sizeof(req)) + failmsg("control pipe read failed", "read=%zd want=%zd", n, sizeof(req)); if (req.magic != kInMagic) failmsg("bad execute request magic", "magic=0x%llx", req.magic); + request_id = req.id; parse_env_flags(req.env_flags); procid = req.pid; + request_id = req.id; syscall_timeout_ms = req.syscall_timeout_ms; program_timeout_ms = req.program_timeout_ms; slowdown_scale = req.slowdown_scale; @@ -691,12 +750,14 @@ void receive_execute() flag_dedup_cover = req.exec_flags & (1 << 2); flag_comparisons = req.exec_flags & (1 << 3); flag_threaded = req.exec_flags & (1 << 4); + all_call_signal = req.all_call_signal; + all_extra_signal = req.all_extra_signal; debug("[%llums] exec opts: procid=%llu threaded=%d cover=%d comps=%d dedup=%d signal=%d " - " timeouts=%llu/%llu/%llu\n", + " sandbox=%d/%d/%d/%d timeouts=%llu/%llu/%llu\n", current_time_ms() - start_time_ms, procid, flag_threaded, flag_collect_cover, - flag_comparisons, flag_dedup_cover, flag_collect_signal, 
syscall_timeout_ms, - program_timeout_ms, slowdown_scale); + flag_comparisons, flag_dedup_cover, flag_collect_signal, flag_sandbox_none, flag_sandbox_setuid, + flag_sandbox_namespace, flag_sandbox_android, syscall_timeout_ms, program_timeout_ms, slowdown_scale); if (syscall_timeout_ms == 0 || program_timeout_ms <= syscall_timeout_ms || slowdown_scale == 0) failmsg("bad timeouts", "syscall=%llu, program=%llu, scale=%llu", syscall_timeout_ms, program_timeout_ms, slowdown_scale); @@ -707,13 +768,9 @@ bool cover_collection_required() return flag_coverage && (flag_collect_signal || flag_collect_cover || flag_comparisons); } -void reply_execute(int status) +void reply_execute(uint32 status) { - execute_reply reply = {}; - reply.magic = kOutMagic; - reply.done = true; - reply.status = status; - if (write(kOutPipeFd, &reply, sizeof(reply)) != sizeof(reply)) + if (write(kOutPipeFd, &status, sizeof(status)) != sizeof(status)) fail("control pipe write failed"); } @@ -736,11 +793,17 @@ void execute_one() { in_execute_one = true; realloc_output_data(); - output_pos = output_data; - write_output(0); // Number of executed syscalls (updated later). + output_builder.emplace(output_data, output_size); uint64 start = current_time_ms(); uint8* input_pos = input_data; +#if GOOS_linux + char buf[64]; + // Linux TASK_COMM_LEN is only 16, so the name needs to be compact. + snprintf(buf, sizeof(buf), "syz.%llu.%llu", procid, request_id); + prctl(PR_SET_NAME, buf); +#endif + if (cover_collection_required()) { if (!flag_threaded) cover_enable(&threads[0].cov, flag_comparisons, false); @@ -991,55 +1054,96 @@ thread_t* schedule_call(int call_index, int call_num, uint64 copyout_index, uint } template -void write_coverage_signal(cover_t* cov, uint32* signal_count_pos, uint32* cover_count_pos) +uint32 write_signal(flatbuffers::FlatBufferBuilder& fbb, cover_t* cov, bool all) { // Write out feedback signals. // Currently it is code edges computed as xor of two subsequent basic block PCs. 
+ fbb.StartVector(0, sizeof(uint64)); cover_data_t* cover_data = (cover_data_t*)(cov->data + cov->data_offset); - if (flag_collect_signal) { - uint32 nsig = 0; - cover_data_t prev_pc = 0; - bool prev_filter = true; - for (uint32 i = 0; i < cov->size; i++) { - cover_data_t pc = cover_data[i] + cov->pc_offset; - uint64 sig = pc; - if (is_kernel_pc(pc) < 0) - exitf("got bad pc: 0x%llx", (uint64)pc); - if (use_cover_edges(pc)) { - // Only hash the lower 12 bits so the hash is independent of any module offsets. - const uint64 mask = (1 << 12) - 1; - sig ^= hash(prev_pc & mask) & mask; - } - bool filter = coverage_filter(pc); - // Ignore the edge only if both current and previous PCs are filtered out - // to capture all incoming and outcoming edges into the interesting code. - bool ignore = !filter && !prev_filter; - prev_pc = pc; - prev_filter = filter; - if (ignore || dedup(sig)) - continue; - write_output_64(sig); - nsig++; + uint32 nsig = 0; + cover_data_t prev_pc = 0; + bool prev_filter = true; + for (uint32 i = 0; i < cov->size; i++) { + cover_data_t pc = cover_data[i] + cov->pc_offset; + if (is_kernel_pc(pc) < 0) + exitf("got bad pc: 0x%llx", (uint64)pc); + uint64 sig = pc; + if (use_cover_edges(pc)) { + // Only hash the lower 12 bits so the hash is independent of any module offsets. + const uint64 mask = (1 << 12) - 1; + sig ^= hash(prev_pc & mask) & mask; } - // Write out number of signals. - *signal_count_pos = nsig; + bool filter = coverage_filter(pc); + // Ignore the edge only if both current and previous PCs are filtered out + // to capture all incoming and outcoming edges into the interesting code. + bool ignore = !filter && !prev_filter; + prev_pc = pc; + prev_filter = filter; + if (ignore || dedup(sig)) + continue; + if (!all && max_signal && max_signal->Contains(sig)) + continue; + fbb.PushElement(uint64(sig)); + nsig++; } + return fbb.EndVector(nsig); +} - if (flag_collect_cover) { - // Write out real coverage (basic block PCs). 
- uint32 cover_size = cov->size; - if (flag_dedup_cover) { - cover_data_t* end = cover_data + cover_size; - cover_unprotect(cov); - std::sort(cover_data, end); - cover_size = std::unique(cover_data, end) - cover_data; - cover_protect(cov); - } - // Always sent uint64 PCs. - for (uint32 i = 0; i < cover_size; i++) - write_output_64(cover_data[i] + cov->pc_offset); - *cover_count_pos = cover_size; +template +uint32 write_cover(flatbuffers::FlatBufferBuilder& fbb, cover_t* cov) +{ + uint32 cover_size = cov->size; + cover_data_t* cover_data = (cover_data_t*)(cov->data + cov->data_offset); + if (flag_dedup_cover) { + cover_data_t* end = cover_data + cover_size; + cover_unprotect(cov); + std::sort(cover_data, end); + cover_size = std::unique(cover_data, end) - cover_data; + cover_protect(cov); + } + fbb.StartVector(cover_size, sizeof(uint64)); + for (uint32 i = 0; i < cover_size; i++) + fbb.PushElement(uint64(cover_data[i] + cov->pc_offset)); + return fbb.EndVector(cover_size); +} + +uint32 write_comparisons(flatbuffers::FlatBufferBuilder& fbb, cover_t* cov) +{ + // Collect only the comparisons + uint64 ncomps = *(uint64_t*)cov->data; + kcov_comparison_t* cov_start = (kcov_comparison_t*)(cov->data + sizeof(uint64)); + if ((char*)(cov_start + ncomps) > cov->data_end) + failmsg("too many comparisons", "ncomps=%llu", ncomps); + cover_unprotect(cov); + rpc::ComparisonRaw* start = (rpc::ComparisonRaw*)cov_start; + rpc::ComparisonRaw* end = start; + // We will convert kcov_comparison_t to ComparisonRaw inplace + // and potentially double number of elements, so ensure we have space. + static_assert(sizeof(kcov_comparison_t) >= 2 * sizeof(rpc::ComparisonRaw)); + for (uint32 i = 0; i < ncomps; i++) { + auto [raw, swap, ok] = convert(cov_start[i]); + if (!ok) + continue; + *end++ = raw; + // Compiler marks comparisons with a const with KCOV_CMP_CONST flag. 
+ // If the flag is set, then we need to export only one order of operands + // (because only one of them could potentially come from the input). + // If the flag is not set, then we export both orders as both operands + // could come from the input. + if (swap) + *end++ = {raw.op2(), raw.op1()}; } + std::sort(start, end, [](rpc::ComparisonRaw a, rpc::ComparisonRaw b) -> bool { + if (a.op1() != b.op1()) + return a.op1() < b.op1(); + return a.op2() < b.op2(); + }); + ncomps = std::unique(start, end, [](rpc::ComparisonRaw a, rpc::ComparisonRaw b) -> bool { + return a.op1() == b.op1() && a.op2() == b.op2(); + }) - + start; + cover_protect(cov); + return fbb.CreateVectorOfStructs(start, ncomps).o; } bool coverage_filter(uint64 pc) @@ -1109,56 +1213,67 @@ void copyout_call_results(thread_t* th) } } +void write_output(int index, cover_t* cov, rpc::CallFlag flags, uint32 error, bool all_signal) +{ + auto& fbb = *output_builder; + const uint32 start_size = output_builder->GetSize(); + (void)start_size; + uint32 signal_off = 0; + uint32 cover_off = 0; + uint32 comps_off = 0; + if (flag_comparisons) { + comps_off = write_comparisons(fbb, cov); + } else { + if (flag_collect_signal) { + if (is_kernel_64_bit) + signal_off = write_signal(fbb, cov, all_signal); + else + signal_off = write_signal(fbb, cov, all_signal); + } + if (flag_collect_cover) { + if (is_kernel_64_bit) + cover_off = write_cover(fbb, cov); + else + cover_off = write_cover(fbb, cov); + } + } + + rpc::CallInfoRawBuilder builder(*output_builder); + builder.add_flags(flags); + builder.add_error(error); + if (signal_off) + builder.add_signal(signal_off); + if (cover_off) + builder.add_cover(cover_off); + if (comps_off) + builder.add_comps(comps_off); + auto off = builder.Finish(); + uint32 slot = output_data->completed.load(std::memory_order_relaxed); + if (slot >= kMaxCalls) + failmsg("too many calls in output", "slot=%d", slot); + auto& call = output_data->calls[slot]; + call.index = index; + call.offset = off; + 
output_data->consumed.store(output_builder->GetSize(), std::memory_order_release); + output_data->completed.store(slot + 1, std::memory_order_release); + debug_verbose("out #%u: index=%u errno=%d flags=0x%x total_size=%u\n", + slot + 1, index, error, static_cast(flags), call.data_size - start_size); +} + void write_call_output(thread_t* th, bool finished) { uint32 reserrno = ENOSYS; - const bool blocked = finished && th != last_scheduled; - uint32 call_flags = call_flag_executed | (blocked ? call_flag_blocked : 0); + rpc::CallFlag flags = rpc::CallFlag::Executed; + if (finished && th != last_scheduled) + flags |= rpc::CallFlag::Blocked; if (finished) { reserrno = th->res != -1 ? 0 : th->reserrno; - call_flags |= call_flag_finished | - (th->fault_injected ? call_flag_fault_injected : 0); + flags |= rpc::CallFlag::Finished; + if (th->fault_injected) + flags |= rpc::CallFlag::FaultInjected; } - write_output(kOutMagic); - write_output(th->call_index); - write_output(th->call_num); - write_output(reserrno); - write_output(call_flags); - uint32* signal_count_pos = write_output(0); // filled in later - uint32* cover_count_pos = write_output(0); // filled in later - uint32* comps_count_pos = write_output(0); // filled in later - - if (flag_comparisons) { - // Collect only the comparisons - uint64 ncomps = *(uint64_t*)th->cov.data; - kcov_comparison_t* start = (kcov_comparison_t*)(th->cov.data + sizeof(uint64)); - kcov_comparison_t* end = start + ncomps; - if ((char*)end > th->cov.data_end) - failmsg("too many comparisons", "ncomps=%llu", ncomps); - cover_unprotect(&th->cov); - std::sort(start, end); - ncomps = std::unique(start, end) - start; - cover_protect(&th->cov); - uint32 comps_size = 0; - for (uint32 i = 0; i < ncomps; ++i) { - if (start[i].ignore()) - continue; - comps_size++; - start[i].write(); - } - // Write out number of comparisons. 
- *comps_count_pos = comps_size; - } else if (flag_collect_signal || flag_collect_cover) { - if (is_kernel_64_bit) - write_coverage_signal(&th->cov, signal_count_pos, cover_count_pos); - else - write_coverage_signal(&th->cov, signal_count_pos, cover_count_pos); - } - debug_verbose("out #%u: index=%u num=%u errno=%d finished=%d blocked=%d sig=%u cover=%u comps=%llu\n", - completed, th->call_index, th->call_num, reserrno, finished, blocked, - *signal_count_pos, *cover_count_pos, *comps_count_pos); - completed++; - write_completed(completed); + bool all_signal = th->call_index < 64 ? (all_call_signal & (1ull << th->call_index)) : false; + write_output(th->call_index, &th->cov, flags, reserrno, all_signal); } void write_extra_output() @@ -1168,22 +1283,7 @@ void write_extra_output() cover_collect(&extra_cov); if (!extra_cov.size) return; - write_output(kOutMagic); - write_output(-1); // call index - write_output(-1); // call num - write_output(999); // errno - write_output(0); // call flags - uint32* signal_count_pos = write_output(0); // filled in later - uint32* cover_count_pos = write_output(0); // filled in later - write_output(0); // comps_count_pos - if (is_kernel_64_bit) - write_coverage_signal(&extra_cov, signal_count_pos, cover_count_pos); - else - write_coverage_signal(&extra_cov, signal_count_pos, cover_count_pos); - cover_reset(&extra_cov); - debug_verbose("extra: sig=%u cover=%u\n", *signal_count_pos, *cover_count_pos); - completed++; - write_completed(completed); + write_output(-1, &extra_cov, rpc::CallFlag::NONE, 997, all_extra_signal); } void thread_create(thread_t* th, int id, bool need_coverage) @@ -1518,45 +1618,42 @@ uint64 read_input(uint8** input_posp, bool peek) return v; } -uint32* write_output(uint32 v) -{ - if (output_pos < output_data || (char*)output_pos >= (char*)output_data + output_size) - failmsg("output overflow", "pos=%p region=[%p:%p]", - output_pos, output_data, (char*)output_data + output_size); - *output_pos = v; - return 
output_pos++; -} - -uint32* write_output_64(uint64 v) -{ - if (output_pos < output_data || (char*)(output_pos + 1) >= (char*)output_data + output_size) - failmsg("output overflow", "pos=%p region=[%p:%p]", - output_pos, output_data, (char*)output_data + output_size); - *(uint64*)output_pos = v; - output_pos += 2; - return output_pos; -} - -void write_completed(uint32 completed) +std::tuple convert(const kcov_comparison_t& cmp) { - __atomic_store_n(output_data, completed, __ATOMIC_RELEASE); -} + if (cmp.type > (KCOV_CMP_CONST | KCOV_CMP_SIZE_MASK)) + failmsg("invalid kcov comp type", "type=%llx", cmp.type); + uint64 arg1 = cmp.arg1; + uint64 arg2 = cmp.arg2; + // Comparisons with 0 are not interesting, fuzzer should be able to guess 0's without help. + if (arg1 == 0 && (arg2 == 0 || (cmp.type & KCOV_CMP_CONST))) + return {}; + // Successful comparison is not interesting. + if (arg1 == arg2) + return {}; -void kcov_comparison_t::write() -{ - if (type > (KCOV_CMP_CONST | KCOV_CMP_SIZE_MASK)) - failmsg("invalid kcov comp type", "type=%llx", type); - - // Write order: type arg1 arg2 pc. - write_output((uint32)type); - - // KCOV converts all arguments of size x first to uintx_t and then to - // uint64. We want to properly extend signed values, e.g we want - // int8 c = 0xfe to be represented as 0xfffffffffffffffe. - // Note that uint8 c = 0xfe will be represented the same way. - // This is ok because during hints processing we will anyways try - // the value 0x00000000000000fe. - switch (type & KCOV_CMP_SIZE_MASK) { + // This can be a pointer (assuming 64-bit kernel). + // First of all, we want avert fuzzer from our output region. + // Without this fuzzer manages to discover and corrupt it. + uint64 out_start = (uint64)output_data; + uint64 out_end = out_start + output_size; + if (arg1 >= out_start && arg1 <= out_end) + return {}; + if (arg2 >= out_start && arg2 <= out_end) + return {}; + // Filter out kernel physical memory addresses. 
+ // These are internal kernel comparisons and should not be interesting. + bool kptr1 = is_kernel_data(arg1) || is_kernel_pc(arg1) > 0 || arg1 == 0; + bool kptr2 = is_kernel_data(arg2) || is_kernel_pc(arg2) > 0 || arg2 == 0; + if (kptr1 && kptr2) + return {}; + if (!coverage_filter(cmp.pc)) + return {}; + + // KCOV converts all arguments of size x first to uintx_t and then to uint64. + // We want to properly extend signed values, e.g we want int8 c = 0xfe to be represented + // as 0xfffffffffffffffe. Note that uint8 c = 0xfe will be represented the same way. + // This is ok because during hints processing we will anyways try the value 0x00000000000000fe. + switch (cmp.type & KCOV_CMP_SIZE_MASK) { case KCOV_CMP_SIZE1: arg1 = (uint64)(long long)(signed char)arg1; arg2 = (uint64)(long long)(signed char)arg2; @@ -1570,88 +1667,10 @@ void kcov_comparison_t::write() arg2 = (uint64)(long long)(int)arg2; break; } - bool is_size_8 = (type & KCOV_CMP_SIZE_MASK) == KCOV_CMP_SIZE8; - if (!is_size_8) { - write_output((uint32)arg1); - write_output((uint32)arg2); - } else { - write_output_64(arg1); - write_output_64(arg2); - } -} -bool kcov_comparison_t::ignore() const -{ - // Comparisons with 0 are not interesting, fuzzer should be able to guess 0's without help. - if (arg1 == 0 && (arg2 == 0 || (type & KCOV_CMP_CONST))) - return true; - // This can be a pointer (assuming 64-bit kernel). - // First of all, we want avert fuzzer from our output region. - // Without this fuzzer manages to discover and corrupt it. - uint64 out_start = (uint64)output_data; - uint64 out_end = out_start + output_size; - if (arg1 >= out_start && arg1 <= out_end) - return true; - if (arg2 >= out_start && arg2 <= out_end) - return true; - // Filter out kernel physical memory addresses. - // These are internal kernel comparisons and should not be interesting. 
- bool kptr1 = is_kernel_data(arg1) || is_kernel_pc(arg1) > 0 || arg1 == 0; - bool kptr2 = is_kernel_data(arg2) || is_kernel_pc(arg2) > 0 || arg2 == 0; - if (kptr1 && kptr2) - return true; - return !coverage_filter(pc); -} - -bool kcov_comparison_t::operator==(const struct kcov_comparison_t& other) const -{ - // We don't check for PC equality now, because it is not used. - return type == other.type && arg1 == other.arg1 && arg2 == other.arg2; -} - -bool kcov_comparison_t::operator<(const struct kcov_comparison_t& other) const -{ - if (type != other.type) - return type < other.type; - if (arg1 != other.arg1) - return arg1 < other.arg1; - // We don't check for PC equality now, because it is not used. - return arg2 < other.arg2; -} - -void setup_features(char** enable, int n) -{ - // This does any one-time setup for the requested features on the machine. - // Note: this can be called multiple times and must be idempotent. - flag_debug = true; - if (n != 1) - fail("setup: more than one feature"); - char* endptr = nullptr; - auto feature = static_cast(strtoull(enable[0], &endptr, 10)); - if (endptr == enable[0] || (feature > rpc::Feature::ANY) || - __builtin_popcountll(static_cast(feature)) > 1) - failmsg("setup: failed to parse feature", "feature='%s'", enable[0]); - if (feature == rpc::Feature::NONE) { -#if SYZ_HAVE_FEATURES - setup_sysctl(); - setup_cgroups(); -#endif -#if SYZ_HAVE_SETUP_EXT - // This can be defined in common_ext.h. - setup_ext(); -#endif - return; - } - for (size_t i = 0; i < sizeof(features) / sizeof(features[0]); i++) { - if (features[i].id == feature) { - const char* reason = features[i].setup(); - if (reason) - fail(reason); - return; - } - } - // Note: pkg/host knows about this error message. - fail("feature setup is not needed"); + // Prog package expects operands in the opposite order (first operand may come from the input, + // the second operand was computed in the kernel), so swap operands. 
+ return {{arg2, arg1}, !(cmp.type & KCOV_CMP_CONST), true}; } void failmsg(const char* err, const char* msg, ...) diff --git a/executor/executor_linux.h b/executor/executor_linux.h index 30c10a615..cb980838f 100644 --- a/executor/executor_linux.h +++ b/executor/executor_linux.h @@ -279,22 +279,15 @@ NORETURN void doexit_thread(int status) } #define SYZ_HAVE_KCSAN 1 -static void setup_kcsan_filterlist(char** frames, int nframes, bool suppress) +static void setup_kcsan_filter(const std::vector& frames) { + if (frames.empty()) + return; int fd = open("/sys/kernel/debug/kcsan", O_WRONLY); if (fd == -1) fail("failed to open kcsan debugfs file"); - - printf("%s KCSAN reports in functions: ", - suppress ? "suppressing" : "only showing"); - if (!suppress) - dprintf(fd, "whitelist\n"); - for (int i = 0; i < nframes; ++i) { - printf("'%s' ", frames[i]); - dprintf(fd, "!%s\n", frames[i]); - } - printf("\n"); - + for (const auto& frame : frames) + dprintf(fd, "!%s\n", frame.c_str()); close(fd); } diff --git a/executor/executor_runner.h b/executor/executor_runner.h new file mode 100644 index 000000000..55a6c422c --- /dev/null +++ b/executor/executor_runner.h @@ -0,0 +1,801 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +inline std::ostream& operator<<(std::ostream& ss, const rpc::ExecRequestRawT& req) +{ + return ss << "id=" << req.id + << " flags=0x" << std::hex << static_cast(req.flags) + << " env_flags=0x" << std::hex << static_cast(req.exec_opts->env_flags()) + << " exec_flags=0x" << std::hex << static_cast(req.exec_opts->exec_flags()) + << " prod_data=" << std::dec << req.prog_data.size() + << "\n"; +} + +// Proc represents one subprocess that runs tests (re-execed syz-executor with 'exec' argument). 
+// The object is persistent and re-starts subprocess when it crashes. +class Proc +{ +public: + Proc(Connection& conn, const char* bin, int id, int max_signal_fd, int cover_filter_fd, + uint32 slowdown, uint32 syscall_timeout_ms, uint32 program_timeout_ms) + : conn_(conn), + bin_(bin), + id_(id), + max_signal_fd_(max_signal_fd), + cover_filter_fd_(cover_filter_fd), + slowdown_(slowdown), + syscall_timeout_ms_(syscall_timeout_ms), + program_timeout_ms_(program_timeout_ms), + req_shmem_(kMaxInput), + resp_shmem_(kMaxOutput), + resp_mem_(static_cast(resp_shmem_.Mem())) + { + Start(); + } + + bool Execute(rpc::ExecRequestRawT& msg) + { + if (state_ != State::Started && state_ != State::Idle) + return false; + if (msg_) + fail("already have pending msg"); + if (wait_start_) + wait_end_ = current_time_ms(); + if (state_ == State::Idle && + (exec_env_ != msg.exec_opts->env_flags() || sandbox_arg_ != msg.exec_opts->sandbox_arg())) + Restart(); + attempts_ = 0; + msg_ = std::move(msg); + if (state_ == State::Started) + Handshake(); + else + Execute(); + return true; + } + + void Arm(Select& select) + { + select.Arm(resp_pipe_); + select.Arm(stdout_pipe_); + } + + void Ready(Select& select, uint64 now, bool out_of_requests) + { + if (state_ == State::Handshaking || state_ == State::Executing) { + // Check if the subprocess has hung. +#if SYZ_EXECUTOR_USES_FORK_SERVER + // Child process has an internal timeout and protects against most hangs when + // fork server is enabled, so we use quite large timeout. Child process can be slow + // due to global locks in namespaces and other things, so let's better wait than + // report false misleading crashes. + uint64 timeout = 2 * program_timeout_ms_; +#else + uint64 timeout = program_timeout_ms_; +#endif + // Sandbox setup can take significant time. 
+ if (state_ == State::Handshaking) + timeout = 60 * 1000 * slowdown_; + if (now > exec_start_ + timeout) { + Restart(); + return; + } + } + + if (select.Ready(stdout_pipe_) && !ReadOutput()) { +#if SYZ_EXECUTOR_USES_FORK_SERVER + // In non-forking mode the subprocess exits after test execution + // and the pipe read fails with EOF, so we rely on the resp_pipe_ instead. + Restart(); + return; +#endif + } + if (select.Ready(resp_pipe_) && !ReadResponse(out_of_requests)) { + Restart(); + return; + } + return; + } + +private: + enum State : uint8 { + // The process has just started. + Started, + // We sent the process env flags and waiting for handshake reply. + Handshaking, + // Handshaked and ready to execute programs. + Idle, + // Currently executing a test program. + Executing, + }; + + Connection& conn_; + const char* const bin_; + const int id_; + const int max_signal_fd_; + const int cover_filter_fd_; + const uint32 slowdown_; + const uint32 syscall_timeout_ms_; + const uint32 program_timeout_ms_; + State state_ = State::Started; + std::optional process_; + ShmemFile req_shmem_; + ShmemFile resp_shmem_; + OutputData* resp_mem_; + int req_pipe_ = -1; + int resp_pipe_ = -1; + int stdout_pipe_ = -1; + rpc::ExecEnv exec_env_ = rpc::ExecEnv::NONE; + int64_t sandbox_arg_ = 0; + std::optional msg_; + std::vector output_; + size_t debug_output_pos_ = 0; + uint64 attempts_ = 0; + uint64 freshness_ = 0; + uint64 exec_start_ = 0; + uint64 wait_start_ = 0; + uint64 wait_end_ = 0; + + friend std::ostream& operator<<(std::ostream& ss, const Proc& proc) + { + ss << "id=" << proc.id_ + << " state=" << static_cast(proc.state_) + << " freshness=" << proc.freshness_ + << " attempts=" << proc.attempts_ + << " exec_start=" << current_time_ms() - proc.exec_start_ + << "\n"; + if (proc.msg_) + ss << "\tcurrent request: " << *proc.msg_; + return ss; + } + + void Restart() + { + debug("proc %d: restarting subprocess, current state %u attempts %llu\n", id_, state_, attempts_); + int 
status = process_->KillAndWait(); + process_.reset(); + debug("proc %d: subprocess exit status %d\n", id_, status); + if (++attempts_ > 20) { + while (ReadOutput()) + ; + // Write the subprocess output first. If it contains its own SYZFAIL, + // we want it to be before our SYZFAIL. + ssize_t wrote = write(STDERR_FILENO, output_.data(), output_.size()); + if (wrote != static_cast(output_.size())) + fprintf(stderr, "output truncated: %zd/%zd (errno=%d)\n", + wrote, output_.size(), errno); + uint64 req_id = msg_ ? msg_->id : -1; + failmsg("repeatedly failed to execute the program", "proc=%d req=%lld state=%d status=%d", + id_, req_id, state_, status); + } + // Ignore all other errors. + // Without fork server executor can legitimately exit (program contains exit_group), + // with fork server the top process can exit with kFailStatus if it wants special handling. + if (status != kFailStatus) + status = 0; + if (FailCurrentRequest(status == kFailStatus)) { + // Read out all pending output until EOF. 
+ if (IsSet(msg_->flags, rpc::RequestFlag::ReturnOutput)) { + while (ReadOutput()) + ; + } + HandleCompletion(status); + } else if (attempts_ > 3) + sleep_ms(100 * attempts_); + Start(); + } + + bool FailCurrentRequest(bool failed) + { + if (state_ == State::Handshaking) + return failed && IsSet(msg_->flags, rpc::RequestFlag::ReturnError); + if (state_ == State::Executing) + return !failed || IsSet(msg_->flags, rpc::RequestFlag::ReturnError); + return false; + } + + void Start() + { + state_ = State::Started; + freshness_ = 0; + int req_pipe[2]; + if (pipe(req_pipe)) + fail("pipe failed"); + int resp_pipe[2]; + if (pipe(resp_pipe)) + fail("pipe failed"); + int stdout_pipe[2]; + if (pipe(stdout_pipe)) + fail("pipe failed"); + + std::vector> fds = { + {req_pipe[0], STDIN_FILENO}, + {resp_pipe[1], STDOUT_FILENO}, + {stdout_pipe[1], STDERR_FILENO}, + {req_shmem_.FD(), kInFd}, + {resp_shmem_.FD(), kOutFd}, + {max_signal_fd_, kMaxSignalFd}, + {cover_filter_fd_, kCoverFilterFd}, + }; + const char* argv[] = {bin_, "exec", nullptr}; + process_.emplace(argv, fds); + + Select::Prepare(resp_pipe[0]); + Select::Prepare(stdout_pipe[0]); + + close(req_pipe[0]); + close(resp_pipe[1]); + close(stdout_pipe[1]); + + close(req_pipe_); + close(resp_pipe_); + close(stdout_pipe_); + + req_pipe_ = req_pipe[1]; + resp_pipe_ = resp_pipe[0]; + stdout_pipe_ = stdout_pipe[0]; + + if (msg_) + Handshake(); + } + + void Handshake() + { + if (state_ != State::Started || !msg_) + fail("wrong handshake state"); + debug("proc %d: handshaking to execute request %llu\n", id_, static_cast(msg_->id)); + state_ = State::Handshaking; + exec_start_ = current_time_ms(); + exec_env_ = msg_->exec_opts->env_flags() & ~rpc::ExecEnv::ResetState; + sandbox_arg_ = msg_->exec_opts->sandbox_arg(); + handshake_req req = { + .magic = kInMagic, + .flags = exec_env_, + .pid = static_cast(id_), + .sandbox_arg = static_cast(sandbox_arg_), + }; + if (write(req_pipe_, &req, sizeof(req)) != sizeof(req)) { + debug("request 
pipe write failed (errno=%d)\n", errno); + Restart(); + } + } + + void Execute() + { + if (state_ != State::Idle || !msg_) + fail("wrong state for execute"); + + debug("proc %d: start executing request %llu\n", id_, static_cast(msg_->id)); + + rpc::ExecutingMessageRawT exec; + exec.id = msg_->id; + exec.proc_id = id_; + exec.try_ = attempts_; + + if (wait_start_) { + exec.wait_duration = (wait_end_ - wait_start_) * 1000 * 1000; + wait_end_ = wait_start_ = 0; + } + + rpc::ExecutorMessageRawT raw; + raw.msg.Set(std::move(exec)); + conn_.Send(raw); + + uint64 all_call_signal = 0; + bool all_extra_signal = false; + for (int32_t call : msg_->all_signal) { + // This code assumes that call indices can be represented as bits in uint64 all_call_signal. + static_assert(kMaxCalls == 64); + if (call < -1 || call >= static_cast(kMaxCalls)) + failmsg("bad all_signal call", "call=%d", call); + if (call < 0) + all_extra_signal = true; + else + all_call_signal |= 1ull << call; + } + memcpy(req_shmem_.Mem(), msg_->prog_data.data(), std::min(msg_->prog_data.size(), kMaxInput)); + execute_req req{ + .magic = kInMagic, + .id = static_cast(msg_->id), + .env_flags = exec_env_, + .exec_flags = static_cast(msg_->exec_opts->exec_flags()), + .pid = static_cast(id_), + .syscall_timeout_ms = syscall_timeout_ms_, + .program_timeout_ms = program_timeout_ms_, + .slowdown_scale = slowdown_, + .all_call_signal = all_call_signal, + .all_extra_signal = all_extra_signal, + }; + exec_start_ = current_time_ms(); + state_ = State::Executing; + if (write(req_pipe_, &req, sizeof(req)) != sizeof(req)) { + debug("request pipe write failed (errno=%d)\n", errno); + Restart(); + } + } + + void HandleCompletion(uint32 status) + { + if (!msg_) + fail("don't have executed msg"); + + // Note: if the child process crashed during handshake and the request has ReturnError flag, + // we have not started executing the request yet. 
+ uint64 elapsed = (current_time_ms() - exec_start_) * 1000 * 1000; + uint8* prog_data = msg_->prog_data.data(); + input_data = prog_data; + uint32 num_calls = read_input(&prog_data); + + int output_size = resp_mem_->size.load(std::memory_order_relaxed) ?: kMaxOutput; + uint32 completed = resp_mem_->completed.load(std::memory_order_relaxed); + completed = std::min(completed, kMaxCalls); + debug("handle completion: completed=%u output_size=%u\n", completed, output_size); + ShmemBuilder fbb(resp_mem_, output_size); + auto empty_call = rpc::CreateCallInfoRawDirect(fbb, rpc::CallFlag::NONE, 998); + std::vector> calls(num_calls, empty_call); + std::vector> extra; + for (uint32_t i = 0; i < completed; i++) { + const auto& call = resp_mem_->calls[i]; + if (call.index == -1) { + extra.push_back(call.offset); + continue; + } + if (call.index < 0 || call.index >= static_cast(num_calls) || call.offset.o > kMaxOutput) { + debug("bad call index/offset: proc=%d req=%llu call=%d/%d completed=%d offset=%u", + id_, static_cast(msg_->id), call.index, num_calls, + completed, call.offset.o); + continue; + } + calls[call.index] = call.offset; + } + + auto prog_info_off = rpc::CreateProgInfoRawDirect(fbb, &calls, &extra, 0, elapsed, freshness_++); + + flatbuffers::Offset error_off = 0; + if (status == kFailStatus) + error_off = fbb.CreateString("process failed"); + flatbuffers::Offset> output_off = 0; + if (IsSet(msg_->flags, rpc::RequestFlag::ReturnOutput)) { + if (status) { + char tmp[128]; + snprintf(tmp, sizeof(tmp), "\nprocess exited with status %d\n", status); + output_.insert(output_.end(), tmp, tmp + strlen(tmp)); + } + output_off = fbb.CreateVector(output_); + } + auto exec_off = rpc::CreateExecResultRaw(fbb, msg_->id, output_off, error_off, prog_info_off); + auto msg_off = rpc::CreateExecutorMessageRaw(fbb, rpc::ExecutorMessagesRaw::ExecResult, + flatbuffers::Offset(exec_off.o)); + fbb.FinishSizePrefixed(msg_off); + auto data = fbb.GetBufferSpan(); + conn_.Send(data.data(), 
data.size()); + + resp_mem_->Reset(); + msg_.reset(); + output_.clear(); + debug_output_pos_ = 0; + state_ = State::Idle; +#if !SYZ_EXECUTOR_USES_FORK_SERVER + if (process_) + Restart(); +#endif + } + + bool ReadResponse(bool out_of_requests) + { + uint32 status; + ssize_t n = read(resp_pipe_, &status, sizeof(status)); + if (n == 0) { + debug("proc %d: response pipe EOF\n", id_); + return false; + } + if (n != sizeof(status)) + failmsg("proc resp pipe read failed", "n=%zd", n); + if (state_ == State::Handshaking) { + debug("proc %d: got handshake reply\n", id_); + state_ = State::Idle; + Execute(); + } else if (state_ == State::Executing) { + debug("proc %d: got execute reply\n", id_); + HandleCompletion(status); + if (out_of_requests) + wait_start_ = current_time_ms(); + } else { + debug("got data on response pipe in wrong state %d\n", state_); + return false; + } + return true; + } + + bool ReadOutput() + { + const size_t kChunk = 1024; + output_.resize(output_.size() + kChunk); + ssize_t n = read(stdout_pipe_, output_.data() + output_.size() - kChunk, kChunk); + output_.resize(output_.size() - kChunk + std::max(n, 0)); + if (n < 0) { + if (errno == EINTR || errno == EAGAIN) + return true; + fail("proc stdout read failed"); + } + if (n == 0) { + debug("proc %d: output pipe EOF\n", id_); + return false; + } + if (flag_debug) { + output_.resize(output_.size() + 1); + debug("proc %d: got output: %s\n", id_, output_.data() + debug_output_pos_); + output_.resize(output_.size() - 1); + debug_output_pos_ = output_.size(); + } + return true; + } +}; + +// Runner manages a set of test subprocesses (Proc's), receives new test requests from the manager, +// and dispatches them to subprocesses. +class Runner +{ +public: + Runner(Connection& conn, const char* name, const char* bin) + : conn_(conn), + name_(name) + { + size_t num_procs = Handshake(); + int max_signal_fd = max_signal_ ? max_signal_->FD() : -1; + int cover_filter_fd = cover_filter_ ? 
cover_filter_->FD() : -1; + for (size_t i = 0; i < num_procs; i++) + procs_.emplace_back(new Proc(conn, bin, i, max_signal_fd, cover_filter_fd, + slowdown_, syscall_timeout_ms_, program_timeout_ms_)); + + for (;;) + Loop(); + } + +private: + Connection& conn_; + const char* const name_; + std::optional max_signal_; + std::optional cover_filter_; + std::vector> procs_; + std::deque requests_; + std::vector leak_frames_; + uint32 slowdown_ = 0; + uint32 syscall_timeout_ms_ = 0; + uint32 program_timeout_ms_ = 0; + + friend std::ostream& operator<<(std::ostream& ss, const Runner& runner) + { + ss << "procs:\n"; + for (const auto& proc : runner.procs_) + ss << *proc; + ss << "\nqueued requests (" << runner.requests_.size() << "):\n"; + for (const auto& req : runner.requests_) + ss << req; + return ss; + } + + void Loop() + { + Select select; + select.Arm(conn_.FD()); + for (auto& proc : procs_) + proc->Arm(select); + // Wait for ready host connection and subprocess pipes. + // Timeout is for terminating hanged subprocesses. 
+ select.Wait(1000); + uint64 now = current_time_ms(); + + if (select.Ready(conn_.FD())) { + rpc::HostMessageRawT raw; + conn_.Recv(raw); + if (auto* msg = raw.msg.AsExecRequest()) + Handle(*msg); + else if (auto* msg = raw.msg.AsSignalUpdate()) + Handle(*msg); + else if (auto* msg = raw.msg.AsStartLeakChecks()) + Handle(*msg); + else if (auto* msg = raw.msg.AsStateRequest()) + Handle(*msg); + else + failmsg("unknown host message type", "type=%d", static_cast(raw.msg.type)); + } + + for (auto& proc : procs_) { + proc->Ready(select, now, requests_.empty()); + if (!requests_.empty()) { + if (proc->Execute(requests_.front())) + requests_.pop_front(); + } + } + } + + size_t Handshake() + { + rpc::ConnectRequestRawT conn_req; + conn_req.name = name_; + conn_req.arch = GOARCH; + conn_req.git_revision = GIT_REVISION; + conn_req.syz_revision = SYZ_REVISION; + conn_.Send(conn_req); + + rpc::ConnectReplyRawT conn_reply; + conn_.Recv(conn_reply); + if (conn_reply.debug) + flag_debug = true; + debug("connected to manager: procs=%d slowdown=%d syscall_timeout=%u" + " program_timeout=%u features=0x%llx\n", + conn_reply.procs, conn_reply.slowdown, conn_reply.syscall_timeout_ms, + conn_reply.program_timeout_ms, static_cast(conn_reply.features)); + leak_frames_ = conn_reply.leak_frames; + slowdown_ = conn_reply.slowdown; + syscall_timeout_ms_ = conn_reply.syscall_timeout_ms; + program_timeout_ms_ = conn_reply.program_timeout_ms; + if (conn_reply.cover) + max_signal_.emplace(); + + rpc::InfoRequestRawT info_req; + info_req.files = ReadFiles(conn_reply.files); + info_req.globs = ReadGlobs(conn_reply.globs); + + // This does any one-time setup for the requested features on the machine. + // Note: this can be called multiple times and must be idempotent. + // is_kernel_64_bit = detect_kernel_bitness(); +#if SYZ_HAVE_FEATURES + setup_sysctl(); + setup_cgroups(); +#endif +#if SYZ_HAVE_SETUP_EXT + // This can be defined in common_ext.h. 
+ setup_ext(); +#endif + for (const auto& feat : features) { + if (!(conn_reply.features & feat.id)) + continue; + debug("setting up feature %s\n", rpc::EnumNameFeature(feat.id)); + const char* reason = feat.setup(); + conn_reply.features &= ~feat.id; + std::unique_ptr res(new rpc::FeatureInfoRawT); + res->id = feat.id; + res->need_setup = true; + if (reason) { + debug("failed: %s\n", reason); + res->reason = reason; + } + info_req.features.push_back(std::move(res)); + } + for (auto id : rpc::EnumValuesFeature()) { + if (!(conn_reply.features & id)) + continue; + std::unique_ptr res(new rpc::FeatureInfoRawT); + res->id = id; + res->need_setup = false; + info_req.features.push_back(std::move(res)); + } + +#if SYZ_HAVE_KCSAN + setup_kcsan_filter(conn_reply.race_frames); +#endif + + conn_.Send(info_req); + + rpc::InfoReplyRawT info_reply; + conn_.Recv(info_reply); + debug("received info reply: covfilter=%zu\n", info_reply.cover_filter.size()); + if (!info_reply.cover_filter.empty()) { + cover_filter_.emplace(); + for (auto pc : info_reply.cover_filter) + cover_filter_->Insert(pc); + } + + Select::Prepare(conn_.FD()); + return conn_reply.procs; + } + + void Handle(rpc::ExecRequestRawT& msg) + { + debug("recv exec request %llu: flags=0x%llx env=0x%llx exec=0x%llx size=%zu\n", + static_cast(msg.id), + static_cast(msg.flags), + static_cast(msg.exec_opts->env_flags()), + static_cast(msg.exec_opts->exec_flags()), + msg.prog_data.size()); + if (IsSet(msg.flags, rpc::RequestFlag::IsBinary)) { + ExecuteBinary(msg); + return; + } + for (auto& proc : procs_) { + if (proc->Execute(msg)) + return; + } + requests_.push_back(std::move(msg)); + } + + void Handle(const rpc::SignalUpdateRawT& msg) + { + debug("recv signal update: new=%zu drop=%zu\n", msg.new_max.size(), msg.drop_max.size()); + if (!max_signal_) + fail("signal update when no signal filter installed"); + for (auto pc : msg.new_max) + max_signal_->Insert(pc); + for (auto pc : msg.drop_max) + max_signal_->Remove(pc); + } + 
+ void Handle(const rpc::StartLeakChecksRawT& msg) + { + // TODO: repair leak checking (#4728). + debug("recv start leak checks\n"); + } + + void Handle(const rpc::StateRequestRawT& msg) + { + // Debug request about our internal state. + std::ostringstream ss; + ss << *this; + const std::string& str = ss.str(); + rpc::StateResultRawT res; + res.data.insert(res.data.begin(), str.data(), str.data() + str.size()); + rpc::ExecutorMessageRawT raw; + raw.msg.Set(std::move(res)); + conn_.Send(raw); + } + + void ExecuteBinary(rpc::ExecRequestRawT& msg) + { + rpc::ExecutingMessageRawT exec; + exec.id = msg.id; + rpc::ExecutorMessageRawT raw; + raw.msg.Set(std::move(exec)); + conn_.Send(raw); + + char dir_template[] = "syz-bin-dirXXXXXX"; + char* dir = mkdtemp(dir_template); + if (dir == nullptr) + fail("mkdtemp failed"); + if (chmod(dir, 0777)) + fail("chmod failed"); + auto [err, output] = ExecuteBinaryImpl(msg, dir); + if (!err.empty()) { + char tmp[64]; + snprintf(tmp, sizeof(tmp), " (errno %d: %s)", errno, strerror(errno)); + err += tmp; + } + remove_dir(dir); + rpc::ExecResultRawT res; + res.id = msg.id; + res.error = std::move(err); + res.output = std::move(output); + raw.msg.Set(std::move(res)); + conn_.Send(raw); + } + + std::tuple> ExecuteBinaryImpl(rpc::ExecRequestRawT& msg, const char* dir) + { + // For simplicity we just wait for binary tests to complete blocking everything else. 
+ std::string file = std::string(dir) + "/syz-executor"; + int fd = open(file.c_str(), O_WRONLY | O_CLOEXEC | O_CREAT, 0755); + if (fd == -1) + return {"binary file creation failed", {}}; + ssize_t wrote = write(fd, msg.prog_data.data(), msg.prog_data.size()); + close(fd); + if (wrote != static_cast(msg.prog_data.size())) + return {"binary file write failed", {}}; + + int stdin_pipe[2]; + if (pipe(stdin_pipe)) + fail("pipe failed"); + int stdout_pipe[2]; + if (pipe(stdout_pipe)) + fail("pipe failed"); + + const char* argv[] = {file.c_str(), nullptr}; + std::vector> fds = { + {stdin_pipe[0], STDIN_FILENO}, + {stdout_pipe[1], STDOUT_FILENO}, + {stdout_pipe[1], STDERR_FILENO}, + }; + Subprocess process(argv, fds); + + close(stdin_pipe[0]); + close(stdout_pipe[1]); + + int status = process.WaitAndKill(5 * program_timeout_ms_); + + std::vector output; + for (;;) { + const size_t kChunk = 1024; + output.resize(output.size() + kChunk); + ssize_t n = read(stdout_pipe[0], output.data() + output.size() - kChunk, kChunk); + output.resize(output.size() - kChunk + std::max(n, 0)); + if (n <= 0) + break; + } + close(stdin_pipe[1]); + close(stdout_pipe[0]); + + return {status == kFailStatus ? "process failed" : "", std::move(output)}; + } +}; + +static void SigintHandler(int sig) +{ + // GCE VM preemption is signalled as SIGINT, notify syz-manager. + exitf("SYZ-EXECUTOR: PREEMPTED"); +} + +static void SigchldHandler(int sig) +{ + // We need just blocking syscall preemption. +} + +static void SigsegvHandler(int sig, siginfo_t* info, void* ucontext) +{ + // Print minimal debugging info we can extract reasonably easy. + auto& mctx = static_cast(ucontext)->uc_mcontext; + (void)mctx; + uintptr_t pc = 0xdeadbeef; +#if GOOS_linux +#if GOARCH_amd64 + pc = mctx.gregs[REG_RIP]; +#elif GOARCH_arm64 + pc = mctx.pc; +#endif +#endif + // Print the current function PC so that it's possible to map the failing PC + // to a symbol in the binary offline (we usually compile as PIE). 
+ failmsg("SIGSEGV", "sig:%d handler:%p pc:%p addr:%p", + sig, SigsegvHandler, info->si_addr, reinterpret_cast(pc)); +} + +static void runner(char** argv, int argc) +{ + if (argc != 5) + fail("usage: syz-executor runner "); + const char* const name = argv[2]; + const char* const manager_addr = argv[3]; + const char* const manager_port = argv[4]; + + struct rlimit rlim; + rlim.rlim_cur = rlim.rlim_max = kFdLimit; + if (setrlimit(RLIMIT_NOFILE, &rlim)) + fail("setrlimit(RLIMIT_NOFILE) failed"); + + // Ignore all signals we are not interested in. + // In particular we want to ignore SIGPIPE, but also everything else since + // test processes manage to send random signals using tracepoints with bpf programs. + // This is not a bullet-proof protection, but it won't harm either. + for (int sig = 0; sig <= 64; sig++) + signal(sig, SIG_IGN); + if (signal(SIGINT, SigintHandler) == SIG_ERR) + fail("signal(SIGINT) failed"); + if (signal(SIGTERM, SigintHandler) == SIG_ERR) + fail("signal(SIGTERM) failed"); + if (signal(SIGCHLD, SigchldHandler) == SIG_ERR) + fail("signal(SIGCHLD) failed"); + struct sigaction act = {}; + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = SigsegvHandler; + if (sigaction(SIGSEGV, &act, nullptr)) + fail("signal(SIGSEGV) failed"); + if (sigaction(SIGBUS, &act, nullptr)) + fail("signal(SIGBUS) failed"); + + Connection conn(manager_addr, manager_port); + Runner(conn, name, argv[0]); +} diff --git a/executor/files.h b/executor/files.h new file mode 100644 index 000000000..f952a07dc --- /dev/null +++ b/executor/files.h @@ -0,0 +1,85 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static std::vector Glob(const std::string& pattern) +{ + glob_t buf = {}; + int res = glob(pattern.c_str(), GLOB_MARK | GLOB_NOSORT, nullptr, &buf); + if (res != 0 && res != GLOB_NOMATCH) + failmsg("glob failed", "pattern='%s' res=%d", pattern.c_str(), res); + std::vector files; + for (size_t i = 0; i < buf.gl_pathc; i++) { + const char* file = buf.gl_pathv[i]; + if (file[strlen(file) - 1] == '/') + continue; + files.push_back(file); + } + globfree(&buf); + return files; +} + +static std::unique_ptr ReadFile(const std::string& file) +{ + auto info = std::make_unique(); + info->name = file; + int fd = open(file.c_str(), O_RDONLY); + if (fd == -1) { + info->exists = errno != EEXIST && errno != ENOENT; + info->error = strerror(errno); + } else { + info->exists = true; + for (;;) { + constexpr size_t kChunk = 4 << 10; + info->data.resize(info->data.size() + kChunk); + ssize_t n = read(fd, info->data.data() + info->data.size() - kChunk, kChunk); + if (n < 0) { + info->error = strerror(errno); + break; + } + info->data.resize(info->data.size() - kChunk + n); + if (n == 0) + break; + } + close(fd); + } + debug("reading file %s: size=%zu exists=%d error=%s\n", + info->name.c_str(), info->data.size(), info->exists, info->error.c_str()); + return info; +} + +static std::vector> ReadFiles(const std::vector& files) +{ + std::vector> results; + for (const auto& file : files) { + if (!strchr(file.c_str(), '*')) { + results.push_back(ReadFile(file)); + continue; + } + for (const auto& match : Glob(file)) + results.push_back(ReadFile(match)); + } + return results; +} + +static std::vector> ReadGlobs(const std::vector& patterns) +{ + std::vector> results; + for (const auto& pattern : patterns) { + auto info = std::make_unique(); + info->name = pattern; + info->files = Glob(pattern); + results.push_back(std::move(info)); + } + return results; +} diff --git a/executor/shmem.h 
b/executor/shmem.h index ab9d17300..b7722ff99 100644 --- a/executor/shmem.h +++ b/executor/shmem.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -10,20 +11,22 @@ class ShmemFile { public: - // Maps shared memory region of size 'size' from a new file 'file', preferably at the address 'preferred'. - ShmemFile(const char* file, void* preferred, size_t size) + // Maps shared memory region of size 'size' from a new temp file. + ShmemFile(size_t size) { - fd_ = open(file, O_RDWR | O_CREAT | O_TRUNC, 0600); + char file_name[] = "syz.XXXXXX"; + fd_ = mkstemp(file_name); if (fd_ == -1) - failmsg("shmem open failed", "file=%s", file); - if (fallocate(fd_, 0, 0, size)) + failmsg("shmem open failed", "file=%s", file_name); + if (posix_fallocate(fd_, 0, size)) failmsg("shmem fallocate failed", "size=%zu", size); - Mmap(fd_, preferred, size, true); - if (unlink(file)) + Mmap(fd_, nullptr, size, true); + if (unlink(file_name)) fail("shmem unlink failed"); } - // Maps shared memory region from the file 'fd' in read/write or write-only mode. + // Maps shared memory region from the file 'fd' in read/write or write-only mode, + // preferably at the address 'preferred'. ShmemFile(int fd, void* preferred, size_t size, bool write) { Mmap(fd, preferred, size, write); diff --git a/executor/style_test.go b/executor/style_test.go index e41674d1c..c5f7177b2 100644 --- a/executor/style_test.go +++ b/executor/style_test.go @@ -90,9 +90,10 @@ if (foo) { // This detects C89-style variable declarations in the beginning of block in a best-effort manner. // Struct fields look exactly as C89 variable declarations, to filter them out we look for "{" // at the beginning of the line. 
+ // nolint: lll pattern: ` {[^{]* -\s+((unsigned )?[a-zA-Z][a-zA-Z0-9_]+\s*\*?|(struct )?[a-zA-Z][a-zA-Z0-9_]+\*)\s+([a-zA-Z][a-zA-Z0-9_]*(,\s*)?)+; +\s+((unsigned )?([A-Z][A-Z0-9_]+|[a-z][a-z0-9_]+)\s*\*?|(struct )?[a-zA-Z][a-zA-Z0-9_]+\*)\s+([a-zA-Z][a-zA-Z0-9_]*(,\s*)?)+; `, suppression: `return |goto |va_list |pthread_|zx_`, message: "Don't use C89 var declarations. Declare vars where they are needed and combine with initialization", @@ -155,7 +156,7 @@ if (foo) { re := regexp.MustCompile(check.pattern) for _, test := range check.tests { if !re.MatchString(test) { - t.Fatalf("patter %q does not match test %q", check.pattern, test) + t.Fatalf("pattern %q does not match test %q", check.pattern, test) } } } diff --git a/executor/subprocess.h b/executor/subprocess.h new file mode 100644 index 000000000..ef4bd9656 --- /dev/null +++ b/executor/subprocess.h @@ -0,0 +1,129 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +#include +#include +#include + +#include + +// Subprocess allows to start and wait for a subprocess. 
+class Subprocess +{ +public: + Subprocess(const char** argv, const std::vector>& fds) + { + posix_spawn_file_actions_t actions; + if (posix_spawn_file_actions_init(&actions)) + fail("posix_spawn_file_actions_init failed"); + int max_fd = 0; + for (auto pair : fds) { + max_fd = std::max(max_fd, pair.second); + if (pair.first != -1) { + if (posix_spawn_file_actions_adddup2(&actions, pair.first, pair.second)) + fail("posix_spawn_file_actions_adddup2 failed"); + } else { + if (posix_spawn_file_actions_addclose(&actions, pair.second)) + fail("posix_spawn_file_actions_addclose failed"); + } + } + for (int i = max_fd + 1; i < kFdLimit; i++) { + if (posix_spawn_file_actions_addclose(&actions, i)) + fail("posix_spawn_file_actions_addclose failed"); + } + + posix_spawnattr_t attr; + if (posix_spawnattr_init(&attr)) + fail("posix_spawnattr_init failed"); + // Create new process group so that we can kill all processes in the group. + if (posix_spawnattr_setflags(&attr, POSIX_SPAWN_SETPGROUP)) + fail("posix_spawnattr_setflags failed"); + + const char* child_envp[] = { + // Tell ASAN to not mess with our NONFAILING. + "ASAN_OPTIONS=handle_segv=0 allow_user_segv_handler=1", + // Disable rseq since we don't use it and we want to [ab]use it ourselves for kernel testing. 
+ "GLIBC_TUNABLES=glibc.pthread.rseq=0", + nullptr}; + + if (posix_spawn(&pid_, argv[0], &actions, &attr, + const_cast(argv), const_cast(child_envp))) + fail("posix_spawn failed"); + + if (posix_spawn_file_actions_destroy(&actions)) + fail("posix_spawn_file_actions_destroy failed"); + if (posix_spawnattr_destroy(&attr)) + fail("posix_spawnattr_destroy failed"); + } + + ~Subprocess() + { + if (pid_) + KillAndWait(); + } + + int KillAndWait() + { + if (!pid_) + fail("subprocess hasn't started or already waited"); + kill(-pid_, SIGKILL); + kill(pid_, SIGKILL); + int pid = 0; + int wstatus = 0; + do + pid = waitpid(pid_, &wstatus, WAIT_FLAGS); + while (pid == -1 && errno == EINTR); + if (pid != pid_) + failmsg("child wait failed", "pid_=%d pid=%d", pid_, pid); + if (WIFSTOPPED(wstatus)) + failmsg("child stopped", "status=%d", wstatus); + pid_ = 0; + return ExitStatus(wstatus); + } + + int WaitAndKill(uint64 timeout_ms) + { + if (!pid_) + fail("subprocess hasn't started or already waited"); + uint64 start = current_time_ms(); + int wstatus = 0; + for (;;) { + sleep_ms(10); + if (waitpid(pid_, &wstatus, WNOHANG | WAIT_FLAGS) == pid_) + break; + if (current_time_ms() - start > timeout_ms) { + kill(-pid_, SIGKILL); + kill(pid_, SIGKILL); + } + } + pid_ = 0; + return ExitStatus(wstatus); + } + +private: + int pid_ = 0; + + static int ExitStatus(int wstatus) + { + if (WIFEXITED(wstatus)) + return WEXITSTATUS(wstatus); + if (WIFSIGNALED(wstatus)) { + // Map signal numbers to some reasonable exit statuses. + // We only log them and compare to kFailStatus, so ensure it's not kFailStatus + // and not 0, otherwise return the signal as is (e.g. exit status 11 is SIGSEGV). + switch (WTERMSIG(wstatus)) { + case kFailStatus: + return kFailStatus - 1; + case 0: + return kFailStatus - 2; + default: + return WTERMSIG(wstatus); + } + } + // This may be possible in WIFSTOPPED case for C programs. 
+ return kFailStatus - 3; + } + + Subprocess(const Subprocess&) = delete; + Subprocess& operator=(const Subprocess&) = delete; +}; diff --git a/executor/test.h b/executor/test.h index c49459033..e6fabf63f 100644 --- a/executor/test.h +++ b/executor/test.h @@ -204,7 +204,7 @@ static int test_csum_inet_acc() static int test_cover_filter() { char* tmp = tempnam(nullptr, "syz-test-cover-filter"); - CoverFilter filter(tmp); + CoverFilter filter; CoverFilter child(filter.FD()); free(tmp); -- cgit mrf-deployment