From f8f67d67d3daa783c2bb7c9993c0ccb637af1a0e Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Tue, 22 Dec 2020 12:01:52 +0100
Subject: executor: remove hardcoded timeouts

In preparation for making timeouts tunable based
on OS/arch/VM/etc de-hardcode all (almost) timeouts in executor.
---
 executor/common.h    | 18 ++++++++++++------
 executor/executor.cc | 31 +++++++++++++++++++++----------
 2 files changed, 33 insertions(+), 16 deletions(-)

(limited to 'executor')

diff --git a/executor/common.h b/executor/common.h
index 0a943a88f..0361fac4e 100644
--- a/executor/common.h
+++ b/executor/common.h
@@ -526,7 +526,7 @@ again:
 			if (collide && (call % 2) == 0)
 				break;
 #endif
-			event_timedwait(&th->done, /*{{{CALL_TIMEOUT}}}*/);
+			event_timedwait(&th->done, /*{{{CALL_TIMEOUT_MS}}}*/);
 			break;
 		}
 	}
@@ -651,7 +651,7 @@ static void loop(void)
 			sleep_ms(1);
 #if SYZ_EXECUTOR && SYZ_EXECUTOR_USES_SHMEM
 			// Even though the test process executes exit at the end
-			// and execution time of each syscall is bounded by 20ms,
+			// and execution time of each syscall is bounded by syscall_timeout_ms (~50ms),
 			// this backup watchdog is necessary and its performance is important.
 			// The problem is that exit in the test processes can fail (sic).
 			// One observed scenario is that the test processes prohibits
@@ -659,7 +659,9 @@ static void loop(void)
 			// is that the test processes setups a userfaultfd for itself,
 			// then the main thread hangs when it wants to page in a page.
 			// Below we check if the test process still executes syscalls
-			// and kill it after 1s of inactivity.
+			// and kill it after ~1s of inactivity.
+			uint64 min_timeout_ms = program_timeout_ms * 3 / 5;
+			uint64 inactive_timeout_ms = syscall_timeout_ms * 20;
 			uint64 now = current_time_ms();
 			uint32 now_executed = __atomic_load_n(output_data, __ATOMIC_RELAXED);
 			if (executed_calls != now_executed) {
@@ -667,11 +669,15 @@ static void loop(void)
 				last_executed = now;
 			}
 			// TODO: adjust timeout for progs with syz_usb_connect call.
-			if ((now - start < 5 * 1000) && (now - start < 3 * 1000 || now - last_executed < 1000))
+			if ((now - start < program_timeout_ms) &&
+			    (now - start < min_timeout_ms || now - last_executed < inactive_timeout_ms))
 				continue;
-#else
-			if (current_time_ms() - start < 5 * 1000)
+#elif SYZ_EXECUTOR
+			if (current_time_ms() - start < program_timeout_ms)
 				continue;
+#else
+		if (current_time_ms() - start < /*{{{PROGRAM_TIMEOUT_MS}}}*/)
+			continue;
 #endif
 			debug("killing hanging pid %d\n", pid);
 			kill_and_wait(pid, &status);
diff --git a/executor/executor.cc b/executor/executor.cc
index cbd3048f9..76d172cc3 100644
--- a/executor/executor.cc
+++ b/executor/executor.cc
@@ -151,6 +151,11 @@ static bool flag_fault;
 static int flag_fault_call;
 static int flag_fault_nth;
 
+// Tunable timeouts, received with execute_req.
+static uint64 syscall_timeout_ms;
+static uint64 program_timeout_ms;
+static uint64 slowdown_scale;
+
 #define SYZ_EXECUTOR 1
 #include "common.h"
 
@@ -259,6 +264,9 @@ struct execute_req {
 	uint64 pid;
 	uint64 fault_call;
 	uint64 fault_nth;
+	uint64 syscall_timeout_ms;
+	uint64 program_timeout_ms;
+	uint64 slowdown_scale;
 	uint64 prog_size;
 };
 
@@ -545,6 +553,9 @@ void receive_execute()
 		fail("bad execute prog size 0x%llx", req.prog_size);
 	parse_env_flags(req.env_flags);
 	procid = req.pid;
+	syscall_timeout_ms = req.syscall_timeout_ms;
+	program_timeout_ms = req.program_timeout_ms;
+	slowdown_scale = req.slowdown_scale;
 	flag_collect_cover = req.exec_flags & (1 << 0);
 	flag_dedup_cover = req.exec_flags & (1 << 1);
 	flag_fault = req.exec_flags & (1 << 2);
@@ -725,11 +736,11 @@ retry:
 		if (call->attrs.disabled)
 			fail("executing disabled syscall %s", call->name);
 		if (prog_extra_timeout < call->attrs.prog_timeout)
-			prog_extra_timeout = call->attrs.prog_timeout;
+			prog_extra_timeout = call->attrs.prog_timeout * slowdown_scale;
 		if (strncmp(syscalls[call_num].name, "syz_usb", strlen("syz_usb")) == 0)
-			prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, (uint64)500);
+			prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, 500 * slowdown_scale);
 		if (strncmp(syscalls[call_num].name, "syz_80211_inject_frame", strlen("syz_80211_inject_frame")) == 0)
-			prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, (uint64)300);
+			prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, 300 * slowdown_scale);
 		uint64 copyout_index = read_input(&input_pos);
 		uint64 num_args = read_input(&input_pos);
 		if (num_args > kMaxArgs)
@@ -749,7 +760,9 @@ retry:
 			// Wait for call completion.
 			// Note: sys/linux knows about this 45 ms timeout when it generates timespec/timeval values.
 			// Note: pkg/csource also knows about this 45 ms per-call timeout.
-			uint64 timeout_ms = 45 + call->attrs.timeout;
+			uint64 timeout_ms = syscall_timeout_ms + call->attrs.timeout * slowdown_scale;
+			// This is because of printing pre/post call. Ideally we print everything in the main thread
+			// and then remove this (would also avoid intermixed output).
 			if (flag_debug && timeout_ms < 1000)
 				timeout_ms = 1000;
 			if (event_timedwait(&th->done, timeout_ms))
@@ -775,14 +788,12 @@ retry:
 	if (!colliding && !collide && running > 0) {
 		// Give unfinished syscalls some additional time.
 		last_scheduled = 0;
-		uint64 wait = 100;
 		uint64 wait_start = current_time_ms();
-		uint64 wait_end = wait_start + wait;
-		if (wait_end < start + 800)
-			wait_end = start + 800;
-		wait_end += prog_extra_timeout;
+		uint64 wait_end = wait_start + 2 * syscall_timeout_ms;
+		wait_end = std::max(wait_end, start + program_timeout_ms / 6);
+		wait_end = std::max(wait_end, wait_start + prog_extra_timeout);
 		while (running > 0 && current_time_ms() <= wait_end) {
-			sleep_ms(1);
+			sleep_ms(1 * slowdown_scale);
 			for (int i = 0; i < kMaxThreads; i++) {
 				thread_t* th = &threads[i];
 				if (th->executing && event_isset(&th->done))
-- 
cgit mrf-deployment