From f8f67d67d3daa783c2bb7c9993c0ccb637af1a0e Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 22 Dec 2020 12:01:52 +0100 Subject: executor: remove hardcoded timeouts In preparation for making timeouts tunable based on OS/arch/VM/etc de-hardcode all (almost) timeouts in executor. --- executor/common.h | 18 ++++++++++++------ executor/executor.cc | 31 +++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 16 deletions(-) (limited to 'executor') diff --git a/executor/common.h b/executor/common.h index 0a943a88f..0361fac4e 100644 --- a/executor/common.h +++ b/executor/common.h @@ -526,7 +526,7 @@ again: if (collide && (call % 2) == 0) break; #endif - event_timedwait(&th->done, /*{{{CALL_TIMEOUT}}}*/); + event_timedwait(&th->done, /*{{{CALL_TIMEOUT_MS}}}*/); break; } } @@ -651,7 +651,7 @@ static void loop(void) sleep_ms(1); #if SYZ_EXECUTOR && SYZ_EXECUTOR_USES_SHMEM // Even though the test process executes exit at the end - // and execution time of each syscall is bounded by 20ms, + // and execution time of each syscall is bounded by syscall_timeout_ms (~50ms), // this backup watchdog is necessary and its performance is important. // The problem is that exit in the test processes can fail (sic). // One observed scenario is that the test processes prohibits @@ -659,7 +659,9 @@ static void loop(void) // is that the test processes setups a userfaultfd for itself, // then the main thread hangs when it wants to page in a page. // Below we check if the test process still executes syscalls - // and kill it after 1s of inactivity. + // and kill it after ~1s of inactivity. + uint64 min_timeout_ms = program_timeout_ms * 3 / 5; + uint64 inactive_timeout_ms = syscall_timeout_ms * 20; uint64 now = current_time_ms(); uint32 now_executed = __atomic_load_n(output_data, __ATOMIC_RELAXED); if (executed_calls != now_executed) { @@ -667,11 +669,15 @@ static void loop(void) last_executed = now; } // TODO: adjust timeout for progs with syz_usb_connect call. - if ((now - start < 5 * 1000) && (now - start < 3 * 1000 || now - last_executed < 1000)) + if ((now - start < program_timeout_ms) && + (now - start < min_timeout_ms || now - last_executed < inactive_timeout_ms)) continue; -#else - if (current_time_ms() - start < 5 * 1000) +#elif SYZ_EXECUTOR + if (current_time_ms() - start < program_timeout_ms) continue; +#else + if (current_time_ms() - start < /*{{{PROGRAM_TIMEOUT_MS}}}*/) + continue; #endif debug("killing hanging pid %d\n", pid); kill_and_wait(pid, &status); diff --git a/executor/executor.cc b/executor/executor.cc index cbd3048f9..76d172cc3 100644 --- a/executor/executor.cc +++ b/executor/executor.cc @@ -151,6 +151,11 @@ static bool flag_fault; static int flag_fault_call; static int flag_fault_nth; +// Tunable timeouts, received with execute_req. +static uint64 syscall_timeout_ms; +static uint64 program_timeout_ms; +static uint64 slowdown_scale; + #define SYZ_EXECUTOR 1 #include "common.h" @@ -259,6 +264,9 @@ struct execute_req { uint64 pid; uint64 fault_call; uint64 fault_nth; + uint64 syscall_timeout_ms; + uint64 program_timeout_ms; + uint64 slowdown_scale; uint64 prog_size; }; @@ -545,6 +553,9 @@ void receive_execute() fail("bad execute prog size 0x%llx", req.prog_size); parse_env_flags(req.env_flags); procid = req.pid; + syscall_timeout_ms = req.syscall_timeout_ms; + program_timeout_ms = req.program_timeout_ms; + slowdown_scale = req.slowdown_scale; flag_collect_cover = req.exec_flags & (1 << 0); flag_dedup_cover = req.exec_flags & (1 << 1); flag_fault = req.exec_flags & (1 << 2); @@ -725,11 +736,11 @@ retry: if (call->attrs.disabled) fail("executing disabled syscall %s", call->name); if (prog_extra_timeout < call->attrs.prog_timeout) - prog_extra_timeout = call->attrs.prog_timeout; + prog_extra_timeout = call->attrs.prog_timeout * slowdown_scale; if (strncmp(syscalls[call_num].name, "syz_usb", strlen("syz_usb")) == 0) - prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, (uint64)500); + prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, 500 * slowdown_scale); if (strncmp(syscalls[call_num].name, "syz_80211_inject_frame", strlen("syz_80211_inject_frame")) == 0) - prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, (uint64)300); + prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, 300 * slowdown_scale); uint64 copyout_index = read_input(&input_pos); uint64 num_args = read_input(&input_pos); if (num_args > kMaxArgs) @@ -749,7 +760,9 @@ retry: // Wait for call completion. // Note: sys/linux knows about this 45 ms timeout when it generates timespec/timeval values. // Note: pkg/csource also knows about this 45 ms per-call timeout. - uint64 timeout_ms = 45 + call->attrs.timeout; + uint64 timeout_ms = syscall_timeout_ms + call->attrs.timeout * slowdown_scale; + // This is because of printing pre/post call. Ideally we print everything in the main thread + // and then remove this (would also avoid intermixed output). if (flag_debug && timeout_ms < 1000) timeout_ms = 1000; if (event_timedwait(&th->done, timeout_ms)) @@ -775,14 +788,12 @@ retry: if (!colliding && !collide && running > 0) { // Give unfinished syscalls some additional time. last_scheduled = 0; - uint64 wait = 100; uint64 wait_start = current_time_ms(); - uint64 wait_end = wait_start + wait; - if (wait_end < start + 800) - wait_end = start + 800; - wait_end += prog_extra_timeout; + uint64 wait_end = wait_start + 2 * syscall_timeout_ms; + wait_end = std::max(wait_end, start + program_timeout_ms / 6); + wait_end = std::max(wait_end, wait_start + prog_extra_timeout); while (running > 0 && current_time_ms() <= wait_end) { - sleep_ms(1); + sleep_ms(1 * slowdown_scale); for (int i = 0; i < kMaxThreads; i++) { thread_t* th = &threads[i]; if (th->executing && event_isset(&th->done)) -- cgit mrf-deployment