aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2020-12-22 12:01:52 +0100
committerDmitry Vyukov <dvyukov@google.com>2020-12-25 10:12:41 +0100
commitf8f67d67d3daa783c2bb7c9993c0ccb637af1a0e (patch)
treed17cd1f31315f1466673d951dad6ee9e309d66ec
parent549d0d03df068e7cc1d48f92ba1ba76153c91339 (diff)
executor: remove hardcoded timeouts
In preparation for making timeouts tunable based on OS/arch/VM/etc de-hardcode all (almost) timeouts in executor.
-rw-r--r--executor/common.h18
-rw-r--r--executor/executor.cc31
-rw-r--r--pkg/csource/csource.go3
-rw-r--r--pkg/csource/generated.go14
-rw-r--r--pkg/ipc/ipc.go34
5 files changed, 65 insertions, 35 deletions
diff --git a/executor/common.h b/executor/common.h
index 0a943a88f..0361fac4e 100644
--- a/executor/common.h
+++ b/executor/common.h
@@ -526,7 +526,7 @@ again:
if (collide && (call % 2) == 0)
break;
#endif
- event_timedwait(&th->done, /*{{{CALL_TIMEOUT}}}*/);
+ event_timedwait(&th->done, /*{{{CALL_TIMEOUT_MS}}}*/);
break;
}
}
@@ -651,7 +651,7 @@ static void loop(void)
sleep_ms(1);
#if SYZ_EXECUTOR && SYZ_EXECUTOR_USES_SHMEM
// Even though the test process executes exit at the end
- // and execution time of each syscall is bounded by 20ms,
+ // and execution time of each syscall is bounded by syscall_timeout_ms (~50ms),
// this backup watchdog is necessary and its performance is important.
// The problem is that exit in the test processes can fail (sic).
// One observed scenario is that the test processes prohibits
@@ -659,7 +659,9 @@ static void loop(void)
// is that the test processes setups a userfaultfd for itself,
// then the main thread hangs when it wants to page in a page.
// Below we check if the test process still executes syscalls
- // and kill it after 1s of inactivity.
+ // and kill it after ~1s of inactivity.
+ uint64 min_timeout_ms = program_timeout_ms * 3 / 5;
+ uint64 inactive_timeout_ms = syscall_timeout_ms * 20;
uint64 now = current_time_ms();
uint32 now_executed = __atomic_load_n(output_data, __ATOMIC_RELAXED);
if (executed_calls != now_executed) {
@@ -667,11 +669,15 @@ static void loop(void)
last_executed = now;
}
// TODO: adjust timeout for progs with syz_usb_connect call.
- if ((now - start < 5 * 1000) && (now - start < 3 * 1000 || now - last_executed < 1000))
+ if ((now - start < program_timeout_ms) &&
+ (now - start < min_timeout_ms || now - last_executed < inactive_timeout_ms))
continue;
-#else
- if (current_time_ms() - start < 5 * 1000)
+#elif SYZ_EXECUTOR
+ if (current_time_ms() - start < program_timeout_ms)
continue;
+#else
+ if (current_time_ms() - start < /*{{{PROGRAM_TIMEOUT_MS}}}*/)
+ continue;
#endif
debug("killing hanging pid %d\n", pid);
kill_and_wait(pid, &status);
diff --git a/executor/executor.cc b/executor/executor.cc
index cbd3048f9..76d172cc3 100644
--- a/executor/executor.cc
+++ b/executor/executor.cc
@@ -151,6 +151,11 @@ static bool flag_fault;
static int flag_fault_call;
static int flag_fault_nth;
+// Tunable timeouts, received with execute_req.
+static uint64 syscall_timeout_ms;
+static uint64 program_timeout_ms;
+static uint64 slowdown_scale;
+
#define SYZ_EXECUTOR 1
#include "common.h"
@@ -259,6 +264,9 @@ struct execute_req {
uint64 pid;
uint64 fault_call;
uint64 fault_nth;
+ uint64 syscall_timeout_ms;
+ uint64 program_timeout_ms;
+ uint64 slowdown_scale;
uint64 prog_size;
};
@@ -545,6 +553,9 @@ void receive_execute()
fail("bad execute prog size 0x%llx", req.prog_size);
parse_env_flags(req.env_flags);
procid = req.pid;
+ syscall_timeout_ms = req.syscall_timeout_ms;
+ program_timeout_ms = req.program_timeout_ms;
+ slowdown_scale = req.slowdown_scale;
flag_collect_cover = req.exec_flags & (1 << 0);
flag_dedup_cover = req.exec_flags & (1 << 1);
flag_fault = req.exec_flags & (1 << 2);
@@ -725,11 +736,11 @@ retry:
if (call->attrs.disabled)
fail("executing disabled syscall %s", call->name);
if (prog_extra_timeout < call->attrs.prog_timeout)
- prog_extra_timeout = call->attrs.prog_timeout;
+ prog_extra_timeout = call->attrs.prog_timeout * slowdown_scale;
if (strncmp(syscalls[call_num].name, "syz_usb", strlen("syz_usb")) == 0)
- prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, (uint64)500);
+ prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, 500 * slowdown_scale);
if (strncmp(syscalls[call_num].name, "syz_80211_inject_frame", strlen("syz_80211_inject_frame")) == 0)
- prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, (uint64)300);
+ prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, 300 * slowdown_scale);
uint64 copyout_index = read_input(&input_pos);
uint64 num_args = read_input(&input_pos);
if (num_args > kMaxArgs)
@@ -749,7 +760,9 @@ retry:
// Wait for call completion.
// Note: sys/linux knows about this 45 ms timeout when it generates timespec/timeval values.
// Note: pkg/csource also knows about this 45 ms per-call timeout.
- uint64 timeout_ms = 45 + call->attrs.timeout;
+ uint64 timeout_ms = syscall_timeout_ms + call->attrs.timeout * slowdown_scale;
+ // This is because of printing pre/post call. Ideally we print everything in the main thread
+ // and then remove this (would also avoid intermixed output).
if (flag_debug && timeout_ms < 1000)
timeout_ms = 1000;
if (event_timedwait(&th->done, timeout_ms))
@@ -775,14 +788,12 @@ retry:
if (!colliding && !collide && running > 0) {
// Give unfinished syscalls some additional time.
last_scheduled = 0;
- uint64 wait = 100;
uint64 wait_start = current_time_ms();
- uint64 wait_end = wait_start + wait;
- if (wait_end < start + 800)
- wait_end = start + 800;
- wait_end += prog_extra_timeout;
+ uint64 wait_end = wait_start + 2 * syscall_timeout_ms;
+ wait_end = std::max(wait_end, start + program_timeout_ms / 6);
+ wait_end = std::max(wait_end, wait_start + prog_extra_timeout);
while (running > 0 && current_time_ms() <= wait_end) {
- sleep_ms(1);
+ sleep_ms(1 * slowdown_scale);
for (int i = 0; i < kMaxThreads; i++) {
thread_t* th = &threads[i];
if (th->executing && event_isset(&th->done))
diff --git a/pkg/csource/csource.go b/pkg/csource/csource.go
index 49babbf41..b53b3b97f 100644
--- a/pkg/csource/csource.go
+++ b/pkg/csource/csource.go
@@ -93,13 +93,14 @@ func Write(p *prog.Prog, opts Options) ([]byte, error) {
replacements["SANDBOX_FUNC"] = replacements["SYSCALLS"]
replacements["SYSCALLS"] = "unused"
}
+ replacements["PROGRAM_TIMEOUT_MS"] = "5000"
timeoutExpr := "45"
for i, call := range p.Calls {
if timeout := call.Meta.Attrs.Timeout; timeout != 0 {
timeoutExpr += fmt.Sprintf(" + (call == %d ? %d : 0)", i, timeout)
}
}
- replacements["CALL_TIMEOUT"] = timeoutExpr
+ replacements["CALL_TIMEOUT_MS"] = timeoutExpr
result, err := createCommonHeader(p, mmapProg, replacements, opts)
if err != nil {
return nil, err
diff --git a/pkg/csource/generated.go b/pkg/csource/generated.go
index 5f38c02c6..a1d204fd1 100644
--- a/pkg/csource/generated.go
+++ b/pkg/csource/generated.go
@@ -10093,7 +10093,7 @@ again:
if (collide && (call % 2) == 0)
break;
#endif
- event_timedwait(&th->done, /*{{{CALL_TIMEOUT}}}*/);
+ event_timedwait(&th->done, /*{{{CALL_TIMEOUT_MS}}}*/);
break;
}
}
@@ -10208,17 +10208,23 @@ static void loop(void)
break;
sleep_ms(1);
#if SYZ_EXECUTOR && SYZ_EXECUTOR_USES_SHMEM
+ uint64 min_timeout_ms = program_timeout_ms * 3 / 5;
+ uint64 inactive_timeout_ms = syscall_timeout_ms * 20;
uint64 now = current_time_ms();
uint32 now_executed = __atomic_load_n(output_data, __ATOMIC_RELAXED);
if (executed_calls != now_executed) {
executed_calls = now_executed;
last_executed = now;
}
- if ((now - start < 5 * 1000) && (now - start < 3 * 1000 || now - last_executed < 1000))
+ if ((now - start < program_timeout_ms) &&
+ (now - start < min_timeout_ms || now - last_executed < inactive_timeout_ms))
continue;
-#else
- if (current_time_ms() - start < 5 * 1000)
+#elif SYZ_EXECUTOR
+ if (current_time_ms() - start < program_timeout_ms)
continue;
+#else
+ if (current_time_ms() - start < /*{{{PROGRAM_TIMEOUT_MS}}}*/)
+ continue;
#endif
debug("killing hanging pid %d\n", pid);
kill_and_wait(pid, &status);
diff --git a/pkg/ipc/ipc.go b/pkg/ipc/ipc.go
index cb6dcd8e9..0f1bd50a6 100644
--- a/pkg/ipc/ipc.go
+++ b/pkg/ipc/ipc.go
@@ -500,13 +500,16 @@ type handshakeReply struct {
}
type executeReq struct {
- magic uint64
- envFlags uint64 // env flags
- execFlags uint64 // exec flags
- pid uint64
- faultCall uint64
- faultNth uint64
- progSize uint64
+ magic uint64
+ envFlags uint64 // env flags
+ execFlags uint64 // exec flags
+ pid uint64
+ faultCall uint64
+ faultNth uint64
+ syscallTimeoutMS uint64
+ programTimeoutMS uint64
+ slowdownScale uint64
+ progSize uint64
// This structure is followed by a serialized test program in encodingexec format.
// Both when sent over a pipe or in shared memory.
}
@@ -722,13 +725,16 @@ func (c *command) wait() error {
func (c *command) exec(opts *ExecOpts, progData []byte) (output []byte, hanged bool, err0 error) {
req := &executeReq{
- magic: inMagic,
- envFlags: uint64(c.config.Flags),
- execFlags: uint64(opts.Flags),
- pid: uint64(c.pid),
- faultCall: uint64(opts.FaultCall),
- faultNth: uint64(opts.FaultNth),
- progSize: uint64(len(progData)),
+ magic: inMagic,
+ envFlags: uint64(c.config.Flags),
+ execFlags: uint64(opts.Flags),
+ pid: uint64(c.pid),
+ faultCall: uint64(opts.FaultCall),
+ faultNth: uint64(opts.FaultNth),
+ syscallTimeoutMS: 50,
+ programTimeoutMS: 5000,
+ slowdownScale: 1,
+ progSize: uint64(len(progData)),
}
reqData := (*[unsafe.Sizeof(*req)]byte)(unsafe.Pointer(req))[:]
if _, err := c.outwp.Write(reqData); err != nil {