diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2020-12-22 12:01:52 +0100 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2020-12-25 10:12:41 +0100 |
| commit | f8f67d67d3daa783c2bb7c9993c0ccb637af1a0e (patch) | |
| tree | d17cd1f31315f1466673d951dad6ee9e309d66ec | |
| parent | 549d0d03df068e7cc1d48f92ba1ba76153c91339 (diff) | |
executor: remove hardcoded timeouts
In preparation for making timeouts tunable based
on OS/arch/VM/etc de-hardcode all (almost) timeouts in executor.
| -rw-r--r-- | executor/common.h | 18 | ||||
| -rw-r--r-- | executor/executor.cc | 31 | ||||
| -rw-r--r-- | pkg/csource/csource.go | 3 | ||||
| -rw-r--r-- | pkg/csource/generated.go | 14 | ||||
| -rw-r--r-- | pkg/ipc/ipc.go | 34 |
5 files changed, 65 insertions, 35 deletions
diff --git a/executor/common.h b/executor/common.h index 0a943a88f..0361fac4e 100644 --- a/executor/common.h +++ b/executor/common.h @@ -526,7 +526,7 @@ again: if (collide && (call % 2) == 0) break; #endif - event_timedwait(&th->done, /*{{{CALL_TIMEOUT}}}*/); + event_timedwait(&th->done, /*{{{CALL_TIMEOUT_MS}}}*/); break; } } @@ -651,7 +651,7 @@ static void loop(void) sleep_ms(1); #if SYZ_EXECUTOR && SYZ_EXECUTOR_USES_SHMEM // Even though the test process executes exit at the end - // and execution time of each syscall is bounded by 20ms, + // and execution time of each syscall is bounded by syscall_timeout_ms (~50ms), // this backup watchdog is necessary and its performance is important. // The problem is that exit in the test processes can fail (sic). // One observed scenario is that the test processes prohibits @@ -659,7 +659,9 @@ static void loop(void) // is that the test processes setups a userfaultfd for itself, // then the main thread hangs when it wants to page in a page. // Below we check if the test process still executes syscalls - // and kill it after 1s of inactivity. + // and kill it after ~1s of inactivity. + uint64 min_timeout_ms = program_timeout_ms * 3 / 5; + uint64 inactive_timeout_ms = syscall_timeout_ms * 20; uint64 now = current_time_ms(); uint32 now_executed = __atomic_load_n(output_data, __ATOMIC_RELAXED); if (executed_calls != now_executed) { @@ -667,11 +669,15 @@ static void loop(void) last_executed = now; } // TODO: adjust timeout for progs with syz_usb_connect call. - if ((now - start < 5 * 1000) && (now - start < 3 * 1000 || now - last_executed < 1000)) + if ((now - start < program_timeout_ms) && + (now - start < min_timeout_ms || now - last_executed < inactive_timeout_ms)) continue; -#else - if (current_time_ms() - start < 5 * 1000) +#elif SYZ_EXECUTOR + if (current_time_ms() - start < program_timeout_ms) continue; +#else + if (current_time_ms() - start < /*{{{PROGRAM_TIMEOUT_MS}}}*/) + continue; #endif debug("killing hanging pid %d\n", pid); kill_and_wait(pid, &status); diff --git a/executor/executor.cc b/executor/executor.cc index cbd3048f9..76d172cc3 100644 --- a/executor/executor.cc +++ b/executor/executor.cc @@ -151,6 +151,11 @@ static bool flag_fault; static int flag_fault_call; static int flag_fault_nth; +// Tunable timeouts, received with execute_req. +static uint64 syscall_timeout_ms; +static uint64 program_timeout_ms; +static uint64 slowdown_scale; + #define SYZ_EXECUTOR 1 #include "common.h" @@ -259,6 +264,9 @@ struct execute_req { uint64 pid; uint64 fault_call; uint64 fault_nth; + uint64 syscall_timeout_ms; + uint64 program_timeout_ms; + uint64 slowdown_scale; uint64 prog_size; }; @@ -545,6 +553,9 @@ void receive_execute() fail("bad execute prog size 0x%llx", req.prog_size); parse_env_flags(req.env_flags); procid = req.pid; + syscall_timeout_ms = req.syscall_timeout_ms; + program_timeout_ms = req.program_timeout_ms; + slowdown_scale = req.slowdown_scale; flag_collect_cover = req.exec_flags & (1 << 0); flag_dedup_cover = req.exec_flags & (1 << 1); flag_fault = req.exec_flags & (1 << 2); @@ -725,11 +736,11 @@ retry: if (call->attrs.disabled) fail("executing disabled syscall %s", call->name); if (prog_extra_timeout < call->attrs.prog_timeout) - prog_extra_timeout = call->attrs.prog_timeout; + prog_extra_timeout = call->attrs.prog_timeout * slowdown_scale; if (strncmp(syscalls[call_num].name, "syz_usb", strlen("syz_usb")) == 0) - prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, (uint64)500); + prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, 500 * slowdown_scale); if (strncmp(syscalls[call_num].name, "syz_80211_inject_frame", strlen("syz_80211_inject_frame")) == 0) - prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, (uint64)300); + prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, 300 * slowdown_scale); uint64 copyout_index = read_input(&input_pos); uint64 num_args = read_input(&input_pos); if (num_args > kMaxArgs) @@ -749,7 +760,9 @@ retry: // Wait for call completion. // Note: sys/linux knows about this 45 ms timeout when it generates timespec/timeval values. // Note: pkg/csource also knows about this 45 ms per-call timeout. - uint64 timeout_ms = 45 + call->attrs.timeout; + uint64 timeout_ms = syscall_timeout_ms + call->attrs.timeout * slowdown_scale; + // This is because of printing pre/post call. Ideally we print everything in the main thread + // and then remove this (would also avoid intermixed output). if (flag_debug && timeout_ms < 1000) timeout_ms = 1000; if (event_timedwait(&th->done, timeout_ms)) @@ -775,14 +788,12 @@ retry: if (!colliding && !collide && running > 0) { // Give unfinished syscalls some additional time. last_scheduled = 0; - uint64 wait = 100; uint64 wait_start = current_time_ms(); - uint64 wait_end = wait_start + wait; - if (wait_end < start + 800) - wait_end = start + 800; - wait_end += prog_extra_timeout; + uint64 wait_end = wait_start + 2 * syscall_timeout_ms; + wait_end = std::max(wait_end, start + program_timeout_ms / 6); + wait_end = std::max(wait_end, wait_start + prog_extra_timeout); while (running > 0 && current_time_ms() <= wait_end) { - sleep_ms(1); + sleep_ms(1 * slowdown_scale); for (int i = 0; i < kMaxThreads; i++) { thread_t* th = &threads[i]; if (th->executing && event_isset(&th->done)) diff --git a/pkg/csource/csource.go b/pkg/csource/csource.go index 49babbf41..b53b3b97f 100644 --- a/pkg/csource/csource.go +++ b/pkg/csource/csource.go @@ -93,13 +93,14 @@ func Write(p *prog.Prog, opts Options) ([]byte, error) { replacements["SANDBOX_FUNC"] = replacements["SYSCALLS"] replacements["SYSCALLS"] = "unused" } + replacements["PROGRAM_TIMEOUT_MS"] = "5000" timeoutExpr := "45" for i, call := range p.Calls { if timeout := call.Meta.Attrs.Timeout; timeout != 0 { timeoutExpr += fmt.Sprintf(" + (call == %d ? %d : 0)", i, timeout) } } - replacements["CALL_TIMEOUT"] = timeoutExpr + replacements["CALL_TIMEOUT_MS"] = timeoutExpr result, err := createCommonHeader(p, mmapProg, replacements, opts) if err != nil { return nil, err diff --git a/pkg/csource/generated.go b/pkg/csource/generated.go index 5f38c02c6..a1d204fd1 100644 --- a/pkg/csource/generated.go +++ b/pkg/csource/generated.go @@ -10093,7 +10093,7 @@ again: if (collide && (call % 2) == 0) break; #endif - event_timedwait(&th->done, /*{{{CALL_TIMEOUT}}}*/); + event_timedwait(&th->done, /*{{{CALL_TIMEOUT_MS}}}*/); break; } } @@ -10208,17 +10208,23 @@ static void loop(void) break; sleep_ms(1); #if SYZ_EXECUTOR && SYZ_EXECUTOR_USES_SHMEM + uint64 min_timeout_ms = program_timeout_ms * 3 / 5; + uint64 inactive_timeout_ms = syscall_timeout_ms * 20; uint64 now = current_time_ms(); uint32 now_executed = __atomic_load_n(output_data, __ATOMIC_RELAXED); if (executed_calls != now_executed) { executed_calls = now_executed; last_executed = now; } - if ((now - start < 5 * 1000) && (now - start < 3 * 1000 || now - last_executed < 1000)) + if ((now - start < program_timeout_ms) && + (now - start < min_timeout_ms || now - last_executed < inactive_timeout_ms)) continue; -#else - if (current_time_ms() - start < 5 * 1000) +#elif SYZ_EXECUTOR + if (current_time_ms() - start < program_timeout_ms) continue; +#else + if (current_time_ms() - start < /*{{{PROGRAM_TIMEOUT_MS}}}*/) + continue; #endif debug("killing hanging pid %d\n", pid); kill_and_wait(pid, &status); diff --git a/pkg/ipc/ipc.go b/pkg/ipc/ipc.go index cb6dcd8e9..0f1bd50a6 100644 --- a/pkg/ipc/ipc.go +++ b/pkg/ipc/ipc.go @@ -500,13 +500,16 @@ type handshakeReply struct { } type executeReq struct { - magic uint64 - envFlags uint64 // env flags - execFlags uint64 // exec flags - pid uint64 - faultCall uint64 - faultNth uint64 - progSize uint64 + magic uint64 + envFlags uint64 // env flags + execFlags uint64 // exec flags + pid uint64 + faultCall uint64 + faultNth uint64 + syscallTimeoutMS uint64 + programTimeoutMS uint64 + slowdownScale uint64 + progSize uint64 // This structure is followed by a serialized test program in encodingexec format. // Both when sent over a pipe or in shared memory. } @@ -722,13 +725,16 @@ func (c *command) wait() error { func (c *command) exec(opts *ExecOpts, progData []byte) (output []byte, hanged bool, err0 error) { req := &executeReq{ - magic: inMagic, - envFlags: uint64(c.config.Flags), - execFlags: uint64(opts.Flags), - pid: uint64(c.pid), - faultCall: uint64(opts.FaultCall), - faultNth: uint64(opts.FaultNth), - progSize: uint64(len(progData)), + magic: inMagic, + envFlags: uint64(c.config.Flags), + execFlags: uint64(opts.Flags), + pid: uint64(c.pid), + faultCall: uint64(opts.FaultCall), + faultNth: uint64(opts.FaultNth), + syscallTimeoutMS: 50, + programTimeoutMS: 5000, + slowdownScale: 1, + progSize: uint64(len(progData)), } reqData := (*[unsafe.Sizeof(*req)]byte)(unsafe.Pointer(req))[:] if _, err := c.outwp.Write(reqData); err != nil { |
