From d606e60dfe3d50499812f7d740dae6e727fa9f76 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 20 Sep 2017 16:55:28 +0200 Subject: executor: split source per-OS Update #191 --- executor/executor.cc | 1001 ------------------------------------------ executor/executor_fuchsia.cc | 9 + executor/executor_linux.cc | 1001 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1010 insertions(+), 1001 deletions(-) delete mode 100644 executor/executor.cc create mode 100644 executor/executor_fuchsia.cc create mode 100644 executor/executor_linux.cc (limited to 'executor') diff --git a/executor/executor.cc b/executor/executor.cc deleted file mode 100644 index cd0e406ff..000000000 --- a/executor/executor.cc +++ /dev/null @@ -1,1001 +0,0 @@ -// Copyright 2015 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -// +build - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "syscalls.h" - -#define SYZ_EXECUTOR -#include "common.h" - -#define KCOV_INIT_TRACE _IOR('c', 1, unsigned long long) -#define KCOV_INIT_CMP _IOR('c', 2, unsigned long long) -#define KCOV_ENABLE _IO('c', 100) -#define KCOV_DISABLE _IO('c', 101) - -const unsigned long KCOV_TRACE_PC = 0; -const unsigned long KCOV_TRACE_CMP = 1; - -const int kInFd = 3; -const int kOutFd = 4; -const int kInPipeFd = 5; -const int kOutPipeFd = 6; -const int kMaxInput = 2 << 20; -const int kMaxOutput = 16 << 20; -const int kMaxArgs = 9; -const int kMaxThreads = 16; -const int kMaxCommands = 16 << 10; -const int kCoverSize = 64 << 10; -const int kPageSize = 4 << 10; - -const uint64_t instr_eof = -1; -const uint64_t instr_copyin = -2; -const uint64_t instr_copyout = -3; - -const uint64_t arg_const = 0; -const uint64_t arg_result = 1; -const uint64_t arg_data = 2; -const uint64_t arg_csum = 3; - -// We use the default value instead of results of failed syscalls. -// -1 is an invalid fd and an invalid address and deterministic, -// so good enough for our purposes. -const uint64_t default_value = -1; - -enum sandbox_type { - sandbox_none, - sandbox_setuid, - sandbox_namespace, -}; - -bool flag_cover; -bool flag_threaded; -bool flag_collide; -bool flag_sandbox_privs; -sandbox_type flag_sandbox; -bool flag_enable_tun; -bool flag_enable_fault_injection; - -bool flag_collect_cover; -bool flag_dedup_cover; - -// If true, then executor should write the comparisons data to fuzzer. -bool flag_collect_comps; - -// Inject fault into flag_fault_nth-th operation in flag_fault_call-th syscall. -bool flag_inject_fault; -int flag_fault_call; -int flag_fault_nth; - -__attribute__((aligned(64 << 10))) char input_data[kMaxInput]; -uint32_t* output_data; -uint32_t* output_pos; -uint32_t completed; -int running; -bool collide; - -struct res_t { - bool executed; - uint64_t val; -}; - -res_t results[kMaxCommands]; - -enum { - KCOV_CMP_CONST = 1, - KCOV_CMP_SIZE1 = 0, - KCOV_CMP_SIZE2 = 2, - KCOV_CMP_SIZE4 = 4, - KCOV_CMP_SIZE8 = 6, - KCOV_CMP_SIZE_MASK = 6, -}; - -struct kcov_comparison_t { - uint64_t type; - uint64_t arg1; - uint64_t arg2; - - bool operator==(const struct kcov_comparison_t& other) const - { - return type == other.type && arg1 == other.arg1 && arg2 == other.arg2; - } - - bool operator<(const struct kcov_comparison_t& other) const - { - if (type != other.type) - return type < other.type; - if (arg1 != other.arg1) - return arg1 < other.arg1; - return arg2 < other.arg2; - } - - // Writes the structure using the write_one function for each field. - // Inspired by write_output() function. - void write(uint32_t* (*write_one)(uint32_t)) - { - // Write order: type arg1 arg2. - write_one((uint32_t)type); - - // KCOV converts all arguments of size x first to uintx_t and then to - // uint64_t. We want to properly extend signed values, e.g we want - // int8_t c = 0xfe to be represented as 0xfffffffffffffffe. - // Note that uint8_t c = 0xfe will be represented the same way. - // This is ok because during hints processing we will anyways try - // the value 0x00000000000000fe. - switch (type & KCOV_CMP_SIZE_MASK) { - case KCOV_CMP_SIZE1: - arg1 = (uint64_t)(int64_t)(int8_t)arg1; - arg2 = (uint64_t)(int64_t)(int8_t)arg2; - break; - case KCOV_CMP_SIZE2: - arg1 = (uint64_t)(int64_t)(int16_t)arg1; - arg2 = (uint64_t)(int64_t)(int16_t)arg2; - break; - case KCOV_CMP_SIZE4: - arg1 = (uint64_t)(int64_t)(int32_t)arg1; - arg2 = (uint64_t)(int64_t)(int32_t)arg2; - break; - } - bool is_size_8 = (type & KCOV_CMP_SIZE_MASK) == KCOV_CMP_SIZE8; - if (!is_size_8) { - write_one((uint32_t)arg1); - write_one((uint32_t)arg2); - return; - } - // If we have 64 bits arguments then write them in Little-endian. - write_one((uint32_t)(arg1 & 0xFFFFFFFF)); - write_one((uint32_t)(arg1 >> 32)); - write_one((uint32_t)(arg2 & 0xFFFFFFFF)); - write_one((uint32_t)(arg2 >> 32)); - } -}; - -struct thread_t { - bool created; - int id; - pthread_t th; - // TODO(dvyukov): this assumes 64-bit kernel. This must be "kernel long" somehow. - uint64_t* cover_data; - // Pointer to the size of coverage (stored as first word of memory). - uint64_t* cover_size_ptr; - - uint64_t* copyout_pos; - int ready; - int done; - bool handled; - int call_n; - int call_index; - int call_num; - int num_args; - uintptr_t args[kMaxArgs]; - uintptr_t res; - uint32_t reserrno; - uint64_t cover_size; - bool fault_injected; - int cover_fd; -}; - -thread_t threads[kMaxThreads]; - -// Checksum kinds. -const uint64_t arg_csum_inet = 0; - -// Checksum chunk kinds. -const uint64_t arg_csum_chunk_data = 0; -const uint64_t arg_csum_chunk_const = 1; - -void execute_one(); -uint64_t read_input(uint64_t** input_posp, bool peek = false); -uint64_t read_arg(uint64_t** input_posp); -uint64_t read_result(uint64_t** input_posp); -uint32_t* write_output(uint32_t v); -void copyin(char* addr, uint64_t val, uint64_t size, uint64_t bf_off, uint64_t bf_len); -uint64_t copyout(char* addr, uint64_t size); -thread_t* schedule_call(int n, int call_index, int call_num, uint64_t num_args, uint64_t* args, uint64_t* pos); -void execute_call(thread_t* th); -void handle_completion(thread_t* th); -void thread_create(thread_t* th, int id); -void* worker_thread(void* arg); -void cover_open(); -void cover_enable(thread_t* th); -void cover_reset(thread_t* th); -uint64_t read_cover_size(thread_t* th); -static uint32_t hash(uint32_t a); -static bool dedup(uint32_t sig); - -#ifndef GIT_REVISION -#define GIT_REVISION "unknown" -#endif - -int main(int argc, char** argv) -{ - if (argc == 2 && strcmp(argv[1], "version") == 0) { - puts("linux " GOARCH " " SYZ_REVISION " " GIT_REVISION); - return 0; - } - - prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); - if (mmap(&input_data[0], kMaxInput, PROT_READ, MAP_PRIVATE | MAP_FIXED, kInFd, 0) != &input_data[0]) - fail("mmap of input file failed"); - // The output region is the only thing in executor process for which consistency matters. - // If it is corrupted ipc package will fail to parse its contents and panic. - // But fuzzer constantly invents new ways of how to currupt the region, - // so we map the region at a (hopefully) hard to guess address surrounded by unmapped pages. - void* const kOutputDataAddr = (void*)0x1ddbc20000; - output_data = (uint32_t*)mmap(kOutputDataAddr, kMaxOutput, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, kOutFd, 0); - if (output_data != kOutputDataAddr) - fail("mmap of output file failed"); - // Prevent random programs to mess with these fds. - // Due to races in collider mode, a program can e.g. ftruncate one of these fds, - // which will cause fuzzer to crash. - // That's also the reason why we close kInPipeFd/kOutPipeFd below. - close(kInFd); - close(kOutFd); - - uint64_t flags = *(uint64_t*)input_data; - flag_debug = flags & (1 << 0); - flag_cover = flags & (1 << 1); - flag_threaded = flags & (1 << 2); - flag_collide = flags & (1 << 3); - flag_sandbox = sandbox_none; - if (flags & (1 << 4)) - flag_sandbox = sandbox_setuid; - else if (flags & (1 << 5)) - flag_sandbox = sandbox_namespace; - if (!flag_threaded) - flag_collide = false; - flag_enable_tun = flags & (1 << 6); - flag_enable_fault_injection = flags & (1 << 7); - - uint64_t executor_pid = *((uint64_t*)input_data + 1); - cover_open(); - install_segv_handler(); - use_temporary_dir(); - -#if defined(__i386__) || defined(__arm__) - // mmap syscall on i386/arm is translated to old_mmap and has different signature. - // As a workaround fix it up to mmap2, which has signature that we expect. - // pkg/csource has the same hack. - for (size_t i = 0; i < sizeof(syscalls) / sizeof(syscalls[0]); i++) { - if (syscalls[i].sys_nr == __NR_mmap) - syscalls[i].sys_nr = __NR_mmap2; - } -#endif - - int pid = -1; - switch (flag_sandbox) { - case sandbox_none: - pid = do_sandbox_none(executor_pid, flag_enable_tun); - break; - case sandbox_setuid: - pid = do_sandbox_setuid(executor_pid, flag_enable_tun); - break; - case sandbox_namespace: - pid = do_sandbox_namespace(executor_pid, flag_enable_tun); - break; - default: - fail("unknown sandbox type"); - } - if (pid < 0) - fail("clone failed"); - debug("spawned loop pid %d\n", pid); - int status = 0; - while (waitpid(-1, &status, __WALL) != pid) { - } - status = WEXITSTATUS(status); - // If an external sandbox process wraps executor, the out pipe will be closed - // before the sandbox process exits this will make ipc package kill the sandbox. - // As the result sandbox process will exit with exit status 9 instead of the executor - // exit status (notably kRetryStatus). Consequently, ipc will treat it as hard - // failure rather than a temporal failure. So we duplicate the exit status on the pipe. - char tmp = status; - if (write(kOutPipeFd, &tmp, 1)) { - // Not much we can do, but gcc wants us to check the return value. - } - errno = 0; - if (status == kFailStatus) - fail("loop failed"); - if (status == kErrorStatus) - error("loop errored"); - // Loop can be killed by a test process with e.g.: - // ptrace(PTRACE_SEIZE, 1, 0, 0x100040) - // This is unfortunate, but I don't have a better solution than ignoring it for now. - exitf("loop exited with status %d", status); - // Unreachable. - return 1; -} - -void loop() -{ - // Tell parent that we are ready to serve. - char tmp = 0; - if (write(kOutPipeFd, &tmp, 1) != 1) - fail("control pipe write failed"); - - for (int iter = 0;; iter++) { - // Create a new private work dir for this test (removed at the end of the loop). - char cwdbuf[256]; - sprintf(cwdbuf, "./%d", iter); - if (mkdir(cwdbuf, 0777)) - fail("failed to mkdir"); - - // TODO: consider moving the read into the child. - // Potentially it can speed up things a bit -- when the read finishes - // we already have a forked worker process. - uint64_t in_cmd[3] = {}; - if (read(kInPipeFd, &in_cmd[0], sizeof(in_cmd)) != (ssize_t)sizeof(in_cmd)) - fail("control pipe read failed"); - flag_collect_cover = in_cmd[0] & (1 << 0); - flag_dedup_cover = in_cmd[0] & (1 << 1); - flag_inject_fault = in_cmd[0] & (1 << 2); - flag_collect_comps = in_cmd[0] & (1 << 3); - flag_fault_call = in_cmd[1]; - flag_fault_nth = in_cmd[2]; - debug("exec opts: cover=%d comps=%d dedup=%d fault=%d/%d/%d\n", flag_collect_cover, - flag_collect_comps, flag_dedup_cover, - flag_inject_fault, flag_fault_call, flag_fault_nth); - - int pid = fork(); - if (pid < 0) - fail("clone failed"); - if (pid == 0) { - prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); - setpgrp(); - if (chdir(cwdbuf)) - fail("failed to chdir"); - close(kInPipeFd); - close(kOutPipeFd); - if (flag_enable_tun) { - // Read all remaining packets from tun to better - // isolate consequently executing programs. - flush_tun(); - } - execute_one(); - debug("worker exiting\n"); - doexit(0); - } - debug("spawned worker pid %d\n", pid); - - // We used to use sigtimedwait(SIGCHLD) to wait for the subprocess. - // But SIGCHLD is also delivered when a process stops/continues, - // so it would require a loop with status analysis and timeout recalculation. - // SIGCHLD should also unblock the usleep below, so the spin loop - // should be as efficient as sigtimedwait. - int status = 0; - uint64_t start = current_time_ms(); - uint64_t last_executed = start; - uint32_t executed_calls = __atomic_load_n(output_data, __ATOMIC_RELAXED); - for (;;) { - int res = waitpid(-1, &status, __WALL | WNOHANG); - int errno0 = errno; - if (res == pid) { - debug("waitpid(%d)=%d (%d)\n", pid, res, errno0); - break; - } - usleep(1000); - // Even though the test process executes exit at the end - // and execution time of each syscall is bounded by 20ms, - // this backup watchdog is necessary and its performance is important. - // The problem is that exit in the test processes can fail (sic). - // One observed scenario is that the test processes prohibits - // exit_group syscall using seccomp. Another observed scenario - // is that the test processes setups a userfaultfd for itself, - // then the main thread hangs when it wants to page in a page. - // Below we check if the test process still executes syscalls - // and kill it after 200ms of inactivity. - uint64_t now = current_time_ms(); - uint32_t now_executed = __atomic_load_n(output_data, __ATOMIC_RELAXED); - if (executed_calls != now_executed) { - executed_calls = now_executed; - last_executed = now; - } - if ((now - start < 3 * 1000) && (now - last_executed < 500)) - continue; - debug("waitpid(%d)=%d (%d)\n", pid, res, errno0); - debug("killing\n"); - kill(-pid, SIGKILL); - kill(pid, SIGKILL); - for (;;) { - int res = waitpid(-1, &status, __WALL); - debug("waitpid(%d)=%d (%d)\n", pid, res, errno); - if (res == pid) - break; - } - break; - } - status = WEXITSTATUS(status); - if (status == kFailStatus) - fail("child failed"); - if (status == kErrorStatus) - error("child errored"); - remove_dir(cwdbuf); - if (write(kOutPipeFd, &tmp, 1) != 1) - fail("control pipe write failed"); - } -} - -void execute_one() -{ -retry: - uint64_t* input_pos = (uint64_t*)&input_data[0]; - read_input(&input_pos); // flags - read_input(&input_pos); // pid - output_pos = output_data; - write_output(0); // Number of executed syscalls (updated later). - - if (!collide && !flag_threaded) - cover_enable(&threads[0]); - - int call_index = 0; - for (int n = 0;; n++) { - uint64_t call_num = read_input(&input_pos); - if (call_num == instr_eof) - break; - if (call_num == instr_copyin) { - char* addr = (char*)read_input(&input_pos); - uint64_t typ = read_input(&input_pos); - uint64_t size = read_input(&input_pos); - debug("copyin to %p\n", addr); - switch (typ) { - case arg_const: { - uint64_t arg = read_input(&input_pos); - uint64_t bf_off = read_input(&input_pos); - uint64_t bf_len = read_input(&input_pos); - copyin(addr, arg, size, bf_off, bf_len); - break; - } - case arg_result: { - uint64_t val = read_result(&input_pos); - copyin(addr, val, size, 0, 0); - break; - } - case arg_data: { - NONFAILING(memcpy(addr, input_pos, size)); - // Read out the data. - for (uint64_t i = 0; i < (size + 7) / 8; i++) - read_input(&input_pos); - break; - } - case arg_csum: { - debug("checksum found at %llx\n", addr); - char* csum_addr = addr; - uint64_t csum_size = size; - uint64_t csum_kind = read_input(&input_pos); - switch (csum_kind) { - case arg_csum_inet: { - if (csum_size != 2) { - fail("inet checksum must be 2 bytes, not %lu", size); - } - debug("calculating checksum for %llx\n", csum_addr); - struct csum_inet csum; - csum_inet_init(&csum); - uint64_t chunks_num = read_input(&input_pos); - uint64_t chunk; - for (chunk = 0; chunk < chunks_num; chunk++) { - uint64_t chunk_kind = read_input(&input_pos); - uint64_t chunk_value = read_input(&input_pos); - uint64_t chunk_size = read_input(&input_pos); - switch (chunk_kind) { - case arg_csum_chunk_data: - debug("#%d: data chunk, addr: %llx, size: %llu\n", chunk, chunk_value, chunk_size); - NONFAILING(csum_inet_update(&csum, (const uint8_t*)chunk_value, chunk_size)); - break; - case arg_csum_chunk_const: - if (chunk_size != 2 && chunk_size != 4 && chunk_size != 8) { - fail("bad checksum const chunk size %lld\n", chunk_size); - } - // Here we assume that const values come to us big endian. - debug("#%d: const chunk, value: %llx, size: %llu\n", chunk, chunk_value, chunk_size); - csum_inet_update(&csum, (const uint8_t*)&chunk_value, chunk_size); - break; - default: - fail("bad checksum chunk kind %lu", chunk_kind); - } - } - int16_t csum_value = csum_inet_digest(&csum); - debug("writing inet checksum %hx to %llx\n", csum_value, csum_addr); - NONFAILING(copyin(csum_addr, csum_value, 2, 0, 0)); - break; - } - default: - fail("bad checksum kind %lu", csum_kind); - } - break; - } - default: - fail("bad argument type %lu", typ); - } - continue; - } - if (call_num == instr_copyout) { - read_input(&input_pos); // addr - read_input(&input_pos); // size - // The copyout will happen when/if the call completes. - continue; - } - - // Normal syscall. - if (call_num >= sizeof(syscalls) / sizeof(syscalls[0])) - fail("invalid command number %lu", call_num); - uint64_t num_args = read_input(&input_pos); - if (num_args > kMaxArgs) - fail("command has bad number of arguments %lu", num_args); - uint64_t args[kMaxArgs] = {}; - for (uint64_t i = 0; i < num_args; i++) - args[i] = read_arg(&input_pos); - for (uint64_t i = num_args; i < 6; i++) - args[i] = 0; - thread_t* th = schedule_call(n, call_index++, call_num, num_args, args, input_pos); - - if (collide && (call_index % 2) == 0) { - // Don't wait for every other call. - // We already have results from the previous execution. - } else if (flag_threaded) { - // Wait for call completion. - uint64_t start = current_time_ms(); - uint64_t now = start; - // Note: sys knows about this 20ms timeout when it generates - // timespec/timeval values. - const uint64_t timeout_ms = flag_debug ? 500 : 20; - for (;;) { - timespec ts = {}; - ts.tv_sec = 0; - ts.tv_nsec = (timeout_ms - (now - start)) * 1000 * 1000; - syscall(SYS_futex, &th->done, FUTEX_WAIT, 0, &ts); - if (__atomic_load_n(&th->done, __ATOMIC_RELAXED)) - break; - now = current_time_ms(); - if (now - start > 20) - break; - } - if (__atomic_load_n(&th->done, __ATOMIC_ACQUIRE)) - handle_completion(th); - // Check if any of previous calls have completed. - // Give them some additional time, because they could have been - // just unblocked by the current call. - if (running < 0) - fail("running = %d", running); - if (running > 0) { - bool last = read_input(&input_pos, true) == instr_eof; - usleep(last ? 1000 : 100); - for (int i = 0; i < kMaxThreads; i++) { - th = &threads[i]; - if (__atomic_load_n(&th->done, __ATOMIC_ACQUIRE) && !th->handled) - handle_completion(th); - } - } - } else { - // Execute directly. - if (th != &threads[0]) - fail("using non-main thread in non-thread mode"); - execute_call(th); - handle_completion(th); - } - } - - if (flag_collide && !flag_inject_fault && !collide) { - debug("enabling collider\n"); - collide = true; - goto retry; - } -} - -thread_t* schedule_call(int n, int call_index, int call_num, uint64_t num_args, uint64_t* args, uint64_t* pos) -{ - // Find a spare thread to execute the call. - int i; - for (i = 0; i < kMaxThreads; i++) { - thread_t* th = &threads[i]; - if (!th->created) - thread_create(th, i); - if (__atomic_load_n(&th->done, __ATOMIC_ACQUIRE)) { - if (!th->handled) - handle_completion(th); - break; - } - } - if (i == kMaxThreads) - exitf("out of threads"); - thread_t* th = &threads[i]; - debug("scheduling call %d [%s] on thread %d\n", call_index, syscalls[call_num].name, th->id); - if (th->ready || !th->done || !th->handled) - fail("bad thread state in schedule: ready=%d done=%d handled=%d", th->ready, th->done, th->handled); - th->copyout_pos = pos; - th->done = false; - th->handled = false; - th->call_n = n; - th->call_index = call_index; - th->call_num = call_num; - th->num_args = num_args; - for (int i = 0; i < kMaxArgs; i++) - th->args[i] = args[i]; - __atomic_store_n(&th->ready, 1, __ATOMIC_RELEASE); - syscall(SYS_futex, &th->ready, FUTEX_WAKE); - running++; - return th; -} - -void handle_completion(thread_t* th) -{ - debug("completion of call %d [%s] on thread %d\n", th->call_index, syscalls[th->call_num].name, th->id); - if (th->ready || !th->done || th->handled) - fail("bad thread state in completion: ready=%d done=%d handled=%d", - th->ready, th->done, th->handled); - if (th->res != (uintptr_t)-1) { - if (th->call_n >= kMaxCommands) - fail("result idx %ld overflows kMaxCommands", th->call_n); - results[th->call_n].executed = true; - results[th->call_n].val = th->res; - for (bool done = false; !done;) { - th->call_n++; - uint64_t call_num = read_input(&th->copyout_pos); - switch (call_num) { - case instr_copyout: { - char* addr = (char*)read_input(&th->copyout_pos); - uint64_t size = read_input(&th->copyout_pos); - uint64_t val = copyout(addr, size); - if (th->call_n >= kMaxCommands) - fail("result idx %ld overflows kMaxCommands", th->call_n); - results[th->call_n].executed = true; - results[th->call_n].val = val; - debug("copyout from %p\n", addr); - break; - } - default: - done = true; - break; - } - } - } - if (!collide) { - write_output(th->call_index); - write_output(th->call_num); - uint32_t reserrno = th->res != (uint32_t)-1 ? 0 : th->reserrno; - write_output(reserrno); - write_output(th->fault_injected); - uint32_t* signal_count_pos = write_output(0); // filled in later - uint32_t* cover_count_pos = write_output(0); // filled in later - uint32_t* comps_count_pos = write_output(0); // filled in later - uint32_t nsig = 0, cover_size = 0, comps_size = 0; - - if (flag_collect_comps) { - // Collect only the comparisons - comps_size = th->cover_size; - kcov_comparison_t* start = (kcov_comparison_t*)th->cover_data; - kcov_comparison_t* end = start + comps_size; - std::sort(start, end); - comps_size = std::unique(start, end) - start; - for (uint32_t i = 0; i < comps_size; ++i) - start[i].write(write_output); - } else { - // Write out feedback signals. - // Currently it is code edges computed as xor of - // two subsequent basic block PCs. - uint32_t prev = 0; - for (uint32_t i = 0; i < th->cover_size; i++) { - uint32_t pc = (uint32_t)th->cover_data[i]; - uint32_t sig = pc ^ prev; - prev = hash(pc); - if (dedup(sig)) - continue; - write_output(sig); - nsig++; - } - if (flag_collect_cover) { - // Write out real coverage (basic block PCs). - cover_size = th->cover_size; - if (flag_dedup_cover) { - uint64_t* start = (uint64_t*)th->cover_data; - uint64_t* end = start + cover_size; - std::sort(start, end); - cover_size = std::unique(start, end) - start; - } - // Truncate PCs to uint32_t assuming that they fit into 32-bits. - // True for x86_64 and arm64 without KASLR. - for (uint32_t i = 0; i < cover_size; i++) - write_output((uint32_t)th->cover_data[i]); - } - } - // Write out real coverage (basic block PCs). - *cover_count_pos = cover_size; - // Write out number of comparisons - *comps_count_pos = comps_size; - // Write out number of signals - *signal_count_pos = nsig; - debug("out #%u: index=%u num=%u errno=%d sig=%u cover=%u comps=%u\n", - completed, th->call_index, th->call_num, reserrno, nsig, - cover_size, comps_size); - completed++; - __atomic_store_n(output_data, completed, __ATOMIC_RELEASE); - } - th->handled = true; - running--; -} - -void thread_create(thread_t* th, int id) -{ - th->created = true; - th->id = id; - th->done = true; - th->handled = true; - if (flag_threaded) { - pthread_attr_t attr; - pthread_attr_init(&attr); - pthread_attr_setstacksize(&attr, 128 << 10); - if (pthread_create(&th->th, &attr, worker_thread, th)) - exitf("pthread_create failed"); - pthread_attr_destroy(&attr); - } -} - -void* worker_thread(void* arg) -{ - thread_t* th = (thread_t*)arg; - - cover_enable(th); - for (;;) { - while (!__atomic_load_n(&th->ready, __ATOMIC_ACQUIRE)) - syscall(SYS_futex, &th->ready, FUTEX_WAIT, 0, 0); - execute_call(th); - } - return 0; -} - -void execute_call(thread_t* th) -{ - th->ready = false; - call_t* call = &syscalls[th->call_num]; - debug("#%d: %s(", th->id, call->name); - for (int i = 0; i < th->num_args; i++) { - if (i != 0) - debug(", "); - debug("0x%lx", th->args[i]); - } - debug(")\n"); - - int fail_fd = -1; - if (flag_inject_fault && th->call_index == flag_fault_call) { - if (collide) - fail("both collide and fault injection are enabled"); - debug("injecting fault into %d-th operation\n", flag_fault_nth); - fail_fd = inject_fault(flag_fault_nth); - } - - cover_reset(th); - th->res = execute_syscall(call->sys_nr, th->args[0], th->args[1], - th->args[2], th->args[3], th->args[4], th->args[5], - th->args[6], th->args[7], th->args[8]); - th->reserrno = errno; - th->cover_size = read_cover_size(th); - th->fault_injected = false; - - if (flag_inject_fault && th->call_index == flag_fault_call) { - char buf[16]; - int n = read(fail_fd, buf, sizeof(buf) - 1); - if (n <= 0) - fail("failed to read /proc/self/task/tid/fail-nth"); - th->fault_injected = n == 2 && buf[0] == '0' && buf[1] == '\n'; - buf[0] = '0'; - if (write(fail_fd, buf, 1) != 1) - fail("failed to write /proc/self/task/tid/fail-nth"); - close(fail_fd); - debug("fault injected: %d\n", th->fault_injected); - } - - if (th->res == (uint32_t)-1) - debug("#%d: %s = errno(%d)\n", th->id, call->name, th->reserrno); - else - debug("#%d: %s = 0x%lx\n", th->id, call->name, th->res); - __atomic_store_n(&th->done, 1, __ATOMIC_RELEASE); - syscall(SYS_futex, &th->done, FUTEX_WAKE); -} - -void cover_open() -{ - if (!flag_cover) - return; - for (int i = 0; i < kMaxThreads; i++) { - thread_t* th = &threads[i]; - th->cover_fd = open("/sys/kernel/debug/kcov", O_RDWR); - if (th->cover_fd == -1) - fail("open of /sys/kernel/debug/kcov failed"); - if (ioctl(th->cover_fd, KCOV_INIT_TRACE, kCoverSize)) - fail("cover init trace write failed"); - size_t mmap_alloc_size = kCoverSize * sizeof(th->cover_data[0]); - uint64_t* mmap_ptr = (uint64_t*)mmap(NULL, mmap_alloc_size, - PROT_READ | PROT_WRITE, MAP_SHARED, th->cover_fd, 0); - if (mmap_ptr == MAP_FAILED) - fail("cover mmap failed"); - th->cover_size_ptr = mmap_ptr; - th->cover_data = &mmap_ptr[1]; - } -} - -void cover_enable(thread_t* th) -{ - if (!flag_cover) - return; - debug("#%d: enabling /sys/kernel/debug/kcov\n", th->id); - int kcov_mode = flag_collect_comps ? KCOV_TRACE_CMP : KCOV_TRACE_PC; - // This should be fatal, - // but in practice ioctl fails with assorted errors (9, 14, 25), - // so we use exitf. - if (ioctl(th->cover_fd, KCOV_ENABLE, kcov_mode)) - exitf("cover enable write trace failed, mode=%d", kcov_mode); - debug("#%d: enabled /sys/kernel/debug/kcov\n", th->id); -} - -void cover_reset(thread_t* th) -{ - if (!flag_cover) - return; - __atomic_store_n(th->cover_size_ptr, 0, __ATOMIC_RELAXED); -} - -uint64_t read_cover_size(thread_t* th) -{ - if (!flag_cover) - return 0; - uint64_t n = __atomic_load_n(th->cover_size_ptr, __ATOMIC_RELAXED); - debug("#%d: read cover size = %u\n", th->id, n); - if (n >= kCoverSize) - fail("#%d: too much cover %u", th->id, n); - return n; -} - -static uint32_t hash(uint32_t a) -{ - a = (a ^ 61) ^ (a >> 16); - a = a + (a << 3); - a = a ^ (a >> 4); - a = a * 0x27d4eb2d; - a = a ^ (a >> 15); - return a; -} - -const uint32_t dedup_table_size = 8 << 10; -uint32_t dedup_table[dedup_table_size]; - -// Poorman's best-effort hashmap-based deduplication. -// The hashmap is global which means that we deduplicate across different calls. -// This is OK because we are interested only in new signals. -static bool dedup(uint32_t sig) -{ - for (uint32_t i = 0; i < 4; i++) { - uint32_t pos = (sig + i) % dedup_table_size; - if (dedup_table[pos] == sig) - return true; - if (dedup_table[pos] == 0) { - dedup_table[pos] = sig; - return false; - } - } - dedup_table[sig % dedup_table_size] = sig; - return false; -} - -void copyin(char* addr, uint64_t val, uint64_t size, uint64_t bf_off, uint64_t bf_len) -{ - NONFAILING(switch (size) { - case 1: - STORE_BY_BITMASK(uint8_t, addr, val, bf_off, bf_len); - break; - case 2: - STORE_BY_BITMASK(uint16_t, addr, val, bf_off, bf_len); - break; - case 4: - STORE_BY_BITMASK(uint32_t, addr, val, bf_off, bf_len); - break; - case 8: - STORE_BY_BITMASK(uint64_t, addr, val, bf_off, bf_len); - break; - default: - fail("copyin: bad argument size %lu", size); - }); -} - -uint64_t copyout(char* addr, uint64_t size) -{ - uint64_t res = default_value; - NONFAILING(switch (size) { - case 1: - res = *(uint8_t*)addr; - break; - case 2: - res = *(uint16_t*)addr; - break; - case 4: - res = *(uint32_t*)addr; - break; - case 8: - res = *(uint64_t*)addr; - break; - default: - fail("copyout: bad argument size %lu", size); - }); - return res; -} - -uint64_t read_arg(uint64_t** input_posp) -{ - uint64_t typ = read_input(input_posp); - uint64_t size = read_input(input_posp); - (void)size; - uint64_t arg = 0; - switch (typ) { - case arg_const: { - arg = read_input(input_posp); - // Bitfields can't be args of a normal syscall, so just ignore them. - read_input(input_posp); // bit field offset - read_input(input_posp); // bit field length - break; - } - case arg_result: { - arg = read_result(input_posp); - break; - } - default: - fail("bad argument type %lu", typ); - } - return arg; -} - -uint64_t read_result(uint64_t** input_posp) -{ - uint64_t idx = read_input(input_posp); - uint64_t op_div = read_input(input_posp); - uint64_t op_add = read_input(input_posp); - if (idx >= kMaxCommands) - fail("command refers to bad result %ld", idx); - uint64_t arg = default_value; - if (results[idx].executed) { - arg = results[idx].val; - if (op_div != 0) - arg = arg / op_div; - arg += op_add; - } - return arg; -} - -uint64_t read_input(uint64_t** input_posp, bool peek) -{ - uint64_t* input_pos = *input_posp; - if ((char*)input_pos >= input_data + kMaxInput) - fail("input command overflows input"); - if (!peek) - *input_posp = input_pos + 1; - return *input_pos; -} - -uint32_t* write_output(uint32_t v) -{ - if (collide) - return 0; - if (output_pos < output_data || (char*)output_pos >= (char*)output_data + kMaxOutput) - fail("output overflow"); - *output_pos = v; - return output_pos++; -} diff --git a/executor/executor_fuchsia.cc b/executor/executor_fuchsia.cc new file mode 100644 index 000000000..6a312167b --- /dev/null +++ b/executor/executor_fuchsia.cc @@ -0,0 +1,9 @@ +// Copyright 2017 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +// +build + +int main() +{ + return 0; +} diff --git a/executor/executor_linux.cc b/executor/executor_linux.cc new file mode 100644 index 000000000..cd0e406ff --- /dev/null +++ b/executor/executor_linux.cc @@ -0,0 +1,1001 @@ +// Copyright 2015 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +// +build + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "syscalls.h" + +#define SYZ_EXECUTOR +#include "common.h" + +#define KCOV_INIT_TRACE _IOR('c', 1, unsigned long long) +#define KCOV_INIT_CMP _IOR('c', 2, unsigned long long) +#define KCOV_ENABLE _IO('c', 100) +#define KCOV_DISABLE _IO('c', 101) + +const unsigned long KCOV_TRACE_PC = 0; +const unsigned long KCOV_TRACE_CMP = 1; + +const int kInFd = 3; +const int kOutFd = 4; +const int kInPipeFd = 5; +const int kOutPipeFd = 6; +const int kMaxInput = 2 << 20; +const int kMaxOutput = 16 << 20; +const int kMaxArgs = 9; +const int kMaxThreads = 16; +const int kMaxCommands = 16 << 10; +const int kCoverSize = 64 << 10; +const int kPageSize = 4 << 10; + +const uint64_t instr_eof = -1; +const uint64_t instr_copyin = -2; +const uint64_t instr_copyout = -3; + +const uint64_t arg_const = 0; +const uint64_t arg_result = 1; +const uint64_t arg_data = 2; +const uint64_t arg_csum = 3; + +// We use the default value instead of results of failed syscalls. +// -1 is an invalid fd and an invalid address and deterministic, +// so good enough for our purposes. +const uint64_t default_value = -1; + +enum sandbox_type { + sandbox_none, + sandbox_setuid, + sandbox_namespace, +}; + +bool flag_cover; +bool flag_threaded; +bool flag_collide; +bool flag_sandbox_privs; +sandbox_type flag_sandbox; +bool flag_enable_tun; +bool flag_enable_fault_injection; + +bool flag_collect_cover; +bool flag_dedup_cover; + +// If true, then executor should write the comparisons data to fuzzer. +bool flag_collect_comps; + +// Inject fault into flag_fault_nth-th operation in flag_fault_call-th syscall. +bool flag_inject_fault; +int flag_fault_call; +int flag_fault_nth; + +__attribute__((aligned(64 << 10))) char input_data[kMaxInput]; +uint32_t* output_data; +uint32_t* output_pos; +uint32_t completed; +int running; +bool collide; + +struct res_t { + bool executed; + uint64_t val; +}; + +res_t results[kMaxCommands]; + +enum { + KCOV_CMP_CONST = 1, + KCOV_CMP_SIZE1 = 0, + KCOV_CMP_SIZE2 = 2, + KCOV_CMP_SIZE4 = 4, + KCOV_CMP_SIZE8 = 6, + KCOV_CMP_SIZE_MASK = 6, +}; + +struct kcov_comparison_t { + uint64_t type; + uint64_t arg1; + uint64_t arg2; + + bool operator==(const struct kcov_comparison_t& other) const + { + return type == other.type && arg1 == other.arg1 && arg2 == other.arg2; + } + + bool operator<(const struct kcov_comparison_t& other) const + { + if (type != other.type) + return type < other.type; + if (arg1 != other.arg1) + return arg1 < other.arg1; + return arg2 < other.arg2; + } + + // Writes the structure using the write_one function for each field. + // Inspired by write_output() function. + void write(uint32_t* (*write_one)(uint32_t)) + { + // Write order: type arg1 arg2. + write_one((uint32_t)type); + + // KCOV converts all arguments of size x first to uintx_t and then to + // uint64_t. We want to properly extend signed values, e.g we want + // int8_t c = 0xfe to be represented as 0xfffffffffffffffe. + // Note that uint8_t c = 0xfe will be represented the same way. + // This is ok because during hints processing we will anyways try + // the value 0x00000000000000fe. + switch (type & KCOV_CMP_SIZE_MASK) { + case KCOV_CMP_SIZE1: + arg1 = (uint64_t)(int64_t)(int8_t)arg1; + arg2 = (uint64_t)(int64_t)(int8_t)arg2; + break; + case KCOV_CMP_SIZE2: + arg1 = (uint64_t)(int64_t)(int16_t)arg1; + arg2 = (uint64_t)(int64_t)(int16_t)arg2; + break; + case KCOV_CMP_SIZE4: + arg1 = (uint64_t)(int64_t)(int32_t)arg1; + arg2 = (uint64_t)(int64_t)(int32_t)arg2; + break; + } + bool is_size_8 = (type & KCOV_CMP_SIZE_MASK) == KCOV_CMP_SIZE8; + if (!is_size_8) { + write_one((uint32_t)arg1); + write_one((uint32_t)arg2); + return; + } + // If we have 64 bits arguments then write them in Little-endian. + write_one((uint32_t)(arg1 & 0xFFFFFFFF)); + write_one((uint32_t)(arg1 >> 32)); + write_one((uint32_t)(arg2 & 0xFFFFFFFF)); + write_one((uint32_t)(arg2 >> 32)); + } +}; + +struct thread_t { + bool created; + int id; + pthread_t th; + // TODO(dvyukov): this assumes 64-bit kernel. This must be "kernel long" somehow. + uint64_t* cover_data; + // Pointer to the size of coverage (stored as first word of memory). + uint64_t* cover_size_ptr; + + uint64_t* copyout_pos; + int ready; + int done; + bool handled; + int call_n; + int call_index; + int call_num; + int num_args; + uintptr_t args[kMaxArgs]; + uintptr_t res; + uint32_t reserrno; + uint64_t cover_size; + bool fault_injected; + int cover_fd; +}; + +thread_t threads[kMaxThreads]; + +// Checksum kinds. +const uint64_t arg_csum_inet = 0; + +// Checksum chunk kinds. +const uint64_t arg_csum_chunk_data = 0; +const uint64_t arg_csum_chunk_const = 1; + +void execute_one(); +uint64_t read_input(uint64_t** input_posp, bool peek = false); +uint64_t read_arg(uint64_t** input_posp); +uint64_t read_result(uint64_t** input_posp); +uint32_t* write_output(uint32_t v); +void copyin(char* addr, uint64_t val, uint64_t size, uint64_t bf_off, uint64_t bf_len); +uint64_t copyout(char* addr, uint64_t size); +thread_t* schedule_call(int n, int call_index, int call_num, uint64_t num_args, uint64_t* args, uint64_t* pos); +void execute_call(thread_t* th); +void handle_completion(thread_t* th); +void thread_create(thread_t* th, int id); +void* worker_thread(void* arg); +void cover_open(); +void cover_enable(thread_t* th); +void cover_reset(thread_t* th); +uint64_t read_cover_size(thread_t* th); +static uint32_t hash(uint32_t a); +static bool dedup(uint32_t sig); + +#ifndef GIT_REVISION +#define GIT_REVISION "unknown" +#endif + +int main(int argc, char** argv) +{ + if (argc == 2 && strcmp(argv[1], "version") == 0) { + puts("linux " GOARCH " " SYZ_REVISION " " GIT_REVISION); + return 0; + } + + prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); + if (mmap(&input_data[0], kMaxInput, PROT_READ, MAP_PRIVATE | MAP_FIXED, kInFd, 0) != &input_data[0]) + fail("mmap of input file failed"); + // The output region is the only thing in executor process for which consistency matters. + // If it is corrupted ipc package will fail to parse its contents and panic. + // But fuzzer constantly invents new ways of how to currupt the region, + // so we map the region at a (hopefully) hard to guess address surrounded by unmapped pages. + void* const kOutputDataAddr = (void*)0x1ddbc20000; + output_data = (uint32_t*)mmap(kOutputDataAddr, kMaxOutput, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, kOutFd, 0); + if (output_data != kOutputDataAddr) + fail("mmap of output file failed"); + // Prevent random programs to mess with these fds. + // Due to races in collider mode, a program can e.g. ftruncate one of these fds, + // which will cause fuzzer to crash. + // That's also the reason why we close kInPipeFd/kOutPipeFd below. + close(kInFd); + close(kOutFd); + + uint64_t flags = *(uint64_t*)input_data; + flag_debug = flags & (1 << 0); + flag_cover = flags & (1 << 1); + flag_threaded = flags & (1 << 2); + flag_collide = flags & (1 << 3); + flag_sandbox = sandbox_none; + if (flags & (1 << 4)) + flag_sandbox = sandbox_setuid; + else if (flags & (1 << 5)) + flag_sandbox = sandbox_namespace; + if (!flag_threaded) + flag_collide = false; + flag_enable_tun = flags & (1 << 6); + flag_enable_fault_injection = flags & (1 << 7); + + uint64_t executor_pid = *((uint64_t*)input_data + 1); + cover_open(); + install_segv_handler(); + use_temporary_dir(); + +#if defined(__i386__) || defined(__arm__) + // mmap syscall on i386/arm is translated to old_mmap and has different signature. + // As a workaround fix it up to mmap2, which has signature that we expect. + // pkg/csource has the same hack. + for (size_t i = 0; i < sizeof(syscalls) / sizeof(syscalls[0]); i++) { + if (syscalls[i].sys_nr == __NR_mmap) + syscalls[i].sys_nr = __NR_mmap2; + } +#endif + + int pid = -1; + switch (flag_sandbox) { + case sandbox_none: + pid = do_sandbox_none(executor_pid, flag_enable_tun); + break; + case sandbox_setuid: + pid = do_sandbox_setuid(executor_pid, flag_enable_tun); + break; + case sandbox_namespace: + pid = do_sandbox_namespace(executor_pid, flag_enable_tun); + break; + default: + fail("unknown sandbox type"); + } + if (pid < 0) + fail("clone failed"); + debug("spawned loop pid %d\n", pid); + int status = 0; + while (waitpid(-1, &status, __WALL) != pid) { + } + status = WEXITSTATUS(status); + // If an external sandbox process wraps executor, the out pipe will be closed + // before the sandbox process exits this will make ipc package kill the sandbox. + // As the result sandbox process will exit with exit status 9 instead of the executor + // exit status (notably kRetryStatus). Consequently, ipc will treat it as hard + // failure rather than a temporal failure. So we duplicate the exit status on the pipe. + char tmp = status; + if (write(kOutPipeFd, &tmp, 1)) { + // Not much we can do, but gcc wants us to check the return value. + } + errno = 0; + if (status == kFailStatus) + fail("loop failed"); + if (status == kErrorStatus) + error("loop errored"); + // Loop can be killed by a test process with e.g.: + // ptrace(PTRACE_SEIZE, 1, 0, 0x100040) + // This is unfortunate, but I don't have a better solution than ignoring it for now. + exitf("loop exited with status %d", status); + // Unreachable. + return 1; +} + +void loop() +{ + // Tell parent that we are ready to serve. + char tmp = 0; + if (write(kOutPipeFd, &tmp, 1) != 1) + fail("control pipe write failed"); + + for (int iter = 0;; iter++) { + // Create a new private work dir for this test (removed at the end of the loop). + char cwdbuf[256]; + sprintf(cwdbuf, "./%d", iter); + if (mkdir(cwdbuf, 0777)) + fail("failed to mkdir"); + + // TODO: consider moving the read into the child. + // Potentially it can speed up things a bit -- when the read finishes + // we already have a forked worker process. + uint64_t in_cmd[3] = {}; + if (read(kInPipeFd, &in_cmd[0], sizeof(in_cmd)) != (ssize_t)sizeof(in_cmd)) + fail("control pipe read failed"); + flag_collect_cover = in_cmd[0] & (1 << 0); + flag_dedup_cover = in_cmd[0] & (1 << 1); + flag_inject_fault = in_cmd[0] & (1 << 2); + flag_collect_comps = in_cmd[0] & (1 << 3); + flag_fault_call = in_cmd[1]; + flag_fault_nth = in_cmd[2]; + debug("exec opts: cover=%d comps=%d dedup=%d fault=%d/%d/%d\n", flag_collect_cover, + flag_collect_comps, flag_dedup_cover, + flag_inject_fault, flag_fault_call, flag_fault_nth); + + int pid = fork(); + if (pid < 0) + fail("clone failed"); + if (pid == 0) { + prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); + setpgrp(); + if (chdir(cwdbuf)) + fail("failed to chdir"); + close(kInPipeFd); + close(kOutPipeFd); + if (flag_enable_tun) { + // Read all remaining packets from tun to better + // isolate consequently executing programs. + flush_tun(); + } + execute_one(); + debug("worker exiting\n"); + doexit(0); + } + debug("spawned worker pid %d\n", pid); + + // We used to use sigtimedwait(SIGCHLD) to wait for the subprocess. + // But SIGCHLD is also delivered when a process stops/continues, + // so it would require a loop with status analysis and timeout recalculation. + // SIGCHLD should also unblock the usleep below, so the spin loop + // should be as efficient as sigtimedwait. + int status = 0; + uint64_t start = current_time_ms(); + uint64_t last_executed = start; + uint32_t executed_calls = __atomic_load_n(output_data, __ATOMIC_RELAXED); + for (;;) { + int res = waitpid(-1, &status, __WALL | WNOHANG); + int errno0 = errno; + if (res == pid) { + debug("waitpid(%d)=%d (%d)\n", pid, res, errno0); + break; + } + usleep(1000); + // Even though the test process executes exit at the end + // and execution time of each syscall is bounded by 20ms, + // this backup watchdog is necessary and its performance is important. + // The problem is that exit in the test processes can fail (sic). + // One observed scenario is that the test processes prohibits + // exit_group syscall using seccomp. Another observed scenario + // is that the test processes setups a userfaultfd for itself, + // then the main thread hangs when it wants to page in a page. + // Below we check if the test process still executes syscalls + // and kill it after 200ms of inactivity. + uint64_t now = current_time_ms(); + uint32_t now_executed = __atomic_load_n(output_data, __ATOMIC_RELAXED); + if (executed_calls != now_executed) { + executed_calls = now_executed; + last_executed = now; + } + if ((now - start < 3 * 1000) && (now - last_executed < 500)) + continue; + debug("waitpid(%d)=%d (%d)\n", pid, res, errno0); + debug("killing\n"); + kill(-pid, SIGKILL); + kill(pid, SIGKILL); + for (;;) { + int res = waitpid(-1, &status, __WALL); + debug("waitpid(%d)=%d (%d)\n", pid, res, errno); + if (res == pid) + break; + } + break; + } + status = WEXITSTATUS(status); + if (status == kFailStatus) + fail("child failed"); + if (status == kErrorStatus) + error("child errored"); + remove_dir(cwdbuf); + if (write(kOutPipeFd, &tmp, 1) != 1) + fail("control pipe write failed"); + } +} + +void execute_one() +{ +retry: + uint64_t* input_pos = (uint64_t*)&input_data[0]; + read_input(&input_pos); // flags + read_input(&input_pos); // pid + output_pos = output_data; + write_output(0); // Number of executed syscalls (updated later). + + if (!collide && !flag_threaded) + cover_enable(&threads[0]); + + int call_index = 0; + for (int n = 0;; n++) { + uint64_t call_num = read_input(&input_pos); + if (call_num == instr_eof) + break; + if (call_num == instr_copyin) { + char* addr = (char*)read_input(&input_pos); + uint64_t typ = read_input(&input_pos); + uint64_t size = read_input(&input_pos); + debug("copyin to %p\n", addr); + switch (typ) { + case arg_const: { + uint64_t arg = read_input(&input_pos); + uint64_t bf_off = read_input(&input_pos); + uint64_t bf_len = read_input(&input_pos); + copyin(addr, arg, size, bf_off, bf_len); + break; + } + case arg_result: { + uint64_t val = read_result(&input_pos); + copyin(addr, val, size, 0, 0); + break; + } + case arg_data: { + NONFAILING(memcpy(addr, input_pos, size)); + // Read out the data. + for (uint64_t i = 0; i < (size + 7) / 8; i++) + read_input(&input_pos); + break; + } + case arg_csum: { + debug("checksum found at %llx\n", addr); + char* csum_addr = addr; + uint64_t csum_size = size; + uint64_t csum_kind = read_input(&input_pos); + switch (csum_kind) { + case arg_csum_inet: { + if (csum_size != 2) { + fail("inet checksum must be 2 bytes, not %lu", size); + } + debug("calculating checksum for %llx\n", csum_addr); + struct csum_inet csum; + csum_inet_init(&csum); + uint64_t chunks_num = read_input(&input_pos); + uint64_t chunk; + for (chunk = 0; chunk < chunks_num; chunk++) { + uint64_t chunk_kind = read_input(&input_pos); + uint64_t chunk_value = read_input(&input_pos); + uint64_t chunk_size = read_input(&input_pos); + switch (chunk_kind) { + case arg_csum_chunk_data: + debug("#%d: data chunk, addr: %llx, size: %llu\n", chunk, chunk_value, chunk_size); + NONFAILING(csum_inet_update(&csum, (const uint8_t*)chunk_value, chunk_size)); + break; + case arg_csum_chunk_const: + if (chunk_size != 2 && chunk_size != 4 && chunk_size != 8) { + fail("bad checksum const chunk size %lld\n", chunk_size); + } + // Here we assume that const values come to us big endian. + debug("#%d: const chunk, value: %llx, size: %llu\n", chunk, chunk_value, chunk_size); + csum_inet_update(&csum, (const uint8_t*)&chunk_value, chunk_size); + break; + default: + fail("bad checksum chunk kind %lu", chunk_kind); + } + } + int16_t csum_value = csum_inet_digest(&csum); + debug("writing inet checksum %hx to %llx\n", csum_value, csum_addr); + NONFAILING(copyin(csum_addr, csum_value, 2, 0, 0)); + break; + } + default: + fail("bad checksum kind %lu", csum_kind); + } + break; + } + default: + fail("bad argument type %lu", typ); + } + continue; + } + if (call_num == instr_copyout) { + read_input(&input_pos); // addr + read_input(&input_pos); // size + // The copyout will happen when/if the call completes. + continue; + } + + // Normal syscall. + if (call_num >= sizeof(syscalls) / sizeof(syscalls[0])) + fail("invalid command number %lu", call_num); + uint64_t num_args = read_input(&input_pos); + if (num_args > kMaxArgs) + fail("command has bad number of arguments %lu", num_args); + uint64_t args[kMaxArgs] = {}; + for (uint64_t i = 0; i < num_args; i++) + args[i] = read_arg(&input_pos); + for (uint64_t i = num_args; i < 6; i++) + args[i] = 0; + thread_t* th = schedule_call(n, call_index++, call_num, num_args, args, input_pos); + + if (collide && (call_index % 2) == 0) { + // Don't wait for every other call. + // We already have results from the previous execution. + } else if (flag_threaded) { + // Wait for call completion. + uint64_t start = current_time_ms(); + uint64_t now = start; + // Note: sys knows about this 20ms timeout when it generates + // timespec/timeval values. + const uint64_t timeout_ms = flag_debug ? 500 : 20; + for (;;) { + timespec ts = {}; + ts.tv_sec = 0; + ts.tv_nsec = (timeout_ms - (now - start)) * 1000 * 1000; + syscall(SYS_futex, &th->done, FUTEX_WAIT, 0, &ts); + if (__atomic_load_n(&th->done, __ATOMIC_RELAXED)) + break; + now = current_time_ms(); + if (now - start > 20) + break; + } + if (__atomic_load_n(&th->done, __ATOMIC_ACQUIRE)) + handle_completion(th); + // Check if any of previous calls have completed. + // Give them some additional time, because they could have been + // just unblocked by the current call. + if (running < 0) + fail("running = %d", running); + if (running > 0) { + bool last = read_input(&input_pos, true) == instr_eof; + usleep(last ? 1000 : 100); + for (int i = 0; i < kMaxThreads; i++) { + th = &threads[i]; + if (__atomic_load_n(&th->done, __ATOMIC_ACQUIRE) && !th->handled) + handle_completion(th); + } + } + } else { + // Execute directly. + if (th != &threads[0]) + fail("using non-main thread in non-thread mode"); + execute_call(th); + handle_completion(th); + } + } + + if (flag_collide && !flag_inject_fault && !collide) { + debug("enabling collider\n"); + collide = true; + goto retry; + } +} + +thread_t* schedule_call(int n, int call_index, int call_num, uint64_t num_args, uint64_t* args, uint64_t* pos) +{ + // Find a spare thread to execute the call. + int i; + for (i = 0; i < kMaxThreads; i++) { + thread_t* th = &threads[i]; + if (!th->created) + thread_create(th, i); + if (__atomic_load_n(&th->done, __ATOMIC_ACQUIRE)) { + if (!th->handled) + handle_completion(th); + break; + } + } + if (i == kMaxThreads) + exitf("out of threads"); + thread_t* th = &threads[i]; + debug("scheduling call %d [%s] on thread %d\n", call_index, syscalls[call_num].name, th->id); + if (th->ready || !th->done || !th->handled) + fail("bad thread state in schedule: ready=%d done=%d handled=%d", th->ready, th->done, th->handled); + th->copyout_pos = pos; + th->done = false; + th->handled = false; + th->call_n = n; + th->call_index = call_index; + th->call_num = call_num; + th->num_args = num_args; + for (int i = 0; i < kMaxArgs; i++) + th->args[i] = args[i]; + __atomic_store_n(&th->ready, 1, __ATOMIC_RELEASE); + syscall(SYS_futex, &th->ready, FUTEX_WAKE); + running++; + return th; +} + +void handle_completion(thread_t* th) +{ + debug("completion of call %d [%s] on thread %d\n", th->call_index, syscalls[th->call_num].name, th->id); + if (th->ready || !th->done || th->handled) + fail("bad thread state in completion: ready=%d done=%d handled=%d", + th->ready, th->done, th->handled); + if (th->res != (uintptr_t)-1) { + if (th->call_n >= kMaxCommands) + fail("result idx %ld overflows kMaxCommands", th->call_n); + results[th->call_n].executed = true; + results[th->call_n].val = th->res; + for (bool done = false; !done;) { + th->call_n++; + uint64_t call_num = read_input(&th->copyout_pos); + switch (call_num) { + case instr_copyout: { + char* addr = (char*)read_input(&th->copyout_pos); + uint64_t size = read_input(&th->copyout_pos); + uint64_t val = copyout(addr, size); + if (th->call_n >= kMaxCommands) + fail("result idx %ld overflows kMaxCommands", th->call_n); + results[th->call_n].executed = true; + results[th->call_n].val = val; + debug("copyout from %p\n", addr); + break; + } + default: + done = true; + break; + } + } + } + if (!collide) { + write_output(th->call_index); + write_output(th->call_num); + uint32_t reserrno = th->res != (uint32_t)-1 ? 0 : th->reserrno; + write_output(reserrno); + write_output(th->fault_injected); + uint32_t* signal_count_pos = write_output(0); // filled in later + uint32_t* cover_count_pos = write_output(0); // filled in later + uint32_t* comps_count_pos = write_output(0); // filled in later + uint32_t nsig = 0, cover_size = 0, comps_size = 0; + + if (flag_collect_comps) { + // Collect only the comparisons + comps_size = th->cover_size; + kcov_comparison_t* start = (kcov_comparison_t*)th->cover_data; + kcov_comparison_t* end = start + comps_size; + std::sort(start, end); + comps_size = std::unique(start, end) - start; + for (uint32_t i = 0; i < comps_size; ++i) + start[i].write(write_output); + } else { + // Write out feedback signals. + // Currently it is code edges computed as xor of + // two subsequent basic block PCs. + uint32_t prev = 0; + for (uint32_t i = 0; i < th->cover_size; i++) { + uint32_t pc = (uint32_t)th->cover_data[i]; + uint32_t sig = pc ^ prev; + prev = hash(pc); + if (dedup(sig)) + continue; + write_output(sig); + nsig++; + } + if (flag_collect_cover) { + // Write out real coverage (basic block PCs). + cover_size = th->cover_size; + if (flag_dedup_cover) { + uint64_t* start = (uint64_t*)th->cover_data; + uint64_t* end = start + cover_size; + std::sort(start, end); + cover_size = std::unique(start, end) - start; + } + // Truncate PCs to uint32_t assuming that they fit into 32-bits. + // True for x86_64 and arm64 without KASLR. + for (uint32_t i = 0; i < cover_size; i++) + write_output((uint32_t)th->cover_data[i]); + } + } + // Write out real coverage (basic block PCs). + *cover_count_pos = cover_size; + // Write out number of comparisons + *comps_count_pos = comps_size; + // Write out number of signals + *signal_count_pos = nsig; + debug("out #%u: index=%u num=%u errno=%d sig=%u cover=%u comps=%u\n", + completed, th->call_index, th->call_num, reserrno, nsig, + cover_size, comps_size); + completed++; + __atomic_store_n(output_data, completed, __ATOMIC_RELEASE); + } + th->handled = true; + running--; +} + +void thread_create(thread_t* th, int id) +{ + th->created = true; + th->id = id; + th->done = true; + th->handled = true; + if (flag_threaded) { + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setstacksize(&attr, 128 << 10); + if (pthread_create(&th->th, &attr, worker_thread, th)) + exitf("pthread_create failed"); + pthread_attr_destroy(&attr); + } +} + +void* worker_thread(void* arg) +{ + thread_t* th = (thread_t*)arg; + + cover_enable(th); + for (;;) { + while (!__atomic_load_n(&th->ready, __ATOMIC_ACQUIRE)) + syscall(SYS_futex, &th->ready, FUTEX_WAIT, 0, 0); + execute_call(th); + } + return 0; +} + +void execute_call(thread_t* th) +{ + th->ready = false; + call_t* call = &syscalls[th->call_num]; + debug("#%d: %s(", th->id, call->name); + for (int i = 0; i < th->num_args; i++) { + if (i != 0) + debug(", "); + debug("0x%lx", th->args[i]); + } + debug(")\n"); + + int fail_fd = -1; + if (flag_inject_fault && th->call_index == flag_fault_call) { + if (collide) + fail("both collide and fault injection are enabled"); + debug("injecting fault into %d-th operation\n", flag_fault_nth); + fail_fd = inject_fault(flag_fault_nth); + } + + cover_reset(th); + th->res = execute_syscall(call->sys_nr, th->args[0], th->args[1], + th->args[2], th->args[3], th->args[4], th->args[5], + th->args[6], th->args[7], th->args[8]); + th->reserrno = errno; + th->cover_size = read_cover_size(th); + th->fault_injected = false; + + if (flag_inject_fault && th->call_index == flag_fault_call) { + char buf[16]; + int n = read(fail_fd, buf, sizeof(buf) - 1); + if (n <= 0) + fail("failed to read /proc/self/task/tid/fail-nth"); + th->fault_injected = n == 2 && buf[0] == '0' && buf[1] == '\n'; + buf[0] = '0'; + if (write(fail_fd, buf, 1) != 1) + fail("failed to write /proc/self/task/tid/fail-nth"); + close(fail_fd); + debug("fault injected: %d\n", th->fault_injected); + } + + if (th->res == (uint32_t)-1) + debug("#%d: %s = errno(%d)\n", th->id, call->name, th->reserrno); + else + debug("#%d: %s = 0x%lx\n", th->id, call->name, th->res); + __atomic_store_n(&th->done, 1, __ATOMIC_RELEASE); + syscall(SYS_futex, &th->done, FUTEX_WAKE); +} + +void cover_open() +{ + if (!flag_cover) + return; + for (int i = 0; i < kMaxThreads; i++) { + thread_t* th = &threads[i]; + th->cover_fd = open("/sys/kernel/debug/kcov", O_RDWR); + if (th->cover_fd == -1) + fail("open of /sys/kernel/debug/kcov failed"); + if (ioctl(th->cover_fd, KCOV_INIT_TRACE, kCoverSize)) + fail("cover init trace write failed"); + size_t mmap_alloc_size = kCoverSize * sizeof(th->cover_data[0]); + uint64_t* mmap_ptr = (uint64_t*)mmap(NULL, mmap_alloc_size, + PROT_READ | PROT_WRITE, MAP_SHARED, th->cover_fd, 0); + if (mmap_ptr == MAP_FAILED) + fail("cover mmap failed"); + th->cover_size_ptr = mmap_ptr; + th->cover_data = &mmap_ptr[1]; + } +} + +void cover_enable(thread_t* th) +{ + if (!flag_cover) + return; + debug("#%d: enabling /sys/kernel/debug/kcov\n", th->id); + int kcov_mode = flag_collect_comps ? KCOV_TRACE_CMP : KCOV_TRACE_PC; + // This should be fatal, + // but in practice ioctl fails with assorted errors (9, 14, 25), + // so we use exitf. + if (ioctl(th->cover_fd, KCOV_ENABLE, kcov_mode)) + exitf("cover enable write trace failed, mode=%d", kcov_mode); + debug("#%d: enabled /sys/kernel/debug/kcov\n", th->id); +} + +void cover_reset(thread_t* th) +{ + if (!flag_cover) + return; + __atomic_store_n(th->cover_size_ptr, 0, __ATOMIC_RELAXED); +} + +uint64_t read_cover_size(thread_t* th) +{ + if (!flag_cover) + return 0; + uint64_t n = __atomic_load_n(th->cover_size_ptr, __ATOMIC_RELAXED); + debug("#%d: read cover size = %u\n", th->id, n); + if (n >= kCoverSize) + fail("#%d: too much cover %u", th->id, n); + return n; +} + +static uint32_t hash(uint32_t a) +{ + a = (a ^ 61) ^ (a >> 16); + a = a + (a << 3); + a = a ^ (a >> 4); + a = a * 0x27d4eb2d; + a = a ^ (a >> 15); + return a; +} + +const uint32_t dedup_table_size = 8 << 10; +uint32_t dedup_table[dedup_table_size]; + +// Poorman's best-effort hashmap-based deduplication. +// The hashmap is global which means that we deduplicate across different calls. +// This is OK because we are interested only in new signals. +static bool dedup(uint32_t sig) +{ + for (uint32_t i = 0; i < 4; i++) { + uint32_t pos = (sig + i) % dedup_table_size; + if (dedup_table[pos] == sig) + return true; + if (dedup_table[pos] == 0) { + dedup_table[pos] = sig; + return false; + } + } + dedup_table[sig % dedup_table_size] = sig; + return false; +} + +void copyin(char* addr, uint64_t val, uint64_t size, uint64_t bf_off, uint64_t bf_len) +{ + NONFAILING(switch (size) { + case 1: + STORE_BY_BITMASK(uint8_t, addr, val, bf_off, bf_len); + break; + case 2: + STORE_BY_BITMASK(uint16_t, addr, val, bf_off, bf_len); + break; + case 4: + STORE_BY_BITMASK(uint32_t, addr, val, bf_off, bf_len); + break; + case 8: + STORE_BY_BITMASK(uint64_t, addr, val, bf_off, bf_len); + break; + default: + fail("copyin: bad argument size %lu", size); + }); +} + +uint64_t copyout(char* addr, uint64_t size) +{ + uint64_t res = default_value; + NONFAILING(switch (size) { + case 1: + res = *(uint8_t*)addr; + break; + case 2: + res = *(uint16_t*)addr; + break; + case 4: + res = *(uint32_t*)addr; + break; + case 8: + res = *(uint64_t*)addr; + break; + default: + fail("copyout: bad argument size %lu", size); + }); + return res; +} + +uint64_t read_arg(uint64_t** input_posp) +{ + uint64_t typ = read_input(input_posp); + uint64_t size = read_input(input_posp); + (void)size; + uint64_t arg = 0; + switch (typ) { + case arg_const: { + arg = read_input(input_posp); + // Bitfields can't be args of a normal syscall, so just ignore them. + read_input(input_posp); // bit field offset + read_input(input_posp); // bit field length + break; + } + case arg_result: { + arg = read_result(input_posp); + break; + } + default: + fail("bad argument type %lu", typ); + } + return arg; +} + +uint64_t read_result(uint64_t** input_posp) +{ + uint64_t idx = read_input(input_posp); + uint64_t op_div = read_input(input_posp); + uint64_t op_add = read_input(input_posp); + if (idx >= kMaxCommands) + fail("command refers to bad result %ld", idx); + uint64_t arg = default_value; + if (results[idx].executed) { + arg = results[idx].val; + if (op_div != 0) + arg = arg / op_div; + arg += op_add; + } + return arg; +} + +uint64_t read_input(uint64_t** input_posp, bool peek) +{ + uint64_t* input_pos = *input_posp; + if ((char*)input_pos >= input_data + kMaxInput) + fail("input command overflows input"); + if (!peek) + *input_posp = input_pos + 1; + return *input_pos; +} + +uint32_t* write_output(uint32_t v) +{ + if (collide) + return 0; + if (output_pos < output_data || (char*)output_pos >= (char*)output_data + kMaxOutput) + fail("output overflow"); + *output_pos = v; + return output_pos++; +} -- cgit mrf-deployment