// Copyright 2016 syzkaller project authors. All rights reserved. // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. // This file is shared between executor and csource package. #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include const int kFailStatus = 67; const int kErrorStatus = 68; const int kRetryStatus = 69; // logical error (e.g. invalid input program) __attribute__((noreturn)) void fail(const char* msg, ...) { int e = errno; fflush(stdout); va_list args; va_start(args, msg); vfprintf(stderr, msg, args); va_end(args); fprintf(stderr, " (errno %d)\n", e); exit(kFailStatus); } #if defined(SYZ_EXECUTOR) // kernel error (e.g. wrong syscall return value) __attribute__((noreturn)) void error(const char* msg, ...) { fflush(stdout); va_list args; va_start(args, msg); vfprintf(stderr, msg, args); va_end(args); fprintf(stderr, "\n"); exit(kErrorStatus); } #endif // just exit (e.g. due to temporal ENOMEM error) __attribute__((noreturn)) void exitf(const char* msg, ...) { int e = errno; fflush(stdout); va_list args; va_start(args, msg); vfprintf(stderr, msg, args); va_end(args); fprintf(stderr, " (errno %d)\n", e); exit(kRetryStatus); } static int flag_debug; void debug(const char* msg, ...) { if (!flag_debug) return; va_list args; va_start(args, msg); vfprintf(stdout, msg, args); va_end(args); fflush(stdout); } __thread int skip_segv; __thread jmp_buf segv_env; static void segv_handler(int sig, siginfo_t* info, void* uctx) { if (__atomic_load_n(&skip_segv, __ATOMIC_RELAXED)) _longjmp(segv_env, 1); exit(sig); } static void install_segv_handler() { struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_sigaction = segv_handler; sa.sa_flags = SA_NODEFER | SA_SIGINFO; sigaction(SIGSEGV, &sa, NULL); sigaction(SIGBUS, &sa, NULL); } #define NONFAILING(...) \ { \ __atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST); \ if (_setjmp(segv_env) == 0) { \ __VA_ARGS__; \ } \ __atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST); \ } #ifdef __NR_syz_open_dev static uintptr_t syz_open_dev(uintptr_t a0, uintptr_t a1, uintptr_t a2) { if (a0 == 0xc || a0 == 0xb) { // syz_open_dev$char(dev const[0xc], major intptr, minor intptr) fd // syz_open_dev$block(dev const[0xb], major intptr, minor intptr) fd char buf[128]; sprintf(buf, "/dev/%s/%d:%d", a0 == 0xc ? "char" : "block", (uint8_t)a1, (uint8_t)a2); return open(buf, O_RDWR, 0); } else { // syz_open_dev(dev strconst, id intptr, flags flags[open_flags]) fd char buf[1024]; char* hash; strncpy(buf, (char*)a0, sizeof(buf)); buf[sizeof(buf) - 1] = 0; while ((hash = strchr(buf, '#'))) { *hash = '0' + (char)(a1 % 10); // 10 devices should be enough for everyone. a1 /= 10; } return open(buf, a2, 0); } } #endif #ifdef __NR_syz_open_pts static uintptr_t syz_open_pts(uintptr_t a0, uintptr_t a1) { // syz_openpts(fd fd[tty], flags flags[open_flags]) fd[tty] int ptyno = 0; if (ioctl(a0, TIOCGPTN, &ptyno)) return -1; char buf[128]; sprintf(buf, "/dev/pts/%d", ptyno); return open(buf, a1, 0); } #endif #ifdef __NR_syz_fuse_mount static uintptr_t syz_fuse_mount(uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5) { // syz_fuse_mount(target filename, mode flags[fuse_mode], uid uid, gid gid, maxread intptr, flags flags[mount_flags]) fd[fuse] uint64_t target = a0; uint64_t mode = a1; uint64_t uid = a2; uint64_t gid = a3; uint64_t maxread = a4; uint64_t flags = a5; int fd = open("/dev/fuse", O_RDWR); if (fd == -1) return fd; char buf[1024]; sprintf(buf, "fd=%d,user_id=%ld,group_id=%ld,rootmode=0%o", fd, (long)uid, (long)gid, (unsigned)mode & ~3u); if (maxread != 0) sprintf(buf + strlen(buf), ",max_read=%ld", (long)maxread); if (mode & 1) strcat(buf, ",default_permissions"); if (mode & 2) strcat(buf, ",allow_other"); syscall(SYS_mount, "", target, "fuse", flags, buf); // Ignore errors, maybe fuzzer can do something useful with fd alone. return fd; } #endif #ifdef __NR_syz_fuseblk_mount static uintptr_t syz_fuseblk_mount(uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7) { // syz_fuseblk_mount(target filename, blkdev filename, mode flags[fuse_mode], uid uid, gid gid, maxread intptr, blksize intptr, flags flags[mount_flags]) fd[fuse] uint64_t target = a0; uint64_t blkdev = a1; uint64_t mode = a2; uint64_t uid = a3; uint64_t gid = a4; uint64_t maxread = a5; uint64_t blksize = a6; uint64_t flags = a7; int fd = open("/dev/fuse", O_RDWR); if (fd == -1) return fd; if (syscall(SYS_mknodat, AT_FDCWD, blkdev, S_IFBLK, makedev(7, 199))) return fd; char buf[256]; sprintf(buf, "fd=%d,user_id=%ld,group_id=%ld,rootmode=0%o", fd, (long)uid, (long)gid, (unsigned)mode & ~3u); if (maxread != 0) sprintf(buf + strlen(buf), ",max_read=%ld", (long)maxread); if (blksize != 0) sprintf(buf + strlen(buf), ",blksize=%ld", (long)blksize); if (mode & 1) strcat(buf, ",default_permissions"); if (mode & 2) strcat(buf, ",allow_other"); syscall(SYS_mount, blkdev, target, "fuseblk", flags, buf); // Ignore errors, maybe fuzzer can do something useful with fd alone. return fd; } #endif static uintptr_t execute_syscall(int nr, uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7, uintptr_t a8) { switch (nr) { default: return syscall(nr, a0, a1, a2, a3, a4, a5); #ifdef __NR_syz_test case __NR_syz_test: return 0; #endif #ifdef __NR_syz_open_dev case __NR_syz_open_dev: return syz_open_dev(a0, a1, a2); #endif #ifdef __NR_syz_open_pts case __NR_syz_open_pts: return syz_open_pts(a0, a1); #endif #ifdef __NR_syz_fuse_mount case __NR_syz_fuse_mount: return syz_fuse_mount(a0, a1, a2, a3, a4, a5); #endif #ifdef __NR_syz_fuseblk_mount case __NR_syz_fuseblk_mount: return syz_fuseblk_mount(a0, a1, a2, a3, a4, a5, a6, a7); #endif } } static void setup_main_process() { // Don't need that SIGCANCEL/SIGSETXID glibc stuff. // SIGCANCEL sent to main thread causes it to exit // without bringing down the whole group. struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_handler = SIG_IGN; syscall(SYS_rt_sigaction, 0x20, &sa, NULL, 8); syscall(SYS_rt_sigaction, 0x21, &sa, NULL, 8); install_segv_handler(); char tmpdir_template[] = "./syzkaller.XXXXXX"; char* tmpdir = mkdtemp(tmpdir_template); if (!tmpdir) fail("failed to mkdtemp"); if (chmod(tmpdir, 0777)) fail("failed to chmod"); if (chdir(tmpdir)) fail("failed to chdir"); } static void loop(); static void sandbox_common() { prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); setpgrp(); setsid(); struct rlimit rlim; rlim.rlim_cur = rlim.rlim_max = 128 << 20; setrlimit(RLIMIT_AS, &rlim); rlim.rlim_cur = rlim.rlim_max = 1 << 20; setrlimit(RLIMIT_FSIZE, &rlim); rlim.rlim_cur = rlim.rlim_max = 1 << 20; setrlimit(RLIMIT_STACK, &rlim); rlim.rlim_cur = rlim.rlim_max = 0; setrlimit(RLIMIT_CORE, &rlim); // CLONE_NEWIPC/CLONE_IO cause EINVAL on some systems, so we do them separately of clone. unshare(CLONE_NEWNS); unshare(CLONE_NEWIPC); unshare(CLONE_IO); } #if defined(SYZ_EXECUTOR) || defined(SYZ_SANDBOX_NONE) static int do_sandbox_none() { int pid = fork(); if (pid) return pid; sandbox_common(); loop(); exit(1); } #endif #if defined(SYZ_EXECUTOR) || defined(SYZ_SANDBOX_SETUID) static int do_sandbox_setuid() { int pid = fork(); if (pid) return pid; sandbox_common(); const int nobody = 65534; if (setgroups(0, NULL)) fail("failed to setgroups"); if (syscall(SYS_setresgid, nobody, nobody, nobody)) fail("failed to setresgid"); if (syscall(SYS_setresuid, nobody, nobody, nobody)) fail("failed to setresuid"); loop(); exit(1); } #endif #if defined(SYZ_EXECUTOR) || defined(SYZ_SANDBOX_NAMESPACE) static int real_uid; static int real_gid; static char sandbox_stack[1 << 20]; static bool write_file(const char* file, const char* what, ...) { char buf[1024]; va_list args; va_start(args, what); vsnprintf(buf, sizeof(buf), what, args); va_end(args); buf[sizeof(buf) - 1] = 0; int len = strlen(buf); int fd = open(file, O_WRONLY | O_CLOEXEC); if (fd == -1) return false; if (write(fd, buf, len) != len) { close(fd); return false; } close(fd); return true; } static int namespace_sandbox_proc(void* arg) { sandbox_common(); // /proc/self/setgroups is not present on some systems, ignore error. write_file("/proc/self/setgroups", "deny"); if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid)) fail("write of /proc/self/uid_map failed"); if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid)) fail("write of /proc/self/gid_map failed"); if (mkdir("./syz-tmp", 0777)) fail("mkdir(syz-tmp) failed"); if (mount("", "./syz-tmp", "tmpfs", 0, NULL)) fail("mount(tmpfs) failed"); if (mkdir("./syz-tmp/newroot", 0777)) fail("mkdir failed"); if (mkdir("./syz-tmp/newroot/dev", 0700)) fail("mkdir failed"); if (mount("/dev", "./syz-tmp/newroot/dev", NULL, MS_BIND | MS_REC | MS_PRIVATE, NULL)) fail("mount(dev) failed"); if (mkdir("./syz-tmp/pivot", 0777)) fail("mkdir failed"); if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) { debug("pivot_root failed"); if (chdir("./syz-tmp")) fail("chdir failed"); } else { if (chdir("/")) fail("chdir failed"); if (umount2("./pivot", MNT_DETACH)) fail("umount failed"); } if (chroot("./newroot")) fail("chroot failed"); if (chdir("/")) fail("chdir failed"); // Drop CAP_SYS_PTRACE so that test processes can't attach to parent processes. // Previously it lead to hangs because the loop process stopped due to SIGSTOP. // Note that a process can always ptrace its direct children, which is enough // for testing purposes. __user_cap_header_struct cap_hdr = {}; __user_cap_data_struct cap_data[2] = {}; cap_hdr.version = _LINUX_CAPABILITY_VERSION_3; cap_hdr.pid = getpid(); if (syscall(SYS_capget, &cap_hdr, &cap_data)) fail("capget failed"); cap_data[0].effective &= ~(1 << CAP_SYS_PTRACE); cap_data[0].permitted &= ~(1 << CAP_SYS_PTRACE); cap_data[0].inheritable &= ~(1 << CAP_SYS_PTRACE); if (syscall(SYS_capset, &cap_hdr, &cap_data)) fail("capset failed"); loop(); exit(1); } static int do_sandbox_namespace() { real_uid = getuid(); real_gid = getgid(); return clone(namespace_sandbox_proc, &sandbox_stack[sizeof(sandbox_stack) - 8], CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWUTS | CLONE_NEWNET, NULL); } #endif // One does not simply remove a directory. // There can be mounts, so we need to try to umount. // Moreover, a mount can be mounted several times, so we need to try to umount in a loop. // Moreover, after umount a dir can become non-empty again, so we need another loop. // Moreover, a mount can be re-mounted as read-only and then we will fail to make a dir empty. static void remove_dir(const char* dir) { DIR* dp; struct dirent* ep; int iter = 0; retry: dp = opendir(dir); if (dp == NULL) { if (errno == EMFILE) { // This happens when the test process casts prlimit(NOFILE) on us. // Ideally we somehow prevent test processes from messing with parent processes. // But full sandboxing is expensive, so let's ignore this error for now. exitf("opendir(%s) failed due to NOFILE, exiting"); } exitf("opendir(%s) failed", dir); } while ((ep = readdir(dp))) { if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0) continue; char filename[FILENAME_MAX]; snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name); struct stat st; if (lstat(filename, &st)) exitf("lstat(%s) failed", filename); if (S_ISDIR(st.st_mode)) { remove_dir(filename); continue; } int i; for (i = 0;; i++) { debug("unlink(%s)\n", filename); if (unlink(filename) == 0) break; if (errno == EROFS) { debug("ignoring EROFS\n"); break; } if (errno != EBUSY || i > 100) exitf("unlink(%s) failed", filename); debug("umount(%s)\n", filename); if (umount2(filename, MNT_DETACH)) exitf("umount(%s) failed", filename); } } closedir(dp); int i; for (i = 0;; i++) { debug("rmdir(%s)\n", dir); if (rmdir(dir) == 0) break; if (i < 100) { if (errno == EROFS) { debug("ignoring EROFS\n"); break; } if (errno == EBUSY) { debug("umount(%s)\n", dir); if (umount2(dir, MNT_DETACH)) exitf("umount(%s) failed", dir); continue; } if (errno == ENOTEMPTY) { if (iter < 100) { iter++; goto retry; } } } exitf("rmdir(%s) failed", dir); } } #if defined(SYZ_EXECUTOR) || defined(SYZ_REPEAT) static uint64_t current_time_ms() { struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC, &ts)) fail("clock_gettime failed"); return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000; } #endif #if defined(SYZ_REPEAT) static void test(); void loop() { int iter; for (iter = 0;; iter++) { char cwdbuf[256]; sprintf(cwdbuf, "./%d", iter); if (mkdir(cwdbuf, 0777)) fail("failed to mkdir"); int pid = fork(); if (pid < 0) fail("clone failed"); if (pid == 0) { prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); setpgrp(); if (chdir(cwdbuf)) fail("failed to chdir"); test(); exit(0); } int status = 0; uint64_t start = current_time_ms(); for (;;) { int res = waitpid(pid, &status, __WALL | WNOHANG); int errno0 = errno; if (res == pid) break; usleep(1000); if (current_time_ms() - start > 5 * 1000) { kill(-pid, SIGKILL); kill(pid, SIGKILL); waitpid(pid, &status, __WALL); break; } } remove_dir(cwdbuf); } } #endif