From d6f3385b1a3f2fba8e14d6794bece1dcdd9e479e Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Wed, 12 Jan 2022 14:40:51 +0000 Subject: all: add syz_clone() and syz_clone3() pseudo calls As was pointed out in #2921, the current approach of limiting the number of pids per process does not work on all Linux-based kernels. We could just treat fork, clone and clone3 in a special way (e.g. exit on a zero return). However, in that case we also need to sanitize the arguments for clone and clone3 - if CLONE_VM is passed and stack is 0, the forked child processes (threads) will become nearly unidentifiable and will corrupt syz-executor's memory. While we could sanitize clone's arguments, we cannot do so for clone3 - nothing can guarantee that they will not be changed concurrently. Instead of calling those syscalls directly, introduce a special pseudo syscall syz_clone3. It copies and sanitizes the arguments and then executes clone3 (or fork, if we're on an older kernel) in such a way so as to prevent fork bombs from happening. Also introduce syz_clone() to still be able to fuzz it on older systems. 
--- executor/common_linux.h | 59 ++++++++++++++++++++++++++++++++++++++++++ pkg/csource/csource.go | 1 + pkg/csource/generated.go | 47 +++++++++++++++++++++++++++++++++ pkg/host/syscalls_linux.go | 2 ++ sys/linux/sys.txt | 11 +++++--- sys/linux/test/landlock_ptrace | 7 ++--- sys/linux/test/syz_clone | 1 + sys/linux/test/syz_clone3 | 3 +++ sys/targets/targets.go | 8 +++--- 9 files changed, 128 insertions(+), 11 deletions(-) create mode 100644 sys/linux/test/syz_clone create mode 100644 sys/linux/test/syz_clone3 diff --git a/executor/common_linux.h b/executor/common_linux.h index 479c229dc..520c13506 100644 --- a/executor/common_linux.h +++ b/executor/common_linux.h @@ -5215,3 +5215,62 @@ static long syz_80211_join_ibss(volatile long a0, volatile long a1, volatile lon } #endif + +#if SYZ_EXECUTOR || __NR_syz_clone || __NR_syz_clone3 +#if SYZ_EXECUTOR +// The slowdown multiplier is already taken into account. +#define USLEEP_FORKED_CHILD (3 * syscall_timeout_ms * 1000) +#else +#define USLEEP_FORKED_CHILD (3 * /*{{{BASE_CALL_TIMEOUT_MS}}}*/ *1000) +#endif + +static long handle_clone_ret(long ret) +{ + if (ret != 0) + return ret; + // Exit if we're in the child process - not all kernels provide the proper means + // to prevent fork-bombs. + // But first sleep for some time. This will hopefully foster IPC fuzzing. + usleep(USLEEP_FORKED_CHILD); + // Note that exit_group is a bad choice here because if we created just a thread, then + // the whole process will be killed. A plain exit will work fine in any case. + syscall(__NR_exit, 0); + while (1) { + } +} +#endif + +#if SYZ_EXECUTOR || __NR_syz_clone +// syz_clone is mostly needed on kernels which do not support clone3. +static long syz_clone(volatile long flags, volatile long stack, volatile long stack_len, + volatile long ptid, volatile long ctid, volatile long tls) +{ + // ABI requires 16-byte stack alignment. + long sp = (stack + stack_len) & ~15; + // Clear the CLONE_VM flag. 
Otherwise it'll very likely corrupt syz-executor. + long ret = (long)syscall(__NR_clone, flags & ~CLONE_VM, sp, ptid, ctid, tls); + return handle_clone_ret(ret); +} +#endif + +#if SYZ_EXECUTOR || __NR_syz_clone3 +#include <linux/sched.h> +#include <sched.h> + +#define MAX_CLONE_ARGS_BYTES 256 +static long syz_clone3(volatile long a0, volatile long a1) +{ + unsigned long copy_size = a1; + if (copy_size < sizeof(uint64) || copy_size > MAX_CLONE_ARGS_BYTES) + return -1; + // The structure may have different sizes on different kernel versions, so copy it as raw bytes. + char clone_args[MAX_CLONE_ARGS_BYTES]; + memcpy(&clone_args, (void*)a0, copy_size); + + // As in syz_clone, clear the CLONE_VM flag. Flags are in the first 8-byte integer field. + uint64* flags = (uint64*)&clone_args; + *flags &= ~CLONE_VM; + return handle_clone_ret((long)syscall(__NR_clone3, &clone_args, copy_size)); +} + +#endif diff --git a/pkg/csource/csource.go b/pkg/csource/csource.go index 1ba433f45..9c820a2e8 100644 --- a/pkg/csource/csource.go +++ b/pkg/csource/csource.go @@ -116,6 +116,7 @@ func (ctx *context) generateSource() ([]byte, error) { timeouts := ctx.sysTarget.Timeouts(ctx.opts.Slowdown) replacements["PROGRAM_TIMEOUT_MS"] = fmt.Sprint(int(timeouts.Program / time.Millisecond)) timeoutExpr := fmt.Sprint(int(timeouts.Syscall / time.Millisecond)) + replacements["BASE_CALL_TIMEOUT_MS"] = timeoutExpr for i, call := range ctx.p.Calls { if timeout := call.Meta.Attrs.Timeout; timeout != 0 { timeoutExpr += fmt.Sprintf(" + (call == %v ? 
%v : 0)", i, timeout*uint64(timeouts.Scale)) diff --git a/pkg/csource/generated.go b/pkg/csource/generated.go index 7c3b35ca5..c535f04ed 100644 --- a/pkg/csource/generated.go +++ b/pkg/csource/generated.go @@ -10173,6 +10173,53 @@ static long syz_80211_join_ibss(volatile long a0, volatile long a1, volatile lon #endif +#if SYZ_EXECUTOR || __NR_syz_clone || __NR_syz_clone3 +#if SYZ_EXECUTOR +#define USLEEP_FORKED_CHILD (3 * syscall_timeout_ms * 1000) +#else +#define USLEEP_FORKED_CHILD (3 * /*{{{BASE_CALL_TIMEOUT_MS}}}*/ *1000) +#endif + +static long handle_clone_ret(long ret) +{ + if (ret != 0) + return ret; + usleep(USLEEP_FORKED_CHILD); + syscall(__NR_exit, 0); + while (1) { + } +} +#endif + +#if SYZ_EXECUTOR || __NR_syz_clone +static long syz_clone(volatile long flags, volatile long stack, volatile long stack_len, + volatile long ptid, volatile long ctid, volatile long tls) +{ + long sp = (stack + stack_len) & ~15; + long ret = (long)syscall(__NR_clone, flags & ~CLONE_VM, sp, ptid, ctid, tls); + return handle_clone_ret(ret); +} +#endif + +#if SYZ_EXECUTOR || __NR_syz_clone3 +#include <linux/sched.h> +#include <sched.h> + +#define MAX_CLONE_ARGS_BYTES 256 +static long syz_clone3(volatile long a0, volatile long a1) +{ + unsigned long copy_size = a1; + if (copy_size < sizeof(uint64) || copy_size > MAX_CLONE_ARGS_BYTES) + return -1; + char clone_args[MAX_CLONE_ARGS_BYTES]; + memcpy(&clone_args, (void*)a0, copy_size); + uint64* flags = (uint64*)&clone_args; + *flags &= ~CLONE_VM; + return handle_clone_ret((long)syscall(__NR_clone3, &clone_args, copy_size)); +} + +#endif + #elif GOOS_test #include diff --git a/pkg/host/syscalls_linux.go b/pkg/host/syscalls_linux.go index f43f1ada3..c628d20ab 100644 --- a/pkg/host/syscalls_linux.go +++ b/pkg/host/syscalls_linux.go @@ -316,6 +316,8 @@ var syzkallSupport = map[string]func(*prog.Syscall, *prog.Target, string) (bool, "syz_80211_inject_frame": isWifiEmulationSupported, "syz_80211_join_ibss": isWifiEmulationSupported, "syz_usbip_server_init": 
isSyzUsbIPSupported, + "syz_clone": alwaysSupported, + "syz_clone3": alwaysSupported, } func isSupportedSyzkall(c *prog.Syscall, target *prog.Target, sandbox string) (bool, string) { diff --git a/sys/linux/sys.txt b/sys/linux/sys.txt index fd934f976..8664e09a8 100644 --- a/sys/linux/sys.txt +++ b/sys/linux/sys.txt @@ -293,9 +293,14 @@ utimes(filename ptr[in, filename], times ptr[in, itimerval]) futimesat(dir fd_dir, pathname ptr[in, filename], times ptr[in, itimerval]) utimensat(dir fd_dir, pathname ptr[in, filename], times ptr[in, itimerval], flags flags[utimensat_flags]) -fork() pid (breaks_returns) -clone(flags flags[clone_flags], sp buffer[in], parentid ptr[out, int32], childtid ptr[out, int32], tls buffer[in]) (breaks_returns) -clone3(args ptr[in, clone_args], size bytesize[args]) pid (breaks_returns) +# Small trick - syzkaller cannot give the proper stack pointer to clone(), but we can do it with the aid of pseudo syscalls. +syz_clone(flags flags[clone_flags], stack buffer[in], stack_len bytesize[stack], parentid ptr[out, int32], childtid ptr[out, int32], tls buffer[in]) pid + +syz_clone3(args ptr[in, clone_args], size bytesize[args]) pid + +# We need these disabled definitions to simplify the presence and the NR checking. +clone(flags flags[clone_flags], sp buffer[in], parentid ptr[out, int32], childtid ptr[out, int32], tls buffer[in]) (breaks_returns, disabled) +clone3(args ptr[in, clone_args], size bytesize[args]) pid (breaks_returns, disabled) clone_args { flags flags[clone3_flags, int64] diff --git a/sys/linux/test/landlock_ptrace b/sys/linux/test/landlock_ptrace index e3037386d..ad63f3e5c 100644 --- a/sys/linux/test/landlock_ptrace +++ b/sys/linux/test/landlock_ptrace @@ -1,13 +1,10 @@ # Creates independent Landlock hierarchies and try different tracer/tracee # schemas (without scheduling control). 
-# -# fork() is not available for the following architectures: -# requires: -arch=arm64 -arch=riscv64 capset(&AUTO={0x20080522, 0x0}, &AUTO={0x0, 0x0, 0x0, 0x0, 0x0, 0x0}) prctl$PR_SET_NO_NEW_PRIVS(0x26, 0x1) -r0 = fork() +r0 = syz_clone(0x11, 0x0, 0x0, 0x0, 0x0, 0x0) # PTRACE_ATTACH and PTRACE_DETACH @@ -17,7 +14,7 @@ ptrace(0x11, r0) r1 = landlock_create_ruleset(&AUTO={0x100}, AUTO, 0x0) landlock_restrict_self(r1, 0x0) -r2 = fork() +r2 = syz_clone(0x11, 0x0, 0x0, 0x0, 0x0, 0x0) ptrace(0x10, r0) ptrace(0x11, r0) diff --git a/sys/linux/test/syz_clone b/sys/linux/test/syz_clone new file mode 100644 index 000000000..23022429c --- /dev/null +++ b/sys/linux/test/syz_clone @@ -0,0 +1 @@ +syz_clone(0x11, 0x0, 0x0, 0x0, 0x0, 0x0) diff --git a/sys/linux/test/syz_clone3 b/sys/linux/test/syz_clone3 new file mode 100644 index 000000000..5539d7f86 --- /dev/null +++ b/sys/linux/test/syz_clone3 @@ -0,0 +1,3 @@ +# requires: arch=amd64 + +syz_clone3(&AUTO={0x11, 0x0, 0x0, 0x0, {0x11}, 0x0, 0x0, 0x0, 0x0, 0x0, {0x0}}, AUTO) diff --git a/sys/targets/targets.go b/sys/targets/targets.go index a975ad27a..7255f7963 100644 --- a/sys/targets/targets.go +++ b/sys/targets/targets.go @@ -458,9 +458,11 @@ var oses = map[string]osCommon{ ExecutorUsesForkServer: true, KernelObject: "vmlinux", PseudoSyscallDeps: map[string][]string{ - "syz_read_part_table": []string{"memfd_create"}, - "syz_mount_image": []string{"memfd_create"}, - "syz_io_uring_setup": []string{"io_uring_setup"}, + "syz_read_part_table": {"memfd_create"}, + "syz_mount_image": {"memfd_create"}, + "syz_io_uring_setup": {"io_uring_setup"}, + "syz_clone3": {"clone3", "exit"}, + "syz_clone": {"clone", "exit"}, }, cflags: []string{"-static-pie"}, }, -- cgit mrf-deployment