diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2024-04-25 13:51:51 +0200 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2024-05-02 16:24:59 +0000 |
| commit | 22ee48a2879809608f79cc23c914859fa2335d59 (patch) | |
| tree | aa2b496eb81823999a8b48be011575ee7c650fff /pkg/vminfo/linux_syscalls.go | |
| parent | 3c7bb2247f61c5218d4cd58a558dc2496fba53a4 (diff) | |
pkg/vminfo: check enabled syscalls on the host
Move the syscall checking logic to the host.
Diffing sets of disabled syscalls before/after this change
in different configurations (none/setuid sandboxes, amd64/386 arches,
large/small kernel configs) shows only some improvements/bug fixes.
1. socket$inet[6]_icmp are now enabled.
Previously they were disabled due to the net.ipv4.ping_group_range sysctl
in the init namespace, which prevented creation of ping sockets.
In the new net namespace the sysctl gets its default value, which allows creation.
2. get_thread_area and set_thread_area are now disabled on amd64.
They are available only in 32-bit mode, but they are present in /proc/kallsyms,
so we always enabled them.
3. socket$bt_{bnep, cmtp, hidp, rfcomm} are now disabled.
They cannot be created in a non-init net namespace.
bt_sock_create() checks init_net and returns EAFNOSUPPORT immediately.
This is a bug in descriptions we need to fix.
Now we see it due to more precise checks.
4. fstat64/fstatat64/lstat64/stat64 are now enabled in 32-bit mode.
They are not present in /proc/kallsyms as syscalls, so we did not enable them.
But they are available in 32-bit mode.
5. 78 openat variants + 10 socket variants + mount are now disabled
with setuid sandbox. They are not permitted w/o root permissions,
but we ignored that. This additionally leads to 700 transitively
disabled syscalls.
In all cases checking in the actual executor context/sandbox
looks very positive, esp. for more restrictive sandboxes.
Android sandbox should benefit as well.
The additional benefit is full testability of the new code.
The change includes only a basic test that covers all checks,
and ensures the code does not crash/hang, all generated programs
parse successfully, etc. But it's possible to unit-test
every condition now.
The new version also parallelizes checking across VMs,
checking on a slow emulated qemu drops from 210 seconds
to 140 seconds.
Diffstat (limited to 'pkg/vminfo/linux_syscalls.go')
| -rw-r--r-- | pkg/vminfo/linux_syscalls.go | 345 |
1 files changed, 345 insertions, 0 deletions
diff --git a/pkg/vminfo/linux_syscalls.go b/pkg/vminfo/linux_syscalls.go new file mode 100644 index 000000000..a11eb06b4 --- /dev/null +++ b/pkg/vminfo/linux_syscalls.go @@ -0,0 +1,345 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package vminfo + +import ( + "bytes" + "fmt" + "os" + "regexp" + "strconv" + "strings" + "syscall" + + "github.com/google/syzkaller/prog" + "github.com/google/syzkaller/sys/targets" +) + +func (linux) syscallCheck(ctx *checkContext, call *prog.Syscall) string { + check := linuxSyscallChecks[call.CallName] + if check == nil { + check = func(ctx *checkContext, call *prog.Syscall) string { + return ctx.supportedSyscalls([]string{call.Name}) + } + } + if reason := check(ctx, call); reason != "" { + return reason + } + return linuxSupportedLSM(ctx, call) +} + +func linuxSupportedLSM(ctx *checkContext, call *prog.Syscall) string { + for _, lsm := range []string{"selinux", "apparmor", "smack"} { + if !strings.Contains(strings.ToLower(call.Name), lsm) { + continue + } + data, err := ctx.readFile("/sys/kernel/security/lsm") + if err != nil { + // Securityfs may not be mounted, but it does not mean that no LSMs are enabled. 
+ if os.IsNotExist(err) { + break + } + return err.Error() + } + if !bytes.Contains(data, []byte(lsm)) { + return fmt.Sprintf("%v is not enabled", lsm) + } + } + return "" +} + +var linuxSyscallChecks = map[string]func(*checkContext, *prog.Syscall) string{ + "openat": linuxSupportedOpenat, + "mount": linuxSupportedMount, + "socket": linuxSupportedSocket, + "socketpair": linuxSupportedSocket, + "pkey_alloc": linuxPkeysSupported, + "syz_open_dev": linuxSyzOpenDevSupported, + "syz_open_procfs": linuxSyzOpenProcfsSupported, + "syz_open_pts": alwaysSupported, + "syz_execute_func": alwaysSupported, + "syz_emit_ethernet": linuxNetInjectionSupported, + "syz_extract_tcp_res": linuxNetInjectionSupported, + "syz_usb_connect": linuxCheckUSBEmulation, + "syz_usb_connect_ath9k": linuxCheckUSBEmulation, + "syz_usb_disconnect": linuxCheckUSBEmulation, + "syz_usb_control_io": linuxCheckUSBEmulation, + "syz_usb_ep_write": linuxCheckUSBEmulation, + "syz_usb_ep_read": linuxCheckUSBEmulation, + "syz_kvm_setup_cpu": linuxSyzKvmSetupCPUSupported, + "syz_emit_vhci": linuxVhciInjectionSupported, + "syz_init_net_socket": linuxSyzInitNetSocketSupported, + "syz_genetlink_get_family_id": linuxSyzGenetlinkGetFamilyIDSupported, + "syz_mount_image": linuxSyzMountImageSupported, + "syz_read_part_table": linuxSyzReadPartTableSupported, + "syz_io_uring_setup": alwaysSupported, + "syz_io_uring_submit": alwaysSupported, + "syz_io_uring_complete": alwaysSupported, + "syz_memcpy_off": alwaysSupported, + "syz_btf_id_by_name": linuxBtfVmlinuxSupported, + "syz_fuse_handle_req": alwaysSupported, + "syz_80211_inject_frame": linuxWifiEmulationSupported, + "syz_80211_join_ibss": linuxWifiEmulationSupported, + "syz_usbip_server_init": linuxSyzUsbIPSupported, + "syz_clone": alwaysSupported, + "syz_clone3": alwaysSupported, + "syz_pkey_set": linuxPkeysSupported, + "syz_socket_connect_nvme_tcp": linuxSyzSocketConnectNvmeTCPSupported, + "syz_pidfd_open": alwaysSupported, +} + +func linuxSyzOpenDevSupported(ctx 
*checkContext, call *prog.Syscall) string { + if _, ok := call.Args[0].Type.(*prog.ConstType); ok { + // This is for syz_open_dev$char/block. + return "" + } + fname, ok := extractStringConst(call.Args[0].Type) + if !ok { + panic("first open arg is not a pointer to string const") + } + hashCount := strings.Count(fname, "#") + if hashCount == 0 { + panic(fmt.Sprintf("%v does not contain # in the file name", call.Name)) + } + if hashCount > 2 { + // If this fails, the logic below needs an adjustment. + panic(fmt.Sprintf("%v contains too many #", call.Name)) + } + var ids []int + if _, ok := call.Args[1].Type.(*prog.ProcType); ok { + ids = []int{0} + } else { + for i := 0; i < 5; i++ { + for j := 0; j < 5; j++ { + if j == 0 || hashCount > 1 { + ids = append(ids, i+j*10) + } + } + } + } + modes := ctx.allOpenModes() + var calls []string + for _, id := range ids { + for _, mode := range modes { + call := fmt.Sprintf("%s(&AUTO='%v', 0x%x, 0x%x)", call.Name, fname, id, mode) + calls = append(calls, call) + } + } + reason := ctx.anyCallSucceeds(calls, fmt.Sprintf("failed to open %v", fname)) + if reason != "" { + // These entries might not be available at boot time, + // but will be created by connected USB devices. + for _, prefix := range []string{"/dev/hidraw", "/dev/usb/hiddev", "/dev/input/"} { + if strings.HasPrefix(fname, prefix) { + // Note: ideally we use linuxSyzOpenDevSupported here, + // since we already issued test syscalls, we can't. 
+ if _, err := ctx.readFile("/dev/raw-gadget"); !os.IsNotExist(err) { + reason = "" + } + } + } + } + return reason +} + +func linuxNetInjectionSupported(ctx *checkContext, call *prog.Syscall) string { + return ctx.rootCanOpen("/dev/net/tun") +} + +func linuxSyzOpenProcfsSupported(ctx *checkContext, call *prog.Syscall) string { + return ctx.canOpen("/proc/cmdline") +} + +func linuxCheckUSBEmulation(ctx *checkContext, call *prog.Syscall) string { + return ctx.rootCanOpen("/dev/raw-gadget") +} + +func linuxSyzKvmSetupCPUSupported(ctx *checkContext, call *prog.Syscall) string { + switch call.Name { + case "syz_kvm_setup_cpu$x86": + if ctx.target.Arch == targets.AMD64 || ctx.target.Arch == targets.I386 { + return "" + } + case "syz_kvm_setup_cpu$arm64": + if ctx.target.Arch == targets.ARM64 { + return "" + } + case "syz_kvm_setup_cpu$ppc64": + if ctx.target.Arch == targets.PPC64LE { + return "" + } + } + return "unsupported arch" +} + +func linuxSupportedOpenat(ctx *checkContext, call *prog.Syscall) string { + fname, ok := extractStringConst(call.Args[1].Type) + if !ok || fname[0] != '/' { + return "" + } + modes := ctx.allOpenModes() + // Attempt to extract flags from the syscall description. 
+ if mode, ok := call.Args[2].Type.(*prog.ConstType); ok { + modes = []uint64{mode.Val} + } + var calls []string + for _, mode := range modes { + call := fmt.Sprintf("openat(0x%0x, &AUTO='%v', 0x%x, 0x0)", ctx.val("AT_FDCWD"), fname, mode) + calls = append(calls, call) + } + return ctx.anyCallSucceeds(calls, fmt.Sprintf("failed to open %v", fname)) +} + +func linuxSupportedMount(ctx *checkContext, call *prog.Syscall) string { + return linuxSupportedFilesystem(ctx, call, 2) +} + +func linuxSyzMountImageSupported(ctx *checkContext, call *prog.Syscall) string { + return linuxSupportedFilesystem(ctx, call, 0) +} + +func linuxSupportedFilesystem(ctx *checkContext, call *prog.Syscall, fsarg int) string { + fstype, ok := extractStringConst(call.Args[fsarg].Type) + if !ok { + panic(fmt.Sprintf("%v: filesystem is not string const", call.Name)) + } + switch fstype { + case "fuse", "fuseblk": + if reason := ctx.canOpen("/dev/fuse"); reason != "" { + return reason + } + if reason := ctx.onlySandboxNoneOrNamespace(); reason != "" { + return reason + } + default: + if reason := ctx.onlySandboxNone(); reason != "" { + return reason + } + } + filesystems, err := ctx.readFile("/proc/filesystems") + if err != nil { + return err.Error() + } + if !bytes.Contains(filesystems, []byte("\t"+fstype+"\n")) { + return fmt.Sprintf("/proc/filesystems does not contain %v", fstype) + } + return "" +} + +func linuxSyzReadPartTableSupported(ctx *checkContext, call *prog.Syscall) string { + return ctx.onlySandboxNone() +} + +func linuxSupportedSocket(ctx *checkContext, call *prog.Syscall) string { + if call.Name == "socket" || call.Name == "socketpair" { + return "" // generic versions are always supported + } + af := uint64(0) + if arg, ok := call.Args[0].Type.(*prog.ConstType); ok { + af = arg.Val + } else { + panic(fmt.Sprintf("socket family is not const in %v", call.Name)) + } + typ, hasType := uint64(0), false + if arg, ok := call.Args[1].Type.(*prog.ConstType); ok { + typ, hasType = arg.Val, 
true + } else if arg, ok := call.Args[1].Type.(*prog.FlagsType); ok { + typ, hasType = arg.Vals[0], true + } + proto, hasProto := uint64(0), false + if arg, ok := call.Args[2].Type.(*prog.ConstType); ok { + proto, hasProto = arg.Val, true + } + syscallName := call.Name + if call.CallName == "socketpair" { + syscallName = "socket" + } + callStr := fmt.Sprintf("%s(0x%x, 0x%x, 0x%x)", syscallName, af, typ, proto) + errno := ctx.execCall(callStr) + if errno == syscall.ENOSYS || errno == syscall.EAFNOSUPPORT || hasProto && hasType && errno != 0 { + return fmt.Sprintf("%v failed: %v", callStr, errno) + } + return "" +} + +func linuxSyzGenetlinkGetFamilyIDSupported(ctx *checkContext, call *prog.Syscall) string { + // TODO: try to obtain actual family ID here. It will disable whole sets of sendmsg syscalls. + return ctx.callSucceeds(fmt.Sprintf("socket(0x%x, 0x%x, 0x%x)", + ctx.val("AF_NETLINK"), ctx.val("SOCK_RAW"), ctx.val("NETLINK_GENERIC"))) +} + +func linuxPkeysSupported(ctx *checkContext, call *prog.Syscall) string { + return ctx.callSucceeds("pkey_alloc(0x0, 0x0)") +} + +func linuxSyzSocketConnectNvmeTCPSupported(ctx *checkContext, call *prog.Syscall) string { + return ctx.onlySandboxNone() +} + +func linuxVhciInjectionSupported(ctx *checkContext, call *prog.Syscall) string { + return ctx.rootCanOpen("/dev/vhci") +} + +func linuxSyzInitNetSocketSupported(ctx *checkContext, call *prog.Syscall) string { + if reason := ctx.onlySandboxNone(); reason != "" { + return reason + } + return linuxSupportedSocket(ctx, call) +} + +func linuxBtfVmlinuxSupported(ctx *checkContext, call *prog.Syscall) string { + if reason := ctx.onlySandboxNone(); reason != "" { + return reason + } + return ctx.canOpen("/sys/kernel/btf/vmlinux") +} + +func linuxSyzUsbIPSupported(ctx *checkContext, call *prog.Syscall) string { + return ctx.canWrite("/sys/devices/platform/vhci_hcd.0/attach") +} + +func linuxWifiEmulationSupported(ctx *checkContext, call *prog.Syscall) string { + if reason := 
ctx.rootCanOpen("/sys/class/mac80211_hwsim/"); reason != "" { + return reason + } + // We use HWSIM_ATTR_PERM_ADDR which was added in 4.17. + return linuxRequireKernel(ctx, 4, 17) +} + +func linuxRequireKernel(ctx *checkContext, major, minor int) string { + data, err := ctx.readFile("/proc/version") + if err != nil { + return err.Error() + } + if ok, bad := matchKernelVersion(string(data), major, minor); bad { + return fmt.Sprintf("failed to parse kernel version: %s", data) + } else if !ok { + return fmt.Sprintf("kernel %v.%v required, have %s", major, minor, data) + } + return "" +} + +var kernelVersionRe = regexp.MustCompile(` ([0-9]+)\.([0-9]+)\.`) + +func matchKernelVersion(ver string, x, y int) (bool, bool) { + match := kernelVersionRe.FindStringSubmatch(ver) + if match == nil { + return false, true + } + major, err := strconv.Atoi(match[1]) + if err != nil { + return false, true + } + if major <= 0 || major > 999 { + return false, true + } + minor, err := strconv.Atoi(match[2]) + if err != nil { + return false, true + } + if minor <= 0 || minor > 999 { + return false, true + } + return major*1000+minor >= x*1000+y, false +} |
