From 874c5754bb22dbf77d6b600ff91f0f4f1fc5073a Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 12 Oct 2015 10:16:57 +0200 Subject: initial commit --- prog/analysis.go | 245 +++++++++++++++++++ prog/clone.go | 49 ++++ prog/consts.go | 598 ++++++++++++++++++++++++++++++++++++++++++++++ prog/encoding.go | 411 ++++++++++++++++++++++++++++++++ prog/encodingc.go | 114 +++++++++ prog/encodingexec.go | 172 ++++++++++++++ prog/generation.go | 29 +++ prog/mutation.go | 377 +++++++++++++++++++++++++++++ prog/mutation_test.go | 268 +++++++++++++++++++++ prog/prog.go | 143 +++++++++++ prog/prog_test.go | 77 ++++++ prog/rand.go | 638 ++++++++++++++++++++++++++++++++++++++++++++++++++ prog/validation.go | 168 +++++++++++++ 13 files changed, 3289 insertions(+) create mode 100644 prog/analysis.go create mode 100644 prog/clone.go create mode 100644 prog/consts.go create mode 100644 prog/encoding.go create mode 100644 prog/encodingc.go create mode 100644 prog/encodingexec.go create mode 100644 prog/generation.go create mode 100644 prog/mutation.go create mode 100644 prog/mutation_test.go create mode 100644 prog/prog.go create mode 100644 prog/prog_test.go create mode 100644 prog/rand.go create mode 100644 prog/validation.go (limited to 'prog') diff --git a/prog/analysis.go b/prog/analysis.go new file mode 100644 index 000000000..9a19d8264 --- /dev/null +++ b/prog/analysis.go @@ -0,0 +1,245 @@ +// Copyright 2015 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +// Conservative resource-related analysis of programs. +// The analysis figures out what files descriptors are [potentially] opened +// at a particular point in program, what pages are [potentially] mapped, +// what files were already referenced in calls, etc. + +package prog + +import ( + "fmt" + + "github.com/google/syzkaller/sys" +) + +const ( + maxPages = 4 << 10 +) + +type state struct { + enabledCalls []*sys.Call + files map[string]bool + resources map[sys.ResourceKind]map[sys.ResourceSubkind][]*Arg + strings map[string]bool + pages [maxPages]bool +} + +// analyze analyzes the program p up to but not including call c. +func analyze(enabledCalls []*sys.Call, p *Prog, c *Call) *state { + s := newState(enabledCalls) + for _, c1 := range p.Calls { + if c1 == c { + break + } + s.analyze(c1) + } + return s +} + +func newState(enabledCalls []*sys.Call) *state { + s := &state{ + enabledCalls: enabledCalls, + files: make(map[string]bool), + resources: make(map[sys.ResourceKind]map[sys.ResourceSubkind][]*Arg), + strings: make(map[string]bool), + } + if len(s.enabledCalls) == 0 { + s.enabledCalls = sys.Calls + } + return s +} + +func (s *state) analyze(c *Call) { + foreachArgArray(&c.Args, c.Ret, func(arg, base *Arg, _ *[]*Arg) { + switch typ := arg.Type.(type) { + case sys.FilenameType: + if arg.Kind == ArgData && arg.Dir != DirOut { + s.files[string(arg.Data)] = true + } + case sys.ResourceType: + if arg.Dir != DirIn { + if s.resources[typ.Kind] == nil { + s.resources[typ.Kind] = make(map[sys.ResourceSubkind][]*Arg) + } + s.resources[typ.Kind][typ.Subkind] = append(s.resources[typ.Kind][typ.Subkind], arg) + } + case sys.BufferType: + if typ.Kind == sys.BufferString && arg.Kind == ArgData && len(arg.Data) != 0 { + s.strings[string(arg.Data)] = true + } + } + }) + switch c.Meta.Name { + case "mmap": + // Filter out only very wrong arguments. + length := c.Args[1] + if length.AddrPage == 0 && length.AddrOffset == 0 { + break + } + if flags, fd := c.Args[4], c.Args[3]; flags.Val&MAP_ANONYMOUS == 0 && fd.Kind == ArgConst && fd.Val == sys.InvalidFD { + break + } + s.addressable(c.Args[0], length, true) + case "munmap": + s.addressable(c.Args[0], c.Args[1], false) + case "mremap": + s.addressable(c.Args[4], c.Args[2], true) + } +} + +func (s *state) addressable(addr, size *Arg, ok bool) { + if addr.Kind != ArgPointer || size.Kind != ArgPageSize { + panic("mmap/munmap/mremap args are not pages") + } + n := size.AddrPage + if size.AddrOffset != 0 { + n++ + } + if addr.AddrPage+n > uintptr(len(s.pages)) { + panic(fmt.Sprintf("address is out of bounds: page=%v len=%v (%v, %v) bound=%v", addr.AddrPage, n, size.AddrPage, size.AddrOffset, len(s.pages))) + } + for i := uintptr(0); i < n; i++ { + s.pages[addr.AddrPage+i] = ok + } +} + +func foreachArgArray(args *[]*Arg, ret *Arg, f func(arg, base *Arg, parent *[]*Arg)) { + var rec func(arg, base *Arg, parent *[]*Arg) + rec = func(arg, base *Arg, parent *[]*Arg) { + f(arg, base, parent) + for _, arg1 := range arg.Inner { + parent1 := parent + if _, ok := arg.Type.(sys.StructType); ok { + parent1 = &arg.Inner + } + rec(arg1, base, parent1) + } + if arg.Kind == ArgPointer && arg.Res != nil { + rec(arg.Res, arg, parent) + } + } + for _, arg := range *args { + rec(arg, nil, args) + } + if ret != nil { + rec(ret, nil, nil) + } +} + +func foreachArg(c *Call, f func(arg, base *Arg, parent *[]*Arg)) { + foreachArgArray(&c.Args, nil, f) +} + +func referencedArgs(args []*Arg, ret *Arg) (res []*Arg) { + f := func(arg, _ *Arg, _ *[]*Arg) { + for arg1 := range arg.Uses { + if arg1.Kind != ArgResult { + panic("use references not ArgResult") + } + res = append(res, arg1) + } + } + foreachArgArray(&args, ret, f) + return +} + +func assignTypeAndDir(c *Call) { + var rec func(arg *Arg, typ sys.Type, dir ArgDir) + rec = func(arg *Arg, typ sys.Type, dir ArgDir) { + if arg.Call != nil && arg.Call != c { + panic(fmt.Sprintf("different call is already assigned: %p %p %v %v", arg.Call, c, arg.Call.Meta.Name, c.Meta.Name)) + } + arg.Call = c + if arg.Type != nil && arg.Type.Name() != typ.Name() { + panic("different type is already assigned") + } + arg.Type = typ + switch arg.Kind { + case ArgPointer: + arg.Dir = DirIn + switch typ1 := typ.(type) { + case sys.FilenameType: + rec(arg.Res, typ, dir) + case sys.PtrType: + if arg.Res != nil { + rec(arg.Res, typ1.Type, ArgDir(typ1.Dir)) + } + } + case ArgGroup: + arg.Dir = dir + switch typ1 := typ.(type) { + case sys.StructType: + for i, arg1 := range arg.Inner { + rec(arg1, typ1.Fields[i], dir) + } + case sys.ArrayType: + for _, arg1 := range arg.Inner { + rec(arg1, typ1.Type, dir) + } + } + default: + arg.Dir = dir + } + } + for i, arg := range c.Args { + rec(arg, c.Meta.Args[i], DirIn) + } + if c.Ret == nil { + c.Ret = returnArg() + c.Ret.Call = c + c.Ret.Type = c.Meta.Ret + c.Ret.Dir = DirOut + } +} + +func sanitizeCall(c *Call) { + switch c.Meta.Name { + case "mmap": + // Add MAP_FIXED flag, otherwise it produces non-deterministic results. + addr := c.Args[0] + if addr.Kind != ArgPointer { + panic("mmap address is not ArgPointer") + } + length := c.Args[1] + if length.Kind != ArgPageSize { + panic("mmap length is not ArgPageSize") + } + flags := c.Args[3] + if flags.Kind != ArgConst { + panic("mmap flag arg is not const") + } + flags.Val |= MAP_FIXED + case "mremap": + // Add MREMAP_FIXED flag, otherwise it produces non-deterministic results. + flags := c.Args[3] + if flags.Kind != ArgConst { + panic("mremap flag arg is not const") + } + if flags.Val&MREMAP_MAYMOVE != 0 { + flags.Val |= MREMAP_FIXED + } + case "mknod": + mode := c.Args[1] + if mode.Kind != ArgConst { + panic("mknod mode is not const") + } + // Char and block devices read/write io ports, kernel memory and do other nasty things. + if mode.Val != S_IFREG && mode.Val != S_IFIFO && mode.Val != S_IFSOCK { + mode.Val = S_IFIFO + } + case "syslog": + cmd := c.Args[0] + // These disable console output, but we need it. + if cmd.Val == SYSLOG_ACTION_CONSOLE_OFF || cmd.Val == SYSLOG_ACTION_CONSOLE_ON { + cmd.Val = SYSLOG_ACTION_SIZE_UNREAD + } + case "exit", "exit_group": + code := c.Args[0] + // These codes are reserved by executor. + if code.Val%128 == 67 || code.Val%128 == 68 { + code.Val = 1 + } + } +} diff --git a/prog/clone.go b/prog/clone.go new file mode 100644 index 000000000..dd8e6e4db --- /dev/null +++ b/prog/clone.go @@ -0,0 +1,49 @@ +// Copyright 2015 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package prog + +func (p *Prog) Clone() *Prog { + p1 := new(Prog) + newargs := make(map[*Arg]*Arg) + for _, c := range p.Calls { + c1 := new(Call) + c1.Meta = c.Meta + c1.Ret = c.Ret.clone(c1, newargs) + for _, arg := range c.Args { + c1.Args = append(c1.Args, arg.clone(c1, newargs)) + } + p1.Calls = append(p1.Calls, c1) + } + if err := p1.validate(); err != nil { + panic(err) + } + return p1 +} + +func (arg *Arg) clone(c *Call, newargs map[*Arg]*Arg) *Arg { + arg1 := new(Arg) + *arg1 = *arg + arg1.Call = c + arg1.Data = append([]byte{}, arg.Data...) + switch arg.Kind { + case ArgPointer: + if arg.Res != nil { + arg1.Res = arg.Res.clone(c, newargs) + } + case ArgResult: + r := newargs[arg.Res] + arg1.Res = r + if r.Uses == nil { + r.Uses = make(map[*Arg]bool) + } + r.Uses[arg1] = true + } + arg1.Inner = nil + for _, arg2 := range arg.Inner { + arg1.Inner = append(arg1.Inner, arg2.clone(c, newargs)) + } + arg1.Uses = nil // filled when we clone the referent + newargs[arg] = arg1 + return arg1 +} diff --git a/prog/consts.go b/prog/consts.go new file mode 100644 index 000000000..a83952d0e --- /dev/null +++ b/prog/consts.go @@ -0,0 +1,598 @@ +// AUTOGENERATED FILE +package prog + +const ( + ADDR_COMPAT_LAYOUT = 2097152 + ADDR_LIMIT_32BIT = 8388608 + ADDR_LIMIT_3GB = 134217728 + ADDR_NO_RANDOMIZE = 262144 + AF_APPLETALK = 5 + AF_ATMPVC = 8 + AF_AX25 = 3 + AF_INET = 2 + AF_INET6 = 10 + AF_IPX = 4 + AF_LOCAL = 1 + AF_NETLINK = 16 + AF_PACKET = 17 + AF_X25 = 9 + ARCH_GET_FS = 4099 + ARCH_GET_GS = 4100 + ARCH_SET_FS = 4098 + ARCH_SET_GS = 4097 + AT_EACCESS = 512 + AT_EMPTY_PATH = 4096 + AT_FDCWD = 18446744073709551516 + AT_REMOVEDIR = 512 + AT_SYMLINK_FOLLOW = 1024 + AT_SYMLINK_NOFOLLOW = 256 + CLOCK_BOOTTIME = 7 + CLOCK_MONOTONIC = 1 + CLOCK_MONOTONIC_COARSE = 6 + CLOCK_MONOTONIC_RAW = 4 + CLOCK_PROCESS_CPUTIME_ID = 2 + CLOCK_REALTIME = 0 + CLOCK_REALTIME_COARSE = 5 + CLOCK_THREAD_CPUTIME_ID = 3 + CLONE_CHILD_CLEARTID = 2097152 + CLONE_CHILD_SETTID = 16777216 + CLONE_FILES = 1024 + CLONE_FS = 512 + CLONE_IO = 2147483648 + CLONE_NEWIPC = 134217728 + CLONE_NEWNET = 1073741824 + CLONE_NEWNS = 131072 + CLONE_NEWPID = 536870912 + CLONE_NEWUTS = 67108864 + CLONE_PARENT = 32768 + CLONE_PARENT_SETTID = 1048576 + CLONE_PTRACE = 8192 + CLONE_SETTLS = 524288 + CLONE_SIGHAND = 2048 + CLONE_SYSVSEM = 262144 + CLONE_THREAD = 65536 + CLONE_UNTRACED = 8388608 + CLONE_VFORK = 16384 + CLONE_VM = 256 + DN_ACCESS = 1 + DN_ATTRIB = 32 + DN_CREATE = 4 + DN_DELETE = 8 + DN_MODIFY = 2 + DN_MULTISHOT = 2147483648 + DN_RENAME = 16 + EFD_CLOEXEC = 524288 + EFD_NONBLOCK = 2048 + EFD_SEMAPHORE = 1 + EPOLLERR = 8 + EPOLLET = 2147483648 + EPOLLHUP = 16 + EPOLLIN = 1 + EPOLLONESHOT = 1073741824 + EPOLLOUT = 4 + EPOLLPRI = 2 + EPOLLRDHUP = 8192 + EPOLL_CLOEXEC = 524288 + EPOLL_CTL_ADD = 1 + EPOLL_CTL_DEL = 2 + EPOLL_CTL_MOD = 3 + FALLOC_FL_KEEP_SIZE = 1 + FALLOC_FL_PUNCH_HOLE = 2 + FAN_ACCESS = 1 + FAN_ACCESS_PERM = 131072 + FAN_CLASS_CONTENT = 4 + FAN_CLASS_NOTIF = 0 + FAN_CLASS_PRE_CONTENT = 8 + FAN_CLOEXEC = 1 + FAN_CLOSE_NOWRITE = 16 + FAN_CLOSE_WRITE = 8 + FAN_EVENT_ON_CHILD = 134217728 + FAN_MARK_ADD = 1 + FAN_MARK_DONT_FOLLOW = 4 + FAN_MARK_FLUSH = 128 + FAN_MARK_IGNORED_MASK = 32 + FAN_MARK_IGNORED_SURV_MODIFY = 64 + FAN_MARK_MOUNT = 16 + FAN_MARK_ONLYDIR = 8 + FAN_MARK_REMOVE = 2 + FAN_MODIFY = 2 + FAN_NONBLOCK = 2 + FAN_ONDIR = 1073741824 + FAN_OPEN = 32 + FAN_OPEN_PERM = 65536 + FAN_UNLIMITED_MARKS = 32 + FAN_UNLIMITED_QUEUE = 16 + FD_CLOEXEC = 1 + FUTEX_CMP_REQUEUE = 4 + FUTEX_REQUEUE = 3 + FUTEX_WAIT = 0 + FUTEX_WAIT_BITSET = 9 + FUTEX_WAKE = 1 + F_DUPFD = 0 + F_DUPFD_CLOEXEC = 1030 + F_GETFD = 1 + F_GETFL = 3 + F_GETLEASE = 1025 + F_GETLK = 5 + F_GETOWN = 9 + F_GETOWN_EX = 16 + F_GETPIPE_SZ = 1032 + F_GETSIG = 11 + F_NOTIFY = 1026 + F_OWNER_PGRP = 2 + F_OWNER_PID = 1 + F_OWNER_TID = 0 + F_RDLCK = 0 + F_SETFD = 2 + F_SETFL = 4 + F_SETLEASE = 1024 + F_SETLK = 6 + F_SETLKW = 7 + F_SETOWN = 8 + F_SETOWN_EX = 15 + F_SETPIPE_SZ = 1031 + F_SETSIG = 10 + F_UNLCK = 2 + F_WRLCK = 1 + GETALL = 13 + GETNCNT = 14 + GETPID = 11 + GETVAL = 12 + GETZCNT = 15 + IN_ACCESS = 1 + IN_ATTRIB = 4 + IN_CLOEXEC = 524288 + IN_CLOSE_NOWRITE = 16 + IN_CLOSE_WRITE = 8 + IN_CREATE = 256 + IN_DELETE = 512 + IN_DELETE_SELF = 1024 + IN_DONT_FOLLOW = 33554432 + IN_EXCL_UNLINK = 67108864 + IN_MASK_ADD = 536870912 + IN_MODIFY = 2 + IN_MOVED_FROM = 64 + IN_MOVED_TO = 128 + IN_MOVE_SELF = 2048 + IN_NONBLOCK = 2048 + IN_ONESHOT = 2147483648 + IN_ONLYDIR = 16777216 + IN_OPEN = 32 + IOCB_CMD_FDSYNC = 3 + IOCB_CMD_FSYNC = 2 + IOCB_CMD_NOOP = 6 + IOCB_CMD_POLL = 5 + IOCB_CMD_PREAD = 0 + IOCB_CMD_PREADV = 7 + IOCB_CMD_PREADX = 4 + IOCB_CMD_PWRITE = 1 + IOCB_CMD_PWRITEV = 8 + IOCB_FLAG_RESFD = 1 + IOPRIO_WHO_PGRP = 2 + IOPRIO_WHO_PROCESS = 1 + IOPRIO_WHO_USER = 3 + IPC_CREAT = 512 + IPC_EXCL = 1024 + IPC_INFO = 3 + IPC_NOWAIT = 2048 + IPC_RMID = 0 + IPC_SET = 1 + IPC_STAT = 2 + ITIMER_PROF = 2 + ITIMER_REAL = 0 + ITIMER_VIRTUAL = 1 + KCMP_FILE = 0 + KCMP_FILES = 2 + KCMP_FS = 3 + KCMP_IO = 5 + KCMP_SIGHAND = 4 + KCMP_SYSVSEM = 6 + KCMP_VM = 1 + KEXEC_ARCH_386 = 196608 + KEXEC_ARCH_ARM = 2621440 + KEXEC_ARCH_IA_64 = 3276800 + KEXEC_ARCH_MIPS = 524288 + KEXEC_ARCH_MIPS_LE = 655360 + KEXEC_ARCH_PPC = 1310720 + KEXEC_ARCH_PPC64 = 1376256 + KEXEC_ARCH_S390 = 1441792 + KEXEC_ARCH_SH = 2752512 + KEXEC_ARCH_X86_64 = 4063232 + KEXEC_ON_CRASH = 1 + KEXEC_PRESERVE_CONTEXT = 2 + KEYCTL_ASSUME_AUTHORITY = 16 + KEYCTL_CHOWN = 4 + KEYCTL_CLEAR = 7 + KEYCTL_DESCRIBE = 6 + KEYCTL_GET_KEYRING_ID = 0 + KEYCTL_GET_PERSISTENT = 22 + KEYCTL_GET_SECURITY = 17 + KEYCTL_INSTANTIATE = 12 + KEYCTL_INSTANTIATE_IOV = 20 + KEYCTL_INVALIDATE = 21 + KEYCTL_JOIN_SESSION_KEYRING = 1 + KEYCTL_LINK = 8 + KEYCTL_NEGATE = 13 + KEYCTL_READ = 11 + KEYCTL_REJECT = 19 + KEYCTL_REVOKE = 3 + KEYCTL_SEARCH = 10 + KEYCTL_SESSION_TO_PARENT = 18 + KEYCTL_SETPERM = 5 + KEYCTL_SET_REQKEY_KEYRING = 14 + KEYCTL_SET_TIMEOUT = 15 + KEYCTL_UNLINK = 9 + KEYCTL_UPDATE = 2 + KEY_REQKEY_DEFL_DEFAULT = 0 + KEY_REQKEY_DEFL_GROUP_KEYRING = 6 + KEY_REQKEY_DEFL_NO_CHANGE = 18446744073709551615 + KEY_REQKEY_DEFL_PROCESS_KEYRING = 2 + KEY_REQKEY_DEFL_REQUESTOR_KEYRING = 7 + KEY_REQKEY_DEFL_SESSION_KEYRING = 3 + KEY_REQKEY_DEFL_THREAD_KEYRING = 1 + KEY_REQKEY_DEFL_USER_KEYRING = 4 + KEY_REQKEY_DEFL_USER_SESSION_KEYRING = 5 + KEY_SPEC_GROUP_KEYRING = 18446744073709551610 + KEY_SPEC_PROCESS_KEYRING = 18446744073709551614 + KEY_SPEC_REQKEY_AUTH_KEY = 18446744073709551609 + KEY_SPEC_REQUESTOR_KEYRING = 18446744073709551608 + KEY_SPEC_SESSION_KEYRING = 18446744073709551613 + KEY_SPEC_THREAD_KEYRING = 18446744073709551615 + KEY_SPEC_USER_KEYRING = 18446744073709551612 + KEY_SPEC_USER_SESSION_KEYRING = 18446744073709551611 + LOCK_EX = 2 + LOCK_NB = 4 + LOCK_SH = 1 + LOCK_UN = 8 + MADV_DODUMP = 17 + MADV_DOFORK = 11 + MADV_DONTDUMP = 16 + MADV_DONTFORK = 10 + MADV_DONTNEED = 4 + MADV_HUGEPAGE = 14 + MADV_HWPOISON = 100 + MADV_MERGEABLE = 12 + MADV_NOHUGEPAGE = 15 + MADV_NORMAL = 0 + MADV_RANDOM = 1 + MADV_REMOVE = 9 + MADV_SEQUENTIAL = 2 + MADV_SOFT_OFFLINE = 101 + MADV_UNMERGEABLE = 13 + MADV_WILLNEED = 3 + MAP_32BIT = 64 + MAP_ANONYMOUS = 32 + MAP_DENYWRITE = 2048 + MAP_EXECUTABLE = 4096 + MAP_FILE = 0 + MAP_FIXED = 16 + MAP_GROWSDOWN = 256 + MAP_HUGETLB = 262144 + MAP_LOCKED = 8192 + MAP_NONBLOCK = 65536 + MAP_NORESERVE = 16384 + MAP_POPULATE = 32768 + MAP_PRIVATE = 2 + MAP_SHARED = 1 + MAP_STACK = 131072 + MAP_UNINITIALIZED = 67108864 + MCL_CURRENT = 1 + MCL_FUTURE = 2 + MMAP_PAGE_ZERO = 1048576 + MNT_DETACH = 2 + MNT_EXPIRE = 4 + MNT_FORCE = 1 + MODULE_INIT_IGNORE_MODVERSIONS = 1 + MODULE_INIT_IGNORE_VERMAGIC = 2 + MPOL_BIND = 2 + MPOL_DEFAULT = 0 + MPOL_F_ADDR = 2 + MPOL_F_MEMS_ALLOWED = 4 + MPOL_F_NODE = 1 + MPOL_F_RELATIVE_NODES = 16384 + MPOL_F_STATIC_NODES = 32768 + MPOL_INTERLEAVE = 3 + MPOL_MF_MOVE = 2 + MPOL_MF_MOVE_ALL = 4 + MPOL_MF_STRICT = 1 + MPOL_PREFERRED = 1 + MREMAP_FIXED = 2 + MREMAP_MAYMOVE = 1 + MSG_CMSG_CLOEXEC = 1073741824 + MSG_CONFIRM = 2048 + MSG_DONTROUTE = 4 + MSG_DONTWAIT = 64 + MSG_EOR = 128 + MSG_ERRQUEUE = 8192 + MSG_EXCEPT = 8192 + MSG_INFO = 12 + MSG_MORE = 32768 + MSG_NOERROR = 4096 + MSG_NOSIGNAL = 16384 + MSG_OOB = 1 + MSG_PEEK = 2 + MSG_STAT = 11 + MSG_TRUNC = 32 + MSG_WAITALL = 256 + MSG_WAITFORONE = 65536 + MS_ASYNC = 1 + MS_BIND = 4096 + MS_DIRSYNC = 128 + MS_INVALIDATE = 2 + MS_MANDLOCK = 64 + MS_MOVE = 8192 + MS_NOATIME = 1024 + MS_NODEV = 4 + MS_NODIRATIME = 2048 + MS_NOEXEC = 8 + MS_NOSUID = 2 + MS_RDONLY = 1 + MS_RELATIME = 2097152 + MS_REMOUNT = 32 + MS_SILENT = 32768 + MS_STRICTATIME = 16777216 + MS_SYNC = 4 + MS_SYNCHRONOUS = 16 + NT_386_IOPERM = 513 + NT_386_TLS = 512 + NT_AUXV = 6 + NT_PRFPREG = 2 + NT_PRPSINFO = 3 + NT_PRSTATUS = 1 + NT_TASKSTRUCT = 4 + NT_X86_XSTATE = 514 + O_APPEND = 1024 + O_ASYNC = 8192 + O_CLOEXEC = 524288 + O_CREAT = 64 + O_DIRECT = 16384 + O_DIRECTORY = 65536 + O_DSYNC = 4096 + O_EXCL = 128 + O_LARGEFILE = 0 + O_NOATIME = 262144 + O_NOCTTY = 256 + O_NOFOLLOW = 131072 + O_NONBLOCK = 2048 + O_PATH = 2097152 + O_RDONLY = 0 + O_RDWR = 2 + O_SYNC = 1052672 + O_TRUNC = 512 + O_WRONLY = 1 + POSIX_FADV_DONTNEED = 4 + POSIX_FADV_NOREUSE = 5 + POSIX_FADV_NORMAL = 0 + POSIX_FADV_RANDOM = 1 + POSIX_FADV_SEQUENTIAL = 2 + POSIX_FADV_WILLNEED = 3 + PRIO_PGRP = 1 + PRIO_PROCESS = 0 + PRIO_USER = 2 + PROT_EXEC = 4 + PROT_READ = 1 + PROT_WRITE = 2 + PR_CAPBSET_DROP = 24 + PR_CAPBSET_READ = 23 + PR_GET_CHILD_SUBREAPER = 37 + PR_GET_DUMPABLE = 3 + PR_GET_ENDIAN = 19 + PR_GET_FPEMU = 9 + PR_GET_FPEXC = 11 + PR_GET_KEEPCAPS = 7 + PR_GET_NAME = 16 + PR_GET_NO_NEW_PRIVS = 39 + PR_GET_PDEATHSIG = 2 + PR_GET_SECCOMP = 21 + PR_GET_SECUREBITS = 27 + PR_GET_TID_ADDRESS = 40 + PR_GET_TIMERSLACK = 30 + PR_GET_TIMING = 13 + PR_GET_TSC = 25 + PR_GET_UNALIGN = 5 + PR_MCE_KILL = 33 + PR_MCE_KILL_GET = 34 + PR_SET_CHILD_SUBREAPER = 36 + PR_SET_DUMPABLE = 4 + PR_SET_ENDIAN = 20 + PR_SET_FPEMU = 10 + PR_SET_FPEXC = 12 + PR_SET_KEEPCAPS = 8 + PR_SET_MM = 35 + PR_SET_NAME = 15 + PR_SET_NO_NEW_PRIVS = 38 + PR_SET_PDEATHSIG = 1 + PR_SET_PTRACER = 1499557217 + PR_SET_SECCOMP = 22 + PR_SET_SECUREBITS = 28 + PR_SET_TIMERSLACK = 29 + PR_SET_TIMING = 14 + PR_SET_TSC = 26 + PR_SET_UNALIGN = 6 + PR_TASK_PERF_EVENTS_DISABLE = 31 + PR_TASK_PERF_EVENTS_ENABLE = 32 + PTRACE_ATTACH = 16 + PTRACE_CONT = 7 + PTRACE_DETACH = 17 + PTRACE_GETEVENTMSG = 16897 + PTRACE_GETFPREGS = 14 + PTRACE_GETREGS = 12 + PTRACE_GETREGSET = 16900 + PTRACE_GETSIGINFO = 16898 + PTRACE_INTERRUPT = 16903 + PTRACE_KILL = 8 + PTRACE_LISTEN = 16904 + PTRACE_O_EXITKILL = 1048576 + PTRACE_O_TRACECLONE = 8 + PTRACE_O_TRACEEXEC = 16 + PTRACE_O_TRACEEXIT = 64 + PTRACE_O_TRACEFORK = 2 + PTRACE_O_TRACESYSGOOD = 1 + PTRACE_O_TRACEVFORK = 4 + PTRACE_O_TRACEVFORKDONE = 32 + PTRACE_PEEKDATA = 2 + PTRACE_PEEKTEXT = 1 + PTRACE_PEEKUSER = 3 + PTRACE_POKEDATA = 5 + PTRACE_POKETEXT = 4 + PTRACE_POKEUSER = 6 + PTRACE_SEIZE = 16902 + PTRACE_SETFPREGS = 15 + PTRACE_SETOPTIONS = 16896 + PTRACE_SETREGS = 13 + PTRACE_SETREGSET = 16901 + PTRACE_SETSIGINFO = 16899 + PTRACE_SINGLESTEP = 9 + PTRACE_SYSCALL = 24 + PTRACE_SYSEMU = 31 + PTRACE_SYSEMU_SINGLESTEP = 32 + PTRACE_TRACEME = 0 + P_ALL = 0 + P_PGID = 2 + P_PID = 1 + READ_IMPLIES_EXEC = 4194304 + RENAME_EXCHANGE = 2 + RENAME_NOREPLACE = 1 + RENAME_WHITEOUT = 4 + RLIMIT_AS = 9 + RLIMIT_CORE = 4 + RLIMIT_CPU = 0 + RLIMIT_DATA = 2 + RLIMIT_FSIZE = 1 + RLIMIT_LOCKS = 10 + RLIMIT_MEMLOCK = 8 + RLIMIT_MSGQUEUE = 12 + RLIMIT_NICE = 13 + RLIMIT_NOFILE = 7 + RLIMIT_NPROC = 6 + RLIMIT_RSS = 5 + RLIMIT_RTPRIO = 14 + RLIMIT_RTTIME = 15 + RLIMIT_SIGPENDING = 11 + RLIMIT_STACK = 3 + RUSAGE_CHILDREN = 18446744073709551615 + RUSAGE_SELF = 0 + RUSAGE_THREAD = 1 + SA_NOCLDSTOP = 1 + SA_NOCLDWAIT = 2 + SA_NODEFER = 1073741824 + SA_ONSTACK = 134217728 + SA_RESETHAND = 2147483648 + SA_RESTART = 268435456 + SA_SIGINFO = 4 + SCHED_BATCH = 3 + SCHED_DEADLINE = 6 + SCHED_FIFO = 1 + SCHED_FLAG_RESET_ON_FORK = 1 + SCHED_IDLE = 5 + SCHED_OTHER = 0 + SCHED_RR = 2 + SECCOMP_FILTER_FLAG_TSYNC = 1 + SECCOMP_SET_MODE_FILTER = 1 + SECCOMP_SET_MODE_STRICT = 0 + SEEK_CUR = 1 + SEEK_DATA = 3 + SEEK_END = 2 + SEEK_HOLE = 4 + SEEK_SET = 0 + SEM_INFO = 19 + SEM_STAT = 18 + SEM_UNDO = 4096 + SETALL = 17 + SETVAL = 16 + SFD_CLOEXEC = 524288 + SFD_NONBLOCK = 2048 + SHM_HUGETLB = 2048 + SHM_INFO = 14 + SHM_LOCK = 11 + SHM_NORESERVE = 4096 + SHM_RDONLY = 4096 + SHM_REMAP = 16384 + SHM_RND = 8192 + SHM_STAT = 13 + SHM_UNLOCK = 12 + SHORT_INODE = 16777216 + SHUT_RD = 0 + SHUT_WR = 1 + SIGEV_NONE = 1 + SIGEV_SIGNAL = 0 + SIGEV_THREAD = 2 + SIG_BLOCK = 0 + SIG_SETMASK = 2 + SIG_UNBLOCK = 1 + SOCK_CLOEXEC = 524288 + SOCK_DGRAM = 2 + SOCK_NONBLOCK = 2048 + SOCK_PACKET = 10 + SOCK_RAW = 3 + SOCK_RDM = 4 + SOCK_SEQPACKET = 5 + SOCK_STREAM = 1 + SPLICE_F_GIFT = 8 + SPLICE_F_MORE = 4 + SPLICE_F_MOVE = 1 + SPLICE_F_NONBLOCK = 2 + STICKY_TIMEOUTS = 67108864 + SYNC_FILE_RANGE_WAIT_AFTER = 4 + SYNC_FILE_RANGE_WAIT_BEFORE = 1 + SYNC_FILE_RANGE_WRITE = 2 + SYSLOG_ACTION_CLEAR = 5 + SYSLOG_ACTION_CLOSE = 0 + SYSLOG_ACTION_CONSOLE_LEVEL = 8 + SYSLOG_ACTION_CONSOLE_OFF = 6 + SYSLOG_ACTION_CONSOLE_ON = 7 + SYSLOG_ACTION_OPEN = 1 + SYSLOG_ACTION_READ = 2 + SYSLOG_ACTION_READ_ALL = 3 + SYSLOG_ACTION_READ_CLEAR = 4 + SYSLOG_ACTION_SIZE_BUFFER = 10 + SYSLOG_ACTION_SIZE_UNREAD = 9 + SYZ_PER_BSD = 6 + SYZ_PER_HPUX = 16 + SYZ_PER_IRIX32 = 9 + SYZ_PER_IRIX64 = 11 + SYZ_PER_IRIXN32 = 10 + SYZ_PER_ISCR4 = 5 + SYZ_PER_LINUX = 0 + SYZ_PER_LINUX32 = 8 + SYZ_PER_OSF4 = 15 + SYZ_PER_OSR5 = 3 + SYZ_PER_RISCOS = 12 + SYZ_PER_SOLARIS = 13 + SYZ_PER_SVR3 = 2 + SYZ_PER_SVR4 = 1 + SYZ_PER_UW7 = 14 + SYZ_PER_WYSEV386 = 4 + SYZ_PER_XENIX = 7 + S_IFBLK = 24576 + S_IFCHR = 8192 + S_IFIFO = 4096 + S_IFREG = 32768 + S_IFSOCK = 49152 + S_IRGRP = 32 + S_IROTH = 4 + S_IRUSR = 256 + S_IWGRP = 16 + S_IWOTH = 2 + S_IWUSR = 128 + S_IXGRP = 8 + S_IXOTH = 1 + S_IXUSR = 64 + TFD_CLOEXEC = 524288 + TFD_NONBLOCK = 2048 + TFD_TIMER_ABSTIME = 1 + TIMER_ABSTIME = 1 + UMOUNT_NOFOLLOW = 8 + WCONTINUED = 8 + WEXITED = 4 + WHOLE_SECONDS = 33554432 + WNOHANG = 1 + WNOWAIT = 16777216 + WSTOPPED = 2 + WUNTRACED = 2 + XATTR_CREATE = 1 + XATTR_REPLACE = 2 + __WALL = 1073741824 + __WCLONE = 2147483648 + __WNOTHREAD = 536870912 +) diff --git a/prog/encoding.go b/prog/encoding.go new file mode 100644 index 000000000..6a74d3dd0 --- /dev/null +++ b/prog/encoding.go @@ -0,0 +1,411 @@ +// Copyright 2015 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package prog + +import ( + "bufio" + "bytes" + "encoding/hex" + "fmt" + "io" + "strconv" + + "github.com/google/syzkaller/sys" +) + +// String generates a very compact program description (mostly for debug output). +func (p *Prog) String() string { + buf := new(bytes.Buffer) + for i, c := range p.Calls { + if i != 0 { + fmt.Fprintf(buf, "-") + } + fmt.Fprintf(buf, "%v", c.Meta.Name) + } + return buf.String() +} + +func (p *Prog) Serialize() []byte { + /* + if err := p.validate(); err != nil { + panic("serializing invalid program") + } + */ + buf := new(bytes.Buffer) + vars := make(map[*Arg]int) + varSeq := 0 + for _, c := range p.Calls { + if len(c.Ret.Uses) != 0 { + fmt.Fprintf(buf, "r%v = ", varSeq) + vars[c.Ret] = varSeq + varSeq++ + } + fmt.Fprintf(buf, "%v(", c.Meta.Name) + for i, a := range c.Args { + if i != 0 { + fmt.Fprintf(buf, ", ") + } + a.serialize(buf, vars, &varSeq) + } + fmt.Fprintf(buf, ")\n") + } + return buf.Bytes() +} + +func (a *Arg) serialize(buf io.Writer, vars map[*Arg]int, varSeq *int) { + if a == nil { + fmt.Fprintf(buf, "nil") + return + } + if len(a.Uses) != 0 { + fmt.Fprintf(buf, "[r%v=]", *varSeq) + vars[a] = *varSeq + *varSeq++ + } + switch a.Kind { + case ArgConst: + fmt.Fprintf(buf, "0x%x", a.Val) + case ArgResult: + id, ok := vars[a.Res] + if !ok { + panic("no result") + } + fmt.Fprintf(buf, "r%v", id) + if a.OpDiv != 0 { + fmt.Fprintf(buf, "/%v", a.OpDiv) + } + if a.OpAdd != 0 { + fmt.Fprintf(buf, "+%v", a.OpAdd) + } + case ArgPointer: + fmt.Fprintf(buf, "&%v=", serializeAddr(a, true)) + a.Res.serialize(buf, vars, varSeq) + case ArgPageSize: + fmt.Fprintf(buf, "%v", serializeAddr(a, false)) + case ArgData: + fmt.Fprintf(buf, "\"%v\"", hex.EncodeToString(a.Data)) + case ArgGroup: + fmt.Fprintf(buf, "{") + for i, a1 := range a.Inner { + if i != 0 { + fmt.Fprintf(buf, ", ") + } + a1.serialize(buf, vars, varSeq) + } + fmt.Fprintf(buf, "}") + default: + panic("unknown arg kind") + } +} + +func Deserialize(data []byte) (prog *Prog, err error) { + prog = new(Prog) + p := &parser{r: bufio.NewScanner(bytes.NewReader(data))} + vars := make(map[string]*Arg) + for p.Scan() { + if p.EOF() { + continue + } + name := p.Ident() + r := "" + if p.Char() == '=' { + r = name + p.Parse('=') + name = p.Ident() + + } + meta := sys.CallMap[name] + if meta == nil { + return nil, fmt.Errorf("unknown syscall %v", name) + } + c := &Call{Meta: meta} + prog.Calls = append(prog.Calls, c) + p.Parse('(') + for i := 0; p.Char() != ')'; i++ { + arg, err := parseArg(p, vars) + if err != nil { + return nil, err + } + c.Args = append(c.Args, arg) + if p.Char() != ')' { + p.Parse(',') + } + } + p.Parse(')') + if !p.EOF() { + return nil, fmt.Errorf("tailing data (line #%v)", p.l) + } + if len(c.Args) != len(meta.Args) { + return nil, fmt.Errorf("wrong call arg count: %v, want %v", len(c.Args), len(meta.Args)) + } + assignTypeAndDir(c) + if r != "" { + vars[r] = c.Ret + } + } + if p.Err() != nil { + return nil, err + } + if err := prog.validate(); err != nil { + return nil, err + } + return +} + +func parseArg(p *parser, vars map[string]*Arg) (*Arg, error) { + r := "" + if p.Char() == '[' { + p.Parse('[') + r = p.Ident() + p.Parse('=') + p.Parse(']') + } + var arg *Arg + switch p.Char() { + case '0': + val := p.Ident() + v, err := strconv.ParseUint(val, 0, 64) + if err != nil { + return nil, fmt.Errorf("wrong arg value '%v': %v", val, err) + } + arg = constArg(uintptr(v)) + case 'r': + id := p.Ident() + v, ok := vars[id] + if !ok || v == nil { + return nil, fmt.Errorf("result %v references unknown variable (vars=%+v)", id, vars) + } + arg = resultArg(v) + if p.Char() == '/' { + p.Parse('/') + op := p.Ident() + v, err := strconv.ParseUint(op, 0, 64) + if err != nil { + return nil, fmt.Errorf("wrong result div op: '%v'", op) + } + arg.OpDiv = uintptr(v) + } + if p.Char() == '+' { + p.Parse('+') + op := p.Ident() + v, err := strconv.ParseUint(op, 0, 64) + if err != nil { + return nil, fmt.Errorf("wrong result add op: '%v'", op) + } + arg.OpAdd = uintptr(v) + } + case '&': + p.Parse('&') + page, off, err := parseAddr(p, true) + if err != nil { + return nil, err + } + p.Parse('=') + inner, err := parseArg(p, vars) + if err != nil { + return nil, err + } + arg = pointerArg(page, off, inner) + case '(': + page, off, err := parseAddr(p, false) + if err != nil { + return nil, err + } + arg = pageSizeArg(page, off) + case '"': + p.Parse('"') + val := "" + if p.Char() != '"' { + val = p.Ident() + } + p.Parse('"') + data, err := hex.DecodeString(val) + if err != nil { + return nil, fmt.Errorf("data arg has bad value '%v'", val) + } + arg = dataArg(data) + case '{': + p.Parse('{') + var inner []*Arg + for p.Char() != '}' { + arg, err := parseArg(p, vars) + if err != nil { + return nil, err + } + inner = append(inner, arg) + if p.Char() != '}' { + p.Parse(',') + } + } + p.Parse('}') + arg = groupArg(inner) + case 'n': + p.Parse('n') + p.Parse('i') + p.Parse('l') + if r != "" { + return nil, fmt.Errorf("named nil argument") + } + default: + return nil, fmt.Errorf("failed to parse argument at %v (line #%v/%v: %v)", int(p.Char()), p.l, p.i, p.s) + } + if r != "" { + vars[r] = arg + } + return arg, nil +} + +const ( + encodingAddrBase = 0x7f0000000000 + encodingPageSize = 4 << 10 +) + +func serializeAddr(a *Arg, base bool) string { + page := a.AddrPage * encodingPageSize + if base { + page += encodingAddrBase + } + soff := "" + if off := a.AddrOffset; off != 0 { + sign := "+" + if off < 0 { + sign = "-" + off = -off + page += encodingPageSize + } + soff = fmt.Sprintf("%v0x%x", sign, off) + } + return fmt.Sprintf("(0x%x%v)", page, soff) +} + +func parseAddr(p *parser, base bool) (uintptr, int, error) { + p.Parse('(') + pstr := p.Ident() + page, err := strconv.ParseUint(pstr, 0, 64) + if err != nil { + return 0, 0, fmt.Errorf("failed to parse addr page: '%v'", pstr) + } + if page%encodingPageSize != 0 { + return 0, 0, fmt.Errorf("address base is not page size aligned: '%v'", pstr) + } + if base { + if page < encodingAddrBase { + return 0, 0, fmt.Errorf("address without base offset: '%v'", pstr) + } + page -= encodingAddrBase + } + var off int64 + if p.Char() == '+' || p.Char() == '-' { + minus := false + if p.Char() == '-' { + minus = true + p.Parse('-') + } else { + p.Parse('+') + } + ostr := p.Ident() + off, err = strconv.ParseInt(ostr, 0, 64) + if err != nil { + return 0, 0, fmt.Errorf("failed to parse addr offset: '%v'", ostr) + } + if minus { + page -= encodingPageSize + off = -off + } + } + p.Parse(')') + page /= encodingPageSize + return uintptr(page), int(off), nil +} + +type parser struct { + r *bufio.Scanner + s string + i int + l int + e error +} + +func (p *parser) Scan() bool { + if p.e != nil { + return false + } + if !p.r.Scan() { + p.e = p.r.Err() + return false + } + p.s = p.r.Text() + p.i = 0 + p.l++ + return true +} + +func (p *parser) Err() error { + return p.e +} + +func (p *parser) Str() string { + return p.s +} + +func (p *parser) EOF() bool { + return p.i == len(p.s) +} + +func (p *parser) Char() byte { + if p.e != nil { + return 0 + } + if p.EOF() { + p.failf("unexpected eof") + return 0 + } + return p.s[p.i] +} + +func (p *parser) Parse(ch byte) { + if p.e != nil { + return + } + if p.EOF() { + p.failf("want %s, got EOF", string(ch)) + return + } + if p.s[p.i] != ch { + p.failf("want '%v', got '%v'", string(ch), string(p.s[p.i])) + return + } + p.i++ + p.SkipWs() +} + +func (p *parser) SkipWs() { + for p.i < len(p.s) && (p.s[p.i] == ' ' || p.s[p.i] == '\t') { + p.i++ + } +} + +func (p *parser) Ident() string { + i := p.i + for p.i < len(p.s) && + (p.s[p.i] >= 'a' && p.s[p.i] <= 'z' || + p.s[p.i] >= 'A' && p.s[p.i] <= 'Z' || + p.s[p.i] >= '0' && p.s[p.i] <= '9' || + p.s[p.i] == '_' || p.s[p.i] == '$') { + p.i++ + } + if i == p.i { + p.failf("failed to parse identifier at pos %v", i) + return "" + } + if ch := p.s[i]; ch >= '0' && ch <= '9' { + } + s := p.s[i:p.i] + p.SkipWs() + return s +} + +func (p *parser) failf(msg string, args ...interface{}) { + p.e = fmt.Errorf("%v\nline #%v: %v", fmt.Sprintf(msg, args...), p.l, p.s) +} diff --git a/prog/encodingc.go b/prog/encodingc.go new file mode 100644 index 000000000..c84acc918 --- /dev/null +++ b/prog/encodingc.go @@ -0,0 +1,114 @@ +// Copyright 2015 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package prog + +import ( + "bytes" + "fmt" + "io" + "unsafe" + + "github.com/google/syzkaller/sys" +) + +func (p *Prog) WriteCSource() []byte { + exec := p.SerializeForExec() + buf := new(bytes.Buffer) + writeCSource(buf, exec) + return buf.Bytes() +} + +func writeCSource(w io.Writer, exec []byte) { + fmt.Fprintf(w, `// autogenerated by syzkaller (http://github.com/google/syzkaller) +#include +#include +#include + +int main() +{ +`) + read := func() uintptr { + if len(exec) < 8 { + panic("exec program overflow") + } + v := *(*uint64)(unsafe.Pointer(&exec[0])) + exec = exec[8:] + return uintptr(v) + } + resultRef := func() string { + arg := read() + res := fmt.Sprintf("r%v", arg) + if opDiv := read(); opDiv != 0 { + res = fmt.Sprintf("%v/%v", res, opDiv) + } + if opAdd := read(); opAdd != 0 { + res = fmt.Sprintf("%v+%v", res, opAdd) + } + return res + } + lastCall := 0 + for n := 0; ; n++ { + switch instr := read(); instr { + case instrEOF: + fmt.Fprintf(w, "\treturn 0;\n}\n") + return + case instrCopyin: + addr := read() + typ := read() + size := read() + switch typ { + case execArgConst: + arg := read() + fmt.Fprintf(w, "\t*(uint%v_t*)0x%x = 0x%x;\n", size*8, addr, arg) + case execArgResult: + fmt.Fprintf(w, "\t*(uint%v_t*)0x%x = %v;\n", size*8, addr, resultRef()) + case execArgData: + data := exec[:size] + exec = exec[(size+7)/8*8:] + var esc []byte + for _, v := range data { + hex := func(v byte) byte { + if v < 10 { + return '0' + v + } + return 'a' + v - 10 + } + esc = append(esc, '\\', 'x', hex(v>>4), hex(v<<4>>4)) + } + fmt.Fprintf(w, "\tmemcpy((void*)0x%x, \"%s\", %v);\n", addr, esc, size) + default: + panic("bad argument type") + } + case instrCopyout: + addr := read() + size := read() + fmt.Fprintf(w, "\tlong r%v = -1;\n", n) + fmt.Fprintf(w, "\tif (r%v != -1)\n", lastCall) + fmt.Fprintf(w, "\t\tr%v = *(uint%v_t*)0x%x;\n", n, size*8, addr) + default: + // Normal syscall. + meta := sys.Calls[instr] + fmt.Fprintf(w, "\tlong r%v = syscall(SYS_%v", n, meta.CallName) + nargs := read() + for i := uintptr(0); i < nargs; i++ { + typ := read() + size := read() + _ = size + switch typ { + case execArgConst: + fmt.Fprintf(w, ", 0x%xul", read()) + case execArgResult: + fmt.Fprintf(w, ", %v", resultRef()) + default: + panic("unknown arg type") + } + } + for i := nargs; i < 6; i++ { + fmt.Fprintf(w, ", 0") + } + fmt.Fprintf(w, ");\n") + lastCall = n + } + } +} diff --git a/prog/encodingexec.go b/prog/encodingexec.go new file mode 100644 index 000000000..90b8ef7ea --- /dev/null +++ b/prog/encodingexec.go @@ -0,0 +1,172 @@ +// Copyright 2015 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +// This file does serialization of programs for executor binary. +// The format aims at simple parsing: binary and irreversible. + +package prog + +const ( + instrEOF = ^uintptr(iota) + instrCopyin + instrCopyout +) + +const ( + execArgConst = uintptr(iota) + execArgResult + execArgData +) + +const ( + ptrSize = 8 + pageSize = 4 << 10 + dataOffset = 512 << 20 +) + +func (p *Prog) SerializeForExec() []byte { + if err := p.validate(); err != nil { + panic("serializing invalid program") + } + var instrSeq uintptr + w := &execContext{args: make(map[*Arg]*argInfo)} + for _, c := range p.Calls { + // Calculate arg offsets within structs. + foreachArg(c, func(arg, base *Arg, _ *[]*Arg) { + if base == nil || arg.Kind == ArgGroup { + return + } + if w.args[base] == nil { + w.args[base] = &argInfo{} + } + w.args[arg] = &argInfo{Offset: w.args[base].CurSize} + w.args[base].CurSize += arg.Size(arg.Type) + }) + // Generate copyin instructions that fill in data into pointer arguments. + foreachArg(c, func(arg, _ *Arg, _ *[]*Arg) { + if arg.Kind == ArgPointer && arg.Res != nil { + var rec func(*Arg) + rec = func(arg1 *Arg) { + if arg1.Kind == ArgGroup { + for _, arg2 := range arg1.Inner { + rec(arg2) + } + return + } + if arg1.Dir == DirOut || arg1.Kind == ArgData && len(arg1.Data) == 0 { + return + } + w.write(instrCopyin) + w.write(physicalAddr(arg) + w.args[arg1].Offset) + w.writeArg(arg1) + instrSeq++ + } + rec(arg.Res) + } + }) + // Generate the call itself. + w.write(uintptr(c.Meta.ID)) + w.write(uintptr(len(c.Args))) + for _, arg := range c.Args { + w.writeArg(arg) + } + w.args[c.Ret] = &argInfo{Idx: instrSeq} + instrSeq++ + // Generate copyout instructions that persist interesting return values. + foreachArg(c, func(arg, base *Arg, _ *[]*Arg) { + if len(arg.Uses) == 0 { + return + } + switch arg.Kind { + case ArgReturn: + // Idx is already assigned above. + case ArgConst, ArgResult: + // Create a separate copyout instruction that has own Idx. + if base.Kind != ArgPointer { + panic("arg base is not a pointer") + } + info := w.args[arg] + info.Idx = instrSeq + instrSeq++ + w.write(instrCopyout) + w.write(physicalAddr(base) + info.Offset) + w.write(arg.Size(arg.Type)) + default: + panic("bad arg kind in copyout") + } + }) + } + w.write(instrEOF) + return w.buf +} + +func physicalAddr(arg *Arg) uintptr { + if arg.Kind != ArgPointer { + panic("physicalAddr: bad arg kind") + } + addr := arg.AddrPage*pageSize + dataOffset + if arg.AddrOffset >= 0 { + addr += uintptr(arg.AddrOffset) + } else { + addr += pageSize - uintptr(-arg.AddrOffset) + } + return addr +} + +type execContext struct { + buf []byte + args map[*Arg]*argInfo +} + +type argInfo struct { + Offset uintptr // from base pointer + CurSize uintptr + Idx uintptr // instruction index +} + +func (w *execContext) write(v uintptr) { + w.buf = append(w.buf, byte(v>>0), byte(v>>8), byte(v>>16), byte(v>>24), byte(v>>32), byte(v>>40), byte(v>>48), byte(v>>56)) +} + +func (w *execContext) writeArg(arg *Arg) { + switch arg.Kind { + case ArgConst: + w.write(execArgConst) + w.write(arg.Size(arg.Type)) + w.write(arg.Val) + case ArgResult: + w.write(execArgResult) + w.write(arg.Size(arg.Type)) + w.write(w.args[arg.Res].Idx) + w.write(arg.OpDiv) + w.write(arg.OpAdd) + case ArgPointer: + w.write(execArgConst) + w.write(arg.Size(arg.Type)) + w.write(physicalAddr(arg)) + case ArgPageSize: + w.write(execArgConst) + w.write(arg.Size(arg.Type)) + w.write(arg.AddrPage * pageSize) + case ArgData: + w.write(execArgData) + w.write(uintptr(len(arg.Data))) + for i := 0; i < len(arg.Data); i += 8 { + var v uintptr + for j := 0; j < 8; j++ { + if i+j >= len(arg.Data) { + break + } + v |= uintptr(arg.Data[i+j]) << uint(j*8) + } + w.write(v) + } + case ArgGroup: + // Squash groups. + for _, arg1 := range arg.Inner { + w.writeArg(arg1) + } + default: + panic("unknown arg type") + } +} diff --git a/prog/generation.go b/prog/generation.go new file mode 100644 index 000000000..43e527aa0 --- /dev/null +++ b/prog/generation.go @@ -0,0 +1,29 @@ +// Copyright 2015 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package prog + +import ( + "math/rand" + + "github.com/google/syzkaller/sys" +) + +// Generate generates a random program of length ~ncalls. +// calls is a set of allowed syscalls, if nil all syscalls are used. +func Generate(rs rand.Source, ncalls int, enabledCalls []*sys.Call) *Prog { + p := new(Prog) + r := newRand(rs) + s := newState(enabledCalls) + for len(p.Calls) < ncalls { + calls := r.generateCall(s) + for _, c := range calls { + s.analyze(c) + p.Calls = append(p.Calls, c) + } + } + if err := p.validate(); err != nil { + panic(err) + } + return p +} diff --git a/prog/mutation.go b/prog/mutation.go new file mode 100644 index 000000000..6d84f198f --- /dev/null +++ b/prog/mutation.go @@ -0,0 +1,377 @@ +// Copyright 2015 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package prog + +import ( + "fmt" + "math/rand" + + "github.com/google/syzkaller/sys" +) + +func (p *Prog) Mutate(rs rand.Source, ncalls int, enabledCalls []*sys.Call) { + r := newRand(rs) + for stop := false; !stop; stop = r.bin() { + r.choose( + 10, func() { + // Insert a new call. + if len(p.Calls) >= ncalls { + return + } + idx := r.Intn(len(p.Calls) + 1) + var c *Call + if idx < len(p.Calls) { + c = p.Calls[idx] + } + s := analyze(enabledCalls, p, c) + calls := r.generateCall(s) + p.insertBefore(c, calls) + }, + 10, func() { + // Change args of a call. + if len(p.Calls) == 0 { + return + } + c := p.Calls[r.Intn(len(p.Calls))] + if len(c.Args) == 0 { + return + } + s := analyze(enabledCalls, p, c) + for stop := false; !stop; stop = r.bin() { + args, bases, parents := mutationArgs(c) + idx := r.Intn(len(args)) + arg, base, parent := args[idx], bases[idx], parents[idx] + var baseSize uintptr + if base != nil { + if base.Kind != ArgPointer || base.Res == nil { + panic("bad base arg") + } + baseSize = base.Res.Size(base.Res.Type) + } + var size *Arg + switch a := arg.Type.(type) { + case sys.IntType, sys.FlagsType, sys.FileoffType, sys.ResourceType, sys.VmaType: + arg1, size1, calls1 := r.generateArg(s, arg.Type, arg.Dir, nil) + replaceArg(p, arg, arg1, calls1) + size = size1 + case sys.BufferType: + switch a.Kind { + case sys.BufferBlob: + var data []byte + switch arg.Kind { + case ArgData: + data = append([]byte{}, arg.Data...) + case ArgConst: + // 0 is OK for optional args. + if arg.Val != 0 { + panic(fmt.Sprintf("BufferType has non-zero const value: %v", arg.Val)) + } + default: + panic(fmt.Sprintf("bad arg kind for BufferType: %v", arg.Kind)) + } + arg.Data = mutateData(r, data) + case sys.BufferString: + if r.bin() { + arg.Data = mutateData(r, append([]byte{}, arg.Data...)) + } else { + arg.Data = r.randString(s) + } + case sys.BufferSockaddr: + arg.Data = r.sockaddr(s) + default: + panic("unknown buffer kind") + } + size = constArg(uintptr(len(arg.Data))) + case sys.FilenameType: + filename := r.filename(s) + arg.Data = []byte(filename) + case sys.ArrayType: + count := r.rand(6) + if count > uintptr(len(arg.Inner)) { + var calls []*Call + for count > uintptr(len(arg.Inner)) { + arg1, _, calls1 := r.generateArg(s, a.Type, arg.Dir, nil) + arg.Inner = append(arg.Inner, arg1) + for _, c1 := range calls1 { + calls = append(calls, c1) + s.analyze(c1) + } + } + for _, c1 := range calls { + assignTypeAndDir(c1) + sanitizeCall(c1) + } + assignTypeAndDir(c) + sanitizeCall(c) + p.insertBefore(c, calls) + } else if count < uintptr(len(arg.Inner)) { + removed := arg.Inner[count:] + arg.Inner = arg.Inner[:count] + + foreachArgArray(&removed, nil, func(arg, _ *Arg, _ *[]*Arg) { + if arg.Kind == ArgResult { + if _, ok := arg.Res.Uses[arg]; !ok { + panic("broken tree") + } + delete(arg.Res.Uses, arg) + } + }) + + for _, arg := range referencedArgs(removed, nil) { + c1 := arg.Call + s := analyze(enabledCalls, p, c1) + arg1, _, calls1 := r.generateArg(s, arg.Type, arg.Dir, nil) + replaceArg(p, arg, arg1, calls1) + } + } + // TODO: swap elements of the array + size = constArg(count) + case sys.PtrType: + // TODO: we don't know size for out args + size := uintptr(1) + if arg.Res != nil { + size = arg.Res.Size(arg.Res.Type) + } + arg1, calls1 := r.addr(s, size, arg.Res) + replaceArg(p, arg, arg1, calls1) + case sys.StructType: + switch a.Name() { + case "timespec", "timeval": + arg1, _, calls1 := r.generateArg(s, arg.Type, arg.Dir, nil) + replaceArg(p, arg.Inner[0], arg1.Inner[0], calls1) + replaceArg(p, arg.Inner[1], arg1.Inner[1], nil) + default: + panic("bad arg returned by mutationArgs: StructType") + } + case sys.LenType: + panic("bad arg returned by mutationArgs: LenType") + default: + panic(fmt.Sprintf("bad arg returned by mutationArgs: %#v, type=%#v", *arg, arg.Type)) + } + + // Update associated size argument if there is one. + if size != nil { + name := arg.Type.Name() + if name == "" && base != nil { + name = base.Type.Name() + } + for _, arg1 := range *parent { + if sz, ok := arg1.Type.(sys.LenType); ok && sz.Buf == name { + if arg1.Kind != ArgConst && arg1.Kind != ArgPageSize { + panic(fmt.Sprintf("size arg is not const: %#v", *arg1)) + } + arg1.Val = size.Val + arg1.AddrPage = size.AddrPage + arg1.AddrOffset = size.AddrOffset + } + } + } + + // Update base pointer if size has increased. + if base != nil && baseSize < base.Res.Size(base.Res.Type) { + arg1, calls1 := r.addr(s, base.Res.Size(base.Res.Type), base.Res) + for _, c := range calls1 { + assignTypeAndDir(c) + sanitizeCall(c) + } + p.insertBefore(c, calls1) + arg.AddrPage = arg1.AddrPage + arg.AddrOffset = arg1.AddrOffset + } + } + }, + 1, func() { + // Remove a random call. + if len(p.Calls) == 0 { + return + } + idx := r.Intn(len(p.Calls)) + c := p.Calls[idx] + copy(p.Calls[idx:], p.Calls[idx+1:]) + p.Calls = p.Calls[:len(p.Calls)-1] + + foreachArg(c, func(arg, _ *Arg, _ *[]*Arg) { + if arg.Kind == ArgResult { + if _, ok := arg.Res.Uses[arg]; !ok { + panic("broken tree") + } + delete(arg.Res.Uses, arg) + } + }) + + for _, arg := range referencedArgs(c.Args, c.Ret) { + c1 := arg.Call + s := analyze(enabledCalls, p, c1) + arg1, _, calls1 := r.generateArg(s, arg.Type, arg.Dir, nil) + replaceArg(p, arg, arg1, calls1) + } + }, + ) + } + for _, c := range p.Calls { + assignTypeAndDir(c) + sanitizeCall(c) + } + if err := p.validate(); err != nil { + panic(err) + } +} + +// Minimize minimizes program p into an equivalent program using the equivalence +// predicate pred. It iteratively generates simpler programs and asks pred +// whether it is equal to the orginal program or not. If it is equivalent then +// the simplification attempt is committed and the process continues. +func Minimize(p0 *Prog, callIndex0 int, pred func(*Prog, int) bool) (*Prog, int) { + if callIndex0 < 0 || callIndex0 >= len(p0.Calls) { + panic("bad call index") + } + name0 := p0.Calls[callIndex0].Meta.Name + // Try to remove all calls except the last one one-by-one. + for i := len(p0.Calls) - 1; i >= 0; i-- { + if i == callIndex0 { + continue + } + callIndex := callIndex0 + if i < callIndex { + callIndex-- + } + c := p0.Calls[i] + p := p0.Clone() + c = p.Calls[i] + copy(p.Calls[i:], p.Calls[i+1:]) + p.Calls = p.Calls[:len(p.Calls)-1] + for _, arg := range referencedArgs(c.Args, c.Ret) { + arg1 := constArg(arg.Type.Default()) + replaceArg(p, arg, arg1, nil) + } + foreachArg(c, func(arg, _ *Arg, _ *[]*Arg) { + if arg.Kind == ArgResult { + delete(arg.Res.Uses, arg) + } + }) + if !pred(p, callIndex) { + continue + } + p0 = p + callIndex0 = callIndex + } + // TODO: simplify individual arguments: + // - replace constants with 0 + // - reset bits in constants + // - remove offsets from addresses + // - replace file descriptors with -1 + // etc + if callIndex0 < 0 || callIndex0 >= len(p0.Calls) || name0 != p0.Calls[callIndex0].Meta.Name { + panic(fmt.Sprintf("bad call index after minimizatoin: ncalls=%v index=%v call=%v/%v", + len(p0.Calls), callIndex0, name0, p0.Calls[callIndex0].Meta.Name)) + } + return p0, callIndex0 +} + +func (p *Prog) TrimAfter(idx int) { + if idx < 0 || idx >= len(p.Calls) { + panic("trimming non-existing call") + } + for i := len(p.Calls) - 1; i > idx; i-- { + c := p.Calls[i] + foreachArg(c, func(arg, _ *Arg, _ *[]*Arg) { + if arg.Kind == ArgResult { + delete(arg.Res.Uses, arg) + } + }) + } + p.Calls = p.Calls[:idx+1] +} + +func mutationArgs(c *Call) (args, bases []*Arg, parents []*[]*Arg) { + foreachArg(c, func(arg, base *Arg, parent *[]*Arg) { + switch arg.Type.(type) { + case sys.StructType: + switch arg.Type.Name() { + default: + // For structs only individual fields are updated. + return + case "timespec", "timeval": + // These special structs are mutated as a whole. + } + case sys.LenType: + // Size is updated when the size-of arg change. + return + } + if arg.Dir == DirOut { + return + } + if base != nil { + if _, ok := base.Type.(sys.StructType); ok && (base.Type.Name() == "timespec" || base.Type.Name() == "timeval") { + // These special structs are mutated as a whole. + return + } + } + args = append(args, arg) + bases = append(bases, base) + parents = append(parents, parent) + }) + return +} + +func mutateData(r *randGen, data []byte) []byte { + for stop := false; !stop; stop = r.bin() { + r.choose( + 1, func() { + data = append(data, byte(r.rand(256))) + }, + 1, func() { + if len(data) == 0 { + return + } + data[r.Intn(len(data))] = byte(r.rand(256)) + }, + 1, func() { + if len(data) == 0 { + return + } + byt := r.Intn(len(data)) + bit := r.Intn(8) + data[byt] ^= 1 << uint(bit) + }, + 1, func() { + if len(data) == 0 { + return + } + i := r.Intn(len(data)) + copy(data[i:], data[i+1:]) + data = data[:len(data)-1] + }, + ) + } + return data +} + +func replaceArg(p *Prog, arg, arg1 *Arg, calls []*Call) { + if arg.Kind != ArgConst && arg.Kind != ArgResult && arg.Kind != ArgPointer { + panic(fmt.Sprintf("replaceArg: bad arg kind %v", arg.Kind)) + } + if arg1.Kind != ArgConst && arg1.Kind != ArgResult && arg1.Kind != ArgPointer { + panic(fmt.Sprintf("replaceArg: bad arg1 kind %v", arg1.Kind)) + } + if arg.Kind == ArgResult { + delete(arg.Res.Uses, arg) + } + for _, c := range calls { + assignTypeAndDir(c) + sanitizeCall(c) + } + c := arg.Call + p.insertBefore(c, calls) + // Somewhat hacky, but safe and preserves references to arg. + uses := arg.Uses + *arg = *arg1 + arg.Uses = uses + if arg.Kind == ArgResult { + delete(arg.Res.Uses, arg1) + arg.Res.Uses[arg] = true + } + assignTypeAndDir(c) + sanitizeCall(c) +} diff --git a/prog/mutation_test.go b/prog/mutation_test.go new file mode 100644 index 000000000..c788baf3d --- /dev/null +++ b/prog/mutation_test.go @@ -0,0 +1,268 @@ +// Copyright 2015 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package prog + +import ( + "bytes" + "fmt" + "testing" +) + +func TestClone(t *testing.T) { + rs, iters := initTest(t) + for i := 0; i < iters; i++ { + p := Generate(rs, 10, nil) + p1 := p.Clone() + data := p.Serialize() + data1 := p1.Serialize() + if !bytes.Equal(data, data1) { + t.Fatalf("program changed after clone\noriginal:\n%s\n\nnew:\n%s\n", data, data1) + } + } +} + +func TestMutate(t *testing.T) { + rs, iters := initTest(t) +next: + for i := 0; i < iters; i++ { + p := Generate(rs, 10, nil) + data0 := p.Serialize() + p1 := p.Clone() + // There is a chance that mutation will produce the same program. + // So we check that at least 1 out of 10 mutations actually change the program. + for try := 0; try < 10; try++ { + p1.Mutate(rs, 10, nil) + data := p.Serialize() + if !bytes.Equal(data0, data) { + t.Fatalf("program changed after clone/mutate\noriginal:\n%s\n\nnew:\n%s\n", data0, data) + } + data1 := p1.Serialize() + if !bytes.Equal(data, data1) { + continue next + } + } + t.Fatalf("mutation does not change program") + } +} + +func TestMutateTable(t *testing.T) { + if testing.Short() { + t.Skip("skipping in short mode") + } + tests := [][2]string{ + // Insert calls. + { + "mmap(&(0x7f0000000000)=nil, (0x1000), 0x3, 0x32, 0xffffffffffffffff, 0x0)\n" + + "pipe2(&(0x7f0000000000)={0x0, 0x0}, 0x0)\n", + + "mmap(&(0x7f0000000000)=nil, (0x1000), 0x3, 0x32, 0xffffffffffffffff, 0x0)\n" + + "sched_yield()\n" + + "pipe2(&(0x7f0000000000)={0x0, 0x0}, 0x0)\n", + }, + // Remove calls and update args. + { + "r0 = open(&(0x7f0000001000)=\"2e2f66696c653000\", 0x22c0, 0x1)\n" + + "sched_yield()\n" + + "read(r0, &(0x7f0000000000)=0x0, 0x1)\n" + + "sched_yield()\n", + + "sched_yield()\n" + + "read(0xffffffffffffffff, &(0x7f0000000000)=0x0, 0x1)\n" + + "sched_yield()\n", + }, + // Mutate flags. + { + "r0 = open(&(0x7f0000001000)=\"2e2f66696c653000\", 0x22c0, 0x1)\n" + + "sched_yield()\n" + + "read(r0, &(0x7f0000000000)=0x0, 0x1)\n" + + "sched_yield()\n", + + "r0 = open(&(0x7f0000001000)=\"2e2f66696c653000\", 0x22c0, 0x2)\n" + + "sched_yield()\n" + + "read(r0, &(0x7f0000000000)=0x0, 0x1)\n" + + "sched_yield()\n", + }, + // Mutate data (delete byte and update size). + { + "r0 = open(&(0x7f0000001000)=\"2e2f66696c653000\", 0x22c0, 0x1)\n" + + "write(r0, &(0x7f0000000000)=\"11223344\", 0x4)\n", + + "r0 = open(&(0x7f0000001000)=\"2e2f66696c653000\", 0x22c0, 0x1)\n" + + "write(r0, &(0x7f0000000000)=\"112244\", 0x3)\n", + }, + // Mutate data (insert byte and update size). + { + "r0 = open(&(0x7f0000001000)=\"2e2f66696c653000\", 0x22c0, 0x1)\n" + + "write(r0, &(0x7f0000000000)=\"1122\", 0x2)\n", + + "r0 = open(&(0x7f0000001000)=\"2e2f66696c653000\", 0x22c0, 0x1)\n" + + "write(r0, &(0x7f0000000000)=\"112255\", 0x3)\n", + }, + // Mutate data (change byte). + { + "r0 = open(&(0x7f0000001000)=\"2e2f66696c653000\", 0x22c0, 0x1)\n" + + "write(r0, &(0x7f0000000000)=\"1122\", 0x2)\n", + + "r0 = open(&(0x7f0000001000)=\"2e2f66696c653000\", 0x22c0, 0x1)\n" + + "write(r0, &(0x7f0000000000)=\"1155\", 0x2)\n", + }, + // Change filename. + { + "open(&(0x7f0000001000)=\"2e2f66696c653000\", 0x22c0, 0x1)\n" + + "r0 = open(&(0x7f0000001000)=\"2e2f66696c653000\", 0x22c0, 0x1)\n" + + "write(r0, &(0x7f0000000000)=\"\", 0x0)\n", + + "open(&(0x7f0000001000)=\"2e2f66696c653000\", 0x22c0, 0x1)\n" + + "r0 = open(&(0x7f0000001000)=\"2e2f66696c653100\", 0x22c0, 0x1)\n" + + "write(r0, &(0x7f0000000000)=\"\", 0x0)\n", + }, + // Extend an array. + { + "r0 = open(&(0x7f0000001000)=\"2e2f66696c653000\", 0x22c0, 0x1)\n" + + "readv(r0, &(0x7f0000000000)={{&(0x7f0000001000)=nil, 0x1}, {&(0x7f0000002000)=nil, 0x2}}, 0x2)\n", + + "mmap(&(0x7f0000000000)=nil, (0x1000), 0x3, 0x32, 0xffffffffffffffff, 0x0)\n" + + "r0 = open(&(0x7f0000001000)=\"2e2f66696c653000\", 0x22c0, 0x1)\n" + + "readv(r0, &(0x7f0000000000)={{&(0x7f0000001000)=nil, 0x1}, {&(0x7f0000002000)=nil, 0x2}, {&(0x7f0000000000)=nil, 0x3}}, 0x3)\n", + }, + } + rs, _ := initTest(t) +nextTest: + for ti, test := range tests { + p, err := Deserialize([]byte(test[0])) + if err != nil { + t.Fatalf("failed to deserialize original program: %v", err) + } + for i := 0; i < 1e6; i++ { + p1 := p.Clone() + p1.Mutate(rs, 30, nil) + data1 := p1.Serialize() + if string(data1) == test[1] { + t.Logf("test #%v: success on iter %v", ti, i) + continue nextTest + } + _ = fmt.Printf + } + t.Fatalf("failed to achieve mutation goal\noriginal:\n%s\n\ngoal:\n%s\n", test[0], test[1]) + } +} + +func TestMinimize(t *testing.T) { + tests := []struct { + orig string + callIndex int + pred func(*Prog, int) bool + result string + }{ + // Predicate always returns false, so must get the same program. + { + "mmap(&(0x7f0000000000)=nil, (0x1000), 0x3, 0x32, 0xffffffffffffffff, 0x0)\n" + + "sched_yield()\n" + + "pipe2(&(0x7f0000000000)={0x0, 0x0}, 0x0)\n", + 2, + func(p *Prog, callIndex int) bool { + if len(p.Calls) == 0 { + t.Fatalf("got an empty program") + } + if p.Calls[len(p.Calls)-1].Meta.Name != "pipe2" { + t.Fatalf("last call is removed") + } + return false + }, + "mmap(&(0x7f0000000000)=nil, (0x1000), 0x3, 0x32, 0xffffffffffffffff, 0x0)\n" + + "sched_yield()\n" + + "pipe2(&(0x7f0000000000)={0x0, 0x0}, 0x0)\n", + }, + // Remove a call. + { + "mmap(&(0x7f0000000000)=nil, (0x1000), 0x3, 0x32, 0xffffffffffffffff, 0x0)\n" + + "sched_yield()\n" + + "pipe2(&(0x7f0000000000)={0x0, 0x0}, 0x0)\n", + 2, + func(p *Prog, callIndex int) bool { + // Aim at removal of sched_yield. + return len(p.Calls) == 2 && p.Calls[0].Meta.Name == "mmap" && p.Calls[1].Meta.Name == "pipe2" + }, + "mmap(&(0x7f0000000000)=nil, (0x1000), 0x3, 0x32, 0xffffffffffffffff, 0x0)\n" + + "pipe2(&(0x7f0000000000)={0x0, 0x0}, 0x0)\n", + }, + // Remove two dependent calls. + { + "mmap(&(0x7f0000000000)=nil, (0x1000), 0x3, 0x32, 0xffffffffffffffff, 0x0)\n" + + "pipe2(&(0x7f0000000000)={0x0, 0x0}, 0x0)\n" + + "sched_yield()\n", + 2, + func(p *Prog, callIndex int) bool { + // Aim at removal of pipe2 and then mmap. + if len(p.Calls) == 2 && p.Calls[0].Meta.Name == "mmap" && p.Calls[1].Meta.Name == "sched_yield" { + return true + } + if len(p.Calls) == 1 && p.Calls[0].Meta.Name == "sched_yield" { + return true + } + return false + }, + "sched_yield()\n", + }, + // Remove a call and replace results. + { + "mmap(&(0x7f0000000000)=nil, (0x1000), 0x3, 0x32, 0xffffffffffffffff, 0x0)\n" + + "pipe2(&(0x7f0000000000)={[r0=]0x0, 0x0}, 0x0)\n" + + "write(r0, &(0x7f0000000000)=\"1155\", 0x2)\n" + + "sched_yield()\n", + 3, + func(p *Prog, callIndex int) bool { + return p.String() == "mmap-write-sched_yield" + }, + "mmap(&(0x7f0000000000)=nil, (0x1000), 0x3, 0x32, 0xffffffffffffffff, 0x0)\n" + + "write(0xffffffffffffffff, &(0x7f0000000000)=\"1155\", 0x2)\n" + + "sched_yield()\n", + }, + // Remove a call and replace results. + { + "mmap(&(0x7f0000000000)=nil, (0x1000), 0x3, 0x32, 0xffffffffffffffff, 0x0)\n" + + "r0=open(&(0x7f0000000000)=\"1155\", 0x0, 0x0)\n" + + "write(r0, &(0x7f0000000000)=\"1155\", 0x2)\n" + + "sched_yield()\n", + 3, + func(p *Prog, callIndex int) bool { + return p.String() == "mmap-write-sched_yield" + }, + "mmap(&(0x7f0000000000)=nil, (0x1000), 0x3, 0x32, 0xffffffffffffffff, 0x0)\n" + + "write(0xffffffffffffffff, &(0x7f0000000000)=\"1155\", 0x2)\n" + + "sched_yield()\n", + }, + } + for ti, test := range tests { + p, err := Deserialize([]byte(test.orig)) + if err != nil { + t.Fatalf("failed to deserialize original program: %v", err) + } + p1, _ := Minimize(p, test.callIndex, test.pred) + res := p1.Serialize() + if string(res) != test.result { + t.Fatalf("minimization produced wrong result #%v\norig:\n%v\nexpect:\n%v\ngot:\n%v\n", + ti, test.orig, test.result, string(res)) + } + } +} + +func TestMinimizeRandom(t *testing.T) { + rs, iters := initTest(t) + for i := 0; i < iters; i++ { + p := Generate(rs, 10, nil) + Minimize(p, len(p.Calls)-1, func(p1 *Prog, callIndex int) bool { + if err := p1.validate(); err != nil { + t.Fatalf("invalid program: %v", err) + } + return false + }) + Minimize(p, len(p.Calls)-1, func(p1 *Prog, callIndex int) bool { + if err := p1.validate(); err != nil { + t.Fatalf("invalid program: %v", err) + } + return true + }) + } +} diff --git a/prog/prog.go b/prog/prog.go new file mode 100644 index 000000000..019873d99 --- /dev/null +++ b/prog/prog.go @@ -0,0 +1,143 @@ +// Copyright 2015 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package prog + +import ( + "github.com/google/syzkaller/sys" +) + +type Prog struct { + Calls []*Call +} + +type Call struct { + Meta *sys.Call + Args []*Arg + Ret *Arg +} + +type Arg struct { + Call *Call + Type sys.Type + Kind ArgKind + Dir ArgDir + Val uintptr // value of ArgConst + AddrPage uintptr // page index for ArgPointer address, page count for ArgPageSize + AddrOffset int // page offset for ArgPointer address + Data []byte // data of ArgData + Inner []*Arg // subargs of ArgGroup + Res *Arg // target of ArgResult, pointee for ArgPointer + Uses map[*Arg]bool // this arg is used by those ArgResult args + OpDiv uintptr // divide result for ArgResult (executed before OpAdd) + OpAdd uintptr // add to result for ArgResult +} + +type ArgKind int + +const ( + ArgConst ArgKind = iota + ArgResult + ArgPointer // even if these are always constant (for reproducibility), we use a separate type because they are represented in an abstract (base+page+offset) form + ArgPageSize // same as ArgPointer but base is not added, so it represents "lengths" in pages + ArgData + ArgGroup // logical group of args (struct or array) + ArgReturn // fake value denoting syscall return value +) + +type ArgDir sys.Dir + +const ( + DirIn = ArgDir(sys.DirIn) + DirOut = ArgDir(sys.DirOut) + DirInOut = ArgDir(sys.DirInOut) +) + +func (a *Arg) Size(typ sys.Type) uintptr { + switch typ1 := typ.(type) { + case sys.IntType: + return typ1.TypeSize + case sys.LenType: + return typ1.TypeSize + case sys.FlagsType: + return typ1.TypeSize + case sys.FileoffType: + return typ1.TypeSize + case sys.ResourceType: + return typ1.Size() + case sys.VmaType: + return ptrSize + case sys.FilenameType: + return uintptr(len(a.Data)) + case sys.PtrType: + return ptrSize + case sys.StructType: + var size uintptr + for i, f := range typ1.Fields { + size += a.Inner[i].Size(f) + } + return size + case sys.ArrayType: + if len(a.Inner) == 0 { + return 0 + } + return uintptr(len(a.Inner)) * a.Inner[0].Size(typ1.Type) + case sys.BufferType: + return uintptr(len(a.Data)) + default: + panic("unknown arg type") + } +} + +func constArg(v uintptr) *Arg { + return &Arg{Kind: ArgConst, Val: v} +} + +func resultArg(r *Arg) *Arg { + arg := &Arg{Kind: ArgResult, Res: r} + if r.Uses == nil { + r.Uses = make(map[*Arg]bool) + } + if r.Uses[arg] { + panic("already used") + } + r.Uses[arg] = true + return arg +} + +func dataArg(data []byte) *Arg { + return &Arg{Kind: ArgData, Data: append([]byte{}, data...)} +} + +func pointerArg(page uintptr, off int, obj *Arg) *Arg { + return &Arg{Kind: ArgPointer, AddrPage: page, AddrOffset: off, Res: obj} +} + +func pageSizeArg(npages uintptr, off int) *Arg { + return &Arg{Kind: ArgPageSize, AddrPage: npages, AddrOffset: off} +} + +func groupArg(inner []*Arg) *Arg { + return &Arg{Kind: ArgGroup, Inner: inner} +} + +func returnArg() *Arg { + return &Arg{Kind: ArgReturn, Dir: DirOut} +} + +func (p *Prog) insertBefore(c *Call, calls []*Call) { + idx := 0 + for ; idx < len(p.Calls); idx++ { + if p.Calls[idx] == c { + break + } + } + var newCalls []*Call + newCalls = append(newCalls, p.Calls[:idx]...) + newCalls = append(newCalls, calls...) + if idx < len(p.Calls) { + newCalls = append(newCalls, p.Calls[idx]) + newCalls = append(newCalls, p.Calls[idx+1:]...) + } + p.Calls = newCalls +} diff --git a/prog/prog_test.go b/prog/prog_test.go new file mode 100644 index 000000000..e4c956c74 --- /dev/null +++ b/prog/prog_test.go @@ -0,0 +1,77 @@ +// Copyright 2015 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package prog + +import ( + "bytes" + "fmt" + "math/rand" + "testing" + "time" +) + +func initTest(t *testing.T) (rand.Source, int) { + iters := 10000 + if testing.Short() { + iters = 100 + } + seed := int64(time.Now().UnixNano()) + rs := rand.NewSource(seed) + t.Logf("seed=%v", seed) + return rs, iters +} + +func TestGeneration(t *testing.T) { + rs, iters := initTest(t) + for i := 0; i < iters; i++ { + p := Generate(rs, 20, nil) + hasGettime, hasSelect := false, false + for _, c := range p.Calls { + if c.Meta.Name == "clock_gettime" { + hasGettime = true + } + if c.Meta.Name == "select" { + hasSelect = true + } + } + if hasGettime && hasSelect { + fmt.Printf("%s\n\n", p.WriteCSource()) + } + } +} + +func TestSerialize(t *testing.T) { + rs, iters := initTest(t) + for i := 0; i < iters; i++ { + p := Generate(rs, 10, nil) + data := p.Serialize() + p1, err := Deserialize(data) + if err != nil { + t.Fatalf("failed to deserialize program: %v\n%s", err, data) + } + data1 := p1.Serialize() + if len(p.Calls) != len(p1.Calls) { + t.Fatalf("different number of calls") + } + if !bytes.Equal(data, data1) { + t.Fatalf("program changed after serialize/deserialize\noriginal:\n%s\n\nnew:\n%s\n", data, data1) + } + } +} + +func TestSerializeForExec(t *testing.T) { + rs, iters := initTest(t) + for i := 0; i < iters; i++ { + p := Generate(rs, 10, nil) + p.SerializeForExec() + } +} + +func TestSerializeC(t *testing.T) { + rs, iters := initTest(t) + for i := 0; i < iters; i++ { + p := Generate(rs, 10, nil) + p.WriteCSource() + } +} diff --git a/prog/rand.go b/prog/rand.go new file mode 100644 index 000000000..18c79bb9d --- /dev/null +++ b/prog/rand.go @@ -0,0 +1,638 @@ +// Copyright 2015 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package prog + +import ( + "bytes" + "encoding/binary" + "fmt" + "math/rand" + + "github.com/google/syzkaller/sys" +) + +type randGen struct { + *rand.Rand + createDepth int +} + +func newRand(rs rand.Source) *randGen { + return &randGen{rand.New(rs), 0} +} + +func (r *randGen) rand(n int) uintptr { + return uintptr(r.Intn(n)) +} + +func (r *randGen) bin() bool { + return r.Intn(2) == 0 +} + +func (r *randGen) oneOf(n int) bool { + return r.Intn(n) == 0 +} + +func (r *randGen) rand64() uintptr { + v := uintptr(r.Int63()) + if r.bin() { + v |= 1 << 63 + } + return v +} + +func (r *randGen) randInt() uintptr { + v := r.rand64() + r.choose( + 100, func() { v %= 10 }, + 10, func() { v %= 256 }, + 10, func() { v %= 4 << 10 }, + 10, func() { v %= 64 << 10 }, + 1, func() { v %= 1 << 31 }, + 1, func() {}, + ) + r.choose( + 10, func() {}, + 1, func() { v = uintptr(-int(v)) }, + ) + return v +} + +func (r *randGen) randBufLen() (n uintptr) { + r.choose( + 1, func() { n = 0 }, + 50, func() { n = r.rand(256) }, + 5, func() { n = 4 << 10 }, + ) + return +} + +func (r *randGen) randPageCount() (n uintptr) { + r.choose( + 100, func() { n = r.rand(4) + 1 }, + 5, func() { n = r.rand(20) + 1 }, + 1, func() { n = (r.rand(3) + 1) * 1024 }, + ) + return +} + +func (r *randGen) flags(vv []uintptr) uintptr { + var v uintptr + r.choose( + 10, func() { v = 0 }, + 10, func() { v = vv[r.rand(len(vv))] }, + 90, func() { + for stop := false; !stop; stop = r.bin() { + v |= vv[r.rand(len(vv))] + } + }, + 1, func() { v = r.rand64() }, + ) + return v +} + +func (r *randGen) filename(s *state) string { + // TODO: support procfs and sysfs + dir := "." + if r.oneOf(5) && len(s.files) != 0 { + files := make([]string, 0, len(s.files)) + for f := range s.files { + files = append(files, f) + } + dir = files[r.Intn(len(files))] + if len(dir) > 0 && dir[len(dir)-1] == 0 { + dir = dir[:len(dir)-1] + } + } + if len(s.files) == 0 || r.oneOf(10) { + // Generate a new name. + for i := 0; ; i++ { + f := fmt.Sprintf("%v/file%v\x00", dir, i) + if !s.files[f] { + return f + } + } + } + files := make([]string, 0, len(s.files)) + for f := range s.files { + files = append(files, f) + } + return files[r.Intn(len(files))] +} + +var sockFamilies = []uint16{AF_LOCAL, AF_INET, AF_INET6, AF_IPX, AF_NETLINK, AF_X25, AF_AX25, AF_ATMPVC, AF_APPLETALK, AF_PACKET} + +func (r *randGen) sockaddr(s *state) []byte { + fa := sockFamilies[r.Intn(len(sockFamilies))] + port := 13269 + uint16(r.Intn(20)) + buf := new(bytes.Buffer) + binary.Write(buf, binary.LittleEndian, fa) // this is actually host byte order + switch fa { + case AF_LOCAL: + buf.WriteString(r.filename(s)) + case AF_INET: + binary.Write(buf, binary.BigEndian, port) + binary.Write(buf, binary.BigEndian, uint32(127<<24+0<<16+0<<8+1)) + case AF_INET6: + binary.Write(buf, binary.BigEndian, port) + binary.Write(buf, binary.BigEndian, uint32(r.Int63())) // flow info + binary.Write(buf, binary.BigEndian, uint64(0)) // addr: loopback + binary.Write(buf, binary.BigEndian, uint64(1)) // addr: loopback + binary.Write(buf, binary.BigEndian, uint32(r.Int63())) // scope id + case AF_IPX: + case AF_NETLINK: + case AF_X25: + case AF_AX25: + case AF_ATMPVC: + case AF_APPLETALK: + case AF_PACKET: + binary.Write(buf, binary.BigEndian, uint16(0)) // Physical-layer protocol + binary.Write(buf, binary.BigEndian, uint32(0)) // Interface number + binary.Write(buf, binary.BigEndian, uint16(0)) // ARP hardware type + binary.Write(buf, binary.BigEndian, uint8(0)) // Packet type + binary.Write(buf, binary.BigEndian, uint8(0)) // Length of address + binary.Write(buf, binary.BigEndian, uint64(0)) // Physical-layer address + default: + panic("unknown socket domain") + } + if r.oneOf(2) { + buf.Write(make([]byte, 128-len(buf.Bytes()))) + } + data := buf.Bytes() + if r.oneOf(100) { + data = data[:r.Intn(len(data))] + } + return data +} + +func (r *randGen) randString(s *state) []byte { + if len(s.strings) != 0 && r.bin() { + // Return an existing string. + strings := make([]string, 0, len(s.strings)) + for s := range s.strings { + strings = append(strings, s) + } + return []byte(strings[r.Intn(len(strings))]) + } + dict := []string{"user", "keyring", "trusted", "system", "security", "selinux", + "posix_acl_access", "mime_type", "md5sum", "nodev", "self", "sysfs", "rootfs", + "ramfs", "bdev", "proc", "cgroup", "cpuset", "tmpfs", "devtmpfs", "debugfs", + "securityfs", "sockfs", "pipefs", "anon_inodefs", "devpts", "ext3", "ext2", + "ext4", "hugetlbfs", "vfat", "ecryptfs", "fuseblk", "fuse", "fusectl", "pstore", + "mqueue", "rpc_pipefs", "nfs", "nfs4", "nfsd", "binfmt_misc", "autofs", "xfs", + "jfs", "msdos", "ntfs", "minix", "hfs", "hfsplus", "qnx4", "ufs", "btrfs"} + punct := []byte{'!', '@', '#', '$', '%', '^', '&', '*', '(', ')', '-', '+', '\\', + '/', ':', '.', ',', '-', '\'', '[', ']', '{', '}'} + buf := new(bytes.Buffer) + for !r.oneOf(4) { + r.choose( + 10, func() { buf.WriteString(dict[r.Intn(len(dict))]) }, + 10, func() { buf.Write([]byte{punct[r.Intn(len(punct))]}) }, + 1, func() { buf.Write([]byte{byte(r.Intn(256))}) }, + ) + } + if !r.oneOf(100) { + buf.Write([]byte{0}) + } + return buf.Bytes() +} + +func (r *randGen) timespec(s *state, usec bool) (arg *Arg, calls []*Call) { + // We need to generate timespec/timeval that are either (1) definitely in the past, + // or (2) definitely in unreachable fututre, or (3) few ms ahead of now. + // Note timespec/timeval can be absolute or relative to now. + r.choose( + 1, func() { + // now for relative, past for absolute + arg = groupArg([]*Arg{constArg(0), constArg(0)}) + }, + 1, func() { + // few ms ahead for relative, past for absolute + nsec := uintptr(10 * 1e6) + if usec { + nsec /= 1e3 + } + arg = groupArg([]*Arg{constArg(0), constArg(nsec)}) + }, + 1, func() { + // unreachable fututre for both relative and absolute + arg = groupArg([]*Arg{constArg(2e9), constArg(0)}) + }, + 1, func() { + // few ms ahead for absolute + tp := groupArg([]*Arg{constArg(0), constArg(0)}) + var tpaddr *Arg + tpaddr, calls = r.addr(s, 2*ptrSize, tp) + gettime := &Call{ + Meta: sys.CallMap["clock_gettime"], + Args: []*Arg{ + constArg(CLOCK_REALTIME), + tpaddr, + }, + } + calls = append(calls, gettime) + sec := resultArg(tp.Inner[0]) + nsec := resultArg(tp.Inner[1]) + if usec { + nsec.OpDiv = 1e3 + nsec.OpAdd = 10 * 1e3 + } else { + nsec.OpAdd = 10 * 1e6 + } + arg = groupArg([]*Arg{sec, nsec}) + }, + ) + return +} + +func (r *randGen) addr1(s *state, size uintptr, data *Arg) (*Arg, []*Call) { + npages := (size + pageSize - 1) / pageSize + if r.oneOf(10) { + return r.randPageAddr(s, npages, data), nil + } + for i := uintptr(0); i < maxPages-npages; i++ { + free := true + for j := uintptr(0); j < npages; j++ { + if s.pages[i+j] { + free = false + break + } + } + if !free { + continue + } + c := &Call{ + Meta: sys.CallMap["mmap"], + Args: []*Arg{ + pointerArg(i, 0, nil), + pageSizeArg(npages, 0), + constArg(PROT_READ | PROT_WRITE), + constArg(MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED), + constArg(sys.InvalidFD), + constArg(0), + }, + } + return pointerArg(i, 0, data), []*Call{c} + } + return r.randPageAddr(s, npages, data), nil +} + +func (r *randGen) addr(s *state, size uintptr, data *Arg) (*Arg, []*Call) { + arg, calls := r.addr1(s, size, data) + if arg.Kind != ArgPointer { + panic("bad") + } + // Patch offset of the address. + r.choose( + 1, func() {}, + 1, func() { arg.AddrOffset = -int(size) }, + 1, func() { + if size > 0 { + arg.AddrOffset = -r.Intn(int(size)) + } + }, + 1, func() { arg.AddrOffset = r.Intn(pageSize) }, + ) + return arg, calls +} + +func (r *randGen) randPageAddr(s *state, npages uintptr, data *Arg) *Arg { + var starts []uintptr + for i := uintptr(0); i < maxPages-npages; i++ { + busy := true + for j := uintptr(0); j < npages; j++ { + if !s.pages[i+j] { + busy = false + break + } + } + // TODO: it does not need to be completely busy, + // for example, mmap addr arg can be new memory. + if !busy { + continue + } + starts = append(starts, i) + } + if len(starts) != 0 { + return pointerArg(starts[r.rand(len(starts))], 0, data) + } else { + return pointerArg(r.rand(int(maxPages-npages)), 0, data) + } +} + +func (r *randGen) createResource(s *state, res sys.ResourceType) (arg *Arg, calls []*Call) { + if r.createDepth > 2 { + special := res.SpecialValues() + return constArg(special[r.Intn(len(special))]), nil + } + r.createDepth++ + defer func() { r.createDepth-- }() + + sk := res.Subkind + if r.oneOf(50) { + // Spoof resource subkind. + all := res.SubKinds() + sk = all[r.Intn(len(all))] + } + // Find calls that produce the necessary resources. + var metas []*sys.Call + // Recurse into arguments to see if there is an out/inout arg of necessary type. + var checkArg func(typ sys.Type, dir ArgDir) bool + checkArg = func(typ sys.Type, dir ArgDir) bool { + if resarg, ok := typ.(sys.ResourceType); ok && dir != DirIn && resarg.Kind == res.Kind && + (resarg.Subkind == sk || resarg.Subkind == sys.ResAny || sk == sys.ResAny) { + return true + } + switch typ1 := typ.(type) { + case sys.ArrayType: + if checkArg(typ1.Type, dir) { + return true + } + case sys.StructType: + for _, fld := range typ1.Fields { + if checkArg(fld, dir) { + return true + } + } + case sys.PtrType: + if checkArg(typ1.Type, ArgDir(typ1.Dir)) { + return true + } + } + return false + } + for _, meta := range s.enabledCalls { + ok := false + for _, arg := range meta.Args { + if checkArg(arg, DirIn) { + ok = true + break + } + } + if !ok && meta.Ret != nil && checkArg(meta.Ret, DirOut) { + ok = true + } + if ok { + metas = append(metas, meta) + } + } + if len(metas) == 0 { + if len(s.enabledCalls) != len(sys.Calls) { + // We used only a subset of all syscalls, + // so we legitimately may not be able to create the resource. + return constArg(res.Default()), nil + } + panic(fmt.Sprintf("can't create resource %v/%v", res.Kind, sk)) + } + + // Now we have a set of candidate calls that can create the necessary resource. + for i := 0; i < 1e3; i++ { + // Generate one of them. + meta := metas[r.Intn(len(metas))] + calls := r.generateParticularCall(s, meta) + assignTypeAndDir(calls[len(calls)-1]) + s1 := newState(s.enabledCalls) + s1.analyze(calls[len(calls)-1]) + // Now see if we have what we want. + var allres []*Arg + for sk1, ress := range s1.resources[res.Kind] { + if sk1 == sys.ResAny || sk == sys.ResAny || sk1 == sk { + allres = append(allres, ress...) + } + } + if len(allres) != 0 { + // Bingo! + arg := resultArg(allres[r.Intn(len(allres))]) + return arg, calls + } + switch meta.Name { + case "getgroups": + // Returns groups in an array. + default: + panic(fmt.Sprintf("unexpected call failed to create a resource %v/%v: %v", res.Kind, sk, meta.Name)) + } + // Discard unsuccessful calls. + for _, c := range calls { + foreachArg(c, func(arg, _ *Arg, _ *[]*Arg) { + if arg.Kind == ArgResult { + delete(arg.Res.Uses, arg) + } + }) + } + } + // Generally we can loop several times, e.g. when we choose a call that returns + // the resource in an array, but then generateArg generated that array of zero length. + // But we must succeed eventually. + panic("failed to create a resource") +} + +func (r *randGen) choose(args ...interface{}) { + if len(args) == 0 || len(args)%2 != 0 { + panic("bad number of args to choose") + } + n := len(args) / 2 + weights := make([]int, n) + funcs := make([]func(), n) + total := 0 + for i := 0; i < n; i++ { + weights[i] = total + args[i*2].(int) + funcs[i] = args[i*2+1].(func()) + total = weights[i] + } + x := r.Intn(total) + for i, w := range weights { + if x < w { + funcs[i]() + return + } + } + panic("choose is broken") +} + +func (r *randGen) generateCall(s *state) []*Call { + meta := s.enabledCalls[r.rand(len(s.enabledCalls))] + return r.generateParticularCall(s, meta) +} + +func (r *randGen) generateParticularCall(s *state, meta *sys.Call) (calls []*Call) { + c := &Call{Meta: meta} + c.Args, calls = r.generateArgs(s, meta.Args, DirIn) + calls = append(calls, c) + for _, c1 := range calls { + assignTypeAndDir(c1) + sanitizeCall(c1) + } + return calls +} + +func (r *randGen) generateArgs(s *state, types []sys.Type, dir ArgDir) ([]*Arg, []*Call) { + var calls []*Call + args := make([]*Arg, len(types)) + sizes := make(map[string]*Arg) + // Pass 1: generate all args except size arguments. + for i, typ := range types { + if _, ok := typ.(sys.LenType); ok { + continue + } + arg, size, calls1 := r.generateArg(s, typ, dir, sizes) + args[i] = arg + calls = append(calls, calls1...) + if size != nil { + sizes[typ.Name()] = size + } + } + // Pass 2: fill in size arguments. + for i, typ := range types { + if a, ok := typ.(sys.LenType); ok { + size := sizes[a.Buf] + if size == nil { + panic(fmt.Sprintf("no size for %v[%v] (%+v)", a.Name(), a.Buf, sizes)) + } + args[i] = size + } + } + return args, calls +} + +func (r *randGen) generateArg(s *state, typ sys.Type, dir ArgDir, sizes map[string]*Arg) (arg, size *Arg, calls []*Call) { + if dir == DirOut { + // No need to generate something interesting for output scalar arguments. + // But we still need to generate the argument itself so that it can be referenced + // in subsequent calls. For the same reason we do generate pointer/array/struct + // output arguments (their elements can be referenced in subsequent calls). + switch typ.(type) { + case sys.IntType, sys.FlagsType, sys.FileoffType, sys.ResourceType: + return constArg(0), nil, nil + } + } + + if typ.Optional() && r.oneOf(10) { + if _, ok := typ.(sys.BufferType); ok { + panic("impossible") // parent PtrType must be Optional instead + } + return constArg(typ.Default()), constArg(0), nil + } + + switch a := typ.(type) { + case sys.ResourceType: + r.choose( + 1, func() { + special := a.SpecialValues() + arg = constArg(special[r.Intn(len(special))]) + }, + 90, func() { + // Get an existing resource. + if ress := s.resources[a.Kind]; ress != nil { + allres := ress[a.Subkind] + allres = append(allres, ress[sys.ResAny]...) + if a.Subkind == sys.ResAny || r.oneOf(10) { + for _, v := range ress { + allres = append(allres, v...) + } + } + if len(allres) != 0 { + // TODO: negative PIDs mean process group, + // we should be able to negate an existing PID. + arg = resultArg(allres[r.Intn(len(allres))]) + } + } + if arg == nil { + arg, calls = r.createResource(s, a) + } + }, + 10, func() { + // Create a new resource. + arg, calls = r.createResource(s, a) + }, + ) + return arg, nil, calls + case sys.FileoffType: + // TODO: can do better + var arg *Arg + r.choose( + 90, func() { arg = constArg(0) }, + 10, func() { arg = constArg(r.rand(100)) }, + 1, func() { arg = constArg(r.randInt()) }, + ) + return arg, nil, nil + case sys.BufferType: + switch a.Kind { + case sys.BufferBlob: + sz := r.randBufLen() + if dir == DirOut { + return nil, constArg(sz), nil + } + data := make([]byte, sz) + for i := range data { + data[i] = byte(r.Intn(256)) + } + return dataArg(data), constArg(sz), nil + case sys.BufferString: + data := r.randString(s) + return dataArg(data), constArg(uintptr(len(data))), nil + case sys.BufferSockaddr: + data := r.sockaddr(s) + if dir == DirOut { + return nil, constArg(uintptr(len(data))), nil + } + return dataArg(data), constArg(uintptr(len(data))), nil + default: + panic("unknown buffer kind") + } + case sys.VmaType: + npages := r.randPageCount() + arg := r.randPageAddr(s, npages, nil) + return arg, pageSizeArg(npages, 0), nil + case sys.FlagsType: + return constArg(r.flags(a.Vals)), nil, nil + case sys.IntType: + v := r.randInt() + if a.Limit != 0 && !r.oneOf(100) { + v %= a.Limit + } + return constArg(v), nil, nil + case sys.FilenameType: + filename := r.filename(s) + return dataArg([]byte(filename)), nil, nil + case sys.ArrayType: + count := r.rand(6) + var inner []*Arg + var calls []*Call + for i := uintptr(0); i < count; i++ { + arg1, _, calls1 := r.generateArg(s, a.Type, dir, nil) + inner = append(inner, arg1) + calls = append(calls, calls1...) + } + return groupArg(inner), constArg(count), calls + case sys.StructType: + if dir != DirOut && (a.Name() == "timespec" || a.Name() == "timeval") { + usec := a.Name() == "timeval" + arg, calls = r.timespec(s, usec) + return arg, nil, calls + } + args, calls := r.generateArgs(s, a.Fields, dir) + return groupArg(args), nil, calls + case sys.PtrType: + inner, size, calls := r.generateArg(s, a.Type, ArgDir(a.Dir), sizes) + if ArgDir(a.Dir) == DirOut && inner == nil { + // No data, but we should have got size. + arg, calls1 := r.addr(s, size.Val, nil) + calls = append(calls, calls1...) + return arg, size, calls + } + if size == nil { + size = constArg(inner.Size(a.Type)) + } + arg, calls1 := r.addr(s, inner.Size(a.Type), inner) + calls = append(calls, calls1...) + return arg, size, calls + case sys.LenType: + if sizes == nil || sizes[a.Buf] == nil { + fmt.Printf("name=%v buf=%v sizes=%+v\n", a.Name(), a.Buf, sizes) + panic("me no generate len") + } + return sizes[a.Name()], nil, nil + default: + panic("unknown argument type") + } +} diff --git a/prog/validation.go b/prog/validation.go new file mode 100644 index 000000000..097848bed --- /dev/null +++ b/prog/validation.go @@ -0,0 +1,168 @@ +// Copyright 2015 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package prog + +import ( + "fmt" + + "github.com/google/syzkaller/sys" +) + +type validCtx struct { + args map[*Arg]bool + uses map[*Arg]*Arg +} + +func (p *Prog) validate() error { + ctx := &validCtx{make(map[*Arg]bool), make(map[*Arg]*Arg)} + for _, c := range p.Calls { + if err := c.validate(ctx); err != nil { + + fmt.Printf("PROGRAM:\n") + for _, c := range p.Calls { + fmt.Printf(" %v: %+v %p\n", c.Meta.Name, c.Args, c.Ret) + } + + return err + } + } + for u, orig := range ctx.uses { + if !ctx.args[u] { + return fmt.Errorf("use of %+v referes to an out-of-tree arg\narg: %#v", *orig, u) + } + } + return nil +} + +func (c *Call) validate(ctx *validCtx) error { + if len(c.Args) != len(c.Meta.Args) { + return fmt.Errorf("syscall %v: wrong number of arguments, want %v, got %v", c.Meta.Name, len(c.Meta.Args), len(c.Args)) + } + var checkArg func(arg *Arg, typ sys.Type) error + checkArg = func(arg *Arg, typ sys.Type) error { + if arg == nil { + return fmt.Errorf("syscall %v: nil arg", c.Meta.Name) + } + if arg.Call != c { + return fmt.Errorf("syscall %v: arg has wrong call, call=%p, arg=%+v", c.Meta.Name, c, *arg) + } + if ctx.args[arg] { + return fmt.Errorf("syscall %v: arg is referenced several times in the tree", c.Meta.Name) + } + ctx.args[arg] = true + for u := range arg.Uses { + ctx.uses[u] = arg + } + if arg.Type == nil { + return fmt.Errorf("syscall %v: no type", c.Meta.Name) + } + if arg.Type.Name() != typ.Name() { + return fmt.Errorf("syscall %v: arg '%v' type mismatch", c.Meta.Name, typ.Name()) + } + if arg.Dir == DirOut { + if arg.Val != 0 || arg.AddrPage != 0 || arg.AddrOffset != 0 || len(arg.Data) != 0 { + return fmt.Errorf("syscall %v: output arg '%v' has data", c.Meta.Name, typ.Name()) + } + } + switch arg.Type.(type) { + case sys.ResourceType: + switch arg.Kind { + case ArgResult: + case ArgReturn: + case ArgConst: + if arg.Dir == DirOut && arg.Val != 0 { + return fmt.Errorf("syscall %v: out resource arg '%v' has bad const value %v", c.Meta.Name, typ.Name(), arg.Val) + } + default: + return fmt.Errorf("syscall %v: fd arg '%v' has bad kind %v", c.Meta.Name, typ.Name(), arg.Kind) + } + case sys.FilenameType: + switch arg.Kind { + case ArgData: + default: + return fmt.Errorf("syscall %v: filename arg '%v' has bad kind %v", c.Meta.Name, typ.Name(), arg.Kind) + } + } + switch arg.Kind { + case ArgConst: + case ArgResult: + if arg.Res == nil { + return fmt.Errorf("syscall %v: result arg '%v' has no reference", c.Meta.Name, typ.Name()) + } + if !ctx.args[arg.Res] { + return fmt.Errorf("syscall %v: result arg '%v' references out-of-tree result: %p%+v -> %v %p%+v", + c.Meta.Name, typ.Name(), arg, arg, arg.Res.Call.Meta.Name, arg.Res, arg.Res) + } + if _, ok := arg.Res.Uses[arg]; !ok { + return fmt.Errorf("syscall %v: result arg '%v' has broken link (%+v)", c.Meta.Name, typ.Name(), arg.Res.Uses) + } + case ArgPointer: + if arg.Dir != DirIn { + return fmt.Errorf("syscall %v: pointer arg '%v' has output direction", c.Meta.Name, typ.Name()) + } + switch typ1 := typ.(type) { + case sys.VmaType: + if arg.Res != nil { + return fmt.Errorf("syscall %v: vma arg '%v' has data", c.Meta.Name, typ.Name()) + } + case sys.PtrType: + if arg.Res != nil { + if err := checkArg(arg.Res, typ1.Type); err != nil { + return err + } + } + default: + return fmt.Errorf("syscall %v: pointer arg '%v' has bad meta type %+v", c.Meta.Name, typ.Name(), typ) + } + case ArgPageSize: + case ArgData: + case ArgGroup: + switch typ1 := typ.(type) { + case sys.StructType: + if len(arg.Inner) != len(typ1.Fields) { + return fmt.Errorf("syscall %v: struct arg '%v' has wrong number of fields, want %v, got %v", c.Meta.Name, typ.Name(), len(typ1.Fields), len(arg.Inner)) + } + for i, arg1 := range arg.Inner { + if err := checkArg(arg1, typ1.Fields[i]); err != nil { + return err + } + } + case sys.ArrayType: + for _, arg1 := range arg.Inner { + if err := checkArg(arg1, typ1.Type); err != nil { + return err + } + } + default: + return fmt.Errorf("syscall %v: group arg '%v' has bad underlying type %+v", c.Meta.Name, typ.Name(), typ) + } + case ArgReturn: + default: + return fmt.Errorf("syscall %v: unknown arg '%v' kind", c.Meta.Name, typ.Name()) + } + return nil + } + for i, arg := range c.Args { + if c.Ret.Kind != ArgReturn { + return fmt.Errorf("syscall %v: arg '%v' has wrong return kind", c.Meta.Name, arg.Type.Name()) + } + if err := checkArg(arg, c.Meta.Args[i]); err != nil { + return err + } + } + if c.Ret == nil { + return fmt.Errorf("syscall %v: return value is absent", c.Meta.Name) + } + if c.Ret.Kind != ArgReturn { + return fmt.Errorf("syscall %v: return value has wrong kind %v", c.Meta.Name, c.Ret.Kind) + } + if c.Meta.Ret != nil { + if err := checkArg(c.Ret, c.Meta.Ret); err != nil { + return err + } + } else if c.Ret.Type != nil { + return fmt.Errorf("syscall %v: return value has spurious type: %+v", c.Meta.Name, c.Ret.Type) + } + return nil +} -- cgit mrf-deployment