diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2025-04-08 14:27:33 +0200 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2025-04-09 10:27:41 +0000 |
| commit | 988b336c79bf2f92392015e5075e92f0148ad869 (patch) | |
| tree | 1e28f832accba910334c94ff14cf9715e9e378a5 /pkg/declextract | |
| parent | 16f995ffcf2e3469a7e464ac5d486385641df7d8 (diff) | |
tools/syz-declextract: refine arg types for syscall variants
Use scope-based dataflow analysis for syscall variants (including ioctls).
As a result, we only consider code that relates to a particular command/ioctl,
and can infer arguments/return types for each command/ioctl independently.
Diffstat (limited to 'pkg/declextract')
| -rw-r--r-- | pkg/declextract/declextract.go | 30 | ||||
| -rw-r--r-- | pkg/declextract/entity.go | 2 | ||||
| -rw-r--r-- | pkg/declextract/fileops.go | 22 | ||||
| -rw-r--r-- | pkg/declextract/typing.go | 118 |
4 files changed, 128 insertions, 44 deletions
diff --git a/pkg/declextract/declextract.go b/pkg/declextract/declextract.go index 479a40892..3800ad70c 100644 --- a/pkg/declextract/declextract.go +++ b/pkg/declextract/declextract.go @@ -161,20 +161,22 @@ func (ctx *context) processSyscalls() { var syscalls []*Syscall for _, call := range ctx.Syscalls { ctx.processFields(call.Args, "", false) - call.returnType = ctx.inferReturnType(call.Func, call.SourceFile) - for i, arg := range call.Args { - typ := ctx.inferArgType(call.Func, call.SourceFile, i) - refineFieldType(arg, typ, false) - } - ctx.emitSyscall(&syscalls, call, "") - for i := range call.Args { - cmds := ctx.inferCommandVariants(call.Func, call.SourceFile, i) + for varArg := range call.Args { + cmds := ctx.inferCommandVariants(call.Func, call.SourceFile, varArg) for _, cmd := range cmds { variant := *call variant.Args = slices.Clone(call.Args) - newArg := *variant.Args[i] - newArg.syzType = fmt.Sprintf("const[%v]", cmd) - variant.Args[i] = &newArg + for i, oldArg := range variant.Args { + arg := *oldArg + if i == varArg { + arg.syzType = fmt.Sprintf("const[%v]", cmd) + } else { + typ := ctx.inferArgType(call.Func, call.SourceFile, i, varArg, cmd) + refineFieldType(&arg, typ, false) + } + variant.Args[i] = &arg + } + variant.returnType = ctx.inferReturnType(call.Func, call.SourceFile, varArg, cmd) suffix := cmd if call.Func == "__do_sys_ioctl" { suffix = ctx.uniqualize("ioctl cmd", cmd) @@ -182,6 +184,12 @@ func (ctx *context) processSyscalls() { ctx.emitSyscall(&syscalls, &variant, "_"+suffix) } } + call.returnType = ctx.inferReturnType(call.Func, call.SourceFile, -1, "") + for i, arg := range call.Args { + typ := ctx.inferArgType(call.Func, call.SourceFile, i, -1, "") + refineFieldType(arg, typ, false) + } + ctx.emitSyscall(&syscalls, call, "") } ctx.Syscalls = sortAndDedupSlice(syscalls) } diff --git a/pkg/declextract/entity.go b/pkg/declextract/entity.go index 5562ff570..740530ca9 100644 --- a/pkg/declextract/entity.go +++ b/pkg/declextract/entity.go 
@@ -45,6 +45,8 @@ type FunctionScope struct { LOC int `json:"loc,omitempty"` Calls []string `json:"calls,omitempty"` Facts []*TypingFact `json:"facts,omitempty"` + + fn *Function } type ConstInfo struct { diff --git a/pkg/declextract/fileops.go b/pkg/declextract/fileops.go index cacdcaa9e..408ccc4fc 100644 --- a/pkg/declextract/fileops.go +++ b/pkg/declextract/fileops.go @@ -61,11 +61,15 @@ func (ctx *context) createFops(fops *FileOps, files []string) { } func (ctx *context) createIoctls(fops *FileOps, suffix, fdt string) { - const defaultArgType = "ptr[in, array[int8]]" - cmds := ctx.inferCommandVariants(fops.Ioctl, fops.SourceFile, 1) + const ( + cmdArg = 1 + argArg = 2 + defaultArgType = "ptr[in, array[int8]]" + ) + cmds := ctx.inferCommandVariants(fops.Ioctl, fops.SourceFile, cmdArg) if len(cmds) == 0 { - retType := ctx.inferReturnType(fops.Ioctl, fops.SourceFile) - argType := ctx.inferArgType(fops.Ioctl, fops.SourceFile, 2) + retType := ctx.inferReturnType(fops.Ioctl, fops.SourceFile, -1, "") + argType := ctx.inferArgType(fops.Ioctl, fops.SourceFile, argArg, -1, "") if argType == "" { argType = defaultArgType } @@ -80,10 +84,16 @@ func (ctx *context) createIoctls(fops *FileOps, suffix, fdt string) { Type: typ, } argType = ctx.fieldType(f, nil, "", false) + } else { + argType = ctx.inferArgType(fops.Ioctl, fops.SourceFile, argArg, cmdArg, cmd) + if argType == "" { + argType = defaultArgType + } } + retType := ctx.inferReturnType(fops.Ioctl, fops.SourceFile, cmdArg, cmd) name := ctx.uniqualize("ioctl cmd", cmd) - ctx.fmt("ioctl%v_%v(fd %v, cmd const[%v], arg %v)\n", - autoSuffix, name, fdt, cmd, argType) + ctx.fmt("ioctl%v_%v(fd %v, cmd const[%v], arg %v) %v\n", + autoSuffix, name, fdt, cmd, argType, retType) } } diff --git a/pkg/declextract/typing.go b/pkg/declextract/typing.go index f29f8e950..3de53ee62 100644 --- a/pkg/declextract/typing.go +++ b/pkg/declextract/typing.go @@ -34,10 +34,7 @@ import ( // - Infer that pointers are file names (they should flow to 
some known function for path resolution). // - Use SSA analysis to track flow via local variables better. Potentiall we can just rename on every next use // and ignore backwards edges (it's unlikely that backwards edges are required for type inference). -// - Infer ioctl commands in transitively called functions using data flow. // - Infer file_operations associated with an fd by tracking flow to alloc_file_pseudo and friends. -// - Add context-sensitivity at least on switched arguments (ioctl commands). -// - Infer other switched arguments besides ioctl commands. // - Infer netlink arg types by tracking flow from genl_info::attrs[ATTR_FOO]. // - Infer simple constraints on arguments, e.g. "if (arg != 0) return -EINVAL". // - Use kernel typedefs for typing (e.g. pid_t). We can use them for uapi structs, but also for kernel @@ -48,6 +45,10 @@ import ( // For example, these cases lead to false inference of fd type for returned value: // https://elixir.bootlin.com/linux/v6.13-rc2/source/net/core/sock.c#L1870 // https://elixir.bootlin.com/linux/v6.13-rc2/source/net/socket.c#L1742 +// - Use const[0] for unused arguments. If an arg is unused, or only flows to functions where it's unused, +// we can consider it as unused. +// - Detect common patterns for "must be 0" or "must be const" arguments, e.g.: +// if (flags != 0) return -EINVAL; var ( // Refines types based on data flows... 
@@ -96,7 +97,7 @@ type typingNode struct { id string fn *Function arg int - flows [2]map[*typingNode]bool + flows [2]map[*typingNode][]*FunctionScope } const ( @@ -107,14 +108,16 @@ const ( func (ctx *context) processTypingFacts() { for _, fn := range ctx.Functions { for _, scope := range fn.Scopes { + scope.fn = fn for _, fact := range scope.Facts { src := ctx.canonicalNode(fn, fact.Src) dst := ctx.canonicalNode(fn, fact.Dst) if src == nil || dst == nil { continue } - src.flows[flowTo][dst] = true - dst.flows[flowFrom][src] = true + + src.flows[flowTo][dst] = append(src.flows[flowTo][dst], scope) + dst.flows[flowFrom][src] = append(dst.flows[flowFrom][src], scope) } } } @@ -156,7 +159,7 @@ func (ctx *context) canonicalNode(fn *Function, ent *TypingEntity) *typingNode { arg: arg, } for i := range n.flows { - n.flows[i] = make(map[*typingNode]bool) + n.flows[i] = make(map[*typingNode][]*FunctionScope) } facts[id] = n return n @@ -179,35 +182,43 @@ func (ent *TypingEntity) ID(fn *Function) (string, string) { } } -func (ctx *context) inferReturnType(name, file string) string { - return ctx.inferFuncNode(name, file, "ret") +func (ctx *context) inferReturnType(name, file string, scopeArg int, scopeVal string) string { + return ctx.inferFuncNode(name, file, "ret", scopeArg, scopeVal) +} + +func (ctx *context) inferArgType(name, file string, arg, scopeArg int, scopeVal string) string { + return ctx.inferFuncNode(name, file, fmt.Sprintf("arg%v", arg), scopeArg, scopeVal) } -func (ctx *context) inferArgType(name, file string, arg int) string { - return ctx.inferFuncNode(name, file, fmt.Sprintf("arg%v", arg)) +type fnArg struct { + fn *Function + arg int } -func (ctx *context) inferFuncNode(name, file, node string) string { +func (ctx *context) inferFuncNode(name, file, node string, scopeArg int, scopeVal string) string { fn := ctx.findFunc(name, file) if fn == nil { return "" } - return ctx.inferNodeType(fn.facts[node], fmt.Sprintf("%v %v", name, node)) + scopeFnArgs := 
ctx.inferArgFlow(fnArg{fn, scopeArg}) + return ctx.inferNodeType(fn.facts[node], scopeFnArgs, scopeVal, fmt.Sprintf("%v %v", name, node)) } func (ctx *context) inferFieldType(structName, field string) string { name := fmt.Sprintf("%v.%v", structName, field) - return ctx.inferNodeType(ctx.facts[name], name) + return ctx.inferNodeType(ctx.facts[name], nil, "", name) } -func (ctx *context) inferNodeType(n *typingNode, what string) string { +func (ctx *context) inferNodeType(n *typingNode, scopeFnArgs map[fnArg]bool, scopeVal, what string) string { if n == nil { return "" } ic := &inferContext{ - visited: make(map[*typingNode]bool), - flowType: flowFrom, - maxDepth: maxTraversalDepth, + scopeFnArgs: scopeFnArgs, + scopeVal: scopeVal, + visited: make(map[*typingNode]bool), + flowType: flowFrom, + maxDepth: maxTraversalDepth, } ic.walk(n) ic.flowType = flowTo @@ -220,13 +231,15 @@ func (ctx *context) inferNodeType(n *typingNode, what string) string { } type inferContext struct { - path []*typingNode - visited map[*typingNode]bool - result string - resultPath []*typingNode - resultFlow int - flowType int - maxDepth int + path []*typingNode + visited map[*typingNode]bool + scopeFnArgs map[fnArg]bool + scopeVal string + result string + resultPath []*typingNode + resultFlow int + flowType int + maxDepth int } func (ic *inferContext) walk(n *typingNode) { @@ -246,13 +259,39 @@ func (ic *inferContext) walk(n *typingNode) { } } if len(ic.path) < ic.maxDepth { - for e := range n.flows[ic.flowType] { - ic.walk(e) + for e, scopes := range n.flows[ic.flowType] { + if ic.relevantScope(scopes) { + ic.walk(e) + } } } ic.path = ic.path[:len(ic.path)-1] } +func (ic *inferContext) relevantScope(scopes []*FunctionScope) bool { + if ic.scopeFnArgs == nil { + // We are not doing scope-limited walk, so all scopes are relevant. + return true + } + for _, scope := range scopes { + if scope.Arg == -1 { + // Always use global scope. 
+ return true + } + if !ic.scopeFnArgs[fnArg{scope.fn, scope.Arg}] { + // The scope argument is not related to the current scope. + return true + } + // For the scope argument, check that it has the right value. + for _, val := range scope.Values { + if val == ic.scopeVal { + return true + } + } + } + return false +} + func refineFieldType(f *Field, typ string, preserveSize bool) { // If our manual heuristics have figured out a more precise fd subtype, // don't replace it with generic fd. @@ -319,3 +358,28 @@ func (ctx *context) walkCommandVariants(n *typingNode, variants *[]string, visit ctx.walkCommandVariants(e, variants, visited, depth+1) } } + +// inferArgFlow returns transitive closure of all function arguments that the given argument flows to. +func (ctx *context) inferArgFlow(arg fnArg) map[fnArg]bool { + n := arg.fn.facts[fmt.Sprintf("arg%v", arg.arg)] + if n == nil { + return nil + } + fnArgs := make(map[fnArg]bool) + visited := make(map[*typingNode]bool) + ctx.walkArgFlow(n, fnArgs, visited, 0) + return fnArgs +} + +func (ctx *context) walkArgFlow(n *typingNode, fnArgs map[fnArg]bool, visited map[*typingNode]bool, depth int) { + if visited[n] || depth >= 10 { + return + } + visited[n] = true + if n.arg >= 0 { + fnArgs[fnArg{n.fn, n.arg}] = true + } + for e := range n.flows[flowTo] { + ctx.walkArgFlow(e, fnArgs, visited, depth+1) + } +} |
