aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/declextract
diff options
context:
space:
mode:
author Dmitry Vyukov <dvyukov@google.com> 2025-04-08 14:27:33 +0200
committer Dmitry Vyukov <dvyukov@google.com> 2025-04-09 10:27:41 +0000
commit 988b336c79bf2f92392015e5075e92f0148ad869 (patch)
tree 1e28f832accba910334c94ff14cf9715e9e378a5 /pkg/declextract
parent 16f995ffcf2e3469a7e464ac5d486385641df7d8 (diff)
tools/syz-declextract: refine arg types for syscall variants
Use scope-based dataflow analysis for syscall variants (including ioctls). As a result, we only consider code that relates to a particular command/ioctl, and can infer argument and return types for each command/ioctl independently.
Diffstat (limited to 'pkg/declextract')
-rw-r--r-- pkg/declextract/declextract.go 30
-rw-r--r-- pkg/declextract/entity.go 2
-rw-r--r-- pkg/declextract/fileops.go 22
-rw-r--r-- pkg/declextract/typing.go 118
4 files changed, 128 insertions, 44 deletions
diff --git a/pkg/declextract/declextract.go b/pkg/declextract/declextract.go
index 479a40892..3800ad70c 100644
--- a/pkg/declextract/declextract.go
+++ b/pkg/declextract/declextract.go
@@ -161,20 +161,22 @@ func (ctx *context) processSyscalls() {
var syscalls []*Syscall
for _, call := range ctx.Syscalls {
ctx.processFields(call.Args, "", false)
- call.returnType = ctx.inferReturnType(call.Func, call.SourceFile)
- for i, arg := range call.Args {
- typ := ctx.inferArgType(call.Func, call.SourceFile, i)
- refineFieldType(arg, typ, false)
- }
- ctx.emitSyscall(&syscalls, call, "")
- for i := range call.Args {
- cmds := ctx.inferCommandVariants(call.Func, call.SourceFile, i)
+ for varArg := range call.Args {
+ cmds := ctx.inferCommandVariants(call.Func, call.SourceFile, varArg)
for _, cmd := range cmds {
variant := *call
variant.Args = slices.Clone(call.Args)
- newArg := *variant.Args[i]
- newArg.syzType = fmt.Sprintf("const[%v]", cmd)
- variant.Args[i] = &newArg
+ for i, oldArg := range variant.Args {
+ arg := *oldArg
+ if i == varArg {
+ arg.syzType = fmt.Sprintf("const[%v]", cmd)
+ } else {
+ typ := ctx.inferArgType(call.Func, call.SourceFile, i, varArg, cmd)
+ refineFieldType(&arg, typ, false)
+ }
+ variant.Args[i] = &arg
+ }
+ variant.returnType = ctx.inferReturnType(call.Func, call.SourceFile, varArg, cmd)
suffix := cmd
if call.Func == "__do_sys_ioctl" {
suffix = ctx.uniqualize("ioctl cmd", cmd)
@@ -182,6 +184,12 @@ func (ctx *context) processSyscalls() {
ctx.emitSyscall(&syscalls, &variant, "_"+suffix)
}
}
+ call.returnType = ctx.inferReturnType(call.Func, call.SourceFile, -1, "")
+ for i, arg := range call.Args {
+ typ := ctx.inferArgType(call.Func, call.SourceFile, i, -1, "")
+ refineFieldType(arg, typ, false)
+ }
+ ctx.emitSyscall(&syscalls, call, "")
}
ctx.Syscalls = sortAndDedupSlice(syscalls)
}
diff --git a/pkg/declextract/entity.go b/pkg/declextract/entity.go
index 5562ff570..740530ca9 100644
--- a/pkg/declextract/entity.go
+++ b/pkg/declextract/entity.go
@@ -45,6 +45,8 @@ type FunctionScope struct {
LOC int `json:"loc,omitempty"`
Calls []string `json:"calls,omitempty"`
Facts []*TypingFact `json:"facts,omitempty"`
+
+ fn *Function
}
type ConstInfo struct {
diff --git a/pkg/declextract/fileops.go b/pkg/declextract/fileops.go
index cacdcaa9e..408ccc4fc 100644
--- a/pkg/declextract/fileops.go
+++ b/pkg/declextract/fileops.go
@@ -61,11 +61,15 @@ func (ctx *context) createFops(fops *FileOps, files []string) {
}
func (ctx *context) createIoctls(fops *FileOps, suffix, fdt string) {
- const defaultArgType = "ptr[in, array[int8]]"
- cmds := ctx.inferCommandVariants(fops.Ioctl, fops.SourceFile, 1)
+ const (
+ cmdArg = 1
+ argArg = 2
+ defaultArgType = "ptr[in, array[int8]]"
+ )
+ cmds := ctx.inferCommandVariants(fops.Ioctl, fops.SourceFile, cmdArg)
if len(cmds) == 0 {
- retType := ctx.inferReturnType(fops.Ioctl, fops.SourceFile)
- argType := ctx.inferArgType(fops.Ioctl, fops.SourceFile, 2)
+ retType := ctx.inferReturnType(fops.Ioctl, fops.SourceFile, -1, "")
+ argType := ctx.inferArgType(fops.Ioctl, fops.SourceFile, argArg, -1, "")
if argType == "" {
argType = defaultArgType
}
@@ -80,10 +84,16 @@ func (ctx *context) createIoctls(fops *FileOps, suffix, fdt string) {
Type: typ,
}
argType = ctx.fieldType(f, nil, "", false)
+ } else {
+ argType = ctx.inferArgType(fops.Ioctl, fops.SourceFile, argArg, cmdArg, cmd)
+ if argType == "" {
+ argType = defaultArgType
+ }
}
+ retType := ctx.inferReturnType(fops.Ioctl, fops.SourceFile, cmdArg, cmd)
name := ctx.uniqualize("ioctl cmd", cmd)
- ctx.fmt("ioctl%v_%v(fd %v, cmd const[%v], arg %v)\n",
- autoSuffix, name, fdt, cmd, argType)
+ ctx.fmt("ioctl%v_%v(fd %v, cmd const[%v], arg %v) %v\n",
+ autoSuffix, name, fdt, cmd, argType, retType)
}
}
diff --git a/pkg/declextract/typing.go b/pkg/declextract/typing.go
index f29f8e950..3de53ee62 100644
--- a/pkg/declextract/typing.go
+++ b/pkg/declextract/typing.go
@@ -34,10 +34,7 @@ import (
// - Infer that pointers are file names (they should flow to some known function for path resolution).
// - Use SSA analysis to track flow via local variables better. Potentially we can just rename on every next use
// and ignore backwards edges (it's unlikely that backwards edges are required for type inference).
-// - Infer ioctl commands in transitively called functions using data flow.
// - Infer file_operations associated with an fd by tracking flow to alloc_file_pseudo and friends.
-// - Add context-sensitivity at least on switched arguments (ioctl commands).
-// - Infer other switched arguments besides ioctl commands.
// - Infer netlink arg types by tracking flow from genl_info::attrs[ATTR_FOO].
// - Infer simple constraints on arguments, e.g. "if (arg != 0) return -EINVAL".
// - Use kernel typedefs for typing (e.g. pid_t). We can use them for uapi structs, but also for kernel
@@ -48,6 +45,10 @@ import (
// For example, these cases lead to false inference of fd type for returned value:
// https://elixir.bootlin.com/linux/v6.13-rc2/source/net/core/sock.c#L1870
// https://elixir.bootlin.com/linux/v6.13-rc2/source/net/socket.c#L1742
+// - Use const[0] for unused arguments. If an arg is unused, or only flows to functions where it's unused,
+// we can consider it as unused.
+// - Detect common patterns for "must be 0" or "must be const" arguments, e.g.:
+// if (flags != 0) return -EINVAL;
var (
// Refines types based on data flows...
@@ -96,7 +97,7 @@ type typingNode struct {
id string
fn *Function
arg int
- flows [2]map[*typingNode]bool
+ flows [2]map[*typingNode][]*FunctionScope
}
const (
@@ -107,14 +108,16 @@ const (
func (ctx *context) processTypingFacts() {
for _, fn := range ctx.Functions {
for _, scope := range fn.Scopes {
+ scope.fn = fn
for _, fact := range scope.Facts {
src := ctx.canonicalNode(fn, fact.Src)
dst := ctx.canonicalNode(fn, fact.Dst)
if src == nil || dst == nil {
continue
}
- src.flows[flowTo][dst] = true
- dst.flows[flowFrom][src] = true
+
+ src.flows[flowTo][dst] = append(src.flows[flowTo][dst], scope)
+ dst.flows[flowFrom][src] = append(dst.flows[flowFrom][src], scope)
}
}
}
@@ -156,7 +159,7 @@ func (ctx *context) canonicalNode(fn *Function, ent *TypingEntity) *typingNode {
arg: arg,
}
for i := range n.flows {
- n.flows[i] = make(map[*typingNode]bool)
+ n.flows[i] = make(map[*typingNode][]*FunctionScope)
}
facts[id] = n
return n
@@ -179,35 +182,43 @@ func (ent *TypingEntity) ID(fn *Function) (string, string) {
}
}
-func (ctx *context) inferReturnType(name, file string) string {
- return ctx.inferFuncNode(name, file, "ret")
+func (ctx *context) inferReturnType(name, file string, scopeArg int, scopeVal string) string {
+ return ctx.inferFuncNode(name, file, "ret", scopeArg, scopeVal)
+}
+
+func (ctx *context) inferArgType(name, file string, arg, scopeArg int, scopeVal string) string {
+ return ctx.inferFuncNode(name, file, fmt.Sprintf("arg%v", arg), scopeArg, scopeVal)
}
-func (ctx *context) inferArgType(name, file string, arg int) string {
- return ctx.inferFuncNode(name, file, fmt.Sprintf("arg%v", arg))
+type fnArg struct {
+ fn *Function
+ arg int
}
-func (ctx *context) inferFuncNode(name, file, node string) string {
+func (ctx *context) inferFuncNode(name, file, node string, scopeArg int, scopeVal string) string {
fn := ctx.findFunc(name, file)
if fn == nil {
return ""
}
- return ctx.inferNodeType(fn.facts[node], fmt.Sprintf("%v %v", name, node))
+ scopeFnArgs := ctx.inferArgFlow(fnArg{fn, scopeArg})
+ return ctx.inferNodeType(fn.facts[node], scopeFnArgs, scopeVal, fmt.Sprintf("%v %v", name, node))
}
func (ctx *context) inferFieldType(structName, field string) string {
name := fmt.Sprintf("%v.%v", structName, field)
- return ctx.inferNodeType(ctx.facts[name], name)
+ return ctx.inferNodeType(ctx.facts[name], nil, "", name)
}
-func (ctx *context) inferNodeType(n *typingNode, what string) string {
+func (ctx *context) inferNodeType(n *typingNode, scopeFnArgs map[fnArg]bool, scopeVal, what string) string {
if n == nil {
return ""
}
ic := &inferContext{
- visited: make(map[*typingNode]bool),
- flowType: flowFrom,
- maxDepth: maxTraversalDepth,
+ scopeFnArgs: scopeFnArgs,
+ scopeVal: scopeVal,
+ visited: make(map[*typingNode]bool),
+ flowType: flowFrom,
+ maxDepth: maxTraversalDepth,
}
ic.walk(n)
ic.flowType = flowTo
@@ -220,13 +231,15 @@ func (ctx *context) inferNodeType(n *typingNode, what string) string {
}
type inferContext struct {
- path []*typingNode
- visited map[*typingNode]bool
- result string
- resultPath []*typingNode
- resultFlow int
- flowType int
- maxDepth int
+ path []*typingNode
+ visited map[*typingNode]bool
+ scopeFnArgs map[fnArg]bool
+ scopeVal string
+ result string
+ resultPath []*typingNode
+ resultFlow int
+ flowType int
+ maxDepth int
}
func (ic *inferContext) walk(n *typingNode) {
@@ -246,13 +259,39 @@ func (ic *inferContext) walk(n *typingNode) {
}
}
if len(ic.path) < ic.maxDepth {
- for e := range n.flows[ic.flowType] {
- ic.walk(e)
+ for e, scopes := range n.flows[ic.flowType] {
+ if ic.relevantScope(scopes) {
+ ic.walk(e)
+ }
}
}
ic.path = ic.path[:len(ic.path)-1]
}
+func (ic *inferContext) relevantScope(scopes []*FunctionScope) bool {
+ if ic.scopeFnArgs == nil {
+ // We are not doing scope-limited walk, so all scopes are relevant.
+ return true
+ }
+ for _, scope := range scopes {
+ if scope.Arg == -1 {
+ // Always use global scope.
+ return true
+ }
+ if !ic.scopeFnArgs[fnArg{scope.fn, scope.Arg}] {
+ // The scope argument is not related to the current scope.
+ return true
+ }
+ // For the scope argument, check that it has the right value.
+ for _, val := range scope.Values {
+ if val == ic.scopeVal {
+ return true
+ }
+ }
+ }
+ return false
+}
+
func refineFieldType(f *Field, typ string, preserveSize bool) {
// If our manual heuristics have figured out a more precise fd subtype,
// don't replace it with generic fd.
@@ -319,3 +358,28 @@ func (ctx *context) walkCommandVariants(n *typingNode, variants *[]string, visit
ctx.walkCommandVariants(e, variants, visited, depth+1)
}
}
+
+// inferArgFlow returns transitive closure of all function arguments that the given argument flows to.
+func (ctx *context) inferArgFlow(arg fnArg) map[fnArg]bool {
+ n := arg.fn.facts[fmt.Sprintf("arg%v", arg.arg)]
+ if n == nil {
+ return nil
+ }
+ fnArgs := make(map[fnArg]bool)
+ visited := make(map[*typingNode]bool)
+ ctx.walkArgFlow(n, fnArgs, visited, 0)
+ return fnArgs
+}
+
+func (ctx *context) walkArgFlow(n *typingNode, fnArgs map[fnArg]bool, visited map[*typingNode]bool, depth int) {
+ if visited[n] || depth >= 10 {
+ return
+ }
+ visited[n] = true
+ if n.arg >= 0 {
+ fnArgs[fnArg{n.fn, n.arg}] = true
+ }
+ for e := range n.flows[flowTo] {
+ ctx.walkArgFlow(e, fnArgs, visited, depth+1)
+ }
+}