aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/declextract
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2024-12-02 10:57:36 +0100
committerDmitry Vyukov <dvyukov@google.com>2024-12-11 15:22:17 +0000
commit59267911909f7e749367f87d62448d7daa87b1b5 (patch)
tree02bf0a5d9e6012a8b6cb237fed7789f61aca6cc2 /pkg/declextract
parentc756ba4e975097bf74b952367e2cd1a8db466c69 (diff)
tools/syz-declextract: generate file_operations descriptions
Emit descriptions for special files in /dev, /sys, /proc, etc. pkg/declextract combines file_operations info produced by the clang tool with the dynamic probing info produced by pkg/ifaceprobe in order to produce complete descriptions for special files.
Diffstat (limited to 'pkg/declextract')
-rw-r--r--pkg/declextract/declextract.go32
-rw-r--r--pkg/declextract/fileops.go256
-rw-r--r--pkg/declextract/netlink.go11
-rw-r--r--pkg/declextract/serialization.go1
4 files changed, 287 insertions, 13 deletions
diff --git a/pkg/declextract/declextract.go b/pkg/declextract/declextract.go
index 1d650b9ae..55194e807 100644
--- a/pkg/declextract/declextract.go
+++ b/pkg/declextract/declextract.go
@@ -9,13 +9,17 @@ import (
"fmt"
"slices"
"strings"
+
+ "github.com/google/syzkaller/pkg/ifaceprobe"
)
-func Run(out *Output, syscallRename map[string][]string) ([]byte, []*Interface, error) {
+func Run(out *Output, probe *ifaceprobe.Info, syscallRename map[string][]string) ([]byte, []*Interface, error) {
ctx := &context{
Output: out,
+ probe: probe,
syscallRename: syscallRename,
structs: make(map[string]*Struct),
+ uniqualizer: make(map[string]int),
}
ctx.processIncludes()
ctx.processEnums()
@@ -30,8 +34,10 @@ func Run(out *Output, syscallRename map[string][]string) ([]byte, []*Interface,
type context struct {
*Output
+ probe *ifaceprobe.Info
syscallRename map[string][]string // syscall function -> syscall names
structs map[string]*Struct
+ uniqualizer map[string]int
interfaces []*Interface
descriptions *bytes.Buffer
errs []error
@@ -186,9 +192,9 @@ func (ctx *context) fieldTypeInt(f, counts *Field, needBase bool) string {
if f.BitWidth != 0 {
baseType += fmt.Sprintf(":%v", f.BitWidth)
}
- unusedType := fmt.Sprintf("const[0 %v]", maybeBaseType(baseType, needBase))
- if f.IsAnonymous {
- return unusedType
+ constType := fmt.Sprintf("const[%v %v]", t.MinValue, maybeBaseType(baseType, needBase))
+ if f.IsAnonymous || t.IsConst {
+ return constType
}
if t.Enum != "" {
t.Enum += autoSuffix
@@ -223,7 +229,14 @@ func (ctx *context) fieldTypeInt(f, counts *Field, needBase bool) string {
}
if strings.Contains(f.Name, "pad") || strings.Contains(f.Name, "unused") ||
strings.Contains(f.Name, "_reserved") {
- return unusedType
+ return constType
+ }
+ if t.MinValue != 0 || t.MaxValue != 0 {
+ minVal, maxVal := uint64(t.MinValue), uint64(t.MaxValue)
+ if minVal > maxVal {
+ minVal, maxVal = maxVal, minVal
+ }
+ return baseType + fmt.Sprintf("[%v:%v]", minVal, maxVal)
}
return baseType
}
@@ -375,6 +388,15 @@ func (ctx *context) bounds(name string, min, max int) string {
return ""
}
+func (ctx *context) uniqualize(typ, name string) string {
+ id := fmt.Sprintf("%v-%v", typ, name)
+ ctx.uniqualizer[id]++
+ if seq := ctx.uniqualizer[id]; seq != 1 {
+ return name + fmt.Sprint(seq)
+ }
+ return name
+}
+
const (
autoSuffix = "$auto"
todoType = "auto_todo"
diff --git a/pkg/declextract/fileops.go b/pkg/declextract/fileops.go
new file mode 100644
index 000000000..a18566647
--- /dev/null
+++ b/pkg/declextract/fileops.go
@@ -0,0 +1,256 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package declextract
+
+import (
+ "fmt"
+ "slices"
+ "strings"
+)
+
+// TODO: also emit interface entry for file_operations.
+
+func (ctx *context) serializeFileOps() {
+ fopsToFiles := ctx.mapFopsToFiles()
+ for _, fops := range ctx.FileOps {
+ files := fopsToFiles[fops]
+ if len(files) == 0 {
+ continue // each unmapped entry means some code we don't know how to cover yet
+ }
+ ctx.createFops(fops, files)
+ }
+}
+
+func (ctx *context) createFops(fops *FileOps, files []string) {
+ // If it has only open, then emit only openat that returns generic fd.
+ fdt := "fd"
+ if len(fops.ops()) > 1 || fops.Open == "" {
+ fdt = fmt.Sprintf("fd_%v", fops.Name)
+ ctx.fmt("resource %v[fd]\n", fdt)
+ }
+ suffix := autoSuffix + "_" + fops.Name
+ if len(files) == 1 {
+ ctx.fmt("openat%v(fd const[AT_FDCWD], file ptr[in, string[\"%s\"]],"+
+ " flags flags[open_flags], mode const[0]) %v\n", suffix, files[0], fdt)
+ } else {
+ // If there are too many files, split them into parts.
+ // First, compiler currently sets limit of 2000 values for string flags;
+ // second, it should provide additional prioritization signal for the fuzzer
+ // (if there are multiple openat calls, it should call them more often than a single call).
+ const partSize = 100
+ singlePart := len(files) <= partSize
+ for partID := 0; len(files) != 0; partID++ {
+ part := files[:min(100, len(files))]
+ files = files[len(part):]
+ partSuffix := ""
+ if !singlePart {
+ partSuffix = fmt.Sprint(partID)
+ }
+ fileFlags := fmt.Sprintf("%v_files%v", fops.Name, partSuffix)
+ ctx.fmt("%v = ", fileFlags)
+ for i, file := range part {
+ ctx.fmt("%v \"%v\"", comma(i), file)
+ }
+ ctx.fmt("\n")
+ ctx.fmt("openat%v%v(fd const[AT_FDCWD], file ptr[in, string[%v]],"+
+ " flags flags[open_flags], mode const[0]) %v\n",
+ suffix, partSuffix, fileFlags, fdt)
+ }
+ }
+ if fops.Read != "" {
+ ctx.fmt("read%v(fd %v, buf ptr[out, array[int8]], len bytesize[buf])\n", suffix, fdt)
+ }
+ if fops.Write != "" {
+ ctx.fmt("write%v(fd %v, buf ptr[in, array[int8]], len bytesize[buf])\n", suffix, fdt)
+ }
+ if fops.Mmap != "" {
+ ctx.fmt("mmap%v(addr vma, len len[addr], prot flags[mmap_prot],"+
+ " flags flags[mmap_flags], fd %v, offset fileoff)\n", suffix, fdt)
+ }
+ if fops.Ioctl != "" {
+ if len(fops.IoctlCmds) == 0 {
+ ctx.fmt("ioctl%v(fd %v, cmd intptr, arg ptr[in, array[int8]])\n", suffix, fdt)
+ } else {
+ for _, cmd := range sortAndDedupSlice(fops.IoctlCmds) {
+ name := ctx.uniqualize("ioctl cmd", cmd.Name)
+ f := &Field{
+ Name: strings.ToLower(cmd.Name),
+ Type: cmd.Type,
+ }
+ typ := ctx.fieldType(f, nil, "", false)
+ ctx.fmt("ioctl%v_%v(fd %v, cmd const[%v], arg %v)\n",
+ autoSuffix, name, fdt, cmd.Name, typ)
+ }
+ }
+ }
+ ctx.fmt("\n")
+}
+
// mapFopsToFiles maps file_operations to actual file names.
func (ctx *context) mapFopsToFiles() map[*FileOps][]string {
	// Mapping turns out to be more of an art than science because
	// (1) there are lots of common callback functions that are present in lots of file_operations
	// in different combinations, (2) some file operations are updated at runtime,
	// (3) some file operations are chained at runtime and we see callbacks from several
	// of them at the same time, (4) some callbacks are not reached (e.g. debugfs files
	// always have a write callback, but can be installed without write permission).

	// uniqueFuncs holds callback functions that are present in only 1 file_operations;
	// if such a callback is matched, it's a stronger prioritization signal for that file_operations.
	uniqueFuncs := make(map[string]int)
	funcToFops := make(map[string][]*FileOps)
	for _, fops := range ctx.FileOps {
		for _, fn := range fops.ops() {
			funcToFops[fn] = append(funcToFops[fn], fops)
			uniqueFuncs[fn]++
		}
	}
	pcToFunc := make(map[uint64]string)
	for _, pc := range ctx.probe.PCs {
		pcToFunc[pc.PC] = pc.Func
	}
	// matchedFuncs holds functions that are present in any file_operations callbacks
	// (lots of coverage is not related to any file_operations at all).
	// NOTE(review): matchedFuncs is written but never read within this function —
	// TODO confirm it is either consumed by a follow-up change or can be removed.
	matchedFuncs := make(map[string]bool)
	// Maps file names to the set of all callbacks that operations on the file have reached.
	fileToFuncs := make(map[string]map[string]bool)
	for _, file := range ctx.probe.Files {
		funcs := make(map[string]bool)
		fileToFuncs[file.Name] = funcs
		for _, pc := range file.Cover {
			fn := pcToFunc[pc]
			if len(funcToFops[fn]) != 0 {
				funcs[fn] = true
				matchedFuncs[fn] = true
			}
		}
	}
	// This is a special entry for files that have only the open callback
	// (it does not make sense to differentiate them further).
	generic := &FileOps{
		Name: "generic",
		Open: "only_open",
	}
	ctx.FileOps = append(ctx.FileOps, generic)
	fopsToFiles := make(map[*FileOps][]string)
	for _, file := range ctx.probe.Files {
		// For each file figure out the potential file_operations that match this file best.
		funcs := fileToFuncs[file.Name]
		// First collect all candidates (all file_operations for which at least 1 callback was triggered).
		candidates := make(map[*FileOps]int)
		for fn := range funcs {
			for _, fops := range funcToFops[fn] {
				if fops.Open != "" && len(fops.ops()) == 1 {
					// If it has only open, it's not very interesting
					// (we will use generic for it below).
					continue
				}
				hasUnique := false
				for _, fn := range fops.ops() {
					if uniqueFuncs[fn] == 1 {
						hasUnique = true
					}
				}
				// If we've triggered at least one unique callback, we take this
				// file_operations in any case. Otherwise check if the file_operations
				// has open/ioctl that we haven't triggered.
				// Note that it may have open/ioctl, and this is the right file_operations
				// for the file, yet we haven't triggered them for the reasons described
				// in the beginning of the function.
				if !hasUnique {
					if fops.Open != "" && !funcs[fops.Open] {
						continue
					}
					if fops.Ioctl != "" && !funcs[fops.Ioctl] {
						continue
					}
				}
				candidates[fops] = 0
			}
		}
		if len(candidates) == 0 {
			candidates[generic] = 0
		}
		// Now find the best set of candidates.
		// There are lots of false positives due to common callback functions.
		maxScore := 0
		for fops := range candidates {
			ops := fops.ops()
			// All else being equal, prefer file_operations with more callbacks defined.
			score := len(ops)
			for _, fn := range ops {
				if !funcs[fn] {
					continue
				}
				// Matched callbacks increase the score.
				score += 10
				// If we matched ioctl, bump the score by a lot.
				// We do want to emit ioctl's because they are the only non-trivial
				// operations we emit at the moment.
				if fn == fops.Ioctl {
					score += 100
				}
				// Unique callbacks are the strongest prioritization signal.
				// Besides some corner cases there is no way we can reach a unique callback
				// from a wrong file (a corner case would be if one callback calls another
				// callback directly).
				if uniqueFuncs[fn] == 1 {
					score += 1000
				}
			}
			candidates[fops] = score
			maxScore = max(maxScore, score)
		}
		// Now, take the candidates with the highest score (there still may be several of them).
		var best []*FileOps
		for fops, score := range candidates {
			if score == maxScore {
				best = append(best, fops)
			}
		}
		best = sortAndDedupSlice(best)
		// Now, filter out some excessive file_operations.
		// An example of an excessive case is if we have 2 file_operations with just read+write:
		// currently we emit generic read/write operations, so we would emit completely equal
		// descriptions for both. Ioctl commands are the only non-generic descriptions we emit now,
		// so if a file_operations has any commands, it won't be considered excessive.
		// Note that if we generate specialized descriptions for read/write/mmap in the future,
		// then these won't be considered excessive as well.
		excessive := make(map[*FileOps]bool)
		for i := 0; i < len(best); i++ {
			for j := i + 1; j < len(best); j++ {
				a, b := best[i], best[j]
				if (a.Ioctl == b.Ioctl || len(a.IoctlCmds)+len(b.IoctlCmds) == 0) &&
					(a.Read == "") == (b.Read == "") &&
					(a.Write == "") == (b.Write == "") &&
					(a.Mmap == "") == (b.Mmap == "") &&
					(a.Ioctl == "") == (b.Ioctl == "") {
					excessive[b] = true
				}
			}
		}
		// Finally record the file for the best non-excessive file_operations
		// (there can still be several of them).
		for _, fops := range best {
			if !excessive[fops] {
				fopsToFiles[fops] = append(fopsToFiles[fops], file.Name)
			}
		}
	}
	for fops, files := range fopsToFiles {
		slices.Sort(files)
		fopsToFiles[fops] = files
	}
	return fopsToFiles
}
+
+func (fops *FileOps) ops() []string {
+ var ops []string
+ for _, op := range []string{fops.Open, fops.Read, fops.Write, fops.Mmap, fops.Ioctl} {
+ if op != "" {
+ ops = append(ops, op)
+ }
+ }
+ return ops
+}
diff --git a/pkg/declextract/netlink.go b/pkg/declextract/netlink.go
index 47ae7c664..0adec5bc4 100644
--- a/pkg/declextract/netlink.go
+++ b/pkg/declextract/netlink.go
@@ -29,21 +29,16 @@ func (ctx *context) serializeNetlink() {
ctx.fmt("syz_genetlink_get_family_id%v_%v(name ptr[in, string[\"%v\"]],"+
" fd sock_nl_generic) genl_%v_family_id%v\n\n", autoSuffix, id, fam.Name, id, autoSuffix)
- dedup := make(map[string]int)
for _, op := range fam.Ops {
policy := voidType
if op.Policy != "" {
policy = op.Policy + autoSuffix
pq.policyUsed(op.Policy)
}
- suffix := ""
- dedup[op.Name]++
- if v := dedup[op.Name]; v != 1 {
- suffix = fmt.Sprint(v)
- }
- ctx.fmt("sendmsg%v_%v%v(fd sock_nl_generic,"+
+ name := ctx.uniqualize("netlink op", op.Name)
+ ctx.fmt("sendmsg%v_%v(fd sock_nl_generic,"+
" msg ptr[in, msghdr_%v%v[%v, %v]], f flags[send_flags])\n",
- autoSuffix, op.Name, suffix, id, autoSuffix, op.Name, policy)
+ autoSuffix, name, id, autoSuffix, op.Name, policy)
ctx.noteInterface(&Interface{
Type: IfaceNetlinkOp,
diff --git a/pkg/declextract/serialization.go b/pkg/declextract/serialization.go
index 1bc82a86b..2906ce6c2 100644
--- a/pkg/declextract/serialization.go
+++ b/pkg/declextract/serialization.go
@@ -15,6 +15,7 @@ func (ctx *context) serialize() {
ctx.serializeIncludes()
ctx.serializeEnums()
ctx.serializeSyscalls()
+ ctx.serializeFileOps()
ctx.serializeNetlink()
ctx.serializeStructs()
ctx.serializeDefines()