aboutsummaryrefslogtreecommitdiffstats
path: root/pkg
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2024-12-11 16:49:01 +0100
committerDmitry Vyukov <dvyukov@google.com>2024-12-13 14:42:28 +0000
commitef0cd4a7bc26b206a7a5af18beed1589c388a204 (patch)
tree854f3b8475f091f6af07e52a88a033b7652e2da1 /pkg
parenta35f0e6cafe5705ddc9f527bb6cfe297384021ef (diff)
tools/syz-declextract: extract info about all functions
Extract info about all functions and compute the total LOC for each interface. For now only static calls are considered; indirect calls are not handled yet. This is just groundwork for more complex callgraph/dataflow analysis.
Diffstat (limited to 'pkg')
-rw-r--r--pkg/declextract/declextract.go8
-rw-r--r--pkg/declextract/entity.go17
-rw-r--r--pkg/declextract/interface.go71
3 files changed, 96 insertions, 0 deletions
diff --git a/pkg/declextract/declextract.go b/pkg/declextract/declextract.go
index 30caeaa2d..4edb6c867 100644
--- a/pkg/declextract/declextract.go
+++ b/pkg/declextract/declextract.go
@@ -7,6 +7,7 @@ import (
"bytes"
"errors"
"fmt"
+ "os"
"slices"
"strings"
@@ -19,8 +20,10 @@ func Run(out *Output, probe *ifaceprobe.Info, syscallRename map[string][]string)
probe: probe,
syscallRename: syscallRename,
structs: make(map[string]*Struct),
+ funcs: make(map[string]*Function),
uniqualizer: make(map[string]int),
}
+ ctx.processFunctions()
ctx.processIncludes()
ctx.processEnums()
ctx.processStructs()
@@ -37,6 +40,7 @@ type context struct {
probe *ifaceprobe.Info
syscallRename map[string][]string // syscall function -> syscall names
structs map[string]*Struct
+ funcs map[string]*Function
uniqualizer map[string]int
interfaces []*Interface
descriptions *bytes.Buffer
@@ -47,6 +51,10 @@ func (ctx *context) error(msg string, args ...any) {
ctx.errs = append(ctx.errs, fmt.Errorf(msg, args...))
}
+// warn formats msg with args and prints it, followed by a newline, to stderr.
+// Unlike error, it does not append anything to ctx.errs.
+func (ctx *context) warn(msg string, args ...any) {
+	format := msg + "\n"
+	fmt.Fprintf(os.Stderr, format, args...)
+}
+
func (ctx *context) processIncludes() {
// These additional includes must be at the top, because other kernel headers
// are broken and won't compile without these additional ones included first.
diff --git a/pkg/declextract/entity.go b/pkg/declextract/entity.go
index 7cdd5a73a..ba45cc51c 100644
--- a/pkg/declextract/entity.go
+++ b/pkg/declextract/entity.go
@@ -11,6 +11,7 @@ import (
)
type Output struct {
+ Functions []*Function `json:"functions,omitempty"`
Includes []string `json:"includes,omitempty"`
Defines []*Define `json:"defines,omitempty"`
Enums []*Enum `json:"enums,omitempty"`
@@ -22,6 +23,17 @@ type Output struct {
NetlinkPolicies []*NetlinkPolicy `json:"netlink_policies,omitempty"`
}
+type Function struct { // Function describes one extracted function together with the functions it calls.
+ Name string `json:"name,omitempty"` // function name; used as a lookup key by findFunc
+ File string `json:"file,omitempty"` // file the function is attributed to; rewritten via updatePath in SetSourceFile
+ IsStatic bool `json:"is_static,omitempty"` // static functions are indexed by bare name only if File has the header suffix
+ LOC int `json:"loc,omitempty"` // lines of code; summed over reachable functions by reachableLOC
+ Calls []string `json:"calls,omitempty"` // names of called functions; reset to nil once processFunctions has resolved them
+
+ callers int // incremented in processFunctions for every resolved call of this function from another function
+ calls []*Function // resolved callees, filled by processFunctions (unresolved names and self-calls are skipped)
+}
+
type Define struct {
Name string `json:"name,omitempty"`
Value string `json:"value,omitempty"`
@@ -147,6 +159,7 @@ type BufferType struct {
}
func (out *Output) Merge(other *Output) {
+ out.Functions = append(out.Functions, other.Functions...)
out.Includes = append(out.Includes, other.Includes...)
out.Defines = append(out.Defines, other.Defines...)
out.Enums = append(out.Enums, other.Enums...)
@@ -159,6 +172,7 @@ func (out *Output) Merge(other *Output) {
}
func (out *Output) SortAndDedup() {
+ out.Functions = sortAndDedupSlice(out.Functions)
out.Includes = sortAndDedupSlice(out.Includes)
out.Defines = sortAndDedupSlice(out.Defines)
out.Enums = sortAndDedupSlice(out.Enums)
@@ -173,6 +187,9 @@ func (out *Output) SortAndDedup() {
// SetSourceFile attaches the source file to the entities that need it.
// The clang tool could do it, but it looks easier to do it here.
func (out *Output) SetSourceFile(file string, updatePath func(string) string) {
+ for _, fn := range out.Functions {
+ fn.File = updatePath(fn.File)
+ }
for i, inc := range out.Includes {
out.Includes[i] = updatePath(inc)
}
diff --git a/pkg/declextract/interface.go b/pkg/declextract/interface.go
index dfb223d16..7abce44fb 100644
--- a/pkg/declextract/interface.go
+++ b/pkg/declextract/interface.go
@@ -5,6 +5,7 @@ package declextract
import (
"slices"
+ "strings"
)
type Interface struct {
@@ -17,6 +18,7 @@ type Interface struct {
Subsystems []string
ManualDescriptions bool
AutoDescriptions bool
+ ReachableLOC int
}
const (
@@ -36,6 +38,7 @@ func (ctx *context) noteInterface(iface *Interface) {
func (ctx *context) finishInterfaces() {
for _, iface := range ctx.interfaces {
+ iface.ReachableLOC = ctx.reachableLOC(iface.Func, iface.Files[0])
slices.Sort(iface.Files)
iface.Files = slices.Compact(iface.Files)
if iface.Access == "" {
@@ -44,3 +47,71 @@ func (ctx *context) finishInterfaces() {
}
ctx.interfaces = sortAndDedupSlice(ctx.interfaces)
}
+
+// processFunctions builds the function lookup index and resolves the static call graph.
+//
+// Every function is indexed under the file+name key. Non-static functions and static
+// functions defined in headers are additionally indexed under the bare name, so that
+// findFunc can resolve them from any file.
+// Then every name in Function.Calls is resolved to a *Function: resolved callees are
+// appended to calls and their callers counter is incremented. Unresolved names and
+// direct self-calls are skipped. Calls is reset to nil once it has been processed.
+func (ctx *context) processFunctions() {
+	for _, fn := range ctx.Functions {
+		ctx.funcs[fn.File+fn.Name] = fn
+		// Strictly speaking there may be several different static functions in different headers,
+		// but we ignore such possibility for now.
+		// Note: HasSuffix does a literal comparison (no globbing), so the suffix must be ".h".
+		if !fn.IsStatic || strings.HasSuffix(fn.File, ".h") {
+			ctx.funcs[fn.Name] = fn
+		}
+	}
+	for _, fn := range ctx.Functions {
+		for _, callee := range fn.Calls {
+			called := ctx.findFunc(callee, fn.File)
+			if called == nil || called == fn {
+				continue
+			}
+			fn.calls = append(fn.calls, called)
+			called.callers++
+		}
+		// The callee names have been resolved into fn.calls above.
+		fn.Calls = nil
+	}
+}
+
+// reachableLOC returns the sum of LOC over the function identified by name/file
+// and all functions collected as reachable from it by collectRachable.
+// If the function can't be found, a warning is printed and 0 is returned.
+func (ctx *context) reachableLOC(name, file string) int {
+	fn := ctx.findFunc(name, file)
+	if fn == nil {
+		ctx.warn("can't find function %v called in %v", name, file)
+		return 0
+	}
+	reachable := make(map[*Function]bool)
+	ctx.collectRachable(fn, reachable)
+	loc := 0
+	// The set includes fn itself, so its own LOC is counted as well.
+	for callee := range reachable {
+		loc += callee.LOC
+	}
+	return loc
+}
+
+// collectRachable marks fn and every function transitively reachable from it
+// via resolved calls in the reachable set. Callees that are already marked,
+// or that have too many callers, are not followed.
+func (ctx *context) collectRachable(fn *Function, reachable map[*Function]bool) {
+	// Very common functions are skipped when computing reachability for complexity analysis:
+	// attributing kmalloc/printk to each caller is not useful (they have ~10K calls).
+	// Some functions are also common within a subsystem (e.g. called in some parts of fs/net).
+	// The threshold below is somewhat arbitrary; it is based on the number of callers
+	// observed in the syzbot kernel:
+	// 6 callers - 2272 functions
+	// 5 callers - 3468 functions
+	// 4 callers - 6295 functions
+	// 3 callers - 16527 functions
+	const commonFuncThreshold = 5
+
+	// Explicit worklist instead of recursion; the starting function is always included.
+	reachable[fn] = true
+	pending := []*Function{fn}
+	for len(pending) != 0 {
+		last := len(pending) - 1
+		cur := pending[last]
+		pending = pending[:last]
+		for _, next := range cur.calls {
+			if reachable[next] || next.callers >= commonFuncThreshold {
+				continue
+			}
+			reachable[next] = true
+			pending = append(pending, next)
+		}
+	}
+}
+
+// findFunc looks up a function by name as seen from the given file.
+// The file-qualified key is tried first; if it is missing, the lookup
+// falls back to the bare name. Returns nil if neither key is present.
+func (ctx *context) findFunc(name, file string) *Function {
+	local, ok := ctx.funcs[file+name]
+	if !ok || local == nil {
+		return ctx.funcs[name]
+	}
+	return local
+}