From ef0cd4a7bc26b206a7a5af18beed1589c388a204 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 11 Dec 2024 16:49:01 +0100 Subject: tools/syz-declextract: extract info about all functions Extract info about all functions, and compute total LOC for each interface. For now only static calls are considered; indirect calls are not handled yet. This is just groundwork for more complex callgraph/dataflow analysis. --- pkg/declextract/declextract.go | 8 +++++ pkg/declextract/entity.go | 17 ++++++++++ pkg/declextract/interface.go | 71 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+) (limited to 'pkg') diff --git a/pkg/declextract/declextract.go b/pkg/declextract/declextract.go index 30caeaa2d..4edb6c867 100644 --- a/pkg/declextract/declextract.go +++ b/pkg/declextract/declextract.go @@ -7,6 +7,7 @@ import ( "bytes" "errors" "fmt" + "os" "slices" "strings" @@ -19,8 +20,10 @@ func Run(out *Output, probe *ifaceprobe.Info, syscallRename map[string][]string) probe: probe, syscallRename: syscallRename, structs: make(map[string]*Struct), + funcs: make(map[string]*Function), uniqualizer: make(map[string]int), } + ctx.processFunctions() ctx.processIncludes() ctx.processEnums() ctx.processStructs() @@ -37,6 +40,7 @@ type context struct { probe *ifaceprobe.Info syscallRename map[string][]string // syscall function -> syscall names structs map[string]*Struct + funcs map[string]*Function uniqualizer map[string]int interfaces []*Interface descriptions *bytes.Buffer @@ -47,6 +51,10 @@ func (ctx *context) error(msg string, args ...any) { ctx.errs = append(ctx.errs, fmt.Errorf(msg, args...)) } +func (ctx *context) warn(msg string, args ...any) { + fmt.Fprintf(os.Stderr, msg+"\n", args...) +} + func (ctx *context) processIncludes() { // These additional includes must be at the top, because other kernel headers // are broken and won't compile without these additional ones included first.
diff --git a/pkg/declextract/entity.go b/pkg/declextract/entity.go index 7cdd5a73a..ba45cc51c 100644 --- a/pkg/declextract/entity.go +++ b/pkg/declextract/entity.go @@ -11,6 +11,7 @@ import ( ) type Output struct { + Functions []*Function `json:"functions,omitempty"` Includes []string `json:"includes,omitempty"` Defines []*Define `json:"defines,omitempty"` Enums []*Enum `json:"enums,omitempty"` @@ -22,6 +23,17 @@ type Output struct { NetlinkPolicies []*NetlinkPolicy `json:"netlink_policies,omitempty"` } +type Function struct { + Name string `json:"name,omitempty"` + File string `json:"file,omitempty"` + IsStatic bool `json:"is_static,omitempty"` + LOC int `json:"loc,omitempty"` + Calls []string `json:"calls,omitempty"` + + callers int + calls []*Function +} + type Define struct { Name string `json:"name,omitempty"` Value string `json:"value,omitempty"` @@ -147,6 +159,7 @@ type BufferType struct { } func (out *Output) Merge(other *Output) { + out.Functions = append(out.Functions, other.Functions...) out.Includes = append(out.Includes, other.Includes...) out.Defines = append(out.Defines, other.Defines...) out.Enums = append(out.Enums, other.Enums...) @@ -159,6 +172,7 @@ func (out *Output) Merge(other *Output) { } func (out *Output) SortAndDedup() { + out.Functions = sortAndDedupSlice(out.Functions) out.Includes = sortAndDedupSlice(out.Includes) out.Defines = sortAndDedupSlice(out.Defines) out.Enums = sortAndDedupSlice(out.Enums) @@ -173,6 +187,9 @@ func (out *Output) SortAndDedup() { // SetSoureFile attaches the source file to the entities that need it. // The clang tool could do it, but it looks easier to do it here. 
func (out *Output) SetSourceFile(file string, updatePath func(string) string) { + for _, fn := range out.Functions { + fn.File = updatePath(fn.File) + } for i, inc := range out.Includes { out.Includes[i] = updatePath(inc) } diff --git a/pkg/declextract/interface.go b/pkg/declextract/interface.go index dfb223d16..7abce44fb 100644 --- a/pkg/declextract/interface.go +++ b/pkg/declextract/interface.go @@ -5,6 +5,7 @@ package declextract import ( "slices" + "strings" ) type Interface struct { @@ -17,6 +18,7 @@ type Interface struct { Subsystems []string ManualDescriptions bool AutoDescriptions bool + ReachableLOC int } const ( @@ -36,6 +38,7 @@ func (ctx *context) noteInterface(iface *Interface) { func (ctx *context) finishInterfaces() { for _, iface := range ctx.interfaces { + iface.ReachableLOC = ctx.reachableLOC(iface.Func, iface.Files[0]) slices.Sort(iface.Files) iface.Files = slices.Compact(iface.Files) if iface.Access == "" { @@ -44,3 +47,71 @@ func (ctx *context) finishInterfaces() { } ctx.interfaces = sortAndDedupSlice(ctx.interfaces) } + +func (ctx *context) processFunctions() { + for _, fn := range ctx.Functions { + ctx.funcs[fn.File+fn.Name] = fn + // Strictly speaking there may be several different static functions in different headers, + // but we ignore such possibility for now. 
+		if !fn.IsStatic || strings.HasSuffix(fn.File, ".h") {
+			ctx.funcs[fn.Name] = fn
+		}
+	}
+	// Resolve the callee names recorded for each function into *Function links and
+	// count every resolved call on the callee. Unresolvable callees and self-calls are skipped.
+	for _, fn := range ctx.Functions {
+		for _, callee := range fn.Calls {
+			called := ctx.findFunc(callee, fn.File)
+			if called == nil || called == fn {
+				continue
+			}
+			fn.calls = append(fn.calls, called)
+			called.callers++
+		}
+		// The name list is dropped once it has been resolved into fn.calls.
+		fn.Calls = nil
+	}
+}
+
+// reachableLOC returns the summed LOC of the function called name (looked up via findFunc
+// with the given file) and of every function collectRachable reaches from it.
+// If the function is unknown, it prints a warning and returns 0.
+func (ctx *context) reachableLOC(name, file string) int {
+	fn := ctx.findFunc(name, file)
+	if fn == nil {
+		ctx.warn("can't find function %v called in %v", name, file)
+		return 0
+	}
+	reachable := make(map[*Function]bool)
+	ctx.collectRachable(fn, reachable)
+	loc := 0
+	for fn := range reachable {
+		loc += fn.LOC
+	}
+	return loc
+}
+
+// collectRachable adds fn and, recursively, its resolved callees to reachable.
+// Callees that are already in the set, or that have commonFuncThreshold or more
+// recorded callers, are not descended into. fn itself is always added.
+func (ctx *context) collectRachable(fn *Function, reachable map[*Function]bool) {
+	// Ignore very common functions when computing reachability for complexity analysis.
+	// Counting kmalloc/printk against each caller is not useful (they have ~10K calls).
+	// There are also subsystem common functions (e.g. functions called in some parts of fs/net).
+	// The current threshold is somewhat arbitrary and is based on the number of callers in syzbot kernel:
+	// 6 callers - 2272 functions
+	// 5 callers - 3468 functions
+	// 4 callers - 6295 functions
+	// 3 callers - 16527 functions
+	const commonFuncThreshold = 5
+
+	reachable[fn] = true
+	for _, callee := range fn.calls {
+		if reachable[callee] || callee.callers >= commonFuncThreshold {
+			continue
+		}
+		ctx.collectRachable(callee, reachable)
+	}
+}
+
+// findFunc looks a function up by its file-qualified key (file+name) first and
+// falls back to the bare name. It returns nil if neither key is present.
+func (ctx *context) findFunc(name, file string) *Function {
+	if fn := ctx.funcs[file+name]; fn != nil {
+		return fn
+	}
+	return ctx.funcs[name]
+}
-- cgit mrf-deployment