From 0b3a8483a73330b481a0d9fef2e276eb06465bc0 Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Fri, 18 Jul 2025 12:21:39 +0200 Subject: all: determine patched symbols for focused fuzzing Hash the code section of the individual symbols from vmlinux.o and use it to determine the functions that changed their bodies between the base and the patched build. If the number of affected symbols is reasonable (<5%), fuzz it with the highest priority. --- pkg/build/linux.go | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ pkg/manager/diff.go | 48 ++++++++++++++++++++++++++++++++++++-------- pkg/manager/diff_test.go | 47 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 136 insertions(+), 11 deletions(-) (limited to 'pkg') diff --git a/pkg/build/linux.go b/pkg/build/linux.go index bb5de4f28..e19eec60e 100644 --- a/pkg/build/linux.go +++ b/pkg/build/linux.go @@ -10,6 +10,7 @@ import ( "debug/elf" "encoding/hex" "fmt" + "io" "os" "path" "path/filepath" @@ -257,6 +258,57 @@ func queryLinuxCompiler(kernelDir string) (string, error) { return string(result[1]), nil } +// ElfSymbolHashes returns a map of sha256 hashes per a symbol contained in the elf file. +// It's best to call it on vmlinux.o since PCs in the binary code are not patched yet. +func ElfSymbolHashes(bin string) (map[string]string, error) { + file, err := elf.Open(bin) + if err != nil { + return nil, err + } + defer file.Close() + + symbols, err := file.Symbols() + if err != nil { + return nil, err + } + + textSection := file.Section(".text") + if textSection == nil { + return nil, fmt.Errorf(".text section not found") + } + + sectionReader, ok := textSection.Open().(io.ReaderAt) + if !ok { + return nil, fmt.Errorf(".text section reader does not support ReadAt") + } + + hashes := make(map[string]string) + for _, s := range symbols { + if elf.ST_TYPE(s.Info) != elf.STT_FUNC || s.Size == 0 { + continue + } + + if s.Section >= elf.SHN_LORESERVE || int(s.Section) >= len(file.Sections) || + file.Sections[s.Section] != textSection { + continue + } + + offset := s.Value - textSection.Addr + if offset+s.Size > textSection.Size { + continue + } + + code := make([]byte, s.Size) + _, err := sectionReader.ReadAt(code, int64(offset)) + if err != nil { + continue + } + hash := sha256.Sum256(code) + hashes[s.Name] = hex.EncodeToString(hash[:]) + } + return hashes, nil +} + // elfBinarySignature calculates signature of an elf binary aiming at runtime behavior // (text/data, debug info is ignored). func elfBinarySignature(bin string, tracer debugtracer.DebugTracer) (string, error) { diff --git a/pkg/manager/diff.go b/pkg/manager/diff.go index 64d0af444..aa640515d 100644 --- a/pkg/manager/diff.go +++ b/pkg/manager/diff.go @@ -297,9 +297,9 @@ func (dc *diffContext) monitorPatchedCoverage(ctx context.Context) error { return nil } focusAreaStats := dc.new.progsPerArea() - if focusAreaStats[modifiedArea]+focusAreaStats[includesArea] > 0 { - log.Logf(0, "fuzzer has reached the modified code (%d + %d), continuing fuzzing", - focusAreaStats[modifiedArea], focusAreaStats[includesArea]) + if focusAreaStats[symbolsArea]+focusAreaStats[filesArea]+focusAreaStats[includesArea] > 0 { + log.Logf(0, "fuzzer has reached the modified code (%d + %d + %d), continuing fuzzing", + focusAreaStats[symbolsArea], focusAreaStats[filesArea], focusAreaStats[includesArea]) return nil } log.Logf(0, "fuzzer has not reached the modified code in %s, aborting", @@ -721,18 +721,32 @@ func (rr *reproRunner) Run(ctx context.Context, r *repro.Result) { } const ( - modifiedArea = "modified" + symbolsArea = "symbols" + filesArea = "files" includesArea = "included" ) -func PatchFocusAreas(cfg *mgrconfig.Config, gitPatches [][]byte) { +func PatchFocusAreas(cfg *mgrconfig.Config, gitPatches [][]byte, baseHashes, patchedHashes map[string]string) { + funcs := modifiedSymbols(baseHashes, patchedHashes) + if len(funcs) > 0 { + log.Logf(0, "adding modified_functions to focus areas: %q", funcs) + cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas, + mgrconfig.FocusArea{ + Name: symbolsArea, + Filter: mgrconfig.CovFilterCfg{ + Functions: funcs, + }, + Weight: 6.0, + }) + } + direct, transitive := affectedFiles(cfg, gitPatches) if len(direct) > 0 { sort.Strings(direct) - log.Logf(0, "adding directly modified files to focus_order: %q", direct) + log.Logf(0, "adding directly modified files to focus areas: %q", direct) cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas, mgrconfig.FocusArea{ - Name: modifiedArea, + Name: filesArea, Filter: mgrconfig.CovFilterCfg{ Files: direct, }, @@ -742,7 +756,7 @@ func PatchFocusAreas(cfg *mgrconfig.Config, gitPatches [][]byte) { if len(transitive) > 0 { sort.Strings(transitive) - log.Logf(0, "adding transitively affected to focus_order: %q", transitive) + log.Logf(0, "adding transitively affected to focus areas: %q", transitive) cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas, mgrconfig.FocusArea{ Name: includesArea, @@ -808,3 +822,21 @@ func affectedFiles(cfg *mgrconfig.Config, gitPatches [][]byte) (direct, transiti } return } + +// If there are too many different symbols, they are no longer specific enough. +// Don't use them to focus the fuzzer. +const modifiedSymbolThreshold = 0.05 + +func modifiedSymbols(baseHashes, patchedHashes map[string]string) []string { + var ret []string + for name, hash := range patchedHashes { + if baseHash, ok := baseHashes[name]; !ok || baseHash != hash { + ret = append(ret, name) + if float64(len(ret)) > float64(len(patchedHashes))*modifiedSymbolThreshold { + return nil + } + } + } + sort.Strings(ret) + return ret +} diff --git a/pkg/manager/diff_test.go b/pkg/manager/diff_test.go index f2408f13b..9e27dc288 100644 --- a/pkg/manager/diff_test.go +++ b/pkg/manager/diff_test.go @@ -4,6 +4,7 @@ package manager import ( + "fmt" "testing" "github.com/google/syzkaller/pkg/mgrconfig" @@ -24,6 +25,10 @@ int main(void) { }`, "c.c": `int main(void) { }`, })) + baseHashes, patchedHashes := dummySymbolHashes(), dummySymbolHashes() + baseHashes["function"] = "hash1" + patchedHashes["function"] = "hash2" + PatchFocusAreas(cfg, [][]byte{ []byte(`diff --git a/b.c b/b.c index 103167d..fbf7a68 100644 @@ -34,7 +39,7 @@ index 103167d..fbf7a68 100644 \ No newline at end of file +int main(void) { return 1; } \ No newline at end of file`), - // Also, emulate an update to te header.h. + // Also, emulate an update to header.h. []byte(`diff --git a/header.h b/header.h index 103167d..fbf7a68 100644 --- a/header.h @@ -44,11 +49,18 @@ index 103167d..fbf7a68 100644 \ No newline at end of file +Test2 \ No newline at end of file`), - }) + }, baseHashes, patchedHashes) assert.Equal(t, []mgrconfig.FocusArea{ { - Name: modifiedArea, + Name: symbolsArea, + Filter: mgrconfig.CovFilterCfg{ + Functions: []string{"function"}, + }, + Weight: 6.0, + }, + { + Name: filesArea, Filter: mgrconfig.CovFilterCfg{ Files: []string{"b.c", "header.h"}, }, @@ -66,3 +78,32 @@ index 103167d..fbf7a68 100644 }, }, cfg.Experimental.FocusAreas) } + +func dummySymbolHashes() map[string]string { + ret := map[string]string{} + for i := 0; i < 100; i++ { + ret[fmt.Sprint(i)] = fmt.Sprint(i) + } + return ret +} + +func TestModifiedSymbols(t *testing.T) { + t.Run("too many changed", func(t *testing.T) { + ret := modifiedSymbols(map[string]string{ + "functionA": "hash1", + "functionB": "hash2", + }, map[string]string{ + "functionA": "hash1", + "functionB": "hash is not hash2", + }) + assert.Empty(t, ret) + }) + t.Run("less than threshold", func(t *testing.T) { + base, patched := dummySymbolHashes(), dummySymbolHashes() + base["function"] = "hash1" + patched["function"] = "hash2" + base["function2"] = "hash1" + patched["function2"] = "hash2" + assert.Equal(t, []string{"function", "function2"}, modifiedSymbols(base, patched)) + }) +} -- cgit mrf-deployment