From 0b3a8483a73330b481a0d9fef2e276eb06465bc0 Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Fri, 18 Jul 2025 12:21:39 +0200 Subject: all: determine patched symbols for focused fuzzing Hash the code section of the individual symbols from vmlinux.o and use it to determine the functions that changed their bodies between the base and the patched build. If the number of affected symbols is reasonable (<5%), fuzz it with the highest priority. --- pkg/build/linux.go | 52 +++++++++++++++++++++++++++++++++ pkg/manager/diff.go | 48 +++++++++++++++++++++++++----- pkg/manager/diff_test.go | 47 +++++++++++++++++++++++++++-- syz-cluster/workflow/build-step/main.go | 31 ++++++++++++++++++-- syz-cluster/workflow/fuzz-step/main.go | 47 +++++++++++++++++++++++++---- tools/syz-diff/diff.go | 2 +- 6 files changed, 206 insertions(+), 21 deletions(-) diff --git a/pkg/build/linux.go b/pkg/build/linux.go index bb5de4f28..e19eec60e 100644 --- a/pkg/build/linux.go +++ b/pkg/build/linux.go @@ -10,6 +10,7 @@ import ( "debug/elf" "encoding/hex" "fmt" + "io" "os" "path" "path/filepath" @@ -257,6 +258,57 @@ func queryLinuxCompiler(kernelDir string) (string, error) { return string(result[1]), nil } +// ElfSymbolHashes returns a map of sha256 hashes per a symbol contained in the elf file. +// It's best to call it on vmlinux.o since PCs in the binary code are not patched yet. +func ElfSymbolHashes(bin string) (map[string]string, error) { + file, err := elf.Open(bin) + if err != nil { + return nil, err + } + defer file.Close() + + symbols, err := file.Symbols() + if err != nil { + return nil, err + } + + textSection := file.Section(".text") + if textSection == nil { + return nil, fmt.Errorf(".text section not found") + } + + sectionReader, ok := textSection.Open().(io.ReaderAt) + if !ok { + return nil, fmt.Errorf(".text section reader does not support ReadAt") + } + + hashes := make(map[string]string) + for _, s := range symbols { + if elf.ST_TYPE(s.Info) != elf.STT_FUNC || s.Size == 0 { + continue + } + + if s.Section >= elf.SHN_LORESERVE || int(s.Section) >= len(file.Sections) || + file.Sections[s.Section] != textSection { + continue + } + + offset := s.Value - textSection.Addr + if offset+s.Size > textSection.Size { + continue + } + + code := make([]byte, s.Size) + _, err := sectionReader.ReadAt(code, int64(offset)) + if err != nil { + continue + } + hash := sha256.Sum256(code) + hashes[s.Name] = hex.EncodeToString(hash[:]) + } + return hashes, nil +} + // elfBinarySignature calculates signature of an elf binary aiming at runtime behavior // (text/data, debug info is ignored). func elfBinarySignature(bin string, tracer debugtracer.DebugTracer) (string, error) { diff --git a/pkg/manager/diff.go b/pkg/manager/diff.go index 64d0af444..aa640515d 100644 --- a/pkg/manager/diff.go +++ b/pkg/manager/diff.go @@ -297,9 +297,9 @@ func (dc *diffContext) monitorPatchedCoverage(ctx context.Context) error { return nil } focusAreaStats := dc.new.progsPerArea() - if focusAreaStats[modifiedArea]+focusAreaStats[includesArea] > 0 { - log.Logf(0, "fuzzer has reached the modified code (%d + %d), continuing fuzzing", - focusAreaStats[modifiedArea], focusAreaStats[includesArea]) + if focusAreaStats[symbolsArea]+focusAreaStats[filesArea]+focusAreaStats[includesArea] > 0 { + log.Logf(0, "fuzzer has reached the modified code (%d + %d + %d), continuing fuzzing", + focusAreaStats[symbolsArea], focusAreaStats[filesArea], focusAreaStats[includesArea]) return nil } log.Logf(0, "fuzzer has not reached the modified code in %s, aborting", @@ -721,18 +721,32 @@ func (rr *reproRunner) Run(ctx context.Context, r *repro.Result) { } const ( - modifiedArea = "modified" + symbolsArea = "symbols" + filesArea = "files" includesArea = "included" ) -func PatchFocusAreas(cfg *mgrconfig.Config, gitPatches [][]byte) { +func PatchFocusAreas(cfg *mgrconfig.Config, gitPatches [][]byte, baseHashes, patchedHashes map[string]string) { + funcs := modifiedSymbols(baseHashes, patchedHashes) + if len(funcs) > 0 { + log.Logf(0, "adding modified_functions to focus areas: %q", funcs) + cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas, + mgrconfig.FocusArea{ + Name: symbolsArea, + Filter: mgrconfig.CovFilterCfg{ + Functions: funcs, + }, + Weight: 6.0, + }) + } + direct, transitive := affectedFiles(cfg, gitPatches) if len(direct) > 0 { sort.Strings(direct) - log.Logf(0, "adding directly modified files to focus_order: %q", direct) + log.Logf(0, "adding directly modified files to focus areas: %q", direct) cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas, mgrconfig.FocusArea{ - Name: modifiedArea, + Name: filesArea, Filter: mgrconfig.CovFilterCfg{ Files: direct, }, @@ -742,7 +756,7 @@ func PatchFocusAreas(cfg *mgrconfig.Config, gitPatches [][]byte) { if len(transitive) > 0 { sort.Strings(transitive) - log.Logf(0, "adding transitively affected to focus_order: %q", transitive) + log.Logf(0, "adding transitively affected to focus areas: %q", transitive) cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas, mgrconfig.FocusArea{ Name: includesArea, @@ -808,3 +822,21 @@ func affectedFiles(cfg *mgrconfig.Config, gitPatches [][]byte) (direct, transiti } return } + +// If there are too many different symbols, they are no longer specific enough. +// Don't use them to focus the fuzzer. +const modifiedSymbolThreshold = 0.05 + +func modifiedSymbols(baseHashes, patchedHashes map[string]string) []string { + var ret []string + for name, hash := range patchedHashes { + if baseHash, ok := baseHashes[name]; !ok || baseHash != hash { + ret = append(ret, name) + if float64(len(ret)) > float64(len(patchedHashes))*modifiedSymbolThreshold { + return nil + } + } + } + sort.Strings(ret) + return ret +} diff --git a/pkg/manager/diff_test.go b/pkg/manager/diff_test.go index f2408f13b..9e27dc288 100644 --- a/pkg/manager/diff_test.go +++ b/pkg/manager/diff_test.go @@ -4,6 +4,7 @@ package manager import ( + "fmt" "testing" "github.com/google/syzkaller/pkg/mgrconfig" @@ -24,6 +25,10 @@ int main(void) { }`, "c.c": `int main(void) { }`, })) + baseHashes, patchedHashes := dummySymbolHashes(), dummySymbolHashes() + baseHashes["function"] = "hash1" + patchedHashes["function"] = "hash2" + PatchFocusAreas(cfg, [][]byte{ []byte(`diff --git a/b.c b/b.c index 103167d..fbf7a68 100644 @@ -34,7 +39,7 @@ index 103167d..fbf7a68 100644 \ No newline at end of file +int main(void) { return 1; } \ No newline at end of file`), - // Also, emulate an update to te header.h. + // Also, emulate an update to header.h. []byte(`diff --git a/header.h b/header.h index 103167d..fbf7a68 100644 --- a/header.h @@ -44,11 +49,18 @@ index 103167d..fbf7a68 100644 \ No newline at end of file +Test2 \ No newline at end of file`), - }) + }, baseHashes, patchedHashes) assert.Equal(t, []mgrconfig.FocusArea{ { - Name: modifiedArea, + Name: symbolsArea, + Filter: mgrconfig.CovFilterCfg{ + Functions: []string{"function"}, + }, + Weight: 6.0, + }, + { + Name: filesArea, Filter: mgrconfig.CovFilterCfg{ Files: []string{"b.c", "header.h"}, }, @@ -66,3 +78,32 @@ index 103167d..fbf7a68 100644 }, }, cfg.Experimental.FocusAreas) } + +func dummySymbolHashes() map[string]string { + ret := map[string]string{} + for i := 0; i < 100; i++ { + ret[fmt.Sprint(i)] = fmt.Sprint(i) + } + return ret +} + +func TestModifiedSymbols(t *testing.T) { + t.Run("too many changed", func(t *testing.T) { + ret := modifiedSymbols(map[string]string{ + "functionA": "hash1", + "functionB": "hash2", + }, map[string]string{ + "functionA": "hash1", + "functionB": "hash is not hash2", + }) + assert.Empty(t, ret) + }) + t.Run("less than threshold", func(t *testing.T) { + base, patched := dummySymbolHashes(), dummySymbolHashes() + base["function"] = "hash1" + patched["function"] = "hash2" + base["function2"] = "hash1" + patched["function2"] = "hash2" + assert.Equal(t, []string{"function", "function2"}, modifiedSymbols(base, patched)) + }) +} diff --git a/syz-cluster/workflow/build-step/main.go b/syz-cluster/workflow/build-step/main.go index 1abf5bc4a..3797681ea 100644 --- a/syz-cluster/workflow/build-step/main.go +++ b/syz-cluster/workflow/build-step/main.go @@ -10,6 +10,10 @@ import ( "errors" "flag" "fmt" + "log" + "os" + "path/filepath" + "github.com/google/syzkaller/pkg/build" "github.com/google/syzkaller/pkg/debugtracer" "github.com/google/syzkaller/pkg/osutil" @@ -18,9 +22,6 @@ import ( "github.com/google/syzkaller/syz-cluster/pkg/api" "github.com/google/syzkaller/syz-cluster/pkg/app" "github.com/google/syzkaller/syz-cluster/pkg/triage" - "log" - "os" - "path/filepath" ) var ( @@ -261,8 +262,14 @@ func buildKernel(tracer debugtracer.DebugTracer, req *api.BuildRequest) (*BuildR return nil, err } tracer.Log("build finished successfully") + + err = saveSymbolHashes(tracer) + if err != nil { + tracer.Log("failed to save symbol hashes: %s", err) + } // Note: Output directory has the following structure: // |-- image + // |-- symbol_hashes.json // |-- kernel // |-- kernel.config // `-- obj @@ -270,6 +277,24 @@ func buildKernel(tracer debugtracer.DebugTracer, req *api.BuildRequest) (*BuildR return ret, nil } +func saveSymbolHashes(tracer debugtracer.DebugTracer) error { + hashes, err := build.ElfSymbolHashes(filepath.Join(*flagRepository, "vmlinux.o")) + if err != nil { + return fmt.Errorf("failed to query symbol hashes: %w", err) + } + tracer.Log("extracted hashes for %d symbols", len(hashes)) + file, err := os.Create(filepath.Join(*flagOutput, "symbol_hashes.json")) + if err != nil { + return fmt.Errorf("failed to open symbol_hashes.json: %w", err) + } + defer file.Close() + err = json.NewEncoder(file).Encode(hashes) + if err != nil { + return fmt.Errorf("failed to serialize: %w", err) + } + return nil +} + func ensureFlags(args ...string) { for i := 0; i+1 < len(args); i += 2 { if args[i] == "" { diff --git a/syz-cluster/workflow/fuzz-step/main.go b/syz-cluster/workflow/fuzz-step/main.go index 6996ae8a0..f7ed5ef39 100644 --- a/syz-cluster/workflow/fuzz-step/main.go +++ b/syz-cluster/workflow/fuzz-step/main.go @@ -10,6 +10,12 @@ import ( "errors" "flag" "fmt" + "io" + "net/http" + "os" + "path/filepath" + "time" + "github.com/google/syzkaller/pkg/config" "github.com/google/syzkaller/pkg/log" "github.com/google/syzkaller/pkg/manager" @@ -19,11 +25,6 @@ import ( "github.com/google/syzkaller/syz-cluster/pkg/api" "github.com/google/syzkaller/syz-cluster/pkg/app" "golang.org/x/sync/errgroup" - "io" - "net/http" - "os" - "path/filepath" - "time" ) var ( @@ -93,7 +94,12 @@ func run(baseCtx context.Context, client *api.Client, timeout time.Duration, if err != nil { return fmt.Errorf("failed to load configs: %w", err) } - manager.PatchFocusAreas(patched, series.PatchBodies()) + + baseSymbols, patchedSymbols, err := readSymbolHashes() + if err != nil { + app.Errorf("failed to read symbol hashes: %v", err) + } + manager.PatchFocusAreas(patched, series.PatchBodies(), baseSymbols, patchedSymbols) if *flagCorpusURL != "" { err := downloadCorpus(baseCtx, patched.Workdir, *flagCorpusURL) @@ -282,6 +288,35 @@ func reportFinding(ctx context.Context, client *api.Client, bug *manager.UniqueB return client.UploadFinding(ctx, finding) } +func readSymbolHashes() (base, patched map[string]string, err error) { + // These are saved by the build step. + base, err = readJSONMap("/base/symbol_hashes.json") + if err != nil { + return nil, nil, fmt.Errorf("failed to read base hashes: %w", err) + } + patched, err = readJSONMap("/patched/symbol_hashes.json") + if err != nil { + return nil, nil, fmt.Errorf("failed to read patched hashes: %w", err) + } + log.Logf(0, "extracted %d symbol hashes for base and %d for patched", len(base), len(patched)) + return +} + +func readJSONMap(file string) (map[string]string, error) { + f, err := os.Open(file) + if err != nil { + return nil, err + } + defer f.Close() + + var data map[string]string + err = json.NewDecoder(f).Decode(&data) + if err != nil { + return nil, err + } + return data, nil +} + func compressArtifacts(dir string) (io.Reader, error) { var buf bytes.Buffer lw := &LimitedWriter{ diff --git a/tools/syz-diff/diff.go b/tools/syz-diff/diff.go index c77728d4a..3e7230fe0 100644 --- a/tools/syz-diff/diff.go +++ b/tools/syz-diff/diff.go @@ -43,7 +43,7 @@ func main() { if err != nil { log.Fatal(err) } - manager.PatchFocusAreas(newCfg, [][]byte{data}) + manager.PatchFocusAreas(newCfg, [][]byte{data}, nil, nil) } ctx := vm.ShutdownCtx() -- cgit mrf-deployment