diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2025-07-18 12:21:39 +0200 |
|---|---|---|
| committer | Aleksandr Nogikh <nogikh@google.com> | 2025-07-22 14:49:22 +0000 |
| commit | 0b3a8483a73330b481a0d9fef2e276eb06465bc0 (patch) | |
| tree | aebd4c3a914f6c69d6d6f89c8b2e68a6c5d868af | |
| parent | 6ce0983056e7e9efff04681de67e6310b88fdc16 (diff) | |
all: determine patched symbols for focused fuzzing
Hash the code of each individual function symbol from vmlinux.o and compare
the hashes to determine which functions changed their bodies between the base
and the patched build.
If the number of affected symbols is reasonable (<5%), fuzz them with the
highest priority.
| -rw-r--r-- | pkg/build/linux.go | 52 | ||||
| -rw-r--r-- | pkg/manager/diff.go | 48 | ||||
| -rw-r--r-- | pkg/manager/diff_test.go | 47 | ||||
| -rw-r--r-- | syz-cluster/workflow/build-step/main.go | 31 | ||||
| -rw-r--r-- | syz-cluster/workflow/fuzz-step/main.go | 47 | ||||
| -rw-r--r-- | tools/syz-diff/diff.go | 2 |
6 files changed, 206 insertions, 21 deletions
diff --git a/pkg/build/linux.go b/pkg/build/linux.go index bb5de4f28..e19eec60e 100644 --- a/pkg/build/linux.go +++ b/pkg/build/linux.go @@ -10,6 +10,7 @@ import ( "debug/elf" "encoding/hex" "fmt" + "io" "os" "path" "path/filepath" @@ -257,6 +258,57 @@ func queryLinuxCompiler(kernelDir string) (string, error) { return string(result[1]), nil } +// ElfSymbolHashes returns a map of sha256 hashes per a symbol contained in the elf file. +// It's best to call it on vmlinux.o since PCs in the binary code are not patched yet. +func ElfSymbolHashes(bin string) (map[string]string, error) { + file, err := elf.Open(bin) + if err != nil { + return nil, err + } + defer file.Close() + + symbols, err := file.Symbols() + if err != nil { + return nil, err + } + + textSection := file.Section(".text") + if textSection == nil { + return nil, fmt.Errorf(".text section not found") + } + + sectionReader, ok := textSection.Open().(io.ReaderAt) + if !ok { + return nil, fmt.Errorf(".text section reader does not support ReadAt") + } + + hashes := make(map[string]string) + for _, s := range symbols { + if elf.ST_TYPE(s.Info) != elf.STT_FUNC || s.Size == 0 { + continue + } + + if s.Section >= elf.SHN_LORESERVE || int(s.Section) >= len(file.Sections) || + file.Sections[s.Section] != textSection { + continue + } + + offset := s.Value - textSection.Addr + if offset+s.Size > textSection.Size { + continue + } + + code := make([]byte, s.Size) + _, err := sectionReader.ReadAt(code, int64(offset)) + if err != nil { + continue + } + hash := sha256.Sum256(code) + hashes[s.Name] = hex.EncodeToString(hash[:]) + } + return hashes, nil +} + // elfBinarySignature calculates signature of an elf binary aiming at runtime behavior // (text/data, debug info is ignored). 
func elfBinarySignature(bin string, tracer debugtracer.DebugTracer) (string, error) { diff --git a/pkg/manager/diff.go b/pkg/manager/diff.go index 64d0af444..aa640515d 100644 --- a/pkg/manager/diff.go +++ b/pkg/manager/diff.go @@ -297,9 +297,9 @@ func (dc *diffContext) monitorPatchedCoverage(ctx context.Context) error { return nil } focusAreaStats := dc.new.progsPerArea() - if focusAreaStats[modifiedArea]+focusAreaStats[includesArea] > 0 { - log.Logf(0, "fuzzer has reached the modified code (%d + %d), continuing fuzzing", - focusAreaStats[modifiedArea], focusAreaStats[includesArea]) + if focusAreaStats[symbolsArea]+focusAreaStats[filesArea]+focusAreaStats[includesArea] > 0 { + log.Logf(0, "fuzzer has reached the modified code (%d + %d + %d), continuing fuzzing", + focusAreaStats[symbolsArea], focusAreaStats[filesArea], focusAreaStats[includesArea]) return nil } log.Logf(0, "fuzzer has not reached the modified code in %s, aborting", @@ -721,18 +721,32 @@ func (rr *reproRunner) Run(ctx context.Context, r *repro.Result) { } const ( - modifiedArea = "modified" + symbolsArea = "symbols" + filesArea = "files" includesArea = "included" ) -func PatchFocusAreas(cfg *mgrconfig.Config, gitPatches [][]byte) { +func PatchFocusAreas(cfg *mgrconfig.Config, gitPatches [][]byte, baseHashes, patchedHashes map[string]string) { + funcs := modifiedSymbols(baseHashes, patchedHashes) + if len(funcs) > 0 { + log.Logf(0, "adding modified_functions to focus areas: %q", funcs) + cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas, + mgrconfig.FocusArea{ + Name: symbolsArea, + Filter: mgrconfig.CovFilterCfg{ + Functions: funcs, + }, + Weight: 6.0, + }) + } + direct, transitive := affectedFiles(cfg, gitPatches) if len(direct) > 0 { sort.Strings(direct) - log.Logf(0, "adding directly modified files to focus_order: %q", direct) + log.Logf(0, "adding directly modified files to focus areas: %q", direct) cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas, 
mgrconfig.FocusArea{ - Name: modifiedArea, + Name: filesArea, Filter: mgrconfig.CovFilterCfg{ Files: direct, }, @@ -742,7 +756,7 @@ func PatchFocusAreas(cfg *mgrconfig.Config, gitPatches [][]byte) { if len(transitive) > 0 { sort.Strings(transitive) - log.Logf(0, "adding transitively affected to focus_order: %q", transitive) + log.Logf(0, "adding transitively affected to focus areas: %q", transitive) cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas, mgrconfig.FocusArea{ Name: includesArea, @@ -808,3 +822,21 @@ func affectedFiles(cfg *mgrconfig.Config, gitPatches [][]byte) (direct, transiti } return } + +// If there are too many different symbols, they are no longer specific enough. +// Don't use them to focus the fuzzer. +const modifiedSymbolThreshold = 0.05 + +func modifiedSymbols(baseHashes, patchedHashes map[string]string) []string { + var ret []string + for name, hash := range patchedHashes { + if baseHash, ok := baseHashes[name]; !ok || baseHash != hash { + ret = append(ret, name) + if float64(len(ret)) > float64(len(patchedHashes))*modifiedSymbolThreshold { + return nil + } + } + } + sort.Strings(ret) + return ret +} diff --git a/pkg/manager/diff_test.go b/pkg/manager/diff_test.go index f2408f13b..9e27dc288 100644 --- a/pkg/manager/diff_test.go +++ b/pkg/manager/diff_test.go @@ -4,6 +4,7 @@ package manager import ( + "fmt" "testing" "github.com/google/syzkaller/pkg/mgrconfig" @@ -24,6 +25,10 @@ int main(void) { }`, "c.c": `int main(void) { }`, })) + baseHashes, patchedHashes := dummySymbolHashes(), dummySymbolHashes() + baseHashes["function"] = "hash1" + patchedHashes["function"] = "hash2" + PatchFocusAreas(cfg, [][]byte{ []byte(`diff --git a/b.c b/b.c index 103167d..fbf7a68 100644 @@ -34,7 +39,7 @@ index 103167d..fbf7a68 100644 \ No newline at end of file +int main(void) { return 1; } \ No newline at end of file`), - // Also, emulate an update to te header.h. + // Also, emulate an update to header.h. 
[]byte(`diff --git a/header.h b/header.h index 103167d..fbf7a68 100644 --- a/header.h @@ -44,11 +49,18 @@ index 103167d..fbf7a68 100644 \ No newline at end of file +Test2 \ No newline at end of file`), - }) + }, baseHashes, patchedHashes) assert.Equal(t, []mgrconfig.FocusArea{ { - Name: modifiedArea, + Name: symbolsArea, + Filter: mgrconfig.CovFilterCfg{ + Functions: []string{"function"}, + }, + Weight: 6.0, + }, + { + Name: filesArea, Filter: mgrconfig.CovFilterCfg{ Files: []string{"b.c", "header.h"}, }, @@ -66,3 +78,32 @@ index 103167d..fbf7a68 100644 }, }, cfg.Experimental.FocusAreas) } + +func dummySymbolHashes() map[string]string { + ret := map[string]string{} + for i := 0; i < 100; i++ { + ret[fmt.Sprint(i)] = fmt.Sprint(i) + } + return ret +} + +func TestModifiedSymbols(t *testing.T) { + t.Run("too many changed", func(t *testing.T) { + ret := modifiedSymbols(map[string]string{ + "functionA": "hash1", + "functionB": "hash2", + }, map[string]string{ + "functionA": "hash1", + "functionB": "hash is not hash2", + }) + assert.Empty(t, ret) + }) + t.Run("less than threshold", func(t *testing.T) { + base, patched := dummySymbolHashes(), dummySymbolHashes() + base["function"] = "hash1" + patched["function"] = "hash2" + base["function2"] = "hash1" + patched["function2"] = "hash2" + assert.Equal(t, []string{"function", "function2"}, modifiedSymbols(base, patched)) + }) +} diff --git a/syz-cluster/workflow/build-step/main.go b/syz-cluster/workflow/build-step/main.go index 1abf5bc4a..3797681ea 100644 --- a/syz-cluster/workflow/build-step/main.go +++ b/syz-cluster/workflow/build-step/main.go @@ -10,6 +10,10 @@ import ( "errors" "flag" "fmt" + "log" + "os" + "path/filepath" + "github.com/google/syzkaller/pkg/build" "github.com/google/syzkaller/pkg/debugtracer" "github.com/google/syzkaller/pkg/osutil" @@ -18,9 +22,6 @@ import ( "github.com/google/syzkaller/syz-cluster/pkg/api" "github.com/google/syzkaller/syz-cluster/pkg/app" 
"github.com/google/syzkaller/syz-cluster/pkg/triage" - "log" - "os" - "path/filepath" ) var ( @@ -261,8 +262,14 @@ func buildKernel(tracer debugtracer.DebugTracer, req *api.BuildRequest) (*BuildR return nil, err } tracer.Log("build finished successfully") + + err = saveSymbolHashes(tracer) + if err != nil { + tracer.Log("failed to save symbol hashes: %s", err) + } // Note: Output directory has the following structure: // |-- image + // |-- symbol_hashes.json // |-- kernel // |-- kernel.config // `-- obj @@ -270,6 +277,24 @@ func buildKernel(tracer debugtracer.DebugTracer, req *api.BuildRequest) (*BuildR return ret, nil } +func saveSymbolHashes(tracer debugtracer.DebugTracer) error { + hashes, err := build.ElfSymbolHashes(filepath.Join(*flagRepository, "vmlinux.o")) + if err != nil { + return fmt.Errorf("failed to query symbol hashes: %w", err) + } + tracer.Log("extracted hashes for %d symbols", len(hashes)) + file, err := os.Create(filepath.Join(*flagOutput, "symbol_hashes.json")) + if err != nil { + return fmt.Errorf("failed to open symbol_hashes.json: %w", err) + } + defer file.Close() + err = json.NewEncoder(file).Encode(hashes) + if err != nil { + return fmt.Errorf("failed to serialize: %w", err) + } + return nil +} + func ensureFlags(args ...string) { for i := 0; i+1 < len(args); i += 2 { if args[i] == "" { diff --git a/syz-cluster/workflow/fuzz-step/main.go b/syz-cluster/workflow/fuzz-step/main.go index 6996ae8a0..f7ed5ef39 100644 --- a/syz-cluster/workflow/fuzz-step/main.go +++ b/syz-cluster/workflow/fuzz-step/main.go @@ -10,6 +10,12 @@ import ( "errors" "flag" "fmt" + "io" + "net/http" + "os" + "path/filepath" + "time" + "github.com/google/syzkaller/pkg/config" "github.com/google/syzkaller/pkg/log" "github.com/google/syzkaller/pkg/manager" @@ -19,11 +25,6 @@ import ( "github.com/google/syzkaller/syz-cluster/pkg/api" "github.com/google/syzkaller/syz-cluster/pkg/app" "golang.org/x/sync/errgroup" - "io" - "net/http" - "os" - "path/filepath" - "time" ) var ( @@ 
-93,7 +94,12 @@ func run(baseCtx context.Context, client *api.Client, timeout time.Duration, if err != nil { return fmt.Errorf("failed to load configs: %w", err) } - manager.PatchFocusAreas(patched, series.PatchBodies()) + + baseSymbols, patchedSymbols, err := readSymbolHashes() + if err != nil { + app.Errorf("failed to read symbol hashes: %v", err) + } + manager.PatchFocusAreas(patched, series.PatchBodies(), baseSymbols, patchedSymbols) if *flagCorpusURL != "" { err := downloadCorpus(baseCtx, patched.Workdir, *flagCorpusURL) @@ -282,6 +288,35 @@ func reportFinding(ctx context.Context, client *api.Client, bug *manager.UniqueB return client.UploadFinding(ctx, finding) } +func readSymbolHashes() (base, patched map[string]string, err error) { + // These are saved by the build step. + base, err = readJSONMap("/base/symbol_hashes.json") + if err != nil { + return nil, nil, fmt.Errorf("failed to read base hashes: %w", err) + } + patched, err = readJSONMap("/patched/symbol_hashes.json") + if err != nil { + return nil, nil, fmt.Errorf("failed to read patched hashes: %w", err) + } + log.Logf(0, "extracted %d symbol hashes for base and %d for patched", len(base), len(patched)) + return +} + +func readJSONMap(file string) (map[string]string, error) { + f, err := os.Open(file) + if err != nil { + return nil, err + } + defer f.Close() + + var data map[string]string + err = json.NewDecoder(f).Decode(&data) + if err != nil { + return nil, err + } + return data, nil +} + func compressArtifacts(dir string) (io.Reader, error) { var buf bytes.Buffer lw := &LimitedWriter{ diff --git a/tools/syz-diff/diff.go b/tools/syz-diff/diff.go index c77728d4a..3e7230fe0 100644 --- a/tools/syz-diff/diff.go +++ b/tools/syz-diff/diff.go @@ -43,7 +43,7 @@ func main() { if err != nil { log.Fatal(err) } - manager.PatchFocusAreas(newCfg, [][]byte{data}) + manager.PatchFocusAreas(newCfg, [][]byte{data}, nil, nil) } ctx := vm.ShutdownCtx() |
