aboutsummaryrefslogtreecommitdiffstats
path: root/pkg
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2025-07-18 12:21:39 +0200
committerAleksandr Nogikh <nogikh@google.com>2025-07-22 14:49:22 +0000
commit0b3a8483a73330b481a0d9fef2e276eb06465bc0 (patch)
treeaebd4c3a914f6c69d6d6f89c8b2e68a6c5d868af /pkg
parent6ce0983056e7e9efff04681de67e6310b88fdc16 (diff)
all: determine patched symbols for focused fuzzing
Hash the code section of the individual symbols from vmlinux.o and use the hashes to determine the functions whose bodies changed between the base and the patched build. If the share of affected symbols is reasonable (<5%), fuzz them with the highest priority.
Diffstat (limited to 'pkg')
-rw-r--r--pkg/build/linux.go52
-rw-r--r--pkg/manager/diff.go48
-rw-r--r--pkg/manager/diff_test.go47
3 files changed, 136 insertions, 11 deletions
diff --git a/pkg/build/linux.go b/pkg/build/linux.go
index bb5de4f28..e19eec60e 100644
--- a/pkg/build/linux.go
+++ b/pkg/build/linux.go
@@ -10,6 +10,7 @@ import (
"debug/elf"
"encoding/hex"
"fmt"
+ "io"
"os"
"path"
"path/filepath"
@@ -257,6 +258,57 @@ func queryLinuxCompiler(kernelDir string) (string, error) {
return string(result[1]), nil
}
// ElfSymbolHashes returns hex-encoded SHA-256 hashes of the code of the function symbols
// contained in the ELF file bin, keyed by symbol name.
// It's best to call it on vmlinux.o since PCs in the binary code are not patched yet.
//
// Only symbols of type STT_FUNC with a non-zero size that are defined in the .text section
// are considered. Symbols whose [Value, Value+Size) range does not fit into the section, or
// whose bytes cannot be read, are silently skipped. If several symbols share the same name,
// the hash of the last one encountered is kept.
//
// An error is returned if the file cannot be opened or parsed as ELF, if its symbol table
// cannot be read, or if it has no readable .text section.
func ElfSymbolHashes(bin string) (map[string]string, error) {
	file, err := elf.Open(bin)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	symbols, err := file.Symbols()
	if err != nil {
		return nil, err
	}

	textSection := file.Section(".text")
	if textSection == nil {
		return nil, fmt.Errorf(".text section not found")
	}

	sectionReader, ok := textSection.Open().(io.ReaderAt)
	if !ok {
		return nil, fmt.Errorf(".text section reader does not support ReadAt")
	}

	hashes := make(map[string]string)
	for _, s := range symbols {
		if elf.ST_TYPE(s.Info) != elf.STT_FUNC || s.Size == 0 {
			continue
		}

		// Reserved indices (SHN_ABS, SHN_COMMON, ...) do not refer to real sections.
		if s.Section >= elf.SHN_LORESERVE || int(s.Section) >= len(file.Sections) ||
			file.Sections[s.Section] != textSection {
			continue
		}

		// All quantities are unsigned 64-bit values taken from the file, so the
		// range check must be written without additions/subtractions that can wrap:
		// a wrapped value could pass the check and make the allocation below panic.
		if s.Value < textSection.Addr {
			continue
		}
		offset := s.Value - textSection.Addr
		if offset > textSection.Size || s.Size > textSection.Size-offset {
			continue
		}

		code := make([]byte, s.Size)
		if _, err := sectionReader.ReadAt(code, int64(offset)); err != nil {
			// Best effort: a symbol that cannot be read is just not reported.
			continue
		}
		hash := sha256.Sum256(code)
		hashes[s.Name] = hex.EncodeToString(hash[:])
	}
	return hashes, nil
}
+
// elfBinarySignature calculates signature of an elf binary aiming at runtime behavior
// (text/data, debug info is ignored).
func elfBinarySignature(bin string, tracer debugtracer.DebugTracer) (string, error) {
diff --git a/pkg/manager/diff.go b/pkg/manager/diff.go
index 64d0af444..aa640515d 100644
--- a/pkg/manager/diff.go
+++ b/pkg/manager/diff.go
@@ -297,9 +297,9 @@ func (dc *diffContext) monitorPatchedCoverage(ctx context.Context) error {
return nil
}
focusAreaStats := dc.new.progsPerArea()
- if focusAreaStats[modifiedArea]+focusAreaStats[includesArea] > 0 {
- log.Logf(0, "fuzzer has reached the modified code (%d + %d), continuing fuzzing",
- focusAreaStats[modifiedArea], focusAreaStats[includesArea])
+ if focusAreaStats[symbolsArea]+focusAreaStats[filesArea]+focusAreaStats[includesArea] > 0 {
+ log.Logf(0, "fuzzer has reached the modified code (%d + %d + %d), continuing fuzzing",
+ focusAreaStats[symbolsArea], focusAreaStats[filesArea], focusAreaStats[includesArea])
return nil
}
log.Logf(0, "fuzzer has not reached the modified code in %s, aborting",
@@ -721,18 +721,32 @@ func (rr *reproRunner) Run(ctx context.Context, r *repro.Result) {
}
const (
- modifiedArea = "modified"
+ symbolsArea = "symbols"
+ filesArea = "files"
includesArea = "included"
)
-func PatchFocusAreas(cfg *mgrconfig.Config, gitPatches [][]byte) {
+func PatchFocusAreas(cfg *mgrconfig.Config, gitPatches [][]byte, baseHashes, patchedHashes map[string]string) {
+ funcs := modifiedSymbols(baseHashes, patchedHashes)
+ if len(funcs) > 0 {
+ log.Logf(0, "adding modified_functions to focus areas: %q", funcs)
+ cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas,
+ mgrconfig.FocusArea{
+ Name: symbolsArea,
+ Filter: mgrconfig.CovFilterCfg{
+ Functions: funcs,
+ },
+ Weight: 6.0,
+ })
+ }
+
direct, transitive := affectedFiles(cfg, gitPatches)
if len(direct) > 0 {
sort.Strings(direct)
- log.Logf(0, "adding directly modified files to focus_order: %q", direct)
+ log.Logf(0, "adding directly modified files to focus areas: %q", direct)
cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas,
mgrconfig.FocusArea{
- Name: modifiedArea,
+ Name: filesArea,
Filter: mgrconfig.CovFilterCfg{
Files: direct,
},
@@ -742,7 +756,7 @@ func PatchFocusAreas(cfg *mgrconfig.Config, gitPatches [][]byte) {
if len(transitive) > 0 {
sort.Strings(transitive)
- log.Logf(0, "adding transitively affected to focus_order: %q", transitive)
+ log.Logf(0, "adding transitively affected to focus areas: %q", transitive)
cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas,
mgrconfig.FocusArea{
Name: includesArea,
@@ -808,3 +822,21 @@ func affectedFiles(cfg *mgrconfig.Config, gitPatches [][]byte) (direct, transiti
}
return
}
+
// modifiedSymbolThreshold is the maximum share of the patched build's symbols that may
// differ from the base build. If there are more changed symbols than that, they are no
// longer specific enough, so they are not used to focus the fuzzer.
const modifiedSymbolThreshold = 0.05

// modifiedSymbols returns the sorted names of the symbols from patchedHashes that are
// either absent from baseHashes or have a different hash there.
// It returns nil if no symbol differs, or if the number of differing symbols exceeds
// modifiedSymbolThreshold of the total number of patched symbols.
func modifiedSymbols(baseHashes, patchedHashes map[string]string) []string {
	limit := float64(len(patchedHashes)) * modifiedSymbolThreshold
	var changed []string
	for symbol, patchedHash := range patchedHashes {
		if prevHash, found := baseHashes[symbol]; found && prevHash == patchedHash {
			continue
		}
		changed = append(changed, symbol)
		// Bail out as soon as the limit is crossed; the rest cannot change the outcome.
		if float64(len(changed)) > limit {
			return nil
		}
	}
	// Map iteration order is random, so sort to keep the output deterministic.
	sort.Strings(changed)
	return changed
}
diff --git a/pkg/manager/diff_test.go b/pkg/manager/diff_test.go
index f2408f13b..9e27dc288 100644
--- a/pkg/manager/diff_test.go
+++ b/pkg/manager/diff_test.go
@@ -4,6 +4,7 @@
package manager
import (
+ "fmt"
"testing"
"github.com/google/syzkaller/pkg/mgrconfig"
@@ -24,6 +25,10 @@ int main(void) { }`,
"c.c": `int main(void) { }`,
}))
+ baseHashes, patchedHashes := dummySymbolHashes(), dummySymbolHashes()
+ baseHashes["function"] = "hash1"
+ patchedHashes["function"] = "hash2"
+
PatchFocusAreas(cfg, [][]byte{
[]byte(`diff --git a/b.c b/b.c
index 103167d..fbf7a68 100644
@@ -34,7 +39,7 @@ index 103167d..fbf7a68 100644
\ No newline at end of file
+int main(void) { return 1; }
\ No newline at end of file`),
- // Also, emulate an update to te header.h.
+ // Also, emulate an update to header.h.
[]byte(`diff --git a/header.h b/header.h
index 103167d..fbf7a68 100644
--- a/header.h
@@ -44,11 +49,18 @@ index 103167d..fbf7a68 100644
\ No newline at end of file
+Test2
\ No newline at end of file`),
- })
+ }, baseHashes, patchedHashes)
assert.Equal(t, []mgrconfig.FocusArea{
{
- Name: modifiedArea,
+ Name: symbolsArea,
+ Filter: mgrconfig.CovFilterCfg{
+ Functions: []string{"function"},
+ },
+ Weight: 6.0,
+ },
+ {
+ Name: filesArea,
Filter: mgrconfig.CovFilterCfg{
Files: []string{"b.c", "header.h"},
},
@@ -66,3 +78,32 @@ index 103167d..fbf7a68 100644
},
}, cfg.Experimental.FocusAreas)
}
+
// dummySymbolHashes returns a fresh map with 100 entries whose keys and values are both
// the decimal strings "0" through "99". Tests use it as a pool of unchanged symbols.
func dummySymbolHashes() map[string]string {
	hashes := make(map[string]string)
	for n := 0; n < 100; n++ {
		id := fmt.Sprint(n)
		hashes[id] = id
	}
	return hashes
}
+
+func TestModifiedSymbols(t *testing.T) {
+ t.Run("too many changed", func(t *testing.T) {
+ ret := modifiedSymbols(map[string]string{
+ "functionA": "hash1",
+ "functionB": "hash2",
+ }, map[string]string{
+ "functionA": "hash1",
+ "functionB": "hash is not hash2",
+ })
+ assert.Empty(t, ret)
+ })
+ t.Run("less than threshold", func(t *testing.T) {
+ base, patched := dummySymbolHashes(), dummySymbolHashes()
+ base["function"] = "hash1"
+ patched["function"] = "hash2"
+ base["function2"] = "hash1"
+ patched["function2"] = "hash2"
+ assert.Equal(t, []string{"function", "function2"}, modifiedSymbols(base, patched))
+ })
+}