aboutsummaryrefslogtreecommitdiffstats
path: root/syz-manager
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2024-08-14 14:05:48 +0200
committerAleksandr Nogikh <nogikh@google.com>2024-08-14 13:03:10 +0000
commite6b88e204cdd134b36c39384a01169256fcb8c77 (patch)
tree09355e82a57639df7bfbd83e3b9a2b74b4e7d624 /syz-manager
parent95696db519b9375ae7350800e30c7f56249aa14f (diff)
syz-manager: re-minimize a subset of corpus programs
The current fuzzing algorithm in syzkaller may preserve poorly minimized corpus programs forever - they trigger a lot of coverage and will very likely survive all syzkaller restarts. As a result, more than 18% of the current syzbot corpus programs are larger than 25 calls, which is an unreasonably big figure. Introduce a way to prevent the domination of big programs in the corpus. On every corpus retriage, pick up to 50 seeds among the 10% largest corpus programs and re-minimize them. Hopefully, this should reduce the magnitude of the problem.
Diffstat (limited to 'syz-manager')
-rw-r--r--syz-manager/manager.go32
1 files changed, 31 insertions, 1 deletions
diff --git a/syz-manager/manager.go b/syz-manager/manager.go
index 21d568d67..519c4cd0b 100644
--- a/syz-manager/manager.go
+++ b/syz-manager/manager.go
@@ -654,14 +654,44 @@ func (mgr *Manager) loadCorpus() []fuzzer.Candidate {
}
candidates = append(candidates, item)
}
- log.Logf(0, "%-24v: %v (%v seeds)", "corpus", len(candidates), seeds)
// Let's favorize smaller programs, otherwise the poorly minimized ones may overshadow the rest.
sort.SliceStable(candidates, func(i, j int) bool {
return len(candidates[i].Prog.Calls) < len(candidates[j].Prog.Calls)
})
+ reminimized := reminimizeSubset(candidates)
+ log.Logf(0, "%-24v: %v (%v seeds), %d will be reminimized",
+ "corpus", len(candidates), seeds, reminimized)
return candidates
}
+// reminimizeSubset clears the fuzzer.ProgMinimized flag of a small random subset of candidates and returns how many flags it cleared.
+// The ultimate objective is to gradually clean up the poorly minimized corpus programs.
+// reminimizeSubset assumes that candidates are sorted in the order of ascending len(Prog.Calls); the flags are modified in place.
+func reminimizeSubset(candidates []fuzzer.Candidate) int {
+ if len(candidates) == 0 {
+ return 0
+ }
+ // Only consider the largest programs: those with at least as many calls as the candidate at the 90% position (ties may make this more than 10%).
+ threshold := len(candidates[len(candidates)*9/10].Prog.Calls)
+ var resetIndices []int
+ for i, info := range candidates {
+ if info.Flags&fuzzer.ProgMinimized == 0 { // The flag is not set, so there is nothing to clear.
+ continue
+ }
+ if len(info.Prog.Calls) >= threshold {
+ resetIndices = append(resetIndices, i)
+ }
+ }
+ // Reset ProgMinimized for up to 1% of the candidates (at least 1 if any is eligible), but never for more than 50.
+ reset := min(50, len(resetIndices), max(1, len(candidates)/100))
+ rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
+ for _, i := range rnd.Perm(len(resetIndices))[:reset] { // Perm yields distinct indices, so exactly reset entries are picked.
+ idx := resetIndices[i]
+ candidates[idx].Flags &= ^fuzzer.ProgMinimized
+ }
+ return reset
+}
+
func loadProg(target *prog.Target, data []byte) (*prog.Prog, error) {
p, err := target.Deserialize(data, prog.NonStrict)
if err != nil {