From ee54fec6dae601bf9a349f2dbc40c1012f4491a1 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 18 Jun 2018 19:45:52 +0200 Subject: syz-manager: ignore inputs that are not enabled on target Currently we only ignore programs that contain syscalls that are not statically enabled in config. This does not account for syscalls that are not supported on target machine. Load corpus after we got machine check with actual list of supported syscalls. --- syz-manager/manager.go | 153 ++++++++++++++++++++++++++----------------------- 1 file changed, 82 insertions(+), 71 deletions(-) diff --git a/syz-manager/manager.go b/syz-manager/manager.go index b83029abc..16affa1f5 100644 --- a/syz-manager/manager.go +++ b/syz-manager/manager.go @@ -91,6 +91,8 @@ type Manager struct { const ( // Just started, nothing done yet. phaseInit = iota + // Corpus is loaded and machine is checked. + phaseLoadedCorpus // Triaged all inputs from corpus. // This is when we start querying hub and minimizing persistent corpus. phaseTriagedCorpus @@ -185,74 +187,6 @@ func RunManager(cfg *mgrconfig.Config, target *prog.Target, syscalls map[int]boo if err != nil { log.Fatalf("failed to open corpus database: %v", err) } - // By default we don't re-minimize/re-smash programs from corpus, - // it takes lots of time on start and is unnecessary. - // However, on version bumps we can selectively re-minimize/re-smash. - minimized, smashed := true, true - switch mgr.corpusDB.Version { - case 0: - // Version 0 had broken minimization, so we need to re-minimize. - minimized = false - fallthrough - case 1: - // Version 1->2: memory is preallocated so lots of mmaps become unnecessary. - minimized = false - fallthrough - case 2: - // Version 2->3: big-endian hints. - smashed = false - fallthrough - case currentDBVersion: - } - deleted := 0 - for key, rec := range mgr.corpusDB.Records { - p, err := mgr.target.Deserialize(rec.Val) - if err != nil { - if deleted < 10 { - log.Logf(0, "deleting broken program: %v\n%s", err, rec.Val) - } - mgr.corpusDB.Delete(key) - deleted++ - continue - } - disabled := false - for _, c := range p.Calls { - if !syscalls[c.Meta.ID] { - disabled = true - break - } - } - if disabled { - // This program contains a disabled syscall. - // We won't execute it, but remember its hash so - // it is not deleted during minimization. - // TODO: use mgr.checkResult.EnabledCalls which accounts for missing devices, etc. - // But it is available only after vm check. - mgr.disabledHashes[hash.String(rec.Val)] = struct{}{} - continue - } - mgr.candidates = append(mgr.candidates, rpctype.RPCCandidate{ - Prog: rec.Val, - Minimized: minimized, - Smashed: smashed, - }) - } - mgr.fresh = len(mgr.corpusDB.Records) == 0 - log.Logf(0, "loaded %v programs (%v total, %v deleted)", - len(mgr.candidates), len(mgr.corpusDB.Records), deleted) - - // Now this is ugly. - // We duplicate all inputs in the corpus and shuffle the second part. - // This solves the following problem. A fuzzer can crash while triaging candidates, - // in such case it will also lost all cached candidates. Or, the input can be somewhat flaky - // and doesn't give the coverage on first try. So we give each input the second chance. - // Shuffling should alleviate deterministically losing the same inputs on fuzzer crashing. - mgr.candidates = append(mgr.candidates, mgr.candidates...) - shuffle := mgr.candidates[len(mgr.candidates)/2:] - for i := range shuffle { - j := i + rand.Intn(len(shuffle)-i) - shuffle[i], shuffle[j] = shuffle[j], shuffle[i] - } // Create HTTP server. mgr.initHTTP() @@ -532,6 +466,82 @@ func (mgr *Manager) vmLoop() { } } +func (mgr *Manager) loadCorpus() { + // By default we don't re-minimize/re-smash programs from corpus, + // it takes lots of time on start and is unnecessary. + // However, on version bumps we can selectively re-minimize/re-smash. + minimized, smashed := true, true + switch mgr.corpusDB.Version { + case 0: + // Version 0 had broken minimization, so we need to re-minimize. + minimized = false + fallthrough + case 1: + // Version 1->2: memory is preallocated so lots of mmaps become unnecessary. + minimized = false + fallthrough + case 2: + // Version 2->3: big-endian hints. + smashed = false + fallthrough + case currentDBVersion: + } + syscalls := make(map[int]bool) + for _, id := range mgr.checkResult.EnabledCalls { + syscalls[id] = true + } + deleted := 0 + for key, rec := range mgr.corpusDB.Records { + p, err := mgr.target.Deserialize(rec.Val) + if err != nil { + if deleted < 10 { + log.Logf(0, "deleting broken program: %v\n%s", err, rec.Val) + } + mgr.corpusDB.Delete(key) + deleted++ + continue + } + disabled := false + for _, c := range p.Calls { + if !syscalls[c.Meta.ID] { + disabled = true + break + } + } + if disabled { + // This program contains a disabled syscall. + // We won't execute it, but remember its hash so + // it is not deleted during minimization. + mgr.disabledHashes[hash.String(rec.Val)] = struct{}{} + continue + } + mgr.candidates = append(mgr.candidates, rpctype.RPCCandidate{ + Prog: rec.Val, + Minimized: minimized, + Smashed: smashed, + }) + } + mgr.fresh = len(mgr.corpusDB.Records) == 0 + log.Logf(0, "%-24v: %v (%v deleted)", "corpus", len(mgr.candidates), deleted) + + // Now this is ugly. + // We duplicate all inputs in the corpus and shuffle the second part. + // This solves the following problem. A fuzzer can crash while triaging candidates, + // in such case it will also lost all cached candidates. Or, the input can be somewhat flaky + // and doesn't give the coverage on first try. So we give each input the second chance. + // Shuffling should alleviate deterministically losing the same inputs on fuzzer crashing. + mgr.candidates = append(mgr.candidates, mgr.candidates...) + shuffle := mgr.candidates[len(mgr.candidates)/2:] + for i := range shuffle { + j := i + rand.Intn(len(shuffle)-i) + shuffle[i], shuffle[j] = shuffle[j], shuffle[i] + } + if mgr.phase != phaseInit { + panic(fmt.Sprintf("loadCorpus: bad phase %v", mgr.phase)) + } + mgr.phase = phaseLoadedCorpus +} + func (mgr *Manager) runInstance(index int) (*Crash, error) { mgr.checkUsedFiles() inst, err := mgr.vmPool.Create(index) @@ -942,12 +952,13 @@ func (mgr *Manager) Check(a *rpctype.CheckArgs, r *int) error { log.Fatalf("machine check: %v", a.Error) } log.Logf(0, "machine check:") - log.Logf(0, "%-24v: %v", "syscalls", len(a.EnabledCalls)) + log.Logf(0, "%-24v: %v/%v", "syscalls", len(a.EnabledCalls), len(mgr.target.Syscalls)) for _, feat := range a.Features { log.Logf(0, "%-24v: %v", feat.Name, feat.Reason) } a.DisabledCalls = nil mgr.checkResult = a + mgr.loadCorpus() mgr.firstConnect = time.Now() return nil } @@ -1047,7 +1058,7 @@ func (mgr *Manager) Poll(a *rpctype.PollArgs, r *rpctype.PollRes) error { } if len(mgr.candidates) == 0 { mgr.candidates = nil - if mgr.phase == phaseInit { + if mgr.phase == phaseLoadedCorpus { if mgr.cfg.HubClient != "" { mgr.phase = phaseTriagedCorpus } else { @@ -1064,7 +1075,7 @@ func (mgr *Manager) hubSync() { defer mgr.mu.Unlock() switch mgr.phase { - case phaseInit: + case phaseInit, phaseLoadedCorpus: return case phaseTriagedCorpus: mgr.phase = phaseQueriedHub -- cgit mrf-deployment