| author | Dmitry Vyukov <dvyukov@google.com> | 2024-09-12 15:53:24 +0200 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2024-09-12 16:29:23 +0000 |
| commit | 2a91a78df9ed766fac414f94e9d3cc5fa71add55 | |
| tree | 91e80536ed38be1e7b3a7b7a19bb2c61ad1a614a /syz-manager | |
| parent | 41b5d1787276981454609f1e9ca17f335e8223b5 | |
syz-manager: send new inputs to the hub only once
We used to send corpus updates (added/removed elements) to the hub on every sync.
But that produced too much churn, since the hub algorithm is O(N^2) (it distributes everything
to everybody) and lots of new inputs are later removed (either we can't reproduce their coverage
after a restart, or they are dropped during corpus minimization). So now we don't send new inputs
on every sync; instead we aim to send the corpus once, after initial triage. This solves
the problem with non-reproducible/removed inputs. A typical instance lifetime on syzbot is <24h,
and for such instances we send the corpus exactly once. If an instance somehow lives longer
(e.g. a local long-running instance), we re-connect and re-send once in a while.
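
The net effect: the corpus travels to the hub only at connect time, and the periodic sync carries just repros and candidate requests; a long-lived instance forces a full re-send by dropping the connection every 30 hours. Below is a distilled sketch of that loop; `hubConn`, `connect`, `syncOnce`, and `minimizedCorpus` are stand-ins for the real RPC plumbing in the diff further down, with error handling and the `Cover` check elided:

```go
package main

import (
	"fmt"
	"time"
)

// hubConn stands in for the real rpctype.RPCClient connection.
type hubConn struct{}

func (h *hubConn) Close() {}

// connect sends the full minimized corpus exactly once, at connect time.
func connect(corpus [][]byte) *hubConn {
	fmt.Printf("connected, sent corpus of %d programs\n", len(corpus))
	return &hubConn{}
}

// syncOnce exchanges only repros and candidate requests, never corpus updates.
func syncOnce(h *hubConn) {
	fmt.Println("sync: repros/candidates only")
}

func loop(minimizedCorpus func() [][]byte) {
	var hub *hubConn
	var connectTime time.Time
	for ; ; time.Sleep(10 * time.Minute) {
		if hub == nil {
			hub = connect(minimizedCorpus())
			connectTime = time.Now()
		}
		syncOnce(hub)
		// A long-lived instance drops the connection after 30h so that the
		// next iteration re-connects and re-sends the whole corpus.
		if time.Since(connectTime) > 30*time.Hour {
			hub.Close()
			hub = nil
		}
	}
}

func main() {
	loop(func() [][]byte {
		return [][]byte{[]byte("prog-a"), []byte("prog-b")}
	})
}
```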
Diffstat (limited to 'syz-manager')
| -rw-r--r-- | syz-manager/hub.go | 55 |
|---|---|---|
| -rw-r--r-- | syz-manager/manager.go | 13 |

2 files changed, 35 insertions, 33 deletions
```diff
diff --git a/syz-manager/hub.go b/syz-manager/hub.go
index 6acedf37e..9db988484 100644
--- a/syz-manager/hub.go
+++ b/syz-manager/hub.go
@@ -74,7 +74,6 @@ type HubConnector struct {
 	enabledCalls   map[*prog.Syscall]bool
 	leak           bool
 	fresh          bool
-	hubCorpus      map[string]bool
 	newRepros      [][]byte
 	hubReproQueue  chan *manager.Crash
 	needMoreRepros func() bool
@@ -91,7 +90,8 @@ type HubConnector struct {
 
 // HubManagerView restricts interface between HubConnector and Manager.
 type HubManagerView interface {
-	getMinimizedCorpus() (corpus []*corpus.Item, repros [][]byte)
+	getMinimizedCorpus() []*corpus.Item
+	getNewRepros() [][]byte
 	addNewCandidates(candidates []fuzzer.Candidate)
 	needMoreCandidates() bool
 	hubIsUnreachable()
@@ -100,25 +100,28 @@ type HubManagerView interface {
 func (hc *HubConnector) loop() {
 	var hub *rpctype.RPCClient
 	var doneOnce bool
+	var connectTime time.Time
 	for query := 0; ; time.Sleep(10 * time.Minute) {
-		corpus, repros := hc.mgr.getMinimizedCorpus()
-		if !hc.cfg.Cover {
+		if hub == nil {
+			var corpus []*corpus.Item
 			// If we are using fake coverage, don't send our corpus to the hub.
 			// It should be lower quality than coverage-guided corpus.
 			// However still send repros and accept new inputs.
-			corpus = nil
-		}
-		hc.newRepros = append(hc.newRepros, repros...)
-		if hub == nil {
+			if hc.cfg.Cover {
+				corpus = hc.mgr.getMinimizedCorpus()
+			}
 			var err error
 			if hub, err = hc.connect(corpus); err != nil {
 				log.Logf(0, "failed to connect to hub at %v: %v", hc.cfg.HubAddr, err)
 			} else {
 				log.Logf(0, "connected to hub at %v, corpus %v", hc.cfg.HubAddr, len(corpus))
+				connectTime = time.Now()
 			}
 		}
 		if hub != nil && hc.mgr.needMoreCandidates() {
-			if err := hc.sync(hub, corpus); err != nil {
+			repros := hc.mgr.getNewRepros()
+			hc.newRepros = append(hc.newRepros, repros...)
+			if err := hc.sync(hub); err != nil {
 				log.Logf(0, "hub sync failed: %v", err)
 				hub.Close()
 				hub = nil
@@ -131,6 +134,19 @@ func (hc *HubConnector) loop() {
 		if hub == nil && query >= maxAttempts && !doneOnce {
 			hc.mgr.hubIsUnreachable()
 		}
+		// We used to send corpus updates (added/removed elements) to the hub in each sync.
+		// But that produced too much churn since hub algorithm is O(N^2) (distributing everything
+		// to everybody), and lots of new inputs are later removed (either we can't reproduce coverage
+		// after restart, or inputs removed during corpus minimization). So now we don't send new inputs
+		// in each sync, instead we aim at sending corpus once after initial triage. This solves
+		// the problem with non-reproducible/removed inputs. Typical instance life-time on syzbot is <24h,
+		// for such instances we send the corpus once. If an instance somehow lives for longer, then we
+		// re-connect and re-send once in a while (e.g. a local long-running instance).
+		if hub != nil && time.Since(connectTime) > 30*time.Hour {
+			log.Logf(0, "re-syncing with hub")
+			hub.Close()
+			hub = nil
+		}
 	}
 }
 
@@ -153,9 +169,7 @@ func (hc *HubConnector) connect(corpus []*corpus.Item) (*rpctype.RPCClient, erro
 	for call := range hc.enabledCalls {
 		a.Calls = append(a.Calls, call.Name)
 	}
-	hubCorpus := make(map[string]bool)
 	for _, inp := range corpus {
-		hubCorpus[inp.Sig] = true
 		a.Corpus = append(a.Corpus, inp.Prog.Serialize())
 	}
 	// Never send more than this, this is never healthy but happens episodically
@@ -176,12 +190,11 @@ func (hc *HubConnector) connect(corpus []*corpus.Item) (*rpctype.RPCClient, erro
 	if err != nil {
 		return nil, err
 	}
-	hc.hubCorpus = hubCorpus
 	hc.fresh = false
 	return hub, nil
 }
 
-func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus []*corpus.Item) error {
+func (hc *HubConnector) sync(hub *rpctype.RPCClient) error {
 	key, err := hc.keyGet()
 	if err != nil {
 		return err
@@ -191,22 +204,6 @@ func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus []*corpus.Item) erro
 		Key:     key,
 		Manager: hc.cfg.Name,
 	}
-	sigs := make(map[string]bool)
-	for _, inp := range corpus {
-		sigs[inp.Sig] = true
-		if hc.hubCorpus[inp.Sig] {
-			continue
-		}
-		hc.hubCorpus[inp.Sig] = true
-		a.Add = append(a.Add, inp.Prog.Serialize())
-	}
-	for sig := range hc.hubCorpus {
-		if sigs[sig] {
-			continue
-		}
-		delete(hc.hubCorpus, sig)
-		a.Del = append(a.Del, sig)
-	}
 	if hc.needMoreRepros != nil {
 		a.NeedRepros = hc.needMoreRepros()
 	}
diff --git a/syz-manager/manager.go b/syz-manager/manager.go
index c090d1a0d..71168fb82 100644
--- a/syz-manager/manager.go
+++ b/syz-manager/manager.go
@@ -974,14 +974,19 @@ func (mgr *Manager) corpusInputHandler(updates <-chan corpus.NewItemEvent) {
 	}
 }
 
-func (mgr *Manager) getMinimizedCorpus() (corpus []*corpus.Item, repros [][]byte) {
+func (mgr *Manager) getMinimizedCorpus() []*corpus.Item {
 	mgr.mu.Lock()
 	defer mgr.mu.Unlock()
 	mgr.minimizeCorpusLocked()
-	corpus = mgr.corpus.Items()
-	repros = mgr.newRepros
+	return mgr.corpus.Items()
+}
+
+func (mgr *Manager) getNewRepros() [][]byte {
+	mgr.mu.Lock()
+	defer mgr.mu.Unlock()
+	repros := mgr.newRepros
 	mgr.newRepros = nil
-	return
+	return repros
 }
 
 func (mgr *Manager) addNewCandidates(candidates []fuzzer.Candidate) {
```
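
On the manager side, the old combined getter is split so that repros can be drained independently of corpus snapshots. The sketch below isolates the drain-under-lock pattern that the new `getNewRepros` uses; the `reproBuffer` type is a hypothetical stand-in, the real method lives on `Manager`:

```go
package main

import (
	"fmt"
	"sync"
)

// reproBuffer is a hypothetical stand-in for the Manager fields involved:
// a mutex plus an accumulating slice of serialized repros.
type reproBuffer struct {
	mu     sync.Mutex
	repros [][]byte
}

// add is called by crash-handling code as new repros arrive.
func (b *reproBuffer) add(r []byte) {
	b.mu.Lock()
	defer b.mu.Unlock()
	b.repros = append(b.repros, r)
}

// drain mirrors Manager.getNewRepros: it hands back everything accumulated
// so far and resets the buffer, so each repro is returned exactly once.
func (b *reproBuffer) drain() [][]byte {
	b.mu.Lock()
	defer b.mu.Unlock()
	repros := b.repros
	b.repros = nil
	return repros
}

func main() {
	var buf reproBuffer
	buf.add([]byte("repro-1"))
	buf.add([]byte("repro-2"))
	fmt.Println(len(buf.drain())) // 2: both repros handed off
	fmt.Println(len(buf.drain())) // 0: nothing accumulated since
}
```

Taking the slice and resetting the field under the same lock means concurrent `add` calls can never be lost or delivered twice: each repro either lands before a drain (and is returned by it) or after (and waits for the next one).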
