From 2a91a78df9ed766fac414f94e9d3cc5fa71add55 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 12 Sep 2024 15:53:24 +0200 Subject: syz-manager: send new inputs to the hub only once We used to send corpus updates (added/removed elements) to the hub in each sync. But that produced too much churn since hub algorithm is O(N^2) (distributing everything to everybody), and lots of new inputs are later removed (either we can't reproduce coverage after restart, or inputs removed during corpus minimization). So now we don't send new inputs in each sync, instead we aim at sending corpus once after initial triage. This solves the problem with non-reproducible/removed inputs. Typical instance life-time on syzbot is <24h, for such instances we send the corpus once. If an instance somehow lives for longer, then we re-connect and re-send once in a while (e.g. a local long-running instance). --- syz-manager/hub.go | 55 ++++++++++++++++++++++++-------------------------- syz-manager/manager.go | 13 ++++++++---- 2 files changed, 35 insertions(+), 33 deletions(-) (limited to 'syz-manager') diff --git a/syz-manager/hub.go b/syz-manager/hub.go index 6acedf37e..9db988484 100644 --- a/syz-manager/hub.go +++ b/syz-manager/hub.go @@ -74,7 +74,6 @@ type HubConnector struct { enabledCalls map[*prog.Syscall]bool leak bool fresh bool - hubCorpus map[string]bool newRepros [][]byte hubReproQueue chan *manager.Crash needMoreRepros func() bool @@ -91,7 +90,8 @@ type HubConnector struct { // HubManagerView restricts interface between HubConnector and Manager. type HubManagerView interface { - getMinimizedCorpus() (corpus []*corpus.Item, repros [][]byte) + getMinimizedCorpus() []*corpus.Item + getNewRepros() [][]byte addNewCandidates(candidates []fuzzer.Candidate) needMoreCandidates() bool hubIsUnreachable() @@ -100,25 +100,28 @@ type HubManagerView interface { func (hc *HubConnector) loop() { var hub *rpctype.RPCClient var doneOnce bool + var connectTime time.Time for query := 0; ; time.Sleep(10 * time.Minute) { - corpus, repros := hc.mgr.getMinimizedCorpus() - if !hc.cfg.Cover { + if hub == nil { + var corpus []*corpus.Item // If we are using fake coverage, don't send our corpus to the hub. // It should be lower quality than coverage-guided corpus. // However still send repros and accept new inputs. - corpus = nil - } - hc.newRepros = append(hc.newRepros, repros...) - if hub == nil { + if hc.cfg.Cover { + corpus = hc.mgr.getMinimizedCorpus() + } var err error if hub, err = hc.connect(corpus); err != nil { log.Logf(0, "failed to connect to hub at %v: %v", hc.cfg.HubAddr, err) } else { log.Logf(0, "connected to hub at %v, corpus %v", hc.cfg.HubAddr, len(corpus)) + connectTime = time.Now() } } if hub != nil && hc.mgr.needMoreCandidates() { - if err := hc.sync(hub, corpus); err != nil { + repros := hc.mgr.getNewRepros() + hc.newRepros = append(hc.newRepros, repros...) + if err := hc.sync(hub); err != nil { log.Logf(0, "hub sync failed: %v", err) hub.Close() hub = nil @@ -131,6 +134,19 @@ func (hc *HubConnector) loop() { if hub == nil && query >= maxAttempts && !doneOnce { hc.mgr.hubIsUnreachable() } + // We used to send corpus updates (added/removed elements) to the hub in each sync. + // But that produced too much churn since hub algorithm is O(N^2) (distributing everything + // to everybody), and lots of new inputs are later removed (either we can't reproduce coverage + // after restart, or inputs removed during corpus minimization). So now we don't send new inputs + // in each sync, instead we aim at sending corpus once after initial triage. This solves + // the problem with non-reproducible/removed inputs. Typical instance life-time on syzbot is <24h, + // for such instances we send the corpus once. If an instance somehow lives for longer, then we + // re-connect and re-send once in a while (e.g. a local long-running instance). + if hub != nil && time.Since(connectTime) > 30*time.Hour { + log.Logf(0, "re-syncing with hub") + hub.Close() + hub = nil + } } } @@ -153,9 +169,7 @@ func (hc *HubConnector) connect(corpus []*corpus.Item) (*rpctype.RPCClient, erro for call := range hc.enabledCalls { a.Calls = append(a.Calls, call.Name) } - hubCorpus := make(map[string]bool) for _, inp := range corpus { - hubCorpus[inp.Sig] = true a.Corpus = append(a.Corpus, inp.Prog.Serialize()) } // Never send more than this, this is never healthy but happens episodically @@ -176,12 +190,11 @@ func (hc *HubConnector) connect(corpus []*corpus.Item) (*rpctype.RPCClient, erro if err != nil { return nil, err } - hc.hubCorpus = hubCorpus hc.fresh = false return hub, nil } -func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus []*corpus.Item) error { +func (hc *HubConnector) sync(hub *rpctype.RPCClient) error { key, err := hc.keyGet() if err != nil { return err @@ -191,22 +204,6 @@ func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus []*corpus.Item) erro Key: key, Manager: hc.cfg.Name, } - sigs := make(map[string]bool) - for _, inp := range corpus { - sigs[inp.Sig] = true - if hc.hubCorpus[inp.Sig] { - continue - } - hc.hubCorpus[inp.Sig] = true - a.Add = append(a.Add, inp.Prog.Serialize()) - } - for sig := range hc.hubCorpus { - if sigs[sig] { - continue - } - delete(hc.hubCorpus, sig) - a.Del = append(a.Del, sig) - } if hc.needMoreRepros != nil { a.NeedRepros = hc.needMoreRepros() } diff --git a/syz-manager/manager.go b/syz-manager/manager.go index c090d1a0d..71168fb82 100644 --- a/syz-manager/manager.go +++ b/syz-manager/manager.go @@ -974,14 +974,19 @@ func (mgr *Manager) corpusInputHandler(updates <-chan corpus.NewItemEvent) { } } -func (mgr *Manager) getMinimizedCorpus() (corpus []*corpus.Item, repros [][]byte) { +func (mgr *Manager) getMinimizedCorpus() []*corpus.Item { mgr.mu.Lock() defer mgr.mu.Unlock() mgr.minimizeCorpusLocked() - corpus = mgr.corpus.Items() - repros = mgr.newRepros + return mgr.corpus.Items() +} + +func (mgr *Manager) getNewRepros() [][]byte { + mgr.mu.Lock() + defer mgr.mu.Unlock() + repros := mgr.newRepros mgr.newRepros = nil - return + return repros } func (mgr *Manager) addNewCandidates(candidates []fuzzer.Candidate) { -- cgit mrf-deployment