From 2a91a78df9ed766fac414f94e9d3cc5fa71add55 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Thu, 12 Sep 2024 15:53:24 +0200
Subject: syz-manager: send new inputs to the hub only once

We used to send corpus updates (added/removed elements) to the hub in each sync.
But that produced too much churn since the hub algorithm is O(N^2) (distributing everything
to everybody), and lots of new inputs are later removed (either we can't reproduce coverage
after restart, or inputs are removed during corpus minimization). So now we don't send new inputs
in each sync; instead we aim to send the corpus once after initial triage. This solves
the problem with non-reproducible/removed inputs. Typical instance lifetime on syzbot is <24h;
for such instances we send the corpus once. If an instance somehow lives longer, then we
re-connect and re-send once in a while (e.g. a local long-running instance).
---
 syz-manager/hub.go     | 55 ++++++++++++++++++++++++--------------------------
 syz-manager/manager.go | 13 ++++++++----
 2 files changed, 35 insertions(+), 33 deletions(-)

(limited to 'syz-manager')

diff --git a/syz-manager/hub.go b/syz-manager/hub.go
index 6acedf37e..9db988484 100644
--- a/syz-manager/hub.go
+++ b/syz-manager/hub.go
@@ -74,7 +74,6 @@ type HubConnector struct {
 	enabledCalls   map[*prog.Syscall]bool
 	leak           bool
 	fresh          bool
-	hubCorpus      map[string]bool
 	newRepros      [][]byte
 	hubReproQueue  chan *manager.Crash
 	needMoreRepros func() bool
@@ -91,7 +90,8 @@ type HubConnector struct {
 
 // HubManagerView restricts interface between HubConnector and Manager.
 type HubManagerView interface {
-	getMinimizedCorpus() (corpus []*corpus.Item, repros [][]byte)
+	getMinimizedCorpus() []*corpus.Item
+	getNewRepros() [][]byte
 	addNewCandidates(candidates []fuzzer.Candidate)
 	needMoreCandidates() bool
 	hubIsUnreachable()
@@ -100,25 +100,28 @@ type HubManagerView interface {
 func (hc *HubConnector) loop() {
 	var hub *rpctype.RPCClient
 	var doneOnce bool
+	var connectTime time.Time
 	for query := 0; ; time.Sleep(10 * time.Minute) {
-		corpus, repros := hc.mgr.getMinimizedCorpus()
-		if !hc.cfg.Cover {
+		if hub == nil {
+			var corpus []*corpus.Item
 			// If we are using fake coverage, don't send our corpus to the hub.
 			// It should be lower quality than coverage-guided corpus.
 			// However still send repros and accept new inputs.
-			corpus = nil
-		}
-		hc.newRepros = append(hc.newRepros, repros...)
-		if hub == nil {
+			if hc.cfg.Cover {
+				corpus = hc.mgr.getMinimizedCorpus()
+			}
 			var err error
 			if hub, err = hc.connect(corpus); err != nil {
 				log.Logf(0, "failed to connect to hub at %v: %v", hc.cfg.HubAddr, err)
 			} else {
 				log.Logf(0, "connected to hub at %v, corpus %v", hc.cfg.HubAddr, len(corpus))
+				connectTime = time.Now()
 			}
 		}
 		if hub != nil && hc.mgr.needMoreCandidates() {
-			if err := hc.sync(hub, corpus); err != nil {
+			repros := hc.mgr.getNewRepros()
+			hc.newRepros = append(hc.newRepros, repros...)
+			if err := hc.sync(hub); err != nil {
 				log.Logf(0, "hub sync failed: %v", err)
 				hub.Close()
 				hub = nil
@@ -131,6 +134,19 @@ func (hc *HubConnector) loop() {
 		if hub == nil && query >= maxAttempts && !doneOnce {
 			hc.mgr.hubIsUnreachable()
 		}
+		// We used to send corpus updates (added/removed elements) to the hub in each sync.
+		// But that produced too much churn since the hub algorithm is O(N^2) (distributing everything
+		// to everybody), and lots of new inputs are later removed (either we can't reproduce coverage
+		// after restart, or inputs are removed during corpus minimization). So now we don't send new inputs
+		// in each sync; instead we aim to send the corpus once after initial triage. This solves
+		// the problem with non-reproducible/removed inputs. Typical instance lifetime on syzbot is <24h;
+		// for such instances we send the corpus once. If an instance somehow lives longer, then we
+		// re-connect and re-send once in a while (e.g. a local long-running instance).
+		if hub != nil && time.Since(connectTime) > 30*time.Hour {
+			log.Logf(0, "re-syncing with hub")
+			hub.Close()
+			hub = nil
+		}
 	}
 }
 
@@ -153,9 +169,7 @@ func (hc *HubConnector) connect(corpus []*corpus.Item) (*rpctype.RPCClient, erro
 	for call := range hc.enabledCalls {
 		a.Calls = append(a.Calls, call.Name)
 	}
-	hubCorpus := make(map[string]bool)
 	for _, inp := range corpus {
-		hubCorpus[inp.Sig] = true
 		a.Corpus = append(a.Corpus, inp.Prog.Serialize())
 	}
 	// Never send more than this, this is never healthy but happens episodically
@@ -176,12 +190,11 @@ func (hc *HubConnector) connect(corpus []*corpus.Item) (*rpctype.RPCClient, erro
 	if err != nil {
 		return nil, err
 	}
-	hc.hubCorpus = hubCorpus
 	hc.fresh = false
 	return hub, nil
 }
 
-func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus []*corpus.Item) error {
+func (hc *HubConnector) sync(hub *rpctype.RPCClient) error {
 	key, err := hc.keyGet()
 	if err != nil {
 		return err
@@ -191,22 +204,6 @@ func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus []*corpus.Item) erro
 		Key:     key,
 		Manager: hc.cfg.Name,
 	}
-	sigs := make(map[string]bool)
-	for _, inp := range corpus {
-		sigs[inp.Sig] = true
-		if hc.hubCorpus[inp.Sig] {
-			continue
-		}
-		hc.hubCorpus[inp.Sig] = true
-		a.Add = append(a.Add, inp.Prog.Serialize())
-	}
-	for sig := range hc.hubCorpus {
-		if sigs[sig] {
-			continue
-		}
-		delete(hc.hubCorpus, sig)
-		a.Del = append(a.Del, sig)
-	}
 	if hc.needMoreRepros != nil {
 		a.NeedRepros = hc.needMoreRepros()
 	}
diff --git a/syz-manager/manager.go b/syz-manager/manager.go
index c090d1a0d..71168fb82 100644
--- a/syz-manager/manager.go
+++ b/syz-manager/manager.go
@@ -974,14 +974,19 @@ func (mgr *Manager) corpusInputHandler(updates <-chan corpus.NewItemEvent) {
 	}
 }
 
-func (mgr *Manager) getMinimizedCorpus() (corpus []*corpus.Item, repros [][]byte) {
+func (mgr *Manager) getMinimizedCorpus() []*corpus.Item {
 	mgr.mu.Lock()
 	defer mgr.mu.Unlock()
 	mgr.minimizeCorpusLocked()
-	corpus = mgr.corpus.Items()
-	repros = mgr.newRepros
+	return mgr.corpus.Items()
+}
+
+func (mgr *Manager) getNewRepros() [][]byte {
+	mgr.mu.Lock()
+	defer mgr.mu.Unlock()
+	repros := mgr.newRepros
 	mgr.newRepros = nil
-	return
+	return repros
 }
 
 func (mgr *Manager) addNewCandidates(candidates []fuzzer.Candidate) {
-- 
cgit mrf-deployment