From a5625da37fd55922bfad3161db3f041a54140fac Mon Sep 17 00:00:00 2001
From: Aleksandr Nogikh
Date: Fri, 28 Apr 2023 16:44:51 +0200
Subject: syz-manager: jump to phaseTriagedHub after a timeout

At times, syz-hub gets broken and no syz-manager instance can connect to
it for quite a while. This basically prevents corpus rotations and
reproducer generation from happening. If syz-hub is still unreachable
after 3 connection attempts, give up and jump to phaseTriagedHub
unconditionally.
---
 syz-manager/hub.go     | 25 ++++++++++++++++++-------
 syz-manager/manager.go | 14 ++++++++++++++
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/syz-manager/hub.go b/syz-manager/hub.go
index f07754802..031af0026 100644
--- a/syz-manager/hub.go
+++ b/syz-manager/hub.go
@@ -73,25 +73,36 @@ type HubConnector struct {
 type HubManagerView interface {
 	getMinimizedCorpus() (corpus, repros [][]byte)
 	addNewCandidates(candidates []rpctype.Candidate)
+	hubIsUnreachable()
 }
 
 func (hc *HubConnector) loop() {
 	var hub *rpctype.RPCClient
-	for ; ; time.Sleep(10 * time.Minute) {
+	var doneOnce bool
+	for query := 0; ; time.Sleep(10 * time.Minute) {
 		corpus, repros := hc.mgr.getMinimizedCorpus()
 		hc.newRepros = append(hc.newRepros, repros...)
 		if hub == nil {
 			var err error
 			if hub, err = hc.connect(corpus); err != nil {
 				log.Logf(0, "failed to connect to hub at %v: %v", hc.cfg.HubAddr, err)
-				continue
+			} else {
+				log.Logf(0, "connected to hub at %v, corpus %v", hc.cfg.HubAddr, len(corpus))
 			}
-			log.Logf(0, "connected to hub at %v, corpus %v", hc.cfg.HubAddr, len(corpus))
 		}
-		if err := hc.sync(hub, corpus); err != nil {
-			log.Logf(0, "hub sync failed: %v", err)
-			hub.Close()
-			hub = nil
+		if hub != nil {
+			if err := hc.sync(hub, corpus); err != nil {
+				log.Logf(0, "hub sync failed: %v", err)
+				hub.Close()
+				hub = nil
+			} else {
+				doneOnce = true
+			}
+		}
+		query++
+		const maxAttempts = 3
+		if hub == nil && query >= maxAttempts && !doneOnce {
+			hc.mgr.hubIsUnreachable()
 		}
 	}
 }
diff --git a/syz-manager/manager.go b/syz-manager/manager.go
index 874d5ced4..97bac4208 100644
--- a/syz-manager/manager.go
+++ b/syz-manager/manager.go
@@ -1394,6 +1394,20 @@ func (mgr *Manager) candidateBatch(size int) []rpctype.Candidate {
 	return res
 }
 
+func (mgr *Manager) hubIsUnreachable() {
+	var dash *dashapi.Dashboard
+	mgr.mu.Lock()
+	if mgr.phase == phaseTriagedCorpus {
+		dash = mgr.dash
+		mgr.phase = phaseTriagedHub
+		log.Logf(0, "did not manage to connect to syz-hub; moving forward")
+	}
+	mgr.mu.Unlock()
+	if dash != nil {
+		mgr.dash.LogError(mgr.cfg.Name, "did not manage to connect to syz-hub")
+	}
+}
+
 func (mgr *Manager) rotateCorpus() bool {
 	mgr.mu.Lock()
 	defer mgr.mu.Unlock()
-- 
cgit mrf-deployment