From 4aecaec7b552451908682c3d8917025f6bb27fe8 Mon Sep 17 00:00:00 2001
From: Aleksandr Nogikh
Date: Wed, 27 Mar 2024 12:16:12 +0100
Subject: syz-manager: resend inputs from non-crashed VMs

If a VM was e.g. intentionally restarted, there's no need to discard all
pending requests. Remember them and then redistribute to other VMs.
---
 syz-manager/manager.go |  2 +-
 syz-manager/rpc.go     | 36 ++++++++++++++++++++++++++++++------
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/syz-manager/manager.go b/syz-manager/manager.go
index 4a9392c20..5ebdd3df3 100644
--- a/syz-manager/manager.go
+++ b/syz-manager/manager.go
@@ -757,7 +757,7 @@ func (mgr *Manager) runInstance(index int) (*Crash, error) {
 
 	rep, vmInfo, err := mgr.runInstanceInner(index, instanceName)
 
-	machineInfo := mgr.serv.shutdownInstance(instanceName)
+	machineInfo := mgr.serv.shutdownInstance(instanceName, rep != nil)
 	if len(vmInfo) != 0 {
 		machineInfo = append(append(vmInfo, '\n'), machineInfo...)
 	}
diff --git a/syz-manager/rpc.go b/syz-manager/rpc.go
index ed4d3fc91..fa11dc558 100644
--- a/syz-manager/rpc.go
+++ b/syz-manager/rpc.go
@@ -35,6 +35,10 @@ type RPCServer struct {
 
 	checkResult   *rpctype.CheckArgs
 	checkFailures int
+
+	// We did not finish these requests because of VM restarts.
+	// They will be eventually given to other VMs.
+	rescuedInputs []*fuzzer.Request
 }
 
 type Runner struct {
@@ -188,6 +192,18 @@ func (serv *RPCServer) ExchangeInfo(a *rpctype.ExchangeInfoRequest, r *rpctype.E
 		panic("exchange info call with nil fuzzer")
 	}
 
+	// Try to collect some of the postponed requests.
+	if serv.mu.TryLock() {
+		for i := len(serv.rescuedInputs) - 1; i >= 0 && a.NeedProgs > 0; i-- {
+			inp := serv.rescuedInputs[i]
+			serv.rescuedInputs[i] = nil
+			serv.rescuedInputs = serv.rescuedInputs[:i]
+			r.Requests = append(r.Requests, runner.newRequest(inp))
+			a.NeedProgs--
+		}
+		serv.mu.Unlock()
+	}
+
 	// First query new inputs and only then post results.
 	// It should foster a more even distribution of executions
 	// across all VMs.
@@ -237,7 +253,7 @@ func (serv *RPCServer) updateFilteredCover(pcs []uint32) error {
 	return nil
 }
 
-func (serv *RPCServer) shutdownInstance(name string) []byte {
+func (serv *RPCServer) shutdownInstance(name string, crashed bool) []byte {
 	var runner *Runner
 	if val, _ := serv.runners.LoadAndDelete(name); val != nil {
 		runner = val.(*Runner)
@@ -254,12 +270,20 @@ func (serv *RPCServer) shutdownInstance(name string) []byte {
 	runner.requests = nil
 	runner.mu.Unlock()
 
-	// If the object does not exist, there would be no oldRequests either.
-	fuzzerObj := serv.mgr.getFuzzer()
-	for _, req := range oldRequests {
+	if crashed {
 		// The VM likely crashed, so let's tell pkg/fuzzer to abort the affected jobs.
-		// TODO: distinguish between real VM crashes and regular VM restarts?
-		fuzzerObj.Done(req, &fuzzer.Result{Stop: true})
+		// fuzzerObj may be null, but in that case oldRequests would be empty as well.
+		fuzzerObj := serv.mgr.getFuzzer()
+		for _, req := range oldRequests {
+			fuzzerObj.Done(req, &fuzzer.Result{Stop: true})
+		}
+	} else {
+		// We will resend these inputs to another VM.
+		serv.mu.Lock()
+		for _, req := range oldRequests {
+			serv.rescuedInputs = append(serv.rescuedInputs, req)
+		}
+		serv.mu.Unlock()
 	}
 	return runner.machineInfo
 }
-- 
cgit mrf-deployment