diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2025-12-29 15:13:05 +0100 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2025-12-29 14:43:40 +0000 |
| commit | ac3c71e7063b1fc3b1ede9f76fd3c3b4ce072219 (patch) | |
| tree | d5e34cec7dce63980f38a2207b3ca8778388fc50 /pkg | |
| parent | 9fce9e44cb18ac5d0a14e0ca8968b630324ff110 (diff) | |
pkg/rpcserver: fix race in local server shutdown
Currently we stop both executor binary and the RPC server concurrently
due to use of errgroup.WithContext. As the result, executor may SYZFAIL
on the closed network connection before it's killed.
This race leads to very high percent (25%) of failed repro attempts
for my local syz-manager runs. When we run syz-execprog with Repeat=false,
the race triggers frequently. May have something to do with heavily
instrumented kernel where some operations may take longer (e.g. killing
syz-executor and stopping all of its threads).
This should also fix #6091
Diffstat (limited to 'pkg')
| -rw-r--r-- | pkg/rpcserver/local.go | 31 |
1 files changed, 21 insertions, 10 deletions
diff --git a/pkg/rpcserver/local.go b/pkg/rpcserver/local.go index eca5a6eca..c406cb9ed 100644 --- a/pkg/rpcserver/local.go +++ b/pkg/rpcserver/local.go @@ -17,7 +17,6 @@ import ( "github.com/google/syzkaller/pkg/signal" "github.com/google/syzkaller/pkg/vminfo" "github.com/google/syzkaller/prog" - "golang.org/x/sync/errgroup" ) type LocalConfig struct { @@ -43,15 +42,27 @@ func RunLocal(ctx context.Context, cfg *LocalConfig) error { return err } defer localCtx.serv.Close() - // groupCtx will be cancelled once any goroutine returns an error. - eg, groupCtx := errgroup.WithContext(ctx) - eg.Go(func() error { - return localCtx.RunInstance(groupCtx, 0) - }) - eg.Go(func() error { - return localCtx.serv.Serve(groupCtx) - }) - return eg.Wait() + + // Note: we must not stop the RPC server before we finish RunInstance. + // Otherwise, RPC server will close the connection, and executor may SYZFAIL + // on the closed network connection. + // We first need to wait for the executor binary to finish, and only then stop the RPC server. + // However, we want to stop both if the other one errors out. + instCtx, instCancel := context.WithCancel(ctx) + defer instCancel() + servCtx, servCancel := context.WithCancel(context.Background()) + defer servCancel() + servErr := make(chan error, 1) + go func() { + servErr <- localCtx.serv.Serve(servCtx) + instCancel() + }() + instErr := localCtx.RunInstance(instCtx, 0) + servCancel() + if err := <-servErr; err != nil { + return err + } + return instErr } func setupLocal(ctx context.Context, cfg *LocalConfig) (*local, context.Context, error) { |
