aboutsummaryrefslogtreecommitdiffstats
path: root/pkg
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2025-12-29 15:13:05 +0100
committerDmitry Vyukov <dvyukov@google.com>2025-12-29 14:43:40 +0000
commitac3c71e7063b1fc3b1ede9f76fd3c3b4ce072219 (patch)
treed5e34cec7dce63980f38a2207b3ca8778388fc50 /pkg
parent9fce9e44cb18ac5d0a14e0ca8968b630324ff110 (diff)
pkg/rpcserver: fix race in local server shutdown
Currently we stop both executor binary and the RPC server concurrently due to use of errgroup.WithContext. As the result, executor may SYZFAIL on the closed network connection before it's killed. This race leads to very high percent (25%) of failed repro attempts for my local syz-manager runs. When we run syz-execprog with Repeat=false, the race triggers frequently. May have something to do with heavily instrumented kernel where some operations may take longer (e.g. killing syz-executor and stopping all of its threads). This should also fix #6091
Diffstat (limited to 'pkg')
-rw-r--r--pkg/rpcserver/local.go31
1 files changed, 21 insertions, 10 deletions
diff --git a/pkg/rpcserver/local.go b/pkg/rpcserver/local.go
index eca5a6eca..c406cb9ed 100644
--- a/pkg/rpcserver/local.go
+++ b/pkg/rpcserver/local.go
@@ -17,7 +17,6 @@ import (
"github.com/google/syzkaller/pkg/signal"
"github.com/google/syzkaller/pkg/vminfo"
"github.com/google/syzkaller/prog"
- "golang.org/x/sync/errgroup"
)
type LocalConfig struct {
@@ -43,15 +42,27 @@ func RunLocal(ctx context.Context, cfg *LocalConfig) error {
return err
}
defer localCtx.serv.Close()
- // groupCtx will be cancelled once any goroutine returns an error.
- eg, groupCtx := errgroup.WithContext(ctx)
- eg.Go(func() error {
- return localCtx.RunInstance(groupCtx, 0)
- })
- eg.Go(func() error {
- return localCtx.serv.Serve(groupCtx)
- })
- return eg.Wait()
+
+ // Note: we must not stop the RPC server before we finish RunInstance.
+ // Otherwise, RPC server will close the connection, and executor may SYZFAIL
+ // on the closed network connection.
+ // We first need to wait for the executor binary to finish, and only then stop the RPC server.
+ // However, we want to stop both if the other one errors out.
+ instCtx, instCancel := context.WithCancel(ctx)
+ defer instCancel()
+ servCtx, servCancel := context.WithCancel(context.Background())
+ defer servCancel()
+ servErr := make(chan error, 1)
+ go func() {
+ servErr <- localCtx.serv.Serve(servCtx)
+ instCancel()
+ }()
+ instErr := localCtx.RunInstance(instCtx, 0)
+ servCancel()
+ if err := <-servErr; err != nil {
+ return err
+ }
+ return instErr
}
func setupLocal(ctx context.Context, cfg *LocalConfig) (*local, context.Context, error) {