From 6d29602b3cc4e8e586d706e78221b8019a7c2ae3 Mon Sep 17 00:00:00 2001 From: Cameron Finucane Date: Fri, 9 Aug 2024 15:44:35 -0700 Subject: vm/starnix: avoid hang on vm shutdown - Extraneous references to the `ffx log` pipe are closed, allowing the EOF from the subprocess to be propagated properly. - The SSH bridge into the Starnix sshd is now properly shut down when the instance is closed, avoiding a zombie process. - Some of the starnix code has been updated to be in line with a refactor that took place in other VM types. --- vm/starnix/starnix.go | 51 ++++++++++++++++----------------------------------- 1 file changed, 16 insertions(+), 35 deletions(-) diff --git a/vm/starnix/starnix.go b/vm/starnix/starnix.go index 87d180aff..88c0cb3b9 100644 --- a/vm/starnix/starnix.go +++ b/vm/starnix/starnix.go @@ -58,10 +58,11 @@ type instance struct { rpipe io.ReadCloser wpipe io.WriteCloser fuchsiaLogs *exec.Cmd + sshBridge *exec.Cmd sshPubKey string sshPrivKey string merger *vmimpl.OutputMerger - diagnose chan bool + timeouts targets.Timeouts } const targetDir = "/tmp" @@ -210,8 +211,9 @@ func (inst *instance) Close() error { inst.fuchsiaLogs.Process.Kill() inst.fuchsiaLogs.Wait() } - if inst.merger != nil { - inst.merger.Wait() + if inst.sshBridge != nil { + inst.sshBridge.Process.Kill() + inst.sshBridge.Wait() } if inst.rpipe != nil { inst.rpipe.Close() @@ -219,6 +221,9 @@ func (inst *instance) Close() error { if inst.wpipe != nil { inst.wpipe.Close() } + if inst.merger != nil { + inst.merger.Wait() + } return nil } @@ -323,6 +328,9 @@ func (inst *instance) connect() error { if err = cmd.Start(); err != nil { return err } + + inst.sshBridge = cmd + time.Sleep(5 * time.Second) if inst.debug { log.Logf(0, "instance %s: forwarded port from starnix container", inst.name) @@ -421,38 +429,11 @@ func (inst *instance) Run(timeout time.Duration, stop <-chan bool, command strin return nil, nil, err } wpipe.Close() - errc := make(chan error, 1) - signal := func(err error) { - select { - case errc <- err: - default: - } - } - - go func() { - retry: - select { - case <-time.After(timeout): - signal(vmimpl.ErrTimeout) - case <-stop: - signal(vmimpl.ErrTimeout) - case <-inst.diagnose: - cmd.Process.Kill() - goto retry - case err := <-inst.merger.Err: - cmd.Process.Kill() - if cmdErr := cmd.Wait(); cmdErr == nil { - // If the command exited successfully, we got EOF error from merger. - // But in this case no error has happened and the EOF is expected. - err = nil - } - signal(err) - return - } - cmd.Process.Kill() - cmd.Wait() - }() - return inst.merger.Output, errc, nil + return vmimpl.Multiplex(cmd, inst.merger, timeout, vmimpl.MultiplexConfig{ + Stop: stop, + Debug: inst.debug, + Scale: inst.timeouts.Scale, + }) } func (inst *instance) Info() ([]byte, error) { -- cgit mrf-deployment