// Copyright 2015 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

// Package vm provides an abstract test machine (VM, physical machine, etc)
// interface for the rest of the system.
// For convenience test machines are subsequently collectively called VMs.
// The package wraps the vmimpl package interface with some common functionality
// and a higher-level interface.
package vm

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"
	"sync/atomic"
	"time"

	"github.com/google/syzkaller/pkg/log"
	"github.com/google/syzkaller/pkg/mgrconfig"
	"github.com/google/syzkaller/pkg/osutil"
	"github.com/google/syzkaller/pkg/report"
	"github.com/google/syzkaller/pkg/report/crash"
	"github.com/google/syzkaller/pkg/stat"
	"github.com/google/syzkaller/sys/targets"
	"github.com/google/syzkaller/vm/dispatcher"
	"github.com/google/syzkaller/vm/vmimpl"

	// Import all VM implementations, so that users only need to import vm.
	_ "github.com/google/syzkaller/vm/adb"
	_ "github.com/google/syzkaller/vm/bhyve"
	_ "github.com/google/syzkaller/vm/cuttlefish"
	_ "github.com/google/syzkaller/vm/gce"
	_ "github.com/google/syzkaller/vm/gvisor"
	_ "github.com/google/syzkaller/vm/isolated"
	_ "github.com/google/syzkaller/vm/proxyapp"
	_ "github.com/google/syzkaller/vm/qemu"
	_ "github.com/google/syzkaller/vm/starnix"
	_ "github.com/google/syzkaller/vm/virtualbox"
	_ "github.com/google/syzkaller/vm/vmm"
	_ "github.com/google/syzkaller/vm/vmware"
)

type Pool struct {
	impl               vmimpl.Pool
	typ                vmimpl.Type
	workdir            string
	template           string
	timeouts           targets.Timeouts
	count              int
	activeCount        int32
	snapshot           bool
	hostFuzzer         bool
	statOutputReceived *stat.Val
}

type Instance struct {
	pool          *Pool
	impl          vmimpl.Instance
	workdir       string
	index         int
	snapshotSetup bool
	onClose       func()
}

var (
	Shutdown   = vmimpl.Shutdown
	ErrTimeout = vmimpl.ErrTimeout

	_ BootErrorer  = vmimpl.BootError{}
	_ InfraErrorer = vmimpl.InfraError{}
)

func ShutdownCtx() context.Context {
	ctx, done := context.WithCancel(context.Background())
	go func() {
		<-Shutdown
		done()
	}()
	return ctx
}

type BootErrorer interface {
	BootError() (string, []byte)
}

type InfraErrorer interface {
	InfraError() (string, []byte)
}

// vmType splits the VM type from any suffix (separated by ":"). This is mostly
// useful for the "proxyapp" type, where pkg/build needs to specify/handle
// sub-types.
func vmType(fullName string) string {
	name, _, _ := strings.Cut(fullName, ":")
	return name
}

// AllowsOvercommit returns whether the instance type allows overcommit of instances
// (i.e. creation of instances out-of-thin-air). Overcommit is used during image
// and patch testing in syz-ci when it asks for more instances than specified in the config.
// Generally virtual machines (qemu, gce) support overcommit,
// while physical machines (adb, isolated) do not. Strictly speaking, we should
// never use overcommit and use only what's specified in the config, because we
// override resource limits specified in the config (e.g. can OOM). But it works and
// makes lots of things much simpler.
func AllowsOvercommit(typ string) bool {
	return vmimpl.Types[vmType(typ)].Overcommit
}
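// A hedged usage sketch of the typical (non-snapshot) lifecycle, written from the
// point of view of an importing package. The config path, command, and error handling
// are illustrative assumptions, not the actual syz-manager flow, and the
// report.NewReporter/mgrconfig.LoadFile calls are assumed from their packages:
//
//	cfg, err := mgrconfig.LoadFile("manager.cfg") // hypothetical config file
//	if err != nil {
//		return err
//	}
//	reporter, err := report.NewReporter(cfg)
//	if err != nil {
//		return err
//	}
//	pool, err := vm.Create(cfg, false)
//	if err != nil {
//		return err
//	}
//	ctx := vm.ShutdownCtx()
//	inst, err := pool.Create(ctx, 0)
//	if err != nil {
//		return err
//	}
//	defer inst.Close()
//	output, reports, err := inst.Run(ctx, reporter, "uname -a",
//		vm.WithExitCondition(vm.ExitNormal|vm.ExitTimeout))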
// Create creates a VM pool that can be used to create individual VMs.
func Create(cfg *mgrconfig.Config, debug bool) (*Pool, error) {
	typ, ok := vmimpl.Types[vmType(cfg.Type)]
	if !ok {
		return nil, fmt.Errorf("unknown instance type '%v'", cfg.Type)
	}
	env := &vmimpl.Env{
		Name:      cfg.Name,
		OS:        cfg.TargetOS,
		Arch:      cfg.TargetVMArch,
		Workdir:   cfg.Workdir,
		Image:     cfg.Image,
		SSHKey:    cfg.SSHKey,
		SSHUser:   cfg.SSHUser,
		Timeouts:  cfg.Timeouts,
		Snapshot:  cfg.Snapshot,
		Debug:     debug,
		Config:    cfg.VM,
		KernelSrc: cfg.KernelSrc,
	}
	impl, err := typ.Ctor(env)
	if err != nil {
		return nil, err
	}
	count := impl.Count()
	if debug && count > 1 {
		log.Logf(0, "limiting number of VMs from %v to 1 in debug mode", count)
		count = 1
	}
	return &Pool{
		impl:       impl,
		typ:        typ,
		workdir:    env.Workdir,
		template:   cfg.WorkdirTemplate,
		timeouts:   cfg.Timeouts,
		count:      count,
		snapshot:   cfg.Snapshot,
		hostFuzzer: cfg.SysTarget.HostFuzzer,
		statOutputReceived: stat.New("vm output", "Bytes of VM console output received",
			stat.Graph("traffic"), stat.Rate{}, stat.FormatMB),
	}, nil
}

func (pool *Pool) Count() int {
	return pool.count
}

func (pool *Pool) Create(ctx context.Context, index int) (*Instance, error) {
	if index < 0 || index >= pool.count {
		return nil, fmt.Errorf("invalid VM index %v (count %v)", index, pool.count)
	}
	workdir, err := osutil.ProcessTempDir(pool.workdir)
	if err != nil {
		return nil, fmt.Errorf("failed to create instance temp dir: %w", err)
	}
	if pool.template != "" {
		if err := osutil.CopyDirRecursively(pool.template, filepath.Join(workdir, "template")); err != nil {
			return nil, err
		}
	}
	impl, err := pool.impl.Create(ctx, workdir, index)
	if err != nil {
		os.RemoveAll(workdir)
		return nil, err
	}
	atomic.AddInt32(&pool.activeCount, 1)
	return &Instance{
		pool:    pool,
		impl:    impl,
		workdir: workdir,
		index:   index,
		onClose: func() { atomic.AddInt32(&pool.activeCount, -1) },
	}, nil
}

// TODO: Integration or end-to-end testing is needed.
//
// https://github.com/google/syzkaller/pull/3269#discussion_r967650801
func (pool *Pool) Close() error {
	if pool.activeCount != 0 {
		panic("all the instances should be closed before pool.Close()")
	}
	if closer, ok := pool.impl.(io.Closer); ok {
		return closer.Close()
	}
	return nil
}

// SetupSnapshot must be called once before calling RunSnapshot.
// Input is copied into the VM in an implementation-defined way and is interpreted by the executor.
func (inst *Instance) SetupSnapshot(input []byte) error {
	impl, ok := inst.impl.(snapshotter)
	if !ok {
		return errors.New("this VM type does not support snapshot mode")
	}
	if inst.snapshotSetup {
		return fmt.Errorf("SetupSnapshot called twice")
	}
	inst.snapshotSetup = true
	return impl.SetupSnapshot(input)
}

// RunSnapshot runs one input in snapshotting mode.
// Input is copied into the VM in an implementation-defined way and is interpreted by the executor.
// Result is the result provided by the executor.
// Output is the kernel console output during execution of the input.
func (inst *Instance) RunSnapshot(input []byte) (result, output []byte, err error) {
	impl, ok := inst.impl.(snapshotter)
	if !ok {
		return nil, nil, errors.New("this VM type does not support snapshot mode")
	}
	if !inst.snapshotSetup {
		return nil, nil, fmt.Errorf("RunSnapshot without SetupSnapshot")
	}
	// The executor has its own timeout logic, so use a slightly larger timeout here.
	timeout := inst.pool.timeouts.Program / 5 * 7
	return impl.RunSnapshot(timeout, input)
}
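// Snapshot mode is used differently from Run: the instance is primed once with
// SetupSnapshot and then the same VM snapshot is replayed for every input.
// A hedged sketch, assuming setupInput and inputs are hypothetical byte slices
// prepared by the caller:
//
//	if err := inst.SetupSnapshot(setupInput); err != nil {
//		return err
//	}
//	for _, input := range inputs {
//		result, output, err := inst.RunSnapshot(input)
//		if err != nil {
//			return err
//		}
//		_, _ = result, output // executor result and kernel console output
//	}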
type snapshotter interface {
	SetupSnapshot([]byte) error
	RunSnapshot(time.Duration, []byte) ([]byte, []byte, error)
}

func (inst *Instance) Copy(hostSrc string) (string, error) {
	return inst.impl.Copy(hostSrc)
}

func (inst *Instance) Forward(port int) (string, error) {
	return inst.impl.Forward(port)
}

type ExitCondition int

const (
	// The program is allowed to exit after timeout.
	ExitTimeout = ExitCondition(1 << iota)
	// The program is allowed to exit with no errors.
	ExitNormal
	// The program is allowed to exit with errors.
	ExitError
)

type RunOptions struct {
	// exitCondition says which exit modes should be considered errors and which are OK.
	exitCondition ExitCondition
	// beforeContext is how many bytes BEFORE the crash description to keep in the report.
	beforeContext int
	// afterContext is how many bytes AFTER the crash description to keep in the report.
	afterContext int
	// An early notification that the command has finished / VM crashed.
	earlyFinishCb   func()
	injectExecuting <-chan bool
	tickerPeriod    time.Duration
}

func WithExitCondition(exitCondition ExitCondition) func(*RunOptions) {
	return func(opts *RunOptions) {
		opts.exitCondition = exitCondition
	}
}

func WithBeforeContext(beforeContext int) func(*RunOptions) {
	return func(opts *RunOptions) {
		opts.beforeContext = beforeContext
	}
}

func WithInjectExecuting(injectExecuting <-chan bool) func(*RunOptions) {
	return func(opts *RunOptions) {
		opts.injectExecuting = injectExecuting
	}
}

func WithEarlyFinishCb(cb func()) func(*RunOptions) {
	return func(opts *RunOptions) {
		opts.earlyFinishCb = cb
	}
}

// Run runs cmd inside the VM (think of ssh cmd) and monitors command execution
// and the kernel console output. It detects kernel oopses in output, lost connections, hangs, etc.
// Returns command+kernel output and non-symbolized crash reports (nil if no error happened).
func (inst *Instance) Run(ctx context.Context, reporter *report.Reporter, command string,
	opts ...func(*RunOptions)) ([]byte, []*report.Report, error) {
	runOptions := &RunOptions{
		beforeContext: 128 << 10,
		afterContext:  128 << 10,
		tickerPeriod:  10 * time.Second,
	}
	for _, opt := range opts {
		opt(runOptions)
	}
	outc, errc, err := inst.impl.Run(ctx, command)
	if err != nil {
		return nil, nil, err
	}
	mon := &monitor{
		RunOptions:      runOptions,
		inst:            inst,
		outc:            outc,
		errc:            errc,
		reporter:        reporter,
		lastExecuteTime: time.Now(),
	}
	reps := mon.monitorExecution()
	return mon.output, reps, nil
}

func (inst *Instance) Info() ([]byte, error) {
	if ii, ok := inst.impl.(vmimpl.Infoer); ok {
		return ii.Info()
	}
	return nil, nil
}

func (inst *Instance) diagnose(reps []*report.Report) ([]byte, bool) {
	if len(reps) == 0 {
		panic("reps is empty")
	}
	return inst.impl.Diagnose(reps[0])
}

func (inst *Instance) Index() int {
	return inst.index
}

func (inst *Instance) Close() error {
	err := inst.impl.Close()
	if retErr := os.RemoveAll(inst.workdir); err == nil {
		err = retErr
	}
	inst.onClose()
	return err
}

type Dispatcher = dispatcher.Pool[*Instance]

func NewDispatcher(pool *Pool, def dispatcher.Runner[*Instance]) *Dispatcher {
	return dispatcher.NewPool(pool.count, pool.Create, def)
}
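// A Dispatcher hands booted instances to Runner callbacks and recreates instances
// as they get closed. A hedged sketch of how a caller might drive it; the Runner
// signature and the Loop method shown here are assumptions about the vm/dispatcher
// package, and the job body is illustrative:
//
//	disp := vm.NewDispatcher(pool, func(ctx context.Context, inst *vm.Instance, upd dispatcher.UpdateInfo) {
//		// Copy binaries, forward ports, and run commands on inst here.
//	})
//	go disp.Loop(ctx)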
type monitor struct {
	*RunOptions
	inst     *Instance
	outc     <-chan []byte
	errc     <-chan error
	reporter *report.Reporter
	// output is at most mon.beforeContext + len(report) + afterContext bytes.
	output []byte
	// curPos in the output to scan for the matches.
	curPos          int
	lastExecuteTime time.Time
	// extractCalled is used to prevent multiple extractError calls.
	extractCalled bool
}

func (mon *monitor) monitorExecution() []*report.Report {
	ticker := time.NewTicker(mon.tickerPeriod * mon.inst.pool.timeouts.Scale)
	defer ticker.Stop()
	defer func() {
		if mon.earlyFinishCb != nil {
			mon.earlyFinishCb()
		}
	}()
	for {
		select {
		case err := <-mon.errc:
			switch err {
			case nil:
				// The program has exited without errors,
				// but wait for kernel output in case there is some delayed oops.
				crash := ""
				if mon.exitCondition&ExitNormal == 0 {
					crash = lostConnectionCrash
				}
				return mon.extractErrors(crash)
			case ErrTimeout:
				if mon.exitCondition&ExitTimeout == 0 {
					return mon.extractErrors(timeoutCrash)
				}
				return nil
			default:
				// Note: connection lost can race with a kernel oops message.
				// In such case we want to return the kernel oops.
				crash := ""
				if mon.exitCondition&ExitError == 0 {
					crash = lostConnectionCrash
				}
				return mon.extractErrors(crash)
			}
		case out, ok := <-mon.outc:
			if !ok {
				mon.outc = nil
				continue
			}
			mon.inst.pool.statOutputReceived.Add(len(out))
			if rep, done := mon.appendOutput(out); done {
				return rep
			}
		case <-mon.injectExecuting:
			mon.lastExecuteTime = time.Now()
		case <-ticker.C:
			// Detect both "no output whatsoever" and "kernel episodically prints
			// something to console, but fuzzer is not actually executing programs".
			if time.Since(mon.lastExecuteTime) > mon.inst.pool.timeouts.NoOutput {
				return mon.extractErrors(noOutputCrash)
			}
		case <-Shutdown:
			return nil
		}
	}
}
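// The injectExecuting channel consumed above lets the caller reset the no-output
// detector when it observes progress through a side channel (e.g. the RPC connection
// to the fuzzer) rather than through console output. A hedged wiring sketch; the
// progress source is a hypothetical assumption:
//
//	executing := make(chan bool, 1)
//	go func() {
//		for range progressEvents { // hypothetical stream of fuzzer progress notifications
//			select {
//			case executing <- true:
//			default:
//			}
//		}
//	}()
//	output, reports, err := inst.Run(ctx, reporter, command, vm.WithInjectExecuting(executing))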
func (mon *monitor) appendOutput(out []byte) ([]*report.Report, bool) {
	lastPos := len(mon.output)
	mon.output = append(mon.output, out...)
	if bytes.Contains(mon.output[lastPos:], []byte(executedProgramsStart)) {
		mon.lastExecuteTime = time.Now()
	}
	if mon.reporter.ContainsCrash(mon.output[mon.curPos:]) {
		return mon.extractErrors("unknown error"), true
	}
	if len(mon.output) > 2*mon.beforeContext {
		copy(mon.output, mon.output[len(mon.output)-mon.beforeContext:])
		mon.output = mon.output[:mon.beforeContext]
	}
	// Find the starting position for crash matching on the next iteration.
	// We step back from the end of output by maxErrorLength to handle the case
	// when a crash line is currently split/incomplete. And then we try to find
	// the preceding '\n' to have a full line. This is required to handle
	// the case when a particular pattern is ignored as crash, but a suffix
	// of the pattern is detected as crash (e.g. "ODEBUG:" is trimmed to "BUG:").
	mon.curPos = len(mon.output) - maxErrorLength
	for i := 0; i < maxErrorLength; i++ {
		if mon.curPos <= 0 || mon.output[mon.curPos-1] == '\n' {
			break
		}
		mon.curPos--
	}
	mon.curPos = max(mon.curPos, 0)
	return nil, false
}

func (mon *monitor) extractErrors(defaultError string) []*report.Report {
	if mon.extractCalled {
		panic("extractError called twice")
	}
	mon.extractCalled = true
	if mon.earlyFinishCb != nil {
		mon.earlyFinishCb()
		mon.earlyFinishCb = nil
	}
	diagOutput, diagWait := []byte{}, false
	if defaultError != "" {
		diagOutput, diagWait = mon.inst.diagnose(mon.createReports(defaultError))
	}
	// Give it some time to finish writing the error message.
	// But don't wait for "no output", we already waited enough.
	if defaultError != noOutputCrash || diagWait {
		mon.waitForOutput()
	}
	// Check the executorPreemptedStr only for preemptible instances since the executor can print
	// the string spuriously in some cases (gets SIGTERM from the test program somehow).
	if mon.inst.pool.typ.Preemptible && bytes.Contains(mon.output, []byte(executorPreemptedStr)) {
		return nil
	}
	if defaultError == "" && mon.reporter.ContainsCrash(mon.output[mon.curPos:]) {
		// We did not call Diagnose above because we thought there was no error, so call it now.
		diagOutput, diagWait = mon.inst.diagnose(mon.createReports(defaultError))
		if diagWait {
			mon.waitForOutput()
		}
	}
	reps := mon.createReports(defaultError)
	if len(reps) == 0 {
		return nil
	}
	if len(diagOutput) > 0 {
		reps[0].Output = append(reps[0].Output, vmDiagnosisStart...)
		reps[0].Output = append(reps[0].Output, diagOutput...)
	}
	return reps
}

func (mon *monitor) createReports(defaultError string) []*report.Report {
	curPos := mon.curPos
	var res []*report.Report
	for {
		rep := mon.reporter.ParseFrom(mon.output, curPos)
		if rep == nil {
			if defaultError == "" || len(res) > 0 {
				return res
			}
			typ := crash.UnknownType
			if defaultError == lostConnectionCrash {
				typ = crash.LostConnection
			}
			return []*report.Report{{
				Title:      defaultError,
				Output:     mon.output,
				Suppressed: report.IsSuppressed(mon.reporter, mon.output),
				Type:       typ,
			}}
		}
		curPos = rep.SkipPos
		start := max(rep.StartPos-mon.beforeContext, 0)
		end := min(rep.EndPos+mon.afterContext, len(rep.Output))
		rep.Output = rep.Output[start:end]
		rep.StartPos -= start
		rep.EndPos -= start
		if len(res) == 0 || (len(res) > 0 && !rep.Corrupted && !rep.Suppressed) {
			res = append(res, rep)
		}
	}
}

func (mon *monitor) waitForOutput() {
	timer := time.NewTimer(vmimpl.WaitForOutputTimeout * mon.inst.pool.timeouts.Scale)
	defer timer.Stop()
	for {
		select {
		case out, ok := <-mon.outc:
			if !ok {
				return
			}
			mon.output = append(mon.output, out...)
		case <-timer.C:
			return
		case <-Shutdown:
			return
		}
	}
}

const (
	maxErrorLength = 256

	lostConnectionCrash   = "lost connection to test machine"
	noOutputCrash         = "no output from test machine"
	timeoutCrash          = "timed out"
	executorPreemptedStr  = "SYZ-EXECUTOR: PREEMPTED"
	vmDiagnosisStart      = "\nVM DIAGNOSIS:\n"
	executedProgramsStart = "executed programs:" // syz-execprog output
)
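// Errors returned by Pool.Create can be classified with the BootErrorer/InfraErrorer
// interfaces declared above, which is how callers tell kernel boot failures apart from
// infrastructure flakes. A hedged sketch; the handling policy shown is an illustrative
// assumption, not prescribed by the package:
//
//	inst, err := pool.Create(ctx, index)
//	if err != nil {
//		var bootErr vm.BootErrorer
//		if errors.As(err, &bootErr) {
//			title, output := bootErr.BootError()
//			_ = output
//			return fmt.Errorf("kernel failed to boot: %v", title) // likely a kernel/image problem
//		}
//		var infraErr vm.InfraErrorer
//		if errors.As(err, &infraErr) {
//			return err // infrastructure problem, worth retrying elsewhere
//		}
//		return err
//	}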