diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2024-07-05 17:20:13 +0200 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2024-07-25 13:12:57 +0000 |
| commit | 4d77b9fe7da3d014943a16cb4b9a4ca3a531521a (patch) | |
| tree | c37fbf8b50205eb8b830595a621ad4b355e32e9a /vm/qemu | |
| parent | 206f31df2861c47b13a8c05a105afa94bcc7106c (diff) | |
all: add qemu snapshotting mode
Diffstat (limited to 'vm/qemu')
| -rw-r--r-- | vm/qemu/qemu.go | 20 | ||||
| -rw-r--r-- | vm/qemu/snapshot_linux.go | 246 | ||||
| -rw-r--r-- | vm/qemu/snapshot_unimpl.go | 33 |
3 files changed, 299 insertions, 0 deletions
diff --git a/vm/qemu/qemu.go b/vm/qemu/qemu.go index d8f0dd36c..858339c23 100644 --- a/vm/qemu/qemu.go +++ b/vm/qemu/qemu.go @@ -111,6 +111,7 @@ type instance struct { qemu *exec.Cmd merger *vmimpl.OutputMerger files map[string]string + *snapshot } type archConfig struct { @@ -371,6 +372,9 @@ func (pool *Pool) ctor(workdir, sshkey, sshuser string, index int) (*instance, e sshkey: sshkey, sshuser: sshuser, } + if pool.env.Snapshot { + inst.snapshot = new(snapshot) + } if st, err := os.Stat(inst.image); err == nil && st.Size() == 0 { // Some kernels may not need an image, however caller may still // want to pass us a fake empty image because the rest of syzkaller @@ -415,6 +419,9 @@ func (inst *instance) Close() error { if inst.mon != nil { inst.mon.Close() } + if inst.snapshot != nil { + inst.snapshotClose() + } return nil } @@ -463,6 +470,12 @@ func (inst *instance) boot() error { } }() + if inst.snapshot != nil { + if err := inst.snapshotHandshake(); err != nil { + return err + } + } + if err := vmimpl.WaitForSSH(inst.debug, 10*time.Minute*inst.timeouts.Scale, "localhost", inst.sshkey, inst.sshuser, inst.os, inst.port, inst.merger.Err, false); err != nil { bootOutputStop <- true @@ -555,6 +568,13 @@ func (inst *instance) buildQemuArgs() ([]string, error) { "-device", "isa-applesmc,osk="+inst.cfg.AppleSmcOsk, ) } + if inst.snapshot != nil { + snapshotArgs, err := inst.snapshotEnable() + if err != nil { + return nil, err + } + args = append(args, snapshotArgs...) + } return args, nil } diff --git a/vm/qemu/snapshot_linux.go b/vm/qemu/snapshot_linux.go new file mode 100644 index 000000000..5a30fa382 --- /dev/null +++ b/vm/qemu/snapshot_linux.go @@ -0,0 +1,246 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package qemu + +import ( + "encoding/binary" + "fmt" + "net" + "path/filepath" + "sync/atomic" + "syscall" + "time" + "unsafe" + + "github.com/google/syzkaller/pkg/flatrpc" + "golang.org/x/sys/unix" +) + +type snapshot struct { + ivsListener *net.UnixListener + ivsConn *net.UnixConn + doorbellFD int + eventFD int + shmemFD int + shmem []byte + input []byte + header *flatrpc.SnapshotHeaderT +} + +func (inst *instance) snapshotClose() { + if inst.ivsListener != nil { + inst.ivsListener.Close() + } + if inst.ivsConn != nil { + inst.ivsConn.Close() + } + if inst.doorbellFD != 0 { + syscall.Close(inst.doorbellFD) + } + if inst.eventFD != 0 { + syscall.Close(inst.eventFD) + } + if inst.shmemFD != 0 { + syscall.Close(inst.shmemFD) + } + if inst.shmem != nil { + syscall.Munmap(inst.shmem) + } +} + +func (inst *instance) snapshotEnable() ([]string, error) { + // We use ivshmem device (Inter-VM Shared Memory) for communication with the VM, + // it allows to have a shared memory region directly accessible by both host and target: + // https://www.qemu.org/docs/master/system/devices/ivshmem.html + // + // The shared memory region is not restored as part of snapshot restore since we set: + // migrate_set_capability x-ignore-shared on + // This allows to write a new input into ivshmem before each restore. + // + // We also use doorbell (interrupt) capability of ivshmem to notify host about + // program execution completion. Doorbell also allows to send interrupts in the other direction + // (from host to target), but we don't need/use this since we arrange things such that + // snapshot restore serves as a signal to execute new input. + // + // Ideally we use a single ivshmem device for both purposes (shmem+doorbell). + // But unfortunately it seems that the doorbell device is always restored on snapshot restore + // (at least I did not find a way to make it not restored, maybe can be solved with qemu change). + // So we use 2 separate devices for these purposes. + shmemFD, err := unix.MemfdCreate("syz-qemu-shmem", 0) + if err != nil { + return nil, fmt.Errorf("qemu: memfd_create failed: %w", err) + } + inst.shmemFD = shmemFD + if err := syscall.Ftruncate(shmemFD, int64(flatrpc.ConstSnapshotShmemSize)); err != nil { + return nil, fmt.Errorf("qemu: ftruncate failed: %w", err) + } + shmem, err := syscall.Mmap(shmemFD, 0, int(flatrpc.ConstSnapshotShmemSize), + syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) + if err != nil { + return nil, fmt.Errorf("qemu: shmem mmap failed: %w", err) + } + inst.shmem = shmem + inst.input = shmem[:flatrpc.ConstMaxInputSize:flatrpc.ConstMaxInputSize] + inst.header = (*flatrpc.SnapshotHeaderT)(unsafe.Pointer(&shmem[flatrpc.ConstMaxInputSize])) + shmemFile := fmt.Sprintf("/proc/%v/fd/%v", syscall.Getpid(), shmemFD) + + doorbellFD, err := unix.MemfdCreate("syz-qemu-doorbell", 0) + if err != nil { + return nil, fmt.Errorf("qemu: memfd_create failed: %w", err) + } + if err := syscall.Ftruncate(doorbellFD, int64(flatrpc.ConstSnapshotDoorbellSize)); err != nil { + return nil, fmt.Errorf("qemu: ftruncate failed: %w", err) + } + inst.doorbellFD = doorbellFD + + eventFD, err := unix.Eventfd(0, unix.EFD_SEMAPHORE) + if err != nil { + return nil, fmt.Errorf("qemu: eventfd failed: %w", err) + } + inst.eventFD = eventFD + + sockPath := filepath.Join(inst.workdir, "ivs.sock") + ln, err := net.ListenUnix("unix", &net.UnixAddr{Name: sockPath, Net: "unix"}) + if err != nil { + return nil, fmt.Errorf("qemu: unix listen on %v failed: %w", sockPath, err) + } + inst.ivsListener = ln + + return []string{ + // migratable=on is required to take snapshots. + // tsc=off disables RDTSC timestamp counter, it's not virtualized/restored as part of snapshots, + // so the target kernel sees a large jump in time and always declares TSC as unstable after restore. + "-cpu", "host,migratable=on,tsc=off", + "-chardev", fmt.Sprintf("socket,path=%v,id=snapshot-doorbell", sockPath), + "-device", "ivshmem-doorbell,master=on,vectors=1,chardev=snapshot-doorbell", + "-device", "ivshmem-plain,master=on,memdev=snapshot-shmem", + "-object", fmt.Sprintf("memory-backend-file,size=%v,share=on,discard-data=on,id=snapshot-shmem,mem-path=%v", + uint64(flatrpc.ConstSnapshotShmemSize), shmemFile), + }, nil +} + +func (inst *instance) snapshotHandshake() error { + // ivshmem-doorbell expects an external server that communicates via a unix socket. + // The protocol is not documented, for details see: + // https://github.com/qemu/qemu/blob/master/hw/misc/ivshmem.c + // https://github.com/qemu/qemu/blob/master/contrib/ivshmem-server/ivshmem-server.c + conn, err := inst.ivsListener.AcceptUnix() + if err != nil { + return fmt.Errorf("qemu: unix accept failed: %w", err) + } + inst.ivsListener.Close() + inst.ivsListener = nil + inst.ivsConn = conn + + msg := make([]byte, 8) + // Send protocol version 0. + binary.LittleEndian.PutUint64(msg, 0) + if _, err := conn.Write(msg); err != nil { + return fmt.Errorf("qemu: ivs conn write failed: %w", err) + } + // Send VM id 0. + binary.LittleEndian.PutUint64(msg, 0) + if _, err := conn.Write(msg); err != nil { + return fmt.Errorf("qemu: ivs conn write failed: %w", err) + } + // Send shared memory file FD. + binary.LittleEndian.PutUint64(msg, ^uint64(0)) + rights := syscall.UnixRights(inst.doorbellFD) + if _, _, err := conn.WriteMsgUnix(msg, rights, nil); err != nil { + return fmt.Errorf("qemu: ivs conn sendmsg failed: %w", err) + } + // Send event FD for VM 1 interrupt vector 0. + binary.LittleEndian.PutUint64(msg, 1) + rights = syscall.UnixRights(inst.eventFD) + if _, _, err := conn.WriteMsgUnix(msg, rights, nil); err != nil { + return fmt.Errorf("qemu: ivs conn sendmsg failed: %w", err) + } + return nil +} + +func (inst *instance) SetupSnapshot(input []byte) error { + copy(inst.input, input) + // Tell executor that we are ready to snapshot and wait for an ack. + inst.header.UpdateState(flatrpc.SnapshotStateHandshake) + if !inst.waitSnapshotStateChange(flatrpc.SnapshotStateHandshake, 10*time.Minute) { + return fmt.Errorf("executor does not start snapshot handshake") + } + if _, err := inst.hmp("migrate_set_capability x-ignore-shared on", 0); err != nil { + return err + } + if _, err := inst.hmp("savevm syz", 0); err != nil { + return err + } + if inst.debug { + inst.hmp("info snapshots", 0) // this prints size of the snapshot + } + inst.header.UpdateState(flatrpc.SnapshotStateSnapshotted) + if !inst.waitSnapshotStateChange(flatrpc.SnapshotStateSnapshotted, time.Minute) { + return fmt.Errorf("executor has not confirmed snapshot handshake") + } + return nil +} + +func (inst *instance) RunSnapshot(timeout time.Duration, input []byte) (result, output []byte, err error) { + copy(inst.input, input) + inst.header.OutputOffset = 0 + inst.header.OutputSize = 0 + inst.header.UpdateState(flatrpc.SnapshotStateExecute) + if _, err := inst.hmp("loadvm syz", 0); err != nil { + return nil, nil, err + } + inst.waitSnapshotStateChange(flatrpc.SnapshotStateExecute, timeout) + resStart := int(flatrpc.ConstMaxInputSize) + int(atomic.LoadUint32(&inst.header.OutputOffset)) + resEnd := resStart + int(atomic.LoadUint32(&inst.header.OutputSize)) + var res []byte + if resEnd <= len(inst.shmem) { + res = inst.shmem[resStart:resEnd:resEnd] + } + output = inst.readOutput() + return res, output, nil +} + +func (inst *instance) waitSnapshotStateChange(state flatrpc.SnapshotState, timeout time.Duration) bool { + deadline := time.Now().Add(timeout) + timeoutMs := int(timeout / time.Millisecond) + fds := []unix.PollFd{{ + Fd: int32(inst.eventFD), + Events: unix.POLLIN, + }} + for { + if n, _ := unix.Poll(fds, timeoutMs); n == 1 { + var buf [8]byte + syscall.Read(inst.eventFD, buf[:]) + } + if inst.header.LoadState() != state { + return true + } + remain := time.Until(deadline) + if remain < time.Millisecond { + return false + } + timeoutMs = int(remain / time.Millisecond) + } +} + +func (inst *instance) readOutput() []byte { + var output []byte + // If output channel has overflown, then wait for more output from the merger goroutine. + wait := cap(inst.merger.Output) + for { + select { + case out := <-inst.merger.Output: + output = append(output, out...) + wait-- + default: + if wait > 0 { + return output + } + // After the first overflow we wait after every read because the goroutine + // may be running and sending more output to the channel concurrently. + wait = 1 + time.Sleep(10 * time.Millisecond) + } + } +} diff --git a/vm/qemu/snapshot_unimpl.go b/vm/qemu/snapshot_unimpl.go new file mode 100644 index 000000000..ab9438a47 --- /dev/null +++ b/vm/qemu/snapshot_unimpl.go @@ -0,0 +1,33 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +//go:build !linux + +package qemu + +import ( + "fmt" +) + +type snapshot struct{} + +var errNotImplemented = fmt.Errorf("snapshots are not implemeneted") + +func (inst *instance) snapshotClose() { +} + +func (inst *instance) snapshotEnable() ([]string, error) { + return nil, errNotImplemented +} + +func (inst *instance) snapshotHandshake() error { + return errNotImplemented +} + +func (inst *instance) SetupSnapshot(input []byte) error { + return errNotImplemented +} + +func (inst *instance) RunSnapshot(input []byte) (result, output []byte, err error) { + return nil, nil, errNotImplemented +} |
