From d21c3c878c8965786d39a5d73bbe22792b0a6ccd Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 7 Oct 2016 18:56:45 +0200 Subject: syz-gce: add autonomous GCE runner It is meant to download new kernel images from GCS, update and rebuild syzkaller and restart syz-manager. Work in progress... --- gce/gce.go | 95 ++++++++++++++++------ syz-gce/syz-gce.go | 99 +++++++++++++++------- syz-manager/manager.go | 2 +- vm/gce/gce.go | 217 +++---------------------------------------------- 4 files changed, 156 insertions(+), 257 deletions(-) diff --git a/gce/gce.go b/gce/gce.go index 2ae3aae37..a6cf7a5e0 100644 --- a/gce/gce.go +++ b/gce/gce.go @@ -3,6 +3,12 @@ // Package gce provides wrappers around Google Compute Engine (GCE) APIs. // It is assumed that the program itself also runs on GCE as APIs operate on the current project/zone. +// +// See https://cloud.google.com/compute/docs for details. +// In particular, API reference: +// https://cloud.google.com/compute/docs/reference/latest +// and Go API wrappers: +// https://godoc.org/google.golang.org/api/compute/v0.beta package gce import ( @@ -84,24 +90,6 @@ func NewContext() (*Context, error) { return ctx, nil } -func (ctx *Context) getMeta(path string) (string, error) { - req, err := http.NewRequest("GET", "http://metadata.google.internal/computeMetadata/v1/"+path, nil) - if err != nil { - return "", err - } - req.Header.Add("Metadata-Flavor", "Google") - resp, err := http.DefaultClient.Do(req) - if err != nil { - return "", err - } - defer resp.Body.Close() - body, err := ioutil.ReadAll(resp.Body) - if err != nil { - return "", err - } - return string(body), nil -} - func (ctx *Context) CreateInstance(name, machineType, image, sshkey string) (string, error) { prefix := "https://www.googleapis.com/compute/v1/projects/" + ctx.ProjectID instance := &compute.Instance{ @@ -148,7 +136,7 @@ func (ctx *Context) CreateInstance(name, machineType, image, sshkey string) (str if err != nil { return "", fmt.Errorf("failed to create instance: %v", err) } - if err := ctx.waitForCompletion("create", op.Name, false); err != nil { + if err := ctx.waitForCompletion("zone", "create image", op.Name, false); err != nil { return "", err } @@ -175,24 +163,65 @@ func (ctx *Context) CreateInstance(name, machineType, image, sshkey string) (str func (ctx *Context) DeleteInstance(name string) error { <-ctx.apiRateGate op, err := ctx.computeService.Instances.Delete(ctx.ProjectID, ctx.ZoneID, name).Do() - apiErr, ok := err.(*googleapi.Error) - if ok && apiErr.Code == 404 { + if apiErr, ok := err.(*googleapi.Error); ok && apiErr.Code == 404 { return nil } if err != nil { return fmt.Errorf("failed to delete instance: %v", err) } - if err := ctx.waitForCompletion("delete", op.Name, true); err != nil { + if err := ctx.waitForCompletion("zone", "delete image", op.Name, true); err != nil { + return err + } + return nil +} + +func (ctx *Context) CreateImage(imageName, gcsFile string) error { + image := &compute.Image{ + Name: imageName, + RawDisk: &compute.ImageRawDisk{ + Source: "https://storage.googleapis.com/" + gcsFile, + }, + } + <-ctx.apiRateGate + op, err := ctx.computeService.Images.Insert(ctx.ProjectID, image).Do() + if err != nil { + return fmt.Errorf("failed to create image: %v", err) + } + if err := ctx.waitForCompletion("global", "create image", op.Name, false); err != nil { + return err + } + return nil +} + +func (ctx *Context) DeleteImage(imageName string) error { + <-ctx.apiRateGate + op, err := ctx.computeService.Images.Delete(ctx.ProjectID, imageName).Do() + if apiErr, ok := err.(*googleapi.Error); ok && apiErr.Code == 404 { + return nil + } + if err != nil { + return fmt.Errorf("failed to delete image: %v", err) + } + if err := ctx.waitForCompletion("global", "delete image", op.Name, true); err != nil { return err } return nil } -func (ctx *Context) waitForCompletion(desc, opName string, ignoreNotFound bool) error { +func (ctx *Context) waitForCompletion(typ, desc, opName string, ignoreNotFound bool) error { for { time.Sleep(2 * time.Second) <-ctx.apiRateGate - op, err := ctx.computeService.ZoneOperations.Get(ctx.ProjectID, ctx.ZoneID, opName).Do() + var err error + var op *compute.Operation + switch typ { + case "global": + op, err = ctx.computeService.GlobalOperations.Get(ctx.ProjectID, opName).Do() + case "zone": + op, err = ctx.computeService.ZoneOperations.Get(ctx.ProjectID, ctx.ZoneID, opName).Do() + default: + panic("unknown operation type: " + typ) + } if err != nil { return fmt.Errorf("failed to get %v operation %v: %v", desc, opName, err) } @@ -216,3 +245,21 @@ func (ctx *Context) waitForCompletion(desc, opName string, ignoreNotFound bool) } } } + +func (ctx *Context) getMeta(path string) (string, error) { + req, err := http.NewRequest("GET", "http://metadata.google.internal/computeMetadata/v1/"+path, nil) + if err != nil { + return "", err + } + req.Header.Add("Metadata-Flavor", "Google") + resp, err := http.DefaultClient.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return "", err + } + return string(body), nil +} diff --git a/syz-gce/syz-gce.go b/syz-gce/syz-gce.go index 936b52eee..f16672d11 100644 --- a/syz-gce/syz-gce.go +++ b/syz-gce/syz-gce.go @@ -21,8 +21,9 @@ import ( "time" "cloud.google.com/go/storage" + "github.com/google/syzkaller/config" + "github.com/google/syzkaller/gce" "golang.org/x/net/context" - "google.golang.org/api/compute/v0.beta" ) var ( @@ -31,22 +32,25 @@ var ( cfg *Config ctx context.Context storageClient *storage.Client - computeService *compute.Service + GCE *gce.Context ) type Config struct { - Image_Archive string - Image_Path string - Http_Port int - Machine_Type string - Machine_Count int - Sandbox string - Procs int + Image_Archive string + Image_Path string + Image_Name string + Http_Port int + Manager_Http_Port int + Machine_Type string + Machine_Count int + Sandbox string + Procs int } func main() { flag.Parse() cfg = readConfig(*flagConfig) + log.Printf("config: %+v", cfg) var err error ctx = context.Background() @@ -55,12 +59,11 @@ func main() { fatalf("failed to create cloud storage client: %v", err) } - tokenSource, err := google.DefaultTokenSource(ctx, compute.CloudPlatformScope) + GCE, err = gce.NewContext() if err != nil { - fatalf("failed to get a token source: %v", err) + log.Fatalf("failed to init gce: %v", err) } - httpClient := oauth2.NewClient(ctx, tokenSource) - computeService, _ = compute.New(httpClient) + log.Printf("gce initialized: running on %v, internal IP, %v project %v, zone %v", GCE.Instance, GCE.InternalIP, GCE.ProjectID, GCE.ZoneID) archive, updated, err := openFile(cfg.Image_Archive) if err != nil { @@ -68,29 +71,43 @@ func main() { } log.Printf("archive updated: %v", updated) - if false { - if err := os.RemoveAll("image"); err != nil { - fatalf("failed to remove image dir: %v", err) - } - if err := downloadAndExtract(archive, "image"); err != nil { - fatalf("failed to download and extract %v: %v", cfg.Image_Archive, err) - } + if err := os.RemoveAll("image"); err != nil { + fatalf("failed to remove image dir: %v", err) + } + if err := downloadAndExtract(archive, "image"); err != nil { + fatalf("failed to download and extract %v: %v", cfg.Image_Archive, err) + } if err := uploadFile("image/disk.tar.gz", cfg.Image_Path); err != nil { fatalf("failed to upload image: %v", err) } + + if err := GCE.DeleteImage(cfg.Image_Name); err != nil { + fatalf("failed to delete GCE image: %v", err) } + if err := GCE.CreateImage(cfg.Image_Name, cfg.Image_Path); err != nil { + fatalf("failed to create GCE image: %v", err) + } - + syzBin, err := updateSyzkallerBuild() + if err != nil { + fatalf("failed to update/build syzkaller: %v", err) + } + _ = syzBin - if false { - syzBin, err := updateSyzkallerBuild() - if err != nil { - fatalf("failed to update/build syzkaller: %v", err) - } - _ = syzBin + if err := writeManagerConfig("manager.cfg"); err != nil { + fatalf("failed to write manager config: %v", err) } + + manager := exec.Command("gopath/src/github.com/google/syzkaller/bin/syz-manager", "-config=manager.cfg") + manager.Stdout = os.Stdout + manager.Stderr = os.Stderr + if err := manager.Start(); err != nil { + fatalf("failed to start syz-manager: %v", err) + } + err = manager.Wait() + fatalf("syz-manager exited with: %v", err) } func readConfig(filename string) *Config { @@ -108,6 +125,32 @@ func readConfig(filename string) *Config { return cfg } +func writeManagerConfig(file string) error { + managerCfg := &config.Config{ + Http: fmt.Sprintf(":%v", cfg.Manager_Http_Port), + Rpc: ":0", + Workdir: "workdir", + Vmlinux: "image/obj/vmlinux", + Syzkaller: "gopath/src/github.com/google/syzkaller", + Type: "gce", + Machine_Type: cfg.Machine_Type, + Count: cfg.Machine_Count, + Image: cfg.Image_Name, + Sshkey: "image/key", + Sandbox: cfg.Sandbox, + Procs: cfg.Procs, + Cover: true, + } + data, err := json.MarshalIndent(managerCfg, "", "\t") + if err != nil { + return err + } + if err := ioutil.WriteFile(file, data, 0600); err != nil { + return err + } + return nil +} + func openFile(file string) (*storage.ObjectHandle, time.Time, error) { pos := strings.IndexByte(file, '/') if pos == -1 { @@ -192,7 +235,7 @@ func updateSyzkallerBuild() (string, error) { if err != nil { return "", err } - goGet := exec.Command("go", "get", "-u", "-d", "github.com/google/syzkaller/syz-manager") + goGet := exec.Command("go", "get", "-u", "-d", "github.com/google/syzkaller/syz-manager", "github.com/google/syzkaller/syz-gce") goGet.Env = append([]string{"GOPATH=" + gopath}, os.Environ()...) if output, err := goGet.CombinedOutput(); err != nil { return "", fmt.Errorf("%v\n%s", err, output) diff --git a/syz-manager/manager.go b/syz-manager/manager.go index 0908cd956..c5d865674 100644 --- a/syz-manager/manager.go +++ b/syz-manager/manager.go @@ -86,7 +86,7 @@ func main() { func RunManager(cfg *config.Config, syscalls map[int]bool, suppressions []*regexp.Regexp) { crashdir := filepath.Join(cfg.Workdir, "crashes") - os.MkdirAll(crashdir) + os.MkdirAll(crashdir, 0700) enabledSyscalls := "" if len(syscalls) != 0 { diff --git a/vm/gce/gce.go b/vm/gce/gce.go index 2c40305a5..4a977b5f8 100644 --- a/vm/gce/gce.go +++ b/vm/gce/gce.go @@ -3,35 +3,26 @@ // Package gce allows to use Google Compute Engine (GCE) virtual machines as VMs. // It is assumed that syz-manager also runs on GCE as VMs are created in the current project/zone. +// // See https://cloud.google.com/compute/docs for details. // In particular, how to build GCE-compatible images: // https://cloud.google.com/compute/docs/tutorials/building-images // Working with serial console: // https://cloud.google.com/compute/docs/instances/interacting-with-serial-console -// API reference: -// https://cloud.google.com/compute/docs/reference/latest/ -// and Go API wrappers: -// https://godoc.org/google.golang.org/api/compute/v0.beta package gce import ( "fmt" "io/ioutil" "log" - "net/http" "os" "os/exec" "path/filepath" - "strings" "sync" "time" + "github.com/google/syzkaller/gce" "github.com/google/syzkaller/vm" - "golang.org/x/net/context" - "golang.org/x/oauth2" - "golang.org/x/oauth2/google" - "google.golang.org/api/compute/v0.beta" - "google.golang.org/api/googleapi" ) func init() { @@ -49,65 +40,16 @@ type instance struct { } var ( - initOnce sync.Once - computeService *compute.Service - projectID string - zoneID string - internalIP string - - // apiCallTicker ticks regularly, preventing us from accidentally making - // GCE API calls too quickly. Our quota is 20 QPS, but we temporarily - // limit ourselves to less than that. - apiRateGate = time.NewTicker(time.Second / 10).C + initOnce sync.Once + GCE *gce.Context ) func initGCE() { - ctx := context.Background() - tokenSource, err := google.DefaultTokenSource(ctx, compute.CloudPlatformScope) - if err != nil { - log.Fatalf("failed to get a token source: %v", err) - } - httpClient := oauth2.NewClient(ctx, tokenSource) - computeService, _ = compute.New(httpClient) - // Obtain project name, zone and current instance IP address. - projectID, err = getMeta("project/project-id") - if err != nil { - log.Fatalf("failed to query gce project-id: %v", err) - } - zoneID, err = getMeta("instance/zone") - if err != nil { - log.Fatalf("failed to query gce zone: %v", err) - } - if i := strings.LastIndexByte(zoneID, '/'); i != -1 { - zoneID = zoneID[i+1:] // the query returns some nonsense prefix - } - instID, err := getMeta("instance/id") - if err != nil { - log.Fatalf("failed to query gce instance id: %v", err) - } - instances, err := computeService.Instances.List(projectID, zoneID).Do() + GCE, err := gce.NewContext() if err != nil { - log.Fatalf("error getting instance list: %v", err) - } - // Finds this instance internal IP. - instName := "" - for _, inst := range instances.Items { - if fmt.Sprint(inst.Id) != instID { - continue - } - instName = inst.Name - for _, iface := range inst.NetworkInterfaces { - if strings.HasPrefix(iface.NetworkIP, "10.") { - internalIP = iface.NetworkIP - break - } - } - break - } - if instName == "" || internalIP == "" { - log.Fatalf("failed to get current instance name and internal IP") + log.Fatalf("failed to init gce: %v", err) } - log.Printf("gce initialized: running on %v, internal IP, %v project %v, zone %v", instName, internalIP, projectID, zoneID) + log.Printf("gce initialized: running on %v, internal IP, %v project %v, zone %v", GCE.Instance, GCE.InternalIP, GCE.ProjectID, GCE.ZoneID) } func ctor(cfg *vm.Config) (vm.Instance, error) { @@ -132,17 +74,17 @@ func ctor(cfg *vm.Config) (vm.Instance, error) { } log.Printf("deleting instance: %v", name) - if err := deleteInstance(name); err != nil { + if err := GCE.DeleteInstance(name); err != nil { return nil, err } log.Printf("creating instance: %v", name) - ip, err := createInstance(name, cfg.MachineType, cfg.Image, string(sshkeyPub)) + ip, err := GCE.CreateInstance(name, cfg.MachineType, cfg.Image, string(sshkeyPub)) if err != nil { return nil, err } defer func() { if !ok { - deleteInstance(name) + GCE.DeleteInstance(name) } }() log.Printf("wait instance to boot: %v (%v)", name, ip) @@ -162,12 +104,12 @@ func ctor(cfg *vm.Config) (vm.Instance, error) { func (inst *instance) Close() { close(inst.closed) - deleteInstance(inst.name) + GCE.DeleteInstance(inst.name) os.RemoveAll(inst.cfg.Workdir) } func (inst *instance) Forward(port int) (string, error) { - return fmt.Sprintf("%v:%v", internalIP, port), nil + return fmt.Sprintf("%v:%v", GCE.InternalIP, port), nil } func (inst *instance) Copy(hostSrc string) (string, error) { @@ -199,7 +141,7 @@ func (inst *instance) Run(timeout time.Duration, command string) (<-chan []byte, return nil, nil, err } - conAddr := fmt.Sprintf("%v.%v.%v.syzkaller.port=1@ssh-serialport.googleapis.com", projectID, zoneID, inst.name) + conAddr := fmt.Sprintf("%v.%v.%v.syzkaller.port=1@ssh-serialport.googleapis.com", GCE.ProjectID, GCE.ZoneID, inst.name) conArgs := append(sshArgs(inst.sshkey, "-p", 9600), conAddr) con := exec.Command("ssh", conArgs...) con.Env = []string{} @@ -281,24 +223,6 @@ func (inst *instance) Run(timeout time.Duration, command string) (<-chan []byte, return merger.Output, errc, nil } -func getMeta(path string) (string, error) { - req, err := http.NewRequest("GET", "http://metadata.google.internal/computeMetadata/v1/"+path, nil) - if err != nil { - return "", err - } - req.Header.Add("Metadata-Flavor", "Google") - resp, err := http.DefaultClient.Do(req) - if err != nil { - return "", err - } - defer resp.Body.Close() - body, err := ioutil.ReadAll(resp.Body) - if err != nil { - return "", err - } - return string(body), nil -} - func waitInstanceBoot(ip, sshkey string) error { for i := 0; i < 100; i++ { if !vm.SleepInterruptible(5 * time.Second) { @@ -312,121 +236,6 @@ func waitInstanceBoot(ip, sshkey string) error { return fmt.Errorf("can't ssh into the instance") } -func createInstance(name, machineType, image, sshkey string) (string, error) { - prefix := "https://www.googleapis.com/compute/v1/projects/" + projectID - instance := &compute.Instance{ - Name: name, - Description: "syzkaller worker", - MachineType: prefix + "/zones/" + zoneID + "/machineTypes/" + machineType, - Disks: []*compute.AttachedDisk{ - { - AutoDelete: true, - Boot: true, - Type: "PERSISTENT", - InitializeParams: &compute.AttachedDiskInitializeParams{ - DiskName: name, - SourceImage: prefix + "/global/images/" + image, - }, - }, - }, - Metadata: &compute.Metadata{ - Items: []*compute.MetadataItems{ - { - Key: "ssh-keys", - Value: "syzkaller:" + sshkey, - }, - { - Key: "serial-port-enable", - Value: "1", - }, - }, - }, - NetworkInterfaces: []*compute.NetworkInterface{ - &compute.NetworkInterface{ - Network: "global/networks/default", - }, - }, - Scheduling: &compute.Scheduling{ - AutomaticRestart: false, - Preemptible: false, - OnHostMaintenance: "MIGRATE", - }, - } - - <-apiRateGate - op, err := computeService.Instances.Insert(projectID, zoneID, instance).Do() - if err != nil { - return "", fmt.Errorf("failed to create instance: %v", err) - } - if err := waitForCompletion("create", op.Name, false); err != nil { - return "", err - } - - <-apiRateGate - inst, err := computeService.Instances.Get(projectID, zoneID, name).Do() - if err != nil { - return "", fmt.Errorf("error getting instance %s details after creation: %v", name, err) - } - - // Finds its internal IP. - ip := "" - for _, iface := range inst.NetworkInterfaces { - if strings.HasPrefix(iface.NetworkIP, "10.") { - ip = iface.NetworkIP - break - } - } - if ip == "" { - return "", fmt.Errorf("didn't find instance internal IP address") - } - return ip, nil -} - -func deleteInstance(name string) error { - <-apiRateGate - op, err := computeService.Instances.Delete(projectID, zoneID, name).Do() - apiErr, ok := err.(*googleapi.Error) - if ok && apiErr.Code == 404 { - return nil - } - if err != nil { - return fmt.Errorf("failed to delete instance: %v", err) - } - if err := waitForCompletion("delete", op.Name, true); err != nil { - return err - } - return nil -} - -func waitForCompletion(desc, opName string, ignoreNotFound bool) error { - for { - time.Sleep(2 * time.Second) - <-apiRateGate - op, err := computeService.ZoneOperations.Get(projectID, zoneID, opName).Do() - if err != nil { - return fmt.Errorf("failed to get %v operation %v: %v", desc, opName, err) - } - switch op.Status { - case "PENDING", "RUNNING": - continue - case "DONE": - if op.Error != nil { - reason := "" - for _, operr := range op.Error.Errors { - if ignoreNotFound && operr.Code == "RESOURCE_NOT_FOUND" { - return nil - } - reason += fmt.Sprintf("%+v.", operr) - } - return fmt.Errorf("%v operation failed: %v", desc, reason) - } - return nil - default: - return fmt.Errorf("unknown %v operation status %q: %+v", desc, op.Status, op) - } - } -} - func sshArgs(sshKey, portArg string, port int) []string { return []string{ portArg, fmt.Sprint(port), -- cgit mrf-deployment