diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2016-10-07 18:56:45 +0200 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2016-10-07 18:56:45 +0200 |
| commit | d21c3c878c8965786d39a5d73bbe22792b0a6ccd (patch) | |
| tree | 5c72441d9a36e0f7e2ed91e1c3e6243966757acc /vm/gce | |
| parent | 2da6f4a8e133a3b35321b9cc7d87022cc4b91723 (diff) | |
syz-gce: add autonomous GCE runner
It is meant to download new kernel images from GCS,
update and rebuild syzkaller and restart syz-manager.
Work in progress...
Diffstat (limited to 'vm/gce')
| -rw-r--r-- | vm/gce/gce.go | 217 |
1 file changed, 13 insertions, 204 deletions
diff --git a/vm/gce/gce.go b/vm/gce/gce.go index 2c40305a5..4a977b5f8 100644 --- a/vm/gce/gce.go +++ b/vm/gce/gce.go @@ -3,35 +3,26 @@ // Package gce allows to use Google Compute Engine (GCE) virtual machines as VMs. // It is assumed that syz-manager also runs on GCE as VMs are created in the current project/zone. +// // See https://cloud.google.com/compute/docs for details. // In particular, how to build GCE-compatible images: // https://cloud.google.com/compute/docs/tutorials/building-images // Working with serial console: // https://cloud.google.com/compute/docs/instances/interacting-with-serial-console -// API reference: -// https://cloud.google.com/compute/docs/reference/latest/ -// and Go API wrappers: -// https://godoc.org/google.golang.org/api/compute/v0.beta package gce import ( "fmt" "io/ioutil" "log" - "net/http" "os" "os/exec" "path/filepath" - "strings" "sync" "time" + "github.com/google/syzkaller/gce" "github.com/google/syzkaller/vm" - "golang.org/x/net/context" - "golang.org/x/oauth2" - "golang.org/x/oauth2/google" - "google.golang.org/api/compute/v0.beta" - "google.golang.org/api/googleapi" ) func init() { @@ -49,65 +40,16 @@ type instance struct { } var ( - initOnce sync.Once - computeService *compute.Service - projectID string - zoneID string - internalIP string - - // apiCallTicker ticks regularly, preventing us from accidentally making - // GCE API calls too quickly. Our quota is 20 QPS, but we temporarily - // limit ourselves to less than that. - apiRateGate = time.NewTicker(time.Second / 10).C + initOnce sync.Once + GCE *gce.Context ) func initGCE() { - ctx := context.Background() - tokenSource, err := google.DefaultTokenSource(ctx, compute.CloudPlatformScope) - if err != nil { - log.Fatalf("failed to get a token source: %v", err) - } - httpClient := oauth2.NewClient(ctx, tokenSource) - computeService, _ = compute.New(httpClient) - // Obtain project name, zone and current instance IP address. 
- projectID, err = getMeta("project/project-id") - if err != nil { - log.Fatalf("failed to query gce project-id: %v", err) - } - zoneID, err = getMeta("instance/zone") - if err != nil { - log.Fatalf("failed to query gce zone: %v", err) - } - if i := strings.LastIndexByte(zoneID, '/'); i != -1 { - zoneID = zoneID[i+1:] // the query returns some nonsense prefix - } - instID, err := getMeta("instance/id") - if err != nil { - log.Fatalf("failed to query gce instance id: %v", err) - } - instances, err := computeService.Instances.List(projectID, zoneID).Do() + GCE, err := gce.NewContext() if err != nil { - log.Fatalf("error getting instance list: %v", err) - } - // Finds this instance internal IP. - instName := "" - for _, inst := range instances.Items { - if fmt.Sprint(inst.Id) != instID { - continue - } - instName = inst.Name - for _, iface := range inst.NetworkInterfaces { - if strings.HasPrefix(iface.NetworkIP, "10.") { - internalIP = iface.NetworkIP - break - } - } - break - } - if instName == "" || internalIP == "" { - log.Fatalf("failed to get current instance name and internal IP") + log.Fatalf("failed to init gce: %v", err) } - log.Printf("gce initialized: running on %v, internal IP, %v project %v, zone %v", instName, internalIP, projectID, zoneID) + log.Printf("gce initialized: running on %v, internal IP, %v project %v, zone %v", GCE.Instance, GCE.InternalIP, GCE.ProjectID, GCE.ZoneID) } func ctor(cfg *vm.Config) (vm.Instance, error) { @@ -132,17 +74,17 @@ func ctor(cfg *vm.Config) (vm.Instance, error) { } log.Printf("deleting instance: %v", name) - if err := deleteInstance(name); err != nil { + if err := GCE.DeleteInstance(name); err != nil { return nil, err } log.Printf("creating instance: %v", name) - ip, err := createInstance(name, cfg.MachineType, cfg.Image, string(sshkeyPub)) + ip, err := GCE.CreateInstance(name, cfg.MachineType, cfg.Image, string(sshkeyPub)) if err != nil { return nil, err } defer func() { if !ok { - deleteInstance(name) + 
GCE.DeleteInstance(name) } }() log.Printf("wait instance to boot: %v (%v)", name, ip) @@ -162,12 +104,12 @@ func ctor(cfg *vm.Config) (vm.Instance, error) { func (inst *instance) Close() { close(inst.closed) - deleteInstance(inst.name) + GCE.DeleteInstance(inst.name) os.RemoveAll(inst.cfg.Workdir) } func (inst *instance) Forward(port int) (string, error) { - return fmt.Sprintf("%v:%v", internalIP, port), nil + return fmt.Sprintf("%v:%v", GCE.InternalIP, port), nil } func (inst *instance) Copy(hostSrc string) (string, error) { @@ -199,7 +141,7 @@ func (inst *instance) Run(timeout time.Duration, command string) (<-chan []byte, return nil, nil, err } - conAddr := fmt.Sprintf("%v.%v.%v.syzkaller.port=1@ssh-serialport.googleapis.com", projectID, zoneID, inst.name) + conAddr := fmt.Sprintf("%v.%v.%v.syzkaller.port=1@ssh-serialport.googleapis.com", GCE.ProjectID, GCE.ZoneID, inst.name) conArgs := append(sshArgs(inst.sshkey, "-p", 9600), conAddr) con := exec.Command("ssh", conArgs...) con.Env = []string{} @@ -281,24 +223,6 @@ func (inst *instance) Run(timeout time.Duration, command string) (<-chan []byte, return merger.Output, errc, nil } -func getMeta(path string) (string, error) { - req, err := http.NewRequest("GET", "http://metadata.google.internal/computeMetadata/v1/"+path, nil) - if err != nil { - return "", err - } - req.Header.Add("Metadata-Flavor", "Google") - resp, err := http.DefaultClient.Do(req) - if err != nil { - return "", err - } - defer resp.Body.Close() - body, err := ioutil.ReadAll(resp.Body) - if err != nil { - return "", err - } - return string(body), nil -} - func waitInstanceBoot(ip, sshkey string) error { for i := 0; i < 100; i++ { if !vm.SleepInterruptible(5 * time.Second) { @@ -312,121 +236,6 @@ func waitInstanceBoot(ip, sshkey string) error { return fmt.Errorf("can't ssh into the instance") } -func createInstance(name, machineType, image, sshkey string) (string, error) { - prefix := "https://www.googleapis.com/compute/v1/projects/" + projectID - 
instance := &compute.Instance{ - Name: name, - Description: "syzkaller worker", - MachineType: prefix + "/zones/" + zoneID + "/machineTypes/" + machineType, - Disks: []*compute.AttachedDisk{ - { - AutoDelete: true, - Boot: true, - Type: "PERSISTENT", - InitializeParams: &compute.AttachedDiskInitializeParams{ - DiskName: name, - SourceImage: prefix + "/global/images/" + image, - }, - }, - }, - Metadata: &compute.Metadata{ - Items: []*compute.MetadataItems{ - { - Key: "ssh-keys", - Value: "syzkaller:" + sshkey, - }, - { - Key: "serial-port-enable", - Value: "1", - }, - }, - }, - NetworkInterfaces: []*compute.NetworkInterface{ - &compute.NetworkInterface{ - Network: "global/networks/default", - }, - }, - Scheduling: &compute.Scheduling{ - AutomaticRestart: false, - Preemptible: false, - OnHostMaintenance: "MIGRATE", - }, - } - - <-apiRateGate - op, err := computeService.Instances.Insert(projectID, zoneID, instance).Do() - if err != nil { - return "", fmt.Errorf("failed to create instance: %v", err) - } - if err := waitForCompletion("create", op.Name, false); err != nil { - return "", err - } - - <-apiRateGate - inst, err := computeService.Instances.Get(projectID, zoneID, name).Do() - if err != nil { - return "", fmt.Errorf("error getting instance %s details after creation: %v", name, err) - } - - // Finds its internal IP. 
- ip := "" - for _, iface := range inst.NetworkInterfaces { - if strings.HasPrefix(iface.NetworkIP, "10.") { - ip = iface.NetworkIP - break - } - } - if ip == "" { - return "", fmt.Errorf("didn't find instance internal IP address") - } - return ip, nil -} - -func deleteInstance(name string) error { - <-apiRateGate - op, err := computeService.Instances.Delete(projectID, zoneID, name).Do() - apiErr, ok := err.(*googleapi.Error) - if ok && apiErr.Code == 404 { - return nil - } - if err != nil { - return fmt.Errorf("failed to delete instance: %v", err) - } - if err := waitForCompletion("delete", op.Name, true); err != nil { - return err - } - return nil -} - -func waitForCompletion(desc, opName string, ignoreNotFound bool) error { - for { - time.Sleep(2 * time.Second) - <-apiRateGate - op, err := computeService.ZoneOperations.Get(projectID, zoneID, opName).Do() - if err != nil { - return fmt.Errorf("failed to get %v operation %v: %v", desc, opName, err) - } - switch op.Status { - case "PENDING", "RUNNING": - continue - case "DONE": - if op.Error != nil { - reason := "" - for _, operr := range op.Error.Errors { - if ignoreNotFound && operr.Code == "RESOURCE_NOT_FOUND" { - return nil - } - reason += fmt.Sprintf("%+v.", operr) - } - return fmt.Errorf("%v operation failed: %v", desc, reason) - } - return nil - default: - return fmt.Errorf("unknown %v operation status %q: %+v", desc, op.Status, op) - } - } -} - func sshArgs(sshKey, portArg string, port int) []string { return []string{ portArg, fmt.Sprint(port), |
