aboutsummaryrefslogtreecommitdiffstats
path: root/vm/gce
diff options
context:
space:
mode:
author Dmitry Vyukov <dvyukov@google.com> 2016-10-07 18:56:45 +0200
committer Dmitry Vyukov <dvyukov@google.com> 2016-10-07 18:56:45 +0200
commit d21c3c878c8965786d39a5d73bbe22792b0a6ccd (patch)
tree 5c72441d9a36e0f7e2ed91e1c3e6243966757acc /vm/gce
parent 2da6f4a8e133a3b35321b9cc7d87022cc4b91723 (diff)
syz-gce: add autonomous GCE runner
It is meant to download new kernel images from GCS, update and rebuild syzkaller and restart syz-manager. Work in progress...
Diffstat (limited to 'vm/gce')
-rw-r--r-- vm/gce/gce.go 217
1 files changed, 13 insertions, 204 deletions
diff --git a/vm/gce/gce.go b/vm/gce/gce.go
index 2c40305a5..4a977b5f8 100644
--- a/vm/gce/gce.go
+++ b/vm/gce/gce.go
@@ -3,35 +3,26 @@
// Package gce allows to use Google Compute Engine (GCE) virtual machines as VMs.
// It is assumed that syz-manager also runs on GCE as VMs are created in the current project/zone.
+//
// See https://cloud.google.com/compute/docs for details.
// In particular, how to build GCE-compatible images:
// https://cloud.google.com/compute/docs/tutorials/building-images
// Working with serial console:
// https://cloud.google.com/compute/docs/instances/interacting-with-serial-console
-// API reference:
-// https://cloud.google.com/compute/docs/reference/latest/
-// and Go API wrappers:
-// https://godoc.org/google.golang.org/api/compute/v0.beta
package gce
import (
"fmt"
"io/ioutil"
"log"
- "net/http"
"os"
"os/exec"
"path/filepath"
- "strings"
"sync"
"time"
+ "github.com/google/syzkaller/gce"
"github.com/google/syzkaller/vm"
- "golang.org/x/net/context"
- "golang.org/x/oauth2"
- "golang.org/x/oauth2/google"
- "google.golang.org/api/compute/v0.beta"
- "google.golang.org/api/googleapi"
)
func init() {
@@ -49,65 +40,16 @@ type instance struct {
}
var (
- initOnce sync.Once
- computeService *compute.Service
- projectID string
- zoneID string
- internalIP string
-
- // apiCallTicker ticks regularly, preventing us from accidentally making
- // GCE API calls too quickly. Our quota is 20 QPS, but we temporarily
- // limit ourselves to less than that.
- apiRateGate = time.NewTicker(time.Second / 10).C
+ initOnce sync.Once
+ GCE *gce.Context
)
+// initGCE creates the package-level GCE context via gce.NewContext and logs
+// the instance, internal IP, project and zone it reports.
+// It terminates the process with log.Fatalf if the context cannot be created.
func initGCE() {
- ctx := context.Background()
- tokenSource, err := google.DefaultTokenSource(ctx, compute.CloudPlatformScope)
- if err != nil {
- log.Fatalf("failed to get a token source: %v", err)
- }
- httpClient := oauth2.NewClient(ctx, tokenSource)
- computeService, _ = compute.New(httpClient)
- // Obtain project name, zone and current instance IP address.
- projectID, err = getMeta("project/project-id")
- if err != nil {
- log.Fatalf("failed to query gce project-id: %v", err)
- }
- zoneID, err = getMeta("instance/zone")
- if err != nil {
- log.Fatalf("failed to query gce zone: %v", err)
- }
- if i := strings.LastIndexByte(zoneID, '/'); i != -1 {
- zoneID = zoneID[i+1:] // the query returns some nonsense prefix
- }
- instID, err := getMeta("instance/id")
- if err != nil {
- log.Fatalf("failed to query gce instance id: %v", err)
- }
- instances, err := computeService.Instances.List(projectID, zoneID).Do()
+ // Assign with "=" rather than ":=": a short variable declaration here would
+ // create a function-local GCE that shadows the package-level variable and
+ // leaves it nil for every other function in this package.
+ var err error
+ GCE, err = gce.NewContext()
if err != nil {
- log.Fatalf("error getting instance list: %v", err)
- }
- // Finds this instance internal IP.
- instName := ""
- for _, inst := range instances.Items {
- if fmt.Sprint(inst.Id) != instID {
- continue
- }
- instName = inst.Name
- for _, iface := range inst.NetworkInterfaces {
- if strings.HasPrefix(iface.NetworkIP, "10.") {
- internalIP = iface.NetworkIP
- break
- }
- }
- break
- }
- if instName == "" || internalIP == "" {
- log.Fatalf("failed to get current instance name and internal IP")
+ log.Fatalf("failed to init gce: %v", err)
}
- log.Printf("gce initialized: running on %v, internal IP, %v project %v, zone %v", instName, internalIP, projectID, zoneID)
+ log.Printf("gce initialized: running on %v, internal IP %v, project %v, zone %v", GCE.Instance, GCE.InternalIP, GCE.ProjectID, GCE.ZoneID)
}
func ctor(cfg *vm.Config) (vm.Instance, error) {
@@ -132,17 +74,17 @@ func ctor(cfg *vm.Config) (vm.Instance, error) {
}
log.Printf("deleting instance: %v", name)
- if err := deleteInstance(name); err != nil {
+ if err := GCE.DeleteInstance(name); err != nil {
return nil, err
}
log.Printf("creating instance: %v", name)
- ip, err := createInstance(name, cfg.MachineType, cfg.Image, string(sshkeyPub))
+ ip, err := GCE.CreateInstance(name, cfg.MachineType, cfg.Image, string(sshkeyPub))
if err != nil {
return nil, err
}
defer func() {
if !ok {
- deleteInstance(name)
+ GCE.DeleteInstance(name)
}
}()
log.Printf("wait instance to boot: %v (%v)", name, ip)
@@ -162,12 +104,12 @@ func ctor(cfg *vm.Config) (vm.Instance, error) {
func (inst *instance) Close() {
close(inst.closed)
- deleteInstance(inst.name)
+ GCE.DeleteInstance(inst.name)
os.RemoveAll(inst.cfg.Workdir)
}
func (inst *instance) Forward(port int) (string, error) {
- return fmt.Sprintf("%v:%v", internalIP, port), nil
+ return fmt.Sprintf("%v:%v", GCE.InternalIP, port), nil
}
func (inst *instance) Copy(hostSrc string) (string, error) {
@@ -199,7 +141,7 @@ func (inst *instance) Run(timeout time.Duration, command string) (<-chan []byte,
return nil, nil, err
}
- conAddr := fmt.Sprintf("%v.%v.%v.syzkaller.port=1@ssh-serialport.googleapis.com", projectID, zoneID, inst.name)
+ conAddr := fmt.Sprintf("%v.%v.%v.syzkaller.port=1@ssh-serialport.googleapis.com", GCE.ProjectID, GCE.ZoneID, inst.name)
conArgs := append(sshArgs(inst.sshkey, "-p", 9600), conAddr)
con := exec.Command("ssh", conArgs...)
con.Env = []string{}
@@ -281,24 +223,6 @@ func (inst *instance) Run(timeout time.Duration, command string) (<-chan []byte,
return merger.Output, errc, nil
}
-func getMeta(path string) (string, error) {
- req, err := http.NewRequest("GET", "http://metadata.google.internal/computeMetadata/v1/"+path, nil)
- if err != nil {
- return "", err
- }
- req.Header.Add("Metadata-Flavor", "Google")
- resp, err := http.DefaultClient.Do(req)
- if err != nil {
- return "", err
- }
- defer resp.Body.Close()
- body, err := ioutil.ReadAll(resp.Body)
- if err != nil {
- return "", err
- }
- return string(body), nil
-}
-
func waitInstanceBoot(ip, sshkey string) error {
for i := 0; i < 100; i++ {
if !vm.SleepInterruptible(5 * time.Second) {
@@ -312,121 +236,6 @@ func waitInstanceBoot(ip, sshkey string) error {
return fmt.Errorf("can't ssh into the instance")
}
-func createInstance(name, machineType, image, sshkey string) (string, error) {
- prefix := "https://www.googleapis.com/compute/v1/projects/" + projectID
- instance := &compute.Instance{
- Name: name,
- Description: "syzkaller worker",
- MachineType: prefix + "/zones/" + zoneID + "/machineTypes/" + machineType,
- Disks: []*compute.AttachedDisk{
- {
- AutoDelete: true,
- Boot: true,
- Type: "PERSISTENT",
- InitializeParams: &compute.AttachedDiskInitializeParams{
- DiskName: name,
- SourceImage: prefix + "/global/images/" + image,
- },
- },
- },
- Metadata: &compute.Metadata{
- Items: []*compute.MetadataItems{
- {
- Key: "ssh-keys",
- Value: "syzkaller:" + sshkey,
- },
- {
- Key: "serial-port-enable",
- Value: "1",
- },
- },
- },
- NetworkInterfaces: []*compute.NetworkInterface{
- &compute.NetworkInterface{
- Network: "global/networks/default",
- },
- },
- Scheduling: &compute.Scheduling{
- AutomaticRestart: false,
- Preemptible: false,
- OnHostMaintenance: "MIGRATE",
- },
- }
-
- <-apiRateGate
- op, err := computeService.Instances.Insert(projectID, zoneID, instance).Do()
- if err != nil {
- return "", fmt.Errorf("failed to create instance: %v", err)
- }
- if err := waitForCompletion("create", op.Name, false); err != nil {
- return "", err
- }
-
- <-apiRateGate
- inst, err := computeService.Instances.Get(projectID, zoneID, name).Do()
- if err != nil {
- return "", fmt.Errorf("error getting instance %s details after creation: %v", name, err)
- }
-
- // Finds its internal IP.
- ip := ""
- for _, iface := range inst.NetworkInterfaces {
- if strings.HasPrefix(iface.NetworkIP, "10.") {
- ip = iface.NetworkIP
- break
- }
- }
- if ip == "" {
- return "", fmt.Errorf("didn't find instance internal IP address")
- }
- return ip, nil
-}
-
-func deleteInstance(name string) error {
- <-apiRateGate
- op, err := computeService.Instances.Delete(projectID, zoneID, name).Do()
- apiErr, ok := err.(*googleapi.Error)
- if ok && apiErr.Code == 404 {
- return nil
- }
- if err != nil {
- return fmt.Errorf("failed to delete instance: %v", err)
- }
- if err := waitForCompletion("delete", op.Name, true); err != nil {
- return err
- }
- return nil
-}
-
-func waitForCompletion(desc, opName string, ignoreNotFound bool) error {
- for {
- time.Sleep(2 * time.Second)
- <-apiRateGate
- op, err := computeService.ZoneOperations.Get(projectID, zoneID, opName).Do()
- if err != nil {
- return fmt.Errorf("failed to get %v operation %v: %v", desc, opName, err)
- }
- switch op.Status {
- case "PENDING", "RUNNING":
- continue
- case "DONE":
- if op.Error != nil {
- reason := ""
- for _, operr := range op.Error.Errors {
- if ignoreNotFound && operr.Code == "RESOURCE_NOT_FOUND" {
- return nil
- }
- reason += fmt.Sprintf("%+v.", operr)
- }
- return fmt.Errorf("%v operation failed: %v", desc, reason)
- }
- return nil
- default:
- return fmt.Errorf("unknown %v operation status %q: %+v", desc, op.Status, op)
- }
- }
-}
-
func sshArgs(sshKey, portArg string, port int) []string {
return []string{
portArg, fmt.Sprint(port),