diff options
Diffstat (limited to 'tools/syz-testbed/testbed.go')
| -rw-r--r-- | tools/syz-testbed/testbed.go | 371 |
1 files changed, 225 insertions, 146 deletions
diff --git a/tools/syz-testbed/testbed.go b/tools/syz-testbed/testbed.go index bad77c294..819a61b34 100644 --- a/tools/syz-testbed/testbed.go +++ b/tools/syz-testbed/testbed.go @@ -13,15 +13,12 @@ import ( "flag" "fmt" "log" - "net" - "os" "path/filepath" "regexp" - "strconv" + "sync" "time" "github.com/google/syzkaller/pkg/config" - syz_instance "github.com/google/syzkaller/pkg/instance" "github.com/google/syzkaller/pkg/mgrconfig" "github.com/google/syzkaller/pkg/osutil" "github.com/google/syzkaller/pkg/tool" @@ -29,44 +26,45 @@ import ( ) var ( - flagConfig = flag.String("config", "", "config file") - flagCleanup = flag.Bool("cleanup", false, "remove existing work directories") + flagConfig = flag.String("config", "", "config file") ) type TestbedConfig struct { - Corpus string `json:"corpus"` // path to the corpus file - Workdir string `json:"workdir"` // instances will be checked out there - ManagerConfig json.RawMessage `json:"manager_config"` // base manager config - Checkouts []TestbedCheckout `json:"checkouts"` + Name string `json:"name"` // name of the testbed + MaxInstances int `json:"max_instances"` // max # of simultaneously running instances + RunTime DurationConfig `json:"run_time"` // lifetime of an instance (default "24h") + Corpus string `json:"corpus"` // path to the corpus file + Workdir string `json:"workdir"` // instances will be checked out there + ManagerConfig json.RawMessage `json:"manager_config"` // base manager config + Checkouts []CheckoutConfig `json:"checkouts"` } -type TestbedCheckout struct { +type DurationConfig struct { + time.Duration +} + +type CheckoutConfig struct { Name string `json:"name"` Repo string `json:"repo"` Branch string `json:"branch"` - Count int `json:"count"` -} - -type CheckoutInfo struct { - Path string - Name string - Instances []InstanceInfo } -// The essential information about an already prepared instance. -type InstanceInfo struct { - Name string - Workdir string - BenchFile string - LogFile string - HTTP string - ExecCommand string - ExecCommandArgs []string +type TestbedContext struct { + Config *TestbedConfig + ManagerConfig *mgrconfig.Config + Checkouts []*Checkout + NextRestart time.Time + NextCheckoutID int + NextInstanceID int + statMutex sync.Mutex } func main() { flag.Parse() - cfg := &TestbedConfig{} + cfg := &TestbedConfig{ + Name: "testbed", + RunTime: DurationConfig{24 * time.Hour}, + } err := config.LoadFile(*flagConfig, &cfg) if err != nil { tool.Failf("failed to read config: %s", err) @@ -82,150 +80,238 @@ func main() { tool.Failf("failed to parse manager config: %s", err) } if managerCfg.HTTP == "" { - managerCfg.HTTP = ":50000" + managerCfg.HTTP = "0.0.0.0:0" } - checkouts := []*CheckoutInfo{} - for _, co := range cfg.Checkouts { - checkouts = append(checkouts, newCheckout(co, cfg, managerCfg)) + ctx := TestbedContext{ + Config: cfg, + ManagerConfig: managerCfg, } - - log.Printf("------------------") - for _, co := range checkouts { - for _, instance := range co.Instances { - go runInstance(instance) + for _, checkoutCfg := range cfg.Checkouts { + co, err := ctx.NewCheckout(&checkoutCfg) + if err != nil { + tool.Failf("checkout failed: %s", err) } + ctx.Checkouts = append(ctx.Checkouts, co) } - go collectStats(cfg, checkouts) - // Block the execution indefinitely. - // Either the process will be killed or it will exit itself if one of the instances fails. - select {} -} -func collectStats(cfg *TestbedConfig, checkouts []*CheckoutInfo) { - const period = 90 * time.Second - benchFolder := filepath.Join(cfg.Workdir, "benches") - err := osutil.MkdirAll(benchFolder) - if err != nil { - tool.Failf("failed to create bench folder: %s", err) - } - tableStats := map[string]func(checkouts []*CheckoutInfo) ([][]string, error){ - "bugs.csv": generateBugTable, - "checkout_stats.csv": checkoutStatsTable, - "instance_stats.csv": instanceStatsTable, - } - for { - time.Sleep(period) - for fileName, genFunc := range tableStats { - table, err := genFunc(checkouts) - if err == nil { - saveTableAsCsv(table, filepath.Join(cfg.Workdir, fileName)) + shutdown := make(chan struct{}) + osutil.HandleInterrupts(shutdown) + + go func() { + const period = 90 * time.Second + for { + time.Sleep(period) + err := ctx.SaveStats() + if err != nil { + log.Printf("stats saving error: %s", err) } } - for _, checkout := range checkouts { - fileName := fmt.Sprintf("avg_%v.txt", checkout.Name) - saveAvgBenchFile(checkout, filepath.Join(benchFolder, fileName)) + }() + + ctx.Loop(shutdown) +} + +func (ctx *TestbedContext) GetStatViews() ([]StatView, error) { + groupsCompleted := []RunResultGroup{} + groupsAll := []RunResultGroup{} + for _, checkout := range ctx.Checkouts { + running := []*RunResult{} + for _, instance := range checkout.Running { + result, err := instance.FetchResult() + if err != nil { + return nil, err + } + running = append(running, result) } + groupsCompleted = append(groupsCompleted, RunResultGroup{ + Name: checkout.Name, + Results: checkout.Completed, + }) + groupsAll = append(groupsAll, RunResultGroup{ + Name: checkout.Name, + Results: append(checkout.Completed, running...), + }) } + return []StatView{ + { + Name: "all", + Groups: groupsAll, + }, + { + Name: "completed", + Groups: groupsCompleted, + }, + }, nil } -func runInstance(info InstanceInfo) { - logfile, err := os.Create(info.LogFile) +func (ctx *TestbedContext) saveStatView(view StatView) error { + dir := filepath.Join(ctx.Config.Workdir, "stats_"+view.Name) + benchDir := filepath.Join(dir, "benches") + err := osutil.MkdirAll(benchDir) if err != nil { - tool.Failf("[%s] failed to create logfile: %s", info.Name, err) + return fmt.Errorf("failed to create %s: %s", benchDir, err) } - cmd := osutil.GraciousCommand(info.ExecCommand, info.ExecCommandArgs...) - cmd.Stdout = logfile - cmd.Stderr = logfile - err = cmd.Start() - if err != nil { - tool.Failf("[%s] failed to start instance: %s", info.Name, err) + + tableStats := map[string]func(view StatView) ([][]string, error){ + "bugs.csv": (StatView).GenerateBugTable, + "checkout_stats.csv": (StatView).StatsTable, + "instance_stats.csv": (StatView).InstanceStatsTable, } - log.Printf("[%s] Instance started. Listening on %s", info.Name, info.HTTP) - logfile.Close() - err = cmd.Wait() - tool.Failf("[%s] Instance exited: %s", info.Name, err) + + for fileName, genFunc := range tableStats { + table, err := genFunc(view) + if err == nil { + SaveTableAsCsv(table, filepath.Join(dir, fileName)) + } else { + log.Printf("some error: %s", err) + } + } + for _, group := range view.Groups { + fileName := fmt.Sprintf("avg_%v.txt", group.Name) + group.SaveAvgBenchFile(filepath.Join(benchDir, fileName)) + } + return nil } -func newCheckout(co TestbedCheckout, cfg *TestbedConfig, managerCfg *mgrconfig.Config) *CheckoutInfo { - log.Printf("[%s] Checking out", co.Name) - path := filepath.Join(cfg.Workdir, "checkouts", co.Name) - if osutil.IsExist(path) { - if !*flagCleanup { - tool.Failf("path %s already exists", path) +func (ctx *TestbedContext) saveTestbedStats(file string) error { + table := [][]string{ + {"Checkout", "Running", "Completed", "Until reset"}, + } + for _, checkout := range ctx.Checkouts { + until := "-" + if ctx.NextRestart.After(time.Now()) { + until = time.Until(ctx.NextRestart).Round(time.Second).String() } - osutil.RemoveAll(path) + table = append(table, []string{ + checkout.Name, + fmt.Sprintf("%d", len(checkout.Running)), + fmt.Sprintf("%d", len(checkout.Completed)), + until, + }) } - repo := vcs.NewSyzkallerRepo(path) - commit, err := repo.Poll(co.Repo, co.Branch) + return SaveTableAsCsv(table, file) +} + +func (ctx *TestbedContext) SaveStats() error { + // Preventing concurrent saving of the stats. + ctx.statMutex.Lock() + defer ctx.statMutex.Unlock() + views, err := ctx.GetStatViews() if err != nil { - tool.Failf("failed to checkout %s (%s): %s", co.Repo, co.Branch, err) + return err } - log.Printf("[%s] Done. Latest commit: %s", co.Name, commit) - log.Printf("[%s] Building", co.Name) - if _, err := osutil.RunCmd(time.Hour, path, syz_instance.MakeBin); err != nil { - tool.Failf("[%s] Make failed: %s", co.Name, err) + for _, view := range views { + err := ctx.saveStatView(view) + if err != nil { + return err + } } - checkoutInfo := CheckoutInfo{ - Name: co.Name, - Path: path, + return ctx.saveTestbedStats(filepath.Join(ctx.Config.Workdir, "testbed.csv")) +} + +func (ctx *TestbedContext) generateInstances(count int) ([]*Instance, error) { + // It seems that even gracefully finished syz-managers can leak GCE instances. + // To allow for that strange behavior, let's reuse syz-manager names, so that + // they will in turn reuse the names of the leaked GCE instances. + instances := []*Instance{} + for idx := 1; idx <= count; idx++ { + checkout := ctx.Checkouts[ctx.NextCheckoutID] + ctx.NextCheckoutID = (ctx.NextCheckoutID + 1) % len(ctx.Checkouts) + instance, err := ctx.NewInstance(checkout, fmt.Sprintf("%s-%d", ctx.Config.Name, idx)) + if err != nil { + return nil, err + } + checkout.Running = append(checkout.Running, instance) + instances = append(instances, instance) } - for i := 1; i <= co.Count; i++ { - name := fmt.Sprintf("%v-%d", co.Name, i) - log.Printf("[%s] Generating workdir", name) - workdir := filepath.Join(path, fmt.Sprintf("workdir_%d", i)) - err = osutil.MkdirAll(workdir) + return instances, nil +} + +// Create instances, run them, stop them, archive them, and so on... +func (ctx *TestbedContext) Loop(stop chan struct{}) { + duration := ctx.Config.RunTime.Duration + mustStop := false + for !mustStop { + log.Printf("setting up instances") + instances, err := ctx.generateInstances(ctx.Config.MaxInstances) if err != nil { - tool.Failf("failed to create dir %s", workdir) + tool.Failf("failed to set up intances: %s", err) } - if cfg.Corpus != "" { - corpusPath := filepath.Join(workdir, "corpus.db") - err = osutil.CopyFile(cfg.Corpus, corpusPath) - if err != nil { - tool.Failf("failed to copy corpus from %s: %s", cfg.Corpus, err) - } + log.Printf("starting instances") + instanceStatuses := make(chan error, len(instances)) + var wg sync.WaitGroup + for _, inst := range instances { + wg.Add(1) + go func(instance *Instance) { + instanceStatuses <- instance.Run() + wg.Done() + }(inst) } - log.Printf("[%s] Generating syz-manager config", name) - managerCfg.Name = name - managerCfg.Workdir = workdir - managerCfg.Syzkaller = path - managerCfgPath := filepath.Join(path, fmt.Sprintf("syz_%d.cnf", i)) - err = config.SaveFile(managerCfgPath, managerCfg) - if err != nil { - tool.Failf("failed to save manager config to %s: %s", managerCfgPath, err) + + ctx.NextRestart = time.Now().Add(duration) + select { + case err := <-instanceStatuses: + // Syz-managers are not supposed to stop under normal circumstances. + // If one of them did stop, there must have been a very good reason to. + // For now, we just shut down the whole experiment in such a case. + log.Printf("an instance has failed (%s), stopping everything", err) + mustStop = true + case <-stop: + log.Printf("stopping the experiment") + mustStop = true + case <-time.After(duration): + log.Printf("run period has finished") } - bench := filepath.Join(path, fmt.Sprintf("bench_%d.txt", i)) - log := filepath.Join(path, fmt.Sprintf("log_%d.txt", i)) - checkoutInfo.Instances = append(checkoutInfo.Instances, InstanceInfo{ - Name: managerCfg.Name, - Workdir: workdir, - BenchFile: bench, - LogFile: log, - HTTP: managerCfg.HTTP, - ExecCommand: filepath.Join(path, "bin", "syz-manager"), - ExecCommandArgs: []string{"-config", managerCfgPath, "-bench", bench}, - }) - managerCfg.HTTP, err = increasePort(managerCfg.HTTP) + + // Wait for all instances to finish. + for _, instance := range instances { + instance.Stop() + } + wg.Wait() + + // Only mark instances completed if they've indeed been running the whole iteration. + if !mustStop { + for _, checkout := range ctx.Checkouts { + err = checkout.ArchiveRunning() + if err != nil { + tool.Failf("ArchiveRunning error: %s", err) + } + } + } + + log.Printf("collecting statistics") + err = ctx.SaveStats() if err != nil { - tool.Failf("failed to inrease port number: %s", err) + log.Printf("stats saving error: %s", err) } } - return &checkoutInfo } -func increasePort(http string) (string, error) { - host, portStr, err := net.SplitHostPort(http) - if err != nil { - return "", fmt.Errorf("invalid http value: %s", http) +func (d *DurationConfig) UnmarshalJSON(data []byte) error { + var v interface{} + if err := json.Unmarshal(data, &v); err != nil { + return err } - port, err := strconv.Atoi(portStr) - if err != nil { - return "", err + str, ok := v.(string) + if !ok { + return fmt.Errorf("%s was expected to be a string", data) + } + parsed, err := time.ParseDuration(str) + if err == nil { + d.Duration = parsed } - return net.JoinHostPort(host, fmt.Sprintf("%d", port+1)), nil + return err +} +func (d *DurationConfig) MarshalJSON() ([]byte, error) { + return json.Marshal(d.String()) } func checkConfig(cfg *TestbedConfig) error { + testbedNameRe := regexp.MustCompile(`^[0-9a-z\-]{1,20}$`) + if !testbedNameRe.MatchString(cfg.Name) { + return fmt.Errorf("invalid testbed name: %v", cfg.Name) + } if cfg.Workdir == "" { return fmt.Errorf("workdir is empty") } @@ -237,8 +323,10 @@ func checkConfig(cfg *TestbedConfig) error { if cfg.Corpus != "" && !osutil.IsExist(cfg.Corpus) { return fmt.Errorf("corpus %v does not exist", cfg.Corpus) } + if cfg.MaxInstances < 1 { + return fmt.Errorf("max_instances cannot be less than 1") + } cfg.Corpus = osutil.Abs(cfg.Corpus) - instanceNameRe := regexp.MustCompile(`^[0-9a-z\-]{1,20}$`) names := make(map[string]bool) for idx := range cfg.Checkouts { co := &cfg.Checkouts[idx] @@ -250,17 +338,8 @@ func checkConfig(cfg *TestbedConfig) error { } else if !vcs.CheckBranch(co.Branch) { return fmt.Errorf("invalid branch: %s", co.Branch) } - if co.Count < 0 { - return fmt.Errorf("count cannot be negative") - } else if co.Count == 0 { - // The default value. - co.Count = 1 - } - if !instanceNameRe.MatchString(co.Name) { - return fmt.Errorf("invalid instance name: %v", co.Name) - } if names[co.Name] { - return fmt.Errorf("duplicate instance name: %v", co.Name) + return fmt.Errorf("duplicate checkout name: %v", co.Name) } names[co.Name] = true } |
