diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2021-10-28 17:49:25 +0000 |
|---|---|---|
| committer | Aleksandr Nogikh <wp32pw@gmail.com> | 2021-11-12 16:33:43 +0100 |
| commit | bba04c950dfc1bc6b709232bb32d552223997678 (patch) | |
| tree | e646235d8fa12248ea951a491a59259753e266b2 /tools/syz-testbed/testbed.go | |
| parent | e543635de79b44506c51fa953a8ad8eb870397f6 (diff) | |
tools/syz-testbed: produce results indefinitely
Creation of instances only at the start of the tool limits the scale of
the experiments.
Instead of setting up instances at the beginning, create, start and
stop them dynamically. This makes it possible to conduct much larger experiments
while keeping the maximal resource consumption under control. When an
instance has been executing for the specified time, it is stopped and
replaced by another one.
Introduce two new parameters:
- max_instances - the maximal number of simultaneously running instances.
- run_hours - the lifetime (in hours) of a single instance.
The longer the tool runs, the more test results will be accumulated for
each checkout specified in the configuration file.
Diffstat (limited to 'tools/syz-testbed/testbed.go')
| -rw-r--r-- | tools/syz-testbed/testbed.go | 371 |
1 files changed, 225 insertions, 146 deletions
diff --git a/tools/syz-testbed/testbed.go b/tools/syz-testbed/testbed.go index bad77c294..819a61b34 100644 --- a/tools/syz-testbed/testbed.go +++ b/tools/syz-testbed/testbed.go @@ -13,15 +13,12 @@ import ( "flag" "fmt" "log" - "net" - "os" "path/filepath" "regexp" - "strconv" + "sync" "time" "github.com/google/syzkaller/pkg/config" - syz_instance "github.com/google/syzkaller/pkg/instance" "github.com/google/syzkaller/pkg/mgrconfig" "github.com/google/syzkaller/pkg/osutil" "github.com/google/syzkaller/pkg/tool" @@ -29,44 +26,45 @@ import ( ) var ( - flagConfig = flag.String("config", "", "config file") - flagCleanup = flag.Bool("cleanup", false, "remove existing work directories") + flagConfig = flag.String("config", "", "config file") ) type TestbedConfig struct { - Corpus string `json:"corpus"` // path to the corpus file - Workdir string `json:"workdir"` // instances will be checked out there - ManagerConfig json.RawMessage `json:"manager_config"` // base manager config - Checkouts []TestbedCheckout `json:"checkouts"` + Name string `json:"name"` // name of the testbed + MaxInstances int `json:"max_instances"` // max # of simultaneously running instances + RunTime DurationConfig `json:"run_time"` // lifetime of an instance (default "24h") + Corpus string `json:"corpus"` // path to the corpus file + Workdir string `json:"workdir"` // instances will be checked out there + ManagerConfig json.RawMessage `json:"manager_config"` // base manager config + Checkouts []CheckoutConfig `json:"checkouts"` } -type TestbedCheckout struct { +type DurationConfig struct { + time.Duration +} + +type CheckoutConfig struct { Name string `json:"name"` Repo string `json:"repo"` Branch string `json:"branch"` - Count int `json:"count"` -} - -type CheckoutInfo struct { - Path string - Name string - Instances []InstanceInfo } -// The essential information about an already prepared instance. 
-type InstanceInfo struct { - Name string - Workdir string - BenchFile string - LogFile string - HTTP string - ExecCommand string - ExecCommandArgs []string +type TestbedContext struct { + Config *TestbedConfig + ManagerConfig *mgrconfig.Config + Checkouts []*Checkout + NextRestart time.Time + NextCheckoutID int + NextInstanceID int + statMutex sync.Mutex } func main() { flag.Parse() - cfg := &TestbedConfig{} + cfg := &TestbedConfig{ + Name: "testbed", + RunTime: DurationConfig{24 * time.Hour}, + } err := config.LoadFile(*flagConfig, &cfg) if err != nil { tool.Failf("failed to read config: %s", err) @@ -82,150 +80,238 @@ func main() { tool.Failf("failed to parse manager config: %s", err) } if managerCfg.HTTP == "" { - managerCfg.HTTP = ":50000" + managerCfg.HTTP = "0.0.0.0:0" } - checkouts := []*CheckoutInfo{} - for _, co := range cfg.Checkouts { - checkouts = append(checkouts, newCheckout(co, cfg, managerCfg)) + ctx := TestbedContext{ + Config: cfg, + ManagerConfig: managerCfg, } - - log.Printf("------------------") - for _, co := range checkouts { - for _, instance := range co.Instances { - go runInstance(instance) + for _, checkoutCfg := range cfg.Checkouts { + co, err := ctx.NewCheckout(&checkoutCfg) + if err != nil { + tool.Failf("checkout failed: %s", err) } + ctx.Checkouts = append(ctx.Checkouts, co) } - go collectStats(cfg, checkouts) - // Block the execution indefinitely. - // Either the process will be killed or it will exit itself if one of the instances fails. 
- select {} -} -func collectStats(cfg *TestbedConfig, checkouts []*CheckoutInfo) { - const period = 90 * time.Second - benchFolder := filepath.Join(cfg.Workdir, "benches") - err := osutil.MkdirAll(benchFolder) - if err != nil { - tool.Failf("failed to create bench folder: %s", err) - } - tableStats := map[string]func(checkouts []*CheckoutInfo) ([][]string, error){ - "bugs.csv": generateBugTable, - "checkout_stats.csv": checkoutStatsTable, - "instance_stats.csv": instanceStatsTable, - } - for { - time.Sleep(period) - for fileName, genFunc := range tableStats { - table, err := genFunc(checkouts) - if err == nil { - saveTableAsCsv(table, filepath.Join(cfg.Workdir, fileName)) + shutdown := make(chan struct{}) + osutil.HandleInterrupts(shutdown) + + go func() { + const period = 90 * time.Second + for { + time.Sleep(period) + err := ctx.SaveStats() + if err != nil { + log.Printf("stats saving error: %s", err) } } - for _, checkout := range checkouts { - fileName := fmt.Sprintf("avg_%v.txt", checkout.Name) - saveAvgBenchFile(checkout, filepath.Join(benchFolder, fileName)) + }() + + ctx.Loop(shutdown) +} + +func (ctx *TestbedContext) GetStatViews() ([]StatView, error) { + groupsCompleted := []RunResultGroup{} + groupsAll := []RunResultGroup{} + for _, checkout := range ctx.Checkouts { + running := []*RunResult{} + for _, instance := range checkout.Running { + result, err := instance.FetchResult() + if err != nil { + return nil, err + } + running = append(running, result) } + groupsCompleted = append(groupsCompleted, RunResultGroup{ + Name: checkout.Name, + Results: checkout.Completed, + }) + groupsAll = append(groupsAll, RunResultGroup{ + Name: checkout.Name, + Results: append(checkout.Completed, running...), + }) } + return []StatView{ + { + Name: "all", + Groups: groupsAll, + }, + { + Name: "completed", + Groups: groupsCompleted, + }, + }, nil } -func runInstance(info InstanceInfo) { - logfile, err := os.Create(info.LogFile) +func (ctx *TestbedContext) saveStatView(view 
StatView) error { + dir := filepath.Join(ctx.Config.Workdir, "stats_"+view.Name) + benchDir := filepath.Join(dir, "benches") + err := osutil.MkdirAll(benchDir) if err != nil { - tool.Failf("[%s] failed to create logfile: %s", info.Name, err) + return fmt.Errorf("failed to create %s: %s", benchDir, err) } - cmd := osutil.GraciousCommand(info.ExecCommand, info.ExecCommandArgs...) - cmd.Stdout = logfile - cmd.Stderr = logfile - err = cmd.Start() - if err != nil { - tool.Failf("[%s] failed to start instance: %s", info.Name, err) + + tableStats := map[string]func(view StatView) ([][]string, error){ + "bugs.csv": (StatView).GenerateBugTable, + "checkout_stats.csv": (StatView).StatsTable, + "instance_stats.csv": (StatView).InstanceStatsTable, } - log.Printf("[%s] Instance started. Listening on %s", info.Name, info.HTTP) - logfile.Close() - err = cmd.Wait() - tool.Failf("[%s] Instance exited: %s", info.Name, err) + + for fileName, genFunc := range tableStats { + table, err := genFunc(view) + if err == nil { + SaveTableAsCsv(table, filepath.Join(dir, fileName)) + } else { + log.Printf("some error: %s", err) + } + } + for _, group := range view.Groups { + fileName := fmt.Sprintf("avg_%v.txt", group.Name) + group.SaveAvgBenchFile(filepath.Join(benchDir, fileName)) + } + return nil } -func newCheckout(co TestbedCheckout, cfg *TestbedConfig, managerCfg *mgrconfig.Config) *CheckoutInfo { - log.Printf("[%s] Checking out", co.Name) - path := filepath.Join(cfg.Workdir, "checkouts", co.Name) - if osutil.IsExist(path) { - if !*flagCleanup { - tool.Failf("path %s already exists", path) +func (ctx *TestbedContext) saveTestbedStats(file string) error { + table := [][]string{ + {"Checkout", "Running", "Completed", "Until reset"}, + } + for _, checkout := range ctx.Checkouts { + until := "-" + if ctx.NextRestart.After(time.Now()) { + until = time.Until(ctx.NextRestart).Round(time.Second).String() } - osutil.RemoveAll(path) + table = append(table, []string{ + checkout.Name, + 
fmt.Sprintf("%d", len(checkout.Running)), + fmt.Sprintf("%d", len(checkout.Completed)), + until, + }) } - repo := vcs.NewSyzkallerRepo(path) - commit, err := repo.Poll(co.Repo, co.Branch) + return SaveTableAsCsv(table, file) +} + +func (ctx *TestbedContext) SaveStats() error { + // Preventing concurrent saving of the stats. + ctx.statMutex.Lock() + defer ctx.statMutex.Unlock() + views, err := ctx.GetStatViews() if err != nil { - tool.Failf("failed to checkout %s (%s): %s", co.Repo, co.Branch, err) + return err } - log.Printf("[%s] Done. Latest commit: %s", co.Name, commit) - log.Printf("[%s] Building", co.Name) - if _, err := osutil.RunCmd(time.Hour, path, syz_instance.MakeBin); err != nil { - tool.Failf("[%s] Make failed: %s", co.Name, err) + for _, view := range views { + err := ctx.saveStatView(view) + if err != nil { + return err + } } - checkoutInfo := CheckoutInfo{ - Name: co.Name, - Path: path, + return ctx.saveTestbedStats(filepath.Join(ctx.Config.Workdir, "testbed.csv")) +} + +func (ctx *TestbedContext) generateInstances(count int) ([]*Instance, error) { + // It seems that even gracefully finished syz-managers can leak GCE instances. + // To allow for that strange behavior, let's reuse syz-manager names, so that + // they will in turn reuse the names of the leaked GCE instances. 
+ instances := []*Instance{} + for idx := 1; idx <= count; idx++ { + checkout := ctx.Checkouts[ctx.NextCheckoutID] + ctx.NextCheckoutID = (ctx.NextCheckoutID + 1) % len(ctx.Checkouts) + instance, err := ctx.NewInstance(checkout, fmt.Sprintf("%s-%d", ctx.Config.Name, idx)) + if err != nil { + return nil, err + } + checkout.Running = append(checkout.Running, instance) + instances = append(instances, instance) } - for i := 1; i <= co.Count; i++ { - name := fmt.Sprintf("%v-%d", co.Name, i) - log.Printf("[%s] Generating workdir", name) - workdir := filepath.Join(path, fmt.Sprintf("workdir_%d", i)) - err = osutil.MkdirAll(workdir) + return instances, nil +} + +// Create instances, run them, stop them, archive them, and so on... +func (ctx *TestbedContext) Loop(stop chan struct{}) { + duration := ctx.Config.RunTime.Duration + mustStop := false + for !mustStop { + log.Printf("setting up instances") + instances, err := ctx.generateInstances(ctx.Config.MaxInstances) if err != nil { - tool.Failf("failed to create dir %s", workdir) + tool.Failf("failed to set up intances: %s", err) } - if cfg.Corpus != "" { - corpusPath := filepath.Join(workdir, "corpus.db") - err = osutil.CopyFile(cfg.Corpus, corpusPath) - if err != nil { - tool.Failf("failed to copy corpus from %s: %s", cfg.Corpus, err) - } + log.Printf("starting instances") + instanceStatuses := make(chan error, len(instances)) + var wg sync.WaitGroup + for _, inst := range instances { + wg.Add(1) + go func(instance *Instance) { + instanceStatuses <- instance.Run() + wg.Done() + }(inst) } - log.Printf("[%s] Generating syz-manager config", name) - managerCfg.Name = name - managerCfg.Workdir = workdir - managerCfg.Syzkaller = path - managerCfgPath := filepath.Join(path, fmt.Sprintf("syz_%d.cnf", i)) - err = config.SaveFile(managerCfgPath, managerCfg) - if err != nil { - tool.Failf("failed to save manager config to %s: %s", managerCfgPath, err) + + ctx.NextRestart = time.Now().Add(duration) + select { + case err := 
<-instanceStatuses: + // Syz-managers are not supposed to stop under normal circumstances. + // If one of them did stop, there must have been a very good reason to. + // For now, we just shut down the whole experiment in such a case. + log.Printf("an instance has failed (%s), stopping everything", err) + mustStop = true + case <-stop: + log.Printf("stopping the experiment") + mustStop = true + case <-time.After(duration): + log.Printf("run period has finished") } - bench := filepath.Join(path, fmt.Sprintf("bench_%d.txt", i)) - log := filepath.Join(path, fmt.Sprintf("log_%d.txt", i)) - checkoutInfo.Instances = append(checkoutInfo.Instances, InstanceInfo{ - Name: managerCfg.Name, - Workdir: workdir, - BenchFile: bench, - LogFile: log, - HTTP: managerCfg.HTTP, - ExecCommand: filepath.Join(path, "bin", "syz-manager"), - ExecCommandArgs: []string{"-config", managerCfgPath, "-bench", bench}, - }) - managerCfg.HTTP, err = increasePort(managerCfg.HTTP) + + // Wait for all instances to finish. + for _, instance := range instances { + instance.Stop() + } + wg.Wait() + + // Only mark instances completed if they've indeed been running the whole iteration. 
+ if !mustStop { + for _, checkout := range ctx.Checkouts { + err = checkout.ArchiveRunning() + if err != nil { + tool.Failf("ArchiveRunning error: %s", err) + } + } + } + + log.Printf("collecting statistics") + err = ctx.SaveStats() if err != nil { - tool.Failf("failed to inrease port number: %s", err) + log.Printf("stats saving error: %s", err) } } - return &checkoutInfo } -func increasePort(http string) (string, error) { - host, portStr, err := net.SplitHostPort(http) - if err != nil { - return "", fmt.Errorf("invalid http value: %s", http) +func (d *DurationConfig) UnmarshalJSON(data []byte) error { + var v interface{} + if err := json.Unmarshal(data, &v); err != nil { + return err } - port, err := strconv.Atoi(portStr) - if err != nil { - return "", err + str, ok := v.(string) + if !ok { + return fmt.Errorf("%s was expected to be a string", data) + } + parsed, err := time.ParseDuration(str) + if err == nil { + d.Duration = parsed } - return net.JoinHostPort(host, fmt.Sprintf("%d", port+1)), nil + return err +} +func (d *DurationConfig) MarshalJSON() ([]byte, error) { + return json.Marshal(d.String()) } func checkConfig(cfg *TestbedConfig) error { + testbedNameRe := regexp.MustCompile(`^[0-9a-z\-]{1,20}$`) + if !testbedNameRe.MatchString(cfg.Name) { + return fmt.Errorf("invalid testbed name: %v", cfg.Name) + } if cfg.Workdir == "" { return fmt.Errorf("workdir is empty") } @@ -237,8 +323,10 @@ func checkConfig(cfg *TestbedConfig) error { if cfg.Corpus != "" && !osutil.IsExist(cfg.Corpus) { return fmt.Errorf("corpus %v does not exist", cfg.Corpus) } + if cfg.MaxInstances < 1 { + return fmt.Errorf("max_instances cannot be less than 1") + } cfg.Corpus = osutil.Abs(cfg.Corpus) - instanceNameRe := regexp.MustCompile(`^[0-9a-z\-]{1,20}$`) names := make(map[string]bool) for idx := range cfg.Checkouts { co := &cfg.Checkouts[idx] @@ -250,17 +338,8 @@ func checkConfig(cfg *TestbedConfig) error { } else if !vcs.CheckBranch(co.Branch) { return fmt.Errorf("invalid branch: 
%s", co.Branch) } - if co.Count < 0 { - return fmt.Errorf("count cannot be negative") - } else if co.Count == 0 { - // The default value. - co.Count = 1 - } - if !instanceNameRe.MatchString(co.Name) { - return fmt.Errorf("invalid instance name: %v", co.Name) - } if names[co.Name] { - return fmt.Errorf("duplicate instance name: %v", co.Name) + return fmt.Errorf("duplicate checkout name: %v", co.Name) } names[co.Name] = true } |
