4 files changed, 478 insertions, 221 deletions
diff --git a/tools/syz-testbed/checkout.go b/tools/syz-testbed/checkout.go
new file mode 100644
index 000000000..fe846bbb2
--- /dev/null
+++ b/tools/syz-testbed/checkout.go
@@ -0,0 +1,56 @@
+// Copyright 2021 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package main
+
+import (
+	"fmt"
+	"log"
+	"path/filepath"
+	"time"
+
+	syz_instance "github.com/google/syzkaller/pkg/instance"
+	"github.com/google/syzkaller/pkg/osutil"
+	"github.com/google/syzkaller/pkg/vcs"
+)
+
+type Checkout struct {
+	Path      string
+	Name      string
+	Running   []*Instance
+	Completed []*RunResult
+}
+
+func (checkout *Checkout) ArchiveRunning() error {
+	for _, instance := range checkout.Running {
+		result, err := instance.FetchResult()
+		if err != nil {
+			return err
+		}
+		checkout.Completed = append(checkout.Completed, result)
+	}
+	checkout.Running = []*Instance{}
+	return nil
+}
+
+func (ctx *TestbedContext) NewCheckout(config *CheckoutConfig) (*Checkout, error) {
+	checkout := &Checkout{
+		Name: config.Name,
+		Path: filepath.Join(ctx.Config.Workdir, "checkouts", config.Name),
+	}
+	log.Printf("[%s] Checking out", checkout.Name)
+	if osutil.IsExist(checkout.Path) {
+		return nil, fmt.Errorf("path %s already exists", checkout.Path)
+	}
+	repo := vcs.NewSyzkallerRepo(checkout.Path)
+	commit, err := repo.Poll(config.Repo, config.Branch)
+	if err != nil {
+		return nil, fmt.Errorf("failed to checkout %s (%s): %s", config.Repo, config.Branch, err)
+	}
+	log.Printf("[%s] Done. Latest commit: %s", checkout.Name, commit)
+	log.Printf("[%s] Building", checkout.Name)
+	if _, err := osutil.RunCmd(time.Hour, checkout.Path, syz_instance.MakeBin); err != nil {
+		return nil, fmt.Errorf("[%s] Make failed: %s", checkout.Name, err)
+	}
+	return checkout, nil
+}
diff --git a/tools/syz-testbed/instance.go b/tools/syz-testbed/instance.go
new file mode 100644
index 000000000..c51b9c597
--- /dev/null
+++ b/tools/syz-testbed/instance.go
@@ -0,0 +1,130 @@
+// Copyright 2021 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package main
+
+import (
+	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+	"time"
+
+	"github.com/google/syzkaller/pkg/config"
+	"github.com/google/syzkaller/pkg/osutil"
+)
+
+// The essential information about an active instance.
+type Instance struct {
+	Name            string
+	Workdir         string
+	BenchFile       string
+	LogFile         string
+	HTTP            string
+	ExecCommand     string
+	ExecCommandArgs []string
+	stopChannel     chan bool
+}
+
+func (inst *Instance) Run() error {
+	const stopDelay = time.Minute
+
+	logfile, err := os.Create(inst.LogFile)
+	if err != nil {
+		return fmt.Errorf("[%s] failed to create logfile: %s", inst.Name, err)
+	}
+	log.Printf("[%s] starting", inst.Name)
+	cmd := osutil.GraciousCommand(inst.ExecCommand, inst.ExecCommandArgs...)
+	cmd.Stdout = logfile
+	cmd.Stderr = logfile
+
+	complete := make(chan error)
+	go func() {
+		complete <- cmd.Run()
+	}()
+
+	select {
+	case err := <-complete:
+		return fmt.Errorf("[%s] stopped: %s", inst.Name, err)
+	case <-inst.stopChannel:
+		// TODO: handle other OSes?
+		cmd.Process.Signal(os.Interrupt)
+		select {
+		case <-complete:
+			// The manager has exited.
+		case <-time.After(stopDelay):
+			// The manager did not exit - kill it.
+			log.Printf("[%s] instance did not exit itself, killing it", inst.Name)
+			cmd.Process.Kill()
+			<-complete
+		}
+		return nil
+	}
+}
+
+func (inst *Instance) Stop() {
+	inst.stopChannel <- true
+}
+
+func (inst *Instance) FetchResult() (*RunResult, error) {
+	bugs, err := collectBugs(inst.Workdir)
+	if err != nil {
+		return nil, err
+	}
+	records, err := readBenches(inst.BenchFile)
+	if err != nil {
+		return nil, err
+	}
+	return &RunResult{
+		Workdir:     inst.Workdir,
+		Bugs:        bugs,
+		StatRecords: records,
+	}, nil
+}
+
+func (ctx *TestbedContext) NewInstance(checkout *Checkout, mgrName string) (*Instance, error) {
+	defer func() {
+		ctx.NextInstanceID++
+	}()
+	name := fmt.Sprintf("%s-%d", checkout.Name, ctx.NextInstanceID)
+	managerCfgPath := filepath.Join(checkout.Path, fmt.Sprintf("syz-%d.cnf", ctx.NextInstanceID))
+	workdir := filepath.Join(checkout.Path, fmt.Sprintf("workdir_%d", ctx.NextInstanceID))
+	bench := filepath.Join(checkout.Path, fmt.Sprintf("bench-%d.txt", ctx.NextInstanceID))
+	logFile := filepath.Join(checkout.Path, fmt.Sprintf("log-%d.txt", ctx.NextInstanceID))
+
+	log.Printf("[%s] Generating workdir", name)
+	err := osutil.MkdirAll(workdir)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create workdir %s", workdir)
+	}
+
+	if ctx.Config.Corpus != "" {
+		log.Printf("[%s] Copying corpus", name)
+		corpusPath := filepath.Join(workdir, "corpus.db")
+		err = osutil.CopyFile(ctx.Config.Corpus, corpusPath)
+		if err != nil {
+			return nil, fmt.Errorf("failed to copy corpus from %s: %s", ctx.Config.Corpus, err)
+		}
+	}
+
+	log.Printf("[%s] Generating syz-manager config", name)
+	managerCfg := *ctx.ManagerConfig
+	managerCfg.Name = mgrName
+	managerCfg.Workdir = workdir
+	managerCfg.Syzkaller = checkout.Path
+	err = config.SaveFile(managerCfgPath, managerCfg)
+	if err != nil {
+		return nil, fmt.Errorf("failed to save manager config to %s: %s", managerCfgPath, err)
+	}
+
+	return &Instance{
+		Name:            name,
+		Workdir:         workdir,
+		BenchFile:       bench,
+		LogFile:         logFile,
+		HTTP:            managerCfg.HTTP,
+		ExecCommand:     filepath.Join(checkout.Path, "bin", "syz-manager"),
+		ExecCommandArgs: []string{"-config", managerCfgPath, "-bench", bench},
+		stopChannel:     make(chan bool, 1),
+	}, nil
+}
diff --git a/tools/syz-testbed/stats.go b/tools/syz-testbed/stats.go
index 9b5ed7133..f244bcbda 100644
--- a/tools/syz-testbed/stats.go
+++ b/tools/syz-testbed/stats.go
@@ -19,6 +19,25 @@ type BugInfo struct {
 	Title string
 }
 
+// The information collected from a syz-manager instance.
+type RunResult struct {
+	Workdir     string
+	Bugs        []BugInfo
+	StatRecords []map[string]uint64
+}
+
+// The grouping of single instance results. Taken by all stat generating routines.
+type RunResultGroup struct {
+	Name    string
+	Results []*RunResult
+}
+
+// Different "views" of the statistics, e.g. only completed instances or completed + running.
+type StatView struct {
+	Name   string
+	Groups []RunResultGroup
+}
+
 // TODO: we're implementing this functionaity at least the 3rd time (see syz-manager/html
 // and tools/reporter). Create a more generic implementation and put it into a globally
 // visible package.
@@ -57,7 +76,7 @@ func readBenches(benchFile string) ([]map[string]uint64, error) {
 	return ret, nil
 }
 
-func avgStats(infos []map[string]uint64) map[string]uint64 {
+func avgBenches(infos []map[string]uint64) map[string]uint64 {
 	ret := make(map[string]uint64)
 	if len(infos) == 0 {
 		return ret
@@ -75,29 +94,25 @@ func avgStats(infos []map[string]uint64) map[string]uint64 {
 
 type BugSummary struct {
 	title string
-	found map[*CheckoutInfo]bool
+	found map[string]bool
 }
 
 // If there are several instances belonging to a single checkout, we're interested in the
 // set of bugs found by at least one of those instances.
-func summarizeBugs(checkouts []*CheckoutInfo) ([]*BugSummary, error) {
+func summarizeBugs(groups []RunResultGroup) ([]*BugSummary, error) {
 	bugsMap := make(map[string]*BugSummary)
-	for _, checkout := range checkouts {
-		for _, instance := range checkout.Instances {
-			bugs, err := collectBugs(instance.Workdir)
-			if err != nil {
-				return nil, err
-			}
-			for _, bug := range bugs {
+	for _, group := range groups {
+		for _, result := range group.Results {
+			for _, bug := range result.Bugs {
 				summary := bugsMap[bug.Title]
 				if summary == nil {
 					summary = &BugSummary{
 						title: bug.Title,
-						found: make(map[*CheckoutInfo]bool),
+						found: make(map[string]bool),
 					}
 					bugsMap[bug.Title] = summary
 				}
-				summary.found[checkout] = true
+				summary.found[group.Name] = true
 			}
 		}
 	}
@@ -110,13 +125,13 @@ func summarizeBugs(checkouts []*CheckoutInfo) ([]*BugSummary, error) {
 
 // For each checkout, take the union of sets of bugs found by each instance.
 // Then output these unions as a single table.
-func generateBugTable(checkouts []*CheckoutInfo) ([][]string, error) {
+func (view StatView) GenerateBugTable() ([][]string, error) {
 	table := [][]string{}
 	titles := []string{""}
-	for _, checkout := range checkouts {
-		titles = append(titles, checkout.Name)
+	for _, group := range view.Groups {
+		titles = append(titles, group.Name)
 	}
-	summaries, err := summarizeBugs(checkouts)
+	summaries, err := summarizeBugs(view.Groups)
 	if err != nil {
 		return nil, err
 	}
@@ -124,9 +139,9 @@ func generateBugTable(checkouts []*CheckoutInfo) ([][]string, error) {
 	table = append(table, titles)
 	for _, bug := range summaries {
 		row := []string{bug.title}
-		for _, checkout := range checkouts {
+		for _, group := range view.Groups {
 			val := ""
-			if bug.found[checkout] {
+			if bug.found[group.Name] {
 				val = "YES"
 			}
 			row = append(row, val)
@@ -136,26 +151,32 @@ func generateBugTable(checkouts []*CheckoutInfo) ([][]string, error) {
 	return table, nil
 }
 
-type StatGroup struct {
-	Name      string
-	Instances []InstanceInfo
+func (group RunResultGroup) AvgStatRecords() []map[string]uint64 {
+	ret := []map[string]uint64{}
+	for i := 0; ; i++ {
+		toAvg := []map[string]uint64{}
+		for _, result := range group.Results {
+			if i < len(result.StatRecords) {
+				toAvg = append(toAvg, result.StatRecords[i])
+			}
+		}
+		if len(toAvg) != len(group.Results) || len(toAvg) == 0 {
+			break
+		}
+		ret = append(ret, avgBenches(toAvg))
+	}
+	return ret
 }
 
-func genericStatsTable(groups []StatGroup) ([][]string, error) {
+func (view StatView) StatsTable() ([][]string, error) {
 	// Map: stats key x group name -> value.
 	cells := make(map[string]map[string]string)
-	for _, group := range groups {
-		infos := []map[string]uint64{}
-		for _, instance := range group.Instances {
-			records, err := readBenches(instance.BenchFile)
-			if err != nil {
-				return nil, err
-			}
-			if len(records) > 0 {
-				infos = append(infos, records[len(records)-1])
-			}
+	for _, group := range view.Groups {
+		avgBench := group.AvgStatRecords()
+		if len(avgBench) == 0 {
+			continue
 		}
-		for key, value := range avgStats(infos) {
+		for key, value := range avgBench[len(avgBench)-1] {
 			if _, ok := cells[key]; !ok {
 				cells[key] = make(map[string]string)
 			}
@@ -163,13 +184,13 @@ func genericStatsTable(groups []StatGroup) ([][]string, error) {
 		}
 	}
 	title := []string{""}
-	for _, group := range groups {
+	for _, group := range view.Groups {
 		title = append(title, group.Name)
 	}
 	table := [][]string{title}
 	for key, valuesMap := range cells {
 		row := []string{key}
-		for _, group := range groups {
+		for _, group := range view.Groups {
 			row = append(row, valuesMap[group.Name])
 		}
 		table = append(table, row)
@@ -177,56 +198,27 @@ func genericStatsTable(groups []StatGroup) ([][]string, error) {
 	return table, nil
 }
 
-func checkoutStatsTable(checkouts []*CheckoutInfo) ([][]string, error) {
-	groups := []StatGroup{}
-	for _, checkout := range checkouts {
-		groups = append(groups, StatGroup{
-			Name:      checkout.Name,
-			Instances: checkout.Instances,
-		})
-	}
-	return genericStatsTable(groups)
-}
-
-func instanceStatsTable(checkouts []*CheckoutInfo) ([][]string, error) {
-	groups := []StatGroup{}
-	for _, checkout := range checkouts {
-		for _, instance := range checkout.Instances {
-			groups = append(groups, StatGroup{
-				Name:      instance.Name,
-				Instances: []InstanceInfo{instance},
+func (view StatView) InstanceStatsTable() ([][]string, error) {
+	newView := StatView{}
+	for _, group := range view.Groups {
+		for i, result := range group.Results {
+			newView.Groups = append(newView.Groups, RunResultGroup{
+				Name:    fmt.Sprintf("%s-%d", group.Name, i),
+				Results: []*RunResult{result},
 			})
 		}
 	}
-	return genericStatsTable(groups)
+	return newView.StatsTable()
 }
 
 // Average bench files of several instances into a single bench file.
-func saveAvgBenchFile(checkout *CheckoutInfo, fileName string) error {
-	allRecords := [][]map[string]uint64{}
-	for _, instance := range checkout.Instances {
-		records, err := readBenches(instance.BenchFile)
-		if err != nil {
-			return err
-		}
-		allRecords = append(allRecords, records)
-	}
+func (group *RunResultGroup) SaveAvgBenchFile(fileName string) error {
 	f, err := os.Create(fileName)
 	if err != nil {
 		return err
 	}
 	defer f.Close()
-	for i := 0; ; i++ {
-		toAvg := []map[string]uint64{}
-		for _, records := range allRecords {
-			if i < len(records) {
-				toAvg = append(toAvg, records[i])
-			}
-		}
-		if len(toAvg) != len(allRecords) {
-			break
-		}
-		averaged := avgStats(toAvg)
+	for _, averaged := range group.AvgStatRecords() {
 		data, err := json.MarshalIndent(averaged, "", "  ")
 		if err != nil {
 			return err
@@ -238,7 +230,7 @@ func saveAvgBenchFile(checkout *CheckoutInfo, fileName string) error {
 	return nil
 }
 
-func saveTableAsCsv(table [][]string, fileName string) error {
+func SaveTableAsCsv(table [][]string, fileName string) error {
 	f, err := os.Create(fileName)
 	if err != nil {
 		return err
diff --git a/tools/syz-testbed/testbed.go b/tools/syz-testbed/testbed.go
index bad77c294..819a61b34 100644
--- a/tools/syz-testbed/testbed.go
+++ b/tools/syz-testbed/testbed.go
@@ -13,15 +13,12 @@ import (
 	"flag"
 	"fmt"
 	"log"
-	"net"
-	"os"
 	"path/filepath"
 	"regexp"
-	"strconv"
+	"sync"
 	"time"
 
 	"github.com/google/syzkaller/pkg/config"
-	syz_instance "github.com/google/syzkaller/pkg/instance"
 	"github.com/google/syzkaller/pkg/mgrconfig"
 	"github.com/google/syzkaller/pkg/osutil"
 	"github.com/google/syzkaller/pkg/tool"
@@ -29,44 +26,45 @@ import (
 )
 
 var (
-	flagConfig  = flag.String("config", "", "config file")
-	flagCleanup = flag.Bool("cleanup", false, "remove existing work directories")
+	flagConfig = flag.String("config", "", "config file")
 )
 
 type TestbedConfig struct {
-	Corpus        string            `json:"corpus"`         // path to the corpus file
-	Workdir       string            `json:"workdir"`        // instances will be checked out there
-	ManagerConfig json.RawMessage   `json:"manager_config"` // base manager config
-	Checkouts     []TestbedCheckout `json:"checkouts"`
+	Name          string           `json:"name"`           // name of the testbed
+	MaxInstances  int              `json:"max_instances"`  // max # of simultaneously running instances
+	RunTime       DurationConfig   `json:"run_time"`       // lifetime of an instance (default "24h")
+	Corpus        string           `json:"corpus"`         // path to the corpus file
+	Workdir       string           `json:"workdir"`        // instances will be checked out there
+	ManagerConfig json.RawMessage  `json:"manager_config"` // base manager config
+	Checkouts     []CheckoutConfig `json:"checkouts"`
 }
 
-type TestbedCheckout struct {
+type DurationConfig struct {
+	time.Duration
+}
+
+type CheckoutConfig struct {
 	Name   string `json:"name"`
 	Repo   string `json:"repo"`
 	Branch string `json:"branch"`
-	Count  int    `json:"count"`
-}
-
-type CheckoutInfo struct {
-	Path      string
-	Name      string
-	Instances []InstanceInfo
 }
 
-// The essential information about an already prepared instance.
-type InstanceInfo struct {
-	Name            string
-	Workdir         string
-	BenchFile       string
-	LogFile         string
-	HTTP            string
-	ExecCommand     string
-	ExecCommandArgs []string
+type TestbedContext struct {
+	Config         *TestbedConfig
+	ManagerConfig  *mgrconfig.Config
+	Checkouts      []*Checkout
+	NextRestart    time.Time
+	NextCheckoutID int
+	NextInstanceID int
+	statMutex      sync.Mutex
 }
 
 func main() {
 	flag.Parse()
-	cfg := &TestbedConfig{}
+	cfg := &TestbedConfig{
+		Name:    "testbed",
+		RunTime: DurationConfig{24 * time.Hour},
+	}
 	err := config.LoadFile(*flagConfig, &cfg)
 	if err != nil {
 		tool.Failf("failed to read config: %s", err)
@@ -82,150 +80,238 @@ func main() {
 		tool.Failf("failed to parse manager config: %s", err)
 	}
 	if managerCfg.HTTP == "" {
-		managerCfg.HTTP = ":50000"
+		managerCfg.HTTP = "0.0.0.0:0"
 	}
 
-	checkouts := []*CheckoutInfo{}
-	for _, co := range cfg.Checkouts {
-		checkouts = append(checkouts, newCheckout(co, cfg, managerCfg))
+	ctx := TestbedContext{
+		Config:        cfg,
+		ManagerConfig: managerCfg,
 	}
-
-	log.Printf("------------------")
-	for _, co := range checkouts {
-		for _, instance := range co.Instances {
-			go runInstance(instance)
+	for _, checkoutCfg := range cfg.Checkouts {
+		co, err := ctx.NewCheckout(&checkoutCfg)
+		if err != nil {
+			tool.Failf("checkout failed: %s", err)
 		}
+		ctx.Checkouts = append(ctx.Checkouts, co)
 	}
-	go collectStats(cfg, checkouts)
-	// Block the execution indefinitely.
-	// Either the process will be killed or it will exit itself if one of the instances fails.
-	select {}
-}
 
-func collectStats(cfg *TestbedConfig, checkouts []*CheckoutInfo) {
-	const period = 90 * time.Second
-	benchFolder := filepath.Join(cfg.Workdir, "benches")
-	err := osutil.MkdirAll(benchFolder)
-	if err != nil {
-		tool.Failf("failed to create bench folder: %s", err)
-	}
-	tableStats := map[string]func(checkouts []*CheckoutInfo) ([][]string, error){
-		"bugs.csv":           generateBugTable,
-		"checkout_stats.csv": checkoutStatsTable,
-		"instance_stats.csv": instanceStatsTable,
-	}
-	for {
-		time.Sleep(period)
-		for fileName, genFunc := range tableStats {
-			table, err := genFunc(checkouts)
-			if err == nil {
-				saveTableAsCsv(table, filepath.Join(cfg.Workdir, fileName))
+	shutdown := make(chan struct{})
+	osutil.HandleInterrupts(shutdown)
+
+	go func() {
+		const period = 90 * time.Second
+		for {
+			time.Sleep(period)
+			err := ctx.SaveStats()
+			if err != nil {
+				log.Printf("stats saving error: %s", err)
 			}
 		}
-		for _, checkout := range checkouts {
-			fileName := fmt.Sprintf("avg_%v.txt", checkout.Name)
-			saveAvgBenchFile(checkout, filepath.Join(benchFolder, fileName))
+	}()
+
+	ctx.Loop(shutdown)
+}
+
+func (ctx *TestbedContext) GetStatViews() ([]StatView, error) {
+	groupsCompleted := []RunResultGroup{}
+	groupsAll := []RunResultGroup{}
+	for _, checkout := range ctx.Checkouts {
+		running := []*RunResult{}
+		for _, instance := range checkout.Running {
+			result, err := instance.FetchResult()
+			if err != nil {
+				return nil, err
+			}
+			running = append(running, result)
 		}
+		groupsCompleted = append(groupsCompleted, RunResultGroup{
+			Name:    checkout.Name,
+			Results: checkout.Completed,
+		})
+		groupsAll = append(groupsAll, RunResultGroup{
+			Name:    checkout.Name,
+			Results: append(checkout.Completed, running...),
+		})
 	}
+	return []StatView{
+		{
+			Name:   "all",
+			Groups: groupsAll,
+		},
+		{
+			Name:   "completed",
+			Groups: groupsCompleted,
+		},
+	}, nil
 }
 
-func runInstance(info InstanceInfo) {
-	logfile, err := os.Create(info.LogFile)
+func (ctx *TestbedContext) saveStatView(view StatView) error {
+	dir := filepath.Join(ctx.Config.Workdir, "stats_"+view.Name)
+	benchDir := filepath.Join(dir, "benches")
+	err := osutil.MkdirAll(benchDir)
 	if err != nil {
-		tool.Failf("[%s] failed to create logfile: %s", info.Name, err)
+		return fmt.Errorf("failed to create %s: %s", benchDir, err)
 	}
-	cmd := osutil.GraciousCommand(info.ExecCommand, info.ExecCommandArgs...)
-	cmd.Stdout = logfile
-	cmd.Stderr = logfile
-	err = cmd.Start()
-	if err != nil {
-		tool.Failf("[%s] failed to start instance: %s", info.Name, err)
+
+	tableStats := map[string]func(view StatView) ([][]string, error){
+		"bugs.csv":           (StatView).GenerateBugTable,
+		"checkout_stats.csv": (StatView).StatsTable,
+		"instance_stats.csv": (StatView).InstanceStatsTable,
 	}
-	log.Printf("[%s] Instance started. Listening on %s", info.Name, info.HTTP)
-	logfile.Close()
-	err = cmd.Wait()
-	tool.Failf("[%s] Instance exited: %s", info.Name, err)
+
+	for fileName, genFunc := range tableStats {
+		table, err := genFunc(view)
+		if err == nil {
+			SaveTableAsCsv(table, filepath.Join(dir, fileName))
+		} else {
+			log.Printf("some error: %s", err)
+		}
+	}
+	for _, group := range view.Groups {
+		fileName := fmt.Sprintf("avg_%v.txt", group.Name)
+		group.SaveAvgBenchFile(filepath.Join(benchDir, fileName))
+	}
+	return nil
 }
 
-func newCheckout(co TestbedCheckout, cfg *TestbedConfig, managerCfg *mgrconfig.Config) *CheckoutInfo {
-	log.Printf("[%s] Checking out", co.Name)
-	path := filepath.Join(cfg.Workdir, "checkouts", co.Name)
-	if osutil.IsExist(path) {
-		if !*flagCleanup {
-			tool.Failf("path %s already exists", path)
+func (ctx *TestbedContext) saveTestbedStats(file string) error {
+	table := [][]string{
+		{"Checkout", "Running", "Completed", "Until reset"},
+	}
+	for _, checkout := range ctx.Checkouts {
+		until := "-"
+		if ctx.NextRestart.After(time.Now()) {
+			until = time.Until(ctx.NextRestart).Round(time.Second).String()
 		}
-		osutil.RemoveAll(path)
+		table = append(table, []string{
+			checkout.Name,
+			fmt.Sprintf("%d", len(checkout.Running)),
+			fmt.Sprintf("%d", len(checkout.Completed)),
+			until,
+		})
 	}
-	repo := vcs.NewSyzkallerRepo(path)
-	commit, err := repo.Poll(co.Repo, co.Branch)
+	return SaveTableAsCsv(table, file)
+}
+
+func (ctx *TestbedContext) SaveStats() error {
+	// Preventing concurrent saving of the stats.
+	ctx.statMutex.Lock()
+	defer ctx.statMutex.Unlock()
+	views, err := ctx.GetStatViews()
 	if err != nil {
-		tool.Failf("failed to checkout %s (%s): %s", co.Repo, co.Branch, err)
+		return err
 	}
-	log.Printf("[%s] Done. Latest commit: %s", co.Name, commit)
-	log.Printf("[%s] Building", co.Name)
-	if _, err := osutil.RunCmd(time.Hour, path, syz_instance.MakeBin); err != nil {
-		tool.Failf("[%s] Make failed: %s", co.Name, err)
+	for _, view := range views {
+		err := ctx.saveStatView(view)
+		if err != nil {
+			return err
+		}
 	}
-	checkoutInfo := CheckoutInfo{
-		Name: co.Name,
-		Path: path,
+	return ctx.saveTestbedStats(filepath.Join(ctx.Config.Workdir, "testbed.csv"))
+}
+
+func (ctx *TestbedContext) generateInstances(count int) ([]*Instance, error) {
+	// It seems that even gracefully finished syz-managers can leak GCE instances.
+	// To allow for that strange behavior, let's reuse syz-manager names, so that
+	// they will in turn reuse the names of the leaked GCE instances.
+	instances := []*Instance{}
+	for idx := 1; idx <= count; idx++ {
+		checkout := ctx.Checkouts[ctx.NextCheckoutID]
+		ctx.NextCheckoutID = (ctx.NextCheckoutID + 1) % len(ctx.Checkouts)
+		instance, err := ctx.NewInstance(checkout, fmt.Sprintf("%s-%d", ctx.Config.Name, idx))
+		if err != nil {
+			return nil, err
+		}
+		checkout.Running = append(checkout.Running, instance)
+		instances = append(instances, instance)
 	}
-	for i := 1; i <= co.Count; i++ {
-		name := fmt.Sprintf("%v-%d", co.Name, i)
-		log.Printf("[%s] Generating workdir", name)
-		workdir := filepath.Join(path, fmt.Sprintf("workdir_%d", i))
-		err = osutil.MkdirAll(workdir)
+	return instances, nil
+}
+
+// Create instances, run them, stop them, archive them, and so on...
+func (ctx *TestbedContext) Loop(stop chan struct{}) {
+	duration := ctx.Config.RunTime.Duration
+	mustStop := false
+	for !mustStop {
+		log.Printf("setting up instances")
+		instances, err := ctx.generateInstances(ctx.Config.MaxInstances)
 		if err != nil {
-			tool.Failf("failed to create dir %s", workdir)
+			tool.Failf("failed to set up intances: %s", err)
 		}
-		if cfg.Corpus != "" {
-			corpusPath := filepath.Join(workdir, "corpus.db")
-			err = osutil.CopyFile(cfg.Corpus, corpusPath)
-			if err != nil {
-				tool.Failf("failed to copy corpus from %s: %s", cfg.Corpus, err)
-			}
+		log.Printf("starting instances")
+		instanceStatuses := make(chan error, len(instances))
+		var wg sync.WaitGroup
+		for _, inst := range instances {
+			wg.Add(1)
+			go func(instance *Instance) {
+				instanceStatuses <- instance.Run()
+				wg.Done()
+			}(inst)
 		}
-		log.Printf("[%s] Generating syz-manager config", name)
-		managerCfg.Name = name
-		managerCfg.Workdir = workdir
-		managerCfg.Syzkaller = path
-		managerCfgPath := filepath.Join(path, fmt.Sprintf("syz_%d.cnf", i))
-		err = config.SaveFile(managerCfgPath, managerCfg)
-		if err != nil {
-			tool.Failf("failed to save manager config to %s: %s", managerCfgPath, err)
+
+		ctx.NextRestart = time.Now().Add(duration)
+		select {
+		case err := <-instanceStatuses:
+			// Syz-managers are not supposed to stop under normal circumstances.
+			// If one of them did stop, there must have been a very good reason to.
+			// For now, we just shut down the whole experiment in such a case.
+			log.Printf("an instance has failed (%s), stopping everything", err)
+			mustStop = true
+		case <-stop:
+			log.Printf("stopping the experiment")
+			mustStop = true
+		case <-time.After(duration):
+			log.Printf("run period has finished")
 		}
-		bench := filepath.Join(path, fmt.Sprintf("bench_%d.txt", i))
-		log := filepath.Join(path, fmt.Sprintf("log_%d.txt", i))
-		checkoutInfo.Instances = append(checkoutInfo.Instances, InstanceInfo{
-			Name:            managerCfg.Name,
-			Workdir:         workdir,
-			BenchFile:       bench,
-			LogFile:         log,
-			HTTP:            managerCfg.HTTP,
-			ExecCommand:     filepath.Join(path, "bin", "syz-manager"),
-			ExecCommandArgs: []string{"-config", managerCfgPath, "-bench", bench},
-		})
-		managerCfg.HTTP, err = increasePort(managerCfg.HTTP)
+
+		// Wait for all instances to finish.
+		for _, instance := range instances {
+			instance.Stop()
+		}
+		wg.Wait()
+
+		// Only mark instances completed if they've indeed been running the whole iteration.
+		if !mustStop {
+			for _, checkout := range ctx.Checkouts {
+				err = checkout.ArchiveRunning()
+				if err != nil {
+					tool.Failf("ArchiveRunning error: %s", err)
+				}
+			}
+		}
+
+		log.Printf("collecting statistics")
+		err = ctx.SaveStats()
 		if err != nil {
-			tool.Failf("failed to inrease port number: %s", err)
+			log.Printf("stats saving error: %s", err)
 		}
 	}
-	return &checkoutInfo
 }
 
-func increasePort(http string) (string, error) {
-	host, portStr, err := net.SplitHostPort(http)
-	if err != nil {
-		return "", fmt.Errorf("invalid http value: %s", http)
+func (d *DurationConfig) UnmarshalJSON(data []byte) error {
+	var v interface{}
+	if err := json.Unmarshal(data, &v); err != nil {
+		return err
 	}
-	port, err := strconv.Atoi(portStr)
-	if err != nil {
-		return "", err
+	str, ok := v.(string)
+	if !ok {
+		return fmt.Errorf("%s was expected to be a string", data)
+	}
+	parsed, err := time.ParseDuration(str)
+	if err == nil {
+		d.Duration = parsed
 	}
-	return net.JoinHostPort(host, fmt.Sprintf("%d", port+1)), nil
+	return err
+}
+func (d *DurationConfig) MarshalJSON() ([]byte, error) {
+	return json.Marshal(d.String())
 }
 
 func checkConfig(cfg *TestbedConfig) error {
+	testbedNameRe := regexp.MustCompile(`^[0-9a-z\-]{1,20}$`)
+	if !testbedNameRe.MatchString(cfg.Name) {
+		return fmt.Errorf("invalid testbed name: %v", cfg.Name)
+	}
 	if cfg.Workdir == "" {
 		return fmt.Errorf("workdir is empty")
 	}
@@ -237,8 +323,10 @@ func checkConfig(cfg *TestbedConfig) error {
 	if cfg.Corpus != "" && !osutil.IsExist(cfg.Corpus) {
 		return fmt.Errorf("corpus %v does not exist", cfg.Corpus)
 	}
+	if cfg.MaxInstances < 1 {
+		return fmt.Errorf("max_instances cannot be less than 1")
+	}
 	cfg.Corpus = osutil.Abs(cfg.Corpus)
-	instanceNameRe := regexp.MustCompile(`^[0-9a-z\-]{1,20}$`)
 	names := make(map[string]bool)
 	for idx := range cfg.Checkouts {
 		co := &cfg.Checkouts[idx]
@@ -250,17 +338,8 @@ func checkConfig(cfg *TestbedConfig) error {
 		} else if !vcs.CheckBranch(co.Branch) {
 			return fmt.Errorf("invalid branch: %s", co.Branch)
 		}
-		if co.Count < 0 {
-			return fmt.Errorf("count cannot be negative")
-		} else if co.Count == 0 {
-			// The default value.
-			co.Count = 1
-		}
-		if !instanceNameRe.MatchString(co.Name) {
-			return fmt.Errorf("invalid instance name: %v", co.Name)
-		}
 		if names[co.Name] {
-			return fmt.Errorf("duplicate instance name: %v", co.Name)
+			return fmt.Errorf("duplicate checkout name: %v", co.Name)
 		}
 		names[co.Name] = true
 	}