about summary refs log tree commit diff stats
path: root/pkg
diff options
context:
space:
mode:
author Andrey Konovalov <andreyknvl@google.com> 2017-06-21 19:47:18 +0200
committer Andrey Konovalov <andreyknvl@google.com> 2017-06-27 11:59:11 +0200
commit 3349d055d7b275f6a8d33780c47f8ca5023a8377 (patch)
tree 41979f987d8fa514429cc63d4b38a79e423b5b43 /pkg
parent 65a34e4c2d820bb42758569a200e94e7a5c781cd (diff)
repro: bisect the log to find multiple guilty programs
Diffstat (limited to 'pkg')
-rw-r--r-- pkg/repro/repro.go 401
1 files changed, 321 insertions, 80 deletions
diff --git a/pkg/repro/repro.go b/pkg/repro/repro.go
index 67fac81d7..0c126cb18 100644
--- a/pkg/repro/repro.go
+++ b/pkg/repro/repro.go
@@ -4,6 +4,7 @@
package repro
import (
+ "bytes"
"fmt"
"os"
"path/filepath"
@@ -41,14 +42,6 @@ type instance struct {
executorBin string
}
-func reverseEntries(entries []*prog.LogEntry) []*prog.LogEntry {
- last := len(entries) - 1
- for i := 0; i < len(entries)/2; i++ {
- entries[i], entries[last-i] = entries[last-i], entries[i]
- }
- return entries
-}
-
func Run(crashLog []byte, cfg *mgrconfig.Config, vmPool *vm.Pool, vmIndexes []int) (*Result, error) {
if len(vmIndexes) == 0 {
return nil, fmt.Errorf("no VMs provided")
@@ -136,6 +129,46 @@ func Run(crashLog []byte, cfg *mgrconfig.Config, vmPool *vm.Pool, vmIndexes []in
return res, err
}
+// repro runs the reproduction pipeline for one crash: it trims the log to the
+// programs started no later than crashStart, extracts a crashing program,
+// minimizes it, tries to turn it into a C reproducer and, if that worked,
+// minimizes the C reproducer too. It returns (nil, nil) when no program could
+// be extracted. Opts.Repro is cleared on every successfully returned result.
+func (ctx *context) repro(entries []*prog.LogEntry, crashStart int) (*Result, error) {
+	// Drop every program from the first one that started after the crash.
+	for idx := range entries {
+		if entries[idx].Start > crashStart {
+			entries = entries[:idx]
+			break
+		}
+	}
+
+	res, err := ctx.reproExtractProg(entries)
+	switch {
+	case err != nil:
+		return res, err
+	case res == nil:
+		return nil, nil
+	}
+
+	if res, err = ctx.reproMinimizeProg(res); err != nil {
+		return res, err
+	}
+	if res, err = ctx.reproExtractC(res); err != nil {
+		return res, err
+	}
+	// reproMinimizeC is only run when res.CRepro is set.
+	if res.CRepro {
+		if res, err = ctx.reproMinimizeC(res); err != nil {
+			return res, err
+		}
+	}
+
+	res.Opts.Repro = false
+	return res, nil
+}
+
+
func (ctx *context) reproExtractProg(entries []*prog.LogEntry) (*Result, error) {
Logf(2, "reproducing crash '%v': suspecting %v programs", ctx.crashDesc, len(entries))
@@ -154,6 +187,49 @@ func (ctx *context) reproExtractProg(entries []*prog.LogEntry) (*Result, error)
lastEntries = append(lastEntries, entries[indices[i]])
}
+ // Execute each program separately for 10 seconds, that should detect simple crashes (i.e. no races and no hangs).
+ // Programs are executed in reverse order, usually the last program is the guilty one.
+ res, err := ctx.reproExtractProgSingle(reverseEntries(lastEntries), 10*time.Second)
+ if err != nil {
+ return res, err
+ }
+ if res != nil {
+ return res, nil
+ }
+
+ // Execute all programs and bisect the log to find guilty programs.
+ res, err = ctx.reproExtractProgBisect(reverseEntries(entries), 10*time.Second)
+ if err != nil {
+ return res, err
+ }
+ if res != nil {
+ return res, nil
+ }
+
+ // Execute each program separately for 5 minutes to catch races and hangs. Note that the max duration must be larger
+ // than hang/no output detection duration in vm.MonitorExecution, which is currently set to 3 mins.
+ res, err = ctx.reproExtractProgSingle(reverseEntries(lastEntries), 5*time.Minute)
+ if err != nil {
+ return res, err
+ }
+ if res != nil {
+ return res, nil
+ }
+
+ // Execute all programs and bisect the log with 5 minute timeout.
+ res, err = ctx.reproExtractProgBisect(reverseEntries(entries), 5*time.Minute)
+ if err != nil {
+ return res, err
+ }
+ if res != nil {
+ return res, nil
+ }
+
+ Logf(0, "reproducing crash '%v': no program crashed", ctx.crashDesc)
+ return nil, nil
+}
+
+func (ctx *context) reproExtractProgSingle(entries []*prog.LogEntry, duration time.Duration) (*Result, error) {
opts := csource.Options{
Threaded: true,
Collide: true,
@@ -168,46 +244,207 @@ func (ctx *context) reproExtractProg(entries []*prog.LogEntry) (*Result, error)
Repro: true,
}
- // Execute the suspected programs.
- // We first try to execute each program for 10 seconds, that should detect simple crashes
- // (i.e. no races and no hangs). Then we execute each program for 5 minutes
- // to catch races and hangs. Note that the max duration must be larger than
- // hang/no output detection duration in vm.MonitorExecution, which is currently set to 3 mins.
- // Programs are executed in reverse order, usually the last program is the guilty one.
- durations := []time.Duration{10 * time.Second, 5 * time.Minute}
- suspected := [][]*prog.LogEntry{reverseEntries(entries), reverseEntries(lastEntries)}
- var res *Result
- for i, dur := range durations {
- for _, ent := range suspected[i] {
- opts.Fault = ent.Fault
- opts.FaultCall = ent.FaultCall
- opts.FaultNth = ent.FaultNth
- if opts.FaultCall < 0 || opts.FaultCall >= len(ent.P.Calls) {
- opts.FaultCall = len(ent.P.Calls) - 1
+ for _, ent := range entries {
+ opts.Fault = ent.Fault
+ opts.FaultCall = ent.FaultCall
+ opts.FaultNth = ent.FaultNth
+ if opts.FaultCall < 0 || opts.FaultCall >= len(ent.P.Calls) {
+ opts.FaultCall = len(ent.P.Calls) - 1
+ }
+ crashed, err := ctx.testProg(ent.P, duration, opts)
+ if err != nil {
+ return nil, err
+ }
+ if crashed {
+ res := &Result{
+ Prog: ent.P,
+ Duration: duration * 3 / 2,
+ Opts: opts,
+ }
+ return res, nil
+ }
+ }
+
+ return nil, nil
+}
+
+// reproExtractProgBisect runs the given log entries together and bisects them
+// down to the programs that are still required to trigger the crash.
+//
+// It first checks that executing all entries crashes; if not it returns
+// (nil, nil). Then every chunk longer than one entry is split in halves and
+// each half is dropped in turn: if the crash still happens the dropped half is
+// evicted, otherwise both halves are kept as separate chunks. Once every chunk
+// holds a single entry, their calls are concatenated into one program, which
+// is tested first without and then with fault injection. A *Result is returned
+// for the first crashing variant, (nil, nil) if none crashes.
+//
+// Each run is given baseDuration plus one second per four programs.
+func (ctx *context) reproExtractProgBisect(entries []*prog.LogEntry, baseDuration time.Duration) (*Result, error) {
+Logf(3, "reproducing crash '%v': bisect: bisecting %d programs", ctx.crashDesc, len(entries))
+
+opts := csource.Options{
+Threaded: true,
+Collide: true,
+Repeat: true,
+Procs: ctx.cfg.Procs,
+Sandbox: ctx.cfg.Sandbox,
+EnableTun: true,
+UseTmpDir: true,
+HandleSegv: true,
+WaitRepeat: true,
+Debug: true,
+Repro: true,
+// Fault injection is off until explicitly enabled below. testProg keys off
+// FaultCall != -1, so the zero value would silently inject a fault at call 0.
+FaultCall: -1,
+}
+
+duration := func(entries int) time.Duration {
+return baseDuration + time.Duration((entries/4))*time.Second
+}
+
+// Check that executing the whole log results in a crash.
+Logf(3, "reproducing crash '%v': bisect: executing all %d programs", ctx.crashDesc, len(entries))
+crashed, err := ctx.testProgs(entries, duration(len(entries)), opts)
+if err != nil {
+return nil, err
+}
+if !crashed {
+Logf(3, "reproducing crash '%v': bisect: didn't crash", ctx.crashDesc)
+return nil, nil
+}
+
+// compose flattens guilty1, chunk and guilty2 (in that order) into one log.
+compose := func(guilty1, guilty2 [][]*prog.LogEntry, chunk []*prog.LogEntry) []*prog.LogEntry {
+progs := []*prog.LogEntry{}
+for _, c := range guilty1 {
+progs = append(progs, c...)
+}
+progs = append(progs, chunk...)
+for _, c := range guilty2 {
+progs = append(progs, c...)
+}
+return progs
+}
+
+// logGuilty formats the chunk sizes, e.g. "[<4>, <1>]", for logging.
+logGuilty := func(guilty [][]*prog.LogEntry) string {
+log := "["
+for i, chunk := range guilty {
+log += fmt.Sprintf("<%d>", len(chunk))
+if i != len(guilty)-1 {
+log += ", "
}
-crashed, err := ctx.testProg(ent.P, dur, opts)
+}
+log += "]"
+return log
+}
+
+// Bisect the programs to find the ones that cause the crash.
+guilty := [][]*prog.LogEntry{entries}
+again:
+Logf(3, "reproducing crash '%v': bisect: guilty chunks: %v", ctx.crashDesc, logGuilty(guilty))
+// Each pass handles the first chunk that is longer than one entry and then
+// restarts; the loop only falls through once all chunks are single entries.
+for i, chunk := range guilty {
+if len(chunk) == 1 {
+continue
+}
+
+guilty1 := guilty[:i]
+guilty2 := guilty[i+1:]
+Logf(3, "reproducing crash '%v': bisect: guilty chunks split: %v, <%v>, %v", ctx.crashDesc, logGuilty(guilty1), len(chunk), logGuilty(guilty2))
+
+chunk1 := chunk[0 : len(chunk)/2]
+chunk2 := chunk[len(chunk)/2 : len(chunk)]
+Logf(3, "reproducing crash '%v': bisect: chunk split: <%v> => <%v>, <%v>", ctx.crashDesc, len(chunk), len(chunk1), len(chunk2))
+
+Logf(3, "reproducing crash '%v': bisect: triggering crash without chunk #1", ctx.crashDesc)
+progs := compose(guilty1, guilty2, chunk2)
+crashed, err := ctx.testProgs(progs, duration(len(progs)), opts)
+if err != nil {
+return nil, err
+}
+
+if crashed {
+guilty = nil
+guilty = append(guilty, guilty1...)
+guilty = append(guilty, chunk2)
+guilty = append(guilty, guilty2...)
+Logf(3, "reproducing crash '%v': bisect: crashed, chunk #1 evicted", ctx.crashDesc)
+goto again
+}
+
+Logf(3, "reproducing crash '%v': bisect: triggering crash without chunk #2", ctx.crashDesc)
+progs = compose(guilty1, guilty2, chunk1)
+crashed, err = ctx.testProgs(progs, duration(len(progs)), opts)
+if err != nil {
+return nil, err
+}
+
+if crashed {
+guilty = nil
+guilty = append(guilty, guilty1...)
+guilty = append(guilty, chunk1)
+guilty = append(guilty, guilty2...)
+Logf(3, "reproducing crash '%v': bisect: crashed, chunk #2 evicted", ctx.crashDesc)
+goto again
+}
+
+guilty = nil
+guilty = append(guilty, guilty1...)
+guilty = append(guilty, chunk1)
+guilty = append(guilty, chunk2)
+guilty = append(guilty, guilty2...)
+
+Logf(3, "reproducing crash '%v': bisect: not crashed, both chunks required", ctx.crashDesc)
+
+goto again
+}
+
+// Concatenate all programs into one.
+entries = []*prog.LogEntry{}
+// Note: from here on the local variable prog shadows the prog package.
+var prog prog.Prog
+for _, chunk := range guilty {
+if len(chunk) != 1 {
+return nil, fmt.Errorf("bad bisect result: %v", guilty)
+}
+entries = append(entries, chunk[0])
+prog.Calls = append(prog.Calls, chunk[0].P.Calls...)
+}
+
+// TODO: Minimize each program before concatenation.
+// TODO: Return multiple programs if concatenation fails.
+
+Logf(3, "reproducing crash '%v': bisect: %d programs left:\n\n%s\n", ctx.crashDesc, len(entries), encodeEntries(entries))
+Logf(3, "reproducing crash '%v': bisect: concatenating", ctx.crashDesc)
+
+// Execute the program without fault injection.
+dur := duration(len(entries)) * 3 / 2
+crashed, err = ctx.testProg(&prog, dur, opts)
+if err != nil {
+return nil, err
+}
+if crashed {
+res := &Result{
+Prog: &prog,
+Duration: dur,
+Opts: opts,
+}
+Logf(3, "reproducing crash '%v': bisect: concatenation succeded", ctx.crashDesc)
+return res, nil
+}
+
+// Try with fault injection.
+// calls is the index of the current entry's first call in the concatenated
+// program, so that per-entry fault call indexes can be rebased.
+calls := 0
+for _, entry := range entries {
+if entry.Fault {
+// Mark fault injection as enabled so that the returned Result.Opts
+// is consistent with FaultCall/FaultNth.
+opts.Fault = true
+opts.FaultCall = calls + entry.FaultCall
+opts.FaultNth = entry.FaultNth
+if entry.FaultCall < 0 || entry.FaultCall >= len(entry.P.Calls) {
+opts.FaultCall = calls + len(entry.P.Calls) - 1
+}
+crashed, err := ctx.testProg(&prog, dur, opts)
if err != nil {
return nil, err
}
if crashed {
-res = &Result{
-Prog: ent.P,
-Duration: dur * 3 / 2,
+res := &Result{
+Prog: &prog,
+Duration: dur,
Opts: opts,
}
-break
+Logf(3, "reproducing crash '%v': bisect: concatenation succeded with fault injection", ctx.crashDesc)
+return res, nil
}
}
-if res != nil {
-break
-}
-}
-if res == nil {
-Logf(0, "reproducing crash '%v': no program crashed", ctx.crashDesc)
-return nil, nil
+calls += len(entry.P.Calls)
}
-return res, nil
+Logf(3, "reproducing crash '%v': bisect: concatenation failed", ctx.crashDesc)
+return nil, nil
}
func (ctx *context) reproMinimizeProg(res *Result) (*Result, error) {
@@ -221,7 +458,7 @@ func (ctx *context) reproMinimizeProg(res *Result) (*Result, error) {
res.Prog, res.Opts.FaultCall = prog.Minimize(res.Prog, call, func(p1 *prog.Prog, callIndex int) bool {
crashed, err := ctx.testProg(p1, res.Duration, res.Opts)
if err != nil {
- Logf(1, "reproducing crash '%v': minimization failed with %v", ctx.crashDesc, err)
+ Logf(0, "reproducing crash '%v': minimization failed with %v", ctx.crashDesc, err)
return false
}
return crashed
@@ -368,54 +605,27 @@ func (ctx *context) reproMinimizeC(res *Result) (*Result, error) {
return res, nil
}
-func (ctx *context) repro(entries []*prog.LogEntry, crashStart int) (*Result, error) {
- // Cut programs that were executed after crash.
- for i, ent := range entries {
- if ent.Start > crashStart {
- entries = entries[:i]
- break
- }
- }
-
- res, err := ctx.reproExtractProg(entries)
- if err != nil {
- return res, err
- }
- if res == nil {
- return nil, nil
- }
-
- res, err = ctx.reproMinimizeProg(res)
- if err != nil {
- return res, err
- }
-
- res, err = ctx.reproExtractC(res)
- if err != nil {
- return res, err
- }
- if !res.CRepro {
- res.Opts.Repro = false
- return res, nil
- }
-
- res, err = ctx.reproMinimizeC(res)
- if err != nil {
- return res, err
+// testProg tests a single program p for the given duration and reports
+// whether it crashed. It wraps p into a one-element log and delegates to
+// testProgs.
+// NOTE(review): the entry gets fault injection enabled whenever
+// opts.FaultCall != -1, regardless of opts.Fault. Callers that clamp FaultCall
+// to a valid index or leave it at its zero value will therefore produce an
+// entry with Fault == true -- confirm this is intended.
+func (ctx *context) testProg(p *prog.Prog, duration time.Duration, opts csource.Options) (crashed bool, err error) {
+entry := prog.LogEntry{P: p}
+if opts.FaultCall != -1 {
+entry.Fault = true
+entry.FaultCall = opts.FaultCall
+entry.FaultNth = opts.FaultNth
}
-
-res.Opts.Repro = false
-return res, nil
+return ctx.testProgs([]*prog.LogEntry{&entry}, duration, opts)
}
-func (ctx *context) testProg(p *prog.Prog, duration time.Duration, opts csource.Options) (crashed bool, err error) {
+func (ctx *context) testProgs(entries []*prog.LogEntry, duration time.Duration, opts csource.Options) (crashed bool, err error) {
inst := <-ctx.instances
if inst == nil {
return false, fmt.Errorf("all VMs failed to boot")
}
defer ctx.returnInstance(inst)
+ if len(entries) == 0 {
+ return false, fmt.Errorf("no programs to execute")
+ }
- pstr := p.Serialize()
+ pstr := encodeEntries(entries)
progFile, err := fileutil.WriteTempFile(pstr)
if err != nil {
return false, err
@@ -433,10 +643,21 @@ func (ctx *context) testProg(p *prog.Prog, duration time.Duration, opts csource.
if !opts.Fault {
opts.FaultCall = -1
}
- command := fmt.Sprintf("%v -executor %v -cover=0 -procs=%v -repeat=%v -sandbox %v -threaded=%v -collide=%v -fault_call=%v -fault_nth=%v %v",
- inst.execprogBin, inst.executorBin, opts.Procs, repeat, opts.Sandbox, opts.Threaded, opts.Collide, opts.FaultCall, opts.FaultNth, vmProgFile)
+ program := entries[0].P.String()
+ if len(entries) > 1 {
+ program = "["
+ for i, entry := range entries {
+ program += fmt.Sprintf("%v", len(entry.P.Calls))
+ if i != len(entries)-1 {
+ program += ", "
+ }
+ }
+ program += "]"
+ }
+ command := fmt.Sprintf("%v -executor %v -cover=0 -procs=%v -repeat=%v -sandbox %v -threaded=%v -collide=%v %v",
+ inst.execprogBin, inst.executorBin, opts.Procs, repeat, opts.Sandbox, opts.Threaded, opts.Collide, vmProgFile)
Logf(2, "reproducing crash '%v': testing program (duration=%v, %+v): %s",
- ctx.crashDesc, duration, opts, p)
+ ctx.crashDesc, duration, opts, program)
return ctx.testImpl(inst.Instance, command, duration)
}
@@ -496,3 +717,23 @@ func (ctx *context) returnInstance(inst *instance) {
ctx.bootRequests <- inst.index
inst.Close()
}
+
+// reverseEntries returns a new slice holding entries in reverse order.
+//
+// The input slice is left untouched. Callers in this file reverse the same
+// slice more than once, and an in-place reversal would flip it back to the
+// original order on every second call.
+func reverseEntries(entries []*prog.LogEntry) []*prog.LogEntry {
+	reversed := make([]*prog.LogEntry, len(entries))
+	for i, ent := range entries {
+		reversed[len(entries)-1-i] = ent
+	}
+	return reversed
+}
+
+// encodeEntries renders entries as one text blob. Each entry becomes an
+// "executing program <proc>" header line, with the fault-call/fault-nth values
+// appended when the entry has Fault set, followed by the serialized program.
+func encodeEntries(entries []*prog.LogEntry) []byte {
+	var out bytes.Buffer
+	for _, entry := range entries {
+		var fault string
+		if entry.Fault {
+			fault = fmt.Sprintf(" (fault-call:%v fault-nth:%v)", entry.FaultCall, entry.FaultNth)
+		}
+		serialized := string(entry.P.Serialize())
+		fmt.Fprintf(&out, "executing program %v%v:\n%v", entry.Proc, fault, serialized)
+	}
+	return out.Bytes()
+}