diff options
| author | Andrey Konovalov <andreyknvl@google.com> | 2017-06-21 19:47:18 +0200 |
|---|---|---|
| committer | Andrey Konovalov <andreyknvl@google.com> | 2017-06-27 11:59:11 +0200 |
| commit | 3349d055d7b275f6a8d33780c47f8ca5023a8377 (patch) | |
| tree | 41979f987d8fa514429cc63d4b38a79e423b5b43 /pkg | |
| parent | 65a34e4c2d820bb42758569a200e94e7a5c781cd (diff) | |
repro: bisect the log to find multiple guilty programs
Diffstat (limited to 'pkg')
| -rw-r--r-- | pkg/repro/repro.go | 401 |
1 files changed, 321 insertions, 80 deletions
diff --git a/pkg/repro/repro.go b/pkg/repro/repro.go index 67fac81d7..0c126cb18 100644 --- a/pkg/repro/repro.go +++ b/pkg/repro/repro.go @@ -4,6 +4,7 @@ package repro import ( + "bytes" "fmt" "os" "path/filepath" @@ -41,14 +42,6 @@ type instance struct { executorBin string } -func reverseEntries(entries []*prog.LogEntry) []*prog.LogEntry { - last := len(entries) - 1 - for i := 0; i < len(entries)/2; i++ { - entries[i], entries[last-i] = entries[last-i], entries[i] - } - return entries -} - func Run(crashLog []byte, cfg *mgrconfig.Config, vmPool *vm.Pool, vmIndexes []int) (*Result, error) { if len(vmIndexes) == 0 { return nil, fmt.Errorf("no VMs provided") @@ -136,6 +129,46 @@ func Run(crashLog []byte, cfg *mgrconfig.Config, vmPool *vm.Pool, vmIndexes []in return res, err } +func (ctx *context) repro(entries []*prog.LogEntry, crashStart int) (*Result, error) { + // Cut programs that were executed after crash. + for i, ent := range entries { + if ent.Start > crashStart { + entries = entries[:i] + break + } + } + + res, err := ctx.reproExtractProg(entries) + if err != nil { + return res, err + } + if res == nil { + return nil, nil + } + + res, err = ctx.reproMinimizeProg(res) + if err != nil { + return res, err + } + + res, err = ctx.reproExtractC(res) + if err != nil { + return res, err + } + if !res.CRepro { + res.Opts.Repro = false + return res, nil + } + + res, err = ctx.reproMinimizeC(res) + if err != nil { + return res, err + } + + res.Opts.Repro = false + return res, nil +} + func (ctx *context) reproExtractProg(entries []*prog.LogEntry) (*Result, error) { Logf(2, "reproducing crash '%v': suspecting %v programs", ctx.crashDesc, len(entries)) @@ -154,6 +187,49 @@ func (ctx *context) reproExtractProg(entries []*prog.LogEntry) (*Result, error) lastEntries = append(lastEntries, entries[indices[i]]) } + // Execute each program separately for 10 seconds, that should detect simple crashes (i.e. no races and no hangs). + // Programs are executed in reverse order, usually the last program is the guilty one. + res, err := ctx.reproExtractProgSingle(reverseEntries(lastEntries), 10*time.Second) + if err != nil { + return res, err + } + if res != nil { + return res, nil + } + + // Execute all programs and bisect the log to find guilty programs. + res, err = ctx.reproExtractProgBisect(reverseEntries(entries), 10*time.Second) + if err != nil { + return res, err + } + if res != nil { + return res, nil + } + + // Execute each program separately for 5 minutes to catch races and hangs. Note that the max duration must be larger + // than hang/no output detection duration in vm.MonitorExecution, which is currently set to 3 mins. + res, err = ctx.reproExtractProgSingle(reverseEntries(lastEntries), 5*time.Minute) + if err != nil { + return res, err + } + if res != nil { + return res, nil + } + + // Execute all programs and bisect the log with 5 minute timeout. + res, err = ctx.reproExtractProgBisect(reverseEntries(entries), 5*time.Minute) + if err != nil { + return res, err + } + if res != nil { + return res, nil + } + + Logf(0, "reproducing crash '%v': no program crashed", ctx.crashDesc) + return nil, nil +} + +func (ctx *context) reproExtractProgSingle(entries []*prog.LogEntry, duration time.Duration) (*Result, error) { opts := csource.Options{ Threaded: true, Collide: true, @@ -168,46 +244,207 @@ func (ctx *context) reproExtractProg(entries []*prog.LogEntry) (*Result, error) Repro: true, } - // Execute the suspected programs. - // We first try to execute each program for 10 seconds, that should detect simple crashes - // (i.e. no races and no hangs). Then we execute each program for 5 minutes - // to catch races and hangs. Note that the max duration must be larger than - // hang/no output detection duration in vm.MonitorExecution, which is currently set to 3 mins. - // Programs are executed in reverse order, usually the last program is the guilty one. - durations := []time.Duration{10 * time.Second, 5 * time.Minute} - suspected := [][]*prog.LogEntry{reverseEntries(entries), reverseEntries(lastEntries)} - var res *Result - for i, dur := range durations { - for _, ent := range suspected[i] { - opts.Fault = ent.Fault - opts.FaultCall = ent.FaultCall - opts.FaultNth = ent.FaultNth - if opts.FaultCall < 0 || opts.FaultCall >= len(ent.P.Calls) { - opts.FaultCall = len(ent.P.Calls) - 1 + for _, ent := range entries { + opts.Fault = ent.Fault + opts.FaultCall = ent.FaultCall + opts.FaultNth = ent.FaultNth + if opts.FaultCall < 0 || opts.FaultCall >= len(ent.P.Calls) { + opts.FaultCall = len(ent.P.Calls) - 1 + } + crashed, err := ctx.testProg(ent.P, duration, opts) + if err != nil { + return nil, err + } + if crashed { + res := &Result{ + Prog: ent.P, + Duration: duration * 3 / 2, + Opts: opts, + } + return res, nil + } + } + + return nil, nil +} + +func (ctx *context) reproExtractProgBisect(entries []*prog.LogEntry, baseDuration time.Duration) (*Result, error) { + Logf(3, "reproducing crash '%v': bisect: bisecting %d programs", ctx.crashDesc, len(entries)) + + opts := csource.Options{ + Threaded: true, + Collide: true, + Repeat: true, + Procs: ctx.cfg.Procs, + Sandbox: ctx.cfg.Sandbox, + EnableTun: true, + UseTmpDir: true, + HandleSegv: true, + WaitRepeat: true, + Debug: true, + Repro: true, + } + + duration := func(entries int) time.Duration { + return baseDuration + time.Duration((entries/4))*time.Second + } + + // Check that executing the whole log results in a crash. + Logf(3, "reproducing crash '%v': bisect: executing all %d programs", ctx.crashDesc, len(entries)) + crashed, err := ctx.testProgs(entries, duration(len(entries)), opts) + if err != nil { + return nil, err + } + if !crashed { + Logf(3, "reproducing crash '%v': bisect: didn't crash", ctx.crashDesc) + return nil, nil + } + + compose := func(guilty1, guilty2 [][]*prog.LogEntry, chunk []*prog.LogEntry) []*prog.LogEntry { + progs := []*prog.LogEntry{} + for _, c := range guilty1 { + progs = append(progs, c...) + } + progs = append(progs, chunk...) + for _, c := range guilty2 { + progs = append(progs, c...) + } + return progs + } + + logGuilty := func(guilty [][]*prog.LogEntry) string { + log := "[" + for i, chunk := range guilty { + log += fmt.Sprintf("<%d>", len(chunk)) + if i != len(guilty)-1 { + log += ", " } - crashed, err := ctx.testProg(ent.P, dur, opts) + } + log += "]" + return log + } + + // Bisect the programs to find the ones that cause the crash. + guilty := [][]*prog.LogEntry{entries} +again: + Logf(3, "reproducing crash '%v': bisect: guilty chunks: %v", ctx.crashDesc, logGuilty(guilty)) + for i, chunk := range guilty { + if len(chunk) == 1 { + continue + } + + guilty1 := guilty[:i] + guilty2 := guilty[i+1:] + Logf(3, "reproducing crash '%v': bisect: guilty chunks split: %v, <%v>, %v", ctx.crashDesc, logGuilty(guilty1), len(chunk), logGuilty(guilty2)) + + chunk1 := chunk[0 : len(chunk)/2] + chunk2 := chunk[len(chunk)/2 : len(chunk)] + Logf(3, "reproducing crash '%v': bisect: chunk split: <%v> => <%v>, <%v>", ctx.crashDesc, len(chunk), len(chunk1), len(chunk2)) + + Logf(3, "reproducing crash '%v': bisect: triggering crash without chunk #1", ctx.crashDesc) + progs := compose(guilty1, guilty2, chunk2) + crashed, err := ctx.testProgs(progs, duration(len(progs)), opts) + if err != nil { + return nil, err + } + + if crashed { + guilty = nil + guilty = append(guilty, guilty1...) + guilty = append(guilty, chunk2) + guilty = append(guilty, guilty2...) + Logf(3, "reproducing crash '%v': bisect: crashed, chunk #1 evicted", ctx.crashDesc) + goto again + } + + Logf(3, "reproducing crash '%v': bisect: triggering crash without chunk #2", ctx.crashDesc) + progs = compose(guilty1, guilty2, chunk1) + crashed, err = ctx.testProgs(progs, duration(len(progs)), opts) + if err != nil { + return nil, err + } + + if crashed { + guilty = nil + guilty = append(guilty, guilty1...) + guilty = append(guilty, chunk1) + guilty = append(guilty, guilty2...) + Logf(3, "reproducing crash '%v': bisect: crashed, chunk #2 evicted", ctx.crashDesc) + goto again + } + + guilty = nil + guilty = append(guilty, guilty1...) + guilty = append(guilty, chunk1) + guilty = append(guilty, chunk2) + guilty = append(guilty, guilty2...) + + Logf(3, "reproducing crash '%v': bisect: not crashed, both chunks required", ctx.crashDesc) + + goto again + } + + // Concatenate all programs into one. + entries = []*prog.LogEntry{} + var prog prog.Prog + for _, chunk := range guilty { + if len(chunk) != 1 { + return nil, fmt.Errorf("bad bisect result: %v", guilty) + } + entries = append(entries, chunk[0]) + prog.Calls = append(prog.Calls, chunk[0].P.Calls...) + } + + // TODO: Minimize each program before concatenation. + // TODO: Return multiple programs if concatenation fails. + + Logf(3, "reproducing crash '%v': bisect: %d programs left:\n\n%s\n", ctx.crashDesc, len(entries), encodeEntries(entries)) + Logf(3, "reproducing crash '%v': bisect: concatenating", ctx.crashDesc) + + // Execute the program without fault injection. + dur := duration(len(entries)) * 3 / 2 + crashed, err = ctx.testProg(&prog, dur, opts) + if err != nil { + return nil, err + } + if crashed { + res := &Result{ + Prog: &prog, + Duration: dur, + Opts: opts, + } + Logf(3, "reproducing crash '%v': bisect: concatenation succeded", ctx.crashDesc) + return res, nil + } + + // Try with fault injection. + calls := 0 + for _, entry := range entries { + if entry.Fault { + opts.FaultCall = calls + entry.FaultCall + opts.FaultNth = entry.FaultNth + if entry.FaultCall < 0 || entry.FaultCall >= len(entry.P.Calls) { + opts.FaultCall = calls + len(entry.P.Calls) - 1 + } + crashed, err := ctx.testProg(&prog, dur, opts) if err != nil { return nil, err } if crashed { - res = &Result{ - Prog: ent.P, - Duration: dur * 3 / 2, + res := &Result{ + Prog: &prog, + Duration: dur, Opts: opts, } - break + Logf(3, "reproducing crash '%v': bisect: concatenation succeded with fault injection", ctx.crashDesc) + return res, nil } } - if res != nil { - break - } - } - if res == nil { - Logf(0, "reproducing crash '%v': no program crashed", ctx.crashDesc) - return nil, nil + calls += len(entry.P.Calls) } - return res, nil + Logf(3, "reproducing crash '%v': bisect: concatenation failed", ctx.crashDesc) + return nil, nil } func (ctx *context) reproMinimizeProg(res *Result) (*Result, error) { @@ -221,7 +458,7 @@ func (ctx *context) reproMinimizeProg(res *Result) (*Result, error) { res.Prog, res.Opts.FaultCall = prog.Minimize(res.Prog, call, func(p1 *prog.Prog, callIndex int) bool { crashed, err := ctx.testProg(p1, res.Duration, res.Opts) if err != nil { - Logf(1, "reproducing crash '%v': minimization failed with %v", ctx.crashDesc, err) + Logf(0, "reproducing crash '%v': minimization failed with %v", ctx.crashDesc, err) return false } return crashed @@ -368,54 +605,27 @@ func (ctx *context) reproMinimizeC(res *Result) (*Result, error) { return res, nil } -func (ctx *context) repro(entries []*prog.LogEntry, crashStart int) (*Result, error) { - // Cut programs that were executed after crash. - for i, ent := range entries { - if ent.Start > crashStart { - entries = entries[:i] - break - } - } - - res, err := ctx.reproExtractProg(entries) - if err != nil { - return res, err - } - if res == nil { - return nil, nil - } - - res, err = ctx.reproMinimizeProg(res) - if err != nil { - return res, err - } - - res, err = ctx.reproExtractC(res) - if err != nil { - return res, err - } - if !res.CRepro { - res.Opts.Repro = false - return res, nil - } - - res, err = ctx.reproMinimizeC(res) - if err != nil { - return res, err +func (ctx *context) testProg(p *prog.Prog, duration time.Duration, opts csource.Options) (crashed bool, err error) { + entry := prog.LogEntry{P: p} + if opts.FaultCall != -1 { + entry.Fault = true + entry.FaultCall = opts.FaultCall + entry.FaultNth = opts.FaultNth } - - res.Opts.Repro = false - return res, nil + return ctx.testProgs([]*prog.LogEntry{&entry}, duration, opts) } -func (ctx *context) testProg(p *prog.Prog, duration time.Duration, opts csource.Options) (crashed bool, err error) { +func (ctx *context) testProgs(entries []*prog.LogEntry, duration time.Duration, opts csource.Options) (crashed bool, err error) { inst := <-ctx.instances if inst == nil { return false, fmt.Errorf("all VMs failed to boot") } defer ctx.returnInstance(inst) + if len(entries) == 0 { + return false, fmt.Errorf("no programs to execute") + } - pstr := p.Serialize() + pstr := encodeEntries(entries) progFile, err := fileutil.WriteTempFile(pstr) if err != nil { return false, err @@ -433,10 +643,21 @@ func (ctx *context) testProg(p *prog.Prog, duration time.Duration, opts csource. if !opts.Fault { opts.FaultCall = -1 } - command := fmt.Sprintf("%v -executor %v -cover=0 -procs=%v -repeat=%v -sandbox %v -threaded=%v -collide=%v -fault_call=%v -fault_nth=%v %v", - inst.execprogBin, inst.executorBin, opts.Procs, repeat, opts.Sandbox, opts.Threaded, opts.Collide, opts.FaultCall, opts.FaultNth, vmProgFile) + program := entries[0].P.String() + if len(entries) > 1 { + program = "[" + for i, entry := range entries { + program += fmt.Sprintf("%v", len(entry.P.Calls)) + if i != len(entries)-1 { + program += ", " + } + } + program += "]" + } + command := fmt.Sprintf("%v -executor %v -cover=0 -procs=%v -repeat=%v -sandbox %v -threaded=%v -collide=%v %v", + inst.execprogBin, inst.executorBin, opts.Procs, repeat, opts.Sandbox, opts.Threaded, opts.Collide, vmProgFile) Logf(2, "reproducing crash '%v': testing program (duration=%v, %+v): %s", - ctx.crashDesc, duration, opts, p) + ctx.crashDesc, duration, opts, program) return ctx.testImpl(inst.Instance, command, duration) } @@ -496,3 +717,23 @@ func (ctx *context) returnInstance(inst *instance) { ctx.bootRequests <- inst.index inst.Close() } + +func reverseEntries(entries []*prog.LogEntry) []*prog.LogEntry { + last := len(entries) - 1 + for i := 0; i < len(entries)/2; i++ { + entries[i], entries[last-i] = entries[last-i], entries[i] + } + return entries +} + +func encodeEntries(entries []*prog.LogEntry) []byte { + buf := new(bytes.Buffer) + for _, ent := range entries { + opts := "" + if ent.Fault { + opts = fmt.Sprintf(" (fault-call:%v fault-nth:%v)", ent.FaultCall, ent.FaultNth) + } + fmt.Fprintf(buf, "executing program %v%v:\n%v", ent.Proc, opts, string(ent.P.Serialize())) + } + return buf.Bytes() +} |
