aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2023-05-24 18:15:06 +0200
committerAleksandr Nogikh <wp32pw@gmail.com>2023-05-25 11:45:51 +0200
commit0d9bcfef72eef7ccb5cf2320bbed5014385b59c2 (patch)
tree90053114586019fae5d4ceaf86aa3f3d29d3f61b
parentf7e4c7256881bef31f5332d73c569a82cdd6cfbe (diff)
pkg/repro: retry on ExecProgInstance errors
Most of those errors seem to be transient, so there's no sense to fail the whole C repro generation process. Give it one more chance and only fail after that.
-rw-r--r--pkg/repro/repro.go23
-rw-r--r--pkg/repro/repro_test.go69
2 files changed, 77 insertions, 15 deletions
diff --git a/pkg/repro/repro.go b/pkg/repro/repro.go
index daebeed18..dc680cbb5 100644
--- a/pkg/repro/repro.go
+++ b/pkg/repro/repro.go
@@ -516,12 +516,14 @@ func (ctx *context) testProg(p *prog.Prog, duration time.Duration, opts csource.
func (ctx *context) testWithInstance(callback func(execInterface) (rep *instance.RunResult,
err error)) (bool, error) {
- inst := <-ctx.instances
- if inst == nil {
- return false, fmt.Errorf("all VMs failed to boot")
+ result, err := ctx.runOnInstance(callback)
+ if err != nil {
+ // It's hard to classify all kinds of errors into the one worth repeating
+ // and not. So let's just retry run for all errors.
+ // If the problem is transient, it will likely go away.
+ // If the problem is permanent, it will just be the same.
+ result, err = ctx.runOnInstance(callback)
}
- defer ctx.returnInstance(inst)
- result, err := callback(inst.execProg)
if err != nil {
return false, err
}
@@ -541,6 +543,17 @@ func (ctx *context) testWithInstance(callback func(execInterface) (rep *instance
return true, nil
}
+// A helper method for testWithInstance.
+func (ctx *context) runOnInstance(callback func(execInterface) (rep *instance.RunResult,
+ err error)) (*instance.RunResult, error) {
+ inst := <-ctx.instances
+ if inst == nil {
+ return nil, fmt.Errorf("all VMs failed to boot")
+ }
+ defer ctx.returnInstance(inst)
+ return callback(inst.execProg)
+}
+
func encodeEntries(entries []*prog.LogEntry) []byte {
buf := new(bytes.Buffer)
for _, ent := range entries {
diff --git a/pkg/repro/repro_test.go b/pkg/repro/repro_test.go
index ebe3809e0..c6c4cbe8d 100644
--- a/pkg/repro/repro_test.go
+++ b/pkg/repro/repro_test.go
@@ -4,6 +4,7 @@
package repro
import (
+ "fmt"
"math/rand"
"regexp"
"sync"
@@ -178,24 +179,53 @@ alarm(0xa)
getpid()
`
+// Only crash if `pause()` is followed by `alarm(0xa)`.
+var testCrashCondition = regexp.MustCompile(`(?s)pause\(\).*alarm\(0xa\)`)
+
+func testExecRunner(log []byte) (*instance.RunResult, error) {
+ crash := testCrashCondition.Match(log)
+ if crash {
+ ret := &instance.RunResult{}
+ ret.Report = &report.Report{
+ Title: `some crash`,
+ }
+ return ret, nil
+ }
+ return &instance.RunResult{}, nil
+}
+
// Just a pkg/repro smoke test: check that we can extract a two-call reproducer.
// No focus on error handling and minor corner cases.
func TestPlainRepro(t *testing.T) {
ctx := prepareTestCtx(t, testReproLog)
- // Only crash if `pause()` is followed by `alarm(0xa)`.
- var match = regexp.MustCompile(`(?s)pause\(\).*alarm\(0xa\)`)
+ go generateTestInstances(ctx, 3, &testExecInterface{
+ t: t,
+ run: testExecRunner,
+ })
+ result, _, err := ctx.run()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if diff := cmp.Diff(`pause()
+alarm(0xa)
+`, string(result.Prog.Serialize())); diff != "" {
+ t.Fatal(diff)
+ }
+}
+
+// There happen to be transient errors like ssh/scp connection failures.
+// Ensure that the code just retries.
+func TestVMErrorResilience(t *testing.T) {
+ ctx := prepareTestCtx(t, testReproLog)
+ fail := false
go generateTestInstances(ctx, 3, &testExecInterface{
t: t,
run: func(log []byte) (*instance.RunResult, error) {
- crash := match.Match(log)
- if crash {
- ret := &instance.RunResult{}
- ret.Report = &report.Report{
- Title: `some crash`,
- }
- return ret, nil
+ fail = !fail
+ if fail {
+ return nil, fmt.Errorf("some random error")
}
- return &instance.RunResult{}, nil
+ return testExecRunner(log)
},
})
result, _, err := ctx.run()
@@ -208,3 +238,22 @@ alarm(0xa)
t.Fatal(diff)
}
}
+
+func TestTooManyErrors(t *testing.T) {
+ ctx := prepareTestCtx(t, testReproLog)
+ counter := 0
+ go generateTestInstances(ctx, 3, &testExecInterface{
+ t: t,
+ run: func(log []byte) (*instance.RunResult, error) {
+ counter++
+ if counter%3 != 0 {
+ return nil, fmt.Errorf("some random error")
+ }
+ return testExecRunner(log)
+ },
+ })
+ _, _, err := ctx.run()
+ if err == nil {
+ t.Fatalf("expected an error")
+ }
+}