aboutsummaryrefslogtreecommitdiffstats
path: root/pkg
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2023-07-19 16:40:52 +0200
committerAleksandr Nogikh <nogikh@google.com>2023-07-20 08:39:10 +0000
commit7b630fdbfd1d53f913e0aff0dfa8ebfbaf86652b (patch)
tree630ca928ae59153b37314a0b9ac28d2e7da4b391 /pkg
parentd922ca7efda29b23dfb85abe37aee1641c4fbe05 (diff)
pkg/repro: tolerate two consecutive run errors
Retrying once has greatly reduced the number of "failed to copy prog to VM" errors, but they still periodically pop up. The underlying problem is still not 100% known. Supposedly, if a booted VM with an instrumented kernel has to wait too long, it can just hang or crash by itself. At least on some problematic revisions. Investigation would be quite time-consuming -- we need to do a complicated refactoring in order to also capture serial output for Copy() failures. So far it does not seem to be totally worth it. Let's do 3 runOnInstance() attempts. If the problem still persists, there's no point in doing more runs -- we'd have to determine the exact root cause.
Diffstat (limited to 'pkg')
-rw-r--r--pkg/repro/repro.go12
-rw-r--r--pkg/repro/repro_test.go2
2 files changed, 10 insertions, 4 deletions
diff --git a/pkg/repro/repro.go b/pkg/repro/repro.go
index b774705e6..1da74830f 100644
--- a/pkg/repro/repro.go
+++ b/pkg/repro/repro.go
@@ -527,13 +527,19 @@ func (ctx *context) testProg(p *prog.Prog, duration time.Duration, opts csource.
func (ctx *context) testWithInstance(callback func(execInterface) (rep *instance.RunResult,
err error)) (bool, error) {
- result, err := ctx.runOnInstance(callback)
- if err != nil {
+ var result *instance.RunResult
+ var err error
+
+ const attempts = 3
+ for i := 0; i < attempts; i++ {
// It's hard to classify all kinds of errors into the one worth repeating
- // and not. So let's just retry run for all errors.
+ // and not. So let's just retry runs for all errors.
// If the problem is transient, it will likely go away.
// If the problem is permanent, it will just be the same.
result, err = ctx.runOnInstance(callback)
+ if err == nil {
+ break
+ }
}
if err != nil {
return false, err
diff --git a/pkg/repro/repro_test.go b/pkg/repro/repro_test.go
index 0e58685b7..fc8d9cad0 100644
--- a/pkg/repro/repro_test.go
+++ b/pkg/repro/repro_test.go
@@ -247,7 +247,7 @@ func TestTooManyErrors(t *testing.T) {
t: t,
run: func(log []byte) (*instance.RunResult, error) {
counter++
- if counter%3 != 0 {
+ if counter%4 != 0 {
return nil, fmt.Errorf("some random error")
}
return testExecRunner(log)