diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2023-10-09 13:32:41 +0200 |
|---|---|---|
| committer | Aleksandr Nogikh <nogikh@google.com> | 2023-10-10 10:20:29 +0000 |
| commit | 83165b574421e7d45d7a64c46c22718dbfee2787 (patch) | |
| tree | 8d9989b7d35e75af6df0ba4080fe21d521733764 /vm | |
| parent | 20b77f8a879c95d91df126a37dd54b3215b5f79e (diff) | |
vm/gce: retry GCE init
We've been seeing an increase in "failed to init gce" errors on syzbot.
These problems seem totally transient, so let's address them by retrying
the initialization instead of aborting syz-manager's execution.
Diffstat (limited to 'vm')
| -rw-r--r-- | vm/gce/gce.go | 29 |
1 files changed, 27 insertions, 2 deletions
```diff
diff --git a/vm/gce/gce.go b/vm/gce/gce.go
index 84714769b..0bac4c152 100644
--- a/vm/gce/gce.go
+++ b/vm/gce/gce.go
@@ -107,10 +107,11 @@ func Ctor(env *vmimpl.Env, consoleReadCmd string) (*Pool, error) {
 		return nil, fmt.Errorf("both image and gce_image are specified")
 	}
 
-	GCE, err := gce.NewContext(cfg.ZoneID)
+	GCE, err := initGCE(cfg.ZoneID)
 	if err != nil {
-		return nil, fmt.Errorf("failed to init gce: %w", err)
+		return nil, err
 	}
+
 	log.Logf(0, "GCE initialized: running on %v, internal IP %v, project %v, zone %v, net %v/%v",
 		GCE.Instance, GCE.InternalIP, GCE.ProjectID, GCE.ZoneID, GCE.Network, GCE.Subnetwork)
 
@@ -138,6 +139,30 @@ func Ctor(env *vmimpl.Env, consoleReadCmd string) (*Pool, error) {
 	return pool, nil
 }
 
+func initGCE(zoneID string) (*gce.Context, error) {
+	// There happen some transient GCE init errors on and off.
+	// Let's try it several times before aborting.
+	const (
+		gceInitAttempts = 3
+		gceInitBackoff  = 5 * time.Second
+	)
+	var (
+		GCE *gce.Context
+		err error
+	)
+	for i := 1; i <= gceInitAttempts; i++ {
+		if i > 1 {
+			time.Sleep(gceInitBackoff)
+		}
+		GCE, err = gce.NewContext(zoneID)
+		if err == nil {
+			return GCE, nil
+		}
+		log.Logf(0, "init GCE attempt %d/%d failed: %v", i, gceInitAttempts, err)
+	}
+	return nil, fmt.Errorf("all attempts to init GCE failed: %w", err)
+}
+
 func (pool *Pool) Count() int {
 	return pool.cfg.Count
 }
```
