diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2025-04-22 17:24:38 +0200 |
|---|---|---|
| committer | Aleksandr Nogikh <nogikh@google.com> | 2025-04-23 09:01:11 +0000 |
| commit | d00e39af8a96f7fe18fa5664936a7d658f80eeef (patch) | |
| tree | 01ed97e71880c4ad070ab95b5df6c0f36470c87f | |
| parent | 6eb12242337d83872574660e27cfe2e8db463e48 (diff) | |
pkg/manager: better handle the reproduction queue
As pingQueue holds maximum 1 element, it's not a very good indicator of
whether there are more elements in the reproduction queue. If the first
few crashes happen to be no longer needed (which is quite likely when
reproduction loop is started long after we have begun to collect the
crashes), the loop becomes reactive - new reproductions will be started
only once new bugs are submitted to the queue.
Fix it by polling the queue until it returns nil. Add a test that
exposes the previously existing bug.
| -rw-r--r-- | pkg/manager/repro.go | 6 | ||||
| -rw-r--r-- | pkg/manager/repro_test.go | 45 |
2 files changed, 45 insertions, 6 deletions
diff --git a/pkg/manager/repro.go b/pkg/manager/repro.go index f9fb2d0e2..28b676752 100644 --- a/pkg/manager/repro.go +++ b/pkg/manager/repro.go @@ -188,11 +188,15 @@ func (r *ReproLoop) Loop(ctx context.Context) { for { if crash != nil && !r.mgr.NeedRepro(crash) { log.Logf(1, "reproduction of %q aborted: it's no longer needed", crash.FullTitle()) - crash = nil // Now we might not need that many VMs. r.mu.Lock() r.adjustPoolSizeLocked() r.mu.Unlock() + + // Immediately check if there was any other crash in the queue, so that we fall back + // to waiting on pingQueue only if there were really no other crashes in the queue. + crash = r.popCrash() + continue } if crash != nil { break diff --git a/pkg/manager/repro_test.go b/pkg/manager/repro_test.go index d66bee8d0..909d6ffeb 100644 --- a/pkg/manager/repro_test.go +++ b/pkg/manager/repro_test.go @@ -97,8 +97,12 @@ func TestReproOrder(t *testing.T) { } func TestReproRWRace(t *testing.T) { + var reproProgExist atomic.Bool mock := &reproMgrMock{ run: make(chan runCallback), + needReproCb: func(_ *Crash) bool { + return !reproProgExist.Load() + }, } obj := NewReproLoop(mock, 3, false) @@ -114,7 +118,7 @@ func TestReproRWRace(t *testing.T) { called := <-mock.run // Pretend that processRepro() is finished and // we've written "repro.prog" to the disk. - mock.reproProgExist.Store(true) + reproProgExist.Store(true) assert.False(t, mock.NeedRepro(nil)) called.ret <- &ReproResult{} assert.True(t, obj.CanReproMore()) @@ -145,10 +149,38 @@ func TestCancelRunningRepro(t *testing.T) { done() } +func TestEnqueueTriggersRepro(t *testing.T) { + mock := &reproMgrMock{ + run: make(chan runCallback), + needReproCb: func(crash *Crash) bool { + return crash.FullTitle() == "C" + }, + } + obj := NewReproLoop(mock, 1, false) + obj.Enqueue(&Crash{Report: &report.Report{Title: "A"}, Manual: true}) + obj.Enqueue(&Crash{Report: &report.Report{Title: "B"}, Manual: true}) + obj.Enqueue(&Crash{Report: &report.Report{Title: "C"}}) + + ctx, done := context.WithCancel(context.Background()) + complete := make(chan struct{}) + go func() { + obj.Loop(ctx) + close(complete) + }() + + defer func() { + <-complete + }() + // The test will hang if the loop never picks up the title C. + crash := <-mock.run + assert.Equal(t, "C", crash.crash.FullTitle()) + done() +} + type reproMgrMock struct { - reserved atomic.Int64 - run chan runCallback - reproProgExist atomic.Bool + reserved atomic.Int64 + run chan runCallback + needReproCb func(*Crash) bool } type runCallback struct { @@ -187,7 +219,10 @@ func (m *reproMgrMock) RunRepro(ctx context.Context, crash *Crash) *ReproResult } func (m *reproMgrMock) NeedRepro(crash *Crash) bool { - return !m.reproProgExist.Load() + if m.needReproCb != nil { + return m.needReproCb(crash) + } + return true } func (m *reproMgrMock) ResizeReproPool(VMs int) { |
