From d00e39af8a96f7fe18fa5664936a7d658f80eeef Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Tue, 22 Apr 2025 17:24:38 +0200 Subject: pkg/manager: better handle the reproduction queue As pingQueue holds maximum 1 element, it's not a very good indicator of whether there are more elements in the reproduction queue. If the first few crashes happen to be no longer needed (which is quite likely when reproduction loop is started long after we have begun to collect the crashes), the loop becomes reactive - new reproductions will be started only once new bugs are submitted to the queue. Fix it by polling the queue until it returns nil. Add a test that exposes the previously existing bug. --- pkg/manager/repro.go | 6 +++++- pkg/manager/repro_test.go | 45 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/pkg/manager/repro.go b/pkg/manager/repro.go index f9fb2d0e2..28b676752 100644 --- a/pkg/manager/repro.go +++ b/pkg/manager/repro.go @@ -188,11 +188,15 @@ func (r *ReproLoop) Loop(ctx context.Context) { for { if crash != nil && !r.mgr.NeedRepro(crash) { log.Logf(1, "reproduction of %q aborted: it's no longer needed", crash.FullTitle()) - crash = nil // Now we might not need that many VMs. r.mu.Lock() r.adjustPoolSizeLocked() r.mu.Unlock() + + // Immediately check if there was any other crash in the queue, so that we fall back + // to waiting on pingQueue only if there were really no other crashes in the queue. + crash = r.popCrash() + continue } if crash != nil { break diff --git a/pkg/manager/repro_test.go b/pkg/manager/repro_test.go index d66bee8d0..909d6ffeb 100644 --- a/pkg/manager/repro_test.go +++ b/pkg/manager/repro_test.go @@ -97,8 +97,12 @@ func TestReproOrder(t *testing.T) { } func TestReproRWRace(t *testing.T) { + var reproProgExist atomic.Bool mock := &reproMgrMock{ run: make(chan runCallback), + needReproCb: func(_ *Crash) bool { + return !reproProgExist.Load() + }, } obj := NewReproLoop(mock, 3, false) @@ -114,7 +118,7 @@ func TestReproRWRace(t *testing.T) { called := <-mock.run // Pretend that processRepro() is finished and // we've written "repro.prog" to the disk. - mock.reproProgExist.Store(true) + reproProgExist.Store(true) assert.False(t, mock.NeedRepro(nil)) called.ret <- &ReproResult{} assert.True(t, obj.CanReproMore()) @@ -145,10 +149,38 @@ func TestCancelRunningRepro(t *testing.T) { done() } +func TestEnqueueTriggersRepro(t *testing.T) { + mock := &reproMgrMock{ + run: make(chan runCallback), + needReproCb: func(crash *Crash) bool { + return crash.FullTitle() == "C" + }, + } + obj := NewReproLoop(mock, 1, false) + obj.Enqueue(&Crash{Report: &report.Report{Title: "A"}, Manual: true}) + obj.Enqueue(&Crash{Report: &report.Report{Title: "B"}, Manual: true}) + obj.Enqueue(&Crash{Report: &report.Report{Title: "C"}}) + + ctx, done := context.WithCancel(context.Background()) + complete := make(chan struct{}) + go func() { + obj.Loop(ctx) + close(complete) + }() + + defer func() { + <-complete + }() + // The test will hang if the loop never picks up the title C. + crash := <-mock.run + assert.Equal(t, "C", crash.crash.FullTitle()) + done() +} + type reproMgrMock struct { - reserved atomic.Int64 - run chan runCallback - reproProgExist atomic.Bool + reserved atomic.Int64 + run chan runCallback + needReproCb func(*Crash) bool } type runCallback struct { @@ -187,7 +219,10 @@ func (m *reproMgrMock) RunRepro(ctx context.Context, crash *Crash) *ReproResult } func (m *reproMgrMock) NeedRepro(crash *Crash) bool { - return !m.reproProgExist.Load() + if m.needReproCb != nil { + return m.needReproCb(crash) + } + return true } func (m *reproMgrMock) ResizeReproPool(VMs int) { -- cgit mrf-deployment