aboutsummaryrefslogtreecommitdiffstats
path: root/syz-manager
diff options
context:
space:
mode:
authorSabyrzhan Tasbolatov <snovitoll@gmail.com>2024-08-20 12:42:25 +0500
committerAleksandr Nogikh <nogikh@google.com>2024-08-21 11:45:23 +0000
commitb840654d3ada1d4c3ade855affaa8b53cdd92346 (patch)
treeeac6c27a98b8b8dd4724a0d36507db4edeff8167 /syz-manager
parentd504e3fd900ecc041a690a1389982c7e9c1560fd (diff)
syz-manager: sync saveRepro() and needLocalRepro()
There is a race condition between saving and accessing the "repro.prog" file in the processFuzzingResults() goroutine. needRepro(), which is called from reproMgr.Loop(), upon <-externalReproQueue, or at the end of saveCrash(), can be called before saveRepro(). Removed the reproMgr.Done channel and moved post-processing of *ReproResult into runRepro(). This should guarantee the order of RW operations on the "repro.prog" file. $ stress ./syz-manager.test -test.run=TestReproRWRace 5s: 16 runs so far, 0 failures, 8 active 10s: 40 runs so far, 0 failures, 8 active 15s: 64 runs so far, 0 failures, 8 active 20s: 80 runs so far, 0 failures, 8 active 25s: 96 runs so far, 0 failures, 8 active 30s: 112 runs so far, 0 failures, 8 active 35s: 136 runs so far, 0 failures, 8 active 40s: 152 runs so far, 0 failures, 8 active 45s: 168 runs so far, 0 failures, 8 active 50s: 184 runs so far, 0 failures, 8 active 55s: 200 runs so far, 0 failures, 8 active 1m0s: 216 runs so far, 0 failures, 8 active Fixes: https://github.com/google/syzkaller/issues/5157
Diffstat (limited to 'syz-manager')
-rw-r--r--syz-manager/manager.go35
-rw-r--r--syz-manager/repro.go5
-rw-r--r--syz-manager/repro_test.go68
3 files changed, 76 insertions, 32 deletions
diff --git a/syz-manager/manager.go b/syz-manager/manager.go
index 704129b1b..6657ed2f6 100644
--- a/syz-manager/manager.go
+++ b/syz-manager/manager.go
@@ -401,21 +401,6 @@ func (mgr *Manager) processFuzzingResults(ctx context.Context) {
if crash != nil {
mgr.saveCrash(crash)
}
- case res := <-mgr.reproMgr.Done:
- if res.err != nil {
- reportReproError(res.err)
- }
- if res.repro == nil {
- if res.crash.Title == "" {
- log.Logf(1, "repro '%v' not from dashboard, so not reporting the failure",
- res.crash.FullTitle())
- } else {
- log.Logf(1, "report repro failure of '%v'", res.crash.Title)
- mgr.saveFailedRepro(res.crash.Report, res.stats)
- }
- } else {
- mgr.saveRepro(res)
- }
case crash := <-mgr.externalReproQueue:
if mgr.needRepro(crash) {
mgr.reproMgr.Enqueue(crash)
@@ -491,9 +476,29 @@ func (mgr *Manager) runRepro(crash *Crash) *ReproResult {
}
}
}
+
+ mgr.processRepro(ret)
+
return ret
}
+func (mgr *Manager) processRepro(res *ReproResult) {
+ if res.err != nil {
+ reportReproError(res.err)
+ }
+ if res.repro == nil {
+ if res.crash.Title == "" {
+ log.Logf(1, "repro '%v' not from dashboard, so not reporting the failure",
+ res.crash.FullTitle())
+ } else {
+ log.Logf(1, "report repro failure of '%v'", res.crash.Title)
+ mgr.saveFailedRepro(res.crash.Report, res.stats)
+ }
+ } else {
+ mgr.saveRepro(res)
+ }
+}
+
func (mgr *Manager) preloadCorpus() {
corpusDB, err := db.Open(filepath.Join(mgr.cfg.Workdir, "corpus.db"), true)
if err != nil {
diff --git a/syz-manager/repro.go b/syz-manager/repro.go
index 11812b83b..3232bd9f3 100644
--- a/syz-manager/repro.go
+++ b/syz-manager/repro.go
@@ -20,8 +20,6 @@ type reproManagerView interface {
}
type reproManager struct {
- Done chan *ReproResult
-
statNumReproducing *stat.Val
statPending *stat.Val
@@ -39,8 +37,6 @@ type reproManager struct {
func newReproManager(mgr reproManagerView, reproVMs int, onlyOnce bool) *reproManager {
ret := &reproManager{
- Done: make(chan *ReproResult, 10),
-
mgr: mgr,
onlyOnce: onlyOnce,
parallel: make(chan struct{}, reproVMs),
@@ -217,7 +213,6 @@ func (m *reproManager) handle(crash *Crash) {
log.Logf(0, "repro finished '%v', repro=%v crepro=%v desc='%v' hub=%v from_dashboard=%v",
crash.FullTitle(), res.repro != nil, crepro, title, crash.fromHub, crash.fromDashboard,
)
- m.Done <- res
}
func (m *reproManager) adjustPoolSizeLocked() {
diff --git a/syz-manager/repro_test.go b/syz-manager/repro_test.go
index 8042c7042..84ab94a6b 100644
--- a/syz-manager/repro_test.go
+++ b/syz-manager/repro_test.go
@@ -56,15 +56,7 @@ func TestReproManager(t *testing.T) {
called.ret <- &ReproResult{crash: &Crash{fromHub: true}}
called2.ret <- &ReproResult{crash: &Crash{fromHub: true}}
- // Wait until the number of reserved VMs goes to 0.
- for i := 0; i < 100; i++ {
- if mock.reserved.Load() == 0 {
- assert.True(t, obj.CanReproMore())
- return
- }
- time.Sleep(10 * time.Millisecond)
- }
- t.Fatal("reserved VMs must have dropped to 0")
+ mock.onVMShutdown(t, obj)
}
func TestReproOrder(t *testing.T) {
@@ -105,9 +97,48 @@ func TestReproOrder(t *testing.T) {
assert.Equal(t, crashes[2], obj.popCrash())
}
+func TestReproRWRace(t *testing.T) {
+ mock := &reproMgrMock{
+ run: make(chan runCallback),
+ }
+ obj := newReproManager(mock, 3, false)
+
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+
+ go func() {
+ obj.Loop(ctx) // calls runRepro()
+ }()
+
+ assert.False(t, obj.CanReproMore())
+ obj.StartReproduction()
+ assert.True(t, obj.CanReproMore())
+
+ obj.Enqueue(&Crash{Report: &report.Report{Title: "A"}})
+ obj.Enqueue(&Crash{Report: &report.Report{Title: "A"}})
+
+ assert.True(t, mock.needRepro(nil))
+ called := <-mock.run
+ called.ret <- &ReproResult{}
+ // Pretend that processRepro() is finished and
+ // we've written "repro.prog" to the disk.
+ mock.reproProgExist.Store(true)
+ assert.False(t, mock.needRepro(nil))
+ assert.True(t, obj.CanReproMore())
+
+ called2 := <-mock.run
+ called2.ret <- &ReproResult{}
+ assert.False(t, mock.needRepro(nil))
+ assert.True(t, obj.CanReproMore())
+
+ // Reproducers may be still running.
+ mock.onVMShutdown(t, obj)
+}
+
type reproMgrMock struct {
- reserved atomic.Int64
- run chan runCallback
+ reserved atomic.Int64
+ run chan runCallback
+ reproProgExist atomic.Bool
}
type runCallback struct {
@@ -115,6 +146,19 @@ type runCallback struct {
ret chan *ReproResult
}
+// Wait until the number of reserved VMs goes to 0.
+func (m *reproMgrMock) onVMShutdown(t *testing.T, reproMgr *reproManager) {
+ for i := 0; i < 100; i++ {
+ if m.reserved.Load() == 0 {
+ assert.True(t, reproMgr.CanReproMore())
+ assert.True(t, reproMgr.Empty())
+ return
+ }
+ time.Sleep(10 * time.Millisecond)
+ }
+ t.Fatal("reserved VMs must have dropped to 0")
+}
+
func (m *reproMgrMock) runRepro(crash *Crash) *ReproResult {
retCh := make(chan *ReproResult)
m.run <- runCallback{crash: crash, ret: retCh}
@@ -124,7 +168,7 @@ func (m *reproMgrMock) runRepro(crash *Crash) *ReproResult {
}
func (m *reproMgrMock) needRepro(crash *Crash) bool {
- return true
+ return !m.reproProgExist.Load()
}
func (m *reproMgrMock) resizeReproPool(VMs int) {