From 479141f703a43adab07cba7f8b3c99399fbbeb68 Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Thu, 24 Oct 2024 10:15:09 +0200 Subject: pkg/corpus: support multiple focus areas Focus area assigns a fuzzing priority to a set of PCs. When running ChooseProgram(), corpus will first select a focus area proportionally to the specified weights, and only then continue with selecting a program belonging to it. --- pkg/corpus/corpus.go | 96 +++++++++++++++++++++++++++++++++++++++-------- pkg/corpus/corpus_test.go | 22 ++++++----- pkg/corpus/minimize.go | 19 +++++++--- pkg/corpus/prio.go | 58 ++++++++++++++++++---------- pkg/corpus/prio_test.go | 75 +++++++++++++++++++++++++++++++++++- 5 files changed, 219 insertions(+), 51 deletions(-) (limited to 'pkg/corpus') diff --git a/pkg/corpus/corpus.go b/pkg/corpus/corpus.go index 8b70b0cfd..487457376 100644 --- a/pkg/corpus/corpus.go +++ b/pkg/corpus/corpus.go @@ -5,6 +5,8 @@ package corpus import ( "context" + "fmt" + "maps" "sync" "github.com/google/syzkaller/pkg/cover" @@ -17,16 +19,30 @@ import ( // Corpus object represents a set of syzkaller-found programs that // cover the kernel up to the currently reached frontiers. type Corpus struct { - ctx context.Context - mu sync.RWMutex - progs map[string]*Item - signal signal.Signal // total signal of all items - cover cover.Cover // total coverage of all items - updates chan<- NewItemEvent + ctx context.Context + mu sync.RWMutex + progsMap map[string]*Item + signal signal.Signal // total signal of all items + cover cover.Cover // total coverage of all items + updates chan<- NewItemEvent + *ProgramsList StatProgs *stat.Val StatSignal *stat.Val StatCover *stat.Val + + focusAreas []*focusAreaState +} + +type focusAreaState struct { + FocusArea + *ProgramsList +} + +type FocusArea struct { + Name string // can be empty + CoverPCs map[uint64]struct{} + Weight float64 } func NewCorpus(ctx context.Context) *Corpus { @@ -36,12 +52,12 @@ func NewCorpus(ctx context.Context) *Corpus { func NewMonitoredCorpus(ctx context.Context, updates chan<- NewItemEvent) *Corpus { corpus := &Corpus{ ctx: ctx, - progs: make(map[string]*Item), + progsMap: make(map[string]*Item), updates: updates, ProgramsList: &ProgramsList{}, } corpus.StatProgs = stat.New("corpus", "Number of test programs in the corpus", stat.Console, - stat.Link("/corpus"), stat.Graph("corpus"), stat.LenOf(&corpus.progs, &corpus.mu)) + stat.Link("/corpus"), stat.Graph("corpus"), stat.LenOf(&corpus.progsMap, &corpus.mu)) corpus.StatSignal = stat.New("signal", "Fuzzing signal in the corpus", stat.LenOf(&corpus.signal, &corpus.mu)) corpus.StatCover = stat.New("coverage", "Source coverage in the corpus", stat.Console, @@ -67,6 +83,8 @@ type Item struct { Signal signal.Signal Cover []uint64 Updates []ItemUpdate + + areas map[*focusAreaState]struct{} } func (item Item) StringCall() string { @@ -88,6 +106,29 @@ type NewItemEvent struct { NewCover []uint64 } +// SetFocusAreas can only be called once on an empty corpus. +func (corpus *Corpus) SetFocusAreas(areas []FocusArea) { + corpus.mu.Lock() + defer corpus.mu.Unlock() + if len(corpus.progsMap) > 0 { + panic("SetFocusAreas() is called on a non-empty corpus") + } + for _, area := range areas { + obj := &ProgramsList{} + if len(areas) > 1 && area.Name != "" { + // Only show extra statistics if there's more than one area. + stat.New("corpus ["+area.Name+"]", + fmt.Sprintf("Corpus programs of the focus area %q", area.Name), + stat.Console, stat.Graph("corpus"), + stat.LenOf(&obj.progs, &corpus.mu)) + } + corpus.focusAreas = append(corpus.focusAreas, &focusAreaState{ + FocusArea: area, + ProgramsList: obj, + }) + } +} + func (corpus *Corpus) Save(inp NewInput) { progData := inp.Prog.Serialize() sig := hash.String(progData) @@ -100,7 +141,7 @@ func (corpus *Corpus) Save(inp NewInput) { RawCover: inp.RawCover, } exists := false - if old, ok := corpus.progs[sig]; ok { + if old, ok := corpus.progsMap[sig]; ok { exists = true newSignal := old.Signal.Copy() newSignal.Merge(inp.Signal) @@ -115,14 +156,16 @@ func (corpus *Corpus) Save(inp NewInput) { Signal: newSignal, Cover: newCover.Serialize(), Updates: append([]ItemUpdate{}, old.Updates...), + areas: maps.Clone(old.areas), } const maxUpdates = 32 if len(newItem.Updates) < maxUpdates { newItem.Updates = append(newItem.Updates, update) } - corpus.progs[sig] = newItem + corpus.progsMap[sig] = newItem + corpus.applyFocusAreas(newItem, inp.Cover) } else { - corpus.progs[sig] = &Item{ + item := &Item{ Sig: sig, Call: inp.Call, Prog: inp.Prog, @@ -131,6 +174,8 @@ func (corpus *Corpus) Save(inp NewInput) { Cover: inp.Cover, Updates: []ItemUpdate{update}, } + corpus.progsMap[sig] = item + corpus.applyFocusAreas(item, inp.Cover) corpus.saveProgram(inp.Prog, inp.Signal) } corpus.signal.Merge(inp.Signal) @@ -147,6 +192,27 @@ func (corpus *Corpus) Save(inp NewInput) { } } } + +func (corpus *Corpus) applyFocusAreas(item *Item, coverDelta []uint64) { + for _, area := range corpus.focusAreas { + matches := false + for _, pc := range coverDelta { + if _, ok := area.CoverPCs[pc]; ok { + matches = true + break + } + } + if !matches { + continue + } + area.saveProgram(item.Prog, item.Signal) + if item.areas == nil { + item.areas = make(map[*focusAreaState]struct{}) + item.areas[area] = struct{}{} + } + } +} + func (corpus *Corpus) Signal() signal.Signal { corpus.mu.RLock() defer corpus.mu.RUnlock() @@ -156,8 +222,8 @@ func (corpus *Corpus) Signal() signal.Signal { func (corpus *Corpus) Items() []*Item { corpus.mu.RLock() defer corpus.mu.RUnlock() - ret := make([]*Item, 0, len(corpus.progs)) - for _, item := range corpus.progs { + ret := make([]*Item, 0, len(corpus.progsMap)) + for _, item := range corpus.progsMap { ret = append(ret, item) } return ret @@ -166,7 +232,7 @@ func (corpus *Corpus) Items() []*Item { func (corpus *Corpus) Item(sig string) *Item { corpus.mu.RLock() defer corpus.mu.RUnlock() - return corpus.progs[sig] + return corpus.progsMap[sig] } type CallCov struct { @@ -178,7 +244,7 @@ func (corpus *Corpus) CallCover() map[string]*CallCov { corpus.mu.RLock() defer corpus.mu.RUnlock() calls := make(map[string]*CallCov) - for _, inp := range corpus.progs { + for _, inp := range corpus.progsMap { call := inp.StringCall() if calls[call] == nil { calls[call] = new(CallCov) diff --git a/pkg/corpus/corpus_test.go b/pkg/corpus/corpus_test.go index 90b11717a..c4410503e 100644 --- a/pkg/corpus/corpus_test.go +++ b/pkg/corpus/corpus_test.go @@ -22,7 +22,7 @@ func TestCorpusOperation(t *testing.T) { // First program is saved. rs := rand.NewSource(0) - inp1 := generateInput(target, rs, 5, 5) + inp1 := generateInput(target, rs, 5) go corpus.Save(inp1) event := <-ch progData := inp1.Prog.Serialize() @@ -30,9 +30,9 @@ func TestCorpusOperation(t *testing.T) { assert.Equal(t, false, event.Exists) // Second program is saved for every its call. - inp2 := generateInput(target, rs, 5, 5) + inp2 := generateInput(target, rs, 5) progData = inp2.Prog.Serialize() - for i := 0; i < 5; i++ { + for i := 0; i < len(inp2.Prog.Calls); i++ { inp2.Call = i go corpus.Save(inp2) event := <-ch @@ -49,7 +49,6 @@ func TestCorpusOperation(t *testing.T) { // Verify the total signal. assert.Equal(t, corpus.StatSignal.Val(), 5) - assert.Equal(t, corpus.StatCover.Val(), 0) assert.Equal(t, corpus.StatProgs.Val(), 2) corpus.Minimize(true) @@ -61,7 +60,7 @@ func TestCorpusCoverage(t *testing.T) { corpus := NewMonitoredCorpus(context.Background(), ch) rs := rand.NewSource(0) - inp := generateInput(target, rs, 5, 5) + inp := generateInput(target, rs, 5) inp.Cover = []uint64{10, 11} go corpus.Save(inp) event := <-ch @@ -91,7 +90,7 @@ func TestCorpusSaveConcurrency(t *testing.T) { rs := rand.NewSource(0) r := rand.New(rs) for it := 0; it < iters; it++ { - inp := generateInput(target, rs, 10, it) + inp := generateInput(target, rs, it) corpus.Save(inp) corpus.ChooseProgram(r).Clone() } @@ -99,16 +98,21 @@ func TestCorpusSaveConcurrency(t *testing.T) { } } -func generateInput(target *prog.Target, rs rand.Source, ncalls, sizeSig int) NewInput { - p := target.Generate(rs, ncalls, target.DefaultChoiceTable()) +func generateInput(target *prog.Target, rs rand.Source, sizeSig int) NewInput { + return generateRangedInput(target, rs, 1, sizeSig) +} + +func generateRangedInput(target *prog.Target, rs rand.Source, sigFrom, sigTo int) NewInput { + p := target.Generate(rs, 5, target.DefaultChoiceTable()) var raw []uint64 - for i := 1; i <= sizeSig; i++ { + for i := sigFrom; i <= sigTo; i++ { raw = append(raw, uint64(i)) } return NewInput{ Prog: p, Call: int(rs.Int63() % int64(len(p.Calls))), Signal: signal.FromRaw(raw, 0), + Cover: raw, } } diff --git a/pkg/corpus/minimize.go b/pkg/corpus/minimize.go index 8eac96122..d01e06f57 100644 --- a/pkg/corpus/minimize.go +++ b/pkg/corpus/minimize.go @@ -14,7 +14,7 @@ func (corpus *Corpus) Minimize(cover bool) { defer corpus.mu.Unlock() inputs := make([]signal.Context, 0, len(corpus.progs)) - for _, inp := range corpus.progs { + for _, inp := range corpus.progsMap { inputs = append(inputs, signal.Context{ Signal: inp.Signal, Context: inp, @@ -37,12 +37,19 @@ func (corpus *Corpus) Minimize(cover bool) { return len(first.Prog.Calls) < len(second.Prog.Calls) }) - corpus.progs = make(map[string]*Item) - programsList := &ProgramsList{} + corpus.progsMap = make(map[string]*Item) + + // Overwrite the program lists. + corpus.ProgramsList = &ProgramsList{} + for _, area := range corpus.focusAreas { + area.ProgramsList = &ProgramsList{} + } for _, ctx := range signal.Minimize(inputs) { inp := ctx.(*Item) - corpus.progs[inp.Sig] = inp - programsList.saveProgram(inp.Prog, inp.Signal) + corpus.progsMap[inp.Sig] = inp + corpus.saveProgram(inp.Prog, inp.Signal) + for area := range inp.areas { + area.saveProgram(inp.Prog, inp.Signal) + } } - corpus.ProgramsList.replace(programsList) } diff --git a/pkg/corpus/prio.go b/pkg/corpus/prio.go index 57f9ddaf5..f23466df6 100644 --- a/pkg/corpus/prio.go +++ b/pkg/corpus/prio.go @@ -6,22 +6,18 @@ package corpus import ( "math/rand" "sort" - "sync" "github.com/google/syzkaller/pkg/signal" "github.com/google/syzkaller/prog" ) type ProgramsList struct { - mu sync.RWMutex progs []*prog.Prog sumPrios int64 accPrios []int64 } -func (pl *ProgramsList) ChooseProgram(r *rand.Rand) *prog.Prog { - pl.mu.RLock() - defer pl.mu.RUnlock() +func (pl *ProgramsList) chooseProgram(r *rand.Rand) *prog.Prog { if len(pl.progs) == 0 { return nil } @@ -32,15 +28,7 @@ func (pl *ProgramsList) ChooseProgram(r *rand.Rand) *prog.Prog { return pl.progs[idx] } -func (pl *ProgramsList) Programs() []*prog.Prog { - pl.mu.RLock() - defer pl.mu.RUnlock() - return pl.progs -} - func (pl *ProgramsList) saveProgram(p *prog.Prog, signal signal.Signal) { - pl.mu.Lock() - defer pl.mu.Unlock() prio := int64(len(signal)) if prio == 0 { prio = 1 @@ -50,10 +38,42 @@ func (pl *ProgramsList) saveProgram(p *prog.Prog, signal signal.Signal) { pl.progs = append(pl.progs, p) } -func (pl *ProgramsList) replace(other *ProgramsList) { - pl.mu.Lock() - defer pl.mu.Unlock() - pl.sumPrios = other.sumPrios - pl.accPrios = other.accPrios - pl.progs = other.progs +func (corpus *Corpus) ChooseProgram(r *rand.Rand) *prog.Prog { + corpus.mu.RLock() + defer corpus.mu.RUnlock() + if len(corpus.progsMap) == 0 { + return nil + } + // We could have used an approach similar to chooseProgram(), but for small number + // of focus areas that is an overkill. + var randArea *focusAreaState + if len(corpus.focusAreas) > 0 { + sum := 0.0 + nonEmpty := make([]*focusAreaState, 0, len(corpus.focusAreas)) + for _, area := range corpus.focusAreas { + if len(area.progs) == 0 { + continue + } + sum += area.Weight + nonEmpty = append(nonEmpty, area) + } + val := r.Float64() * sum + currSum := 0.0 + for _, area := range nonEmpty { + if val >= currSum { + randArea = area + } + currSum += area.Weight + } + } + if randArea != nil { + return randArea.chooseProgram(r) + } + return corpus.chooseProgram(r) +} + +func (corpus *Corpus) Programs() []*prog.Prog { + corpus.mu.RLock() + defer corpus.mu.RUnlock() + return corpus.progs } diff --git a/pkg/corpus/prio_test.go b/pkg/corpus/prio_test.go index 3eec54bed..6328ba3e5 100644 --- a/pkg/corpus/prio_test.go +++ b/pkg/corpus/prio_test.go @@ -11,6 +11,7 @@ import ( "github.com/google/syzkaller/prog" "github.com/google/syzkaller/sys/targets" + "github.com/stretchr/testify/assert" ) func TestChooseProgram(t *testing.T) { @@ -31,13 +32,13 @@ func TestChooseProgram(t *testing.T) { if sizeSig%250 == 0 { sizeSig = 0 } - inp := generateInput(target, rs, 10, sizeSig) + inp := generateInput(target, rs, sizeSig) corpus.Save(inp) priorities[inp.Prog] = int64(len(inp.Signal)) } counters := make(map[*prog.Prog]int) for it := 0; it < maxIters; it++ { - counters[corpus.ChooseProgram(r)]++ + counters[corpus.chooseProgram(r)]++ } for p, prio := range priorities { prob := float64(prio) / float64(corpus.sumPrios) @@ -47,3 +48,73 @@ func TestChooseProgram(t *testing.T) { } } } + +func TestFocusAreas(t *testing.T) { + target := getTarget(t, targets.TestOS, targets.TestArch64) + corpus := NewCorpus(context.Background()) + corpus.SetFocusAreas([]FocusArea{ + { + CoverPCs: map[uint64]struct{}{ + 0: {}, + 1: {}, + 2: {}, + }, + Weight: 10, + }, + { + CoverPCs: map[uint64]struct{}{ + 2: {}, + 3: {}, + }, + Weight: 30, + }, + { + CoverPCs: map[uint64]struct{}{ + 4: {}, + 5: {}, + }, + Weight: 60, + }, + }) + + rs := rand.NewSource(0) + + fillGroup := func(from, to, count int) map[*prog.Prog]bool { + ret := map[*prog.Prog]bool{} + for i := 0; i < count; i++ { + a := from + i%(to-from+1) + b := a + i%(to-a+1) + inp := generateRangedInput(target, rs, a, b) + ret[inp.Prog] = true + corpus.Save(inp) + } + return ret + } + + first := fillGroup(0, 1, 10) + second := fillGroup(2, 3, 10) + third := fillGroup(4, 5, 10) + + rnd := rand.New(rs) + different := map[*prog.Prog]bool{} + firstCount, secondCount, thirdCount := 0, 0, 0 + const TOTAL = 10000 + for i := 0; i < TOTAL; i++ { + p := corpus.ChooseProgram(rnd) + different[p] = true + + if first[p] { + firstCount++ + } else if second[p] { + secondCount++ + } else if third[p] { + thirdCount++ + } + } + + assert.Greater(t, len(different), 25) + // These must be proportional to the focus area weight distribution. + assert.InDelta(t, firstCount, TOTAL*0.1, TOTAL/25) + assert.InDelta(t, secondCount, TOTAL*0.3, TOTAL/25) + assert.InDelta(t, thirdCount, TOTAL*0.6, TOTAL/25) +} -- cgit mrf-deployment