aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/corpus
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2024-10-24 10:15:09 +0200
committerTaras Madan <tarasmadan@google.com>2024-10-25 12:08:02 +0000
commit479141f703a43adab07cba7f8b3c99399fbbeb68 (patch)
tree7e3334fbfb856e4bc29bf5e8d016a4fefe4b842a /pkg/corpus
parent945e91b794873481a34fe25de502ba96c8dc2a6b (diff)
pkg/corpus: support multiple focus areas
Focus area assigns a fuzzing priority to a set of PCs. When running ChooseProgram(), corpus will first select a focus area proportionally to the specified weights, and only then continue with selecting a program belonging to it.
Diffstat (limited to 'pkg/corpus')
-rw-r--r--pkg/corpus/corpus.go96
-rw-r--r--pkg/corpus/corpus_test.go22
-rw-r--r--pkg/corpus/minimize.go19
-rw-r--r--pkg/corpus/prio.go58
-rw-r--r--pkg/corpus/prio_test.go75
5 files changed, 219 insertions, 51 deletions
diff --git a/pkg/corpus/corpus.go b/pkg/corpus/corpus.go
index 8b70b0cfd..487457376 100644
--- a/pkg/corpus/corpus.go
+++ b/pkg/corpus/corpus.go
@@ -5,6 +5,8 @@ package corpus
import (
"context"
+ "fmt"
+ "maps"
"sync"
"github.com/google/syzkaller/pkg/cover"
@@ -17,16 +19,30 @@ import (
// Corpus object represents a set of syzkaller-found programs that
// cover the kernel up to the currently reached frontiers.
type Corpus struct {
- ctx context.Context
- mu sync.RWMutex
- progs map[string]*Item
- signal signal.Signal // total signal of all items
- cover cover.Cover // total coverage of all items
- updates chan<- NewItemEvent
+ ctx context.Context
+ mu sync.RWMutex
+ progsMap map[string]*Item
+ signal signal.Signal // total signal of all items
+ cover cover.Cover // total coverage of all items
+ updates chan<- NewItemEvent
+
*ProgramsList
StatProgs *stat.Val
StatSignal *stat.Val
StatCover *stat.Val
+
+ focusAreas []*focusAreaState
+}
+
+type focusAreaState struct {
+ FocusArea
+ *ProgramsList
+}
+
+type FocusArea struct {
+ Name string // can be empty
+ CoverPCs map[uint64]struct{}
+ Weight float64
}
func NewCorpus(ctx context.Context) *Corpus {
@@ -36,12 +52,12 @@ func NewCorpus(ctx context.Context) *Corpus {
func NewMonitoredCorpus(ctx context.Context, updates chan<- NewItemEvent) *Corpus {
corpus := &Corpus{
ctx: ctx,
- progs: make(map[string]*Item),
+ progsMap: make(map[string]*Item),
updates: updates,
ProgramsList: &ProgramsList{},
}
corpus.StatProgs = stat.New("corpus", "Number of test programs in the corpus", stat.Console,
- stat.Link("/corpus"), stat.Graph("corpus"), stat.LenOf(&corpus.progs, &corpus.mu))
+ stat.Link("/corpus"), stat.Graph("corpus"), stat.LenOf(&corpus.progsMap, &corpus.mu))
corpus.StatSignal = stat.New("signal", "Fuzzing signal in the corpus",
stat.LenOf(&corpus.signal, &corpus.mu))
corpus.StatCover = stat.New("coverage", "Source coverage in the corpus", stat.Console,
@@ -67,6 +83,8 @@ type Item struct {
Signal signal.Signal
Cover []uint64
Updates []ItemUpdate
+
+ areas map[*focusAreaState]struct{}
}
func (item Item) StringCall() string {
@@ -88,6 +106,29 @@ type NewItemEvent struct {
NewCover []uint64
}
+// SetFocusAreas can only be called once on an empty corpus.
+func (corpus *Corpus) SetFocusAreas(areas []FocusArea) {
+ corpus.mu.Lock()
+ defer corpus.mu.Unlock()
+ if len(corpus.progsMap) > 0 {
+ panic("SetFocusAreas() is called on a non-empty corpus")
+ }
+ for _, area := range areas {
+ obj := &ProgramsList{}
+ if len(areas) > 1 && area.Name != "" {
+ // Only show extra statistics if there's more than one area.
+ stat.New("corpus ["+area.Name+"]",
+ fmt.Sprintf("Corpus programs of the focus area %q", area.Name),
+ stat.Console, stat.Graph("corpus"),
+ stat.LenOf(&obj.progs, &corpus.mu))
+ }
+ corpus.focusAreas = append(corpus.focusAreas, &focusAreaState{
+ FocusArea: area,
+ ProgramsList: obj,
+ })
+ }
+}
+
func (corpus *Corpus) Save(inp NewInput) {
progData := inp.Prog.Serialize()
sig := hash.String(progData)
@@ -100,7 +141,7 @@ func (corpus *Corpus) Save(inp NewInput) {
RawCover: inp.RawCover,
}
exists := false
- if old, ok := corpus.progs[sig]; ok {
+ if old, ok := corpus.progsMap[sig]; ok {
exists = true
newSignal := old.Signal.Copy()
newSignal.Merge(inp.Signal)
@@ -115,14 +156,16 @@ func (corpus *Corpus) Save(inp NewInput) {
Signal: newSignal,
Cover: newCover.Serialize(),
Updates: append([]ItemUpdate{}, old.Updates...),
+ areas: maps.Clone(old.areas),
}
const maxUpdates = 32
if len(newItem.Updates) < maxUpdates {
newItem.Updates = append(newItem.Updates, update)
}
- corpus.progs[sig] = newItem
+ corpus.progsMap[sig] = newItem
+ corpus.applyFocusAreas(newItem, inp.Cover)
} else {
- corpus.progs[sig] = &Item{
+ item := &Item{
Sig: sig,
Call: inp.Call,
Prog: inp.Prog,
@@ -131,6 +174,8 @@ func (corpus *Corpus) Save(inp NewInput) {
Cover: inp.Cover,
Updates: []ItemUpdate{update},
}
+ corpus.progsMap[sig] = item
+ corpus.applyFocusAreas(item, inp.Cover)
corpus.saveProgram(inp.Prog, inp.Signal)
}
corpus.signal.Merge(inp.Signal)
@@ -147,6 +192,27 @@ func (corpus *Corpus) Save(inp NewInput) {
}
}
}
+
+func (corpus *Corpus) applyFocusAreas(item *Item, coverDelta []uint64) {
+ for _, area := range corpus.focusAreas {
+ matches := false
+ for _, pc := range coverDelta {
+ if _, ok := area.CoverPCs[pc]; ok {
+ matches = true
+ break
+ }
+ }
+ if !matches {
+ continue
+ }
+ area.saveProgram(item.Prog, item.Signal)
+ if item.areas == nil {
+ item.areas = make(map[*focusAreaState]struct{})
+ item.areas[area] = struct{}{}
+ }
+ }
+}
+
func (corpus *Corpus) Signal() signal.Signal {
corpus.mu.RLock()
defer corpus.mu.RUnlock()
@@ -156,8 +222,8 @@ func (corpus *Corpus) Signal() signal.Signal {
func (corpus *Corpus) Items() []*Item {
corpus.mu.RLock()
defer corpus.mu.RUnlock()
- ret := make([]*Item, 0, len(corpus.progs))
- for _, item := range corpus.progs {
+ ret := make([]*Item, 0, len(corpus.progsMap))
+ for _, item := range corpus.progsMap {
ret = append(ret, item)
}
return ret
@@ -166,7 +232,7 @@ func (corpus *Corpus) Items() []*Item {
func (corpus *Corpus) Item(sig string) *Item {
corpus.mu.RLock()
defer corpus.mu.RUnlock()
- return corpus.progs[sig]
+ return corpus.progsMap[sig]
}
type CallCov struct {
@@ -178,7 +244,7 @@ func (corpus *Corpus) CallCover() map[string]*CallCov {
corpus.mu.RLock()
defer corpus.mu.RUnlock()
calls := make(map[string]*CallCov)
- for _, inp := range corpus.progs {
+ for _, inp := range corpus.progsMap {
call := inp.StringCall()
if calls[call] == nil {
calls[call] = new(CallCov)
diff --git a/pkg/corpus/corpus_test.go b/pkg/corpus/corpus_test.go
index 90b11717a..c4410503e 100644
--- a/pkg/corpus/corpus_test.go
+++ b/pkg/corpus/corpus_test.go
@@ -22,7 +22,7 @@ func TestCorpusOperation(t *testing.T) {
// First program is saved.
rs := rand.NewSource(0)
- inp1 := generateInput(target, rs, 5, 5)
+ inp1 := generateInput(target, rs, 5)
go corpus.Save(inp1)
event := <-ch
progData := inp1.Prog.Serialize()
@@ -30,9 +30,9 @@ func TestCorpusOperation(t *testing.T) {
assert.Equal(t, false, event.Exists)
// Second program is saved for every its call.
- inp2 := generateInput(target, rs, 5, 5)
+ inp2 := generateInput(target, rs, 5)
progData = inp2.Prog.Serialize()
- for i := 0; i < 5; i++ {
+ for i := 0; i < len(inp2.Prog.Calls); i++ {
inp2.Call = i
go corpus.Save(inp2)
event := <-ch
@@ -49,7 +49,6 @@ func TestCorpusOperation(t *testing.T) {
// Verify the total signal.
assert.Equal(t, corpus.StatSignal.Val(), 5)
- assert.Equal(t, corpus.StatCover.Val(), 0)
assert.Equal(t, corpus.StatProgs.Val(), 2)
corpus.Minimize(true)
@@ -61,7 +60,7 @@ func TestCorpusCoverage(t *testing.T) {
corpus := NewMonitoredCorpus(context.Background(), ch)
rs := rand.NewSource(0)
- inp := generateInput(target, rs, 5, 5)
+ inp := generateInput(target, rs, 5)
inp.Cover = []uint64{10, 11}
go corpus.Save(inp)
event := <-ch
@@ -91,7 +90,7 @@ func TestCorpusSaveConcurrency(t *testing.T) {
rs := rand.NewSource(0)
r := rand.New(rs)
for it := 0; it < iters; it++ {
- inp := generateInput(target, rs, 10, it)
+ inp := generateInput(target, rs, it)
corpus.Save(inp)
corpus.ChooseProgram(r).Clone()
}
@@ -99,16 +98,21 @@ func TestCorpusSaveConcurrency(t *testing.T) {
}
}
-func generateInput(target *prog.Target, rs rand.Source, ncalls, sizeSig int) NewInput {
- p := target.Generate(rs, ncalls, target.DefaultChoiceTable())
+func generateInput(target *prog.Target, rs rand.Source, sizeSig int) NewInput {
+ return generateRangedInput(target, rs, 1, sizeSig)
+}
+
+func generateRangedInput(target *prog.Target, rs rand.Source, sigFrom, sigTo int) NewInput {
+ p := target.Generate(rs, 5, target.DefaultChoiceTable())
var raw []uint64
- for i := 1; i <= sizeSig; i++ {
+ for i := sigFrom; i <= sigTo; i++ {
raw = append(raw, uint64(i))
}
return NewInput{
Prog: p,
Call: int(rs.Int63() % int64(len(p.Calls))),
Signal: signal.FromRaw(raw, 0),
+ Cover: raw,
}
}
diff --git a/pkg/corpus/minimize.go b/pkg/corpus/minimize.go
index 8eac96122..d01e06f57 100644
--- a/pkg/corpus/minimize.go
+++ b/pkg/corpus/minimize.go
@@ -14,7 +14,7 @@ func (corpus *Corpus) Minimize(cover bool) {
defer corpus.mu.Unlock()
inputs := make([]signal.Context, 0, len(corpus.progs))
- for _, inp := range corpus.progs {
+ for _, inp := range corpus.progsMap {
inputs = append(inputs, signal.Context{
Signal: inp.Signal,
Context: inp,
@@ -37,12 +37,19 @@ func (corpus *Corpus) Minimize(cover bool) {
return len(first.Prog.Calls) < len(second.Prog.Calls)
})
- corpus.progs = make(map[string]*Item)
- programsList := &ProgramsList{}
+ corpus.progsMap = make(map[string]*Item)
+
+ // Overwrite the program lists.
+ corpus.ProgramsList = &ProgramsList{}
+ for _, area := range corpus.focusAreas {
+ area.ProgramsList = &ProgramsList{}
+ }
for _, ctx := range signal.Minimize(inputs) {
inp := ctx.(*Item)
- corpus.progs[inp.Sig] = inp
- programsList.saveProgram(inp.Prog, inp.Signal)
+ corpus.progsMap[inp.Sig] = inp
+ corpus.saveProgram(inp.Prog, inp.Signal)
+ for area := range inp.areas {
+ area.saveProgram(inp.Prog, inp.Signal)
+ }
}
- corpus.ProgramsList.replace(programsList)
}
diff --git a/pkg/corpus/prio.go b/pkg/corpus/prio.go
index 57f9ddaf5..f23466df6 100644
--- a/pkg/corpus/prio.go
+++ b/pkg/corpus/prio.go
@@ -6,22 +6,18 @@ package corpus
import (
"math/rand"
"sort"
- "sync"
"github.com/google/syzkaller/pkg/signal"
"github.com/google/syzkaller/prog"
)
type ProgramsList struct {
- mu sync.RWMutex
progs []*prog.Prog
sumPrios int64
accPrios []int64
}
-func (pl *ProgramsList) ChooseProgram(r *rand.Rand) *prog.Prog {
- pl.mu.RLock()
- defer pl.mu.RUnlock()
+func (pl *ProgramsList) chooseProgram(r *rand.Rand) *prog.Prog {
if len(pl.progs) == 0 {
return nil
}
@@ -32,15 +28,7 @@ func (pl *ProgramsList) ChooseProgram(r *rand.Rand) *prog.Prog {
return pl.progs[idx]
}
-func (pl *ProgramsList) Programs() []*prog.Prog {
- pl.mu.RLock()
- defer pl.mu.RUnlock()
- return pl.progs
-}
-
func (pl *ProgramsList) saveProgram(p *prog.Prog, signal signal.Signal) {
- pl.mu.Lock()
- defer pl.mu.Unlock()
prio := int64(len(signal))
if prio == 0 {
prio = 1
@@ -50,10 +38,42 @@ func (pl *ProgramsList) saveProgram(p *prog.Prog, signal signal.Signal) {
pl.progs = append(pl.progs, p)
}
-func (pl *ProgramsList) replace(other *ProgramsList) {
- pl.mu.Lock()
- defer pl.mu.Unlock()
- pl.sumPrios = other.sumPrios
- pl.accPrios = other.accPrios
- pl.progs = other.progs
+func (corpus *Corpus) ChooseProgram(r *rand.Rand) *prog.Prog {
+ corpus.mu.RLock()
+ defer corpus.mu.RUnlock()
+ if len(corpus.progsMap) == 0 {
+ return nil
+ }
+ // We could have used an approach similar to chooseProgram(), but for small number
+ // of focus areas that is an overkill.
+ var randArea *focusAreaState
+ if len(corpus.focusAreas) > 0 {
+ sum := 0.0
+ nonEmpty := make([]*focusAreaState, 0, len(corpus.focusAreas))
+ for _, area := range corpus.focusAreas {
+ if len(area.progs) == 0 {
+ continue
+ }
+ sum += area.Weight
+ nonEmpty = append(nonEmpty, area)
+ }
+ val := r.Float64() * sum
+ currSum := 0.0
+ for _, area := range nonEmpty {
+ if val >= currSum {
+ randArea = area
+ }
+ currSum += area.Weight
+ }
+ }
+ if randArea != nil {
+ return randArea.chooseProgram(r)
+ }
+ return corpus.chooseProgram(r)
+}
+
+func (corpus *Corpus) Programs() []*prog.Prog {
+ corpus.mu.RLock()
+ defer corpus.mu.RUnlock()
+ return corpus.progs
}
diff --git a/pkg/corpus/prio_test.go b/pkg/corpus/prio_test.go
index 3eec54bed..6328ba3e5 100644
--- a/pkg/corpus/prio_test.go
+++ b/pkg/corpus/prio_test.go
@@ -11,6 +11,7 @@ import (
"github.com/google/syzkaller/prog"
"github.com/google/syzkaller/sys/targets"
+ "github.com/stretchr/testify/assert"
)
func TestChooseProgram(t *testing.T) {
@@ -31,13 +32,13 @@ func TestChooseProgram(t *testing.T) {
if sizeSig%250 == 0 {
sizeSig = 0
}
- inp := generateInput(target, rs, 10, sizeSig)
+ inp := generateInput(target, rs, sizeSig)
corpus.Save(inp)
priorities[inp.Prog] = int64(len(inp.Signal))
}
counters := make(map[*prog.Prog]int)
for it := 0; it < maxIters; it++ {
- counters[corpus.ChooseProgram(r)]++
+ counters[corpus.chooseProgram(r)]++
}
for p, prio := range priorities {
prob := float64(prio) / float64(corpus.sumPrios)
@@ -47,3 +48,73 @@ func TestChooseProgram(t *testing.T) {
}
}
}
+
+func TestFocusAreas(t *testing.T) {
+ target := getTarget(t, targets.TestOS, targets.TestArch64)
+ corpus := NewCorpus(context.Background())
+ corpus.SetFocusAreas([]FocusArea{
+ {
+ CoverPCs: map[uint64]struct{}{
+ 0: {},
+ 1: {},
+ 2: {},
+ },
+ Weight: 10,
+ },
+ {
+ CoverPCs: map[uint64]struct{}{
+ 2: {},
+ 3: {},
+ },
+ Weight: 30,
+ },
+ {
+ CoverPCs: map[uint64]struct{}{
+ 4: {},
+ 5: {},
+ },
+ Weight: 60,
+ },
+ })
+
+ rs := rand.NewSource(0)
+
+ fillGroup := func(from, to, count int) map[*prog.Prog]bool {
+ ret := map[*prog.Prog]bool{}
+ for i := 0; i < count; i++ {
+ a := from + i%(to-from+1)
+ b := a + i%(to-a+1)
+ inp := generateRangedInput(target, rs, a, b)
+ ret[inp.Prog] = true
+ corpus.Save(inp)
+ }
+ return ret
+ }
+
+ first := fillGroup(0, 1, 10)
+ second := fillGroup(2, 3, 10)
+ third := fillGroup(4, 5, 10)
+
+ rnd := rand.New(rs)
+ different := map[*prog.Prog]bool{}
+ firstCount, secondCount, thirdCount := 0, 0, 0
+ const TOTAL = 10000
+ for i := 0; i < TOTAL; i++ {
+ p := corpus.ChooseProgram(rnd)
+ different[p] = true
+
+ if first[p] {
+ firstCount++
+ } else if second[p] {
+ secondCount++
+ } else if third[p] {
+ thirdCount++
+ }
+ }
+
+ assert.Greater(t, len(different), 25)
+ // These must be proportional to the focus area weight distribution.
+ assert.InDelta(t, firstCount, TOTAL*0.1, TOTAL/25)
+ assert.InDelta(t, secondCount, TOTAL*0.3, TOTAL/25)
+ assert.InDelta(t, thirdCount, TOTAL*0.6, TOTAL/25)
+}