aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/corpus
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2024-03-13 18:52:18 +0100
committerAleksandr Nogikh <nogikh@google.com>2024-03-18 10:58:52 +0000
commitfc090d205d8c3d58f190659a98795d89421b7e6b (patch)
tree2b33cca3e6366a1565d70fdce01a51d6e50f6448 /pkg/corpus
parentd615901c739a765329b688494cee2f8e1b5037cb (diff)
pkg/corpus: a separate package for the corpus functionality
pkg/fuzzer and syz-manager have a common corpus functionality that can be well be unified. Create a separate pkg/corpus package that would be used by both of them. It will simplify further work of moving pkg/fuzzer to the host.
Diffstat (limited to 'pkg/corpus')
-rw-r--r--pkg/corpus/corpus.go231
-rw-r--r--pkg/corpus/corpus_test.go98
-rw-r--r--pkg/corpus/minimize.go41
-rw-r--r--pkg/corpus/prio.go51
-rw-r--r--pkg/corpus/prio_test.go49
5 files changed, 470 insertions, 0 deletions
diff --git a/pkg/corpus/corpus.go b/pkg/corpus/corpus.go
new file mode 100644
index 000000000..1d14ba026
--- /dev/null
+++ b/pkg/corpus/corpus.go
@@ -0,0 +1,231 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package corpus
+
+import (
+ "context"
+ "sync"
+
+ "github.com/google/syzkaller/pkg/cover"
+ "github.com/google/syzkaller/pkg/hash"
+ "github.com/google/syzkaller/pkg/rpctype"
+ "github.com/google/syzkaller/pkg/signal"
+ "github.com/google/syzkaller/prog"
+)
+
+// Corpus object represents a set of syzkaller-found programs that
+// cover the kernel up to the currently reached frontiers.
+type Corpus struct {
+ ctx context.Context
+ mu sync.RWMutex
+ progs map[string]*Item
+ signal signal.Signal // signal of inputs in corpus
+ updates chan<- NewItemEvent
+ ProgramsList
+}
+
+func NewCorpus(ctx context.Context) *Corpus {
+ return NewMonitoredCorpus(ctx, nil)
+}
+
+func NewMonitoredCorpus(ctx context.Context, updates chan<- NewItemEvent) *Corpus {
+ return &Corpus{
+ ctx: ctx,
+ progs: make(map[string]*Item),
+ updates: updates,
+ }
+}
+
+// It may happen that a single program is relevant because of several
+// sysalls. In that case, there will be several ItemUpdate entities.
+type ItemUpdate struct {
+ Call int
+ RawCover []uint32
+}
+
+// Item objects are to be treated as immutable, otherwise it's just
+// too hard to synchonize accesses to them across the whole project.
+// When Corpus updates one of its items, it saves a copy of it.
+type Item struct {
+ Sig string
+ Call int
+ Prog *prog.Prog
+ ProgData []byte // to save some Serialize() calls
+ HasAny bool // whether the prog contains squashed arguments
+ Signal signal.Signal
+ Cover []uint32
+ Updates []ItemUpdate
+}
+
+func (item Item) StringCall() string {
+ return stringCall(item.Prog, item.Call)
+}
+
+// RPCInputShort() does not include coverage.
+func (item Item) RPCInputShort() rpctype.Input {
+ return rpctype.Input{
+ Call: item.Call,
+ Prog: item.ProgData,
+ Signal: item.Signal.Serialize(),
+ }
+}
+
+func stringCall(p *prog.Prog, call int) string {
+ if call != -1 {
+ return p.Calls[call].Meta.Name
+ }
+ return ".extra"
+}
+
+type NewInput struct {
+ Prog *prog.Prog
+ Call int
+ Signal signal.Signal
+ Cover []uint32
+ RawCover []uint32
+}
+
+func (item NewInput) StringCall() string {
+ return stringCall(item.Prog, item.Call)
+}
+
+func (item NewInput) RPCInput() rpctype.Input {
+ return rpctype.Input{
+ Call: item.Call,
+ Prog: item.Prog.Serialize(),
+ Signal: item.Signal.Serialize(),
+ Cover: item.Cover,
+ RawCover: item.RawCover,
+ }
+}
+
+type NewItemEvent struct {
+ Sig string
+ Exists bool
+ ProgData []byte
+}
+
+func (corpus *Corpus) Save(inp NewInput) {
+ progData := inp.Prog.Serialize()
+ sig := hash.String(progData)
+
+ corpus.mu.Lock()
+ defer corpus.mu.Unlock()
+
+ update := ItemUpdate{
+ Call: inp.Call,
+ RawCover: inp.RawCover,
+ }
+ exists := false
+ if old, ok := corpus.progs[sig]; ok {
+ exists = true
+ newSignal := old.Signal.Copy()
+ newSignal.Merge(inp.Signal)
+ var newCover cover.Cover
+ newCover.Merge(old.Cover)
+ newCover.Merge(inp.Cover)
+ newItem := &Item{
+ Sig: sig,
+ Prog: old.Prog,
+ ProgData: progData,
+ Call: old.Call,
+ HasAny: old.HasAny,
+ Signal: newSignal,
+ Cover: newCover.Serialize(),
+ Updates: append([]ItemUpdate{}, old.Updates...),
+ }
+ const maxUpdates = 32
+ if len(newItem.Updates) < maxUpdates {
+ newItem.Updates = append(newItem.Updates, update)
+ }
+ corpus.progs[sig] = newItem
+ } else {
+ corpus.progs[sig] = &Item{
+ Sig: sig,
+ Call: inp.Call,
+ Prog: inp.Prog,
+ ProgData: progData,
+ HasAny: inp.Prog.ContainsAny(),
+ Signal: inp.Signal,
+ Cover: inp.Cover,
+ Updates: []ItemUpdate{update},
+ }
+ corpus.saveProgram(inp.Prog, inp.Signal)
+ }
+ corpus.signal.Merge(inp.Signal)
+ if corpus.updates != nil {
+ select {
+ case <-corpus.ctx.Done():
+ case corpus.updates <- NewItemEvent{
+ Sig: sig,
+ Exists: exists,
+ ProgData: progData,
+ }:
+ }
+ }
+}
+
+func (corpus *Corpus) DiffSignal(s signal.Signal) signal.Signal {
+ corpus.mu.RLock()
+ defer corpus.mu.RUnlock()
+ return corpus.signal.Diff(s)
+}
+
+func (corpus *Corpus) Signal() signal.Signal {
+ corpus.mu.RLock()
+ defer corpus.mu.RUnlock()
+ return corpus.signal.Copy()
+}
+
+func (corpus *Corpus) Items() []*Item {
+ corpus.mu.RLock()
+ defer corpus.mu.RUnlock()
+ ret := make([]*Item, 0, len(corpus.progs))
+ for _, item := range corpus.progs {
+ ret = append(ret, item)
+ }
+ return ret
+}
+
+func (corpus *Corpus) Item(sig string) *Item {
+ corpus.mu.RLock()
+ defer corpus.mu.RUnlock()
+ return corpus.progs[sig]
+}
+
+// Stat is a snapshot of the relevant current state figures.
+type Stat struct {
+ Progs int
+ Signal int
+}
+
+func (corpus *Corpus) Stat() Stat {
+ corpus.mu.RLock()
+ defer corpus.mu.RUnlock()
+ return Stat{
+ Progs: len(corpus.progs),
+ Signal: len(corpus.signal),
+ }
+}
+
+type CallCov struct {
+ Count int
+ Cover cover.Cover
+}
+
+func (corpus *Corpus) CallCover() map[string]*CallCov {
+ corpus.mu.RLock()
+ defer corpus.mu.RUnlock()
+ calls := make(map[string]*CallCov)
+ for _, inp := range corpus.progs {
+ call := inp.StringCall()
+ if calls[call] == nil {
+ calls[call] = new(CallCov)
+ }
+ cc := calls[call]
+ cc.Count++
+ cc.Cover.Merge(inp.Cover)
+ }
+ return calls
+}
diff --git a/pkg/corpus/corpus_test.go b/pkg/corpus/corpus_test.go
new file mode 100644
index 000000000..cce537087
--- /dev/null
+++ b/pkg/corpus/corpus_test.go
@@ -0,0 +1,98 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package corpus
+
+import (
+ "context"
+ "math/rand"
+ "testing"
+
+ "github.com/google/syzkaller/pkg/signal"
+ "github.com/google/syzkaller/prog"
+ "github.com/google/syzkaller/sys/targets"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestCorpusOperation(t *testing.T) {
+ // Basic corpus functionality.
+ target := getTarget(t, targets.TestOS, targets.TestArch64)
+ ch := make(chan NewItemEvent)
+ corpus := NewMonitoredCorpus(context.Background(), ch)
+
+ // First program is saved.
+ rs := rand.NewSource(0)
+ inp1 := generateInput(target, rs, 5, 5)
+ go corpus.Save(inp1)
+ event := <-ch
+ progData := inp1.Prog.Serialize()
+ assert.Equal(t, progData, event.ProgData)
+ assert.Equal(t, false, event.Exists)
+
+ // Second program is saved for every its call.
+ inp2 := generateInput(target, rs, 5, 5)
+ progData = inp2.Prog.Serialize()
+ for i := 0; i < 5; i++ {
+ inp2.Call = i
+ go corpus.Save(inp2)
+ event := <-ch
+ assert.Equal(t, progData, event.ProgData)
+ assert.Equal(t, i != 0, event.Exists)
+ }
+
+ // Verify that we can query corpus items.
+ items := corpus.Items()
+ assert.Len(t, items, 2)
+ for _, item := range items {
+ assert.Equal(t, item, corpus.Item(item.Sig))
+ }
+
+ // Verify the total signal.
+ assert.Len(t, corpus.Signal(), 5)
+
+ corpus.Minimize(true)
+}
+
+func TestCorpusSaveConcurrency(t *testing.T) {
+ target := getTarget(t, targets.TestOS, targets.TestArch64)
+ corpus := NewCorpus(context.Background())
+
+ const (
+ routines = 10
+ iters = 100
+ )
+
+ for i := 0; i < routines; i++ {
+ go func() {
+ rs := rand.NewSource(0)
+ r := rand.New(rs)
+ for it := 0; it < iters; it++ {
+ inp := generateInput(target, rs, 10, it)
+ corpus.Save(inp)
+ corpus.ChooseProgram(r).Clone()
+ }
+ }()
+ }
+}
+
+func generateInput(target *prog.Target, rs rand.Source, ncalls, sizeSig int) NewInput {
+ p := target.Generate(rs, ncalls, target.DefaultChoiceTable())
+ var raw []uint32
+ for i := 1; i <= sizeSig; i++ {
+ raw = append(raw, uint32(i))
+ }
+ return NewInput{
+ Prog: p,
+ Call: int(rs.Int63() % int64(len(p.Calls))),
+ Signal: signal.FromRaw(raw, 0),
+ }
+}
+
+func getTarget(t *testing.T, os, arch string) *prog.Target {
+ t.Parallel()
+ target, err := prog.GetTarget(os, arch)
+ if err != nil {
+ t.Fatal(err)
+ }
+ return target
+}
diff --git a/pkg/corpus/minimize.go b/pkg/corpus/minimize.go
new file mode 100644
index 000000000..b47816ccf
--- /dev/null
+++ b/pkg/corpus/minimize.go
@@ -0,0 +1,41 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package corpus
+
+import (
+ "sort"
+
+ "github.com/google/syzkaller/pkg/signal"
+)
+
+func (corpus *Corpus) Minimize(cover bool) {
+ corpus.mu.Lock()
+ defer corpus.mu.Unlock()
+
+ inputs := make([]signal.Context, 0, len(corpus.progs))
+ for _, inp := range corpus.progs {
+ inputs = append(inputs, signal.Context{
+ Signal: inp.Signal,
+ Context: inp,
+ })
+ }
+
+ // Note: inputs are unsorted (based on map iteration).
+ // This gives some intentional non-determinism during minimization.
+ // However, we want to give preference to non-squashed inputs,
+ // so let's sort by this criteria.
+ sort.SliceStable(inputs, func(i, j int) bool {
+ firstAny := inputs[i].Context.(*Item).HasAny
+ secondAny := inputs[j].Context.(*Item).HasAny
+ return !firstAny && secondAny
+ })
+
+ corpus.progs = make(map[string]*Item)
+ corpus.ProgramsList = ProgramsList{}
+ for _, ctx := range signal.Minimize(inputs) {
+ inp := ctx.(*Item)
+ corpus.progs[inp.Sig] = inp
+ corpus.saveProgram(inp.Prog, inp.Signal)
+ }
+}
diff --git a/pkg/corpus/prio.go b/pkg/corpus/prio.go
new file mode 100644
index 000000000..a4d85a518
--- /dev/null
+++ b/pkg/corpus/prio.go
@@ -0,0 +1,51 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package corpus
+
+import (
+ "math/rand"
+ "sort"
+ "sync"
+
+ "github.com/google/syzkaller/pkg/signal"
+ "github.com/google/syzkaller/prog"
+)
+
+type ProgramsList struct {
+ mu sync.RWMutex
+ progs []*prog.Prog
+ sumPrios int64
+ accPrios []int64
+}
+
+func (pl *ProgramsList) saveProgram(p *prog.Prog, signal signal.Signal) {
+ pl.mu.Lock()
+ defer pl.mu.Unlock()
+ prio := int64(len(signal))
+ if prio == 0 {
+ prio = 1
+ }
+ pl.sumPrios += prio
+ pl.accPrios = append(pl.accPrios, pl.sumPrios)
+ pl.progs = append(pl.progs, p)
+}
+
+func (pl *ProgramsList) ChooseProgram(r *rand.Rand) *prog.Prog {
+ pl.mu.RLock()
+ defer pl.mu.RUnlock()
+ if len(pl.progs) == 0 {
+ return nil
+ }
+ randVal := r.Int63n(pl.sumPrios + 1)
+ idx := sort.Search(len(pl.accPrios), func(i int) bool {
+ return pl.accPrios[i] >= randVal
+ })
+ return pl.progs[idx]
+}
+
+func (pl *ProgramsList) Programs() []*prog.Prog {
+ pl.mu.RLock()
+ defer pl.mu.RUnlock()
+ return pl.progs
+}
diff --git a/pkg/corpus/prio_test.go b/pkg/corpus/prio_test.go
new file mode 100644
index 000000000..3eec54bed
--- /dev/null
+++ b/pkg/corpus/prio_test.go
@@ -0,0 +1,49 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package corpus
+
+import (
+ "context"
+ "math"
+ "math/rand"
+ "testing"
+
+ "github.com/google/syzkaller/prog"
+ "github.com/google/syzkaller/sys/targets"
+)
+
+func TestChooseProgram(t *testing.T) {
+ rs := rand.NewSource(0)
+ r := rand.New(rs)
+ target := getTarget(t, targets.TestOS, targets.TestArch64)
+ corpus := NewCorpus(context.Background())
+
+ const (
+ maxIters = 1000
+ sizeCorpus = 1000
+ eps = 0.01
+ )
+
+ priorities := make(map[*prog.Prog]int64)
+ for i := 0; i < sizeCorpus; i++ {
+ sizeSig := i + 1
+ if sizeSig%250 == 0 {
+ sizeSig = 0
+ }
+ inp := generateInput(target, rs, 10, sizeSig)
+ corpus.Save(inp)
+ priorities[inp.Prog] = int64(len(inp.Signal))
+ }
+ counters := make(map[*prog.Prog]int)
+ for it := 0; it < maxIters; it++ {
+ counters[corpus.ChooseProgram(r)]++
+ }
+ for p, prio := range priorities {
+ prob := float64(prio) / float64(corpus.sumPrios)
+ diff := math.Abs(prob*maxIters - float64(counters[p]))
+ if diff > eps*maxIters {
+ t.Fatalf("the difference (%f) is higher than %f%%", diff, eps*100)
+ }
+ }
+}