From fc090d205d8c3d58f190659a98795d89421b7e6b Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Wed, 13 Mar 2024 18:52:18 +0100 Subject: pkg/corpus: a separate package for the corpus functionality pkg/fuzzer and syz-manager have a common corpus functionality that can be well be unified. Create a separate pkg/corpus package that would be used by both of them. It will simplify further work of moving pkg/fuzzer to the host. --- pkg/corpus/corpus.go | 231 ++++++++++++++++++++++++++++++++++++++++++++++ pkg/corpus/corpus_test.go | 98 ++++++++++++++++++++ pkg/corpus/minimize.go | 41 ++++++++ pkg/corpus/prio.go | 51 ++++++++++ pkg/corpus/prio_test.go | 49 ++++++++++ 5 files changed, 470 insertions(+) create mode 100644 pkg/corpus/corpus.go create mode 100644 pkg/corpus/corpus_test.go create mode 100644 pkg/corpus/minimize.go create mode 100644 pkg/corpus/prio.go create mode 100644 pkg/corpus/prio_test.go (limited to 'pkg/corpus') diff --git a/pkg/corpus/corpus.go b/pkg/corpus/corpus.go new file mode 100644 index 000000000..1d14ba026 --- /dev/null +++ b/pkg/corpus/corpus.go @@ -0,0 +1,231 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package corpus + +import ( + "context" + "sync" + + "github.com/google/syzkaller/pkg/cover" + "github.com/google/syzkaller/pkg/hash" + "github.com/google/syzkaller/pkg/rpctype" + "github.com/google/syzkaller/pkg/signal" + "github.com/google/syzkaller/prog" +) + +// Corpus object represents a set of syzkaller-found programs that +// cover the kernel up to the currently reached frontiers. +type Corpus struct { + ctx context.Context + mu sync.RWMutex + progs map[string]*Item + signal signal.Signal // signal of inputs in corpus + updates chan<- NewItemEvent + ProgramsList +} + +func NewCorpus(ctx context.Context) *Corpus { + return NewMonitoredCorpus(ctx, nil) +} + +func NewMonitoredCorpus(ctx context.Context, updates chan<- NewItemEvent) *Corpus { + return &Corpus{ + ctx: ctx, + progs: make(map[string]*Item), + updates: updates, + } +} + +// It may happen that a single program is relevant because of several +// sysalls. In that case, there will be several ItemUpdate entities. +type ItemUpdate struct { + Call int + RawCover []uint32 +} + +// Item objects are to be treated as immutable, otherwise it's just +// too hard to synchonize accesses to them across the whole project. +// When Corpus updates one of its items, it saves a copy of it. +type Item struct { + Sig string + Call int + Prog *prog.Prog + ProgData []byte // to save some Serialize() calls + HasAny bool // whether the prog contains squashed arguments + Signal signal.Signal + Cover []uint32 + Updates []ItemUpdate +} + +func (item Item) StringCall() string { + return stringCall(item.Prog, item.Call) +} + +// RPCInputShort() does not include coverage. +func (item Item) RPCInputShort() rpctype.Input { + return rpctype.Input{ + Call: item.Call, + Prog: item.ProgData, + Signal: item.Signal.Serialize(), + } +} + +func stringCall(p *prog.Prog, call int) string { + if call != -1 { + return p.Calls[call].Meta.Name + } + return ".extra" +} + +type NewInput struct { + Prog *prog.Prog + Call int + Signal signal.Signal + Cover []uint32 + RawCover []uint32 +} + +func (item NewInput) StringCall() string { + return stringCall(item.Prog, item.Call) +} + +func (item NewInput) RPCInput() rpctype.Input { + return rpctype.Input{ + Call: item.Call, + Prog: item.Prog.Serialize(), + Signal: item.Signal.Serialize(), + Cover: item.Cover, + RawCover: item.RawCover, + } +} + +type NewItemEvent struct { + Sig string + Exists bool + ProgData []byte +} + +func (corpus *Corpus) Save(inp NewInput) { + progData := inp.Prog.Serialize() + sig := hash.String(progData) + + corpus.mu.Lock() + defer corpus.mu.Unlock() + + update := ItemUpdate{ + Call: inp.Call, + RawCover: inp.RawCover, + } + exists := false + if old, ok := corpus.progs[sig]; ok { + exists = true + newSignal := old.Signal.Copy() + newSignal.Merge(inp.Signal) + var newCover cover.Cover + newCover.Merge(old.Cover) + newCover.Merge(inp.Cover) + newItem := &Item{ + Sig: sig, + Prog: old.Prog, + ProgData: progData, + Call: old.Call, + HasAny: old.HasAny, + Signal: newSignal, + Cover: newCover.Serialize(), + Updates: append([]ItemUpdate{}, old.Updates...), + } + const maxUpdates = 32 + if len(newItem.Updates) < maxUpdates { + newItem.Updates = append(newItem.Updates, update) + } + corpus.progs[sig] = newItem + } else { + corpus.progs[sig] = &Item{ + Sig: sig, + Call: inp.Call, + Prog: inp.Prog, + ProgData: progData, + HasAny: inp.Prog.ContainsAny(), + Signal: inp.Signal, + Cover: inp.Cover, + Updates: []ItemUpdate{update}, + } + corpus.saveProgram(inp.Prog, inp.Signal) + } + corpus.signal.Merge(inp.Signal) + if corpus.updates != nil { + select { + case <-corpus.ctx.Done(): + case corpus.updates <- NewItemEvent{ + Sig: sig, + Exists: exists, + ProgData: progData, + }: + } + } +} + +func (corpus *Corpus) DiffSignal(s signal.Signal) signal.Signal { + corpus.mu.RLock() + defer corpus.mu.RUnlock() + return corpus.signal.Diff(s) +} + +func (corpus *Corpus) Signal() signal.Signal { + corpus.mu.RLock() + defer corpus.mu.RUnlock() + return corpus.signal.Copy() +} + +func (corpus *Corpus) Items() []*Item { + corpus.mu.RLock() + defer corpus.mu.RUnlock() + ret := make([]*Item, 0, len(corpus.progs)) + for _, item := range corpus.progs { + ret = append(ret, item) + } + return ret +} + +func (corpus *Corpus) Item(sig string) *Item { + corpus.mu.RLock() + defer corpus.mu.RUnlock() + return corpus.progs[sig] +} + +// Stat is a snapshot of the relevant current state figures. +type Stat struct { + Progs int + Signal int +} + +func (corpus *Corpus) Stat() Stat { + corpus.mu.RLock() + defer corpus.mu.RUnlock() + return Stat{ + Progs: len(corpus.progs), + Signal: len(corpus.signal), + } +} + +type CallCov struct { + Count int + Cover cover.Cover +} + +func (corpus *Corpus) CallCover() map[string]*CallCov { + corpus.mu.RLock() + defer corpus.mu.RUnlock() + calls := make(map[string]*CallCov) + for _, inp := range corpus.progs { + call := inp.StringCall() + if calls[call] == nil { + calls[call] = new(CallCov) + } + cc := calls[call] + cc.Count++ + cc.Cover.Merge(inp.Cover) + } + return calls +} diff --git a/pkg/corpus/corpus_test.go b/pkg/corpus/corpus_test.go new file mode 100644 index 000000000..cce537087 --- /dev/null +++ b/pkg/corpus/corpus_test.go @@ -0,0 +1,98 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package corpus + +import ( + "context" + "math/rand" + "testing" + + "github.com/google/syzkaller/pkg/signal" + "github.com/google/syzkaller/prog" + "github.com/google/syzkaller/sys/targets" + "github.com/stretchr/testify/assert" +) + +func TestCorpusOperation(t *testing.T) { + // Basic corpus functionality. + target := getTarget(t, targets.TestOS, targets.TestArch64) + ch := make(chan NewItemEvent) + corpus := NewMonitoredCorpus(context.Background(), ch) + + // First program is saved. + rs := rand.NewSource(0) + inp1 := generateInput(target, rs, 5, 5) + go corpus.Save(inp1) + event := <-ch + progData := inp1.Prog.Serialize() + assert.Equal(t, progData, event.ProgData) + assert.Equal(t, false, event.Exists) + + // Second program is saved for every its call. + inp2 := generateInput(target, rs, 5, 5) + progData = inp2.Prog.Serialize() + for i := 0; i < 5; i++ { + inp2.Call = i + go corpus.Save(inp2) + event := <-ch + assert.Equal(t, progData, event.ProgData) + assert.Equal(t, i != 0, event.Exists) + } + + // Verify that we can query corpus items. + items := corpus.Items() + assert.Len(t, items, 2) + for _, item := range items { + assert.Equal(t, item, corpus.Item(item.Sig)) + } + + // Verify the total signal. + assert.Len(t, corpus.Signal(), 5) + + corpus.Minimize(true) +} + +func TestCorpusSaveConcurrency(t *testing.T) { + target := getTarget(t, targets.TestOS, targets.TestArch64) + corpus := NewCorpus(context.Background()) + + const ( + routines = 10 + iters = 100 + ) + + for i := 0; i < routines; i++ { + go func() { + rs := rand.NewSource(0) + r := rand.New(rs) + for it := 0; it < iters; it++ { + inp := generateInput(target, rs, 10, it) + corpus.Save(inp) + corpus.ChooseProgram(r).Clone() + } + }() + } +} + +func generateInput(target *prog.Target, rs rand.Source, ncalls, sizeSig int) NewInput { + p := target.Generate(rs, ncalls, target.DefaultChoiceTable()) + var raw []uint32 + for i := 1; i <= sizeSig; i++ { + raw = append(raw, uint32(i)) + } + return NewInput{ + Prog: p, + Call: int(rs.Int63() % int64(len(p.Calls))), + Signal: signal.FromRaw(raw, 0), + } +} + +func getTarget(t *testing.T, os, arch string) *prog.Target { + t.Parallel() + target, err := prog.GetTarget(os, arch) + if err != nil { + t.Fatal(err) + } + return target +} diff --git a/pkg/corpus/minimize.go b/pkg/corpus/minimize.go new file mode 100644 index 000000000..b47816ccf --- /dev/null +++ b/pkg/corpus/minimize.go @@ -0,0 +1,41 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package corpus + +import ( + "sort" + + "github.com/google/syzkaller/pkg/signal" +) + +func (corpus *Corpus) Minimize(cover bool) { + corpus.mu.Lock() + defer corpus.mu.Unlock() + + inputs := make([]signal.Context, 0, len(corpus.progs)) + for _, inp := range corpus.progs { + inputs = append(inputs, signal.Context{ + Signal: inp.Signal, + Context: inp, + }) + } + + // Note: inputs are unsorted (based on map iteration). + // This gives some intentional non-determinism during minimization. + // However, we want to give preference to non-squashed inputs, + // so let's sort by this criteria. + sort.SliceStable(inputs, func(i, j int) bool { + firstAny := inputs[i].Context.(*Item).HasAny + secondAny := inputs[j].Context.(*Item).HasAny + return !firstAny && secondAny + }) + + corpus.progs = make(map[string]*Item) + corpus.ProgramsList = ProgramsList{} + for _, ctx := range signal.Minimize(inputs) { + inp := ctx.(*Item) + corpus.progs[inp.Sig] = inp + corpus.saveProgram(inp.Prog, inp.Signal) + } +} diff --git a/pkg/corpus/prio.go b/pkg/corpus/prio.go new file mode 100644 index 000000000..a4d85a518 --- /dev/null +++ b/pkg/corpus/prio.go @@ -0,0 +1,51 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package corpus + +import ( + "math/rand" + "sort" + "sync" + + "github.com/google/syzkaller/pkg/signal" + "github.com/google/syzkaller/prog" +) + +type ProgramsList struct { + mu sync.RWMutex + progs []*prog.Prog + sumPrios int64 + accPrios []int64 +} + +func (pl *ProgramsList) saveProgram(p *prog.Prog, signal signal.Signal) { + pl.mu.Lock() + defer pl.mu.Unlock() + prio := int64(len(signal)) + if prio == 0 { + prio = 1 + } + pl.sumPrios += prio + pl.accPrios = append(pl.accPrios, pl.sumPrios) + pl.progs = append(pl.progs, p) +} + +func (pl *ProgramsList) ChooseProgram(r *rand.Rand) *prog.Prog { + pl.mu.RLock() + defer pl.mu.RUnlock() + if len(pl.progs) == 0 { + return nil + } + randVal := r.Int63n(pl.sumPrios + 1) + idx := sort.Search(len(pl.accPrios), func(i int) bool { + return pl.accPrios[i] >= randVal + }) + return pl.progs[idx] +} + +func (pl *ProgramsList) Programs() []*prog.Prog { + pl.mu.RLock() + defer pl.mu.RUnlock() + return pl.progs +} diff --git a/pkg/corpus/prio_test.go b/pkg/corpus/prio_test.go new file mode 100644 index 000000000..3eec54bed --- /dev/null +++ b/pkg/corpus/prio_test.go @@ -0,0 +1,49 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package corpus + +import ( + "context" + "math" + "math/rand" + "testing" + + "github.com/google/syzkaller/prog" + "github.com/google/syzkaller/sys/targets" +) + +func TestChooseProgram(t *testing.T) { + rs := rand.NewSource(0) + r := rand.New(rs) + target := getTarget(t, targets.TestOS, targets.TestArch64) + corpus := NewCorpus(context.Background()) + + const ( + maxIters = 1000 + sizeCorpus = 1000 + eps = 0.01 + ) + + priorities := make(map[*prog.Prog]int64) + for i := 0; i < sizeCorpus; i++ { + sizeSig := i + 1 + if sizeSig%250 == 0 { + sizeSig = 0 + } + inp := generateInput(target, rs, 10, sizeSig) + corpus.Save(inp) + priorities[inp.Prog] = int64(len(inp.Signal)) + } + counters := make(map[*prog.Prog]int) + for it := 0; it < maxIters; it++ { + counters[corpus.ChooseProgram(r)]++ + } + for p, prio := range priorities { + prob := float64(prio) / float64(corpus.sumPrios) + diff := math.Abs(prob*maxIters - float64(counters[p])) + if diff > eps*maxIters { + t.Fatalf("the difference (%f) is higher than %f%%", diff, eps*100) + } + } +} -- cgit mrf-deployment