diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2024-03-13 18:52:18 +0100 |
|---|---|---|
| committer | Aleksandr Nogikh <nogikh@google.com> | 2024-03-18 10:58:52 +0000 |
| commit | fc090d205d8c3d58f190659a98795d89421b7e6b (patch) | |
| tree | 2b33cca3e6366a1565d70fdce01a51d6e50f6448 | |
| parent | d615901c739a765329b688494cee2f8e1b5037cb (diff) | |
pkg/corpus: a separate package for the corpus functionality
pkg/fuzzer and syz-manager have a common corpus functionality that can
be well be unified.
Create a separate pkg/corpus package that would be used by both of them.
It will simplify further work of moving pkg/fuzzer to the host.
| -rw-r--r-- | pkg/corpus/corpus.go | 231 | ||||
| -rw-r--r-- | pkg/corpus/corpus_test.go | 98 | ||||
| -rw-r--r-- | pkg/corpus/minimize.go | 41 | ||||
| -rw-r--r-- | pkg/corpus/prio.go | 51 | ||||
| -rw-r--r-- | pkg/corpus/prio_test.go | 49 | ||||
| -rw-r--r-- | pkg/fuzzer/corpus.go | 114 | ||||
| -rw-r--r-- | pkg/fuzzer/corpus_test.go | 98 | ||||
| -rw-r--r-- | pkg/fuzzer/cover.go | 56 | ||||
| -rw-r--r-- | pkg/fuzzer/fuzzer.go | 27 | ||||
| -rw-r--r-- | pkg/fuzzer/fuzzer_test.go | 12 | ||||
| -rw-r--r-- | pkg/fuzzer/job.go | 34 | ||||
| -rw-r--r-- | pkg/rpctype/rpctype.go | 3 | ||||
| -rw-r--r-- | syz-fuzzer/fuzzer.go | 28 | ||||
| -rw-r--r-- | syz-manager/http.go | 67 | ||||
| -rw-r--r-- | syz-manager/hub.go | 4 | ||||
| -rw-r--r-- | syz-manager/manager.go | 212 | ||||
| -rw-r--r-- | syz-manager/rpc.go | 27 |
17 files changed, 709 insertions, 443 deletions
diff --git a/pkg/corpus/corpus.go b/pkg/corpus/corpus.go new file mode 100644 index 000000000..1d14ba026 --- /dev/null +++ b/pkg/corpus/corpus.go @@ -0,0 +1,231 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package corpus + +import ( + "context" + "sync" + + "github.com/google/syzkaller/pkg/cover" + "github.com/google/syzkaller/pkg/hash" + "github.com/google/syzkaller/pkg/rpctype" + "github.com/google/syzkaller/pkg/signal" + "github.com/google/syzkaller/prog" +) + +// Corpus object represents a set of syzkaller-found programs that +// cover the kernel up to the currently reached frontiers. +type Corpus struct { + ctx context.Context + mu sync.RWMutex + progs map[string]*Item + signal signal.Signal // signal of inputs in corpus + updates chan<- NewItemEvent + ProgramsList +} + +func NewCorpus(ctx context.Context) *Corpus { + return NewMonitoredCorpus(ctx, nil) +} + +func NewMonitoredCorpus(ctx context.Context, updates chan<- NewItemEvent) *Corpus { + return &Corpus{ + ctx: ctx, + progs: make(map[string]*Item), + updates: updates, + } +} + +// It may happen that a single program is relevant because of several +// sysalls. In that case, there will be several ItemUpdate entities. +type ItemUpdate struct { + Call int + RawCover []uint32 +} + +// Item objects are to be treated as immutable, otherwise it's just +// too hard to synchonize accesses to them across the whole project. +// When Corpus updates one of its items, it saves a copy of it. +type Item struct { + Sig string + Call int + Prog *prog.Prog + ProgData []byte // to save some Serialize() calls + HasAny bool // whether the prog contains squashed arguments + Signal signal.Signal + Cover []uint32 + Updates []ItemUpdate +} + +func (item Item) StringCall() string { + return stringCall(item.Prog, item.Call) +} + +// RPCInputShort() does not include coverage. +func (item Item) RPCInputShort() rpctype.Input { + return rpctype.Input{ + Call: item.Call, + Prog: item.ProgData, + Signal: item.Signal.Serialize(), + } +} + +func stringCall(p *prog.Prog, call int) string { + if call != -1 { + return p.Calls[call].Meta.Name + } + return ".extra" +} + +type NewInput struct { + Prog *prog.Prog + Call int + Signal signal.Signal + Cover []uint32 + RawCover []uint32 +} + +func (item NewInput) StringCall() string { + return stringCall(item.Prog, item.Call) +} + +func (item NewInput) RPCInput() rpctype.Input { + return rpctype.Input{ + Call: item.Call, + Prog: item.Prog.Serialize(), + Signal: item.Signal.Serialize(), + Cover: item.Cover, + RawCover: item.RawCover, + } +} + +type NewItemEvent struct { + Sig string + Exists bool + ProgData []byte +} + +func (corpus *Corpus) Save(inp NewInput) { + progData := inp.Prog.Serialize() + sig := hash.String(progData) + + corpus.mu.Lock() + defer corpus.mu.Unlock() + + update := ItemUpdate{ + Call: inp.Call, + RawCover: inp.RawCover, + } + exists := false + if old, ok := corpus.progs[sig]; ok { + exists = true + newSignal := old.Signal.Copy() + newSignal.Merge(inp.Signal) + var newCover cover.Cover + newCover.Merge(old.Cover) + newCover.Merge(inp.Cover) + newItem := &Item{ + Sig: sig, + Prog: old.Prog, + ProgData: progData, + Call: old.Call, + HasAny: old.HasAny, + Signal: newSignal, + Cover: newCover.Serialize(), + Updates: append([]ItemUpdate{}, old.Updates...), + } + const maxUpdates = 32 + if len(newItem.Updates) < maxUpdates { + newItem.Updates = append(newItem.Updates, update) + } + corpus.progs[sig] = newItem + } else { + corpus.progs[sig] = &Item{ + Sig: sig, + Call: inp.Call, + Prog: inp.Prog, + ProgData: progData, + HasAny: inp.Prog.ContainsAny(), + Signal: inp.Signal, + Cover: inp.Cover, + Updates: []ItemUpdate{update}, + } + corpus.saveProgram(inp.Prog, inp.Signal) + } + corpus.signal.Merge(inp.Signal) + if corpus.updates != nil { + select { + case <-corpus.ctx.Done(): + case corpus.updates <- NewItemEvent{ + Sig: sig, + Exists: exists, + ProgData: progData, + }: + } + } +} + +func (corpus *Corpus) DiffSignal(s signal.Signal) signal.Signal { + corpus.mu.RLock() + defer corpus.mu.RUnlock() + return corpus.signal.Diff(s) +} + +func (corpus *Corpus) Signal() signal.Signal { + corpus.mu.RLock() + defer corpus.mu.RUnlock() + return corpus.signal.Copy() +} + +func (corpus *Corpus) Items() []*Item { + corpus.mu.RLock() + defer corpus.mu.RUnlock() + ret := make([]*Item, 0, len(corpus.progs)) + for _, item := range corpus.progs { + ret = append(ret, item) + } + return ret +} + +func (corpus *Corpus) Item(sig string) *Item { + corpus.mu.RLock() + defer corpus.mu.RUnlock() + return corpus.progs[sig] +} + +// Stat is a snapshot of the relevant current state figures. +type Stat struct { + Progs int + Signal int +} + +func (corpus *Corpus) Stat() Stat { + corpus.mu.RLock() + defer corpus.mu.RUnlock() + return Stat{ + Progs: len(corpus.progs), + Signal: len(corpus.signal), + } +} + +type CallCov struct { + Count int + Cover cover.Cover +} + +func (corpus *Corpus) CallCover() map[string]*CallCov { + corpus.mu.RLock() + defer corpus.mu.RUnlock() + calls := make(map[string]*CallCov) + for _, inp := range corpus.progs { + call := inp.StringCall() + if calls[call] == nil { + calls[call] = new(CallCov) + } + cc := calls[call] + cc.Count++ + cc.Cover.Merge(inp.Cover) + } + return calls +} diff --git a/pkg/corpus/corpus_test.go b/pkg/corpus/corpus_test.go new file mode 100644 index 000000000..cce537087 --- /dev/null +++ b/pkg/corpus/corpus_test.go @@ -0,0 +1,98 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package corpus + +import ( + "context" + "math/rand" + "testing" + + "github.com/google/syzkaller/pkg/signal" + "github.com/google/syzkaller/prog" + "github.com/google/syzkaller/sys/targets" + "github.com/stretchr/testify/assert" +) + +func TestCorpusOperation(t *testing.T) { + // Basic corpus functionality. + target := getTarget(t, targets.TestOS, targets.TestArch64) + ch := make(chan NewItemEvent) + corpus := NewMonitoredCorpus(context.Background(), ch) + + // First program is saved. + rs := rand.NewSource(0) + inp1 := generateInput(target, rs, 5, 5) + go corpus.Save(inp1) + event := <-ch + progData := inp1.Prog.Serialize() + assert.Equal(t, progData, event.ProgData) + assert.Equal(t, false, event.Exists) + + // Second program is saved for every its call. + inp2 := generateInput(target, rs, 5, 5) + progData = inp2.Prog.Serialize() + for i := 0; i < 5; i++ { + inp2.Call = i + go corpus.Save(inp2) + event := <-ch + assert.Equal(t, progData, event.ProgData) + assert.Equal(t, i != 0, event.Exists) + } + + // Verify that we can query corpus items. + items := corpus.Items() + assert.Len(t, items, 2) + for _, item := range items { + assert.Equal(t, item, corpus.Item(item.Sig)) + } + + // Verify the total signal. + assert.Len(t, corpus.Signal(), 5) + + corpus.Minimize(true) +} + +func TestCorpusSaveConcurrency(t *testing.T) { + target := getTarget(t, targets.TestOS, targets.TestArch64) + corpus := NewCorpus(context.Background()) + + const ( + routines = 10 + iters = 100 + ) + + for i := 0; i < routines; i++ { + go func() { + rs := rand.NewSource(0) + r := rand.New(rs) + for it := 0; it < iters; it++ { + inp := generateInput(target, rs, 10, it) + corpus.Save(inp) + corpus.ChooseProgram(r).Clone() + } + }() + } +} + +func generateInput(target *prog.Target, rs rand.Source, ncalls, sizeSig int) NewInput { + p := target.Generate(rs, ncalls, target.DefaultChoiceTable()) + var raw []uint32 + for i := 1; i <= sizeSig; i++ { + raw = append(raw, uint32(i)) + } + return NewInput{ + Prog: p, + Call: int(rs.Int63() % int64(len(p.Calls))), + Signal: signal.FromRaw(raw, 0), + } +} + +func getTarget(t *testing.T, os, arch string) *prog.Target { + t.Parallel() + target, err := prog.GetTarget(os, arch) + if err != nil { + t.Fatal(err) + } + return target +} diff --git a/pkg/corpus/minimize.go b/pkg/corpus/minimize.go new file mode 100644 index 000000000..b47816ccf --- /dev/null +++ b/pkg/corpus/minimize.go @@ -0,0 +1,41 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package corpus + +import ( + "sort" + + "github.com/google/syzkaller/pkg/signal" +) + +func (corpus *Corpus) Minimize(cover bool) { + corpus.mu.Lock() + defer corpus.mu.Unlock() + + inputs := make([]signal.Context, 0, len(corpus.progs)) + for _, inp := range corpus.progs { + inputs = append(inputs, signal.Context{ + Signal: inp.Signal, + Context: inp, + }) + } + + // Note: inputs are unsorted (based on map iteration). + // This gives some intentional non-determinism during minimization. + // However, we want to give preference to non-squashed inputs, + // so let's sort by this criteria. + sort.SliceStable(inputs, func(i, j int) bool { + firstAny := inputs[i].Context.(*Item).HasAny + secondAny := inputs[j].Context.(*Item).HasAny + return !firstAny && secondAny + }) + + corpus.progs = make(map[string]*Item) + corpus.ProgramsList = ProgramsList{} + for _, ctx := range signal.Minimize(inputs) { + inp := ctx.(*Item) + corpus.progs[inp.Sig] = inp + corpus.saveProgram(inp.Prog, inp.Signal) + } +} diff --git a/pkg/corpus/prio.go b/pkg/corpus/prio.go new file mode 100644 index 000000000..a4d85a518 --- /dev/null +++ b/pkg/corpus/prio.go @@ -0,0 +1,51 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package corpus + +import ( + "math/rand" + "sort" + "sync" + + "github.com/google/syzkaller/pkg/signal" + "github.com/google/syzkaller/prog" +) + +type ProgramsList struct { + mu sync.RWMutex + progs []*prog.Prog + sumPrios int64 + accPrios []int64 +} + +func (pl *ProgramsList) saveProgram(p *prog.Prog, signal signal.Signal) { + pl.mu.Lock() + defer pl.mu.Unlock() + prio := int64(len(signal)) + if prio == 0 { + prio = 1 + } + pl.sumPrios += prio + pl.accPrios = append(pl.accPrios, pl.sumPrios) + pl.progs = append(pl.progs, p) +} + +func (pl *ProgramsList) ChooseProgram(r *rand.Rand) *prog.Prog { + pl.mu.RLock() + defer pl.mu.RUnlock() + if len(pl.progs) == 0 { + return nil + } + randVal := r.Int63n(pl.sumPrios + 1) + idx := sort.Search(len(pl.accPrios), func(i int) bool { + return pl.accPrios[i] >= randVal + }) + return pl.progs[idx] +} + +func (pl *ProgramsList) Programs() []*prog.Prog { + pl.mu.RLock() + defer pl.mu.RUnlock() + return pl.progs +} diff --git a/pkg/corpus/prio_test.go b/pkg/corpus/prio_test.go new file mode 100644 index 000000000..3eec54bed --- /dev/null +++ b/pkg/corpus/prio_test.go @@ -0,0 +1,49 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package corpus + +import ( + "context" + "math" + "math/rand" + "testing" + + "github.com/google/syzkaller/prog" + "github.com/google/syzkaller/sys/targets" +) + +func TestChooseProgram(t *testing.T) { + rs := rand.NewSource(0) + r := rand.New(rs) + target := getTarget(t, targets.TestOS, targets.TestArch64) + corpus := NewCorpus(context.Background()) + + const ( + maxIters = 1000 + sizeCorpus = 1000 + eps = 0.01 + ) + + priorities := make(map[*prog.Prog]int64) + for i := 0; i < sizeCorpus; i++ { + sizeSig := i + 1 + if sizeSig%250 == 0 { + sizeSig = 0 + } + inp := generateInput(target, rs, 10, sizeSig) + corpus.Save(inp) + priorities[inp.Prog] = int64(len(inp.Signal)) + } + counters := make(map[*prog.Prog]int) + for it := 0; it < maxIters; it++ { + counters[corpus.ChooseProgram(r)]++ + } + for p, prio := range priorities { + prob := float64(prio) / float64(corpus.sumPrios) + diff := math.Abs(prob*maxIters - float64(counters[p])) + if diff > eps*maxIters { + t.Fatalf("the difference (%f) is higher than %f%%", diff, eps*100) + } + } +} diff --git a/pkg/fuzzer/corpus.go b/pkg/fuzzer/corpus.go deleted file mode 100644 index b92ab1c64..000000000 --- a/pkg/fuzzer/corpus.go +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright 2024 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -package fuzzer - -import ( - "math/rand" - "sort" - "sync" - - "github.com/google/syzkaller/pkg/hash" - "github.com/google/syzkaller/pkg/signal" - "github.com/google/syzkaller/prog" -) - -type Corpus struct { - mu sync.RWMutex - progs []*prog.Prog - hashes map[hash.Sig]struct{} - sumPrios int64 - accPrios []int64 - signal signal.Signal // signal of inputs in corpus - maxSignal signal.Signal // max signal ever observed (including flakes) - newSignal signal.Signal -} - -// CorpusStat is a snapshot of the relevant current state figures. -type CorpusStat struct { - Progs int - Signal int - MaxSignal int -} - -func newCorpus() *Corpus { - return &Corpus{ - hashes: make(map[hash.Sig]struct{}), - } -} - -// TODO: maybe we want to treat progs from other fuzzers exactly like -// candidates? And even triage them? -func (corpus *Corpus) Save(p *prog.Prog, signal signal.Signal, sig hash.Sig) { - corpus.mu.Lock() - defer corpus.mu.Unlock() - if _, ok := corpus.hashes[sig]; !ok { - corpus.progs = append(corpus.progs, p) - corpus.hashes[sig] = struct{}{} - prio := int64(len(signal)) - if prio == 0 { - prio = 1 - } - corpus.sumPrios += prio - corpus.accPrios = append(corpus.accPrios, corpus.sumPrios) - } - corpus.signal.Merge(signal) - corpus.maxSignal.Merge(signal) -} - -// Signal that should no longer be chased after. -func (corpus *Corpus) AddMaxSignal(sign signal.Signal) { - // TODO: how do we ensure occasional drop of this max cover? - corpus.mu.Lock() - defer corpus.mu.Unlock() - corpus.maxSignal.Merge(sign) -} - -func (corpus *Corpus) AddRawMaxSignal(signal []uint32, prio uint8) signal.Signal { - corpus.mu.Lock() - defer corpus.mu.Unlock() - diff := corpus.maxSignal.DiffRaw(signal, prio) - if diff.Empty() { - return diff - } - corpus.maxSignal.Merge(diff) - corpus.newSignal.Merge(diff) - return diff -} - -func (corpus *Corpus) chooseProgram(r *rand.Rand) *prog.Prog { - corpus.mu.RLock() - defer corpus.mu.RUnlock() - if len(corpus.progs) == 0 { - return nil - } - randVal := r.Int63n(corpus.sumPrios + 1) - idx := sort.Search(len(corpus.accPrios), func(i int) bool { - return corpus.accPrios[i] >= randVal - }) - return corpus.progs[idx] -} - -func (corpus *Corpus) Programs() []*prog.Prog { - corpus.mu.RLock() - defer corpus.mu.RUnlock() - return corpus.progs -} - -func (corpus *Corpus) GrabNewSignal() signal.Signal { - corpus.mu.Lock() - defer corpus.mu.Unlock() - sign := corpus.newSignal - corpus.newSignal = nil - return sign -} - -func (corpus *Corpus) Stat() CorpusStat { - corpus.mu.RLock() - defer corpus.mu.RUnlock() - return CorpusStat{ - Progs: len(corpus.progs), - Signal: len(corpus.signal), - MaxSignal: len(corpus.maxSignal), - } -} diff --git a/pkg/fuzzer/corpus_test.go b/pkg/fuzzer/corpus_test.go deleted file mode 100644 index 0b62d8c5a..000000000 --- a/pkg/fuzzer/corpus_test.go +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2024 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -package fuzzer - -import ( - "math" - "math/rand" - "testing" - - "github.com/google/syzkaller/pkg/hash" - "github.com/google/syzkaller/pkg/signal" - "github.com/google/syzkaller/prog" - "github.com/google/syzkaller/sys/targets" -) - -type InputTest struct { - p *prog.Prog - sign signal.Signal - sig hash.Sig -} - -func TestChooseProgram(t *testing.T) { - rs := rand.NewSource(0) - r := rand.New(rs) - target := getTarget(t, targets.TestOS, targets.TestArch64) - corpus := newCorpus() - - const ( - maxIters = 1000 - sizeCorpus = 1000 - eps = 0.01 - ) - - priorities := make(map[*prog.Prog]int64) - for i := 0; i < sizeCorpus; i++ { - sizeSig := i + 1 - if sizeSig%250 == 0 { - sizeSig = 0 - } - inp := generateInput(target, rs, 10, sizeSig) - corpus.Save(inp.p, inp.sign, inp.sig) - priorities[inp.p] = int64(len(inp.sign)) - } - counters := make(map[*prog.Prog]int) - for it := 0; it < maxIters; it++ { - counters[corpus.chooseProgram(r)]++ - } - for p, prio := range priorities { - prob := float64(prio) / float64(corpus.sumPrios) - diff := math.Abs(prob*maxIters - float64(counters[p])) - if diff > eps*maxIters { - t.Fatalf("the difference (%f) is higher than %f%%", diff, eps*100) - } - } -} - -func TestCorpusSaveConcurrency(t *testing.T) { - target := getTarget(t, targets.TestOS, targets.TestArch64) - corpus := newCorpus() - - const ( - routines = 10 - iters = 100 - ) - - for i := 0; i < routines; i++ { - go func() { - rs := rand.NewSource(0) - r := rand.New(rs) - for it := 0; it < iters; it++ { - inp := generateInput(target, rs, 10, it) - corpus.Save(inp.p, inp.sign, inp.sig) - corpus.chooseProgram(r).Clone() - } - }() - } -} - -func generateInput(target *prog.Target, rs rand.Source, ncalls, sizeSig int) (inp InputTest) { - inp.p = target.Generate(rs, ncalls, target.DefaultChoiceTable()) - var raw []uint32 - for i := 1; i <= sizeSig; i++ { - raw = append(raw, uint32(i)) - } - inp.sign = signal.FromRaw(raw, 0) - inp.sig = hash.Hash(inp.p.Serialize()) - return -} - -func getTarget(t *testing.T, os, arch string) *prog.Target { - t.Parallel() - target, err := prog.GetTarget(os, arch) - if err != nil { - t.Fatal(err) - } - return target -} diff --git a/pkg/fuzzer/cover.go b/pkg/fuzzer/cover.go new file mode 100644 index 000000000..cc18862bd --- /dev/null +++ b/pkg/fuzzer/cover.go @@ -0,0 +1,56 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package fuzzer + +import ( + "sync" + + "github.com/google/syzkaller/pkg/signal" +) + +// Cover keeps track of the signal known to the fuzzer. +type Cover struct { + mu sync.RWMutex + maxSignal signal.Signal // max signal ever observed (including flakes) + newSignal signal.Signal // newly identified max signal +} + +// Signal that should no longer be chased after. +func (cover *Cover) AddMaxSignal(sign signal.Signal) { + cover.mu.Lock() + defer cover.mu.Unlock() + cover.maxSignal.Merge(sign) +} + +func (cover *Cover) addRawMaxSignal(signal []uint32, prio uint8) signal.Signal { + cover.mu.Lock() + defer cover.mu.Unlock() + diff := cover.maxSignal.DiffRaw(signal, prio) + if diff.Empty() { + return diff + } + cover.maxSignal.Merge(diff) + cover.newSignal.Merge(diff) + return diff +} + +func (cover *Cover) GrabNewSignal() signal.Signal { + cover.mu.Lock() + defer cover.mu.Unlock() + sign := cover.newSignal + cover.newSignal = nil + return sign +} + +type CoverStat struct { + MaxSignal int +} + +func (cover *Cover) Stat() CoverStat { + cover.mu.RLock() + defer cover.mu.RUnlock() + return CoverStat{ + MaxSignal: len(cover.maxSignal), + } +} diff --git a/pkg/fuzzer/fuzzer.go b/pkg/fuzzer/fuzzer.go index 64be8325c..17513716a 100644 --- a/pkg/fuzzer/fuzzer.go +++ b/pkg/fuzzer/fuzzer.go @@ -12,15 +12,15 @@ import ( "sync/atomic" "time" + "github.com/google/syzkaller/pkg/corpus" "github.com/google/syzkaller/pkg/hash" "github.com/google/syzkaller/pkg/ipc" - "github.com/google/syzkaller/pkg/rpctype" "github.com/google/syzkaller/prog" ) type Fuzzer struct { Config *Config - Corpus *Corpus + Cover *Cover NeedCandidates chan struct{} ctx context.Context @@ -49,7 +49,7 @@ func NewFuzzer(ctx context.Context, cfg *Config, rnd *rand.Rand, target *prog.Target) *Fuzzer { f := &Fuzzer{ Config: cfg, - Corpus: newCorpus(), + Cover: &Cover{}, NeedCandidates: make(chan struct{}, 1), ctx: ctx, @@ -75,6 +75,7 @@ func NewFuzzer(ctx context.Context, cfg *Config, rnd *rand.Rand, type Config struct { Debug bool + Corpus *corpus.Corpus Logf func(level int, msg string, args ...interface{}) Coverage bool FaultInjection bool @@ -87,7 +88,7 @@ type Config struct { // If the number of queued candidates is less than MinCandidates, // NeedCandidates is triggered. MinCandidates uint - NewInputs chan rpctype.Input + NewInputs chan corpus.NewInput } type Request struct { @@ -133,7 +134,7 @@ func (fuzzer *Fuzzer) Done(req *Request, res *Result) { func (fuzzer *Fuzzer) triageProgCall(p *prog.Prog, info *ipc.CallInfo, call int, flags ProgTypes) { prio := signalPrio(p, info, call) - newMaxSignal := fuzzer.Corpus.AddRawMaxSignal(info.Signal, prio) + newMaxSignal := fuzzer.Cover.addRawMaxSignal(info.Signal, prio) if newMaxSignal.Empty() { return } @@ -304,12 +305,12 @@ func (fuzzer *Fuzzer) choiceTableUpdater() { return case <-fuzzer.ctRegenerate: } - fuzzer.updateChoiceTable(fuzzer.Corpus.Programs()) + fuzzer.updateChoiceTable(fuzzer.Config.Corpus.Programs()) } } func (fuzzer *Fuzzer) ChoiceTable() *prog.ChoiceTable { - progs := fuzzer.Corpus.Programs() + progs := fuzzer.Config.Corpus.Programs() fuzzer.ctMu.Lock() defer fuzzer.ctMu.Unlock() @@ -348,3 +349,15 @@ func (fuzzer *Fuzzer) logCurrentStats() { fuzzer.Logf(0, "%s", str) } } + +type Stat struct { + CoverStat + corpus.Stat +} + +func (fuzzer *Fuzzer) Stat() Stat { + return Stat{ + CoverStat: fuzzer.Cover.Stat(), + Stat: fuzzer.Config.Corpus.Stat(), + } +} diff --git a/pkg/fuzzer/fuzzer_test.go b/pkg/fuzzer/fuzzer_test.go index 1896b2b84..7000bb062 100644 --- a/pkg/fuzzer/fuzzer_test.go +++ b/pkg/fuzzer/fuzzer_test.go @@ -18,10 +18,10 @@ import ( "testing" "time" + "github.com/google/syzkaller/pkg/corpus" "github.com/google/syzkaller/pkg/csource" "github.com/google/syzkaller/pkg/ipc" "github.com/google/syzkaller/pkg/ipc/ipcconfig" - "github.com/google/syzkaller/pkg/rpctype" "github.com/google/syzkaller/pkg/testutil" "github.com/google/syzkaller/prog" "github.com/google/syzkaller/sys/targets" @@ -41,7 +41,8 @@ func TestFuzz(t *testing.T) { defer cancel() fuzzer := NewFuzzer(ctx, &Config{ - Debug: true, + Debug: true, + Corpus: corpus.NewCorpus(ctx), Logf: func(level int, msg string, args ...interface{}) { if level > 1 { return @@ -52,7 +53,7 @@ func TestFuzz(t *testing.T) { EnabledCalls: map[*prog.Syscall]bool{ target.SyscallMap["syz_test_fuzzer1"]: true, }, - NewInputs: make(chan rpctype.Input), + NewInputs: make(chan corpus.NewInput), }, rand.New(testutil.RandSource(t)), target) go func() { @@ -77,7 +78,7 @@ func TestFuzz(t *testing.T) { tf.wait() t.Logf("resulting corpus:") - for _, p := range fuzzer.Corpus.Programs() { + for _, p := range fuzzer.Config.Corpus.Programs() { t.Logf("-----") t.Logf("%s", p.Serialize()) } @@ -99,6 +100,7 @@ func BenchmarkFuzzer(b *testing.B) { calls[c] = true } fuzzer := NewFuzzer(ctx, &Config{ + Corpus: corpus.NewCorpus(ctx), Coverage: true, EnabledCalls: calls, }, rand.New(rand.NewSource(time.Now().UnixNano())), target) @@ -160,7 +162,7 @@ func (f *testFuzzer) oneMore() bool { defer f.mu.Unlock() f.iter++ if f.iter%100 == 0 { - stat := f.fuzzer.Corpus.Stat() + stat := f.fuzzer.Stat() f.t.Logf("<iter %d>: corpus %d, signal %d, max signal %d, crash types %d", f.iter, stat.Progs, stat.Signal, stat.MaxSignal, len(f.crashes)) } diff --git a/pkg/fuzzer/job.go b/pkg/fuzzer/job.go index a0295affb..63493c914 100644 --- a/pkg/fuzzer/job.go +++ b/pkg/fuzzer/job.go @@ -7,10 +7,9 @@ import ( "fmt" "math/rand" + "github.com/google/syzkaller/pkg/corpus" "github.com/google/syzkaller/pkg/cover" - "github.com/google/syzkaller/pkg/hash" "github.com/google/syzkaller/pkg/ipc" - "github.com/google/syzkaller/pkg/rpctype" "github.com/google/syzkaller/pkg/signal" "github.com/google/syzkaller/prog" ) @@ -71,7 +70,7 @@ func genProgRequest(fuzzer *Fuzzer, rnd *rand.Rand) *Request { } func mutateProgRequest(fuzzer *Fuzzer, rnd *rand.Rand) *Request { - p := fuzzer.Corpus.chooseProgram(rnd) + p := fuzzer.Config.Corpus.ChooseProgram(rnd) if p == nil { return nil } @@ -80,7 +79,7 @@ func mutateProgRequest(fuzzer *Fuzzer, rnd *rand.Rand) *Request { prog.RecommendedCalls, fuzzer.ChoiceTable(), fuzzer.Config.NoMutateCalls, - fuzzer.Corpus.Programs(), + fuzzer.Config.Corpus.Programs(), ) return &Request{ Prog: newP, @@ -126,10 +125,9 @@ func triageJobPrio(flags ProgTypes) jobPriority { } func (job *triageJob) run(fuzzer *Fuzzer) { - callName := ".extra" logCallName := "extra" if job.call != -1 { - callName = job.p.Calls[job.call].Meta.Name + callName := job.p.Calls[job.call].Meta.Name logCallName = fmt.Sprintf("call #%v %v", job.call, callName) } fuzzer.Logf(3, "triaging input for %v (new signal=%v)", logCallName, job.newSignal.Len()) @@ -144,9 +142,7 @@ func (job *triageJob) run(fuzzer *Fuzzer) { return } } - data := job.p.Serialize() - fuzzer.Logf(2, "added new input for %q to the corpus:\n%s", - logCallName, string(data)) + fuzzer.Logf(2, "added new input for %q to the corpus:\n%s", logCallName, job.p.String()) if job.flags&ProgSmashed == 0 { fuzzer.startJob(&smashJob{ p: job.p.Clone(), @@ -154,18 +150,18 @@ func (job *triageJob) run(fuzzer *Fuzzer) { jobPriority: newJobPriority(smashPrio), }) } - fuzzer.Corpus.Save(job.p, info.stableSignal, hash.Hash(data)) + input := corpus.NewInput{ + Prog: job.p, + Call: job.call, + Signal: info.stableSignal, + Cover: info.cover.Serialize(), + RawCover: info.rawCover, + } + fuzzer.Config.Corpus.Save(input) if fuzzer.Config.NewInputs != nil { select { case <-fuzzer.ctx.Done(): - case fuzzer.Config.NewInputs <- rpctype.Input{ - Call: callName, - CallID: job.call, - Prog: data, - Signal: info.stableSignal.Serialize(), - Cover: info.cover.Serialize(), - RawCover: info.rawCover, - }: + case fuzzer.Config.NewInputs <- input: } } } @@ -298,7 +294,7 @@ func (job *smashJob) run(fuzzer *Fuzzer) { p.Mutate(rnd, prog.RecommendedCalls, fuzzer.ChoiceTable(), fuzzer.Config.NoMutateCalls, - fuzzer.Corpus.Programs()) + fuzzer.Config.Corpus.Programs()) result := fuzzer.exec(job, &Request{ Prog: p, NeedSignal: true, diff --git a/pkg/rpctype/rpctype.go b/pkg/rpctype/rpctype.go index 3b1244662..efd9d6589 100644 --- a/pkg/rpctype/rpctype.go +++ b/pkg/rpctype/rpctype.go @@ -14,11 +14,10 @@ import ( ) type Input struct { - Call string + Call int // seq number of call in the prog to which the item is related (-1 for extra) Prog []byte Signal signal.Serial Cover []uint32 - CallID int // seq number of call in the prog to which the item is related (-1 for extra) RawCover []uint32 } diff --git a/syz-fuzzer/fuzzer.go b/syz-fuzzer/fuzzer.go index 1adec40d4..0b4970b23 100644 --- a/syz-fuzzer/fuzzer.go +++ b/syz-fuzzer/fuzzer.go @@ -17,9 +17,9 @@ import ( "sync/atomic" "time" + "github.com/google/syzkaller/pkg/corpus" "github.com/google/syzkaller/pkg/csource" "github.com/google/syzkaller/pkg/fuzzer" - "github.com/google/syzkaller/pkg/hash" "github.com/google/syzkaller/pkg/host" "github.com/google/syzkaller/pkg/ipc" "github.com/google/syzkaller/pkg/ipc/ipcconfig" @@ -230,6 +230,7 @@ func main() { calls[target.Syscalls[id]] = true } fuzzerObj := fuzzer.NewFuzzer(context.Background(), &fuzzer.Config{ + Corpus: corpus.NewCorpus(context.Background()), Coverage: config.Flags&ipc.FlagSignal > 0, FaultInjection: r.CheckResult.Features[host.FeatureFault].Enabled, Comparisons: r.CheckResult.Features[host.FeatureComparisons].Enabled, @@ -239,7 +240,7 @@ func main() { LeakChecking: r.CheckResult.Features[host.FeatureLeak].Enabled, FetchRawCover: *flagRawCover, MinCandidates: uint(*flagProcs * 2), - NewInputs: make(chan rpctype.Input), + NewInputs: make(chan corpus.NewInput), }, rnd, target) fuzzerTool := &FuzzerTool{ @@ -270,7 +271,7 @@ func main() { for needCandidates, more := true, true; more; needCandidates = false { more = fuzzerTool.poll(needCandidates, nil) // This loop lead to "no output" in qemu emulation, tell manager we are not dead. - stat := fuzzerObj.Corpus.Stat() + stat := fuzzerObj.Stat() log.Logf(0, "fetching corpus: %v, signal %v/%v (executing program)", stat.Progs, stat.Signal, stat.MaxSignal) } @@ -289,7 +290,7 @@ func main() { } // Start send input workers. for i := 0; i < *flagProcs*2; i++ { - go fuzzerTool.sendInputsWorker() + go fuzzerTool.sendInputsWorker(fuzzerObj.Config.NewInputs) } fuzzerTool.pollLoop() } @@ -388,7 +389,7 @@ func (tool *FuzzerTool) poll(needCandidates bool, stats map[string]uint64) bool a := &rpctype.PollArgs{ Name: tool.name, NeedCandidates: needCandidates, - MaxSignal: fuzzer.Corpus.GrabNewSignal().Serialize(), + MaxSignal: fuzzer.Cover.GrabNewSignal().Serialize(), Stats: stats, } r := &rpctype.PollRes{} @@ -398,7 +399,7 @@ func (tool *FuzzerTool) poll(needCandidates bool, stats map[string]uint64) bool maxSignal := r.MaxSignal.Deserialize() log.Logf(1, "poll: candidates=%v inputs=%v signal=%v", len(r.Candidates), len(r.NewInputs), maxSignal.Len()) - fuzzer.Corpus.AddMaxSignal(maxSignal) + fuzzer.Cover.AddMaxSignal(maxSignal) for _, inp := range r.NewInputs { tool.inputFromOtherFuzzer(inp) } @@ -409,11 +410,11 @@ func (tool *FuzzerTool) poll(needCandidates bool, stats map[string]uint64) bool return len(r.NewInputs) != 0 || len(r.Candidates) != 0 || maxSignal.Len() != 0 } -func (tool *FuzzerTool) sendInputsWorker() { - for inp := range tool.fuzzer.Config.NewInputs { +func (tool *FuzzerTool) sendInputsWorker(ch <-chan corpus.NewInput) { + for update := range ch { a := &rpctype.NewInputArgs{ Name: tool.name, - Input: inp, + Input: update.RPCInput(), } if err := tool.manager.Call("Manager.NewInput", a, nil); err != nil { log.SyzFatalf("Manager.NewInput call failed: %v", err) @@ -454,9 +455,12 @@ func (tool *FuzzerTool) inputFromOtherFuzzer(inp rpctype.Input) { if p == nil { return } - tool.fuzzer.Corpus.Save(p, - inp.Signal.Deserialize(), - hash.Hash(inp.Prog)) + tool.fuzzer.Config.Corpus.Save(corpus.NewInput{ + Prog: p, + Call: inp.Call, + Signal: inp.Signal.Deserialize(), + Cover: inp.Cover, + }) } func (tool *FuzzerTool) deserializeInput(inp []byte) *prog.Prog { diff --git a/syz-manager/http.go b/syz-manager/http.go index 5578705bf..7f2c037ff 100644 --- a/syz-manager/http.go +++ b/syz-manager/http.go @@ -24,7 +24,6 @@ import ( "github.com/google/syzkaller/pkg/html/pages" "github.com/google/syzkaller/pkg/log" "github.com/google/syzkaller/pkg/osutil" - "github.com/google/syzkaller/pkg/signal" "github.com/google/syzkaller/pkg/vcs" "github.com/google/syzkaller/prog" "github.com/gorilla/handlers" @@ -106,8 +105,8 @@ func (mgr *Manager) httpSyscalls(w http.ResponseWriter, r *http.Request) { data.Calls = append(data.Calls, UICallType{ Name: c, ID: syscallID, - Inputs: cc.count, - Cover: len(cc.cov), + Inputs: cc.Count, + Cover: len(cc.Cover), }) } sort.Slice(data.Calls, func(i, j int) bool { @@ -131,7 +130,7 @@ func (mgr *Manager) collectStats() []UIStat { {Name: "config", Value: configName, Link: "/config"}, {Name: "uptime", Value: fmt.Sprint(time.Since(mgr.startTime) / 1e9 * 1e9)}, {Name: "fuzzing", Value: fmt.Sprint(mgr.fuzzingTime / 60e9 * 60e9)}, - {Name: "corpus", Value: fmt.Sprint(len(mgr.corpus)), Link: "/corpus"}, + {Name: "corpus", Value: fmt.Sprint(mgr.corpus.Stat().Progs), Link: "/corpus"}, {Name: "triage queue", Value: fmt.Sprint(len(mgr.candidates))}, {Name: "signal", Value: fmt.Sprint(rawStats["signal"])}, {Name: "coverage", Value: fmt.Sprint(rawStats["coverage"]), Link: "/cover"}, @@ -203,18 +202,13 @@ func (mgr *Manager) httpCorpus(w http.ResponseWriter, r *http.Request) { Call: r.FormValue("call"), RawCover: mgr.cfg.RawCover, } - for sig, inp := range mgr.corpus { - if data.Call != "" && data.Call != inp.Call { + for _, inp := range mgr.corpus.Items() { + if data.Call != "" && data.Call != inp.StringCall() { continue } - p, err := mgr.target.Deserialize(inp.Prog, prog.NonStrict) - if err != nil { - http.Error(w, fmt.Sprintf("failed to deserialize program: %v", err), http.StatusInternalServerError) - return - } data.Inputs = append(data.Inputs, &UIInput{ - Sig: sig, - Short: p.String(), + Sig: inp.Sig, + Short: inp.Prog.String(), Cover: len(inp.Cover), }) } @@ -312,7 +306,11 @@ func (mgr *Manager) httpCoverCover(w http.ResponseWriter, r *http.Request, funcF mgr.mu.Lock() var progs []cover.Prog if sig := r.FormValue("input"); sig != "" { - inp := mgr.corpus[sig] + inp := mgr.corpus.Item(sig) + if inp == nil { + http.Error(w, "unknown input hash", http.StatusInternalServerError) + return + } if r.FormValue("update_id") != "" { updateID, err := strconv.Atoi(r.FormValue("update_id")) if err != nil || updateID < 0 || updateID >= len(inp.Updates) { @@ -321,25 +319,25 @@ func (mgr *Manager) httpCoverCover(w http.ResponseWriter, r *http.Request, funcF } progs = append(progs, cover.Prog{ Sig: sig, - Data: string(inp.Prog), + Data: string(inp.ProgData), PCs: coverToPCs(rg, inp.Updates[updateID].RawCover), }) } else { progs = append(progs, cover.Prog{ Sig: sig, - Data: string(inp.Prog), + Data: string(inp.ProgData), PCs: coverToPCs(rg, inp.Cover), }) } } else { call := r.FormValue("call") - for sig, inp := range mgr.corpus { - if call != "" && call != inp.Call { + for _, inp := range mgr.corpus.Items() { + if call != "" && call != inp.StringCall() { continue } progs = append(progs, cover.Prog{ - Sig: sig, - Data: string(inp.Prog), + Sig: inp.Sig, + Data: string(inp.ProgData), PCs: coverToPCs(rg, inp.Cover), }) } @@ -387,12 +385,8 @@ func (mgr *Manager) httpCoverCover(w http.ResponseWriter, r *http.Request, funcF func (mgr *Manager) httpCoverFallback(w http.ResponseWriter, r *http.Request) { mgr.mu.Lock() defer mgr.mu.Unlock() - var maxSignal signal.Signal - for _, inp := range mgr.corpus { - maxSignal.Merge(inp.Signal.Deserialize()) - } calls := make(map[int][]int) - for s := range maxSignal { + for s := range mgr.corpus.Signal() { id, errno := prog.DecodeFallbackSignal(uint32(s)) calls[id] = append(calls[id], errno) } @@ -437,13 +431,8 @@ func (mgr *Manager) httpPrio(w http.ResponseWriter, r *http.Request) { } var corpus []*prog.Prog - for _, inp := range mgr.corpus { - p, err := mgr.target.Deserialize(inp.Prog, prog.NonStrict) - if err != nil { - http.Error(w, fmt.Sprintf("failed to deserialize program: %v", err), http.StatusInternalServerError) - return - } - corpus = append(corpus, p) + for _, inp := range mgr.corpus.Items() { + corpus = append(corpus, inp.Prog) } prios := mgr.target.CalculatePriorities(corpus) @@ -477,34 +466,34 @@ func (mgr *Manager) httpFile(w http.ResponseWriter, r *http.Request) { func (mgr *Manager) httpInput(w http.ResponseWriter, r *http.Request) { mgr.mu.Lock() defer mgr.mu.Unlock() - inp, ok := mgr.corpus[r.FormValue("sig")] - if !ok { + inp := mgr.corpus.Item(r.FormValue("sig")) + if inp == nil { http.Error(w, "can't find the input", http.StatusInternalServerError) return } w.Header().Set("Content-Type", "text/plain; charset=utf-8") - w.Write(inp.Prog) + w.Write(inp.ProgData) } func (mgr *Manager) httpDebugInput(w http.ResponseWriter, r *http.Request) { mgr.mu.Lock() defer mgr.mu.Unlock() - inp, ok := mgr.corpus[r.FormValue("sig")] - if !ok { + inp := mgr.corpus.Item(r.FormValue("sig")) + if inp == nil { http.Error(w, "can't find the input", http.StatusInternalServerError) return } getIDs := func(callID int) []int { ret := []int{} for id, update := range inp.Updates { - if update.CallID == callID { + if update.Call == callID { ret = append(ret, id) } } return ret } data := []UIRawCallCover{} - for pos, line := range strings.Split(string(inp.Prog), "\n") { + for pos, line := range strings.Split(string(inp.ProgData), "\n") { line = strings.TrimSpace(line) if line == "" { continue diff --git a/syz-manager/hub.go b/syz-manager/hub.go index c10011818..d06a0cd0e 100644 --- a/syz-manager/hub.go +++ b/syz-manager/hub.go @@ -215,7 +215,7 @@ func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus [][]byte) error { func (hc *HubConnector) processProgs(inputs []rpctype.HubInput) (minimized, smashed, dropped int) { candidates := make([]rpctype.Candidate, 0, len(inputs)) for _, inp := range inputs { - bad, disabled, _ := checkProgram(hc.target, hc.enabledCalls, inp.Prog) + _, disabled, bad := parseProgram(hc.target, hc.enabledCalls, inp.Prog) if bad != nil || disabled { log.Logf(0, "rejecting program from hub (bad=%v, disabled=%v):\n%s", bad, disabled, inp) @@ -268,7 +268,7 @@ func splitDomains(domain string) (string, string) { func (hc *HubConnector) processRepros(repros [][]byte) int { dropped := 0 for _, repro := range repros { - bad, disabled, _ := checkProgram(hc.target, hc.enabledCalls, repro) + _, disabled, bad := parseProgram(hc.target, hc.enabledCalls, repro) if bad != nil || disabled { log.Logf(0, "rejecting repro from hub (bad=%v, disabled=%v):\n%s", bad, disabled, repro) diff --git a/syz-manager/manager.go b/syz-manager/manager.go index 3c09e9a0b..ca35d6660 100644 --- a/syz-manager/manager.go +++ b/syz-manager/manager.go @@ -5,6 +5,7 @@ package main import ( "bytes" + "context" "encoding/json" "flag" "fmt" @@ -14,14 +15,13 @@ import ( "os" "os/exec" "path/filepath" - "sort" "sync" "sync/atomic" "time" "github.com/google/syzkaller/dashboard/dashapi" "github.com/google/syzkaller/pkg/asset" - "github.com/google/syzkaller/pkg/cover" + "github.com/google/syzkaller/pkg/corpus" "github.com/google/syzkaller/pkg/csource" "github.com/google/syzkaller/pkg/db" "github.com/google/syzkaller/pkg/gce" @@ -35,7 +35,6 @@ import ( crash_pkg "github.com/google/syzkaller/pkg/report/crash" "github.com/google/syzkaller/pkg/repro" "github.com/google/syzkaller/pkg/rpctype" - "github.com/google/syzkaller/pkg/signal" "github.com/google/syzkaller/prog" "github.com/google/syzkaller/sys/targets" "github.com/google/syzkaller/vm" @@ -55,7 +54,9 @@ type Manager struct { reporter *report.Reporter crashdir string serv *RPCServer + corpus *corpus.Corpus corpusDB *db.DB + corpusDBMu sync.Mutex // for concurrent operations on corpusDB startTime time.Time firstConnect time.Time fuzzingTime time.Duration @@ -75,7 +76,6 @@ type Manager struct { candidates []rpctype.Candidate // untriaged inputs from corpus and hub disabledHashes map[string]struct{} - corpus map[string]CorpusItem seeds [][]byte newRepros [][]byte lastMinCorpus int @@ -100,29 +100,6 @@ type Manager struct { assetStorage *asset.Storage } -type CorpusItemUpdate struct { - CallID int - RawCover []uint32 -} - -type CorpusItem struct { - Call string - Prog []byte - HasAny bool // whether the prog contains squashed arguments - Signal signal.Serial - Cover []uint32 - Updates []CorpusItemUpdate -} - -func (item *CorpusItem) RPCInput() rpctype.Input { - return rpctype.Input{ - Call: item.Call, - Prog: item.Prog, - Signal: item.Signal, - Cover: item.Cover, - } -} - const ( // Just started, nothing done yet. phaseInit = iota @@ -186,9 +163,11 @@ func RunManager(cfg *mgrconfig.Config) { log.Fatalf("%v", err) } + corpusUpdates := make(chan corpus.NewItemEvent, 32) mgr := &Manager{ cfg: cfg, vmPool: vmPool, + corpus: corpus.NewMonitoredCorpus(context.Background(), corpusUpdates), target: cfg.Target, sysTarget: cfg.SysTarget, reporter: reporter, @@ -196,7 +175,6 @@ func RunManager(cfg *mgrconfig.Config) { startTime: time.Now(), stats: &Stats{haveHub: cfg.HubClient != ""}, crashTypes: make(map[string]bool), - corpus: make(map[string]CorpusItem), disabledHashes: make(map[string]struct{}), memoryLeakFrames: make(map[string]bool), dataRaceFrames: make(map[string]bool), @@ -213,6 +191,7 @@ func RunManager(cfg *mgrconfig.Config) { mgr.initStats() // Initializes prometheus variables. mgr.initHTTP() // Creates HTTP server. mgr.collectUsedFiles() + go mgr.saveCorpus(corpusUpdates) // Create RPC server for fuzzers. mgr.serv, err = startRPCServer(mgr) @@ -295,8 +274,9 @@ func (mgr *Manager) initBench() { mgr.mu.Unlock() continue } - mgr.minimizeCorpus() - vals["corpus"] = uint64(len(mgr.corpus)) + mgr.minimizeCorpusUnlocked() + stat := mgr.corpus.Stat() + vals["corpus"] = uint64(stat.Progs) vals["uptime"] = uint64(time.Since(mgr.firstConnect)) / 1e9 vals["fuzzing"] = uint64(mgr.fuzzingTime) / 1e9 vals["candidates"] = uint64(len(mgr.candidates)) @@ -687,7 +667,7 @@ func (mgr *Manager) loadCorpus() { } func (mgr *Manager) loadProg(data []byte, minimized, smashed bool) bool { - bad, disabled, _ := checkProgram(mgr.target, mgr.targetEnabledSyscalls, data) + _, disabled, bad := parseProgram(mgr.target, mgr.targetEnabledSyscalls, data) if bad != nil { return false } @@ -736,29 +716,27 @@ func programLeftover(target *prog.Target, enabled map[*prog.Syscall]bool, data [ return p.Serialize() } -// The linter complains about error not being the last argument. -// nolint: stylecheck -func checkProgram(target *prog.Target, enabled map[*prog.Syscall]bool, data []byte) (bad error, disabled, hasAny bool) { - p, err := target.Deserialize(data, prog.NonStrict) +func parseProgram(target *prog.Target, enabled map[*prog.Syscall]bool, data []byte) ( + p *prog.Prog, disabled bool, err error) { + p, err = target.Deserialize(data, prog.NonStrict) if err != nil { - return err, true, false + return } if len(p.Calls) > prog.MaxCalls { - return fmt.Errorf("longer than %d calls", prog.MaxCalls), true, false + return nil, false, fmt.Errorf("longer than %d calls", prog.MaxCalls) } // For some yet unknown reasons, programs with fail_nth > 0 may sneak in. Ignore them. for _, call := range p.Calls { if call.Props.FailNth > 0 { - return fmt.Errorf("input has fail_nth > 0"), true, false + return nil, false, fmt.Errorf("input has fail_nth > 0") } } - hasAny = p.ContainsAny() for _, c := range p.Calls { if !enabled[c.Meta] { - return nil, true, hasAny + return p, true, nil } } - return nil, false, hasAny + return p, false, nil } func (mgr *Manager) runInstance(index int) (*Crash, error) { @@ -1224,13 +1202,29 @@ func fullReproLog(stats *repro.Stats) []byte { stats.SimplifyProgTime, stats.ExtractCTime, stats.SimplifyCTime, stats.Log)) } +func (mgr *Manager) saveCorpus(updates <-chan corpus.NewItemEvent) { + for update := range updates { + if update.Exists { + // We only save new progs into the corpus.db file. + continue + } + mgr.corpusDBMu.Lock() + mgr.corpusDB.Save(update.Sig, update.ProgData, 0) + if err := mgr.corpusDB.Flush(); err != nil { + log.Errorf("failed to save corpus database: %v", err) + } + mgr.corpusDBMu.Unlock() + } +} + func (mgr *Manager) getMinimizedCorpus() (corpus, repros [][]byte) { mgr.mu.Lock() defer mgr.mu.Unlock() - mgr.minimizeCorpus() - corpus = make([][]byte, 0, len(mgr.corpus)) - for _, inp := range mgr.corpus { - corpus = append(corpus, inp.Prog) + mgr.minimizeCorpusUnlocked() + items := mgr.corpus.Items() + corpus = make([][]byte, 0, len(items)) + for _, inp := range items { + corpus = append(corpus, inp.ProgData) } repros = mgr.newRepros mgr.newRepros = nil @@ -1252,46 +1246,26 @@ func (mgr *Manager) addNewCandidates(candidates []rpctype.Candidate) { } } -func (mgr *Manager) minimizeCorpus() { - if mgr.phase < phaseLoadedCorpus || len(mgr.corpus) <= mgr.lastMinCorpus*103/100 { +func (mgr *Manager) minimizeCorpusUnlocked() { + currSize := mgr.corpus.Stat().Progs + if mgr.phase < phaseLoadedCorpus || currSize <= mgr.lastMinCorpus*103/100 { return } - inputs := make([]signal.Context, 0, len(mgr.corpus)) - for _, inp := range mgr.corpus { - inputs = append(inputs, signal.Context{ - Signal: inp.Signal.Deserialize(), - Context: inp, - }) - } - - // Note: inputs are unsorted (based on map iteration). - // This gives some intentional non-determinism during minimization. - // However, we want to give preference to non-squashed inputs, - // so let's sort by this criteria. - sort.SliceStable(inputs, func(i, j int) bool { - firstAny := inputs[i].Context.(CorpusItem).HasAny - secondAny := inputs[j].Context.(CorpusItem).HasAny - return !firstAny && secondAny - }) + mgr.corpus.Minimize(mgr.cfg.Cover) + newSize := mgr.corpus.Stat().Progs - newCorpus := make(map[string]CorpusItem) - for _, ctx := range signal.Minimize(inputs) { - inp := ctx.(CorpusItem) - newCorpus[hash.String(inp.Prog)] = inp - } - log.Logf(1, "minimized corpus: %v -> %v", len(mgr.corpus), len(newCorpus)) - mgr.corpus = newCorpus - mgr.lastMinCorpus = len(newCorpus) + log.Logf(1, "minimized corpus: %v -> %v", currSize, newSize) + mgr.lastMinCorpus = newSize // From time to time we get corpus explosion due to different reason: // generic bugs, per-OS bugs, problems with fallback coverage, kcov bugs, etc. // This has bad effect on the instance and especially on instances // connected via hub. Do some per-syscall sanity checking to prevent this. - for call, info := range mgr.collectSyscallInfoUnlocked() { + for call, info := range mgr.corpus.CallCover() { if mgr.cfg.Cover { // If we have less than 1K inputs per this call, // accept all new inputs unconditionally. - if info.count < 1000 { + if info.Count < 1000 { continue } // If we have more than 3K already, don't accept any more. @@ -1299,13 +1273,13 @@ func (mgr *Manager) minimizeCorpus() { // Empirically, real coverage for the most saturated syscalls is ~30-60 // per program (even when we have a thousand of them). For explosion // case coverage tend to be much lower (~0.3-5 per program). - if info.count < 3000 && len(info.cov)/info.count >= 10 { + if info.Count < 3000 && len(info.Cover)/info.Count >= 10 { continue } } else { // If we don't have real coverage, signal is weak. // If we have more than several hundreds, there is something wrong. - if info.count < 300 { + if info.Count < 300 { continue } } @@ -1320,8 +1294,10 @@ func (mgr *Manager) minimizeCorpus() { if mgr.phase < phaseTriagedCorpus { return } + mgr.corpusDBMu.Lock() + defer mgr.corpusDBMu.Unlock() for key := range mgr.corpusDB.Records { - _, ok1 := mgr.corpus[key] + ok1 := mgr.corpus.Item(key) != nil _, ok2 := mgr.disabledHashes[key] if !ok1 && !ok2 { mgr.corpusDB.Delete(key) @@ -1336,32 +1312,21 @@ func setGuiltyFiles(crash *dashapi.Crash, report *report.Report) { } } -type CallCov struct { - count int - cov cover.Cover -} - -func (mgr *Manager) collectSyscallInfo() map[string]*CallCov { +func (mgr *Manager) collectSyscallInfo() map[string]*corpus.CallCov { mgr.mu.Lock() - defer mgr.mu.Unlock() - return mgr.collectSyscallInfoUnlocked() -} + checkResult := mgr.checkResult + mgr.mu.Unlock() -func (mgr *Manager) collectSyscallInfoUnlocked() map[string]*CallCov { - if mgr.checkResult == nil { + if checkResult == nil { return nil } - calls := make(map[string]*CallCov) - for _, call := range mgr.checkResult.EnabledCalls[mgr.cfg.Sandbox] { - calls[mgr.target.Syscalls[call].Name] = new(CallCov) - } - for _, inp := range mgr.corpus { - if calls[inp.Call] == nil { - calls[inp.Call] = new(CallCov) + calls := mgr.corpus.CallCover() + // Add enabled, but not yet covered calls. + for _, call := range checkResult.EnabledCalls[mgr.cfg.Sandbox] { + key := mgr.target.Syscalls[call].Name + if calls[key] == nil { + calls[key] = new(corpus.CallCov) } - cc := calls[inp.Call] - cc.count++ - cc.cov.Merge(inp.Cover) } return calls } @@ -1371,10 +1336,11 @@ func (mgr *Manager) fuzzerConnect(modules []host.KernelModule) ( mgr.mu.Lock() defer mgr.mu.Unlock() - mgr.minimizeCorpus() - corpus := make([]rpctype.Input, 0, len(mgr.corpus)) - for _, inp := range mgr.corpus { - corpus = append(corpus, inp.RPCInput()) + mgr.minimizeCorpusUnlocked() + items := mgr.corpus.Items() + corpus := make([]rpctype.Input, 0, len(items)) + for _, inp := range items { + corpus = append(corpus, inp.RPCInputShort()) } frames := BugFrames{ memoryLeaks: make([]string, 0, len(mgr.memoryLeakFrames)), @@ -1408,45 +1374,14 @@ func (mgr *Manager) machineChecked(a *rpctype.CheckArgs, enabledSyscalls map[*pr mgr.firstConnect = time.Now() } -func (mgr *Manager) newInput(inp rpctype.Input, sign signal.Signal, hasAny bool) bool { +func (mgr *Manager) newInput(inp corpus.NewInput) bool { mgr.mu.Lock() defer mgr.mu.Unlock() - if mgr.saturatedCalls[inp.Call] { + if mgr.saturatedCalls[inp.StringCall()] { + // TODO: move this logic to pkg/corpus or pkg/fuzzer? return false } - update := CorpusItemUpdate{ - CallID: inp.CallID, - RawCover: inp.RawCover, - } - sig := hash.String(inp.Prog) - if old, ok := mgr.corpus[sig]; ok { - // The input is already present, but possibly with diffent signal/coverage/call. - sign.Merge(old.Signal.Deserialize()) - old.Signal = sign.Serialize() - var cov cover.Cover - cov.Merge(old.Cover) - cov.Merge(inp.Cover) - old.Cover = cov.Serialize() - const maxUpdates = 32 - old.Updates = append(old.Updates, update) - if len(old.Updates) > maxUpdates { - old.Updates = old.Updates[:maxUpdates] - } - mgr.corpus[sig] = old - } else { - mgr.corpus[sig] = CorpusItem{ - Call: inp.Call, - Prog: inp.Prog, - HasAny: hasAny, - Signal: inp.Signal, - Cover: inp.Cover, - Updates: []CorpusItemUpdate{update}, - } - mgr.corpusDB.Save(sig, inp.Prog, 0) - if err := mgr.corpusDB.Flush(); err != nil { - log.Errorf("failed to save corpus database: %v", err) - } - } + mgr.corpus.Save(inp) return true } @@ -1553,11 +1488,12 @@ func (mgr *Manager) dashboardReporter() { crashes := mgr.stats.crashes.get() suppressedCrashes := mgr.stats.crashSuppressed.get() execs := mgr.stats.execTotal.get() + corpusStat := mgr.corpus.Stat() req := &dashapi.ManagerStatsReq{ Name: mgr.cfg.Name, Addr: webAddr, UpTime: time.Since(mgr.firstConnect), - Corpus: uint64(len(mgr.corpus)), + Corpus: uint64(corpusStat.Progs), PCs: mgr.stats.corpusCover.get(), Cover: mgr.stats.corpusSignal.get(), CrashTypes: mgr.stats.crashTypes.get(), diff --git a/syz-manager/rpc.go b/syz-manager/rpc.go index dfa050334..0fdaf3b46 100644 --- a/syz-manager/rpc.go +++ b/syz-manager/rpc.go @@ -10,6 +10,7 @@ import ( "sync" "time" + "github.com/google/syzkaller/pkg/corpus" "github.com/google/syzkaller/pkg/cover" "github.com/google/syzkaller/pkg/host" "github.com/google/syzkaller/pkg/log" @@ -30,9 +31,13 @@ type RPCServer struct { batchSize int canonicalModules *cover.Canonicalizer - mu sync.Mutex - fuzzers map[string]*Fuzzer - checkResult *rpctype.CheckArgs + mu sync.Mutex + fuzzers map[string]*Fuzzer + checkResult *rpctype.CheckArgs + + // TODO: we don't really need these anymore, but there's not much sense + // in rewriting the code that uses them -- most of that code will be dropped + // once we move pkg/fuzzer to the host. maxSignal signal.Signal corpusSignal signal.Signal corpusCover cover.Cover @@ -61,7 +66,7 @@ type RPCManagerView interface { fuzzerConnect([]host.KernelModule) ( []rpctype.Input, BugFrames, map[uint32]uint32, map[uint32]uint32, error) machineChecked(result *rpctype.CheckArgs, enabledSyscalls map[*prog.Syscall]bool) - newInput(inp rpctype.Input, sign signal.Signal, hasAny bool) bool + newInput(inp corpus.NewInput) bool candidateBatch(size int) []rpctype.Candidate rotateCorpus() bool } @@ -256,7 +261,7 @@ func (serv *RPCServer) Check(a *rpctype.CheckArgs, r *int) error { } func (serv *RPCServer) NewInput(a *rpctype.NewInputArgs, r *int) error { - bad, disabled, hasAny := checkProgram(serv.cfg.Target, serv.targetEnabledSyscalls, a.Input.Prog) + p, disabled, bad := parseProgram(serv.cfg.Target, serv.targetEnabledSyscalls, a.Input.Prog) if bad != nil || disabled { log.Errorf("rejecting program from fuzzer (bad=%v, disabled=%v):\n%s", bad, disabled, a.Input.Prog) return nil @@ -269,8 +274,16 @@ func (serv *RPCServer) NewInput(a *rpctype.NewInputArgs, r *int) error { a.Cover, a.Signal = f.instModules.Canonicalize(a.Cover, a.Signal) } inputSignal := a.Signal.Deserialize() + + inp := corpus.NewInput{ + Prog: p, + Call: a.Call, + Signal: inputSignal, + Cover: a.Cover, + } + log.Logf(4, "new input from %v for syscall %v (signal=%v, cover=%v)", - a.Name, a.Call, inputSignal.Len(), len(a.Cover)) + a.Name, inp.StringCall(), inputSignal.Len(), len(a.Cover)) // Note: f may be nil if we called shutdownInstance, // but this request is already in-flight. genuine := !serv.corpusSignal.Diff(inputSignal).Empty() @@ -281,7 +294,7 @@ func (serv *RPCServer) NewInput(a *rpctype.NewInputArgs, r *int) error { if !genuine && !rotated { return nil } - if !serv.mgr.newInput(a.Input, inputSignal, hasAny) { + if !serv.mgr.newInput(inp) { return nil } |
