aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2024-03-13 18:52:18 +0100
committerAleksandr Nogikh <nogikh@google.com>2024-03-18 10:58:52 +0000
commitfc090d205d8c3d58f190659a98795d89421b7e6b (patch)
tree2b33cca3e6366a1565d70fdce01a51d6e50f6448
parentd615901c739a765329b688494cee2f8e1b5037cb (diff)
pkg/corpus: a separate package for the corpus functionality
pkg/fuzzer and syz-manager have a common corpus functionality that can be well be unified. Create a separate pkg/corpus package that would be used by both of them. It will simplify further work of moving pkg/fuzzer to the host.
-rw-r--r--pkg/corpus/corpus.go231
-rw-r--r--pkg/corpus/corpus_test.go98
-rw-r--r--pkg/corpus/minimize.go41
-rw-r--r--pkg/corpus/prio.go51
-rw-r--r--pkg/corpus/prio_test.go49
-rw-r--r--pkg/fuzzer/corpus.go114
-rw-r--r--pkg/fuzzer/corpus_test.go98
-rw-r--r--pkg/fuzzer/cover.go56
-rw-r--r--pkg/fuzzer/fuzzer.go27
-rw-r--r--pkg/fuzzer/fuzzer_test.go12
-rw-r--r--pkg/fuzzer/job.go34
-rw-r--r--pkg/rpctype/rpctype.go3
-rw-r--r--syz-fuzzer/fuzzer.go28
-rw-r--r--syz-manager/http.go67
-rw-r--r--syz-manager/hub.go4
-rw-r--r--syz-manager/manager.go212
-rw-r--r--syz-manager/rpc.go27
17 files changed, 709 insertions, 443 deletions
diff --git a/pkg/corpus/corpus.go b/pkg/corpus/corpus.go
new file mode 100644
index 000000000..1d14ba026
--- /dev/null
+++ b/pkg/corpus/corpus.go
@@ -0,0 +1,231 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package corpus
+
+import (
+ "context"
+ "sync"
+
+ "github.com/google/syzkaller/pkg/cover"
+ "github.com/google/syzkaller/pkg/hash"
+ "github.com/google/syzkaller/pkg/rpctype"
+ "github.com/google/syzkaller/pkg/signal"
+ "github.com/google/syzkaller/prog"
+)
+
+// Corpus object represents a set of syzkaller-found programs that
+// cover the kernel up to the currently reached frontiers.
+type Corpus struct {
+ ctx context.Context
+ mu sync.RWMutex
+ progs map[string]*Item
+ signal signal.Signal // signal of inputs in corpus
+ updates chan<- NewItemEvent
+ ProgramsList
+}
+
+func NewCorpus(ctx context.Context) *Corpus {
+ return NewMonitoredCorpus(ctx, nil)
+}
+
+func NewMonitoredCorpus(ctx context.Context, updates chan<- NewItemEvent) *Corpus {
+ return &Corpus{
+ ctx: ctx,
+ progs: make(map[string]*Item),
+ updates: updates,
+ }
+}
+
+// It may happen that a single program is relevant because of several
+// sysalls. In that case, there will be several ItemUpdate entities.
+type ItemUpdate struct {
+ Call int
+ RawCover []uint32
+}
+
+// Item objects are to be treated as immutable, otherwise it's just
+// too hard to synchonize accesses to them across the whole project.
+// When Corpus updates one of its items, it saves a copy of it.
+type Item struct {
+ Sig string
+ Call int
+ Prog *prog.Prog
+ ProgData []byte // to save some Serialize() calls
+ HasAny bool // whether the prog contains squashed arguments
+ Signal signal.Signal
+ Cover []uint32
+ Updates []ItemUpdate
+}
+
+func (item Item) StringCall() string {
+ return stringCall(item.Prog, item.Call)
+}
+
+// RPCInputShort() does not include coverage.
+func (item Item) RPCInputShort() rpctype.Input {
+ return rpctype.Input{
+ Call: item.Call,
+ Prog: item.ProgData,
+ Signal: item.Signal.Serialize(),
+ }
+}
+
+func stringCall(p *prog.Prog, call int) string {
+ if call != -1 {
+ return p.Calls[call].Meta.Name
+ }
+ return ".extra"
+}
+
+type NewInput struct {
+ Prog *prog.Prog
+ Call int
+ Signal signal.Signal
+ Cover []uint32
+ RawCover []uint32
+}
+
+func (item NewInput) StringCall() string {
+ return stringCall(item.Prog, item.Call)
+}
+
+func (item NewInput) RPCInput() rpctype.Input {
+ return rpctype.Input{
+ Call: item.Call,
+ Prog: item.Prog.Serialize(),
+ Signal: item.Signal.Serialize(),
+ Cover: item.Cover,
+ RawCover: item.RawCover,
+ }
+}
+
+type NewItemEvent struct {
+ Sig string
+ Exists bool
+ ProgData []byte
+}
+
+func (corpus *Corpus) Save(inp NewInput) {
+ progData := inp.Prog.Serialize()
+ sig := hash.String(progData)
+
+ corpus.mu.Lock()
+ defer corpus.mu.Unlock()
+
+ update := ItemUpdate{
+ Call: inp.Call,
+ RawCover: inp.RawCover,
+ }
+ exists := false
+ if old, ok := corpus.progs[sig]; ok {
+ exists = true
+ newSignal := old.Signal.Copy()
+ newSignal.Merge(inp.Signal)
+ var newCover cover.Cover
+ newCover.Merge(old.Cover)
+ newCover.Merge(inp.Cover)
+ newItem := &Item{
+ Sig: sig,
+ Prog: old.Prog,
+ ProgData: progData,
+ Call: old.Call,
+ HasAny: old.HasAny,
+ Signal: newSignal,
+ Cover: newCover.Serialize(),
+ Updates: append([]ItemUpdate{}, old.Updates...),
+ }
+ const maxUpdates = 32
+ if len(newItem.Updates) < maxUpdates {
+ newItem.Updates = append(newItem.Updates, update)
+ }
+ corpus.progs[sig] = newItem
+ } else {
+ corpus.progs[sig] = &Item{
+ Sig: sig,
+ Call: inp.Call,
+ Prog: inp.Prog,
+ ProgData: progData,
+ HasAny: inp.Prog.ContainsAny(),
+ Signal: inp.Signal,
+ Cover: inp.Cover,
+ Updates: []ItemUpdate{update},
+ }
+ corpus.saveProgram(inp.Prog, inp.Signal)
+ }
+ corpus.signal.Merge(inp.Signal)
+ if corpus.updates != nil {
+ select {
+ case <-corpus.ctx.Done():
+ case corpus.updates <- NewItemEvent{
+ Sig: sig,
+ Exists: exists,
+ ProgData: progData,
+ }:
+ }
+ }
+}
+
+func (corpus *Corpus) DiffSignal(s signal.Signal) signal.Signal {
+ corpus.mu.RLock()
+ defer corpus.mu.RUnlock()
+ return corpus.signal.Diff(s)
+}
+
+func (corpus *Corpus) Signal() signal.Signal {
+ corpus.mu.RLock()
+ defer corpus.mu.RUnlock()
+ return corpus.signal.Copy()
+}
+
+func (corpus *Corpus) Items() []*Item {
+ corpus.mu.RLock()
+ defer corpus.mu.RUnlock()
+ ret := make([]*Item, 0, len(corpus.progs))
+ for _, item := range corpus.progs {
+ ret = append(ret, item)
+ }
+ return ret
+}
+
+func (corpus *Corpus) Item(sig string) *Item {
+ corpus.mu.RLock()
+ defer corpus.mu.RUnlock()
+ return corpus.progs[sig]
+}
+
+// Stat is a snapshot of the relevant current state figures.
+type Stat struct {
+ Progs int
+ Signal int
+}
+
+func (corpus *Corpus) Stat() Stat {
+ corpus.mu.RLock()
+ defer corpus.mu.RUnlock()
+ return Stat{
+ Progs: len(corpus.progs),
+ Signal: len(corpus.signal),
+ }
+}
+
+type CallCov struct {
+ Count int
+ Cover cover.Cover
+}
+
+func (corpus *Corpus) CallCover() map[string]*CallCov {
+ corpus.mu.RLock()
+ defer corpus.mu.RUnlock()
+ calls := make(map[string]*CallCov)
+ for _, inp := range corpus.progs {
+ call := inp.StringCall()
+ if calls[call] == nil {
+ calls[call] = new(CallCov)
+ }
+ cc := calls[call]
+ cc.Count++
+ cc.Cover.Merge(inp.Cover)
+ }
+ return calls
+}
diff --git a/pkg/corpus/corpus_test.go b/pkg/corpus/corpus_test.go
new file mode 100644
index 000000000..cce537087
--- /dev/null
+++ b/pkg/corpus/corpus_test.go
@@ -0,0 +1,98 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package corpus
+
+import (
+ "context"
+ "math/rand"
+ "testing"
+
+ "github.com/google/syzkaller/pkg/signal"
+ "github.com/google/syzkaller/prog"
+ "github.com/google/syzkaller/sys/targets"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestCorpusOperation(t *testing.T) {
+ // Basic corpus functionality.
+ target := getTarget(t, targets.TestOS, targets.TestArch64)
+ ch := make(chan NewItemEvent)
+ corpus := NewMonitoredCorpus(context.Background(), ch)
+
+ // First program is saved.
+ rs := rand.NewSource(0)
+ inp1 := generateInput(target, rs, 5, 5)
+ go corpus.Save(inp1)
+ event := <-ch
+ progData := inp1.Prog.Serialize()
+ assert.Equal(t, progData, event.ProgData)
+ assert.Equal(t, false, event.Exists)
+
+ // Second program is saved for every its call.
+ inp2 := generateInput(target, rs, 5, 5)
+ progData = inp2.Prog.Serialize()
+ for i := 0; i < 5; i++ {
+ inp2.Call = i
+ go corpus.Save(inp2)
+ event := <-ch
+ assert.Equal(t, progData, event.ProgData)
+ assert.Equal(t, i != 0, event.Exists)
+ }
+
+ // Verify that we can query corpus items.
+ items := corpus.Items()
+ assert.Len(t, items, 2)
+ for _, item := range items {
+ assert.Equal(t, item, corpus.Item(item.Sig))
+ }
+
+ // Verify the total signal.
+ assert.Len(t, corpus.Signal(), 5)
+
+ corpus.Minimize(true)
+}
+
+func TestCorpusSaveConcurrency(t *testing.T) {
+ target := getTarget(t, targets.TestOS, targets.TestArch64)
+ corpus := NewCorpus(context.Background())
+
+ const (
+ routines = 10
+ iters = 100
+ )
+
+ for i := 0; i < routines; i++ {
+ go func() {
+ rs := rand.NewSource(0)
+ r := rand.New(rs)
+ for it := 0; it < iters; it++ {
+ inp := generateInput(target, rs, 10, it)
+ corpus.Save(inp)
+ corpus.ChooseProgram(r).Clone()
+ }
+ }()
+ }
+}
+
+func generateInput(target *prog.Target, rs rand.Source, ncalls, sizeSig int) NewInput {
+ p := target.Generate(rs, ncalls, target.DefaultChoiceTable())
+ var raw []uint32
+ for i := 1; i <= sizeSig; i++ {
+ raw = append(raw, uint32(i))
+ }
+ return NewInput{
+ Prog: p,
+ Call: int(rs.Int63() % int64(len(p.Calls))),
+ Signal: signal.FromRaw(raw, 0),
+ }
+}
+
+func getTarget(t *testing.T, os, arch string) *prog.Target {
+ t.Parallel()
+ target, err := prog.GetTarget(os, arch)
+ if err != nil {
+ t.Fatal(err)
+ }
+ return target
+}
diff --git a/pkg/corpus/minimize.go b/pkg/corpus/minimize.go
new file mode 100644
index 000000000..b47816ccf
--- /dev/null
+++ b/pkg/corpus/minimize.go
@@ -0,0 +1,41 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package corpus
+
+import (
+ "sort"
+
+ "github.com/google/syzkaller/pkg/signal"
+)
+
+func (corpus *Corpus) Minimize(cover bool) {
+ corpus.mu.Lock()
+ defer corpus.mu.Unlock()
+
+ inputs := make([]signal.Context, 0, len(corpus.progs))
+ for _, inp := range corpus.progs {
+ inputs = append(inputs, signal.Context{
+ Signal: inp.Signal,
+ Context: inp,
+ })
+ }
+
+ // Note: inputs are unsorted (based on map iteration).
+ // This gives some intentional non-determinism during minimization.
+ // However, we want to give preference to non-squashed inputs,
+ // so let's sort by this criteria.
+ sort.SliceStable(inputs, func(i, j int) bool {
+ firstAny := inputs[i].Context.(*Item).HasAny
+ secondAny := inputs[j].Context.(*Item).HasAny
+ return !firstAny && secondAny
+ })
+
+ corpus.progs = make(map[string]*Item)
+ corpus.ProgramsList = ProgramsList{}
+ for _, ctx := range signal.Minimize(inputs) {
+ inp := ctx.(*Item)
+ corpus.progs[inp.Sig] = inp
+ corpus.saveProgram(inp.Prog, inp.Signal)
+ }
+}
diff --git a/pkg/corpus/prio.go b/pkg/corpus/prio.go
new file mode 100644
index 000000000..a4d85a518
--- /dev/null
+++ b/pkg/corpus/prio.go
@@ -0,0 +1,51 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package corpus
+
+import (
+ "math/rand"
+ "sort"
+ "sync"
+
+ "github.com/google/syzkaller/pkg/signal"
+ "github.com/google/syzkaller/prog"
+)
+
+type ProgramsList struct {
+ mu sync.RWMutex
+ progs []*prog.Prog
+ sumPrios int64
+ accPrios []int64
+}
+
+func (pl *ProgramsList) saveProgram(p *prog.Prog, signal signal.Signal) {
+ pl.mu.Lock()
+ defer pl.mu.Unlock()
+ prio := int64(len(signal))
+ if prio == 0 {
+ prio = 1
+ }
+ pl.sumPrios += prio
+ pl.accPrios = append(pl.accPrios, pl.sumPrios)
+ pl.progs = append(pl.progs, p)
+}
+
+func (pl *ProgramsList) ChooseProgram(r *rand.Rand) *prog.Prog {
+ pl.mu.RLock()
+ defer pl.mu.RUnlock()
+ if len(pl.progs) == 0 {
+ return nil
+ }
+ randVal := r.Int63n(pl.sumPrios + 1)
+ idx := sort.Search(len(pl.accPrios), func(i int) bool {
+ return pl.accPrios[i] >= randVal
+ })
+ return pl.progs[idx]
+}
+
+func (pl *ProgramsList) Programs() []*prog.Prog {
+ pl.mu.RLock()
+ defer pl.mu.RUnlock()
+ return pl.progs
+}
diff --git a/pkg/corpus/prio_test.go b/pkg/corpus/prio_test.go
new file mode 100644
index 000000000..3eec54bed
--- /dev/null
+++ b/pkg/corpus/prio_test.go
@@ -0,0 +1,49 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package corpus
+
+import (
+ "context"
+ "math"
+ "math/rand"
+ "testing"
+
+ "github.com/google/syzkaller/prog"
+ "github.com/google/syzkaller/sys/targets"
+)
+
+func TestChooseProgram(t *testing.T) {
+ rs := rand.NewSource(0)
+ r := rand.New(rs)
+ target := getTarget(t, targets.TestOS, targets.TestArch64)
+ corpus := NewCorpus(context.Background())
+
+ const (
+ maxIters = 1000
+ sizeCorpus = 1000
+ eps = 0.01
+ )
+
+ priorities := make(map[*prog.Prog]int64)
+ for i := 0; i < sizeCorpus; i++ {
+ sizeSig := i + 1
+ if sizeSig%250 == 0 {
+ sizeSig = 0
+ }
+ inp := generateInput(target, rs, 10, sizeSig)
+ corpus.Save(inp)
+ priorities[inp.Prog] = int64(len(inp.Signal))
+ }
+ counters := make(map[*prog.Prog]int)
+ for it := 0; it < maxIters; it++ {
+ counters[corpus.ChooseProgram(r)]++
+ }
+ for p, prio := range priorities {
+ prob := float64(prio) / float64(corpus.sumPrios)
+ diff := math.Abs(prob*maxIters - float64(counters[p]))
+ if diff > eps*maxIters {
+ t.Fatalf("the difference (%f) is higher than %f%%", diff, eps*100)
+ }
+ }
+}
diff --git a/pkg/fuzzer/corpus.go b/pkg/fuzzer/corpus.go
deleted file mode 100644
index b92ab1c64..000000000
--- a/pkg/fuzzer/corpus.go
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright 2024 syzkaller project authors. All rights reserved.
-// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
-
-package fuzzer
-
-import (
- "math/rand"
- "sort"
- "sync"
-
- "github.com/google/syzkaller/pkg/hash"
- "github.com/google/syzkaller/pkg/signal"
- "github.com/google/syzkaller/prog"
-)
-
-type Corpus struct {
- mu sync.RWMutex
- progs []*prog.Prog
- hashes map[hash.Sig]struct{}
- sumPrios int64
- accPrios []int64
- signal signal.Signal // signal of inputs in corpus
- maxSignal signal.Signal // max signal ever observed (including flakes)
- newSignal signal.Signal
-}
-
-// CorpusStat is a snapshot of the relevant current state figures.
-type CorpusStat struct {
- Progs int
- Signal int
- MaxSignal int
-}
-
-func newCorpus() *Corpus {
- return &Corpus{
- hashes: make(map[hash.Sig]struct{}),
- }
-}
-
-// TODO: maybe we want to treat progs from other fuzzers exactly like
-// candidates? And even triage them?
-func (corpus *Corpus) Save(p *prog.Prog, signal signal.Signal, sig hash.Sig) {
- corpus.mu.Lock()
- defer corpus.mu.Unlock()
- if _, ok := corpus.hashes[sig]; !ok {
- corpus.progs = append(corpus.progs, p)
- corpus.hashes[sig] = struct{}{}
- prio := int64(len(signal))
- if prio == 0 {
- prio = 1
- }
- corpus.sumPrios += prio
- corpus.accPrios = append(corpus.accPrios, corpus.sumPrios)
- }
- corpus.signal.Merge(signal)
- corpus.maxSignal.Merge(signal)
-}
-
-// Signal that should no longer be chased after.
-func (corpus *Corpus) AddMaxSignal(sign signal.Signal) {
- // TODO: how do we ensure occasional drop of this max cover?
- corpus.mu.Lock()
- defer corpus.mu.Unlock()
- corpus.maxSignal.Merge(sign)
-}
-
-func (corpus *Corpus) AddRawMaxSignal(signal []uint32, prio uint8) signal.Signal {
- corpus.mu.Lock()
- defer corpus.mu.Unlock()
- diff := corpus.maxSignal.DiffRaw(signal, prio)
- if diff.Empty() {
- return diff
- }
- corpus.maxSignal.Merge(diff)
- corpus.newSignal.Merge(diff)
- return diff
-}
-
-func (corpus *Corpus) chooseProgram(r *rand.Rand) *prog.Prog {
- corpus.mu.RLock()
- defer corpus.mu.RUnlock()
- if len(corpus.progs) == 0 {
- return nil
- }
- randVal := r.Int63n(corpus.sumPrios + 1)
- idx := sort.Search(len(corpus.accPrios), func(i int) bool {
- return corpus.accPrios[i] >= randVal
- })
- return corpus.progs[idx]
-}
-
-func (corpus *Corpus) Programs() []*prog.Prog {
- corpus.mu.RLock()
- defer corpus.mu.RUnlock()
- return corpus.progs
-}
-
-func (corpus *Corpus) GrabNewSignal() signal.Signal {
- corpus.mu.Lock()
- defer corpus.mu.Unlock()
- sign := corpus.newSignal
- corpus.newSignal = nil
- return sign
-}
-
-func (corpus *Corpus) Stat() CorpusStat {
- corpus.mu.RLock()
- defer corpus.mu.RUnlock()
- return CorpusStat{
- Progs: len(corpus.progs),
- Signal: len(corpus.signal),
- MaxSignal: len(corpus.maxSignal),
- }
-}
diff --git a/pkg/fuzzer/corpus_test.go b/pkg/fuzzer/corpus_test.go
deleted file mode 100644
index 0b62d8c5a..000000000
--- a/pkg/fuzzer/corpus_test.go
+++ /dev/null
@@ -1,98 +0,0 @@
-// Copyright 2024 syzkaller project authors. All rights reserved.
-// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
-
-package fuzzer
-
-import (
- "math"
- "math/rand"
- "testing"
-
- "github.com/google/syzkaller/pkg/hash"
- "github.com/google/syzkaller/pkg/signal"
- "github.com/google/syzkaller/prog"
- "github.com/google/syzkaller/sys/targets"
-)
-
-type InputTest struct {
- p *prog.Prog
- sign signal.Signal
- sig hash.Sig
-}
-
-func TestChooseProgram(t *testing.T) {
- rs := rand.NewSource(0)
- r := rand.New(rs)
- target := getTarget(t, targets.TestOS, targets.TestArch64)
- corpus := newCorpus()
-
- const (
- maxIters = 1000
- sizeCorpus = 1000
- eps = 0.01
- )
-
- priorities := make(map[*prog.Prog]int64)
- for i := 0; i < sizeCorpus; i++ {
- sizeSig := i + 1
- if sizeSig%250 == 0 {
- sizeSig = 0
- }
- inp := generateInput(target, rs, 10, sizeSig)
- corpus.Save(inp.p, inp.sign, inp.sig)
- priorities[inp.p] = int64(len(inp.sign))
- }
- counters := make(map[*prog.Prog]int)
- for it := 0; it < maxIters; it++ {
- counters[corpus.chooseProgram(r)]++
- }
- for p, prio := range priorities {
- prob := float64(prio) / float64(corpus.sumPrios)
- diff := math.Abs(prob*maxIters - float64(counters[p]))
- if diff > eps*maxIters {
- t.Fatalf("the difference (%f) is higher than %f%%", diff, eps*100)
- }
- }
-}
-
-func TestCorpusSaveConcurrency(t *testing.T) {
- target := getTarget(t, targets.TestOS, targets.TestArch64)
- corpus := newCorpus()
-
- const (
- routines = 10
- iters = 100
- )
-
- for i := 0; i < routines; i++ {
- go func() {
- rs := rand.NewSource(0)
- r := rand.New(rs)
- for it := 0; it < iters; it++ {
- inp := generateInput(target, rs, 10, it)
- corpus.Save(inp.p, inp.sign, inp.sig)
- corpus.chooseProgram(r).Clone()
- }
- }()
- }
-}
-
-func generateInput(target *prog.Target, rs rand.Source, ncalls, sizeSig int) (inp InputTest) {
- inp.p = target.Generate(rs, ncalls, target.DefaultChoiceTable())
- var raw []uint32
- for i := 1; i <= sizeSig; i++ {
- raw = append(raw, uint32(i))
- }
- inp.sign = signal.FromRaw(raw, 0)
- inp.sig = hash.Hash(inp.p.Serialize())
- return
-}
-
-func getTarget(t *testing.T, os, arch string) *prog.Target {
- t.Parallel()
- target, err := prog.GetTarget(os, arch)
- if err != nil {
- t.Fatal(err)
- }
- return target
-}
diff --git a/pkg/fuzzer/cover.go b/pkg/fuzzer/cover.go
new file mode 100644
index 000000000..cc18862bd
--- /dev/null
+++ b/pkg/fuzzer/cover.go
@@ -0,0 +1,56 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package fuzzer
+
+import (
+ "sync"
+
+ "github.com/google/syzkaller/pkg/signal"
+)
+
+// Cover keeps track of the signal known to the fuzzer.
+type Cover struct {
+ mu sync.RWMutex
+ maxSignal signal.Signal // max signal ever observed (including flakes)
+ newSignal signal.Signal // newly identified max signal
+}
+
+// Signal that should no longer be chased after.
+func (cover *Cover) AddMaxSignal(sign signal.Signal) {
+ cover.mu.Lock()
+ defer cover.mu.Unlock()
+ cover.maxSignal.Merge(sign)
+}
+
+func (cover *Cover) addRawMaxSignal(signal []uint32, prio uint8) signal.Signal {
+ cover.mu.Lock()
+ defer cover.mu.Unlock()
+ diff := cover.maxSignal.DiffRaw(signal, prio)
+ if diff.Empty() {
+ return diff
+ }
+ cover.maxSignal.Merge(diff)
+ cover.newSignal.Merge(diff)
+ return diff
+}
+
+func (cover *Cover) GrabNewSignal() signal.Signal {
+ cover.mu.Lock()
+ defer cover.mu.Unlock()
+ sign := cover.newSignal
+ cover.newSignal = nil
+ return sign
+}
+
+type CoverStat struct {
+ MaxSignal int
+}
+
+func (cover *Cover) Stat() CoverStat {
+ cover.mu.RLock()
+ defer cover.mu.RUnlock()
+ return CoverStat{
+ MaxSignal: len(cover.maxSignal),
+ }
+}
diff --git a/pkg/fuzzer/fuzzer.go b/pkg/fuzzer/fuzzer.go
index 64be8325c..17513716a 100644
--- a/pkg/fuzzer/fuzzer.go
+++ b/pkg/fuzzer/fuzzer.go
@@ -12,15 +12,15 @@ import (
"sync/atomic"
"time"
+ "github.com/google/syzkaller/pkg/corpus"
"github.com/google/syzkaller/pkg/hash"
"github.com/google/syzkaller/pkg/ipc"
- "github.com/google/syzkaller/pkg/rpctype"
"github.com/google/syzkaller/prog"
)
type Fuzzer struct {
Config *Config
- Corpus *Corpus
+ Cover *Cover
NeedCandidates chan struct{}
ctx context.Context
@@ -49,7 +49,7 @@ func NewFuzzer(ctx context.Context, cfg *Config, rnd *rand.Rand,
target *prog.Target) *Fuzzer {
f := &Fuzzer{
Config: cfg,
- Corpus: newCorpus(),
+ Cover: &Cover{},
NeedCandidates: make(chan struct{}, 1),
ctx: ctx,
@@ -75,6 +75,7 @@ func NewFuzzer(ctx context.Context, cfg *Config, rnd *rand.Rand,
type Config struct {
Debug bool
+ Corpus *corpus.Corpus
Logf func(level int, msg string, args ...interface{})
Coverage bool
FaultInjection bool
@@ -87,7 +88,7 @@ type Config struct {
// If the number of queued candidates is less than MinCandidates,
// NeedCandidates is triggered.
MinCandidates uint
- NewInputs chan rpctype.Input
+ NewInputs chan corpus.NewInput
}
type Request struct {
@@ -133,7 +134,7 @@ func (fuzzer *Fuzzer) Done(req *Request, res *Result) {
func (fuzzer *Fuzzer) triageProgCall(p *prog.Prog, info *ipc.CallInfo, call int,
flags ProgTypes) {
prio := signalPrio(p, info, call)
- newMaxSignal := fuzzer.Corpus.AddRawMaxSignal(info.Signal, prio)
+ newMaxSignal := fuzzer.Cover.addRawMaxSignal(info.Signal, prio)
if newMaxSignal.Empty() {
return
}
@@ -304,12 +305,12 @@ func (fuzzer *Fuzzer) choiceTableUpdater() {
return
case <-fuzzer.ctRegenerate:
}
- fuzzer.updateChoiceTable(fuzzer.Corpus.Programs())
+ fuzzer.updateChoiceTable(fuzzer.Config.Corpus.Programs())
}
}
func (fuzzer *Fuzzer) ChoiceTable() *prog.ChoiceTable {
- progs := fuzzer.Corpus.Programs()
+ progs := fuzzer.Config.Corpus.Programs()
fuzzer.ctMu.Lock()
defer fuzzer.ctMu.Unlock()
@@ -348,3 +349,15 @@ func (fuzzer *Fuzzer) logCurrentStats() {
fuzzer.Logf(0, "%s", str)
}
}
+
+type Stat struct {
+ CoverStat
+ corpus.Stat
+}
+
+func (fuzzer *Fuzzer) Stat() Stat {
+ return Stat{
+ CoverStat: fuzzer.Cover.Stat(),
+ Stat: fuzzer.Config.Corpus.Stat(),
+ }
+}
diff --git a/pkg/fuzzer/fuzzer_test.go b/pkg/fuzzer/fuzzer_test.go
index 1896b2b84..7000bb062 100644
--- a/pkg/fuzzer/fuzzer_test.go
+++ b/pkg/fuzzer/fuzzer_test.go
@@ -18,10 +18,10 @@ import (
"testing"
"time"
+ "github.com/google/syzkaller/pkg/corpus"
"github.com/google/syzkaller/pkg/csource"
"github.com/google/syzkaller/pkg/ipc"
"github.com/google/syzkaller/pkg/ipc/ipcconfig"
- "github.com/google/syzkaller/pkg/rpctype"
"github.com/google/syzkaller/pkg/testutil"
"github.com/google/syzkaller/prog"
"github.com/google/syzkaller/sys/targets"
@@ -41,7 +41,8 @@ func TestFuzz(t *testing.T) {
defer cancel()
fuzzer := NewFuzzer(ctx, &Config{
- Debug: true,
+ Debug: true,
+ Corpus: corpus.NewCorpus(ctx),
Logf: func(level int, msg string, args ...interface{}) {
if level > 1 {
return
@@ -52,7 +53,7 @@ func TestFuzz(t *testing.T) {
EnabledCalls: map[*prog.Syscall]bool{
target.SyscallMap["syz_test_fuzzer1"]: true,
},
- NewInputs: make(chan rpctype.Input),
+ NewInputs: make(chan corpus.NewInput),
}, rand.New(testutil.RandSource(t)), target)
go func() {
@@ -77,7 +78,7 @@ func TestFuzz(t *testing.T) {
tf.wait()
t.Logf("resulting corpus:")
- for _, p := range fuzzer.Corpus.Programs() {
+ for _, p := range fuzzer.Config.Corpus.Programs() {
t.Logf("-----")
t.Logf("%s", p.Serialize())
}
@@ -99,6 +100,7 @@ func BenchmarkFuzzer(b *testing.B) {
calls[c] = true
}
fuzzer := NewFuzzer(ctx, &Config{
+ Corpus: corpus.NewCorpus(ctx),
Coverage: true,
EnabledCalls: calls,
}, rand.New(rand.NewSource(time.Now().UnixNano())), target)
@@ -160,7 +162,7 @@ func (f *testFuzzer) oneMore() bool {
defer f.mu.Unlock()
f.iter++
if f.iter%100 == 0 {
- stat := f.fuzzer.Corpus.Stat()
+ stat := f.fuzzer.Stat()
f.t.Logf("<iter %d>: corpus %d, signal %d, max signal %d, crash types %d",
f.iter, stat.Progs, stat.Signal, stat.MaxSignal, len(f.crashes))
}
diff --git a/pkg/fuzzer/job.go b/pkg/fuzzer/job.go
index a0295affb..63493c914 100644
--- a/pkg/fuzzer/job.go
+++ b/pkg/fuzzer/job.go
@@ -7,10 +7,9 @@ import (
"fmt"
"math/rand"
+ "github.com/google/syzkaller/pkg/corpus"
"github.com/google/syzkaller/pkg/cover"
- "github.com/google/syzkaller/pkg/hash"
"github.com/google/syzkaller/pkg/ipc"
- "github.com/google/syzkaller/pkg/rpctype"
"github.com/google/syzkaller/pkg/signal"
"github.com/google/syzkaller/prog"
)
@@ -71,7 +70,7 @@ func genProgRequest(fuzzer *Fuzzer, rnd *rand.Rand) *Request {
}
func mutateProgRequest(fuzzer *Fuzzer, rnd *rand.Rand) *Request {
- p := fuzzer.Corpus.chooseProgram(rnd)
+ p := fuzzer.Config.Corpus.ChooseProgram(rnd)
if p == nil {
return nil
}
@@ -80,7 +79,7 @@ func mutateProgRequest(fuzzer *Fuzzer, rnd *rand.Rand) *Request {
prog.RecommendedCalls,
fuzzer.ChoiceTable(),
fuzzer.Config.NoMutateCalls,
- fuzzer.Corpus.Programs(),
+ fuzzer.Config.Corpus.Programs(),
)
return &Request{
Prog: newP,
@@ -126,10 +125,9 @@ func triageJobPrio(flags ProgTypes) jobPriority {
}
func (job *triageJob) run(fuzzer *Fuzzer) {
- callName := ".extra"
logCallName := "extra"
if job.call != -1 {
- callName = job.p.Calls[job.call].Meta.Name
+ callName := job.p.Calls[job.call].Meta.Name
logCallName = fmt.Sprintf("call #%v %v", job.call, callName)
}
fuzzer.Logf(3, "triaging input for %v (new signal=%v)", logCallName, job.newSignal.Len())
@@ -144,9 +142,7 @@ func (job *triageJob) run(fuzzer *Fuzzer) {
return
}
}
- data := job.p.Serialize()
- fuzzer.Logf(2, "added new input for %q to the corpus:\n%s",
- logCallName, string(data))
+ fuzzer.Logf(2, "added new input for %q to the corpus:\n%s", logCallName, job.p.String())
if job.flags&ProgSmashed == 0 {
fuzzer.startJob(&smashJob{
p: job.p.Clone(),
@@ -154,18 +150,18 @@ func (job *triageJob) run(fuzzer *Fuzzer) {
jobPriority: newJobPriority(smashPrio),
})
}
- fuzzer.Corpus.Save(job.p, info.stableSignal, hash.Hash(data))
+ input := corpus.NewInput{
+ Prog: job.p,
+ Call: job.call,
+ Signal: info.stableSignal,
+ Cover: info.cover.Serialize(),
+ RawCover: info.rawCover,
+ }
+ fuzzer.Config.Corpus.Save(input)
if fuzzer.Config.NewInputs != nil {
select {
case <-fuzzer.ctx.Done():
- case fuzzer.Config.NewInputs <- rpctype.Input{
- Call: callName,
- CallID: job.call,
- Prog: data,
- Signal: info.stableSignal.Serialize(),
- Cover: info.cover.Serialize(),
- RawCover: info.rawCover,
- }:
+ case fuzzer.Config.NewInputs <- input:
}
}
}
@@ -298,7 +294,7 @@ func (job *smashJob) run(fuzzer *Fuzzer) {
p.Mutate(rnd, prog.RecommendedCalls,
fuzzer.ChoiceTable(),
fuzzer.Config.NoMutateCalls,
- fuzzer.Corpus.Programs())
+ fuzzer.Config.Corpus.Programs())
result := fuzzer.exec(job, &Request{
Prog: p,
NeedSignal: true,
diff --git a/pkg/rpctype/rpctype.go b/pkg/rpctype/rpctype.go
index 3b1244662..efd9d6589 100644
--- a/pkg/rpctype/rpctype.go
+++ b/pkg/rpctype/rpctype.go
@@ -14,11 +14,10 @@ import (
)
type Input struct {
- Call string
+ Call int // seq number of call in the prog to which the item is related (-1 for extra)
Prog []byte
Signal signal.Serial
Cover []uint32
- CallID int // seq number of call in the prog to which the item is related (-1 for extra)
RawCover []uint32
}
diff --git a/syz-fuzzer/fuzzer.go b/syz-fuzzer/fuzzer.go
index 1adec40d4..0b4970b23 100644
--- a/syz-fuzzer/fuzzer.go
+++ b/syz-fuzzer/fuzzer.go
@@ -17,9 +17,9 @@ import (
"sync/atomic"
"time"
+ "github.com/google/syzkaller/pkg/corpus"
"github.com/google/syzkaller/pkg/csource"
"github.com/google/syzkaller/pkg/fuzzer"
- "github.com/google/syzkaller/pkg/hash"
"github.com/google/syzkaller/pkg/host"
"github.com/google/syzkaller/pkg/ipc"
"github.com/google/syzkaller/pkg/ipc/ipcconfig"
@@ -230,6 +230,7 @@ func main() {
calls[target.Syscalls[id]] = true
}
fuzzerObj := fuzzer.NewFuzzer(context.Background(), &fuzzer.Config{
+ Corpus: corpus.NewCorpus(context.Background()),
Coverage: config.Flags&ipc.FlagSignal > 0,
FaultInjection: r.CheckResult.Features[host.FeatureFault].Enabled,
Comparisons: r.CheckResult.Features[host.FeatureComparisons].Enabled,
@@ -239,7 +240,7 @@ func main() {
LeakChecking: r.CheckResult.Features[host.FeatureLeak].Enabled,
FetchRawCover: *flagRawCover,
MinCandidates: uint(*flagProcs * 2),
- NewInputs: make(chan rpctype.Input),
+ NewInputs: make(chan corpus.NewInput),
}, rnd, target)
fuzzerTool := &FuzzerTool{
@@ -270,7 +271,7 @@ func main() {
for needCandidates, more := true, true; more; needCandidates = false {
more = fuzzerTool.poll(needCandidates, nil)
// This loop lead to "no output" in qemu emulation, tell manager we are not dead.
- stat := fuzzerObj.Corpus.Stat()
+ stat := fuzzerObj.Stat()
log.Logf(0, "fetching corpus: %v, signal %v/%v (executing program)",
stat.Progs, stat.Signal, stat.MaxSignal)
}
@@ -289,7 +290,7 @@ func main() {
}
// Start send input workers.
for i := 0; i < *flagProcs*2; i++ {
- go fuzzerTool.sendInputsWorker()
+ go fuzzerTool.sendInputsWorker(fuzzerObj.Config.NewInputs)
}
fuzzerTool.pollLoop()
}
@@ -388,7 +389,7 @@ func (tool *FuzzerTool) poll(needCandidates bool, stats map[string]uint64) bool
a := &rpctype.PollArgs{
Name: tool.name,
NeedCandidates: needCandidates,
- MaxSignal: fuzzer.Corpus.GrabNewSignal().Serialize(),
+ MaxSignal: fuzzer.Cover.GrabNewSignal().Serialize(),
Stats: stats,
}
r := &rpctype.PollRes{}
@@ -398,7 +399,7 @@ func (tool *FuzzerTool) poll(needCandidates bool, stats map[string]uint64) bool
maxSignal := r.MaxSignal.Deserialize()
log.Logf(1, "poll: candidates=%v inputs=%v signal=%v",
len(r.Candidates), len(r.NewInputs), maxSignal.Len())
- fuzzer.Corpus.AddMaxSignal(maxSignal)
+ fuzzer.Cover.AddMaxSignal(maxSignal)
for _, inp := range r.NewInputs {
tool.inputFromOtherFuzzer(inp)
}
@@ -409,11 +410,11 @@ func (tool *FuzzerTool) poll(needCandidates bool, stats map[string]uint64) bool
return len(r.NewInputs) != 0 || len(r.Candidates) != 0 || maxSignal.Len() != 0
}
-func (tool *FuzzerTool) sendInputsWorker() {
- for inp := range tool.fuzzer.Config.NewInputs {
+func (tool *FuzzerTool) sendInputsWorker(ch <-chan corpus.NewInput) {
+ for update := range ch {
a := &rpctype.NewInputArgs{
Name: tool.name,
- Input: inp,
+ Input: update.RPCInput(),
}
if err := tool.manager.Call("Manager.NewInput", a, nil); err != nil {
log.SyzFatalf("Manager.NewInput call failed: %v", err)
@@ -454,9 +455,12 @@ func (tool *FuzzerTool) inputFromOtherFuzzer(inp rpctype.Input) {
if p == nil {
return
}
- tool.fuzzer.Corpus.Save(p,
- inp.Signal.Deserialize(),
- hash.Hash(inp.Prog))
+ tool.fuzzer.Config.Corpus.Save(corpus.NewInput{
+ Prog: p,
+ Call: inp.Call,
+ Signal: inp.Signal.Deserialize(),
+ Cover: inp.Cover,
+ })
}
func (tool *FuzzerTool) deserializeInput(inp []byte) *prog.Prog {
diff --git a/syz-manager/http.go b/syz-manager/http.go
index 5578705bf..7f2c037ff 100644
--- a/syz-manager/http.go
+++ b/syz-manager/http.go
@@ -24,7 +24,6 @@ import (
"github.com/google/syzkaller/pkg/html/pages"
"github.com/google/syzkaller/pkg/log"
"github.com/google/syzkaller/pkg/osutil"
- "github.com/google/syzkaller/pkg/signal"
"github.com/google/syzkaller/pkg/vcs"
"github.com/google/syzkaller/prog"
"github.com/gorilla/handlers"
@@ -106,8 +105,8 @@ func (mgr *Manager) httpSyscalls(w http.ResponseWriter, r *http.Request) {
data.Calls = append(data.Calls, UICallType{
Name: c,
ID: syscallID,
- Inputs: cc.count,
- Cover: len(cc.cov),
+ Inputs: cc.Count,
+ Cover: len(cc.Cover),
})
}
sort.Slice(data.Calls, func(i, j int) bool {
@@ -131,7 +130,7 @@ func (mgr *Manager) collectStats() []UIStat {
{Name: "config", Value: configName, Link: "/config"},
{Name: "uptime", Value: fmt.Sprint(time.Since(mgr.startTime) / 1e9 * 1e9)},
{Name: "fuzzing", Value: fmt.Sprint(mgr.fuzzingTime / 60e9 * 60e9)},
- {Name: "corpus", Value: fmt.Sprint(len(mgr.corpus)), Link: "/corpus"},
+ {Name: "corpus", Value: fmt.Sprint(mgr.corpus.Stat().Progs), Link: "/corpus"},
{Name: "triage queue", Value: fmt.Sprint(len(mgr.candidates))},
{Name: "signal", Value: fmt.Sprint(rawStats["signal"])},
{Name: "coverage", Value: fmt.Sprint(rawStats["coverage"]), Link: "/cover"},
@@ -203,18 +202,13 @@ func (mgr *Manager) httpCorpus(w http.ResponseWriter, r *http.Request) {
Call: r.FormValue("call"),
RawCover: mgr.cfg.RawCover,
}
- for sig, inp := range mgr.corpus {
- if data.Call != "" && data.Call != inp.Call {
+ for _, inp := range mgr.corpus.Items() {
+ if data.Call != "" && data.Call != inp.StringCall() {
continue
}
- p, err := mgr.target.Deserialize(inp.Prog, prog.NonStrict)
- if err != nil {
- http.Error(w, fmt.Sprintf("failed to deserialize program: %v", err), http.StatusInternalServerError)
- return
- }
data.Inputs = append(data.Inputs, &UIInput{
- Sig: sig,
- Short: p.String(),
+ Sig: inp.Sig,
+ Short: inp.Prog.String(),
Cover: len(inp.Cover),
})
}
@@ -312,7 +306,11 @@ func (mgr *Manager) httpCoverCover(w http.ResponseWriter, r *http.Request, funcF
mgr.mu.Lock()
var progs []cover.Prog
if sig := r.FormValue("input"); sig != "" {
- inp := mgr.corpus[sig]
+ inp := mgr.corpus.Item(sig)
+ if inp == nil {
+ http.Error(w, "unknown input hash", http.StatusInternalServerError)
+ return
+ }
if r.FormValue("update_id") != "" {
updateID, err := strconv.Atoi(r.FormValue("update_id"))
if err != nil || updateID < 0 || updateID >= len(inp.Updates) {
@@ -321,25 +319,25 @@ func (mgr *Manager) httpCoverCover(w http.ResponseWriter, r *http.Request, funcF
}
progs = append(progs, cover.Prog{
Sig: sig,
- Data: string(inp.Prog),
+ Data: string(inp.ProgData),
PCs: coverToPCs(rg, inp.Updates[updateID].RawCover),
})
} else {
progs = append(progs, cover.Prog{
Sig: sig,
- Data: string(inp.Prog),
+ Data: string(inp.ProgData),
PCs: coverToPCs(rg, inp.Cover),
})
}
} else {
call := r.FormValue("call")
- for sig, inp := range mgr.corpus {
- if call != "" && call != inp.Call {
+ for _, inp := range mgr.corpus.Items() {
+ if call != "" && call != inp.StringCall() {
continue
}
progs = append(progs, cover.Prog{
- Sig: sig,
- Data: string(inp.Prog),
+ Sig: inp.Sig,
+ Data: string(inp.ProgData),
PCs: coverToPCs(rg, inp.Cover),
})
}
@@ -387,12 +385,8 @@ func (mgr *Manager) httpCoverCover(w http.ResponseWriter, r *http.Request, funcF
func (mgr *Manager) httpCoverFallback(w http.ResponseWriter, r *http.Request) {
mgr.mu.Lock()
defer mgr.mu.Unlock()
- var maxSignal signal.Signal
- for _, inp := range mgr.corpus {
- maxSignal.Merge(inp.Signal.Deserialize())
- }
calls := make(map[int][]int)
- for s := range maxSignal {
+ for s := range mgr.corpus.Signal() {
id, errno := prog.DecodeFallbackSignal(uint32(s))
calls[id] = append(calls[id], errno)
}
@@ -437,13 +431,8 @@ func (mgr *Manager) httpPrio(w http.ResponseWriter, r *http.Request) {
}
var corpus []*prog.Prog
- for _, inp := range mgr.corpus {
- p, err := mgr.target.Deserialize(inp.Prog, prog.NonStrict)
- if err != nil {
- http.Error(w, fmt.Sprintf("failed to deserialize program: %v", err), http.StatusInternalServerError)
- return
- }
- corpus = append(corpus, p)
+ for _, inp := range mgr.corpus.Items() {
+ corpus = append(corpus, inp.Prog)
}
prios := mgr.target.CalculatePriorities(corpus)
@@ -477,34 +466,34 @@ func (mgr *Manager) httpFile(w http.ResponseWriter, r *http.Request) {
func (mgr *Manager) httpInput(w http.ResponseWriter, r *http.Request) {
mgr.mu.Lock()
defer mgr.mu.Unlock()
- inp, ok := mgr.corpus[r.FormValue("sig")]
- if !ok {
+ inp := mgr.corpus.Item(r.FormValue("sig"))
+ if inp == nil {
http.Error(w, "can't find the input", http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
- w.Write(inp.Prog)
+ w.Write(inp.ProgData)
}
func (mgr *Manager) httpDebugInput(w http.ResponseWriter, r *http.Request) {
mgr.mu.Lock()
defer mgr.mu.Unlock()
- inp, ok := mgr.corpus[r.FormValue("sig")]
- if !ok {
+ inp := mgr.corpus.Item(r.FormValue("sig"))
+ if inp == nil {
http.Error(w, "can't find the input", http.StatusInternalServerError)
return
}
getIDs := func(callID int) []int {
ret := []int{}
for id, update := range inp.Updates {
- if update.CallID == callID {
+ if update.Call == callID {
ret = append(ret, id)
}
}
return ret
}
data := []UIRawCallCover{}
- for pos, line := range strings.Split(string(inp.Prog), "\n") {
+ for pos, line := range strings.Split(string(inp.ProgData), "\n") {
line = strings.TrimSpace(line)
if line == "" {
continue
diff --git a/syz-manager/hub.go b/syz-manager/hub.go
index c10011818..d06a0cd0e 100644
--- a/syz-manager/hub.go
+++ b/syz-manager/hub.go
@@ -215,7 +215,7 @@ func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus [][]byte) error {
func (hc *HubConnector) processProgs(inputs []rpctype.HubInput) (minimized, smashed, dropped int) {
candidates := make([]rpctype.Candidate, 0, len(inputs))
for _, inp := range inputs {
- bad, disabled, _ := checkProgram(hc.target, hc.enabledCalls, inp.Prog)
+ _, disabled, bad := parseProgram(hc.target, hc.enabledCalls, inp.Prog)
if bad != nil || disabled {
log.Logf(0, "rejecting program from hub (bad=%v, disabled=%v):\n%s",
bad, disabled, inp)
@@ -268,7 +268,7 @@ func splitDomains(domain string) (string, string) {
func (hc *HubConnector) processRepros(repros [][]byte) int {
dropped := 0
for _, repro := range repros {
- bad, disabled, _ := checkProgram(hc.target, hc.enabledCalls, repro)
+ _, disabled, bad := parseProgram(hc.target, hc.enabledCalls, repro)
if bad != nil || disabled {
log.Logf(0, "rejecting repro from hub (bad=%v, disabled=%v):\n%s",
bad, disabled, repro)
diff --git a/syz-manager/manager.go b/syz-manager/manager.go
index 3c09e9a0b..ca35d6660 100644
--- a/syz-manager/manager.go
+++ b/syz-manager/manager.go
@@ -5,6 +5,7 @@ package main
import (
"bytes"
+ "context"
"encoding/json"
"flag"
"fmt"
@@ -14,14 +15,13 @@ import (
"os"
"os/exec"
"path/filepath"
- "sort"
"sync"
"sync/atomic"
"time"
"github.com/google/syzkaller/dashboard/dashapi"
"github.com/google/syzkaller/pkg/asset"
- "github.com/google/syzkaller/pkg/cover"
+ "github.com/google/syzkaller/pkg/corpus"
"github.com/google/syzkaller/pkg/csource"
"github.com/google/syzkaller/pkg/db"
"github.com/google/syzkaller/pkg/gce"
@@ -35,7 +35,6 @@ import (
crash_pkg "github.com/google/syzkaller/pkg/report/crash"
"github.com/google/syzkaller/pkg/repro"
"github.com/google/syzkaller/pkg/rpctype"
- "github.com/google/syzkaller/pkg/signal"
"github.com/google/syzkaller/prog"
"github.com/google/syzkaller/sys/targets"
"github.com/google/syzkaller/vm"
@@ -55,7 +54,9 @@ type Manager struct {
reporter *report.Reporter
crashdir string
serv *RPCServer
+ corpus *corpus.Corpus
corpusDB *db.DB
+ corpusDBMu sync.Mutex // for concurrent operations on corpusDB
startTime time.Time
firstConnect time.Time
fuzzingTime time.Duration
@@ -75,7 +76,6 @@ type Manager struct {
candidates []rpctype.Candidate // untriaged inputs from corpus and hub
disabledHashes map[string]struct{}
- corpus map[string]CorpusItem
seeds [][]byte
newRepros [][]byte
lastMinCorpus int
@@ -100,29 +100,6 @@ type Manager struct {
assetStorage *asset.Storage
}
-type CorpusItemUpdate struct {
- CallID int
- RawCover []uint32
-}
-
-type CorpusItem struct {
- Call string
- Prog []byte
- HasAny bool // whether the prog contains squashed arguments
- Signal signal.Serial
- Cover []uint32
- Updates []CorpusItemUpdate
-}
-
-func (item *CorpusItem) RPCInput() rpctype.Input {
- return rpctype.Input{
- Call: item.Call,
- Prog: item.Prog,
- Signal: item.Signal,
- Cover: item.Cover,
- }
-}
-
const (
// Just started, nothing done yet.
phaseInit = iota
@@ -186,9 +163,11 @@ func RunManager(cfg *mgrconfig.Config) {
log.Fatalf("%v", err)
}
+ corpusUpdates := make(chan corpus.NewItemEvent, 32)
mgr := &Manager{
cfg: cfg,
vmPool: vmPool,
+ corpus: corpus.NewMonitoredCorpus(context.Background(), corpusUpdates),
target: cfg.Target,
sysTarget: cfg.SysTarget,
reporter: reporter,
@@ -196,7 +175,6 @@ func RunManager(cfg *mgrconfig.Config) {
startTime: time.Now(),
stats: &Stats{haveHub: cfg.HubClient != ""},
crashTypes: make(map[string]bool),
- corpus: make(map[string]CorpusItem),
disabledHashes: make(map[string]struct{}),
memoryLeakFrames: make(map[string]bool),
dataRaceFrames: make(map[string]bool),
@@ -213,6 +191,7 @@ func RunManager(cfg *mgrconfig.Config) {
mgr.initStats() // Initializes prometheus variables.
mgr.initHTTP() // Creates HTTP server.
mgr.collectUsedFiles()
+ go mgr.saveCorpus(corpusUpdates)
// Create RPC server for fuzzers.
mgr.serv, err = startRPCServer(mgr)
@@ -295,8 +274,9 @@ func (mgr *Manager) initBench() {
mgr.mu.Unlock()
continue
}
- mgr.minimizeCorpus()
- vals["corpus"] = uint64(len(mgr.corpus))
+ mgr.minimizeCorpusUnlocked()
+ stat := mgr.corpus.Stat()
+ vals["corpus"] = uint64(stat.Progs)
vals["uptime"] = uint64(time.Since(mgr.firstConnect)) / 1e9
vals["fuzzing"] = uint64(mgr.fuzzingTime) / 1e9
vals["candidates"] = uint64(len(mgr.candidates))
@@ -687,7 +667,7 @@ func (mgr *Manager) loadCorpus() {
}
func (mgr *Manager) loadProg(data []byte, minimized, smashed bool) bool {
- bad, disabled, _ := checkProgram(mgr.target, mgr.targetEnabledSyscalls, data)
+ _, disabled, bad := parseProgram(mgr.target, mgr.targetEnabledSyscalls, data)
if bad != nil {
return false
}
@@ -736,29 +716,27 @@ func programLeftover(target *prog.Target, enabled map[*prog.Syscall]bool, data [
return p.Serialize()
}
-// The linter complains about error not being the last argument.
-// nolint: stylecheck
-func checkProgram(target *prog.Target, enabled map[*prog.Syscall]bool, data []byte) (bad error, disabled, hasAny bool) {
- p, err := target.Deserialize(data, prog.NonStrict)
+func parseProgram(target *prog.Target, enabled map[*prog.Syscall]bool, data []byte) (
+ p *prog.Prog, disabled bool, err error) {
+ p, err = target.Deserialize(data, prog.NonStrict)
if err != nil {
- return err, true, false
+ return
}
if len(p.Calls) > prog.MaxCalls {
- return fmt.Errorf("longer than %d calls", prog.MaxCalls), true, false
+ return nil, false, fmt.Errorf("longer than %d calls", prog.MaxCalls)
}
// For some yet unknown reasons, programs with fail_nth > 0 may sneak in. Ignore them.
for _, call := range p.Calls {
if call.Props.FailNth > 0 {
- return fmt.Errorf("input has fail_nth > 0"), true, false
+ return nil, false, fmt.Errorf("input has fail_nth > 0")
}
}
- hasAny = p.ContainsAny()
for _, c := range p.Calls {
if !enabled[c.Meta] {
- return nil, true, hasAny
+ return p, true, nil
}
}
- return nil, false, hasAny
+ return p, false, nil
}
func (mgr *Manager) runInstance(index int) (*Crash, error) {
@@ -1224,13 +1202,29 @@ func fullReproLog(stats *repro.Stats) []byte {
stats.SimplifyProgTime, stats.ExtractCTime, stats.SimplifyCTime, stats.Log))
}
+func (mgr *Manager) saveCorpus(updates <-chan corpus.NewItemEvent) {
+ for update := range updates {
+ if update.Exists {
+ // We only save new progs into the corpus.db file.
+ continue
+ }
+ mgr.corpusDBMu.Lock()
+ mgr.corpusDB.Save(update.Sig, update.ProgData, 0)
+ if err := mgr.corpusDB.Flush(); err != nil {
+ log.Errorf("failed to save corpus database: %v", err)
+ }
+ mgr.corpusDBMu.Unlock()
+ }
+}
+
func (mgr *Manager) getMinimizedCorpus() (corpus, repros [][]byte) {
mgr.mu.Lock()
defer mgr.mu.Unlock()
- mgr.minimizeCorpus()
- corpus = make([][]byte, 0, len(mgr.corpus))
- for _, inp := range mgr.corpus {
- corpus = append(corpus, inp.Prog)
+ mgr.minimizeCorpusUnlocked()
+ items := mgr.corpus.Items()
+ corpus = make([][]byte, 0, len(items))
+ for _, inp := range items {
+ corpus = append(corpus, inp.ProgData)
}
repros = mgr.newRepros
mgr.newRepros = nil
@@ -1252,46 +1246,26 @@ func (mgr *Manager) addNewCandidates(candidates []rpctype.Candidate) {
}
}
-func (mgr *Manager) minimizeCorpus() {
- if mgr.phase < phaseLoadedCorpus || len(mgr.corpus) <= mgr.lastMinCorpus*103/100 {
+func (mgr *Manager) minimizeCorpusUnlocked() {
+ currSize := mgr.corpus.Stat().Progs
+ if mgr.phase < phaseLoadedCorpus || currSize <= mgr.lastMinCorpus*103/100 {
return
}
- inputs := make([]signal.Context, 0, len(mgr.corpus))
- for _, inp := range mgr.corpus {
- inputs = append(inputs, signal.Context{
- Signal: inp.Signal.Deserialize(),
- Context: inp,
- })
- }
-
- // Note: inputs are unsorted (based on map iteration).
- // This gives some intentional non-determinism during minimization.
- // However, we want to give preference to non-squashed inputs,
- // so let's sort by this criteria.
- sort.SliceStable(inputs, func(i, j int) bool {
- firstAny := inputs[i].Context.(CorpusItem).HasAny
- secondAny := inputs[j].Context.(CorpusItem).HasAny
- return !firstAny && secondAny
- })
+ mgr.corpus.Minimize(mgr.cfg.Cover)
+ newSize := mgr.corpus.Stat().Progs
- newCorpus := make(map[string]CorpusItem)
- for _, ctx := range signal.Minimize(inputs) {
- inp := ctx.(CorpusItem)
- newCorpus[hash.String(inp.Prog)] = inp
- }
- log.Logf(1, "minimized corpus: %v -> %v", len(mgr.corpus), len(newCorpus))
- mgr.corpus = newCorpus
- mgr.lastMinCorpus = len(newCorpus)
+ log.Logf(1, "minimized corpus: %v -> %v", currSize, newSize)
+ mgr.lastMinCorpus = newSize
// From time to time we get corpus explosion due to different reason:
// generic bugs, per-OS bugs, problems with fallback coverage, kcov bugs, etc.
// This has bad effect on the instance and especially on instances
// connected via hub. Do some per-syscall sanity checking to prevent this.
- for call, info := range mgr.collectSyscallInfoUnlocked() {
+ for call, info := range mgr.corpus.CallCover() {
if mgr.cfg.Cover {
// If we have less than 1K inputs per this call,
// accept all new inputs unconditionally.
- if info.count < 1000 {
+ if info.Count < 1000 {
continue
}
// If we have more than 3K already, don't accept any more.
@@ -1299,13 +1273,13 @@ func (mgr *Manager) minimizeCorpus() {
// Empirically, real coverage for the most saturated syscalls is ~30-60
// per program (even when we have a thousand of them). For explosion
// case coverage tend to be much lower (~0.3-5 per program).
- if info.count < 3000 && len(info.cov)/info.count >= 10 {
+ if info.Count < 3000 && len(info.Cover)/info.Count >= 10 {
continue
}
} else {
// If we don't have real coverage, signal is weak.
// If we have more than several hundreds, there is something wrong.
- if info.count < 300 {
+ if info.Count < 300 {
continue
}
}
@@ -1320,8 +1294,10 @@ func (mgr *Manager) minimizeCorpus() {
if mgr.phase < phaseTriagedCorpus {
return
}
+ mgr.corpusDBMu.Lock()
+ defer mgr.corpusDBMu.Unlock()
for key := range mgr.corpusDB.Records {
- _, ok1 := mgr.corpus[key]
+ ok1 := mgr.corpus.Item(key) != nil
_, ok2 := mgr.disabledHashes[key]
if !ok1 && !ok2 {
mgr.corpusDB.Delete(key)
@@ -1336,32 +1312,21 @@ func setGuiltyFiles(crash *dashapi.Crash, report *report.Report) {
}
}
-type CallCov struct {
- count int
- cov cover.Cover
-}
-
-func (mgr *Manager) collectSyscallInfo() map[string]*CallCov {
+func (mgr *Manager) collectSyscallInfo() map[string]*corpus.CallCov {
mgr.mu.Lock()
- defer mgr.mu.Unlock()
- return mgr.collectSyscallInfoUnlocked()
-}
+ checkResult := mgr.checkResult
+ mgr.mu.Unlock()
-func (mgr *Manager) collectSyscallInfoUnlocked() map[string]*CallCov {
- if mgr.checkResult == nil {
+ if checkResult == nil {
return nil
}
- calls := make(map[string]*CallCov)
- for _, call := range mgr.checkResult.EnabledCalls[mgr.cfg.Sandbox] {
- calls[mgr.target.Syscalls[call].Name] = new(CallCov)
- }
- for _, inp := range mgr.corpus {
- if calls[inp.Call] == nil {
- calls[inp.Call] = new(CallCov)
+ calls := mgr.corpus.CallCover()
+ // Add enabled, but not yet covered calls.
+ for _, call := range checkResult.EnabledCalls[mgr.cfg.Sandbox] {
+ key := mgr.target.Syscalls[call].Name
+ if calls[key] == nil {
+ calls[key] = new(corpus.CallCov)
}
- cc := calls[inp.Call]
- cc.count++
- cc.cov.Merge(inp.Cover)
}
return calls
}
@@ -1371,10 +1336,11 @@ func (mgr *Manager) fuzzerConnect(modules []host.KernelModule) (
mgr.mu.Lock()
defer mgr.mu.Unlock()
- mgr.minimizeCorpus()
- corpus := make([]rpctype.Input, 0, len(mgr.corpus))
- for _, inp := range mgr.corpus {
- corpus = append(corpus, inp.RPCInput())
+ mgr.minimizeCorpusUnlocked()
+ items := mgr.corpus.Items()
+ corpus := make([]rpctype.Input, 0, len(items))
+ for _, inp := range items {
+ corpus = append(corpus, inp.RPCInputShort())
}
frames := BugFrames{
memoryLeaks: make([]string, 0, len(mgr.memoryLeakFrames)),
@@ -1408,45 +1374,14 @@ func (mgr *Manager) machineChecked(a *rpctype.CheckArgs, enabledSyscalls map[*pr
mgr.firstConnect = time.Now()
}
-func (mgr *Manager) newInput(inp rpctype.Input, sign signal.Signal, hasAny bool) bool {
+func (mgr *Manager) newInput(inp corpus.NewInput) bool {
mgr.mu.Lock()
defer mgr.mu.Unlock()
- if mgr.saturatedCalls[inp.Call] {
+ if mgr.saturatedCalls[inp.StringCall()] {
+ // TODO: move this logic to pkg/corpus or pkg/fuzzer?
return false
}
- update := CorpusItemUpdate{
- CallID: inp.CallID,
- RawCover: inp.RawCover,
- }
- sig := hash.String(inp.Prog)
- if old, ok := mgr.corpus[sig]; ok {
- // The input is already present, but possibly with diffent signal/coverage/call.
- sign.Merge(old.Signal.Deserialize())
- old.Signal = sign.Serialize()
- var cov cover.Cover
- cov.Merge(old.Cover)
- cov.Merge(inp.Cover)
- old.Cover = cov.Serialize()
- const maxUpdates = 32
- old.Updates = append(old.Updates, update)
- if len(old.Updates) > maxUpdates {
- old.Updates = old.Updates[:maxUpdates]
- }
- mgr.corpus[sig] = old
- } else {
- mgr.corpus[sig] = CorpusItem{
- Call: inp.Call,
- Prog: inp.Prog,
- HasAny: hasAny,
- Signal: inp.Signal,
- Cover: inp.Cover,
- Updates: []CorpusItemUpdate{update},
- }
- mgr.corpusDB.Save(sig, inp.Prog, 0)
- if err := mgr.corpusDB.Flush(); err != nil {
- log.Errorf("failed to save corpus database: %v", err)
- }
- }
+ mgr.corpus.Save(inp)
return true
}
@@ -1553,11 +1488,12 @@ func (mgr *Manager) dashboardReporter() {
crashes := mgr.stats.crashes.get()
suppressedCrashes := mgr.stats.crashSuppressed.get()
execs := mgr.stats.execTotal.get()
+ corpusStat := mgr.corpus.Stat()
req := &dashapi.ManagerStatsReq{
Name: mgr.cfg.Name,
Addr: webAddr,
UpTime: time.Since(mgr.firstConnect),
- Corpus: uint64(len(mgr.corpus)),
+ Corpus: uint64(corpusStat.Progs),
PCs: mgr.stats.corpusCover.get(),
Cover: mgr.stats.corpusSignal.get(),
CrashTypes: mgr.stats.crashTypes.get(),
diff --git a/syz-manager/rpc.go b/syz-manager/rpc.go
index dfa050334..0fdaf3b46 100644
--- a/syz-manager/rpc.go
+++ b/syz-manager/rpc.go
@@ -10,6 +10,7 @@ import (
"sync"
"time"
+ "github.com/google/syzkaller/pkg/corpus"
"github.com/google/syzkaller/pkg/cover"
"github.com/google/syzkaller/pkg/host"
"github.com/google/syzkaller/pkg/log"
@@ -30,9 +31,13 @@ type RPCServer struct {
batchSize int
canonicalModules *cover.Canonicalizer
- mu sync.Mutex
- fuzzers map[string]*Fuzzer
- checkResult *rpctype.CheckArgs
+ mu sync.Mutex
+ fuzzers map[string]*Fuzzer
+ checkResult *rpctype.CheckArgs
+
+ // TODO: we don't really need these anymore, but there's not much sense
+ // in rewriting the code that uses them -- most of that code will be dropped
+ // once we move pkg/fuzzer to the host.
maxSignal signal.Signal
corpusSignal signal.Signal
corpusCover cover.Cover
@@ -61,7 +66,7 @@ type RPCManagerView interface {
fuzzerConnect([]host.KernelModule) (
[]rpctype.Input, BugFrames, map[uint32]uint32, map[uint32]uint32, error)
machineChecked(result *rpctype.CheckArgs, enabledSyscalls map[*prog.Syscall]bool)
- newInput(inp rpctype.Input, sign signal.Signal, hasAny bool) bool
+ newInput(inp corpus.NewInput) bool
candidateBatch(size int) []rpctype.Candidate
rotateCorpus() bool
}
@@ -256,7 +261,7 @@ func (serv *RPCServer) Check(a *rpctype.CheckArgs, r *int) error {
}
func (serv *RPCServer) NewInput(a *rpctype.NewInputArgs, r *int) error {
- bad, disabled, hasAny := checkProgram(serv.cfg.Target, serv.targetEnabledSyscalls, a.Input.Prog)
+ p, disabled, bad := parseProgram(serv.cfg.Target, serv.targetEnabledSyscalls, a.Input.Prog)
if bad != nil || disabled {
log.Errorf("rejecting program from fuzzer (bad=%v, disabled=%v):\n%s", bad, disabled, a.Input.Prog)
return nil
@@ -269,8 +274,16 @@ func (serv *RPCServer) NewInput(a *rpctype.NewInputArgs, r *int) error {
a.Cover, a.Signal = f.instModules.Canonicalize(a.Cover, a.Signal)
}
inputSignal := a.Signal.Deserialize()
+
+ inp := corpus.NewInput{
+ Prog: p,
+ Call: a.Call,
+ Signal: inputSignal,
+ Cover: a.Cover,
+ }
+
log.Logf(4, "new input from %v for syscall %v (signal=%v, cover=%v)",
- a.Name, a.Call, inputSignal.Len(), len(a.Cover))
+ a.Name, inp.StringCall(), inputSignal.Len(), len(a.Cover))
// Note: f may be nil if we called shutdownInstance,
// but this request is already in-flight.
genuine := !serv.corpusSignal.Diff(inputSignal).Empty()
@@ -281,7 +294,7 @@ func (serv *RPCServer) NewInput(a *rpctype.NewInputArgs, r *int) error {
if !genuine && !rotated {
return nil
}
- if !serv.mgr.newInput(a.Input, inputSignal, hasAny) {
+ if !serv.mgr.newInput(inp) {
return nil
}