aboutsummaryrefslogtreecommitdiffstats
path: root/prog
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2015-10-14 16:55:09 +0200
committerDmitry Vyukov <dvyukov@google.com>2015-10-14 16:55:09 +0200
commitc9b915608d0ae354e5666093c6e6bb7b11d5ce77 (patch)
treeab40104cd3ab597ea32f8f92d632ee51e35640ee /prog
parent38493312da49f7b5b2d6d2c27f0841c3f0a50302 (diff)
initial support for call priorities
Diffstat (limited to 'prog')
-rw-r--r--prog/analysis.go27
-rw-r--r--prog/generation.go8
-rw-r--r--prog/mutation.go12
-rw-r--r--prog/prio.go274
-rw-r--r--prog/rand.go29
5 files changed, 314 insertions, 36 deletions
diff --git a/prog/analysis.go b/prog/analysis.go
index a0b411c89..43d0aa076 100644
--- a/prog/analysis.go
+++ b/prog/analysis.go
@@ -19,16 +19,16 @@ const (
)
type state struct {
- enabledCalls []*sys.Call
- files map[string]bool
- resources map[sys.ResourceKind]map[sys.ResourceSubkind][]*Arg
- strings map[string]bool
- pages [maxPages]bool
+ ct *ChoiceTable
+ files map[string]bool
+ resources map[sys.ResourceKind]map[sys.ResourceSubkind][]*Arg
+ strings map[string]bool
+ pages [maxPages]bool
}
// analyze analyzes the program p up to but not including call c.
-func analyze(enabledCalls []*sys.Call, p *Prog, c *Call) *state {
- s := newState(enabledCalls)
+func analyze(ct *ChoiceTable, p *Prog, c *Call) *state {
+ s := newState(ct)
for _, c1 := range p.Calls {
if c1 == c {
break
@@ -38,15 +38,12 @@ func analyze(enabledCalls []*sys.Call, p *Prog, c *Call) *state {
return s
}
-func newState(enabledCalls []*sys.Call) *state {
+func newState(ct *ChoiceTable) *state {
s := &state{
- enabledCalls: enabledCalls,
- files: make(map[string]bool),
- resources: make(map[sys.ResourceKind]map[sys.ResourceSubkind][]*Arg),
- strings: make(map[string]bool),
- }
- if len(s.enabledCalls) == 0 {
- s.enabledCalls = sys.Calls
+ ct: ct,
+ files: make(map[string]bool),
+ resources: make(map[sys.ResourceKind]map[sys.ResourceSubkind][]*Arg),
+ strings: make(map[string]bool),
}
return s
}
diff --git a/prog/generation.go b/prog/generation.go
index 43e527aa0..b08e98d31 100644
--- a/prog/generation.go
+++ b/prog/generation.go
@@ -5,18 +5,16 @@ package prog
import (
"math/rand"
-
- "github.com/google/syzkaller/sys"
)
// Generate generates a random program of length ~ncalls.
// calls is a set of allowed syscalls, if nil all syscalls are used.
-func Generate(rs rand.Source, ncalls int, enabledCalls []*sys.Call) *Prog {
+func Generate(rs rand.Source, ncalls int, ct *ChoiceTable) *Prog {
p := new(Prog)
r := newRand(rs)
- s := newState(enabledCalls)
+ s := newState(ct)
for len(p.Calls) < ncalls {
- calls := r.generateCall(s)
+ calls := r.generateCall(s, p)
for _, c := range calls {
s.analyze(c)
p.Calls = append(p.Calls, c)
diff --git a/prog/mutation.go b/prog/mutation.go
index ffd093d7e..39b011cf1 100644
--- a/prog/mutation.go
+++ b/prog/mutation.go
@@ -10,7 +10,7 @@ import (
"github.com/google/syzkaller/sys"
)
-func (p *Prog) Mutate(rs rand.Source, ncalls int, enabledCalls []*sys.Call) {
+func (p *Prog) Mutate(rs rand.Source, ncalls int, ct *ChoiceTable) {
r := newRand(rs)
for stop := false; !stop; stop = r.bin() {
r.choose(
@@ -24,8 +24,8 @@ func (p *Prog) Mutate(rs rand.Source, ncalls int, enabledCalls []*sys.Call) {
if idx < len(p.Calls) {
c = p.Calls[idx]
}
- s := analyze(enabledCalls, p, c)
- calls := r.generateCall(s)
+ s := analyze(ct, p, c)
+ calls := r.generateCall(s, p)
p.insertBefore(c, calls)
},
10, func() {
@@ -37,7 +37,7 @@ func (p *Prog) Mutate(rs rand.Source, ncalls int, enabledCalls []*sys.Call) {
if len(c.Args) == 0 {
return
}
- s := analyze(enabledCalls, p, c)
+ s := analyze(ct, p, c)
for stop := false; !stop; stop = r.bin() {
args, bases, parents := mutationArgs(c)
idx := r.Intn(len(args))
@@ -120,7 +120,7 @@ func (p *Prog) Mutate(rs rand.Source, ncalls int, enabledCalls []*sys.Call) {
for _, arg := range referencedArgs(removed, nil) {
c1 := arg.Call
- s := analyze(enabledCalls, p, c1)
+ s := analyze(ct, p, c1)
arg1, _, calls1 := r.generateArg(s, arg.Type, arg.Dir, nil)
replaceArg(p, arg, arg1, calls1)
}
@@ -203,7 +203,7 @@ func (p *Prog) Mutate(rs rand.Source, ncalls int, enabledCalls []*sys.Call) {
for _, arg := range referencedArgs(c.Args, c.Ret) {
c1 := arg.Call
- s := analyze(enabledCalls, p, c1)
+ s := analyze(ct, p, c1)
arg1, _, calls1 := r.generateArg(s, arg.Type, arg.Dir, nil)
replaceArg(p, arg, arg1, calls1)
}
diff --git a/prog/prio.go b/prog/prio.go
new file mode 100644
index 000000000..e8de453f4
--- /dev/null
+++ b/prog/prio.go
@@ -0,0 +1,274 @@
+// Copyright 2015 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package prog
+
+import (
+ "fmt"
+ "math/rand"
+ "sort"
+ "sync"
+
+ "github.com/google/syzkaller/sys"
+)
+
+// Calulation of call-to-call priorities.
+// For a given pair of calls X and Y, the priority is our guess as to whether
+// additional of call Y into a program containing call X is likely to give
+// new coverage or not.
+// The current algorithm has two components: static and dynamic.
+// The static component is based on analysis of argument types. For example,
+// if call X and call Y both accept fd[sock], then they are more likely to give
+// new coverage together.
+// The dynamic component is based on frequency of occurrence of a particular
+// pair of syscalls in a single program in corpus. For example, if socket and
+// connect frequently occur in programs together, we give higher priority to
+// this pair of syscalls.
+// Note: the current implementation is very basic, there is no theory behind any
+// constants.
+
+func CalculatePriorities(corpus []*Prog) [][]float32 {
+ static := getStaticPrio()
+ dynamic := calcDynamicPrio(corpus)
+ for i, prios := range static {
+ for j, p := range prios {
+ dynamic[i][j] *= p
+ }
+ }
+ return dynamic
+}
+
+var (
+ staticOnce sync.Once
+ staticPrio [][]float32
+)
+
+func getStaticPrio() [][]float32 {
+ staticOnce.Do(func() {
+ staticPrio = calcStaticPriorities()
+ })
+ return staticPrio
+}
+
+func calcStaticPriorities() [][]float32 {
+ uses := make(map[string]map[int]float32)
+ for _, c := range sys.Calls {
+ noteUsage := func(weight float32, str string, args ...interface{}) {
+ id := fmt.Sprintf(str, args...)
+ if uses[id] == nil {
+ uses[id] = make(map[int]float32)
+ }
+ old := uses[id][c.ID]
+ if weight > old {
+ uses[id][c.ID] = weight
+ }
+ }
+ foreachArgType(c, func(t sys.Type, d ArgDir) {
+ switch a := t.(type) {
+ case sys.ResourceType:
+ if a.Kind == sys.ResPid || a.Kind == sys.ResUid || a.Kind == sys.ResGid {
+ // Pid/uid/gid usually play auxiliary role,
+ // but massively happen in some structs.
+ noteUsage(0.1, "res%v", a.Kind)
+ } else if a.Subkind == sys.ResAny {
+ noteUsage(1.0, "res%v", a.Kind)
+ } else {
+ noteUsage(0.2, "res%v", a.Kind)
+ noteUsage(1.0, "res%v-%v", a.Kind, a.Subkind)
+ }
+ case sys.PtrType:
+ if _, ok := a.Type.(sys.StructType); ok {
+ noteUsage(1.0, "ptrto-%v", a.Type.Name())
+ }
+ case sys.BufferType:
+ switch a.Kind {
+ case sys.BufferBlob:
+ case sys.BufferString:
+ noteUsage(0.2, "str")
+ case sys.BufferSockaddr:
+ noteUsage(1.0, "sockaddr")
+ default:
+ panic("unknown buffer kind")
+ }
+ case sys.VmaType:
+ noteUsage(0.5, "vma")
+ case sys.FilenameType:
+ noteUsage(1.0, "filename")
+ case sys.IntType:
+ switch a.Kind {
+ case sys.IntPlain:
+ case sys.IntSignalno:
+ noteUsage(1.0, "signalno")
+ case sys.IntInaddr:
+ noteUsage(1.0, "inaddr")
+ default:
+ panic("unknown int kind")
+ }
+ }
+ })
+ }
+ prios := make([][]float32, len(sys.Calls))
+ for i := range prios {
+ prios[i] = make([]float32, len(sys.Calls))
+ }
+ for _, calls := range uses {
+ for c0, w0 := range calls {
+ for c1, w1 := range calls {
+ if c0 == c1 {
+ // Self-priority is assigned below.
+ continue
+ }
+ prios[c0][c1] += w0 * w1
+ }
+ }
+ }
+ // Self-priority (call wrt itself) is assigned to the maximum priority
+ // this call has wrt other calls. This way the priority is high, but not too high.
+ for c0, pp := range prios {
+ var max float32
+ for _, p := range pp {
+ if max < p {
+ max = p
+ }
+ }
+ pp[c0] = max
+ }
+ normalizePrio(prios)
+ return prios
+}
+
+func calcDynamicPrio(corpus []*Prog) [][]float32 {
+ prios := make([][]float32, len(sys.Calls))
+ for i := range prios {
+ prios[i] = make([]float32, len(sys.Calls))
+ }
+ for _, p := range corpus {
+ for i0 := 0; i0 < len(p.Calls); i0++ {
+ for i1 := 0; i1 < len(p.Calls); i1++ {
+ if i0 == i1 {
+ continue
+ }
+ prios[i0][i1] += 1.0
+ }
+ }
+ }
+ normalizePrio(prios)
+ return prios
+}
+
+// normalizePrio assigns some minimal priorities to calls with zero priority,
+// and then normalizes priorities to 0.1..1 range.
+func normalizePrio(prios [][]float32) {
+ for _, prio := range prios {
+ max := float32(0)
+ min := float32(1e10)
+ nzero := 0
+ for _, p := range prio {
+ if max < p {
+ max = p
+ }
+ if p != 0 && min > p {
+ min = p
+ }
+ if p == 0 {
+ nzero++
+ }
+ }
+ if nzero != 0 {
+ min /= 2 * float32(nzero)
+ }
+ for i, p := range prio {
+ if max == 0 {
+ prio[i] = 1
+ continue
+ }
+ if p == 0 {
+ p = min
+ }
+ p = (p-min)/(max-min)*0.9 + 0.1
+ if p > 1 {
+ p = 1
+ }
+ prio[i] = p
+ }
+ }
+}
+
+func foreachArgType(meta *sys.Call, f func(sys.Type, ArgDir)) {
+ var rec func(t sys.Type, dir ArgDir)
+ rec = func(t sys.Type, d ArgDir) {
+ f(t, d)
+ switch a := t.(type) {
+ case sys.ArrayType:
+ rec(a.Type, d)
+ case sys.PtrType:
+ rec(a.Type, ArgDir(a.Dir))
+ case sys.StructType:
+ for _, f := range a.Fields {
+ rec(f, d)
+ }
+ case sys.ResourceType, sys.FileoffType, sys.BufferType,
+ sys.VmaType, sys.LenType, sys.FlagsType, sys.IntType, sys.FilenameType:
+ default:
+ panic("unknown type")
+ }
+ }
+ for _, t := range meta.Args {
+ rec(t, DirIn)
+ }
+ if meta.Ret != nil {
+ rec(meta.Ret, DirOut)
+ }
+}
+
+// ChooseTable allows to do a weighted choice of a syscall for a given syscall
+// based on call-to-call priorities and a set of enabled syscalls.
+type ChoiceTable struct {
+ run [][]int
+}
+
+func BuildChoiceTable(prios [][]float32, enabledCalls []*sys.Call) *ChoiceTable {
+ enabled := make(map[int]bool)
+ for _, c := range enabledCalls {
+ enabled[c.ID] = true
+ }
+ run := make([][]int, len(sys.Calls))
+ for i := range run {
+ if !enabled[i] {
+ continue
+ }
+ run[i] = make([]int, len(sys.Calls))
+ sum := 0
+ for j := range run[i] {
+ if enabled[j] {
+ sum += int(prios[i][j] * 1000)
+ }
+
+ run[i][j] = sum
+ }
+ }
+ return &ChoiceTable{run}
+}
+
+func (ct *ChoiceTable) Choose(r *rand.Rand, call int) int {
+ if ct == nil || call < 0 {
+ return r.Intn(len(sys.Calls))
+ }
+ run := ct.run[call]
+ if run == nil {
+ return r.Intn(len(sys.Calls))
+ }
+ for {
+ x := r.Intn(run[len(run)-1])
+ i := sort.SearchInts(run, x)
+ p := 0
+ if i > 0 {
+ p = ct.run[call][i-1]
+ }
+ if ct.run[call][i] == p {
+ // Call with weight 0, retry.
+ continue
+ }
+ return i
+ }
+}
diff --git a/prog/rand.go b/prog/rand.go
index 82259a04a..498807afe 100644
--- a/prog/rand.go
+++ b/prog/rand.go
@@ -416,7 +416,10 @@ func (r *randGen) createResource(s *state, res sys.ResourceType) (arg *Arg, call
}
return false
}
- for _, meta := range s.enabledCalls {
+ for i, meta := range sys.Calls {
+ if s.ct == nil || s.ct.run[i] == nil {
+ continue
+ }
ok := false
for _, arg := range meta.Args {
if checkArg(arg, DirIn) {
@@ -432,12 +435,7 @@ func (r *randGen) createResource(s *state, res sys.ResourceType) (arg *Arg, call
}
}
if len(metas) == 0 {
- if len(s.enabledCalls) != len(sys.Calls) {
- // We used only a subset of all syscalls,
- // so we legitimately may not be able to create the resource.
- return constArg(res.Default()), nil
- }
- panic(fmt.Sprintf("can't create resource %v/%v", res.Kind, sk))
+ return constArg(res.Default()), nil
}
// Now we have a set of candidate calls that can create the necessary resource.
@@ -446,7 +444,7 @@ func (r *randGen) createResource(s *state, res sys.ResourceType) (arg *Arg, call
meta := metas[r.Intn(len(metas))]
calls := r.generateParticularCall(s, meta)
assignTypeAndDir(calls[len(calls)-1])
- s1 := newState(s.enabledCalls)
+ s1 := newState(s.ct)
s1.analyze(calls[len(calls)-1])
// Now see if we have what we want.
var allres []*Arg
@@ -504,8 +502,19 @@ func (r *randGen) choose(args ...interface{}) {
panic("choose is broken")
}
-func (r *randGen) generateCall(s *state) []*Call {
- meta := s.enabledCalls[r.rand(len(s.enabledCalls))]
+func (r *randGen) generateCall(s *state, p *Prog) []*Call {
+ call := -1
+ if len(p.Calls) != 0 {
+ for i := 0; i < 5; i++ {
+ c := p.Calls[r.Intn(len(p.Calls))].Meta
+ call = c.ID
+ // There is roughly half of mmap's so ignore them.
+ if c.Name != "mmap" {
+ break
+ }
+ }
+ }
+ meta := sys.Calls[s.ct.Choose(r.Rand, call)]
return r.generateParticularCall(s, meta)
}