diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2015-10-14 16:55:09 +0200 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2015-10-14 16:55:09 +0200 |
| commit | c9b915608d0ae354e5666093c6e6bb7b11d5ce77 (patch) | |
| tree | ab40104cd3ab597ea32f8f92d632ee51e35640ee /prog | |
| parent | 38493312da49f7b5b2d6d2c27f0841c3f0a50302 (diff) | |
initial support for call priorities
Diffstat (limited to 'prog')
| -rw-r--r-- | prog/analysis.go | 27 | ||||
| -rw-r--r-- | prog/generation.go | 8 | ||||
| -rw-r--r-- | prog/mutation.go | 12 | ||||
| -rw-r--r-- | prog/prio.go | 274 | ||||
| -rw-r--r-- | prog/rand.go | 29 |
5 files changed, 314 insertions, 36 deletions
diff --git a/prog/analysis.go b/prog/analysis.go index a0b411c89..43d0aa076 100644 --- a/prog/analysis.go +++ b/prog/analysis.go @@ -19,16 +19,16 @@ const ( ) type state struct { - enabledCalls []*sys.Call - files map[string]bool - resources map[sys.ResourceKind]map[sys.ResourceSubkind][]*Arg - strings map[string]bool - pages [maxPages]bool + ct *ChoiceTable + files map[string]bool + resources map[sys.ResourceKind]map[sys.ResourceSubkind][]*Arg + strings map[string]bool + pages [maxPages]bool } // analyze analyzes the program p up to but not including call c. -func analyze(enabledCalls []*sys.Call, p *Prog, c *Call) *state { - s := newState(enabledCalls) +func analyze(ct *ChoiceTable, p *Prog, c *Call) *state { + s := newState(ct) for _, c1 := range p.Calls { if c1 == c { break @@ -38,15 +38,12 @@ func analyze(enabledCalls []*sys.Call, p *Prog, c *Call) *state { return s } -func newState(enabledCalls []*sys.Call) *state { +func newState(ct *ChoiceTable) *state { s := &state{ - enabledCalls: enabledCalls, - files: make(map[string]bool), - resources: make(map[sys.ResourceKind]map[sys.ResourceSubkind][]*Arg), - strings: make(map[string]bool), - } - if len(s.enabledCalls) == 0 { - s.enabledCalls = sys.Calls + ct: ct, + files: make(map[string]bool), + resources: make(map[sys.ResourceKind]map[sys.ResourceSubkind][]*Arg), + strings: make(map[string]bool), } return s } diff --git a/prog/generation.go b/prog/generation.go index 43e527aa0..b08e98d31 100644 --- a/prog/generation.go +++ b/prog/generation.go @@ -5,18 +5,16 @@ package prog import ( "math/rand" - - "github.com/google/syzkaller/sys" ) // Generate generates a random program of length ~ncalls. // calls is a set of allowed syscalls, if nil all syscalls are used. -func Generate(rs rand.Source, ncalls int, enabledCalls []*sys.Call) *Prog { +func Generate(rs rand.Source, ncalls int, ct *ChoiceTable) *Prog { p := new(Prog) r := newRand(rs) - s := newState(enabledCalls) + s := newState(ct) for len(p.Calls) < ncalls { - calls := r.generateCall(s) + calls := r.generateCall(s, p) for _, c := range calls { s.analyze(c) p.Calls = append(p.Calls, c) diff --git a/prog/mutation.go b/prog/mutation.go index ffd093d7e..39b011cf1 100644 --- a/prog/mutation.go +++ b/prog/mutation.go @@ -10,7 +10,7 @@ import ( "github.com/google/syzkaller/sys" ) -func (p *Prog) Mutate(rs rand.Source, ncalls int, enabledCalls []*sys.Call) { +func (p *Prog) Mutate(rs rand.Source, ncalls int, ct *ChoiceTable) { r := newRand(rs) for stop := false; !stop; stop = r.bin() { r.choose( @@ -24,8 +24,8 @@ func (p *Prog) Mutate(rs rand.Source, ncalls int, enabledCalls []*sys.Call) { if idx < len(p.Calls) { c = p.Calls[idx] } - s := analyze(enabledCalls, p, c) - calls := r.generateCall(s) + s := analyze(ct, p, c) + calls := r.generateCall(s, p) p.insertBefore(c, calls) }, 10, func() { @@ -37,7 +37,7 @@ func (p *Prog) Mutate(rs rand.Source, ncalls int, enabledCalls []*sys.Call) { if len(c.Args) == 0 { return } - s := analyze(enabledCalls, p, c) + s := analyze(ct, p, c) for stop := false; !stop; stop = r.bin() { args, bases, parents := mutationArgs(c) idx := r.Intn(len(args)) @@ -120,7 +120,7 @@ func (p *Prog) Mutate(rs rand.Source, ncalls int, enabledCalls []*sys.Call) { for _, arg := range referencedArgs(removed, nil) { c1 := arg.Call - s := analyze(enabledCalls, p, c1) + s := analyze(ct, p, c1) arg1, _, calls1 := r.generateArg(s, arg.Type, arg.Dir, nil) replaceArg(p, arg, arg1, calls1) } @@ -203,7 +203,7 @@ func (p *Prog) Mutate(rs rand.Source, ncalls int, enabledCalls []*sys.Call) { for _, arg := range referencedArgs(c.Args, c.Ret) { c1 := arg.Call - s := analyze(enabledCalls, p, c1) + s := analyze(ct, p, c1) arg1, _, calls1 := r.generateArg(s, arg.Type, arg.Dir, nil) replaceArg(p, arg, arg1, calls1) } diff --git a/prog/prio.go b/prog/prio.go new file mode 100644 index 000000000..e8de453f4 --- /dev/null +++ b/prog/prio.go @@ -0,0 +1,274 @@ +// Copyright 2015 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package prog + +import ( + "fmt" + "math/rand" + "sort" + "sync" + + "github.com/google/syzkaller/sys" +) + +// Calulation of call-to-call priorities. +// For a given pair of calls X and Y, the priority is our guess as to whether +// additional of call Y into a program containing call X is likely to give +// new coverage or not. +// The current algorithm has two components: static and dynamic. +// The static component is based on analysis of argument types. For example, +// if call X and call Y both accept fd[sock], then they are more likely to give +// new coverage together. +// The dynamic component is based on frequency of occurrence of a particular +// pair of syscalls in a single program in corpus. For example, if socket and +// connect frequently occur in programs together, we give higher priority to +// this pair of syscalls. +// Note: the current implementation is very basic, there is no theory behind any +// constants. + +func CalculatePriorities(corpus []*Prog) [][]float32 { + static := getStaticPrio() + dynamic := calcDynamicPrio(corpus) + for i, prios := range static { + for j, p := range prios { + dynamic[i][j] *= p + } + } + return dynamic +} + +var ( + staticOnce sync.Once + staticPrio [][]float32 +) + +func getStaticPrio() [][]float32 { + staticOnce.Do(func() { + staticPrio = calcStaticPriorities() + }) + return staticPrio +} + +func calcStaticPriorities() [][]float32 { + uses := make(map[string]map[int]float32) + for _, c := range sys.Calls { + noteUsage := func(weight float32, str string, args ...interface{}) { + id := fmt.Sprintf(str, args...) + if uses[id] == nil { + uses[id] = make(map[int]float32) + } + old := uses[id][c.ID] + if weight > old { + uses[id][c.ID] = weight + } + } + foreachArgType(c, func(t sys.Type, d ArgDir) { + switch a := t.(type) { + case sys.ResourceType: + if a.Kind == sys.ResPid || a.Kind == sys.ResUid || a.Kind == sys.ResGid { + // Pid/uid/gid usually play auxiliary role, + // but massively happen in some structs. + noteUsage(0.1, "res%v", a.Kind) + } else if a.Subkind == sys.ResAny { + noteUsage(1.0, "res%v", a.Kind) + } else { + noteUsage(0.2, "res%v", a.Kind) + noteUsage(1.0, "res%v-%v", a.Kind, a.Subkind) + } + case sys.PtrType: + if _, ok := a.Type.(sys.StructType); ok { + noteUsage(1.0, "ptrto-%v", a.Type.Name()) + } + case sys.BufferType: + switch a.Kind { + case sys.BufferBlob: + case sys.BufferString: + noteUsage(0.2, "str") + case sys.BufferSockaddr: + noteUsage(1.0, "sockaddr") + default: + panic("unknown buffer kind") + } + case sys.VmaType: + noteUsage(0.5, "vma") + case sys.FilenameType: + noteUsage(1.0, "filename") + case sys.IntType: + switch a.Kind { + case sys.IntPlain: + case sys.IntSignalno: + noteUsage(1.0, "signalno") + case sys.IntInaddr: + noteUsage(1.0, "inaddr") + default: + panic("unknown int kind") + } + } + }) + } + prios := make([][]float32, len(sys.Calls)) + for i := range prios { + prios[i] = make([]float32, len(sys.Calls)) + } + for _, calls := range uses { + for c0, w0 := range calls { + for c1, w1 := range calls { + if c0 == c1 { + // Self-priority is assigned below. + continue + } + prios[c0][c1] += w0 * w1 + } + } + } + // Self-priority (call wrt itself) is assigned to the maximum priority + // this call has wrt other calls. This way the priority is high, but not too high. + for c0, pp := range prios { + var max float32 + for _, p := range pp { + if max < p { + max = p + } + } + pp[c0] = max + } + normalizePrio(prios) + return prios +} + +func calcDynamicPrio(corpus []*Prog) [][]float32 { + prios := make([][]float32, len(sys.Calls)) + for i := range prios { + prios[i] = make([]float32, len(sys.Calls)) + } + for _, p := range corpus { + for i0 := 0; i0 < len(p.Calls); i0++ { + for i1 := 0; i1 < len(p.Calls); i1++ { + if i0 == i1 { + continue + } + prios[i0][i1] += 1.0 + } + } + } + normalizePrio(prios) + return prios +} + +// normalizePrio assigns some minimal priorities to calls with zero priority, +// and then normalizes priorities to 0.1..1 range. +func normalizePrio(prios [][]float32) { + for _, prio := range prios { + max := float32(0) + min := float32(1e10) + nzero := 0 + for _, p := range prio { + if max < p { + max = p + } + if p != 0 && min > p { + min = p + } + if p == 0 { + nzero++ + } + } + if nzero != 0 { + min /= 2 * float32(nzero) + } + for i, p := range prio { + if max == 0 { + prio[i] = 1 + continue + } + if p == 0 { + p = min + } + p = (p-min)/(max-min)*0.9 + 0.1 + if p > 1 { + p = 1 + } + prio[i] = p + } + } +} + +func foreachArgType(meta *sys.Call, f func(sys.Type, ArgDir)) { + var rec func(t sys.Type, dir ArgDir) + rec = func(t sys.Type, d ArgDir) { + f(t, d) + switch a := t.(type) { + case sys.ArrayType: + rec(a.Type, d) + case sys.PtrType: + rec(a.Type, ArgDir(a.Dir)) + case sys.StructType: + for _, f := range a.Fields { + rec(f, d) + } + case sys.ResourceType, sys.FileoffType, sys.BufferType, + sys.VmaType, sys.LenType, sys.FlagsType, sys.IntType, sys.FilenameType: + default: + panic("unknown type") + } + } + for _, t := range meta.Args { + rec(t, DirIn) + } + if meta.Ret != nil { + rec(meta.Ret, DirOut) + } +} + +// ChooseTable allows to do a weighted choice of a syscall for a given syscall +// based on call-to-call priorities and a set of enabled syscalls. +type ChoiceTable struct { + run [][]int +} + +func BuildChoiceTable(prios [][]float32, enabledCalls []*sys.Call) *ChoiceTable { + enabled := make(map[int]bool) + for _, c := range enabledCalls { + enabled[c.ID] = true + } + run := make([][]int, len(sys.Calls)) + for i := range run { + if !enabled[i] { + continue + } + run[i] = make([]int, len(sys.Calls)) + sum := 0 + for j := range run[i] { + if enabled[j] { + sum += int(prios[i][j] * 1000) + } + + run[i][j] = sum + } + } + return &ChoiceTable{run} +} + +func (ct *ChoiceTable) Choose(r *rand.Rand, call int) int { + if ct == nil || call < 0 { + return r.Intn(len(sys.Calls)) + } + run := ct.run[call] + if run == nil { + return r.Intn(len(sys.Calls)) + } + for { + x := r.Intn(run[len(run)-1]) + i := sort.SearchInts(run, x) + p := 0 + if i > 0 { + p = ct.run[call][i-1] + } + if ct.run[call][i] == p { + // Call with weight 0, retry. + continue + } + return i + } +} diff --git a/prog/rand.go b/prog/rand.go index 82259a04a..498807afe 100644 --- a/prog/rand.go +++ b/prog/rand.go @@ -416,7 +416,10 @@ func (r *randGen) createResource(s *state, res sys.ResourceType) (arg *Arg, call } return false } - for _, meta := range s.enabledCalls { + for i, meta := range sys.Calls { + if s.ct == nil || s.ct.run[i] == nil { + continue + } ok := false for _, arg := range meta.Args { if checkArg(arg, DirIn) { @@ -432,12 +435,7 @@ func (r *randGen) createResource(s *state, res sys.ResourceType) (arg *Arg, call } } if len(metas) == 0 { - if len(s.enabledCalls) != len(sys.Calls) { - // We used only a subset of all syscalls, - // so we legitimately may not be able to create the resource. - return constArg(res.Default()), nil - } - panic(fmt.Sprintf("can't create resource %v/%v", res.Kind, sk)) + return constArg(res.Default()), nil } // Now we have a set of candidate calls that can create the necessary resource. @@ -446,7 +444,7 @@ func (r *randGen) createResource(s *state, res sys.ResourceType) (arg *Arg, call meta := metas[r.Intn(len(metas))] calls := r.generateParticularCall(s, meta) assignTypeAndDir(calls[len(calls)-1]) - s1 := newState(s.enabledCalls) + s1 := newState(s.ct) s1.analyze(calls[len(calls)-1]) // Now see if we have what we want. var allres []*Arg @@ -504,8 +502,19 @@ func (r *randGen) choose(args ...interface{}) { panic("choose is broken") } -func (r *randGen) generateCall(s *state) []*Call { - meta := s.enabledCalls[r.rand(len(s.enabledCalls))] +func (r *randGen) generateCall(s *state, p *Prog) []*Call { + call := -1 + if len(p.Calls) != 0 { + for i := 0; i < 5; i++ { + c := p.Calls[r.Intn(len(p.Calls))].Meta + call = c.ID + // There is roughly half of mmap's so ignore them. + if c.Name != "mmap" { + break + } + } + } + meta := sys.Calls[s.ct.Choose(r.Rand, call)] return r.generateParticularCall(s, meta) } |
