aboutsummaryrefslogtreecommitdiffstats
path: root/fuzzer
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2015-10-12 10:16:57 +0200
committerDmitry Vyukov <dvyukov@google.com>2015-10-12 10:16:57 +0200
commit874c5754bb22dbf77d6b600ff91f0f4f1fc5073a (patch)
tree0075fbd088046ad5c86e6e972235701d68b3ce7c /fuzzer
initial commit
Diffstat (limited to 'fuzzer')
-rw-r--r--fuzzer/fuzzer.go418
1 files changed, 418 insertions, 0 deletions
diff --git a/fuzzer/fuzzer.go b/fuzzer/fuzzer.go
new file mode 100644
index 000000000..5cca527ad
--- /dev/null
+++ b/fuzzer/fuzzer.go
@@ -0,0 +1,418 @@
+// Copyright 2015 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package main
+
+// TODO: implement some form of smashing of new inputs.
+// E.g. alter arguments while the program still gives the new coverage,
+// i.e. aim at cracking new branches and triggering bugs in that new piece of code.
+
+import (
+ "bytes"
+ "crypto/sha1"
+ "encoding/binary"
+ "flag"
+ "fmt"
+ "log"
+ "math/rand"
+ "net/rpc"
+ "os"
+ "runtime/debug"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/google/syzkaller/cover"
+ "github.com/google/syzkaller/ipc"
+ "github.com/google/syzkaller/prog"
+ . "github.com/google/syzkaller/rpctype"
+ "github.com/google/syzkaller/sys"
+)
+
+var (
+ flagName = flag.String("name", "", "unique name for manager")
+ flagExecutor = flag.String("executor", "", "path to executor binary")
+ flagManager = flag.String("manager", "", "manager rpc address")
+ flagStrace = flag.Bool("strace", false, "run executor under strace")
+ flagParallel = flag.Int("parallel", 1, "run that many tests in parallel")
+ flagSaveProg = flag.Bool("saveprog", false, "save programs into local file before executing")
+ flagSyscalls = flag.String("calls", "", "comma-delimited list of enabled syscall IDs (empty string for all syscalls)")
+ flagV = flag.Int("v", 0, "verbosity")
+)
+
+const (
+ programLength = 30
+)
+
+type Sig [sha1.Size]byte
+
+func hash(data []byte) Sig {
+ return Sig(sha1.Sum(data))
+}
+
+type Input struct {
+ p *prog.Prog
+ call int
+ cover cover.Cover
+}
+
+var (
+ corpusCover []cover.Cover
+ maxCover []cover.Cover
+ flakes cover.Cover
+ corpus []Input
+ corpusHashes map[Sig]struct{}
+ triage []Input
+ manager *rpc.Client
+
+ workerIn = make(chan *prog.Prog, 10)
+ workerOut = make(chan []Input, 10)
+)
+
+func main() {
+ debug.SetGCPercent(50)
+ flag.Parse()
+ logf(0, "started")
+
+ var calls []*sys.Call
+ if *flagSyscalls != "" {
+ for _, id := range strings.Split(*flagSyscalls, ",") {
+ n, err := strconv.ParseUint(id, 10, 64)
+ if err != nil || n >= uint64(len(sys.Calls)) {
+ panic(fmt.Sprintf("invalid syscall in -calls flag: '%v", id))
+ }
+ calls = append(calls, sys.Calls[n])
+ }
+ }
+
+ corpusCover = make([]cover.Cover, sys.CallCount)
+ maxCover = make([]cover.Cover, sys.CallCount)
+ corpusHashes = make(map[Sig]struct{})
+
+ conn, err := rpc.Dial("tcp", *flagManager)
+ if err != nil {
+ panic(err)
+ }
+ manager = conn
+ a := &ManagerConnectArgs{*flagName}
+ r := &ManagerConnectRes{}
+ if err := manager.Call("Manager.Connect", a, r); err != nil {
+ panic(err)
+ }
+
+ if *flagParallel <= 0 {
+ *flagParallel = 1
+ }
+ flags := ipc.FlagCover
+ if *flagStrace {
+ flags |= ipc.FlagStrace
+ }
+ workerIn = make(chan *prog.Prog, *flagParallel+10)
+ workerOut = make(chan []Input, *flagParallel)
+ for i := 0; i < *flagParallel; i++ {
+ env, err := ipc.MakeEnv(*flagExecutor, 4*time.Second, flags)
+ if err != nil {
+ panic(err)
+ }
+ workerId := i + 1
+ go func() {
+ for p := range workerIn {
+ workerOut <- execute(env, p, workerId)
+ }
+ }()
+ }
+ env, err := ipc.MakeEnv(*flagExecutor, 4*time.Second, flags)
+ if err != nil {
+ panic(err)
+ }
+ rs := rand.NewSource(time.Now().UnixNano())
+ rnd := rand.New(rs)
+ var lastPoll time.Time
+ var lastPrint time.Time
+ secondTicker := time.NewTicker(100 * time.Millisecond).C
+ for i := 0; ; i++ {
+ if !*flagSaveProg && time.Since(lastPrint) > 10*time.Second {
+ // Keep-alive for manager.
+ logf(0, "#%v: alive", i)
+ lastPrint = time.Now()
+ }
+ if len(triage) != 0 {
+ last := len(triage) - 1
+ inp := triage[last]
+ triage = triage[:last]
+ logf(1, "#%v: triaging : %s", i, inp.p)
+ triageInput(env, inp)
+ continue
+ }
+ if time.Since(lastPoll) > 10*time.Second {
+ a := &ManagerPollArgs{*flagName}
+ r := &ManagerPollRes{}
+ if err := manager.Call("Manager.Poll", a, r); err != nil {
+ panic(err)
+ }
+ for _, inp := range r.NewInputs {
+ addInput(inp)
+ }
+ for _, data := range r.Candidates {
+ p, err := prog.Deserialize(data)
+ if err != nil {
+ panic(err)
+ }
+ inputs := execute(env, p, 0)
+ for _, inp := range inputs {
+ call := inp.p.Calls[inp.call].Meta
+ maxCover[call.CallID] = cover.Union(maxCover[call.CallID], inp.cover)
+ triage = append(triage, inp)
+ }
+ }
+ if len(r.NewInputs) == 0 && len(r.Candidates) == 0 {
+ lastPoll = time.Now()
+ }
+ continue
+ }
+ // Parallel part.
+ pending := 0
+ for ; ; i++ {
+ if !(!*flagSaveProg && time.Since(lastPrint) > 10*time.Second) &&
+ !(len(triage) != 0) &&
+ !(time.Since(lastPoll) > 10*time.Second) {
+ // No need to do any work above.
+ // Send new inputs to workers, if they need some.
+ for len(workerIn) < *flagParallel {
+ if len(corpus) == 0 || i%10 == 0 {
+ p := prog.Generate(rnd, programLength, calls)
+ logf(1, "#%v: generated: %s", i, p)
+ workerIn <- p
+ pending++
+ p = p.Clone()
+ p.Mutate(rnd, programLength, calls)
+ logf(1, "#%v: mutated: %s", i, p)
+ workerIn <- p
+ pending++
+ } else {
+ inp := corpus[rnd.Intn(len(corpus))]
+ p := inp.p.Clone()
+ p.Mutate(rs, programLength, calls)
+ logf(1, "#%v: mutated: %s <- %s", i, p, inp.p)
+ workerIn <- p
+ pending++
+ }
+ }
+ } else if pending == 0 {
+ // Need to do some work above.
+ // Break if collected all pending results.
+ break
+ }
+ // Collect results.
+ select {
+ case inputs := <-workerOut:
+ pending--
+ for _, inp := range inputs {
+ triage = append(triage, inp)
+ }
+ case <-secondTicker:
+ }
+ }
+ // Do this after the parallel section because workers access maxCover.
+ for _, inp := range triage {
+ call := inp.p.Calls[inp.call].Meta
+ maxCover[call.CallID] = cover.Union(maxCover[call.CallID], inp.cover)
+ }
+ }
+}
+
+func addInput(inp RpcInput) {
+ p, err := prog.Deserialize(inp.Prog)
+ if err != nil {
+ panic(err)
+ }
+ if inp.CallIndex < 0 || inp.CallIndex >= len(p.Calls) {
+ panic("bad call index")
+ }
+ call := p.Calls[inp.CallIndex].Meta
+ sig := hash(inp.Prog)
+ if _, ok := corpusHashes[sig]; ok {
+ return
+ }
+ cov := cover.Canonicalize(inp.Cover)
+ diff := cover.Difference(cov, maxCover[call.CallID])
+ diff = cover.Difference(diff, flakes)
+ if len(diff) == 0 {
+ return
+ }
+ inp1 := Input{p, inp.CallIndex, cov}
+ corpus = append(corpus, inp1)
+ corpusCover[call.CallID] = cover.Union(corpusCover[call.CallID], cov)
+ maxCover[call.CallID] = cover.Union(maxCover[call.CallID], cov)
+ corpusHashes[hash(inp.Prog)] = struct{}{}
+}
+
+func triageInput(env *ipc.Env, inp Input) {
+ call := inp.p.Calls[inp.call].Meta
+ newCover := cover.Difference(inp.cover, corpusCover[call.CallID])
+ newCover = cover.Difference(newCover, flakes)
+ if len(newCover) == 0 {
+ return
+ }
+
+ if _, ok := corpusHashes[hash(inp.p.Serialize())]; ok {
+ return
+ }
+
+ minCover := inp.cover
+ for i := 0; i < 3; i++ {
+ allCover := execute1(env, inp.p, 0)
+ if len(allCover[inp.call]) == 0 {
+ // The call was not executed. Happens sometimes, reason unknown.
+ continue
+ }
+ cov := allCover[inp.call]
+ diff := cover.SymmetricDifference(inp.cover, cov)
+ if len(diff) != 0 {
+ flakes = cover.Union(flakes, diff)
+ }
+ minCover = cover.Intersection(minCover, cov)
+ }
+ stableNewCover := cover.Intersection(newCover, minCover)
+ if len(stableNewCover) == 0 {
+ return
+ }
+ inp.p, inp.call = prog.Minimize(inp.p, inp.call, func(p1 *prog.Prog, call1 int) bool {
+ allCover := execute1(env, p1, 0)
+ if len(allCover[call1]) == 0 {
+ return false // The call was not executed.
+ }
+ cov := allCover[call1]
+ if len(cover.Intersection(stableNewCover, cov)) != len(stableNewCover) {
+ return false
+ }
+ minCover = cover.Intersection(minCover, cov)
+ return true
+ })
+ inp.cover = minCover
+ corpusCover[call.CallID] = cover.Union(corpusCover[call.CallID], minCover)
+ corpus = append(corpus, inp)
+ data := inp.p.Serialize()
+ corpusHashes[hash(data)] = struct{}{}
+
+ logf(2, "added new input for %v to corpus:\n%s", call.CallName, data)
+
+ a := &NewManagerInputArgs{*flagName, RpcInput{call.CallName, inp.p.Serialize(), inp.call, []uint32(inp.cover)}}
+ if err := manager.Call("Manager.NewInput", a, nil); err != nil {
+ panic(err)
+ }
+}
+
+func execute(env *ipc.Env, p *prog.Prog, workerId int) []Input {
+ allCover := execute1(env, p, workerId)
+ var inputs []Input
+ for i, cov := range allCover {
+ if len(cov) == 0 {
+ continue
+ }
+ c := p.Calls[i].Meta
+ diff := cover.Difference(cov, maxCover[c.CallID])
+ diff = cover.Difference(diff, flakes)
+ if len(diff) != 0 {
+ p1 := p.Clone()
+ p1.TrimAfter(i)
+ inputs = append(inputs, Input{p1, i, cover.Copy(cov)})
+ }
+ }
+ return inputs
+}
+
+var logMu sync.Mutex
+
+func execute1(env *ipc.Env, p *prog.Prog, workerId int) []cover.Cover {
+ if *flagSaveProg {
+ f, err := os.Create(fmt.Sprintf("%v-%v.prog", *flagName, workerId))
+ if err == nil {
+ f.Write(p.Serialize())
+ f.Close()
+ }
+ } else {
+ // The following output helps to understand what program crashed kernel.
+ // It must not be intermixed.
+ logMu.Lock()
+ log.Printf("worker #%v: executing program:\n%s", workerId, p.Serialize())
+ logMu.Unlock()
+ }
+
+ try := 0
+retry:
+ exec := p.SerializeForExec()
+ if len(exec) > len(env.In) {
+ panic("program is too long")
+ }
+ copy(env.In, exec)
+ // Zero out the first word (ncmd), so that we don't have garbage there
+ // if executor crashes before writing non-garbage there.
+ for i := 0; i < 4; i++ {
+ env.Out[i] = 0
+ }
+ output, strace, failed, hanged, err := env.Exec()
+ if err != nil {
+ if try > 10 {
+ panic(err)
+ }
+ try++
+ debug.FreeOSMemory()
+ time.Sleep(time.Second)
+ goto retry
+ }
+ logf(4, "result failed=%v hanged=%v:\n%v\n", failed, hanged, string(output))
+ if len(strace) != 0 {
+ logf(4, "strace:\n%s\n", strace)
+ }
+ r := bytes.NewReader(env.Out)
+ var ncmd uint32
+ if err := binary.Read(r, binary.LittleEndian, &ncmd); err != nil {
+ panic(err)
+ }
+ cov := make([]cover.Cover, len(p.Calls))
+ for i := uint32(0); i < ncmd; i++ {
+ var callIndex, callNum, coverSize, pc uint32
+ if err := binary.Read(r, binary.LittleEndian, &callIndex); err != nil {
+ panic(err)
+ }
+ if err := binary.Read(r, binary.LittleEndian, &callNum); err != nil {
+ panic(err)
+ }
+ if err := binary.Read(r, binary.LittleEndian, &coverSize); err != nil {
+ panic(err)
+ }
+ if int(callIndex) > len(cov) {
+ panic(fmt.Sprintf("expect index %v, got %v", i, callIndex))
+ }
+ c := p.Calls[callIndex]
+ if num := c.Meta.ID; uint32(num) != callNum {
+ logf(0, "ERROR: call %v: expect syscall %v, got %v, executed %v", callIndex, num, callNum, ncmd)
+ logf(0, "program:\n%s", p.Serialize())
+ logf(0, "output:\n%s", output)
+ }
+ cover1 := make([]uint32, 0, coverSize)
+ for j := uint32(0); j < coverSize; j++ {
+ if err := binary.Read(r, binary.LittleEndian, &pc); err != nil {
+ panic(err)
+ }
+ cover1 = append(cover1, pc)
+ }
+ if *flagV >= 4 {
+ log.Printf("%v:", c.Meta.Name)
+ for _, pc := range cover1 {
+ log.Printf(" 0x%x", (1<<32-1)<<32|uint64(pc))
+ }
+ log.Printf("\n")
+ }
+ cov[callIndex] = cover.Canonicalize(cover1)
+ }
+ return cov
+}
+
+func logf(v int, msg string, args ...interface{}) {
+ if *flagV >= v {
+ log.Printf(msg, args...)
+ }
+}