aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHrutvik Kanabar <hrutvik@google.com>2022-10-21 15:48:05 +0000
committerAleksandr Nogikh <wp32pw@gmail.com>2022-11-21 11:06:14 +0100
commit7954d07c228dd9ce63b7ebd13239b4d1f2c35233 (patch)
tree6ec3684c45217c6f4ae7f3a7ba42e222a9012865
parentc49ccaee639efae8b8249272a79e700e2c1954c6 (diff)
prog: add a notion of a mutation "heatmap"
In this context, a heatmap is a flexible mechanism to choose where to mutate some collection of bytes with some underlying probability distribution, which could be dependent on the bytes themselves. This commit defines the interface for heatmaps, implements a generic one, and adds a test for it. The expected usage of the heatmap inferface is: - Caller selects heatmap type based on program type (or other logic), and calls the appropriate `MakeXYZHeatmap`. Currently there is only one, but over time we will add more with differing probability distributions. - Caller queries the heatmap using `hm.ChooseLocation(r)` for random input `r`. The generic heatmap is based on the current compression for disk images. It avoids mutating within long runs of constant bytes by viewing the file as a series of 64-byte chunks, and ignoring chunks which consist of a single repeated byte.
-rw-r--r--prog/heatmap.go135
-rw-r--r--prog/heatmap_test.go117
2 files changed, 252 insertions, 0 deletions
diff --git a/prog/heatmap.go b/prog/heatmap.go
new file mode 100644
index 000000000..51daaf251
--- /dev/null
+++ b/prog/heatmap.go
@@ -0,0 +1,135 @@
+// Copyright 2022 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package prog
+
+import (
+ "fmt"
+ "math/rand"
+)
+
+// Our heatmaps are a flexible mechanism to assign a probability distribution to
+// some collection of bytes. Usage:
+// 1. Choose a heatmap and initialize it: `hm := MakeXYZHeatmap(data)`.
+// Different heatmaps implement different probability distributions
+// (for now there is only one).
+// 2. Select random indices according to the probability distribution:
+// `idx := hm.ChooseLocation(r)`.
+type Heatmap interface {
+ ChooseLocation(r *rand.Rand) int
+ Size() int
+}
+
+// Generic heatmaps model a probability distribution based on sparse data,
+// prioritising selection of regions which are not a single repeated byte. It
+// views data as a series of chunks of length `granularity`, ignoring chunks
+// which are a single repeated byte. Indices are chosen uniformly amongst the
+// remaining "interesting" segments.
+func MakeGenericHeatmap(data []byte) Heatmap {
+ if len(data) == 0 {
+ panic("cannot create a GenericHeatmap with no data")
+ }
+ var hm GenericHeatmap
+ hm.length, hm.segments = calculateLengthAndSegments(data, granularity)
+ return &hm
+}
+
+func (hm *GenericHeatmap) ChooseLocation(r *rand.Rand) int {
+ // Uniformly choose an index within one of the segments.
+ heatmapIdx := r.Intn(hm.length)
+ rawIdx := translateIdx(heatmapIdx, hm.segments)
+ return rawIdx
+}
+
+func (hm *GenericHeatmap) Size() int {
+ return hm.length
+}
+
+type GenericHeatmap struct {
+ segments []segment // "Interesting" parts of the data.
+ length int // Sum of all segment lengths.
+}
+
+type segment struct {
+ offset int
+ length int
+}
+
+const granularity = 64 // Chunk size in bytes for processing the data.
+
+// Determine the "interesting" segments of data, also returning their combined length.
+func calculateLengthAndSegments(data []byte, granularity int) (int, []segment) {
+ // Offset and length of current segment, total length of all segments, length of original data.
+ offset, currentLength, totalLength, rawLength := 0, 0, 0, len(data)
+ segments := []segment{}
+
+ // Save a segment.
+ saveSegment := func() {
+ if currentLength != 0 {
+ segments = append(segments, segment{offset: offset, length: currentLength})
+ offset, totalLength, currentLength = offset+currentLength, totalLength+currentLength, 0
+ }
+ }
+
+ for len(data) > 0 {
+ var chunk []byte
+ if len(data) < granularity {
+ chunk, data = data, nil
+ } else {
+ chunk, data = data[:granularity], data[granularity:]
+ }
+
+ // Check if buffer contains only a single value.
+ byt0, isConstant := chunk[0], true
+ for _, byt := range chunk {
+ if byt != byt0 {
+ isConstant = false
+ break
+ }
+ }
+
+ if !isConstant {
+ // Non-constant - extend the current segment.
+ currentLength += len(chunk)
+ } else {
+ // Save current segment.
+ saveSegment()
+ // Skip past the constant bytes.
+ offset += len(chunk)
+ }
+ }
+
+ // Save final segment.
+ saveSegment()
+
+ if len(segments) == 0 {
+ // We found no segments, i.e. the data is all "boring". Fall back to a
+ // uniform probability distribution over the original data by considering it
+ // as one long segment.
+ return rawLength, append(segments, segment{offset: 0, length: rawLength})
+ }
+
+ return totalLength, segments
+}
+
+// Convert from an index into "interesting" segments to an index into raw data.
+// I.e. view `idx` as an index into the concatenated segments, and translate
+// this to an index into the original underlying data. E.g.:
+//
+// segs = []segment{{offset: 10, length: 20}, {offset: 50, length: 10}}
+// translateIdx(25, segs) = 5
+//
+// I.e. we index element 5 of the second segment, so element 55 of the raw data.
+func translateIdx(idx int, segs []segment) int {
+ if idx < 0 {
+ panic(fmt.Sprintf("translateIdx: negative index %v", idx))
+ }
+ savedIdx := idx
+ for _, seg := range segs {
+ if idx < seg.length {
+ return seg.offset + idx
+ }
+ idx -= seg.length
+ }
+ panic(fmt.Sprintf("translateIdx: index out of range %v", savedIdx))
+}
diff --git a/prog/heatmap_test.go b/prog/heatmap_test.go
new file mode 100644
index 000000000..cbda3ce43
--- /dev/null
+++ b/prog/heatmap_test.go
@@ -0,0 +1,117 @@
+// Copyright 2022 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package prog
+
+import (
+ "math/rand"
+ "sort"
+ "testing"
+)
+
+func TestGenericHeatmap(t *testing.T) {
+ t.Parallel()
+ // A test case is some data with the regions the heatmap is permitted to choose.
+ testData := []struct {
+ data []byte
+ regions []region
+ }{
+ {
+ // Normal usage test.
+ []byte(
+ "4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh" +
+ "4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh" +
+ "4eHh4eHh4eHh4eHh4Q5GTbHh4eHh4eHh4eHh4eHhcOHh4eHh4eHh4eHh4eHh4eHh4eHh4eEfNuHh4XPh" +
+ "4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHjd+GRzcLh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh" +
+ "4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4dpiSwpoReHh4eHh4eHh4eHh4eHh4eHh4eHh" +
+ "4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4bGfM+Hh4eHh4eHh4eHh4eHh4eHh4eHh4eHh" +
+ "4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh" +
+ "4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh" +
+ "4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh" +
+ "mpNKOZnS4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh" +
+ "4eHh4eHh4eHh4eHh4eHh4Q=="),
+ []region{{128, 384}, {512, 576}},
+ },
+ {
+ // Test all constant bytes, i.e. falling back to uniform selection.
+ []byte(
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" +
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" +
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" +
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" +
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" +
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"),
+ []region{{0, 324}}, // Anywhere in the data.
+ },
+ }
+
+ const tries = 10
+ iters := iterCount() / tries
+
+ r := rand.New(randSource(t))
+ for _, test := range testData {
+ data, err := DecodeB64(test.data)
+ if err != nil {
+ t.Fatalf("bad decode: %v", err)
+ }
+ for i := 0; i < iters; i++ {
+ hm := MakeGenericHeatmap(data).(*GenericHeatmap)
+
+ for j := 0; j < tries; j++ {
+ index := hm.ChooseLocation(r)
+ if !checkIndex(index, len(data), test.regions) {
+ hm.debugPrint(t, data, test.regions)
+ t.Fatalf("selected index %d does not fall in a region\n", index)
+ }
+ }
+ }
+ }
+}
+
+// Check an index is within some regions.
+func checkIndex(index, maxIndex int, regions []region) bool {
+ if index < 0 || index >= maxIndex {
+ return false
+ }
+
+ for _, region := range regions {
+ if region.start <= index && index < region.end {
+ return true
+ }
+ }
+ return false
+}
+
+type region struct {
+ start int
+ end int
+}
+
+func (hm *GenericHeatmap) debugPrint(t *testing.T, data []byte, regions []region) {
+ // Print data.
+ t.Logf("data: len = %d", len(data))
+ for j := 0; j < len(data); j += granularity {
+ end := j + granularity
+ if end > len(data) {
+ end = len(data)
+ }
+ t.Logf("%8d: %x", j*granularity, data[j:end])
+ }
+ t.Logf("\n")
+
+ // Print selected regions in data.
+ sort.Slice(regions, func(i, j int) bool {
+ return regions[i].start < regions[j].start
+ })
+ for j, region := range regions {
+ t.Logf("region %4d: %8v - %8v", j, region.start, region.end)
+ }
+ t.Logf("\n")
+
+ // Print heatmap.
+ t.Logf("generic heatmap (total segment length %d)", hm.length)
+ for j, seg := range hm.segments {
+ t.Logf("segment %4d: %8v - %8v", j, seg.offset, seg.offset+seg.length)
+ }
+ t.Logf("\n\n\n")
+}