aboutsummaryrefslogtreecommitdiffstats
path: root/prog/heatmap.go
diff options
context:
space:
mode:
authorHrutvik Kanabar <hrutvik@google.com>2022-10-21 15:48:05 +0000
committerAleksandr Nogikh <wp32pw@gmail.com>2022-11-21 11:06:14 +0100
commit7954d07c228dd9ce63b7ebd13239b4d1f2c35233 (patch)
tree6ec3684c45217c6f4ae7f3a7ba42e222a9012865 /prog/heatmap.go
parentc49ccaee639efae8b8249272a79e700e2c1954c6 (diff)
prog: add a notion of a mutation "heatmap"
In this context, a heatmap is a flexible mechanism to choose where to mutate some collection of bytes with some underlying probability distribution, which could be dependent on the bytes themselves. This commit defines the interface for heatmaps, implements a generic one, and adds a test for it. The expected usage of the heatmap interface is: - Caller selects heatmap type based on program type (or other logic), and calls the appropriate `MakeXYZHeatmap`. Currently there is only one, but over time we will add more with differing probability distributions. - Caller queries the heatmap using `hm.ChooseLocation(r)` for random input `r`. The generic heatmap is based on the current compression for disk images. It avoids mutating within long runs of constant bytes by viewing the file as a series of 64-byte chunks, and ignoring chunks which consist of a single repeated byte.
Diffstat (limited to 'prog/heatmap.go')
-rw-r--r--prog/heatmap.go135
1 files changed, 135 insertions, 0 deletions
diff --git a/prog/heatmap.go b/prog/heatmap.go
new file mode 100644
index 000000000..51daaf251
--- /dev/null
+++ b/prog/heatmap.go
@@ -0,0 +1,135 @@
+// Copyright 2022 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package prog
+
+import (
+ "fmt"
+ "math/rand"
+)
+
// Our heatmaps are a flexible mechanism to assign a probability distribution to
// some collection of bytes. Usage:
//  1. Choose a heatmap and initialize it: `hm := MakeXYZHeatmap(data)`.
//     Different heatmaps implement different probability distributions
//     (for now there is only one).
//  2. Select random indices according to the probability distribution:
//     `idx := hm.ChooseLocation(r)`.
type Heatmap interface {
	// ChooseLocation draws a random index into the underlying data according
	// to the heatmap's probability distribution; r is the only source of
	// randomness used.
	ChooseLocation(r *rand.Rand) int
	// Size reports the number of indices the distribution is drawn over (for
	// the generic heatmap: the combined length of its "interesting" segments).
	Size() int
}
+
+// Generic heatmaps model a probability distribution based on sparse data,
+// prioritising selection of regions which are not a single repeated byte. It
+// views data as a series of chunks of length `granularity`, ignoring chunks
+// which are a single repeated byte. Indices are chosen uniformly amongst the
+// remaining "interesting" segments.
+func MakeGenericHeatmap(data []byte) Heatmap {
+ if len(data) == 0 {
+ panic("cannot create a GenericHeatmap with no data")
+ }
+ var hm GenericHeatmap
+ hm.length, hm.segments = calculateLengthAndSegments(data, granularity)
+ return &hm
+}
+
+func (hm *GenericHeatmap) ChooseLocation(r *rand.Rand) int {
+ // Uniformly choose an index within one of the segments.
+ heatmapIdx := r.Intn(hm.length)
+ rawIdx := translateIdx(heatmapIdx, hm.segments)
+ return rawIdx
+}
+
// Size returns the combined length of all "interesting" segments, i.e. the
// exclusive upper bound of the index range ChooseLocation draws from.
func (hm *GenericHeatmap) Size() int {
	return hm.length
}
+
// GenericHeatmap holds the precomputed "interesting" regions of some data, so
// that random locations can be drawn uniformly from just those regions.
type GenericHeatmap struct {
	segments []segment // "Interesting" parts of the data.
	length   int       // Sum of all segment lengths.
}
+
// segment describes one contiguous "interesting" region of the data.
type segment struct {
	offset int // Start of the region in the original data.
	length int // Number of bytes in the region.
}

const granularity = 64 // Chunk size in bytes for processing the data.

// calculateLengthAndSegments scans data in chunks of `granularity` bytes and
// merges maximal runs of non-constant chunks into segments, returning the
// combined segment length together with the segments. If every chunk is
// constant (all "boring"), the whole data is returned as one segment so that
// callers always have something to sample from.
func calculateLengthAndSegments(data []byte, granularity int) (int, []segment) {
	rawLength := len(data)
	var segments []segment
	totalLength := 0
	cur := segment{} // Segment currently being grown.

	// Close off the in-progress segment, if it is non-empty.
	flush := func() {
		if cur.length > 0 {
			segments = append(segments, cur)
			totalLength += cur.length
			cur = segment{offset: cur.offset + cur.length}
		}
	}

	for pos := 0; pos < rawLength; pos += granularity {
		end := pos + granularity
		if end > rawLength {
			end = rawLength // Final chunk may be shorter than granularity.
		}
		chunk := data[pos:end]

		if isConstant(chunk) {
			// Boring chunk: terminate any segment in progress and skip it.
			flush()
			cur.offset = end
		} else {
			// Interesting chunk: extend the current segment.
			cur.length += len(chunk)
		}
	}
	flush()

	if len(segments) == 0 {
		// We found no segments, i.e. the data is all "boring". Fall back to a
		// uniform probability distribution over the original data by
		// considering it as one long segment.
		return rawLength, []segment{{offset: 0, length: rawLength}}
	}
	return totalLength, segments
}

// isConstant reports whether every byte of chunk equals its first byte.
// chunk must be non-empty.
func isConstant(chunk []byte) bool {
	for _, byt := range chunk[1:] {
		if byt != chunk[0] {
			return false
		}
	}
	return true
}
+
+// Convert from an index into "interesting" segments to an index into raw data.
+// I.e. view `idx` as an index into the concatenated segments, and translate
+// this to an index into the original underlying data. E.g.:
+//
+// segs = []segment{{offset: 10, length: 20}, {offset: 50, length: 10}}
+// translateIdx(25, segs) = 5
+//
+// I.e. we index element 5 of the second segment, so element 55 of the raw data.
+func translateIdx(idx int, segs []segment) int {
+ if idx < 0 {
+ panic(fmt.Sprintf("translateIdx: negative index %v", idx))
+ }
+ savedIdx := idx
+ for _, seg := range segs {
+ if idx < seg.length {
+ return seg.offset + idx
+ }
+ idx -= seg.length
+ }
+ panic(fmt.Sprintf("translateIdx: index out of range %v", savedIdx))
+}