// Copyright 2022 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package prog

import (
	"fmt"
	"math/rand"
)

// Our heatmaps are a flexible mechanism to assign a probability distribution to
// some collection of bytes. Usage:
//  1. Choose a heatmap and initialize it: `hm := MakeXYZHeatmap(data)`.
//     Different heatmaps implement different probability distributions
//     (for now there is only one).
//  2. Select random indices according to the probability distribution:
//     `idx := hm.ChooseLocation(r)`.
type Heatmap interface {
	NumMutations() int
	ChooseLocation() int
}

// Generic heatmaps model a probability distribution based on sparse data,
// prioritising selection of regions which are not a single repeated byte. It
// views data as a series of chunks of length `granularity`, ignoring chunks
// which are a single repeated byte. Indices are chosen uniformly amongst the
// remaining "interesting" segments.
func MakeGenericHeatmap(data []byte, r *rand.Rand) Heatmap {
	if len(data) == 0 {
		panic("cannot create a GenericHeatmap with no data")
	}
	hm := &GenericHeatmap{
		r: r,
	}
	hm.length, hm.segments = calculateLengthAndSegments(data, granularity)
	return hm
}

func (hm *GenericHeatmap) NumMutations() int {
	// At least one mutation.
	n := 1
	// + up to about one mutation every 4 KB of heatmap size.
	n += hm.r.Intn(hm.length/(4<<10) + 1)
	// + up to 4 mutations at random so that even small images can get more than one.
	n += hm.r.Intn(5)
	// But don't do too many as it will most likely corrupt the image.
	return min(n, 10)
}

func (hm *GenericHeatmap) ChooseLocation() int {
	// Uniformly choose an index within one of the segments.
	heatmapIdx := hm.r.Intn(hm.length)
	rawIdx := translateIdx(heatmapIdx, hm.segments)
	return rawIdx
}

type GenericHeatmap struct {
	r        *rand.Rand
	segments []segment // "Interesting" parts of the data.
	length   int       // Sum of all segment lengths.
}

type segment struct {
	offset int
	length int
}

const granularity = 64 // Chunk size in bytes for processing the data.

// Determine the "interesting" segments of data, also returning their combined length.
func calculateLengthAndSegments(data []byte, granularity int) (int, []segment) {
	// Offset and length of current segment, total length of all segments, length of original data.
	offset, currentLength, totalLength, rawLength := 0, 0, 0, len(data)
	segments := []segment{}

	// Save a segment.
	saveSegment := func() {
		if currentLength != 0 {
			segments = append(segments, segment{offset: offset, length: currentLength})
			offset, totalLength, currentLength = offset+currentLength, totalLength+currentLength, 0
		}
	}

	for len(data) > 0 {
		var chunk []byte
		if len(data) < granularity {
			chunk, data = data, nil
		} else {
			chunk, data = data[:granularity], data[granularity:]
		}

		// Check if buffer contains only a single value.
		byt0, isConstant := chunk[0], true
		for _, byt := range chunk {
			if byt != byt0 {
				isConstant = false
				break
			}
		}

		if !isConstant {
			// Non-constant - extend the current segment.
			currentLength += len(chunk)
		} else {
			// Save current segment.
			saveSegment()
			// Skip past the constant bytes.
			offset += len(chunk)
		}
	}

	// Save final segment.
	saveSegment()

	if len(segments) == 0 {
		// We found no segments, i.e. the data is all "boring". Fall back to a
		// uniform probability distribution over the original data by considering it
		// as one long segment.
		return rawLength, append(segments, segment{offset: 0, length: rawLength})
	}

	return totalLength, segments
}

// Convert from an index into "interesting" segments to an index into raw data.
// I.e. view `idx` as an index into the concatenated segments, and translate
// this to an index into the original underlying data. E.g.:
//
//	segs = []segment{{offset: 10, length: 20}, {offset: 50, length: 10}}
//	translateIdx(25, segs) = 5
//
// I.e. we index element 5 of the second segment, so element 55 of the raw data.
func translateIdx(idx int, segs []segment) int {
	if idx < 0 {
		panic(fmt.Sprintf("translateIdx: negative index %v", idx))
	}
	savedIdx := idx
	for _, seg := range segs {
		if idx < seg.length {
			return seg.offset + idx
		}
		idx -= seg.length
	}
	panic(fmt.Sprintf("translateIdx: index out of range %v", savedIdx))
}