// Copyright 2023 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package minimize

import (
	"errors"
	"fmt"
	"math"
	"slices"
	"strings"
)

type Config[T any] struct {
	// The original slice is minimized with respect to this predicate.
	// If Pred(X) returns true, X is assumed to contain all elements that must stay.
	Pred func([]T) (bool, error)
	// MaxSteps is a limit on the number of predicate calls during bisection.
	// If it's hit, the bisection continues as if Pred() begins to return false.
	// If it's set to 0 (by default), no limit is applied.
	MaxSteps int
	// MaxChunks sets a limit on the number of chunks pursued by the bisection algorithm.
	// If we hit the limit, bisection is stopped and Array() returns ErrTooManyChunks
	// anongside the intermediate bisection result (a valid, but not fully minimized slice).
	MaxChunks int
	// Logf is used for sharing debugging output.
	Logf func(string, ...any)
}

// Slice() finds a minimal subsequence of slice elements that still gives Pred() == true.
// The algorithm works by sequentially splitting the slice into smaller-size chunks and running
// Pred() witout those chunks. Slice() receives the original slice chunks.
// The expected number of Pred() runs is O(|result|*log2(|elements|)).
func Slice[T any](config Config[T], slice []T) ([]T, error) {
	if config.Logf == nil {
		config.Logf = func(string, ...any) {}
	}
	ctx := &sliceCtx[T]{
		Config: config,
		chunks: []*arrayChunk[T]{
			{
				elements: slice,
			},
		},
	}
	return ctx.bisect()
}

// SliceWithFixed behaves like Slice, but also allows to designate the elements that
// must always remain (the "fixed" ones).
func SliceWithFixed[T any](config Config[T], slice []T, fixed func(T) bool) ([]T, error) {
	var freeIdx, fixedIdx []int
	for i := 0; i < len(slice); i++ {
		if fixed(slice[i]) {
			fixedIdx = append(fixedIdx, i)
		} else {
			freeIdx = append(freeIdx, i)
		}
	}
	if len(freeIdx) == 0 {
		return slice, nil
	}
	convert := func(idx []int) []T {
		ret := make([]T, 0, len(idx)+len(fixedIdx))
		idx, fixedIdx := slices.Clone(idx), slices.Clone(fixedIdx)
		for len(idx)+len(fixedIdx) > 0 {
			if len(idx) > 0 && (len(fixedIdx) == 0 ||
				len(fixedIdx) > 0 && idx[0] < fixedIdx[0]) {
				ret = append(ret, slice[idx[0]])
				idx = idx[1:]
			} else {
				ret = append(ret, slice[fixedIdx[0]])
				fixedIdx = fixedIdx[1:]
			}
		}
		return ret
	}
	newConfig := Config[int]{
		MaxSteps:  config.MaxSteps,
		MaxChunks: config.MaxChunks,
		Pred: func(idx []int) (bool, error) {
			return config.Pred(convert(idx))
		},
		Logf: config.Logf,
	}
	result, err := Slice[int](newConfig, freeIdx)
	if err != nil {
		return nil, err
	}
	return convert(result), nil
}

type sliceCtx[T any] struct {
	Config[T]
	chunks   []*arrayChunk[T]
	predRuns int
}

type arrayChunk[T any] struct {
	elements []T
	final    bool // There's no way to further split this chunk.
}

// ErrTooManyChunks is returned if the number of necessary chunks surpassed MaxChunks.
var ErrTooManyChunks = errors.New("the bisection process is following too many necessary chunks")

func (ctx *sliceCtx[T]) bisect() ([]T, error) {
	// At first, we don't know if the original chunks are really necessary.
	err := ctx.splitChunks(false)
	// Then, keep on splitting the chunks layer by layer until we have identified
	// all necessary elements.
	// This way we ensure that we always go from larger to smaller chunks.
	for err == nil && !ctx.done() {
		if ctx.MaxChunks > 0 && len(ctx.chunks) > ctx.MaxChunks {
			err = ErrTooManyChunks
			break
		}
		err = ctx.splitChunks(true)
	}
	if err != nil && err != ErrTooManyChunks {
		return nil, err
	}
	return ctx.elements(), err
}

// splitChunks() splits each chunk in two and only leaves the necessary sub-parts.
func (ctx *sliceCtx[T]) splitChunks(someNeeded bool) error {
	ctx.Logf("split chunks (needed=%v): %s", someNeeded, ctx.chunkInfo())
	splitInto := 2
	if !someNeeded && len(ctx.chunks) == 1 {
		// It's our first iteration.
		splitInto = ctx.initialSplit(len(ctx.chunks[0].elements))
	}
	var newChunks []*arrayChunk[T]
	for i, chunk := range ctx.chunks {
		if chunk.final {
			newChunks = append(newChunks, chunk)
			continue
		}
		ctx.Logf("split chunk #%d of len %d into %d parts", i, len(chunk.elements), splitInto)
		chunks := splitChunk[T](chunk.elements, splitInto)
		if len(chunks) == 1 && someNeeded {
			ctx.Logf("no way to further split the chunk")
			chunk.final = true
			newChunks = append(newChunks, chunk)
			continue
		}
		foundNeeded := false
		for j := range chunks {
			ctx.Logf("testing without sub-chunk %d/%d", j+1, len(chunks))
			if j < len(chunks)-1 || foundNeeded || !someNeeded {
				ret, err := ctx.predRun(
					newChunks,
					mergeRawChunks(chunks[j+1:]),
					ctx.chunks[i+1:],
				)
				if err != nil {
					return err
				}
				if ret {
					ctx.Logf("the chunk can be dropped")
					continue
				}
			} else {
				ctx.Logf("no need to test this chunk, it's definitely needed")
			}
			foundNeeded = true
			newChunks = append(newChunks, &arrayChunk[T]{
				elements: chunks[j],
			})
		}
	}
	ctx.chunks = newChunks
	return nil
}

// Since Pred() runs can be costly, the objective is to get the most out of the
// limited number of Pred() calls.
// We try to achieve it by splitting the initial array in more than 2 elements.
func (ctx *sliceCtx[T]) initialSplit(size int) int {
	// If the number of steps is small and the number of elements is big,
	// let's just split the initial array into MaxSteps chunks.
	// There's no solid reasoning behind the condition below, so feel free to
	// change it if you have better ideas.
	if ctx.MaxSteps > 0 && math.Log2(float64(size)) > float64(ctx.MaxSteps) {
		return ctx.MaxSteps
	}
	// Otherwise let's split in 3.
	return 3
}

// predRun() determines whether (before + mid + after) covers the necessary elements.
func (ctx *sliceCtx[T]) predRun(before []*arrayChunk[T], mid []T, after []*arrayChunk[T]) (bool, error) {
	if ctx.MaxSteps > 0 && ctx.predRuns >= ctx.MaxSteps {
		ctx.Logf("we have reached the limit on predicate runs (%d); pretend it returns false",
			ctx.MaxSteps)
		return false, nil
	}
	ctx.predRuns++
	return ctx.Pred(mergeChunks(before, mid, after))
}

// The bisection process is done once every chunk is marked as final.
func (ctx *sliceCtx[T]) done() bool {
	if ctx.MaxSteps > 0 && ctx.predRuns >= ctx.MaxSteps {
		// No reason to continue.
		return true
	}
	for _, chunk := range ctx.chunks {
		if !chunk.final {
			return false
		}
	}
	return true
}

func (ctx *sliceCtx[T]) elements() []T {
	return mergeChunks(ctx.chunks, nil, nil)
}

func (ctx *sliceCtx[T]) chunkInfo() string {
	var parts []string
	for _, chunk := range ctx.chunks {
		str := ""
		if chunk.final {
			str = ", final"
		}
		parts = append(parts, fmt.Sprintf("<%d%s>", len(chunk.elements), str))
	}
	return strings.Join(parts, ", ")
}

func mergeChunks[T any](before []*arrayChunk[T], mid []T, after []*arrayChunk[T]) []T {
	var ret []T
	for _, chunk := range before {
		ret = append(ret, chunk.elements...)
	}
	ret = append(ret, mid...)
	for _, chunk := range after {
		ret = append(ret, chunk.elements...)
	}
	return ret
}

func mergeRawChunks[T any](chunks [][]T) []T {
	var ret []T
	for _, chunk := range chunks {
		ret = append(ret, chunk...)
	}
	return ret
}

func splitChunk[T any](chunk []T, parts int) [][]T {
	chunkSize := (len(chunk) + parts - 1) / parts
	if chunkSize == 0 {
		chunkSize = 1
	}
	var ret [][]T
	for i := 0; i < len(chunk); i += chunkSize {
		end := min(i+chunkSize, len(chunk))
		ret = append(ret, chunk[i:end])
	}
	return ret
}