diff options
Diffstat (limited to 'pkg/bisect')
 pkg/bisect/minimize/slice.go      | 222 ++++++++++++++++++++++++++++++++++++++
 pkg/bisect/minimize/slice_test.go | 121 ++++++++++++++++++++++
2 files changed, 343 insertions, 0 deletions
// Copyright 2023 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

// Config describes a single slice minimization run.
type Config[T any] struct {
	// The original slice is minimized with respect to this predicate.
	// If Pred(X) returns true, X is assumed to contain all elements that must stay.
	Pred func([]T) (bool, error)
	// MaxSteps is a limit on the number of predicate calls during bisection.
	// If it's hit, the bisection continues as if Pred() begins to return false.
	// If it's set to 0 (by default), no limit is applied.
	MaxSteps int
	// MaxChunks sets a limit on the number of chunks pursued by the bisection algorithm.
	// If we hit the limit, bisection is stopped and Slice() returns ErrTooManyChunks
	// alongside the intermediate bisection result (a valid, but not fully minimized slice).
	MaxChunks int
	// Logf is used for sharing debugging output.
	Logf func(string, ...any)
}

// Slice finds a minimal subsequence of slice elements that still gives Pred() == true.
// The algorithm works by sequentially splitting the slice into smaller-size chunks and running
// Pred() without those chunks.
// The expected number of Pred() runs is O(|result|*log2(|elements|)).
func Slice[T any](config Config[T], slice []T) ([]T, error) {
	if config.Logf == nil {
		// Make logging unconditional for the rest of the code.
		config.Logf = func(string, ...any) {}
	}
	ctx := &sliceCtx[T]{
		Config: config,
		// Start with the whole slice as a single, not-yet-known-to-be-needed chunk.
		chunks: []*arrayChunk[T]{
			{
				elements: slice,
			},
		},
	}
	return ctx.bisect()
}

// sliceCtx holds the mutable state of one bisection run.
type sliceCtx[T any] struct {
	Config[T]
	chunks   []*arrayChunk[T] // Current partition of the candidate elements.
	predRuns int              // Number of Pred() invocations so far (compared against MaxSteps).
}

type arrayChunk[T any] struct {
	elements []T
	final    bool // There's no way to further split this chunk.
}

// ErrTooManyChunks is returned if the number of necessary chunks surpassed MaxChunks.
var ErrTooManyChunks = errors.New("the bisection process is following too many necessary chunks")

// bisect runs the main loop: repeatedly split chunks until all are final
// (or a limit is hit) and return the remaining elements.
func (ctx *sliceCtx[T]) bisect() ([]T, error) {
	// At first, we don't know if the original chunks are really necessary.
	err := ctx.splitChunks(false)
	// Then, keep on splitting the chunks layer by layer until we have identified
	// all necessary elements.
	// This way we ensure that we always go from larger to smaller chunks.
	for err == nil && !ctx.done() {
		if ctx.MaxChunks > 0 && len(ctx.chunks) > ctx.MaxChunks {
			err = ErrTooManyChunks
			break
		}
		err = ctx.splitChunks(true)
	}
	// ErrTooManyChunks is special: the intermediate result is still valid,
	// so it's returned alongside the error.
	if err != nil && !errors.Is(err, ErrTooManyChunks) {
		return nil, err
	}
	return ctx.elements(), err
}

// splitChunks() splits each chunk in two and only leaves the necessary sub-parts.
// someNeeded tells whether the current chunks are already known to contain
// necessary elements (true on all iterations except the very first one).
func (ctx *sliceCtx[T]) splitChunks(someNeeded bool) error {
	ctx.Logf("split chunks (needed=%v): %s", someNeeded, ctx.chunkInfo())
	splitInto := 2
	if !someNeeded && len(ctx.chunks) == 1 {
		// It's our first iteration.
		splitInto = ctx.initialSplit(len(ctx.chunks[0].elements))
	}
	var newChunks []*arrayChunk[T]
	for i, chunk := range ctx.chunks {
		if chunk.final {
			newChunks = append(newChunks, chunk)
			continue
		}
		ctx.Logf("split chunk #%d of len %d into %d parts", i, len(chunk.elements), splitInto)
		chunks := splitChunk(chunk.elements, splitInto)
		if len(chunks) == 1 && someNeeded {
			ctx.Logf("no way to further split the chunk")
			chunk.final = true
			newChunks = append(newChunks, chunk)
			continue
		}
		foundNeeded := false
		for j := range chunks {
			ctx.Logf("testing without sub-chunk %d/%d", j+1, len(chunks))
			// The last sub-chunk of a needed chunk doesn't have to be tested
			// if all the previous ones turned out to be droppable: the chunk
			// contains at least one needed element, so it must be this one.
			if j < len(chunks)-1 || foundNeeded || !someNeeded {
				ret, err := ctx.predRun(
					newChunks,
					mergeRawChunks(chunks[j+1:]),
					ctx.chunks[i+1:],
				)
				if err != nil {
					return err
				}
				if ret {
					ctx.Logf("the chunk can be dropped")
					continue
				}
			} else {
				ctx.Logf("no need to test this chunk, it's definitely needed")
			}
			foundNeeded = true
			newChunks = append(newChunks, &arrayChunk[T]{
				elements: chunks[j],
			})
		}
	}
	ctx.chunks = newChunks
	return nil
}

// Since Pred() runs can be costly, the objective is to get the most out of the
// limited number of Pred() calls.
// We try to achieve it by splitting the initial array in more than 2 elements.
func (ctx *sliceCtx[T]) initialSplit(size int) int {
	// If the number of steps is small and the number of elements is big,
	// let's just split the initial array into MaxSteps chunks.
	// There's no solid reasoning behind the condition below, so feel free to
	// change it if you have better ideas.
	if ctx.MaxSteps > 0 && math.Log2(float64(size)) > float64(ctx.MaxSteps) {
		return ctx.MaxSteps
	}
	// Otherwise let's split in 3.
	return 3
}

// predRun() determines whether (before + mid + after) covers the necessary elements.
// Once MaxSteps is exhausted, it pretends Pred() returns false (i.e. keeps everything).
func (ctx *sliceCtx[T]) predRun(before []*arrayChunk[T], mid []T, after []*arrayChunk[T]) (bool, error) {
	if ctx.MaxSteps > 0 && ctx.predRuns >= ctx.MaxSteps {
		ctx.Logf("we have reached the limit on predicate runs (%d); pretend it returns false",
			ctx.MaxSteps)
		return false, nil
	}
	ctx.predRuns++
	return ctx.Pred(mergeChunks(before, mid, after))
}

// The bisection process is done once every chunk is marked as final.
func (ctx *sliceCtx[T]) done() bool {
	if ctx.MaxSteps > 0 && ctx.predRuns >= ctx.MaxSteps {
		// No reason to continue.
		return true
	}
	for _, chunk := range ctx.chunks {
		if !chunk.final {
			return false
		}
	}
	return true
}

// elements returns all elements currently kept by the bisection.
func (ctx *sliceCtx[T]) elements() []T {
	return mergeChunks(ctx.chunks, nil, nil)
}

// chunkInfo renders a short human-readable summary of the chunks for logging.
func (ctx *sliceCtx[T]) chunkInfo() string {
	var parts []string
	for _, chunk := range ctx.chunks {
		str := ""
		if chunk.final {
			str = ", final"
		}
		parts = append(parts, fmt.Sprintf("<%d%s>", len(chunk.elements), str))
	}
	return strings.Join(parts, ", ")
}

// mergeChunks flattens before + mid + after into a single slice.
func mergeChunks[T any](before []*arrayChunk[T], mid []T, after []*arrayChunk[T]) []T {
	var ret []T
	for _, chunk := range before {
		ret = append(ret, chunk.elements...)
	}
	ret = append(ret, mid...)
	for _, chunk := range after {
		ret = append(ret, chunk.elements...)
	}
	return ret
}

// mergeRawChunks concatenates raw element chunks into one slice.
func mergeRawChunks[T any](chunks [][]T) []T {
	var ret []T
	for _, chunk := range chunks {
		ret = append(ret, chunk...)
	}
	return ret
}

// splitChunk splits chunk into at most `parts` contiguous pieces of
// (almost) equal size; a chunk shorter than `parts` yields one-element pieces.
func splitChunk[T any](chunk []T, parts int) [][]T {
	chunkSize := (len(chunk) + parts - 1) / parts
	if chunkSize == 0 {
		chunkSize = 1
	}
	var ret [][]T
	for i := 0; i < len(chunk); i += chunkSize {
		end := i + chunkSize
		if end > len(chunk) {
			end = len(chunk)
		}
		ret = append(ret, chunk[i:end])
	}
	return ret
}
+ } + return ret +} + +func splitChunk[T any](chunk []T, parts int) [][]T { + chunkSize := (len(chunk) + parts - 1) / parts + if chunkSize == 0 { + chunkSize = 1 + } + var ret [][]T + for i := 0; i < len(chunk); i += chunkSize { + end := i + chunkSize + if end > len(chunk) { + end = len(chunk) + } + ret = append(ret, chunk[i:end]) + } + return ret +} diff --git a/pkg/bisect/minimize/slice_test.go b/pkg/bisect/minimize/slice_test.go new file mode 100644 index 000000000..4e3e65202 --- /dev/null +++ b/pkg/bisect/minimize/slice_test.go @@ -0,0 +1,121 @@ +// Copyright 2023 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package minimize + +import ( + "fmt" + "math" + "math/rand" + "testing" + "time" + + "github.com/google/syzkaller/pkg/testutil" + "github.com/stretchr/testify/assert" +) + +func TestBisectSliceToZero(t *testing.T) { + t.Parallel() + array := make([]int, 100) + ret, err := Slice(Config[int]{ + Pred: func(arr []int) (bool, error) { + // No elements are needed. + return true, nil + }, + Logf: t.Logf, + }, array) + assert.NoError(t, err) + assert.Len(t, ret, 0) +} + +func TestBisectSliceFull(t *testing.T) { + t.Parallel() + array := make([]int, 100) + ret, err := Slice(Config[int]{ + Pred: func(arr []int) (bool, error) { + // All elements are needed. + return false, nil + }, + Logf: t.Logf, + }, array) + assert.NoError(t, err) + assert.Equal(t, ret, array) +} + +func TestBisectRandomSlice(t *testing.T) { + t.Parallel() + r := rand.New(testutil.RandSource(t)) + for i := 0; i < testutil.IterCount(); i++ { + // Create an array of random size and set the elements that must remain to non-zero values. 
+ size := r.Intn(50) + subset := r.Intn(size + 1) + array := make([]int, size) + for _, j := range r.Perm(size)[:subset] { + array[j] = j + 1 + } + var expect []int + for _, j := range array { + if j > 0 { + expect = append(expect, j) + } + } + predCalls := 0 + ret, err := Slice(Config[int]{ + Pred: func(arr []int) (bool, error) { + predCalls++ + // All elements of the subarray must be present. + nonZero := 0 + for _, x := range arr { + if x > 0 { + nonZero++ + } + } + return nonZero == subset, nil + }, + Logf: t.Logf, + }, array) + assert.NoError(t, err) + assert.EqualValues(t, expect, ret) + // Ensure we don't make too many predicate calls. + maxCalls := 3 + 2*subset*(1+int(math.Floor(math.Log2(float64(size))))) + assert.LessOrEqual(t, predCalls, maxCalls) + } +} + +func BenchmarkSplits(b *testing.B) { + for _, guilty := range []int{1, 2, 3, 4} { + guilty := guilty + b.Run(fmt.Sprintf("%d_guilty", guilty), func(b *testing.B) { + var sum int + for i := 0; i < b.N; i++ { + sum += runMinimize(guilty) + } + b.ReportMetric(float64(sum)/float64(b.N), "remaining-elements") + }) + } +} + +func runMinimize(guilty int) int { + const size = 300 + const steps = 5 + + r := rand.New(rand.NewSource(time.Now().UnixNano())) + array := make([]int, size) + for _, j := range r.Perm(size)[:guilty] { + array[j] = 1 + } + + ret, _ := Slice(Config[int]{ + MaxSteps: steps, + Pred: func(arr []int) (bool, error) { + nonZero := 0 + for _, x := range arr { + if x > 0 { + nonZero++ + } + } + return nonZero == guilty, nil + }, + }, array) + return len(ret) +} |
