aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/bisect
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/bisect')
-rw-r--r--pkg/bisect/minimize/slice.go222
-rw-r--r--pkg/bisect/minimize/slice_test.go121
2 files changed, 343 insertions, 0 deletions
diff --git a/pkg/bisect/minimize/slice.go b/pkg/bisect/minimize/slice.go
new file mode 100644
index 000000000..76fe82101
--- /dev/null
+++ b/pkg/bisect/minimize/slice.go
@@ -0,0 +1,222 @@
+// Copyright 2023 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package minimize
+
+import (
+ "errors"
+ "fmt"
+ "math"
+ "strings"
+)
+
+type Config[T any] struct {
+ // The original slice is minimized with respect to this predicate.
+ // If Pred(X) returns true, X is assumed to contain all elements that must stay.
+ Pred func([]T) (bool, error)
+ // MaxSteps is a limit on the number of predicate calls during bisection.
+ // If it's hit, the bisection continues as if Pred() begins to return false.
+ // If it's set to 0 (by default), no limit is applied.
+ MaxSteps int
+ // MaxChunks sets a limit on the number of chunks pursued by the bisection algorithm.
+ // If we hit the limit, bisection is stopped and Array() returns ErrTooManyChunks
+ // anongside the intermediate bisection result (a valid, but not fully minimized slice).
+ MaxChunks int
+ // Logf is used for sharing debugging output.
+ Logf func(string, ...interface{})
+}
+
+// Slice() finds a minimal subsequence of slice elements that still gives Pred() == true.
+// The algorithm works by sequentially splitting the slice into smaller-size chunks and running
+// Pred() witout those chunks. Slice() receives the original slice chunks.
+// The expected number of Pred() runs is O(|result|*log2(|elements|)).
+func Slice[T any](config Config[T], slice []T) ([]T, error) {
+ if config.Logf == nil {
+ config.Logf = func(string, ...interface{}) {}
+ }
+ ctx := &sliceCtx[T]{
+ Config: config,
+ chunks: []*arrayChunk[T]{
+ {
+ elements: slice,
+ },
+ },
+ }
+ return ctx.bisect()
+}
+
+type sliceCtx[T any] struct {
+ Config[T]
+ chunks []*arrayChunk[T]
+ predRuns int
+}
+
+type arrayChunk[T any] struct {
+ elements []T
+ final bool // There's no way to further split this chunk.
+}
+
+// ErrTooManyChunks is returned if the number of necessary chunks surpassed MaxChunks.
+var ErrTooManyChunks = errors.New("the bisection process is following too many necessary chunks")
+
+func (ctx *sliceCtx[T]) bisect() ([]T, error) {
+ // At first, we don't know if the original chunks are really necessary.
+ err := ctx.splitChunks(false)
+ // Then, keep on splitting the chunks layer by layer until we have identified
+ // all necessary elements.
+ // This way we ensure that we always go from larger to smaller chunks.
+ for err == nil && !ctx.done() {
+ if ctx.MaxChunks > 0 && len(ctx.chunks) > ctx.MaxChunks {
+ err = ErrTooManyChunks
+ break
+ }
+ err = ctx.splitChunks(true)
+ }
+ if err != nil && err != ErrTooManyChunks {
+ return nil, err
+ }
+ return ctx.elements(), err
+}
+
+// splitChunks() splits each chunk in two and only leaves the necessary sub-parts.
+func (ctx *sliceCtx[T]) splitChunks(someNeeded bool) error {
+ ctx.Logf("split chunks (needed=%v): %s", someNeeded, ctx.chunkInfo())
+ splitInto := 2
+ if !someNeeded && len(ctx.chunks) == 1 {
+ // It's our first iteration.
+ splitInto = ctx.initialSplit(len(ctx.chunks[0].elements))
+ }
+ var newChunks []*arrayChunk[T]
+ for i, chunk := range ctx.chunks {
+ if chunk.final {
+ newChunks = append(newChunks, chunk)
+ continue
+ }
+ ctx.Logf("split chunk #%d of len %d into %d parts", i, len(chunk.elements), splitInto)
+ chunks := splitChunk[T](chunk.elements, splitInto)
+ if len(chunks) == 1 && someNeeded {
+ ctx.Logf("no way to further split the chunk")
+ chunk.final = true
+ newChunks = append(newChunks, chunk)
+ continue
+ }
+ foundNeeded := false
+ for j := range chunks {
+ ctx.Logf("testing without sub-chunk %d/%d", j+1, len(chunks))
+ if j < len(chunks)-1 || foundNeeded || !someNeeded {
+ ret, err := ctx.predRun(
+ newChunks,
+ mergeRawChunks(chunks[j+1:]),
+ ctx.chunks[i+1:],
+ )
+ if err != nil {
+ return err
+ }
+ if ret {
+ ctx.Logf("the chunk can be dropped")
+ continue
+ }
+ } else {
+ ctx.Logf("no need to test this chunk, it's definitely needed")
+ }
+ foundNeeded = true
+ newChunks = append(newChunks, &arrayChunk[T]{
+ elements: chunks[j],
+ })
+ }
+ }
+ ctx.chunks = newChunks
+ return nil
+}
+
+// Since Pred() runs can be costly, the objective is to get the most out of the
+// limited number of Pred() calls.
+// We try to achieve it by splitting the initial array in more than 2 elements.
+func (ctx *sliceCtx[T]) initialSplit(size int) int {
+ // If the number of steps is small and the number of elements is big,
+ // let's just split the initial array into MaxSteps chunks.
+ // There's no solid reasoning behind the condition below, so feel free to
+ // change it if you have better ideas.
+ if ctx.MaxSteps > 0 && math.Log2(float64(size)) > float64(ctx.MaxSteps) {
+ return ctx.MaxSteps
+ }
+ // Otherwise let's split in 3.
+ return 3
+}
+
+// predRun() determines whether (before + mid + after) covers the necessary elements.
+func (ctx *sliceCtx[T]) predRun(before []*arrayChunk[T], mid []T, after []*arrayChunk[T]) (bool, error) {
+ if ctx.MaxSteps > 0 && ctx.predRuns >= ctx.MaxSteps {
+ ctx.Logf("we have reached the limit on predicate runs (%d); pretend it returns false",
+ ctx.MaxSteps)
+ return false, nil
+ }
+ ctx.predRuns++
+ return ctx.Pred(mergeChunks(before, mid, after))
+}
+
+// The bisection process is done once every chunk is marked as final.
+func (ctx *sliceCtx[T]) done() bool {
+ if ctx.MaxSteps > 0 && ctx.predRuns >= ctx.MaxSteps {
+ // No reason to continue.
+ return true
+ }
+ for _, chunk := range ctx.chunks {
+ if !chunk.final {
+ return false
+ }
+ }
+ return true
+}
+
+func (ctx *sliceCtx[T]) elements() []T {
+ return mergeChunks(ctx.chunks, nil, nil)
+}
+
+func (ctx *sliceCtx[T]) chunkInfo() string {
+ var parts []string
+ for _, chunk := range ctx.chunks {
+ str := ""
+ if chunk.final {
+ str = ", final"
+ }
+ parts = append(parts, fmt.Sprintf("<%d%s>", len(chunk.elements), str))
+ }
+ return strings.Join(parts, ", ")
+}
+
+func mergeChunks[T any](before []*arrayChunk[T], mid []T, after []*arrayChunk[T]) []T {
+ var ret []T
+ for _, chunk := range before {
+ ret = append(ret, chunk.elements...)
+ }
+ ret = append(ret, mid...)
+ for _, chunk := range after {
+ ret = append(ret, chunk.elements...)
+ }
+ return ret
+}
+
+func mergeRawChunks[T any](chunks [][]T) []T {
+ var ret []T
+ for _, chunk := range chunks {
+ ret = append(ret, chunk...)
+ }
+ return ret
+}
+
+func splitChunk[T any](chunk []T, parts int) [][]T {
+ chunkSize := (len(chunk) + parts - 1) / parts
+ if chunkSize == 0 {
+ chunkSize = 1
+ }
+ var ret [][]T
+ for i := 0; i < len(chunk); i += chunkSize {
+ end := i + chunkSize
+ if end > len(chunk) {
+ end = len(chunk)
+ }
+ ret = append(ret, chunk[i:end])
+ }
+ return ret
+}
diff --git a/pkg/bisect/minimize/slice_test.go b/pkg/bisect/minimize/slice_test.go
new file mode 100644
index 000000000..4e3e65202
--- /dev/null
+++ b/pkg/bisect/minimize/slice_test.go
@@ -0,0 +1,121 @@
+// Copyright 2023 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package minimize
+
+import (
+ "fmt"
+ "math"
+ "math/rand"
+ "testing"
+ "time"
+
+ "github.com/google/syzkaller/pkg/testutil"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestBisectSliceToZero(t *testing.T) {
+ t.Parallel()
+ array := make([]int, 100)
+ ret, err := Slice(Config[int]{
+ Pred: func(arr []int) (bool, error) {
+ // No elements are needed.
+ return true, nil
+ },
+ Logf: t.Logf,
+ }, array)
+ assert.NoError(t, err)
+ assert.Len(t, ret, 0)
+}
+
+func TestBisectSliceFull(t *testing.T) {
+ t.Parallel()
+ array := make([]int, 100)
+ ret, err := Slice(Config[int]{
+ Pred: func(arr []int) (bool, error) {
+ // All elements are needed.
+ return false, nil
+ },
+ Logf: t.Logf,
+ }, array)
+ assert.NoError(t, err)
+ assert.Equal(t, ret, array)
+}
+
+func TestBisectRandomSlice(t *testing.T) {
+ t.Parallel()
+ r := rand.New(testutil.RandSource(t))
+ for i := 0; i < testutil.IterCount(); i++ {
+ // Create an array of random size and set the elements that must remain to non-zero values.
+ size := r.Intn(50)
+ subset := r.Intn(size + 1)
+ array := make([]int, size)
+ for _, j := range r.Perm(size)[:subset] {
+ array[j] = j + 1
+ }
+ var expect []int
+ for _, j := range array {
+ if j > 0 {
+ expect = append(expect, j)
+ }
+ }
+ predCalls := 0
+ ret, err := Slice(Config[int]{
+ Pred: func(arr []int) (bool, error) {
+ predCalls++
+ // All elements of the subarray must be present.
+ nonZero := 0
+ for _, x := range arr {
+ if x > 0 {
+ nonZero++
+ }
+ }
+ return nonZero == subset, nil
+ },
+ Logf: t.Logf,
+ }, array)
+ assert.NoError(t, err)
+ assert.EqualValues(t, expect, ret)
+ // Ensure we don't make too many predicate calls.
+ maxCalls := 3 + 2*subset*(1+int(math.Floor(math.Log2(float64(size)))))
+ assert.LessOrEqual(t, predCalls, maxCalls)
+ }
+}
+
+func BenchmarkSplits(b *testing.B) {
+ for _, guilty := range []int{1, 2, 3, 4} {
+ guilty := guilty
+ b.Run(fmt.Sprintf("%d_guilty", guilty), func(b *testing.B) {
+ var sum int
+ for i := 0; i < b.N; i++ {
+ sum += runMinimize(guilty)
+ }
+ b.ReportMetric(float64(sum)/float64(b.N), "remaining-elements")
+ })
+ }
+}
+
+func runMinimize(guilty int) int {
+ const size = 300
+ const steps = 5
+
+ r := rand.New(rand.NewSource(time.Now().UnixNano()))
+ array := make([]int, size)
+ for _, j := range r.Perm(size)[:guilty] {
+ array[j] = 1
+ }
+
+ ret, _ := Slice(Config[int]{
+ MaxSteps: steps,
+ Pred: func(arr []int) (bool, error) {
+ nonZero := 0
+ for _, x := range arr {
+ if x > 0 {
+ nonZero++
+ }
+ }
+ return nonZero == guilty, nil
+ },
+ }, array)
+ return len(ret)
+}