diff options
Diffstat (limited to 'pkg/bisect')
 pkg/bisect/minimize/slice.go      | 222 ++++++++++++++++++++++++++++++++++++++
 pkg/bisect/minimize/slice_test.go | 121 ++++++++++++++++++++++
2 files changed, 343 insertions, 0 deletions
// Copyright 2023 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

// Config describes a single slice minimization run.
type Config[T any] struct {
	// The original slice is minimized with respect to this predicate.
	// If Pred(X) returns true, X is assumed to contain all elements that must stay.
	Pred func([]T) (bool, error)
	// MaxSteps is a limit on the number of predicate calls during bisection.
	// If it's hit, the bisection continues as if Pred() begins to return false.
	// If it's set to 0 (by default), no limit is applied.
	MaxSteps int
	// MaxChunks sets a limit on the number of chunks pursued by the bisection algorithm.
	// If we hit the limit, bisection is stopped and Slice() returns ErrTooManyChunks
	// alongside the intermediate bisection result (a valid, but not fully minimized slice).
	MaxChunks int
	// Logf is used for sharing debugging output.
	Logf func(string, ...any)
}

// Slice finds a minimal subsequence of slice elements that still gives Pred() == true.
// The algorithm works by sequentially splitting the slice into smaller-size chunks and running
// Pred() without those chunks.
// The expected number of Pred() runs is O(|result|*log2(|elements|)).
func Slice[T any](config Config[T], slice []T) ([]T, error) {
	if config.Logf == nil {
		// Make logging unconditional for the rest of the code.
		config.Logf = func(string, ...any) {}
	}
	ctx := &sliceCtx[T]{
		Config: config,
		// Start with the whole slice as a single, not-yet-known-to-be-needed chunk.
		chunks: []*arrayChunk[T]{
			{
				elements: slice,
			},
		},
	}
	return ctx.bisect()
}

// sliceCtx holds the mutable state of one bisection run.
type sliceCtx[T any] struct {
	Config[T]
	chunks   []*arrayChunk[T] // Current partition of the candidate elements.
	predRuns int              // Number of Pred() invocations so far (compared against MaxSteps).
}

type arrayChunk[T any] struct {
	elements []T
	final    bool // There's no way to further split this chunk.
}

// ErrTooManyChunks is returned if the number of necessary chunks surpassed MaxChunks.
var ErrTooManyChunks = errors.New("the bisection process is following too many necessary chunks")

// bisect runs the main loop: repeatedly split chunks until all are final
// (or a limit is hit) and return the remaining elements.
func (ctx *sliceCtx[T]) bisect() ([]T, error) {
	// At first, we don't know if the original chunks are really necessary.
	err := ctx.splitChunks(false)
	// Then, keep on splitting the chunks layer by layer until we have identified
	// all necessary elements.
	// This way we ensure that we always go from larger to smaller chunks.
	for err == nil && !ctx.done() {
		if ctx.MaxChunks > 0 && len(ctx.chunks) > ctx.MaxChunks {
			err = ErrTooManyChunks
			break
		}
		err = ctx.splitChunks(true)
	}
	// ErrTooManyChunks is special: the intermediate result is still valid,
	// so it's returned alongside the error.
	if err != nil && !errors.Is(err, ErrTooManyChunks) {
		return nil, err
	}
	return ctx.elements(), err
}

// splitChunks() splits each chunk in two and only leaves the necessary sub-parts.
// someNeeded tells whether the current chunks are already known to contain
// necessary elements (true on all iterations except the very first one).
func (ctx *sliceCtx[T]) splitChunks(someNeeded bool) error {
	ctx.Logf("split chunks (needed=%v): %s", someNeeded, ctx.chunkInfo())
	splitInto := 2
	if !someNeeded && len(ctx.chunks) == 1 {
		// It's our first iteration.
		splitInto = ctx.initialSplit(len(ctx.chunks[0].elements))
	}
	var newChunks []*arrayChunk[T]
	for i, chunk := range ctx.chunks {
		if chunk.final {
			newChunks = append(newChunks, chunk)
			continue
		}
		ctx.Logf("split chunk #%d of len %d into %d parts", i, len(chunk.elements), splitInto)
		chunks := splitChunk(chunk.elements, splitInto)
		if len(chunks) == 1 && someNeeded {
			ctx.Logf("no way to further split the chunk")
			chunk.final = true
			newChunks = append(newChunks, chunk)
			continue
		}
		foundNeeded := false
		for j := range chunks {
			ctx.Logf("testing without sub-chunk %d/%d", j+1, len(chunks))
			// The last sub-chunk of a needed chunk doesn't have to be tested
			// if all the previous ones turned out to be droppable: the chunk
			// contains at least one needed element, so it must be this one.
			if j < len(chunks)-1 || foundNeeded || !someNeeded {
				ret, err := ctx.predRun(
					newChunks,
					mergeRawChunks(chunks[j+1:]),
					ctx.chunks[i+1:],
				)
				if err != nil {
					return err
				}
				if ret {
					ctx.Logf("the chunk can be dropped")
					continue
				}
			} else {
				ctx.Logf("no need to test this chunk, it's definitely needed")
			}
			foundNeeded = true
			newChunks = append(newChunks, &arrayChunk[T]{
				elements: chunks[j],
			})
		}
	}
	ctx.chunks = newChunks
	return nil
}

// Since Pred() runs can be costly, the objective is to get the most out of the
// limited number of Pred() calls.
// We try to achieve it by splitting the initial array in more than 2 elements.
func (ctx *sliceCtx[T]) initialSplit(size int) int {
	// If the number of steps is small and the number of elements is big,
	// let's just split the initial array into MaxSteps chunks.
	// There's no solid reasoning behind the condition below, so feel free to
	// change it if you have better ideas.
	if ctx.MaxSteps > 0 && math.Log2(float64(size)) > float64(ctx.MaxSteps) {
		return ctx.MaxSteps
	}
	// Otherwise let's split in 3.
	return 3
}

// predRun() determines whether (before + mid + after) covers the necessary elements.
// Once MaxSteps is exhausted, it pretends Pred() returns false (i.e. keeps everything).
func (ctx *sliceCtx[T]) predRun(before []*arrayChunk[T], mid []T, after []*arrayChunk[T]) (bool, error) {
	if ctx.MaxSteps > 0 && ctx.predRuns >= ctx.MaxSteps {
		ctx.Logf("we have reached the limit on predicate runs (%d); pretend it returns false",
			ctx.MaxSteps)
		return false, nil
	}
	ctx.predRuns++
	return ctx.Pred(mergeChunks(before, mid, after))
}

// The bisection process is done once every chunk is marked as final.
func (ctx *sliceCtx[T]) done() bool {
	if ctx.MaxSteps > 0 && ctx.predRuns >= ctx.MaxSteps {
		// No reason to continue.
		return true
	}
	for _, chunk := range ctx.chunks {
		if !chunk.final {
			return false
		}
	}
	return true
}

// elements returns all elements currently kept by the bisection.
func (ctx *sliceCtx[T]) elements() []T {
	return mergeChunks(ctx.chunks, nil, nil)
}

// chunkInfo renders a short human-readable summary of the chunks for logging.
func (ctx *sliceCtx[T]) chunkInfo() string {
	var parts []string
	for _, chunk := range ctx.chunks {
		str := ""
		if chunk.final {
			str = ", final"
		}
		parts = append(parts, fmt.Sprintf("<%d%s>", len(chunk.elements), str))
	}
	return strings.Join(parts, ", ")
}

// mergeChunks flattens before + mid + after into a single slice.
func mergeChunks[T any](before []*arrayChunk[T], mid []T, after []*arrayChunk[T]) []T {
	var ret []T
	for _, chunk := range before {
		ret = append(ret, chunk.elements...)
	}
	ret = append(ret, mid...)
	for _, chunk := range after {
		ret = append(ret, chunk.elements...)
	}
	return ret
}

// mergeRawChunks concatenates raw element chunks into one slice.
func mergeRawChunks[T any](chunks [][]T) []T {
	var ret []T
	for _, chunk := range chunks {
		ret = append(ret, chunk...)
	}
	return ret
}

// splitChunk splits chunk into at most `parts` contiguous pieces of
// (almost) equal size; a chunk shorter than `parts` yields one-element pieces.
func splitChunk[T any](chunk []T, parts int) [][]T {
	chunkSize := (len(chunk) + parts - 1) / parts
	if chunkSize == 0 {
		chunkSize = 1
	}
	var ret [][]T
	for i := 0; i < len(chunk); i += chunkSize {
		end := i + chunkSize
		if end > len(chunk) {
			end = len(chunk)
		}
		ret = append(ret, chunk[i:end])
	}
	return ret
}
+ } + return ret +} + +func splitChunk[T any](chunk []T, parts int) [][]T { + chunkSize := (len(chunk) + parts - 1) / parts + if chunkSize == 0 { + chunkSize = 1 + } + var ret [][]T + for i := 0; i < len(chunk); i += chunkSize { + end := i + chunkSize + if end > len(chunk) { + end = len(chunk) + } + ret = append(ret, chunk[i:end]) + } + return ret +} diff --git a/pkg/bisect/minimize/slice_test.go b/pkg/bisect/minimize/slice_test.go new file mode 100644 index 000000000..4e3e65202 --- /dev/null +++ b/pkg/bisect/minimize/slice_test.go @@ -0,0 +1,121 @@ +// Copyright 2023 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package minimize + +import ( + "fmt" + "math" + "math/rand" + "testing" + "time" + + "github.com/google/syzkaller/pkg/testutil" + "github.com/stretchr/testify/assert" +) + +func TestBisectSliceToZero(t *testing.T) { + t.Parallel() + array := make([]int, 100) + ret, err := Slice(Config[int]{ + Pred: func(arr []int) (bool, error) { + // No elements are needed. + return true, nil + }, + Logf: t.Logf, + }, array) + assert.NoError(t, err) + assert.Len(t, ret, 0) +} + +func TestBisectSliceFull(t *testing.T) { + t.Parallel() + array := make([]int, 100) + ret, err := Slice(Config[int]{ + Pred: func(arr []int) (bool, error) { + // All elements are needed. + return false, nil + }, + Logf: t.Logf, + }, array) + assert.NoError(t, err) + assert.Equal(t, ret, array) +} + +func TestBisectRandomSlice(t *testing.T) { + t.Parallel() + r := rand.New(testutil.RandSource(t)) + for i := 0; i < testutil.IterCount(); i++ { + // Create an array of random size and set the elements that must remain to non-zero values. 
+ size := r.Intn(50) + subset := r.Intn(size + 1) + array := make([]int, size) + for _, j := range r.Perm(size)[:subset] { + array[j] = j + 1 + } + var expect []int + for _, j := range array { + if j > 0 { + expect = append(expect, j) + } + } + predCalls := 0 + ret, err := Slice(Config[int]{ + Pred: func(arr []int) (bool, error) { + predCalls++ + // All elements of the subarray must be present. + nonZero := 0 + for _, x := range arr { + if x > 0 { + nonZero++ + } + } + return nonZero == subset, nil + }, + Logf: t.Logf, + }, array) + assert.NoError(t, err) + assert.EqualValues(t, expect, ret) + // Ensure we don't make too many predicate calls. + maxCalls := 3 + 2*subset*(1+int(math.Floor(math.Log2(float64(size))))) + assert.LessOrEqual(t, predCalls, maxCalls) + } +} + +func BenchmarkSplits(b *testing.B) { + for _, guilty := range []int{1, 2, 3, 4} { + guilty := guilty + b.Run(fmt.Sprintf("%d_guilty", guilty), func(b *testing.B) { + var sum int + for i := 0; i < b.N; i++ { + sum += runMinimize(guilty) + } + b.ReportMetric(float64(sum)/float64(b.N), "remaining-elements") + }) + } +} + +func runMinimize(guilty int) int { + const size = 300 + const steps = 5 + + r := rand.New(rand.NewSource(time.Now().UnixNano())) + array := make([]int, size) + for _, j := range r.Perm(size)[:guilty] { + array[j] = 1 + } + + ret, _ := Slice(Config[int]{ + MaxSteps: steps, + Pred: func(arr []int) (bool, error) { + nonZero := 0 + for _, x := range arr { + if x > 0 { + nonZero++ + } + } + return nonZero == guilty, nil + }, + }, array) + return len(ret) +} |
