From d0405298b24db0e2a6b2abfdc8c7e5ebbe49d1a0 Mon Sep 17 00:00:00 2001 From: Hrutvik Kanabar Date: Tue, 25 Oct 2022 10:13:51 +0000 Subject: prog, pkg/compiler: add `BufferCompressed` buffer type & `compressed_image` builtin Create the `BufferCompressed` kind of `BufferType`, which will be used to represent compressed data. Create the corresponding `compressed_image` syzlang builtin, which is backed by `BufferCompressed`. For now, no syscalls use this feature - this will be introduced in future commits. We have to be careful to decompress the data before mutating, and re-compress before storing. We make sure that any deserialised `BufferCompressed` data is valid too. `BufferCompressed` arguments are mutated using a generic heatmap. In future, we could add variants of `BufferCompressed` or populate the `BufferType` sub-kind, using it to choose different kinds of heatmap for different uncompressed data formats. Various operations on compressed data must be forbidden, so we check for `BufferCompressed` in key places. We also have to ensure `compressed_image` can only be used in syscalls that are marked `no_{generate,minimize}`. Therefore, we add a generic compiler check which allows type descriptions to require attributes on the syscalls which use them. --- prog/analysis.go | 3 ++- prog/encoding.go | 9 +++++++++ prog/hints.go | 5 +++++ prog/minimization.go | 3 +++ prog/mutation.go | 28 ++++++++++++++++++++++++++++ prog/prio.go | 2 +- prog/rand.go | 2 ++ prog/types.go | 5 +++++ 8 files changed, 55 insertions(+), 2 deletions(-) (limited to 'prog') diff --git a/prog/analysis.go b/prog/analysis.go index 04f0b270a..b4a30dbe8 100644 --- a/prog/analysis.go +++ b/prog/analysis.go @@ -76,7 +76,8 @@ func (s *state) analyzeImpl(c *Call, resources bool) { } case *BufferType: a := arg.(*DataArg) - if a.Dir() != DirOut && len(a.Data()) != 0 { + if a.Dir() != DirOut && len(a.Data()) != 0 && + (typ.Kind == BufferString || typ.Kind == BufferFilename) { val := string(a.Data()) // Remove trailing zero padding. for len(val) >= 2 && val[len(val)-1] == 0 && val[len(val)-2] == 0 { diff --git a/prog/encoding.go b/prog/encoding.go index 92b7f2be3..82c93273a 100644 --- a/prog/encoding.go +++ b/prog/encoding.go @@ -597,6 +597,15 @@ func (p *parser) parseArgString(t Type, dir Dir) (Arg, error) { if err != nil { return nil, err } + // Check compressed data for validity. + if typ.IsCompressed() { + _, err = Decompress(data) + if err != nil { + p.strictFailf("invalid compressed data in arg: %v", err) + // In non-strict mode, empty the data slice. + data = Compress([]byte{}) + } + } size := ^uint64(0) if p.Char() == '/' { p.Parse('/') diff --git a/prog/hints.go b/prog/hints.go index c66242357..ef7ee17a5 100644 --- a/prog/hints.go +++ b/prog/hints.go @@ -109,6 +109,11 @@ func generateHints(compMap CompMap, arg Arg, exec func()) { // (and filter out file names). return } + case BufferCompressed: + // We can reconsider this in the future, e.g. by decompressing, applying + // hints, then re-compressing. We will need to ensure this doesn't + // produce too many mutants given the current handling of buffers. + return } } diff --git a/prog/minimization.go b/prog/minimization.go index ee8f565c9..68cca2ec4 100644 --- a/prog/minimization.go +++ b/prog/minimization.go @@ -301,6 +301,9 @@ func (typ *BufferType) minimize(ctx *minimizeArgsCtx, arg Arg, path string) bool if arg.Dir() == DirOut { return false } + if typ.IsCompressed() { + panic(fmt.Sprintf("minimizing `no_minimize` call %v", ctx.call.Meta.Name)) + } a := arg.(*DataArg) switch typ.Kind { case BufferBlobRand, BufferBlobRange: diff --git a/prog/mutation.go b/prog/mutation.go index ca53b1c7b..147f75ac1 100644 --- a/prog/mutation.go +++ b/prog/mutation.go @@ -374,6 +374,30 @@ func (t *BufferType) mutate(r *randGen, s *state, arg Arg, ctx ArgCtx) (calls [] case BufferText: data := append([]byte{}, a.Data()...) a.data = r.mutateText(t.Text, data) + case BufferCompressed: + data := a.Data() + if len(data) == 0 { + return + } + data, err := Decompress(data) + if err != nil { + panic(fmt.Sprintf("could not decompress data: %v", err)) + } + if len(data) == 0 { + return // Do not mutate empty data. + } + hm := MakeGenericHeatmap(data) + // At least two mutations, up to about one mutation every 128 KB of heatmap size. + numMutations := r.Intn(hm.Size()/(1<<17)+1) + 2 + for i := 0; i < numMutations; i++ { + index := hm.ChooseLocation(r.Rand) + width := 1 << uint(r.Intn(4)) + if index+width > len(data) { + width = 1 + } + storeInt(data[index:], r.Uint64(), width) + } + a.data = Compress(data) default: panic("unknown buffer kind") } @@ -663,6 +687,10 @@ func (t *BufferType) getMutationPrio(target *Target, arg Arg, ignoreSpecial bool // These are effectively consts (and frequently file names). return dontMutate, false } + if t.Kind == BufferCompressed { + // Prioritise mutation of compressed buffers, e.g. disk images (`compressed_image`). + return maxPriority, false + } return 0.8 * maxPriority, false } diff --git a/prog/prio.go b/prog/prio.go index d0013b75d..535999c1a 100644 --- a/prog/prio.go +++ b/prog/prio.go @@ -96,7 +96,7 @@ func (target *Target) calcResourceUsage() map[string]map[int]weights { } case *BufferType: switch a.Kind { - case BufferBlobRand, BufferBlobRange, BufferText: + case BufferBlobRand, BufferBlobRange, BufferText, BufferCompressed: case BufferString, BufferGlob: if a.SubKind != "" { noteUsage(uses, c, 2, ctx.Dir, fmt.Sprintf("str-%v", a.SubKind)) diff --git a/prog/rand.go b/prog/rand.go index 8337e7af4..c91ffeee4 100644 --- a/prog/rand.go +++ b/prog/rand.go @@ -768,6 +768,8 @@ func (a *BufferType) generate(r *randGen, s *state, dir Dir) (arg Arg, calls []* return MakeOutDataArg(a, dir, uint64(r.Intn(100))), nil } return MakeDataArg(a, dir, r.generateText(a.Text)), nil + case BufferCompressed: + panic(fmt.Sprintf("can't generate compressed type %v", a)) default: panic("unknown buffer kind") } diff --git a/prog/types.go b/prog/types.go index 07eb54395..4fd6a9daa 100644 --- a/prog/types.go +++ b/prog/types.go @@ -484,6 +484,7 @@ const ( BufferFilename BufferText BufferGlob + BufferCompressed ) type TextKind int @@ -554,6 +555,10 @@ func (t *BufferType) isDefaultArg(arg Arg) bool { return true } +func (t *BufferType) IsCompressed() bool { + return t.Kind == BufferCompressed +} + type ArrayKind int const ( -- cgit mrf-deployment