From d0405298b24db0e2a6b2abfdc8c7e5ebbe49d1a0 Mon Sep 17 00:00:00 2001 From: Hrutvik Kanabar Date: Tue, 25 Oct 2022 10:13:51 +0000 Subject: prog, pkg/compiler: add `BufferCompressed` buffer type & `compressed_image` builtin Create the `BufferCompressed` kind of `BufferType`, which will be used to represent compressed data. Create the corresponding `compressed_image` syzlang builtin, which is backed by `BufferCompressed`. For now, no syscalls use this feature; syscalls that use it will be introduced in future commits. We have to be careful to decompress the data before mutating, and re-compress before storing. We make sure that any deserialised `BufferCompressed` data is valid too. `BufferCompressed` arguments are mutated using a generic heatmap. In the future, we could add variants of `BufferCompressed` or populate the `BufferType` sub-kind, using it to choose different kinds of heatmap for different uncompressed data formats. Various operations on compressed data must be forbidden, so we check for `BufferCompressed` in key places. We also have to ensure `compressed_image` can only be used in syscalls that are marked both `no_generate` and `no_minimize`. Therefore, we add a generic compiler check which allows type descriptions to require attributes on the syscalls which use them. 
--- docs/syscall_descriptions_syntax.md | 6 +++++- pkg/compiler/check.go | 40 ++++++++++++++++++++++++++++++++++ pkg/compiler/testdata/all.txt | 16 ++++++++++++++ pkg/compiler/testdata/errors.txt | 3 +++ pkg/compiler/testdata/errors2.txt | 14 ++++++++++++ pkg/compiler/types.go | 43 ++++++++++++++++++++++++++++++------- prog/analysis.go | 3 ++- prog/encoding.go | 9 ++++++++ prog/hints.go | 5 +++++ prog/minimization.go | 3 +++ prog/mutation.go | 28 ++++++++++++++++++++++++ prog/prio.go | 2 +- prog/rand.go | 2 ++ prog/types.go | 5 +++++ 14 files changed, 168 insertions(+), 11 deletions(-) diff --git a/docs/syscall_descriptions_syntax.md b/docs/syscall_descriptions_syntax.md index 1fe61343e..6d2e28273 100644 --- a/docs/syscall_descriptions_syntax.md +++ b/docs/syscall_descriptions_syntax.md @@ -10,7 +10,8 @@ argname = identifier type = typename [ "[" type-options "]" ] typename = "const" | "intN" | "intptr" | "flags" | "array" | "ptr" | "string" | "strconst" | "filename" | "glob" | "len" | - "bytesize" | "bytesizeN" | "bitsize" | "vma" | "proc" + "bytesize" | "bytesizeN" | "bitsize" | "vma" | "proc" | + "compressed_image" type-options = [type-opt ["," type-opt]] ``` @@ -62,6 +63,9 @@ rest of the type-options are type-specific: vma64 has size of 8 bytes regardless of target pointer size "proc": per process int (see description below), type-options: value range start, how many values per process, underlying type +"compressed_image": zlib-compressed disk image + syscalls accepting compressed images must be marked with `no_generate` + and `no_minimize` call attributes. 
"text": machine code of the specified type, type-options: text type (x86_real, x86_16, x86_32, x86_64, arm64) "void": type with static size 0 diff --git a/pkg/compiler/check.go b/pkg/compiler/check.go index 402e9a84e..4b2dd1330 100644 --- a/pkg/compiler/check.go +++ b/pkg/compiler/check.go @@ -341,8 +341,48 @@ func (comp *compiler) checkAttributeValues() { comp.error(f.Pos, "%v type must not be used as output", f.Type.Ident) } } + case *ast.Call: + attrNames := make(map[string]bool) + descAttrs := comp.parseAttrs(callAttrs, n, n.Attrs) + for desc := range descAttrs { + attrNames[prog.CppName(desc.Name)] = true + } + + checked := make(map[string]bool) + for _, a := range n.Args { + comp.checkRequiredCallAttrs(n, attrNames, a.Type, checked) + } + } + } +} + +func (comp *compiler) checkRequiredCallAttrs(call *ast.Call, callAttrNames map[string]bool, + t *ast.Type, checked map[string]bool) { + desc := comp.getTypeDesc(t) + for attr := range desc.RequiresCallAttrs { + if !callAttrNames[attr] { + comp.error(call.Pos, "call %v refers to type %v and so must be marked %s", call.Name.Name, t.Ident, attr) } } + + if desc == typeStruct { + s := comp.structs[t.Ident] + // Prune recursion, can happen even on correct tree via opt pointers. + if checked[s.Name.Name] { + return + } + checked[s.Name.Name] = true + fields := s.Fields + for _, fld := range fields { + comp.checkRequiredCallAttrs(call, callAttrNames, fld.Type, checked) + } + } else if desc == typeArray { + typ := t.Args[0] + comp.checkRequiredCallAttrs(call, callAttrNames, typ, checked) + } else if desc == typePtr { + typ := t.Args[1] + comp.checkRequiredCallAttrs(call, callAttrNames, typ, checked) + } } func (comp *compiler) checkLenTargets() { diff --git a/pkg/compiler/testdata/all.txt b/pkg/compiler/testdata/all.txt index d03e35def..6fd9f7cc6 100644 --- a/pkg/compiler/testdata/all.txt +++ b/pkg/compiler/testdata/all.txt @@ -322,3 +322,19 @@ struct$fmt0 { } flags_with_one_value = 0 + +# Compressed images. 
+ +struct_compressed { + f0 compressed_image +} + +union_compressed [ + f0 compressed_image + f1 int32 +] [varlen] + +compressed$1(a compressed_image) (no_generate, no_minimize) +compressed$2(a ptr[in, compressed_image]) (no_generate, no_minimize) +compressed$3(a ptr[in, struct_compressed]) (no_generate, no_minimize) +compressed$4(a ptr[in, union_compressed]) (no_generate, no_minimize) \ No newline at end of file diff --git a/pkg/compiler/testdata/errors.txt b/pkg/compiler/testdata/errors.txt index 1201fa361..9d625a34b 100644 --- a/pkg/compiler/testdata/errors.txt +++ b/pkg/compiler/testdata/errors.txt @@ -422,3 +422,6 @@ union$directions [ f1 int32 (in) ### unknown arg/field f1 attribute in f2 int32 (out) ### unknown arg/field f2 attribute out ] + + +compressed$test(a int32) compressed_image ### compressed_image can't be syscall return diff --git a/pkg/compiler/testdata/errors2.txt b/pkg/compiler/testdata/errors2.txt index 482314b4a..3cd63ebd2 100644 --- a/pkg/compiler/testdata/errors2.txt +++ b/pkg/compiler/testdata/errors2.txt @@ -407,3 +407,17 @@ struct$out0 { f2 proc[0, 1, int32] (out) ### proc type must not be used as output f3 bytesize[f1, int32] (out) ### bytesize type must not be used as output } + +struct_non_generatable { + f0 ptr[in, array[compressed_image]] +} + +union_non_minimizable [ + f0 struct_non_generatable + f2 int32 +] + +foo$non_generatable(a compressed_image) (no_minimize) ### call foo$non_generatable refers to type compressed_image and so must be marked no_generate +foo$non_minimizable(a compressed_image) (no_generate) ### call foo$non_minimizable refers to type compressed_image and so must be marked no_minimize +foo$non_generatable_via_struct(a ptr[in, struct_non_generatable]) (no_minimize) ### call foo$non_generatable_via_struct refers to type compressed_image and so must be marked no_generate +foo$non_minimizable_via_union(a ptr[in, union_non_minimizable]) (no_generate) ### call foo$non_minimizable_via_union refers to type 
compressed_image and so must be marked no_minimize diff --git a/pkg/compiler/types.go b/pkg/compiler/types.go index 9e75de18d..095791adb 100644 --- a/pkg/compiler/types.go +++ b/pkg/compiler/types.go @@ -16,14 +16,15 @@ import ( // typeDesc is arg/field type descriptor. type typeDesc struct { - Names []string - CanBeTypedef bool // can be type alias target? - CantBeOpt bool // can't be marked as opt? - CantBeOut bool // can't be used as an explicitly output argument - NeedBase bool // needs base type when used as field? - MaxColon int // max number of colons (int8:2) on fields - OptArgs int // number of optional arguments in Args array - Args []namedArg // type arguments + Names []string + CanBeTypedef bool // can be type alias target? + CantBeOpt bool // can't be marked as opt? + CantBeOut bool // can't be used as an explicitly output argument? + NeedBase bool // needs base type when used as field? + MaxColon int // max number of colons (int8:2) on fields + OptArgs int // number of optional arguments in Args array + Args []namedArg // type arguments + RequiresCallAttrs map[string]bool // calls using this type must have these attrs. // CanBeArgRet returns if this type can be syscall argument/return (false if nil). CanBeArgRet func(comp *compiler, t *ast.Type) (bool, bool) // CanBeResourceBase returns if this type can be a resource base type (false if nil. @@ -830,6 +831,31 @@ var typeFmtFormat = &typeArg{ Kind: kindIdent, } +// typeCompressedImage is used for compressed disk images. 
+var typeCompressedImage = &typeDesc{ + Names: []string{"compressed_image"}, + CantBeOpt: true, + CantBeOut: true, + RequiresCallAttrs: map[string]bool{ + "no_generate": true, + "no_minimize": true, + }, + CanBeArgRet: func(comp *compiler, t *ast.Type) (bool, bool) { + return true, false + }, + Varlen: func(comp *compiler, t *ast.Type, args []*ast.Type) bool { + return true + }, + Gen: func(comp *compiler, t *ast.Type, args []*ast.Type, base prog.IntTypeCommon) prog.Type { + base.TypeSize = 0 + base.TypeAlign = 1 + return &prog.BufferType{ + TypeCommon: base.TypeCommon, + Kind: prog.BufferCompressed, + } + }, +} + // typeArgType is used as placeholder for any type (e.g. ptr target type). var typeArgType = &typeArg{} @@ -1116,6 +1142,7 @@ func init() { typeText, typeString, typeFmt, + typeCompressedImage, } for _, desc := range builtins { for _, name := range desc.Names { diff --git a/prog/analysis.go b/prog/analysis.go index 04f0b270a..b4a30dbe8 100644 --- a/prog/analysis.go +++ b/prog/analysis.go @@ -76,7 +76,8 @@ func (s *state) analyzeImpl(c *Call, resources bool) { } case *BufferType: a := arg.(*DataArg) - if a.Dir() != DirOut && len(a.Data()) != 0 { + if a.Dir() != DirOut && len(a.Data()) != 0 && + (typ.Kind == BufferString || typ.Kind == BufferFilename) { val := string(a.Data()) // Remove trailing zero padding. for len(val) >= 2 && val[len(val)-1] == 0 && val[len(val)-2] == 0 { diff --git a/prog/encoding.go b/prog/encoding.go index 92b7f2be3..82c93273a 100644 --- a/prog/encoding.go +++ b/prog/encoding.go @@ -597,6 +597,15 @@ func (p *parser) parseArgString(t Type, dir Dir) (Arg, error) { if err != nil { return nil, err } + // Check compressed data for validity. + if typ.IsCompressed() { + _, err = Decompress(data) + if err != nil { + p.strictFailf("invalid compressed data in arg: %v", err) + // In non-strict mode, empty the data slice. 
+ data = Compress([]byte{}) + } + } size := ^uint64(0) if p.Char() == '/' { p.Parse('/') diff --git a/prog/hints.go b/prog/hints.go index c66242357..ef7ee17a5 100644 --- a/prog/hints.go +++ b/prog/hints.go @@ -109,6 +109,11 @@ func generateHints(compMap CompMap, arg Arg, exec func()) { // (and filter out file names). return } + case BufferCompressed: + // We can reconsider this in the future, e.g. by decompressing, applying + // hints, then re-compressing. We will need to ensure this doesn't + // produce too many mutants given the current handling of buffers. + return } } diff --git a/prog/minimization.go b/prog/minimization.go index ee8f565c9..68cca2ec4 100644 --- a/prog/minimization.go +++ b/prog/minimization.go @@ -301,6 +301,9 @@ func (typ *BufferType) minimize(ctx *minimizeArgsCtx, arg Arg, path string) bool if arg.Dir() == DirOut { return false } + if typ.IsCompressed() { + panic(fmt.Sprintf("minimizing `no_minimize` call %v", ctx.call.Meta.Name)) + } a := arg.(*DataArg) switch typ.Kind { case BufferBlobRand, BufferBlobRange: diff --git a/prog/mutation.go b/prog/mutation.go index ca53b1c7b..147f75ac1 100644 --- a/prog/mutation.go +++ b/prog/mutation.go @@ -374,6 +374,30 @@ func (t *BufferType) mutate(r *randGen, s *state, arg Arg, ctx ArgCtx) (calls [] case BufferText: data := append([]byte{}, a.Data()...) a.data = r.mutateText(t.Text, data) + case BufferCompressed: + data := a.Data() + if len(data) == 0 { + return + } + data, err := Decompress(data) + if err != nil { + panic(fmt.Sprintf("could not decompress data: %v", err)) + } + if len(data) == 0 { + return // Do not mutate empty data. + } + hm := MakeGenericHeatmap(data) + // At least two mutations, up to about one mutation every 128 KB of heatmap size. 
+ numMutations := r.Intn(hm.Size()/(1<<17)+1) + 2 + for i := 0; i < numMutations; i++ { + index := hm.ChooseLocation(r.Rand) + width := 1 << uint(r.Intn(4)) + if index+width > len(data) { + width = 1 + } + storeInt(data[index:], r.Uint64(), width) + } + a.data = Compress(data) default: panic("unknown buffer kind") } @@ -663,6 +687,10 @@ func (t *BufferType) getMutationPrio(target *Target, arg Arg, ignoreSpecial bool // These are effectively consts (and frequently file names). return dontMutate, false } + if t.Kind == BufferCompressed { + // Prioritise mutation of compressed buffers, e.g. disk images (`compressed_image`). + return maxPriority, false + } return 0.8 * maxPriority, false } diff --git a/prog/prio.go b/prog/prio.go index d0013b75d..535999c1a 100644 --- a/prog/prio.go +++ b/prog/prio.go @@ -96,7 +96,7 @@ func (target *Target) calcResourceUsage() map[string]map[int]weights { } case *BufferType: switch a.Kind { - case BufferBlobRand, BufferBlobRange, BufferText: + case BufferBlobRand, BufferBlobRange, BufferText, BufferCompressed: case BufferString, BufferGlob: if a.SubKind != "" { noteUsage(uses, c, 2, ctx.Dir, fmt.Sprintf("str-%v", a.SubKind)) diff --git a/prog/rand.go b/prog/rand.go index 8337e7af4..c91ffeee4 100644 --- a/prog/rand.go +++ b/prog/rand.go @@ -768,6 +768,8 @@ func (a *BufferType) generate(r *randGen, s *state, dir Dir) (arg Arg, calls []* return MakeOutDataArg(a, dir, uint64(r.Intn(100))), nil } return MakeDataArg(a, dir, r.generateText(a.Text)), nil + case BufferCompressed: + panic(fmt.Sprintf("can't generate compressed type %v", a)) default: panic("unknown buffer kind") } diff --git a/prog/types.go b/prog/types.go index 07eb54395..4fd6a9daa 100644 --- a/prog/types.go +++ b/prog/types.go @@ -484,6 +484,7 @@ const ( BufferFilename BufferText BufferGlob + BufferCompressed ) type TextKind int @@ -554,6 +555,10 @@ func (t *BufferType) isDefaultArg(arg Arg) bool { return true } +func (t *BufferType) IsCompressed() bool { + return t.Kind == 
BufferCompressed +} + type ArrayKind int const ( -- cgit mrf-deployment