aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Hrutvik Kanabar <hrutvik@google.com> 2022-10-25 10:13:51 +0000
committer: Aleksandr Nogikh <wp32pw@gmail.com> 2022-11-21 11:06:14 +0100
commit: d0405298b24db0e2a6b2abfdc8c7e5ebbe49d1a0 (patch)
tree: c022669ce377e0c09376d60b45a01584c67c0989
parent: 7954d07c228dd9ce63b7ebd13239b4d1f2c35233 (diff)
prog, pkg/compiler: add `BufferCompressed` buffer type & `compressed_image` builtin
Create the `BufferCompressed` kind of `BufferType`, which will be used to represent compressed data. Create the corresponding `compressed_image` syzlang builtin, which is backed by `BufferCompressed`. For now, no syscalls use this feature - this will be introduced in future commits. We have to be careful to decompress the data before mutating, and re-compress before storing. We make sure that any deserialised `BufferCompressed` data is valid too. `BufferCompressed` arguments are mutated using a generic heatmap. In future, we could add variants of `BufferCompressed` or populate the `BufferType` sub-kind, using it to choose different kinds of heatmap for different uncompressed data formats. Various operations on compressed data must be forbidden, so we check for `BufferCompressed` in key places. We also have to ensure `compressed_image` can only be used in syscalls that are marked `no_{generate,minimize}`. Therefore, we add a generic compiler check which allows type descriptions to require attributes on the syscalls which use them.
-rw-r--r-- docs/syscall_descriptions_syntax.md | 6
-rw-r--r-- pkg/compiler/check.go | 40
-rw-r--r-- pkg/compiler/testdata/all.txt | 16
-rw-r--r-- pkg/compiler/testdata/errors.txt | 3
-rw-r--r-- pkg/compiler/testdata/errors2.txt | 14
-rw-r--r-- pkg/compiler/types.go | 43
-rw-r--r-- prog/analysis.go | 3
-rw-r--r-- prog/encoding.go | 9
-rw-r--r-- prog/hints.go | 5
-rw-r--r-- prog/minimization.go | 3
-rw-r--r-- prog/mutation.go | 28
-rw-r--r-- prog/prio.go | 2
-rw-r--r-- prog/rand.go | 2
-rw-r--r-- prog/types.go | 5
14 files changed, 168 insertions, 11 deletions
diff --git a/docs/syscall_descriptions_syntax.md b/docs/syscall_descriptions_syntax.md
index 1fe61343e..6d2e28273 100644
--- a/docs/syscall_descriptions_syntax.md
+++ b/docs/syscall_descriptions_syntax.md
@@ -10,7 +10,8 @@ argname = identifier
type = typename [ "[" type-options "]" ]
typename = "const" | "intN" | "intptr" | "flags" | "array" | "ptr" |
"string" | "strconst" | "filename" | "glob" | "len" |
- "bytesize" | "bytesizeN" | "bitsize" | "vma" | "proc"
+ "bytesize" | "bytesizeN" | "bitsize" | "vma" | "proc" |
+ "compressed_image"
type-options = [type-opt ["," type-opt]]
```
@@ -62,6 +63,9 @@ rest of the type-options are type-specific:
vma64 has size of 8 bytes regardless of target pointer size
"proc": per process int (see description below), type-options:
value range start, how many values per process, underlying type
+"compressed_image": zlib-compressed disk image
+ syscalls accepting compressed images must be marked with `no_generate`
+ and `no_minimize` call attributes.
"text": machine code of the specified type, type-options:
text type (x86_real, x86_16, x86_32, x86_64, arm64)
"void": type with static size 0
diff --git a/pkg/compiler/check.go b/pkg/compiler/check.go
index 402e9a84e..4b2dd1330 100644
--- a/pkg/compiler/check.go
+++ b/pkg/compiler/check.go
@@ -341,8 +341,48 @@ func (comp *compiler) checkAttributeValues() {
comp.error(f.Pos, "%v type must not be used as output", f.Type.Ident)
}
}
+ case *ast.Call:
+ attrNames := make(map[string]bool)
+ descAttrs := comp.parseAttrs(callAttrs, n, n.Attrs)
+ for desc := range descAttrs {
+ attrNames[prog.CppName(desc.Name)] = true
+ }
+
+ checked := make(map[string]bool)
+ for _, a := range n.Args {
+ comp.checkRequiredCallAttrs(n, attrNames, a.Type, checked)
+ }
+ }
+ }
+}
+
+func (comp *compiler) checkRequiredCallAttrs(call *ast.Call, callAttrNames map[string]bool,
+ t *ast.Type, checked map[string]bool) {
+ desc := comp.getTypeDesc(t)
+ for attr := range desc.RequiresCallAttrs {
+ if !callAttrNames[attr] {
+ comp.error(call.Pos, "call %v refers to type %v and so must be marked %s", call.Name.Name, t.Ident, attr)
}
}
+
+ if desc == typeStruct {
+ s := comp.structs[t.Ident]
+ // Prune recursion, can happen even on correct tree via opt pointers.
+ if checked[s.Name.Name] {
+ return
+ }
+ checked[s.Name.Name] = true
+ fields := s.Fields
+ for _, fld := range fields {
+ comp.checkRequiredCallAttrs(call, callAttrNames, fld.Type, checked)
+ }
+ } else if desc == typeArray {
+ typ := t.Args[0]
+ comp.checkRequiredCallAttrs(call, callAttrNames, typ, checked)
+ } else if desc == typePtr {
+ typ := t.Args[1]
+ comp.checkRequiredCallAttrs(call, callAttrNames, typ, checked)
+ }
}
func (comp *compiler) checkLenTargets() {
diff --git a/pkg/compiler/testdata/all.txt b/pkg/compiler/testdata/all.txt
index d03e35def..6fd9f7cc6 100644
--- a/pkg/compiler/testdata/all.txt
+++ b/pkg/compiler/testdata/all.txt
@@ -322,3 +322,19 @@ struct$fmt0 {
}
flags_with_one_value = 0
+
+# Compressed images.
+
+struct_compressed {
+ f0 compressed_image
+}
+
+union_compressed [
+ f0 compressed_image
+ f1 int32
+] [varlen]
+
+compressed$1(a compressed_image) (no_generate, no_minimize)
+compressed$2(a ptr[in, compressed_image]) (no_generate, no_minimize)
+compressed$3(a ptr[in, struct_compressed]) (no_generate, no_minimize)
+compressed$4(a ptr[in, union_compressed]) (no_generate, no_minimize)
\ No newline at end of file
diff --git a/pkg/compiler/testdata/errors.txt b/pkg/compiler/testdata/errors.txt
index 1201fa361..9d625a34b 100644
--- a/pkg/compiler/testdata/errors.txt
+++ b/pkg/compiler/testdata/errors.txt
@@ -422,3 +422,6 @@ union$directions [
f1 int32 (in) ### unknown arg/field f1 attribute in
f2 int32 (out) ### unknown arg/field f2 attribute out
]
+
+
+compressed$test(a int32) compressed_image ### compressed_image can't be syscall return
diff --git a/pkg/compiler/testdata/errors2.txt b/pkg/compiler/testdata/errors2.txt
index 482314b4a..3cd63ebd2 100644
--- a/pkg/compiler/testdata/errors2.txt
+++ b/pkg/compiler/testdata/errors2.txt
@@ -407,3 +407,17 @@ struct$out0 {
f2 proc[0, 1, int32] (out) ### proc type must not be used as output
f3 bytesize[f1, int32] (out) ### bytesize type must not be used as output
}
+
+struct_non_generatable {
+ f0 ptr[in, array[compressed_image]]
+}
+
+union_non_minimizable [
+ f0 struct_non_generatable
+ f2 int32
+]
+
+foo$non_generatable(a compressed_image) (no_minimize) ### call foo$non_generatable refers to type compressed_image and so must be marked no_generate
+foo$non_minimizable(a compressed_image) (no_generate) ### call foo$non_minimizable refers to type compressed_image and so must be marked no_minimize
+foo$non_generatable_via_struct(a ptr[in, struct_non_generatable]) (no_minimize) ### call foo$non_generatable_via_struct refers to type compressed_image and so must be marked no_generate
+foo$non_minimizable_via_union(a ptr[in, union_non_minimizable]) (no_generate) ### call foo$non_minimizable_via_union refers to type compressed_image and so must be marked no_minimize
diff --git a/pkg/compiler/types.go b/pkg/compiler/types.go
index 9e75de18d..095791adb 100644
--- a/pkg/compiler/types.go
+++ b/pkg/compiler/types.go
@@ -16,14 +16,15 @@ import (
// typeDesc is arg/field type descriptor.
type typeDesc struct {
- Names []string
- CanBeTypedef bool // can be type alias target?
- CantBeOpt bool // can't be marked as opt?
- CantBeOut bool // can't be used as an explicitly output argument
- NeedBase bool // needs base type when used as field?
- MaxColon int // max number of colons (int8:2) on fields
- OptArgs int // number of optional arguments in Args array
- Args []namedArg // type arguments
+ Names []string
+ CanBeTypedef bool // can be type alias target?
+ CantBeOpt bool // can't be marked as opt?
+ CantBeOut bool // can't be used as an explicitly output argument?
+ NeedBase bool // needs base type when used as field?
+ MaxColon int // max number of colons (int8:2) on fields
+ OptArgs int // number of optional arguments in Args array
+ Args []namedArg // type arguments
+ RequiresCallAttrs map[string]bool // calls using this type must have these attrs.
// CanBeArgRet returns if this type can be syscall argument/return (false if nil).
CanBeArgRet func(comp *compiler, t *ast.Type) (bool, bool)
// CanBeResourceBase returns if this type can be a resource base type (false if nil).
@@ -830,6 +831,31 @@ var typeFmtFormat = &typeArg{
Kind: kindIdent,
}
+// typeCompressedImage is used for compressed disk images.
+var typeCompressedImage = &typeDesc{
+ Names: []string{"compressed_image"},
+ CantBeOpt: true,
+ CantBeOut: true,
+ RequiresCallAttrs: map[string]bool{
+ "no_generate": true,
+ "no_minimize": true,
+ },
+ CanBeArgRet: func(comp *compiler, t *ast.Type) (bool, bool) {
+ return true, false
+ },
+ Varlen: func(comp *compiler, t *ast.Type, args []*ast.Type) bool {
+ return true
+ },
+ Gen: func(comp *compiler, t *ast.Type, args []*ast.Type, base prog.IntTypeCommon) prog.Type {
+ base.TypeSize = 0
+ base.TypeAlign = 1
+ return &prog.BufferType{
+ TypeCommon: base.TypeCommon,
+ Kind: prog.BufferCompressed,
+ }
+ },
+}
+
// typeArgType is used as placeholder for any type (e.g. ptr target type).
var typeArgType = &typeArg{}
@@ -1116,6 +1142,7 @@ func init() {
typeText,
typeString,
typeFmt,
+ typeCompressedImage,
}
for _, desc := range builtins {
for _, name := range desc.Names {
diff --git a/prog/analysis.go b/prog/analysis.go
index 04f0b270a..b4a30dbe8 100644
--- a/prog/analysis.go
+++ b/prog/analysis.go
@@ -76,7 +76,8 @@ func (s *state) analyzeImpl(c *Call, resources bool) {
}
case *BufferType:
a := arg.(*DataArg)
- if a.Dir() != DirOut && len(a.Data()) != 0 {
+ if a.Dir() != DirOut && len(a.Data()) != 0 &&
+ (typ.Kind == BufferString || typ.Kind == BufferFilename) {
val := string(a.Data())
// Remove trailing zero padding.
for len(val) >= 2 && val[len(val)-1] == 0 && val[len(val)-2] == 0 {
diff --git a/prog/encoding.go b/prog/encoding.go
index 92b7f2be3..82c93273a 100644
--- a/prog/encoding.go
+++ b/prog/encoding.go
@@ -597,6 +597,15 @@ func (p *parser) parseArgString(t Type, dir Dir) (Arg, error) {
if err != nil {
return nil, err
}
+ // Check compressed data for validity.
+ if typ.IsCompressed() {
+ _, err = Decompress(data)
+ if err != nil {
+ p.strictFailf("invalid compressed data in arg: %v", err)
+ // In non-strict mode, substitute a valid compressed encoding of empty data.
+ data = Compress([]byte{})
+ }
+ }
size := ^uint64(0)
if p.Char() == '/' {
p.Parse('/')
diff --git a/prog/hints.go b/prog/hints.go
index c66242357..ef7ee17a5 100644
--- a/prog/hints.go
+++ b/prog/hints.go
@@ -109,6 +109,11 @@ func generateHints(compMap CompMap, arg Arg, exec func()) {
// (and filter out file names).
return
}
+ case BufferCompressed:
+ // We can reconsider this in the future, e.g. by decompressing, applying
+ // hints, then re-compressing. We will need to ensure this doesn't
+ // produce too many mutants given the current handling of buffers.
+ return
}
}
diff --git a/prog/minimization.go b/prog/minimization.go
index ee8f565c9..68cca2ec4 100644
--- a/prog/minimization.go
+++ b/prog/minimization.go
@@ -301,6 +301,9 @@ func (typ *BufferType) minimize(ctx *minimizeArgsCtx, arg Arg, path string) bool
if arg.Dir() == DirOut {
return false
}
+ if typ.IsCompressed() {
+ panic(fmt.Sprintf("minimizing `no_minimize` call %v", ctx.call.Meta.Name))
+ }
a := arg.(*DataArg)
switch typ.Kind {
case BufferBlobRand, BufferBlobRange:
diff --git a/prog/mutation.go b/prog/mutation.go
index ca53b1c7b..147f75ac1 100644
--- a/prog/mutation.go
+++ b/prog/mutation.go
@@ -374,6 +374,30 @@ func (t *BufferType) mutate(r *randGen, s *state, arg Arg, ctx ArgCtx) (calls []
case BufferText:
data := append([]byte{}, a.Data()...)
a.data = r.mutateText(t.Text, data)
+ case BufferCompressed:
+ data := a.Data()
+ if len(data) == 0 {
+ return
+ }
+ data, err := Decompress(data)
+ if err != nil {
+ panic(fmt.Sprintf("could not decompress data: %v", err))
+ }
+ if len(data) == 0 {
+ return // Do not mutate empty data.
+ }
+ hm := MakeGenericHeatmap(data)
+ // At least two mutations, up to about one mutation every 128 KB of heatmap size.
+ numMutations := r.Intn(hm.Size()/(1<<17)+1) + 2
+ for i := 0; i < numMutations; i++ {
+ index := hm.ChooseLocation(r.Rand)
+ width := 1 << uint(r.Intn(4))
+ if index+width > len(data) {
+ width = 1
+ }
+ storeInt(data[index:], r.Uint64(), width)
+ }
+ a.data = Compress(data)
default:
panic("unknown buffer kind")
}
@@ -663,6 +687,10 @@ func (t *BufferType) getMutationPrio(target *Target, arg Arg, ignoreSpecial bool
// These are effectively consts (and frequently file names).
return dontMutate, false
}
+ if t.Kind == BufferCompressed {
+ // Prioritise mutation of compressed buffers, e.g. disk images (`compressed_image`).
+ return maxPriority, false
+ }
return 0.8 * maxPriority, false
}
diff --git a/prog/prio.go b/prog/prio.go
index d0013b75d..535999c1a 100644
--- a/prog/prio.go
+++ b/prog/prio.go
@@ -96,7 +96,7 @@ func (target *Target) calcResourceUsage() map[string]map[int]weights {
}
case *BufferType:
switch a.Kind {
- case BufferBlobRand, BufferBlobRange, BufferText:
+ case BufferBlobRand, BufferBlobRange, BufferText, BufferCompressed:
case BufferString, BufferGlob:
if a.SubKind != "" {
noteUsage(uses, c, 2, ctx.Dir, fmt.Sprintf("str-%v", a.SubKind))
diff --git a/prog/rand.go b/prog/rand.go
index 8337e7af4..c91ffeee4 100644
--- a/prog/rand.go
+++ b/prog/rand.go
@@ -768,6 +768,8 @@ func (a *BufferType) generate(r *randGen, s *state, dir Dir) (arg Arg, calls []*
return MakeOutDataArg(a, dir, uint64(r.Intn(100))), nil
}
return MakeDataArg(a, dir, r.generateText(a.Text)), nil
+ case BufferCompressed:
+ panic(fmt.Sprintf("can't generate compressed type %v", a))
default:
panic("unknown buffer kind")
}
diff --git a/prog/types.go b/prog/types.go
index 07eb54395..4fd6a9daa 100644
--- a/prog/types.go
+++ b/prog/types.go
@@ -484,6 +484,7 @@ const (
BufferFilename
BufferText
BufferGlob
+ BufferCompressed
)
type TextKind int
@@ -554,6 +555,10 @@ func (t *BufferType) isDefaultArg(arg Arg) bool {
return true
}
+func (t *BufferType) IsCompressed() bool {
+ return t.Kind == BufferCompressed
+}
+
type ArrayKind int
const (