From 026aaeb2b5393e0c838873306e1c5f2084a8a1aa Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Sun, 5 Jan 2020 11:46:35 +0100 Subject: prog: don't mutate strings with enumerated values Strings with enumerated values are frequently file names or have complete enumeration of relevant values. Mutating a complete enumeration is not very profitable. Mutating file names leads to escaping paths and fuzzer messing with things it is not supposed to mess with as in: r0 = openat$apparmor_task_exec(0xffffffffffffff9c, &(0x7f0000000440)='/proc/self//exe\x00', 0x3, 0x0) --- executor/defs.h | 2 +- executor/syscalls.h | 2 ++ prog/encoding.go | 20 ++++++++++++--- prog/encoding_test.go | 69 ++++++++++++++++++++++++++++++++------------------- prog/hints.go | 10 +++++++- prog/mutation.go | 12 ++++++--- sys/test/gen/64.go | 8 +++++- sys/test/test.txt | 4 +++ 8 files changed, 91 insertions(+), 36 deletions(-) diff --git a/executor/defs.h b/executor/defs.h index 6726e29a2..28a41299b 100644 --- a/executor/defs.h +++ b/executor/defs.h @@ -185,7 +185,7 @@ #if GOARCH_64 #define GOARCH "64" -#define SYZ_REVISION "fd5870a81478a9208df976f4d8066bbdfa12f8e5" +#define SYZ_REVISION "108f16d364fe32a8c41e4592f79eab831b5935a0" #define SYZ_EXECUTOR_USES_FORK_SERVER 0 #define SYZ_EXECUTOR_USES_SHMEM 0 #define SYZ_PAGE_SIZE 4096 diff --git a/executor/syscalls.h b/executor/syscalls.h index c5c2dda54..82d610197 100644 --- a/executor/syscalls.h +++ b/executor/syscalls.h @@ -21431,6 +21431,8 @@ const call_t syscalls[] = { {"test$res1", 0}, {"test$res2", 0}, {"test$str0", 0}, + {"test$str1", 0}, + {"test$str2", 0}, {"test$struct", 0}, {"test$syz_union3", 0}, {"test$syz_union4", 0}, diff --git a/prog/encoding.go b/prog/encoding.go index 943dfe350..f288dbc93 100644 --- a/prog/encoding.go +++ b/prog/encoding.go @@ -506,10 +506,11 @@ func (p *parser) parseArgAddr(typ Type) (Arg, error) { return arg, nil } -func (p *parser) parseArgString(typ Type) (Arg, error) { - if _, ok := typ.(*BufferType); !ok { +func (p 
*parser) parseArgString(t Type) (Arg, error) { + typ, ok := t.(*BufferType) + if !ok { p.eatExcessive(true, "wrong string arg") - return typ.DefaultArg(), nil + return t.DefaultArg(), nil } data, err := p.deserializeData() if err != nil { @@ -541,6 +542,19 @@ func (p *parser) parseArgString(typ Type) (Arg, error) { data = append(data, make([]byte, diff)...) } data = data[:size] + if typ.Kind == BufferString && len(typ.Values) != 0 { + matched := false + for _, val := range typ.Values { + if string(data) == val { + matched = true + break + } + } + if !matched { + p.strictFailf("bad string value %q, expect %q", data, typ.Values) + data = []byte(typ.Values[0]) + } + } return MakeDataArg(typ, data), nil } diff --git a/prog/encoding_test.go b/prog/encoding_test.go index ba7fe3329..b6e331cf0 100644 --- a/prog/encoding_test.go +++ b/prog/encoding_test.go @@ -8,7 +8,6 @@ import ( "fmt" "math/rand" "reflect" - "regexp" "sort" "strings" "testing" @@ -132,8 +131,8 @@ func TestDeserialize(t *testing.T) { tests := []struct { input string output string - err *regexp.Regexp - strictErr *regexp.Regexp + err string + strictErr string }{ { input: `test$struct(&(0x7f0000000000)={0x0, {0x0}})`, @@ -141,7 +140,7 @@ func TestDeserialize(t *testing.T) { { input: `test$struct(&(0x7f0000000000)=0x0)`, output: `test$struct(&(0x7f0000000000))`, - strictErr: regexp.MustCompile("wrong int arg"), + strictErr: "wrong int arg", }, { input: `test$regression1(&(0x7f0000000000)=[{"000000"}, {"0000000000"}])`, @@ -151,23 +150,23 @@ func TestDeserialize(t *testing.T) { }, { input: `test$excessive_args1(0x0, 0x1, {0x1, &(0x7f0000000000)=[0x1, 0x2]})`, - strictErr: regexp.MustCompile("excessive syscall arguments"), + strictErr: "excessive syscall arguments", }, { input: `test$excessive_args2(0x0, 0x1, {0x1, &(0x7f0000000000)={0x1, 0x2}})`, - strictErr: regexp.MustCompile("excessive syscall arguments"), + strictErr: "excessive syscall arguments", }, { input: `test$excessive_args2(0x0, 0x1, {0x1, 
&(0x7f0000000000)=nil})`, - strictErr: regexp.MustCompile("excessive syscall arguments"), + strictErr: "excessive syscall arguments", }, { input: `test$excessive_args2(0x0, &(0x7f0000000000), 0x0)`, - strictErr: regexp.MustCompile("excessive syscall arguments"), + strictErr: "excessive syscall arguments", }, { input: `test$excessive_fields1(&(0x7f0000000000)={0x1, &(0x7f0000000000)=[{0x0}, 0x2]}, {0x1, 0x2, [0x1, 0x2]})`, - strictErr: regexp.MustCompile("excessive struct excessive_fields fields"), + strictErr: "excessive struct excessive_fields fields", }, { input: `test$excessive_fields1(0x0)`, @@ -176,37 +175,37 @@ func TestDeserialize(t *testing.T) { { input: `test$excessive_fields1(r0)`, output: `test$excessive_fields1(&(0x7f0000000000))`, - strictErr: regexp.MustCompile("undeclared variable r0"), + strictErr: "undeclared variable r0", }, { input: `test$excessive_args2(r1)`, output: `test$excessive_args2(0x0)`, - strictErr: regexp.MustCompile("undeclared variable r1"), + strictErr: "undeclared variable r1", }, { input: `test$excessive_args2({0x0, 0x1})`, output: `test$excessive_args2(0x0)`, - strictErr: regexp.MustCompile("wrong struct arg"), + strictErr: "wrong struct arg", }, { input: `test$excessive_args2([0x0], 0x0)`, output: `test$excessive_args2(0x0)`, - strictErr: regexp.MustCompile("wrong array arg"), + strictErr: "wrong array arg", }, { input: `test$excessive_args2(@foo)`, output: `test$excessive_args2(0x0)`, - strictErr: regexp.MustCompile("wrong union arg"), + strictErr: "wrong union arg", }, { input: `test$excessive_args2('foo')`, output: `test$excessive_args2(0x0)`, - strictErr: regexp.MustCompile("wrong string arg"), + strictErr: "wrong string arg", }, { input: `test$excessive_args2(&(0x7f0000000000)={0x0, 0x1})`, output: `test$excessive_args2(0x0)`, - strictErr: regexp.MustCompile("wrong addr arg"), + strictErr: "wrong addr arg", }, { input: `test$excessive_args2(nil)`, @@ -215,17 +214,17 @@ func TestDeserialize(t *testing.T) { { input: 
`test$type_confusion1(&(0x7f0000000000)=@unknown)`, output: `test$type_confusion1(&(0x7f0000000000))`, - strictErr: regexp.MustCompile("wrong union option"), + strictErr: "wrong union option", }, { input: `test$type_confusion1(&(0x7f0000000000)=@unknown={0x0, 'abc'}, 0x0)`, output: `test$type_confusion1(&(0x7f0000000000))`, - strictErr: regexp.MustCompile("wrong union option"), + strictErr: "wrong union option", }, { input: `test$excessive_fields1(&(0x7f0000000000)=0x0)`, output: `test$excessive_fields1(&(0x7f0000000000))`, - strictErr: regexp.MustCompile("wrong int arg"), + strictErr: "wrong int arg", }, { input: `test$excessive_fields1(0x0)`, @@ -253,7 +252,7 @@ func TestDeserialize(t *testing.T) { }, { input: `test$auto0(AUTO, &AUTO={AUTO, AUTO, AUTO}, AUTO, 0x0)`, - err: regexp.MustCompile(`wrong type \*prog\.IntType for AUTO`), + err: `wrong type *prog.IntType for AUTO`, }, { input: `test$str0(&AUTO="303100090a0d7022273a")`, @@ -270,15 +269,33 @@ func TestDeserialize(t *testing.T) { { input: `test$out_const(&(0x7f0000000000)=0x2)`, output: `test$out_const(&(0x7f0000000000))`, - strictErr: regexp.MustCompile(`out arg const\[1, const\] has non-default value: 2`), + strictErr: `out arg const[1, const] has non-default value: 2`, + }, + { + input: `test$str1(&(0x7f0000000000)='foo\x00')`, + output: `test$str1(&(0x7f0000000000)='foo\x00')`, + }, + { + input: `test$str1(&(0x7f0000000000)='bar\x00')`, + output: `test$str1(&(0x7f0000000000)='foo\x00')`, + strictErr: `bad string value "bar\x00", expect ["foo\x00"]`, + }, + { + input: `test$str2(&(0x7f0000000000)='bar\x00')`, + output: `test$str2(&(0x7f0000000000)='bar\x00')`, + }, + { + input: `test$str2(&(0x7f0000000000)='baz\x00')`, + output: `test$str2(&(0x7f0000000000)='foo\x00')`, + strictErr: `bad string value "baz\x00", expect ["foo\x00" "bar\x00"]`, }, } buf := make([]byte, ExecBufferSize) for _, test := range tests { - if test.err != nil && test.strictErr == nil { + if test.strictErr == "" { test.strictErr = 
test.err } - if test.err != nil && test.output != "" { + if test.err != "" && test.output != "" { t.Errorf("both err and output are set") continue } @@ -289,18 +306,18 @@ func TestDeserialize(t *testing.T) { wantErr = test.strictErr } if err != nil { - if wantErr == nil { + if wantErr == "" { t.Errorf("deserialization failed with\n%s\ndata:\n%s\n", err, test.input) continue } - if !wantErr.MatchString(err.Error()) { + if !strings.Contains(err.Error(), wantErr) { t.Errorf("deserialization failed with\n%s\nwhich doesn't match\n%s\ndata:\n%s", err, wantErr, test.input) continue } } else { - if wantErr != nil { + if wantErr != "" { t.Errorf("deserialization should have failed with:\n%s\ndata:\n%s\n", wantErr, test.input) continue diff --git a/prog/hints.go b/prog/hints.go index 91a254acc..f7f9dc487 100644 --- a/prog/hints.go +++ b/prog/hints.go @@ -93,9 +93,17 @@ func generateHints(compMap CompMap, arg Arg, exec func()) { // Csum will not pass validation and is always computed. return case *BufferType: - if t.Kind == BufferFilename { + switch t.Kind { + case BufferFilename: // This can generate escaping paths and is probably not too useful anyway. return + case BufferString: + if len(t.Values) != 0 { + // These are frequently file names or complete enumerations. + // Mutating these may be useful iff we intercept strcmp + // (and filter out file names). + return + } } } diff --git a/prog/mutation.go b/prog/mutation.go index c9f647ae8..571b54a3f 100644 --- a/prog/mutation.go +++ b/prog/mutation.go @@ -344,14 +344,14 @@ func (t *BufferType) mutate(r *randGen, s *state, arg Arg, ctx ArgCtx) (calls [] data := append([]byte{}, a.Data()...) a.data = mutateData(r, data, minLen, maxLen) case BufferString: - data := append([]byte{}, a.Data()...) - if r.bin() { + if len(t.Values) != 0 { + a.data = r.randString(s, t) + } else { if t.TypeSize != 0 { minLen, maxLen = t.TypeSize, t.TypeSize } + data := append([]byte{}, a.Data()...) 
a.data = mutateData(r, data, minLen, maxLen) - } else { - a.data = r.randString(s, t) } case BufferFilename: a.data = []byte(r.filename(s, t)) @@ -630,6 +630,10 @@ func (t *BufferType) getMutationPrio(target *Target, arg Arg, ignoreSpecial bool if t.Dir() == DirOut && !t.Varlen() { return dontMutate, false } + if t.Kind == BufferString && len(t.Values) == 1 { + // These are effectively consts (and frequently file names). + return dontMutate, false + } return 0.8 * maxPriority, false } diff --git a/sys/test/gen/64.go b/sys/test/gen/64.go index ce4ca5a73..681b7fee4 100644 --- a/sys/test/gen/64.go +++ b/sys/test/gen/64.go @@ -1262,6 +1262,12 @@ var syscalls_64 = []*Syscall{ {Name: "test$str0", CallName: "test", MissingArgs: 5, Args: []Type{ &PtrType{TypeCommon: TypeCommon{TypeName: "ptr", FldName: "a", TypeSize: 8}, Type: &BufferType{TypeCommon: TypeCommon{TypeName: "string", IsVarlen: true}, Kind: 2}}, }}, + {Name: "test$str1", CallName: "test", MissingArgs: 5, Args: []Type{ + &PtrType{TypeCommon: TypeCommon{TypeName: "ptr", FldName: "a", TypeSize: 8}, Type: &BufferType{TypeCommon: TypeCommon{TypeName: "string", TypeSize: 4}, Kind: 2, Values: []string{"foo\x00"}}}, + }}, + {Name: "test$str2", CallName: "test", MissingArgs: 5, Args: []Type{ + &PtrType{TypeCommon: TypeCommon{TypeName: "ptr", FldName: "a", TypeSize: 8}, Type: &BufferType{TypeCommon: TypeCommon{TypeName: "string", TypeSize: 4}, Kind: 2, SubKind: "fixed_strings", Values: []string{"foo\x00", "bar\x00"}}}, + }}, {Name: "test$struct", CallName: "test", MissingArgs: 5, Args: []Type{ &PtrType{TypeCommon: TypeCommon{TypeName: "ptr", FldName: "a0", TypeSize: 8}, Type: &StructType{Key: StructKey{Name: "syz_struct0"}}}, }}, @@ -1340,4 +1346,4 @@ var consts_64 = []ConstValue{ {Name: "SYS_unsupported"}, } -const revision_64 = "fd5870a81478a9208df976f4d8066bbdfa12f8e5" +const revision_64 = "108f16d364fe32a8c41e4592f79eab831b5935a0" diff --git a/sys/test/test.txt b/sys/test/test.txt index bcbc3462f..3ebbf5979 100644 
--- a/sys/test/test.txt +++ b/sys/test/test.txt @@ -14,8 +14,12 @@ test$int(a0 intptr, a1 int8, a2 int16, a3 int32, a4 int64) # String types. test$str0(a ptr[in, string]) +test$str1(a ptr[in, string["foo"]]) +test$str2(a ptr[in, string[fixed_strings]]) test$blob0(a ptr[in, array[int8]]) +fixed_strings = "foo", "bar" + # Opt arguments test$opt0(a0 intptr[opt]) -- cgit mrf-deployment