aboutsummaryrefslogtreecommitdiffstats
path: root/prog/target.go
blob: 11127046bda5e354e984ae7f18193c80c60756c9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
// Copyright 2017 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package prog

import (
	"fmt"
	"math/rand"
	"slices"
	"sort"
	"strings"
	"sync"
	"sync/atomic"

	"github.com/google/syzkaller/pkg/hash"
)

// Target describes target OS/arch pair.
// A Target is registered with RegisterTarget and handed out by GetTarget/AllTargets,
// which finish its initialization on first use (see lazyInit).
type Target struct {
	// OS and Arch together form the "OS/Arch" key under which the target is registered.
	OS         string
	Arch       string
	Revision   string // unique hash representing revision of the descriptions
	PtrSize    uint64
	PageSize   uint64
	NumPages   uint64
	DataOffset uint64
	BigEndian  bool

	// Raw descriptions of the target. Types is consumed while restoring links
	// between descriptions during lazy initialization and is reset to nil afterwards.
	Syscalls  []*Syscall
	Resources []*ResourceDesc
	Consts    []ConstValue
	Flags     []FlagDesc
	Types     []Type

	// MakeDataMmap creates calls that mmaps target data memory range.
	MakeDataMmap func() []*Call

	// Neutralize neutralizes harmful calls by transforming them into non-harmful ones
	// (e.g. an ioctl that turns off console output is turned into ioctl that turns on output).
	// fixStructure determines whether it's allowed to make structural changes (e.g. add or
	// remove arguments). It is helpful e.g. when we do neutralization while iterating over the
	// arguments.
	// lazyInit installs a no-op default before the arch-specific callbacks run.
	Neutralize func(c *Call, fixStructure bool) error

	// AnnotateCall annotates a syscall invocation in C reproducers.
	// The returned string will be placed inside a comment except for the
	// empty string which will omit the comment.
	// lazyInit installs a default that returns the empty string before the arch-specific callbacks run.
	AnnotateCall func(c ExecCall) string

	// SpecialTypes allows target to do custom generation/mutation for some struct's and union's.
	// Map key is struct/union name for which custom generation/mutation is required.
	// Map value is custom generation/mutation function that will be called
	// for the corresponding type. g is helper object that allows generate random numbers,
	// allocate memory, etc. typ is the struct/union type. old is the old value of the struct/union
	// for mutation, or nil for generation. The function returns a new value of the struct/union,
	// and optionally any calls that need to be inserted before the arg reference.
	SpecialTypes map[string]func(g *Gen, typ Type, dir Dir, old Arg) (Arg, []*Call)

	// Resources that play auxiliary role, but widely used throughout all syscalls (e.g. pid/uid).
	AuxResources map[string]bool

	// Additional special invalid pointer values besides NULL to use.
	// lazyInit puts three built-in values in front of the target-provided ones and panics
	// if the resulting list is longer than maxSpecialPointers.
	SpecialPointers []uint64

	// Special file name length that can provoke bugs (e.g. PATH_MAX).
	// If left empty, lazyInit fills in 256, 512 and 4096. Every value must be positive
	// and less than memAllocMaxMem, otherwise lazyInit panics.
	SpecialFileLenghts []int

	// Filled by prog package:
	// SyscallMap maps syscall name to its description, ConstMap maps const name to its value,
	// FlagsMap maps flags name to the list of its values (all built in initTarget).
	SyscallMap map[string]*Syscall
	ConstMap   map[string]uint64
	FlagsMap   map[string][]string

	// init guards lazyInit. fillArch and initArch are the callbacks passed to RegisterTarget;
	// lazyInit calls fillArch first and initArch after the generic initialization steps.
	// resourceMap maps resource name to its description (result of restoreLinks).
	init        sync.Once
	fillArch    func(target *Target)
	initArch    func(target *Target)
	resourceMap map[string]*ResourceDesc
	// Maps resource name to a list of calls that can create the resource.
	resourceCtors map[string][]ResourceCtor
	any           anyTypes

	// The default ChoiceTable is used only by tests and utilities, so we initialize it lazily.
	defaultOnce        sync.Once
	defaultChoiceTable *ChoiceTable
}

// maxSpecialPointers is the upper bound on len(Target.SpecialPointers) after lazyInit
// has added the built-in values; lazyInit panics if the bound is exceeded.
const maxSpecialPointers = 16

// targets holds all registered targets keyed by "OS/Arch".
// NOTE(review): the map is read and written without locking; presumably all
// RegisterTarget calls happen during package initialization — confirm against callers.
var targets = make(map[string]*Target)

// RegisterTarget records target in the global registry under its "OS/Arch" key.
// The fill and init callbacks are only stored here; they are invoked later,
// when the target is lazily initialized on first retrieval.
// Registering the same OS/arch pair twice panics.
func RegisterTarget(target *Target, fill, init func(target *Target)) {
	name := fmt.Sprintf("%v/%v", target.OS, target.Arch)
	if existing := targets[name]; existing != nil {
		panic(fmt.Sprintf("duplicate target %v", name))
	}
	target.fillArch, target.initArch = fill, init
	targets[name] = target
}

// GetTarget returns the registered target for the given OS/arch pair,
// running its lazy initialization on first use.
// For an unregistered pair it returns an error that lists all supported
// targets in sorted order.
func GetTarget(OS, arch string) (*Target, error) {
	key := OS + "/" + arch
	if found := targets[key]; found != nil {
		found.init.Do(found.lazyInit)
		return found, nil
	}
	var names []string
	for _, known := range targets {
		names = append(names, fmt.Sprintf("%v/%v", known.OS, known.Arch))
	}
	sort.Strings(names)
	return nil, fmt.Errorf("unknown target: %v (supported: %v)", key, names)
}

// AllTargets returns every registered target, lazily initializing each one.
// The result is ordered by OS and then by Arch.
func AllTargets() []*Target {
	var all []*Target
	for _, t := range targets {
		t.init.Do(t.lazyInit)
		all = append(all, t)
	}
	sort.Slice(all, func(a, b int) bool {
		left, right := all[a], all[b]
		if left.OS == right.OS {
			return left.Arch < right.Arch
		}
		return left.OS < right.OS
	})
	return all
}

// lazyInit completes initialization of a registered target. It is run at most once
// per target via target.init (see GetTarget and AllTargets).
// It panics if the target ends up with too many special pointers or with an
// out-of-range special file length.
func (target *Target) lazyInit() {
	// Install no-op defaults before the arch callbacks run
	// (presumably so that fillArch/initArch can replace them).
	target.Neutralize = func(c *Call, fixStructure bool) error { return nil }
	target.AnnotateCall = func(c ExecCall) string { return "" }
	// The arch-specific fill callback runs first, the generic initialization steps follow,
	// and the arch-specific init callback runs after them.
	target.fillArch(target)
	target.initTarget()
	target.initUselessHints()
	target.initRelatedFields()
	target.initArch(target)
	// Give these 3 known addresses fixed leading positions and append target-specific ones after them.
	target.SpecialPointers = append([]uint64{
		0x0000000000000000, // NULL pointer (keep this first because code uses special index=0 as NULL)
		0xffffffffffffffff, // unmapped kernel address (keep second because serialized value will match actual pointer value)
		0x9999999999999999, // non-canonical address
	}, target.SpecialPointers...)
	if len(target.SpecialPointers) > maxSpecialPointers {
		panic("too many special pointers")
	}
	if len(target.SpecialFileLenghts) == 0 {
		// Just some common lengths that can be used as PATH_MAX/MAX_NAME.
		target.SpecialFileLenghts = []int{256, 512, 4096}
	}
	// Validate both the defaults and any target-provided lengths.
	for _, ln := range target.SpecialFileLenghts {
		if ln <= 0 || ln >= memAllocMaxMem {
			panic(fmt.Sprintf("bad special file length %v", ln))
		}
	}
	// These are used only during lazyInit.
	target.Types = nil
}

// initTarget builds the lookup tables derived from the raw descriptions:
// ConstMap, resourceMap, SyscallMap (also assigning syscall IDs by position),
// FlagsMap and resourceCtors. The order of the steps is significant and
// mirrors the dependencies between them.
func (target *Target) initTarget() {
	checkMaxCallID(len(target.Syscalls) - 1)

	// Const name -> value.
	target.ConstMap = make(map[string]uint64)
	for _, cv := range target.Consts {
		target.ConstMap[cv.Name] = cv.Value
	}

	target.resourceMap = restoreLinks(target.Syscalls, target.Resources, target.Types)
	target.initAnyTypes()

	// Syscall name -> description; the ID of a syscall is its index in Syscalls.
	target.SyscallMap = make(map[string]*Syscall)
	for id, call := range target.Syscalls {
		call.ID = id
		target.SyscallMap[call.Name] = call
	}

	// Flags name -> values.
	target.FlagsMap = make(map[string][]string)
	for _, fd := range target.Flags {
		target.FlagsMap[fd.Name] = fd.Values
	}

	// Resource name -> calls that can create the resource.
	target.populateResourceCtors()
	target.resourceCtors = make(map[string][]ResourceCtor)
	for _, desc := range target.Resources {
		target.resourceCtors[desc.Name] = target.calcResourceCtors(desc, false)
	}
}

// initUselessHints pre-computes the set of useless hints for every type that
// implements uselessHinter. Identical sets are shared between types: sets are
// keyed by a hash of their sorted, de-duplicated values (there will be lots of duplicates).
func (target *Target) initUselessHints() {
	visited := make(map[Type]bool)
	shared := make(map[string]map[uint64]struct{})
	ForeachType(target.Syscalls, func(t Type, ctx *TypeCtx) {
		hinter, ok := t.(uselessHinter)
		if !ok {
			return
		}
		if visited[t] {
			return
		}
		visited[t] = true
		values := hinter.calcUselessHints()
		if len(values) == 0 {
			return
		}
		// Normalize the values so that equal sets produce equal signatures.
		slices.Sort(values)
		values = slices.Compact(values)
		key := hash.String(values)
		set := shared[key]
		if set == nil {
			set = make(map[uint64]struct{})
			for _, val := range values {
				set[val] = struct{}{}
			}
			shared[key] = set
		}
		hinter.setUselessHints(set)
	})
}

// initRelatedFields computes, for each top-level const/flags/int syscall argument,
// the group of related field types and stores it in the argument's relatedFields.
// Groups that end up with a single member are dropped (set to nil).
func (target *Target) initRelatedFields() {
	// Compute sets of related fields that are used to reduce amount of produced hint replacements.
	// Related fields are sets of arguments to the same syscall, in the same position, that operate
	// on the same resource. The best example of related fields is a set of ioctl commands on the same fd:
	//
	//	ioctl$FOO1(fd fd_foo, cmd const[FOO1], ...)
	//	ioctl$FOO2(fd fd_foo, cmd const[FOO2], ...)
	//	ioctl$FOO3(fd fd_foo, cmd const[FOO3], ...)
	//
	// All cmd args are related and we should not try to replace them with each other
	// (e.g. try to morph ioctl$FOO1 into ioctl$FOO2). This is unnecessary, leads to confusing reproducers,
	// and in some cases to badly confused argument types, see e.g.:
	// https://github.com/google/syzkaller/issues/502
	// https://github.com/google/syzkaller/issues/4939
	//
	// However, notion of related fields is wider and includes e.g. socket syscall family/type/proto,
	// setsockopt consts, and in some cases even openat flags/mode.
	//
	// Related fields can include const, flags and int types.
	//
	// Notion of "same resource" is also quite generic b/c syscalls can accept several resource types,
	// and filenames/strings are also considered as a resource in this context. For example, openat syscalls
	// that operate on the same file are related, but are not related to openat calls that operate on other files.
	groups := make(map[string]map[Type]struct{})
	for _, call := range target.Syscalls {
		// Id is used to identify related syscalls.
		// We first collect all resources/strings/files. This needs to be done first b/c e.g. mmap has
		// fd resource at the end, so we need to do this before the next loop.
		id := call.CallName
		for i, field := range call.Args {
			switch arg := field.Type.(type) {
			case *ResourceType:
				id += fmt.Sprintf("-%v:%v", i, arg.Name())
			case *PtrType:
				// Only pointers to string buffers with exactly one possible value contribute to the id.
				if typ, ok := arg.Elem.(*BufferType); ok && typ.Kind == BufferString && len(typ.Values) == 1 {
					id += fmt.Sprintf("-%v:%v", i, typ.Values[0])
				}
			}
		}
		// Now we group const/flags args together.
		// But also if we see a const, we update id to include it. This is required for e.g.
		// socket/socketpair/setsockopt calls. For these calls all families can be grouped, but types should be
		// grouped only for the same family, and protocols should be grouped only for the same family+type.
		// We assume the "more important" discriminating arguments come first (this is not necessarily true,
		// but seems to be the case in real syscalls as it's unreasonable to pass less important things first).
		for i, field := range call.Args {
			// Only const, flags and int arguments take part in grouping; everything else is skipped.
			switch field.Type.(type) {
			case *ConstType:
			case *FlagsType:
			case *IntType:
			default:
				continue
			}
			// The group key is the id accumulated so far plus the argument position.
			argID := fmt.Sprintf("%v/%v", id, i)
			group := groups[argID]
			if group == nil {
				group = make(map[Type]struct{})
				groups[argID] = group
			}
			call.Args[i].relatedFields = group
			group[field.Type] = struct{}{}
			// A const value narrows the id for all subsequent arguments of this call.
			switch arg := field.Type.(type) {
			case *ConstType:
				id += fmt.Sprintf("-%v:%v", i, arg.Val)
			}
		}
	}
	// Drop groups that consist of only a single field as they are not useful.
	for _, call := range target.Syscalls {
		for i := range call.Args {
			if len(call.Args[i].relatedFields) == 1 {
				call.Args[i].relatedFields = nil
			}
		}
	}
}

// GetConst returns the value of the named const from ConstMap.
// It panics if the const is not defined for this target.
func (target *Target) GetConst(name string) uint64 {
	if val, found := target.ConstMap[name]; found {
		return val
	}
	panic(fmt.Sprintf("const %v is not defined for %v/%v", name, target.OS, target.Arch))
}

// sanitize neutralizes the call c by delegating to target.Neutralize,
// passing fix as the fixStructure argument, and returns its error.
func (target *Target) sanitize(c *Call, fix bool) error {
	// For now, even though we accept the fix argument, it does not have the full effect.
	// It de facto only denies structural changes, e.g. deletions of arguments.
	// TODO: rewrite the corresponding sys/*/init.go code.
	return target.Neutralize(c, fix)
}

// RestoreLinks is the exported form of restoreLinks: it performs the same
// link restoration on the given descriptions and discards the resulting resource map.
func RestoreLinks(syscalls []*Syscall, resources []*ResourceDesc, types []Type) {
	restoreLinks(syscalls, resources, types)
}

// Process-wide registry of types that have been assigned a Ref by restoreLinks.
var (
	// typeRefMu serializes updates of typeRefs in restoreLinks.
	typeRefMu sync.Mutex
	// typeRefs holds the list of registered types; element 0 is a nil placeholder,
	// and a type's Ref is its index in this list.
	typeRefs  atomic.Value // []Type
)

// restoreLinks wires up the descriptions of one target:
//   - every type in types is registered in the global typeRefs list and told its
//     global Ref (its index in that list);
//   - every Ref placeholder reachable from syscalls is replaced with the type it denotes;
//   - every ResourceType gets its Desc resolved by name.
//
// It returns the map from resource name to resource description.
// It panics if a resource type refers to an unknown resource.
func restoreLinks(syscalls []*Syscall, resources []*ResourceDesc, types []Type) map[string]*ResourceDesc {
	typeRefMu.Lock()
	defer typeRefMu.Unlock()
	// Index 0 is reserved (nil), so the first registered type gets Ref 1.
	refs := []Type{nil}
	if old := typeRefs.Load(); old != nil {
		refs = old.([]Type)
	}
	for _, typ := range types {
		typ.setRef(Ref(len(refs)))
		refs = append(refs, typ)
	}
	typeRefs.Store(refs)

	resourceMap := make(map[string]*ResourceDesc)
	for _, res := range resources {
		resourceMap[res.Name] = res
	}

	ForeachType(syscalls, func(typ Type, ctx *TypeCtx) {
		// Note: a Ref placeholder found in the descriptions is used as an index into
		// the local types slice, not into the global refs list built above.
		if ref, ok := typ.(Ref); ok {
			typ = types[ref]
			*ctx.Ptr = typ
		}
		switch t := typ.(type) {
		case *ResourceType:
			t.Desc = resourceMap[t.TypeName]
			if t.Desc == nil {
				panic("no resource desc")
			}
		}
	})
	return resourceMap
}

// DefaultChoiceTable returns the target's default choice table.
// The table is built on the first call with BuildChoiceTable(nil, nil) and
// cached; defaultOnce guarantees it is built only once.
func (target *Target) DefaultChoiceTable() *ChoiceTable {
	target.defaultOnce.Do(func() {
		target.defaultChoiceTable = target.BuildChoiceTable(nil, nil)
	})
	return target.defaultChoiceTable
}

// RequiredGlobs returns the sorted, de-duplicated list of glob patterns
// referenced by glob buffer types reachable from the target's syscalls.
func (target *Target) RequiredGlobs() []string {
	unique := make(map[string]bool)
	ForeachType(target.Syscalls, func(typ Type, ctx *TypeCtx) {
		buf, ok := typ.(*BufferType)
		if !ok || buf.Kind != BufferGlob {
			return
		}
		for _, pattern := range requiredGlobs(buf.SubKind) {
			unique[pattern] = true
		}
	})
	return stringMapToSlice(unique)
}

// UpdateGlobs sets the Values of every glob buffer type reachable from the
// target's syscalls to the files selected by the type's pattern from globFiles,
// which maps a glob pattern to the list of files matching it.
func (target *Target) UpdateGlobs(globFiles map[string][]string) {
	// TODO: make host.DetectSupportedSyscalls below filter out globs with no values.
	// Also make prog package more strict with respect to generation/mutation of globs
	// with no values (they still can appear in tests and tools). We probably should
	// generate an empty string for these and never mutate.
	ForeachType(target.Syscalls, func(typ Type, ctx *TypeCtx) {
		buf, ok := typ.(*BufferType)
		if !ok || buf.Kind != BufferGlob {
			return
		}
		buf.Values = populateGlob(buf.SubKind, globFiles)
	})
}

// requiredGlobs splits a colon-separated glob pattern into its tokens and
// returns the ones that need to be expanded, in their original order.
// Tokens starting with '-' denote exclusions and are omitted.
// Empty tokens (empty pattern, doubled or trailing ':') are ignored rather
// than causing an out-of-range panic.
func requiredGlobs(pattern string) []string {
	var res []string
	for _, tok := range strings.Split(pattern, ":") {
		// strings.Split yields "" for empty segments, so tok may be empty here.
		if tok == "" || strings.HasPrefix(tok, "-") {
			continue
		}
		res = append(res, tok)
	}
	return res
}

// populateGlob computes the sorted list of files selected by a colon-separated
// glob pattern. globFiles maps a glob token to the files matching it.
// Files of all plain tokens are collected first; then every token of the form
// "-name" removes exactly the file "name" from the result, regardless of where
// it appears in the pattern.
// Empty tokens (empty pattern, doubled or trailing ':') are ignored rather
// than causing an out-of-range panic.
func populateGlob(pattern string, globFiles map[string][]string) []string {
	files := make(map[string]bool)
	parts := strings.Split(pattern, ":")
	for _, tok := range parts {
		// strings.Split yields "" for empty segments, so tok may be empty here.
		if tok == "" || strings.HasPrefix(tok, "-") {
			continue
		}
		for _, file := range globFiles[tok] {
			files[file] = true
		}
	}
	// Exclusions are applied in a second pass so that they win over any inclusion.
	for _, tok := range parts {
		if excluded, ok := strings.CutPrefix(tok, "-"); ok {
			delete(files, excluded)
		}
	}
	return stringMapToSlice(files)
}

// stringMapToSlice returns the keys of m in ascending order.
// The values stored in the map are ignored. For an empty or nil map the
// result is a nil slice.
func stringMapToSlice(m map[string]bool) []string {
	if len(m) == 0 {
		return nil
	}
	keys := make([]string, 0, len(m))
	for key := range m {
		keys = append(keys, key)
	}
	sort.Strings(keys)
	return keys
}

// Gen is the helper object passed to Target.SpecialTypes callbacks.
// It wraps the random generator and the generation state and exposes a small
// API (random numbers, memory allocation, argument generation and mutation) on top of them.
type Gen struct {
	r *randGen // source of randomness; also gives access to the target
	s *state   // state passed through to the underlying generation/mutation routines
}

// Target returns the target of the underlying random generator.
func (g *Gen) Target() *Target {
	return g.r.target
}

// Rand returns the *rand.Rand used by the underlying random generator.
func (g *Gen) Rand() *rand.Rand {
	return g.r.Rand
}

// NOutOf returns the result of the underlying generator's nOutOf(n, outOf)
// (presumably true in n out of outOf cases — confirm against randGen.nOutOf).
func (g *Gen) NOutOf(n, outOf int) bool {
	return g.r.nOutOf(n, outOf)
}

// Alloc returns the argument produced by the generator's allocAddr for ptrType and dir,
// sized by data.Size() and given data as its payload (presumably a pointer
// argument referring to data — confirm against randGen.allocAddr).
// The returned list of calls is always nil.
func (g *Gen) Alloc(ptrType Type, dir Dir, data Arg) (Arg, []*Call) {
	return g.r.allocAddr(g.s, ptrType, dir, data.Size(), data), nil
}

// GenerateArg generates a new argument of type typ with direction dir.
// Any calls produced during generation are appended to *pcalls.
// It calls generateArg with ignoreSpecial=false.
func (g *Gen) GenerateArg(typ Type, dir Dir, pcalls *[]*Call) Arg {
	return g.generateArg(typ, dir, pcalls, false)
}

// GenerateSpecialArg is like GenerateArg but calls generateArg with ignoreSpecial=true
// (presumably so that a SpecialTypes callback can generate its own type without
// being invoked recursively — confirm against generateArgImpl).
// Any calls produced during generation are appended to *pcalls.
func (g *Gen) GenerateSpecialArg(typ Type, dir Dir, pcalls *[]*Call) Arg {
	return g.generateArg(typ, dir, pcalls, true)
}

// generateArg is the shared implementation of GenerateArg and GenerateSpecialArg.
// It generates the argument via generateArgImpl, appends the produced calls to
// *pcalls, and then assigns size fields within the new argument by treating it
// as a single unnamed field.
func (g *Gen) generateArg(typ Type, dir Dir, pcalls *[]*Call, ignoreSpecial bool) Arg {
	arg, calls := g.r.generateArgImpl(g.s, typ, dir, ignoreSpecial)
	*pcalls = append(*pcalls, calls...)
	g.r.target.assignSizesArray([]Arg{arg}, []Field{{Name: "", Type: arg.Type()}}, nil)
	return arg
}

// MutateArg mutates arg0 in place, one or more times, and returns the calls
// produced by the mutations.
// Each round collects the sub-arguments of arg0 (with ignoreSpecial set), picks one
// and mutates it. If nothing can be collected, the calls gathered so far are returned.
func (g *Gen) MutateArg(arg0 Arg) (calls []*Call) {
	// updateSizes is filled in by mutateArg but is not reported to the caller (see TODO below).
	updateSizes := true
	// At least one round is performed; after every round (including rounds skipped
	// via continue) g.r.oneOf(3) decides whether to stop.
	for stop := false; !stop; stop = g.r.oneOf(3) {
		ma := &mutationArgs{target: g.r.target, ignoreSpecial: true}
		ForeachSubArg(arg0, ma.collectArg)
		if len(ma.args) == 0 {
			// TODO(dvyukov): probably need to return this condition
			// and updateSizes to caller so that Mutate can act accordingly.
			return
		}
		arg, ctx := ma.chooseArg(g.r.Rand)
		newCalls, ok := g.r.target.mutateArg(g.r, g.s, arg, ctx, &updateSizes)
		if !ok {
			// An unsuccessful mutation contributes no calls.
			continue
		}
		calls = append(calls, newCalls...)
	}
	return calls
}

// Builder assembles a program call by call.
// Create one with MakeProgGen, add calls with Append, reserve memory with
// Allocate/AllocateVMA and obtain the resulting program with Finalize.
type Builder struct {
	target *Target
	ma     *memAlloc // allocator backing Allocate and AllocateVMA
	p      *Prog     // program under construction; set to nil by a successful Finalize
}

// MakeProgGen creates a Builder for target. The builder starts with an empty
// program and a memory allocator covering NumPages*PageSize bytes.
func MakeProgGen(target *Target) *Builder {
	dataSize := target.NumPages * target.PageSize
	prog := &Prog{Target: target}
	return &Builder{
		target: target,
		ma:     newMemAlloc(dataSize),
		p:      prog,
	}
}

// Append adds the call c to the program under construction.
// Size fields of c are assigned first, then the call is neutralized with
// structural fixes allowed. If neutralization reports an error, that error is
// returned and c is not added to the program (previously the error was
// silently dropped).
func (pg *Builder) Append(c *Call) error {
	pg.target.assignSizesCall(c)
	if err := pg.target.sanitize(c, true); err != nil {
		return err
	}
	pg.p.Calls = append(pg.p.Calls, c)
	return nil
}

// Allocate reserves size bytes with the given alignment from the builder's
// memory allocator and returns the value produced by the allocator
// (presumably the offset of the reserved region — confirm against memAlloc.alloc).
func (pg *Builder) Allocate(size, alignment uint64) uint64 {
	return pg.ma.alloc(nil, size, alignment)
}

// AllocateVMA reserves npages pages from the builder's memory allocator:
// the request is npages*PageSize bytes, aligned to PageSize.
// It returns the value produced by the allocator
// (presumably the offset of the reserved region — confirm against memAlloc.alloc).
func (pg *Builder) AllocateVMA(npages uint64) uint64 {
	return pg.ma.alloc(nil, npages*pg.target.PageSize, pg.target.PageSize)
}

// Finalize checks the assembled program and hands it over to the caller.
// The program must pass validation and must be serializable for execution;
// otherwise the error is returned and the builder keeps the program.
// On success the builder gives up its reference to the program.
func (pg *Builder) Finalize() (*Prog, error) {
	prog := pg.p
	if err := prog.validate(); err != nil {
		return nil, err
	}
	_, err := prog.SerializeForExec()
	if err != nil {
		return nil, err
	}
	pg.p = nil
	return prog, nil
}