diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2024-12-02 10:57:36 +0100 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2024-12-11 15:22:17 +0000 |
| commit | b2c5a234aeb69e981c6e7ad120b49d37a86c6cae (patch) | |
| tree | d2e575e4c5dd3f044d43a71231c50c1b1459e35a /pkg/declextract/entity.go | |
| parent | bfb4b3275371a3b53cd6562fa06e5a9dfb5627b7 (diff) | |
tools/syz-declextract: rewrite
syz-declextract accumulated a bunch of code health problems
so that now it's hard to change/extend it, lots of new features
can only be added in hacky ways and cause lots of code duplication.
It's also completely untested. Rewrite the tool to:
- move as much code as possible to Go (working with the clang tool
is painful for a number of reasons)
- allow testing and add unit tests (first layer of tests test
what information is produced by the clang tool, second layer
of tests test how that information is transformed to descriptions)
- allow extending the clang tool output to export arbitrary info
in non-hacky way (now it produces arbitrary JSON instead of a mix
of incomplete descriptions and interfaces)
- remove code duplication in the clang tool and provide common
infrastructure to add new analysis w/o causing more duplication
- provide more convenient primitives in the clang tool
- improve code style consistency and stick to the LLVM code style
(in particular, variable names must start with a capital letter,
single-statement blocks are not surrounded with {})
- remove intermixing of code that works on different levels
(currently we have AST analysis + business logic + printfs
all intermixed with each other)
- provide several helper Go packages for better code structuring
(e.g. pkg/clangtool just runs the tool on source files in parallel
and returns results, this already separates a bunch of low-level
logic from the rest of the code under a simple abstraction)
I've tried to make the output match the current output as much as possible
so that the diff is manageable (in some cases at the cost of code quality,
this should be fixed in future commits). There are still some differences,
but hopefully they are manageable for review (more includes/defines,
reordered some netlink attributes).
Minor bugs are fixed along the way, but mostly NFC:
1. Some unions were incorrectly emitted as [varlen]
(C unions are never varlen).
2. Only one of the [packed], [align[N]] attributes was emitted
for a struct (both couldn't be emitted).
Diffstat (limited to 'pkg/declextract/entity.go')
| -rw-r--r-- | pkg/declextract/entity.go | 175 |
1 files changed, 175 insertions, 0 deletions
diff --git a/pkg/declextract/entity.go b/pkg/declextract/entity.go new file mode 100644 index 000000000..57e589e40 --- /dev/null +++ b/pkg/declextract/entity.go @@ -0,0 +1,175 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package declextract + +import ( + "bytes" + "encoding/json" + "slices" +) + +type Output struct { + Includes []string `json:"includes,omitempty"` + Defines []*Define `json:"defines,omitempty"` + Enums []*Enum `json:"enums,omitempty"` + Structs []*Struct `json:"structs,omitempty"` + Syscalls []*Syscall `json:"syscalls,omitempty"` + IouringOps []*IouringOp `json:"iouring_ops,omitempty"` + NetlinkFamilies []*NetlinkFamily `json:"netlink_families,omitempty"` + NetlinkPolicies []*NetlinkPolicy `json:"netlink_policies,omitempty"` +} + +type Define struct { + Name string `json:"name,omitempty"` + Value string `json:"value,omitempty"` +} + +type Field struct { + Name string `json:"name,omitempty"` + IsAnonymous bool `json:"is_anonymous,omitempty"` + BitWidth int `json:"bit_width,omitempty"` + CountedBy int `json:"counted_by,omitempty"` + Type *Type `json:"type,omitempty"` + + syzType string +} + +type Syscall struct { + Func string `json:"func,omitempty"` + Args []*Field `json:"args,omitempty"` + SourceFile string `json:"source_file,omitempty"` +} + +type IouringOp struct { + Name string `json:"name,omitempty"` + Func string `json:"func,omitempty"` + SourceFile string `json:"source_file,omitempty"` +} + +type NetlinkFamily struct { + Name string `json:"name,omitempty"` + Ops []*NetlinkOp `json:"ops,omitempty"` + SourceFile string `json:"source_file,omitempty"` +} + +type NetlinkPolicy struct { + Name string `json:"name,omitempty"` + Attrs []*NetlinkAttr `json:"attrs,omitempty"` +} + +type NetlinkOp struct { + Name string `json:"name,omitempty"` + Func string `json:"func,omitempty"` + Access string `json:"access,omitempty"` + Policy 
string `json:"policy,omitempty"` +} + +type NetlinkAttr struct { + Name string `json:"name,omitempty"` + Kind string `json:"kind,omitempty"` + MaxSize int `json:"max_size,omitempty"` + NestedPolicy string `json:"nested_policy,omitempty"` + Elem *Type `json:"elem,omitempty"` +} + +type Struct struct { + Name string `json:"name,omitempty"` + ByteSize int `json:"byte_size,omitempty"` + IsUnion bool `json:"is_union,omitempty"` + IsPacked bool `json:"is_packed,omitempty"` + Align int `json:"align,omitempty"` + Fields []*Field `json:"fields,omitempty"` + + // TODO: remove me. + isVarlen bool +} + +type Enum struct { + Name string `json:"name,omitempty"` + Values []string `json:"values,omitempty"` +} + +type Type struct { + Int *IntType `json:"int,omitempty"` + Ptr *PtrType `json:"ptr,omitempty"` + Array *ArrayType `json:"array,omitempty"` + Buffer *BufferType `json:"buffer,omitempty"` + Struct string `json:"struct,omitempty"` +} + +type IntType struct { + ByteSize int `json:"byte_size,omitempty"` + Name string `json:"name,omitempty"` + Base string `json:"base,omitempty"` + Enum string `json:"enum,omitempty"` + + isBigEndian bool +} + +type PtrType struct { + Elem *Type `json:"elem,omitempty"` + IsConst bool `json:"is_const,omitempty"` +} + +type ArrayType struct { + Elem *Type `json:"elem,omitempty"` + MinSize int `json:"min_size,omitempty"` + MaxSize int `json:"max_size,omitempty"` +} + +type BufferType struct { + MinSize int `json:"min_size,omitempty"` + MaxSize int `json:"max_size,omitempty"` + IsString bool `json:"is_string,omitempty"` + IsNonTerminated bool `json:"is_non_terminated,omitempty"` +} + +func (out *Output) Merge(other *Output) { + out.Includes = append(out.Includes, other.Includes...) + out.Defines = append(out.Defines, other.Defines...) + out.Enums = append(out.Enums, other.Enums...) + out.Structs = append(out.Structs, other.Structs...) + out.Syscalls = append(out.Syscalls, other.Syscalls...) 
+ out.IouringOps = append(out.IouringOps, other.IouringOps...) + out.NetlinkFamilies = append(out.NetlinkFamilies, other.NetlinkFamilies...) + out.NetlinkPolicies = append(out.NetlinkPolicies, other.NetlinkPolicies...) +} + +func (out *Output) SortAndDedup() { + out.Includes = sortAndDedupSlice(out.Includes) + out.Defines = sortAndDedupSlice(out.Defines) + out.Enums = sortAndDedupSlice(out.Enums) + out.Structs = sortAndDedupSlice(out.Structs) + out.Syscalls = sortAndDedupSlice(out.Syscalls) + out.IouringOps = sortAndDedupSlice(out.IouringOps) + out.NetlinkFamilies = sortAndDedupSlice(out.NetlinkFamilies) + out.NetlinkPolicies = sortAndDedupSlice(out.NetlinkPolicies) +} + +// SetSourceFile attaches the source file to the entities that need it. +// The clang tool could do it, but it looks easier to do it here. +func (out *Output) SetSourceFile(file string) { + for _, call := range out.Syscalls { + call.SourceFile = file + } + for _, fam := range out.NetlinkFamilies { + fam.SourceFile = file + } + for _, op := range out.IouringOps { + op.SourceFile = file + } +} + +func sortAndDedupSlice[Slice ~[]E, E any](s Slice) Slice { + slices.SortFunc(s, func(a, b E) int { + aa, _ := json.Marshal(a) + bb, _ := json.Marshal(b) + return bytes.Compare(aa, bb) + }) + return slices.CompactFunc(s, func(a, b E) bool { + aa, _ := json.Marshal(a) + bb, _ := json.Marshal(b) + return bytes.Equal(aa, bb) + }) +} |
