From 2b76b86c449fff4c26410164052c32f3b9cf56fe Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 17 Jan 2025 10:39:47 +0100 Subject: tools/syz-declextract: fix empty structs and arrays This fixes 2 bugs: 1. We completly remove empty structs, but they can have effect on parent struct layout if they have >1 alignment. Replace empty structs with a special auto_aligner type that preserves alignment. 2. Arrays of 0 size are currently emitted as dynamically-sized (we assume 0 size means "this is not a const-size array"). Add separate IsConstSize flag for arrays that marks const-size arrays. Additionally cross-check that generated structs have exactly the same size/alignment as the corresponding C structs. This allows to catch the above bugs. --- tools/syz-declextract/clangtool/declextract.cpp | 13 +- tools/syz-declextract/clangtool/output.h | 12 +- tools/syz-declextract/declextract.go | 40 +-- tools/syz-declextract/declextract_test.go | 20 +- .../syz-declextract/testdata/arch/x86/syscalls.tbl | 3 +- .../testdata/file_operations.c.json | 1 + tools/syz-declextract/testdata/netlink.c.json | 2 + tools/syz-declextract/testdata/types.c | 35 ++- tools/syz-declextract/testdata/types.c.info | 1 + tools/syz-declextract/testdata/types.c.json | 284 +++++++++++++++++++-- tools/syz-declextract/testdata/types.c.txt | 39 ++- 11 files changed, 399 insertions(+), 51 deletions(-) (limited to 'tools') diff --git a/tools/syz-declextract/clangtool/declextract.cpp b/tools/syz-declextract/clangtool/declextract.cpp index 4012900d6..d7d6bc824 100644 --- a/tools/syz-declextract/clangtool/declextract.cpp +++ b/tools/syz-declextract/clangtool/declextract.cpp @@ -128,6 +128,7 @@ private: const T* findFirstMatch(const Node* Expr, const Condition& Cond); std::optional getSizeofType(const Expr* E); int sizeofType(const Type* T); + int alignofType(const Type* T); std::vector extractIoctlCommands(const std::string& Ioctl); std::optional getTypingEntity(const std::string& CurrentFunc, std::unordered_map& LocalVars, @@ -206,11 +207,15 @@ FieldType Extractor::genType(QualType QT, const std::string& BackupName) { return extractRecord(QT, Typ, BackupName); } if (auto* Typ = llvm::dyn_cast(T)) { + // TODO: the size may be a macro that is different for each arch, e.g.: + // long foo[FOOSIZE/sizeof(long)]; int Size = Typ->getSize().getZExtValue(); return ArrType{ .Elem = genType(Typ->getElementType(), BackupName), .MinSize = Size, .MaxSize = Size, + .Align = alignofType(Typ), + .IsConstSize = true, }; } if (auto* Typ = llvm::dyn_cast(T)) { @@ -275,12 +280,12 @@ FieldType Extractor::extractRecord(QualType QT, const RecordType* Typ, const std .Type = std::move(FieldType), }); } - int Align = 0; + int AlignAttr = 0; bool Packed = false; if (Decl->isStruct() && Decl->hasAttrs()) { for (const auto& A : Decl->getAttrs()) { if (auto* Attr = llvm::dyn_cast(A)) - Align = Attr->getAlignment(*Context) / 8; + AlignAttr = Attr->getAlignment(*Context) / 8; else if (llvm::isa(A)) Packed = true; } @@ -288,9 +293,10 @@ FieldType Extractor::extractRecord(QualType QT, const RecordType* Typ, const std Output.emit(Struct{ .Name = Name, .ByteSize = sizeofType(Typ), + .Align = alignofType(Typ), .IsUnion = Decl->isUnion(), .IsPacked = Packed, - .Align = Align, + .AlignAttr = AlignAttr, .Fields = std::move(Fields), }); return Name; @@ -445,6 +451,7 @@ std::vector> Extractor::extractDesignatedInitConsts( } int Extractor::sizeofType(const Type* T) { return static_cast(Context->getTypeInfo(T).Width) / 8; } +int Extractor::alignofType(const Type* T) { return static_cast(Context->getTypeInfo(T).Align) / 8; } template T Extractor::evaluate(const Expr* E) { Expr::EvalResult Res; diff --git a/tools/syz-declextract/clangtool/output.h b/tools/syz-declextract/clangtool/output.h index d89d64afb..ffa0844c9 100644 --- a/tools/syz-declextract/clangtool/output.h +++ b/tools/syz-declextract/clangtool/output.h @@ -53,6 +53,8 @@ struct ArrType { FieldType Elem; int MinSize = 0; int MaxSize = 0; + int Align = 0; + bool IsConstSize = false; }; struct BufferType { @@ -79,9 +81,10 @@ struct Field { struct Struct { std::string Name; int ByteSize = 0; + int Align = 0; bool IsUnion = false; bool IsPacked = false; - int Align = 0; + int AlignAttr = 0; std::vector Fields; }; @@ -217,10 +220,11 @@ inline void print(JSONPrinter& Printer, const Field& V) { inline void print(JSONPrinter& Printer, const Struct& V) { JSONPrinter::Scope Scope(Printer); Printer.Field("name", V.Name); + Printer.Field("align", V.Align); Printer.Field("byte_size", V.ByteSize); Printer.Field("is_union", V.IsUnion); Printer.Field("is_packed", V.IsPacked); - Printer.Field("align", V.Align); + Printer.Field("align_attr", V.AlignAttr); Printer.Field("fields", V.Fields, true); } @@ -265,7 +269,9 @@ inline void print(JSONPrinter& Printer, const ArrType& V) { JSONPrinter::Scope Scope(Printer); Printer.Field("elem", V.Elem); Printer.Field("min_size", V.MinSize); - Printer.Field("max_size", V.MaxSize, true); + Printer.Field("max_size", V.MaxSize); + Printer.Field("align", V.Align); + Printer.Field("is_const_size", V.IsConstSize, true); } inline void print(JSONPrinter& Printer, const BufferType& V) { diff --git a/tools/syz-declextract/declextract.go b/tools/syz-declextract/declextract.go index 7d85adaf8..dcf957431 100644 --- a/tools/syz-declextract/declextract.go +++ b/tools/syz-declextract/declextract.go @@ -46,7 +46,7 @@ func main() { loadProbeInfo := func() (*ifaceprobe.Info, error) { return probe(cfg, *flagConfig) } - if err := run(filepath.FromSlash("sys/linux/auto.txt"), loadProbeInfo, &clangtool.Config{ + if _, err := run(filepath.FromSlash("sys/linux/auto.txt"), loadProbeInfo, &clangtool.Config{ ToolBin: *flagBinary, KernelSrc: cfg.KernelSrc, KernelObj: cfg.KernelObj, @@ -57,20 +57,21 @@ func main() { } } -func run(autoFile string, loadProbeInfo func() (*ifaceprobe.Info, error), cfg *clangtool.Config) error { +func run(autoFile string, loadProbeInfo func() (*ifaceprobe.Info, error), cfg *clangtool.Config) ( + *declextract.Result, error) { out, probeInfo, syscallRename, err := prepare(loadProbeInfo, cfg) if err != nil { - return err + return nil, err } - descriptions, interfaces, includeUse, err := declextract.Run(out, probeInfo, syscallRename, cfg.DebugTrace) + res, err := declextract.Run(out, probeInfo, syscallRename, cfg.DebugTrace) if err != nil { - return err + return nil, err } - if err := osutil.WriteFile(autoFile, descriptions); err != nil { - return err + if err := osutil.WriteFile(autoFile, res.Descriptions); err != nil { + return nil, err } - if err := osutil.WriteFile(autoFile+".info", serialize(interfaces)); err != nil { - return err + if err := osutil.WriteFile(autoFile+".info", serialize(res.Interfaces)); err != nil { + return nil, err } // In order to remove unused bits of the descriptions, we need to write them out first, // and then parse all descriptions back b/c auto descriptions use some types defined @@ -79,32 +80,35 @@ func run(autoFile string, loadProbeInfo func() (*ifaceprobe.Info, error), cfg *c eh, errors := errorHandler() desc := ast.ParseGlob(filepath.Join(filepath.Dir(autoFile), "*.txt"), eh) if desc == nil { - return fmt.Errorf("failed to parse descriptions\n%s", errors.Bytes()) + return nil, fmt.Errorf("failed to parse descriptions\n%s", errors.Bytes()) } // Need to clone descriptions b/c CollectUnused changes them slightly during type checking. unusedNodes, err := compiler.CollectUnused(desc.Clone(), target, eh) if err != nil { - return fmt.Errorf("failed to typecheck descriptions: %w\n%s", err, errors.Bytes()) + return nil, fmt.Errorf("failed to typecheck descriptions: %w\n%s", err, errors.Bytes()) } consts := compiler.ExtractConsts(desc.Clone(), target, eh) if consts == nil { - return fmt.Errorf("failed to typecheck descriptions: %w\n%s", err, errors.Bytes()) + return nil, fmt.Errorf("failed to typecheck descriptions: %w\n%s", err, errors.Bytes()) } - finishInterfaces(interfaces, consts, autoFile) - if err := osutil.WriteFile(autoFile+".info", serialize(interfaces)); err != nil { - return err + finishInterfaces(res.Interfaces, consts, autoFile) + if err := osutil.WriteFile(autoFile+".info", serialize(res.Interfaces)); err != nil { + return nil, err } removeUnused(desc, "", unusedNodes) // Second pass to remove unused defines/includes. This needs to be done after removing // other garbage b/c they may be used by other garbage. - unusedConsts, err := compiler.CollectUnusedConsts(desc.Clone(), target, includeUse, eh) + unusedConsts, err := compiler.CollectUnusedConsts(desc.Clone(), target, res.IncludeUse, eh) if err != nil { - return fmt.Errorf("failed to typecheck descriptions: %w\n%s", err, errors.Bytes()) + return nil, fmt.Errorf("failed to typecheck descriptions: %w\n%s", err, errors.Bytes()) } removeUnused(desc, autoFile, unusedConsts) // We need re-parse them again b/c new lines are fixed up during parsing. formatted := ast.Format(ast.Parse(ast.Format(desc), autoFile, nil)) - return osutil.WriteFile(autoFile, formatted) + if err := osutil.WriteFile(autoFile, formatted); err != nil { + return nil, err + } + return res, nil } func removeUnused(desc *ast.Description, autoFile string, unusedNodes []ast.Node) { diff --git a/tools/syz-declextract/declextract_test.go b/tools/syz-declextract/declextract_test.go index ec1071327..222a96970 100644 --- a/tools/syz-declextract/declextract_test.go +++ b/tools/syz-declextract/declextract_test.go @@ -68,7 +68,8 @@ func TestDeclextract(t *testing.T) { return probeInfo, nil } autoFile := filepath.Join(cfg.KernelObj, filepath.Base(file)+".txt") - if err := run(autoFile, loadProbeInfo, cfg); err != nil { + res, err := run(autoFile, loadProbeInfo, cfg) + if err != nil { if *flagUpdate { osutil.CopyFile(autoFile, file+".txt") osutil.CopyFile(autoFile+".info", file+".info") @@ -93,11 +94,24 @@ func TestDeclextract(t *testing.T) { consts[c.Name] = uint64(i + 1) } } - res := compiler.Compile(full, consts, target, eh) - if res == nil { + desc := compiler.Compile(full, consts, target, eh) + if desc == nil { t.Fatalf("failed to compile full descriptions:\n%s", errors) } + // Check that generated structs have the same size/align as they had in C. + // We assume size/align do not depend on const values (which we fabricated). + for _, typ := range desc.Types { + info := res.StructInfo[typ.Name()] + if info == nil { + continue + } + if typ.Size() != uint64(info.Size) || typ.Alignment() != uint64(info.Align) { + t.Errorf("incorrect generated type %v: size %v/%v align %v/%v", + typ.Name(), typ.Size(), info.Size, typ.Alignment(), info.Align) + } + } + // TODO: Ensure that none of the syscalls will be disabled by TransitivelyEnabledCalls. compareGoldenFile(t, file+".txt", autoFile) diff --git a/tools/syz-declextract/testdata/arch/x86/syscalls.tbl b/tools/syz-declextract/testdata/arch/x86/syscalls.tbl index d79726fdf..7f728f3f2 100644 --- a/tools/syz-declextract/testdata/arch/x86/syscalls.tbl +++ b/tools/syz-declextract/testdata/arch/x86/syscalls.tbl @@ -4,4 +4,5 @@ 1 64 open sys_open 2 64 chmod sys_chmod 3 64 types_syscall sys_types_syscall -4 64 functions sys_functions +4 64 align_syscall sys_align_syscall +5 64 functions sys_functions diff --git a/tools/syz-declextract/testdata/file_operations.c.json b/tools/syz-declextract/testdata/file_operations.c.json index 716dbec38..c8fd5a9dc 100644 --- a/tools/syz-declextract/testdata/file_operations.c.json +++ b/tools/syz-declextract/testdata/file_operations.c.json @@ -94,6 +94,7 @@ { "name": "foo_ioctl_arg", "byte_size": 8, + "align": 4, "fields": [ { "name": "a", diff --git a/tools/syz-declextract/testdata/netlink.c.json b/tools/syz-declextract/testdata/netlink.c.json index e1c2754b1..35b2a79b5 100644 --- a/tools/syz-declextract/testdata/netlink.c.json +++ b/tools/syz-declextract/testdata/netlink.c.json @@ -108,6 +108,7 @@ { "name": "netlink_foo_struct1", "byte_size": 12, + "align": 4, "fields": [ { "name": "a", @@ -147,6 +148,7 @@ { "name": "netlink_foo_struct2", "byte_size": 24, + "align": 8, "fields": [ { "name": "a", diff --git a/tools/syz-declextract/testdata/types.c b/tools/syz-declextract/testdata/types.c index 20f92673f..014318d42 100644 --- a/tools/syz-declextract/testdata/types.c +++ b/tools/syz-declextract/testdata/types.c @@ -46,7 +46,8 @@ struct various { }; struct recursive { - struct various various; + // This is not handled properly yet. + // struct various various; }; SYSCALL_DEFINE1(types_syscall, struct anon_struct* p, struct empty_struct* y, @@ -64,3 +65,35 @@ void anon_flow(int x) { s.ptr->a = x; s.ptr_array[1]->b = x; } + +struct aligned_empty_struct {} __attribute__((aligned(8))); +struct large_struct { long foo[10]; }; + +struct align1 { + char f1; + long aligner[0]; + char f2; +}; + +struct align2 { + char f1; + struct empty_struct aligner; + char f2; +}; + +struct align3 { + char f1; + struct aligned_empty_struct aligner; + char f2; +}; + +struct align4 { + char f1; + struct large_struct aligner[0]; + char f2; +}; + +SYSCALL_DEFINE1(align_syscall, struct align1* a1, struct align2* a2, struct align3* a3, struct align4* a4) { + return 0; +} + diff --git a/tools/syz-declextract/testdata/types.c.info b/tools/syz-declextract/testdata/types.c.info index cf7001874..ac9903fbe 100644 --- a/tools/syz-declextract/testdata/types.c.info +++ b/tools/syz-declextract/testdata/types.c.info @@ -1 +1,2 @@ +SYSCALL align_syscall func:__do_sys_align_syscall loc:1 access:unknown manual_desc:false auto_desc:true file:types.c subsystem:kernel SYSCALL types_syscall func:__do_sys_types_syscall loc:2 access:unknown manual_desc:false auto_desc:true file:types.c subsystem:kernel diff --git a/tools/syz-declextract/testdata/types.c.json b/tools/syz-declextract/testdata/types.c.json index 82d45e3fe..99d0adc2b 100644 --- a/tools/syz-declextract/testdata/types.c.json +++ b/tools/syz-declextract/testdata/types.c.json @@ -1,5 +1,10 @@ { "functions": [ + { + "name": "__do_sys_align_syscall", + "file": "types.c", + "loc": 1 + }, { "name": "__do_sys_types_syscall", "file": "types.c", @@ -139,9 +144,175 @@ } ], "structs": [ + { + "name": "align1", + "byte_size": 16, + "align": 8, + "fields": [ + { + "name": "f1", + "counted_by": -1, + "type": { + "int": { + "byte_size": 1, + "name": "char", + "base": "char" + } + } + }, + { + "name": "aligner", + "counted_by": -1, + "type": { + "array": { + "elem": { + "int": { + "byte_size": 8, + "name": "long", + "base": "long" + } + }, + "align": 8, + "is_const_size": true + } + } + }, + { + "name": "f2", + "counted_by": -1, + "type": { + "int": { + "byte_size": 1, + "name": "char", + "base": "char" + } + } + } + ] + }, + { + "name": "align2", + "byte_size": 2, + "align": 1, + "fields": [ + { + "name": "f1", + "counted_by": -1, + "type": { + "int": { + "byte_size": 1, + "name": "char", + "base": "char" + } + } + }, + { + "name": "aligner", + "counted_by": -1, + "type": { + "struct": "empty_struct" + } + }, + { + "name": "f2", + "counted_by": -1, + "type": { + "int": { + "byte_size": 1, + "name": "char", + "base": "char" + } + } + } + ] + }, + { + "name": "align3", + "byte_size": 16, + "align": 8, + "fields": [ + { + "name": "f1", + "counted_by": -1, + "type": { + "int": { + "byte_size": 1, + "name": "char", + "base": "char" + } + } + }, + { + "name": "aligner", + "counted_by": -1, + "type": { + "struct": "aligned_empty_struct" + } + }, + { + "name": "f2", + "counted_by": -1, + "type": { + "int": { + "byte_size": 1, + "name": "char", + "base": "char" + } + } + } + ] + }, + { + "name": "align4", + "byte_size": 16, + "align": 8, + "fields": [ + { + "name": "f1", + "counted_by": -1, + "type": { + "int": { + "byte_size": 1, + "name": "char", + "base": "char" + } + } + }, + { + "name": "aligner", + "counted_by": -1, + "type": { + "array": { + "elem": { + "struct": "large_struct" + }, + "align": 8, + "is_const_size": true + } + } + }, + { + "name": "f2", + "counted_by": -1, + "type": { + "int": { + "byte_size": 1, + "name": "char", + "base": "char" + } + } + } + ] + }, + { + "name": "aligned_empty_struct", + "align": 8, + "align_attr": 8 + }, { "name": "anon_struct", "byte_size": 104, + "align": 8, "fields": [ { "name": "a", @@ -204,7 +375,9 @@ "struct": "anon_struct_array" }, "min_size": 4, - "max_size": 4 + "max_size": 4, + "align": 4, + "is_const_size": true } } }, @@ -232,7 +405,9 @@ } }, "min_size": 4, - "max_size": 4 + "max_size": 4, + "align": 8, + "is_const_size": true } } } @@ -241,6 +416,7 @@ { "name": "anon_struct_2", "byte_size": 4, + "align": 4, "fields": [ { "name": "y", @@ -258,6 +434,7 @@ { "name": "anon_struct_3", "byte_size": 8, + "align": 8, "is_union": true, "fields": [ { @@ -287,6 +464,7 @@ { "name": "anon_struct_a", "byte_size": 4, + "align": 4, "fields": [ { "name": "x", @@ -304,6 +482,7 @@ { "name": "anon_struct_array", "byte_size": 8, + "align": 4, "fields": [ { "name": "a", @@ -330,11 +509,13 @@ ] }, { - "name": "anon_struct_b" + "name": "anon_struct_b", + "align": 1 }, { "name": "anon_struct_ptr", "byte_size": 8, + "align": 4, "fields": [ { "name": "a", @@ -363,6 +544,7 @@ { "name": "anon_struct_ptr_array", "byte_size": 8, + "align": 4, "fields": [ { "name": "a", @@ -391,6 +573,7 @@ { "name": "anon_t", "byte_size": 4, + "align": 4, "fields": [ { "name": "f", @@ -409,6 +592,7 @@ "name": "bitfields", "byte_size": 32, "align": 32, + "align_attr": 32, "fields": [ { "name": "a", @@ -512,13 +696,41 @@ ] }, { - "name": "empty_struct" + "name": "empty_struct", + "align": 1 + }, + { + "name": "large_struct", + "byte_size": 80, + "align": 8, + "fields": [ + { + "name": "foo", + "counted_by": -1, + "type": { + "array": { + "elem": { + "int": { + "byte_size": 8, + "name": "long", + "base": "long" + } + }, + "min_size": 10, + "max_size": 10, + "align": 8, + "is_const_size": true + } + } + } + ] }, { "name": "packed_t", "byte_size": 32, - "is_packed": true, "align": 32, + "is_packed": true, + "align_attr": 32, "fields": [ { "name": "x", @@ -546,20 +758,12 @@ }, { "name": "recursive", - "byte_size": 64, - "fields": [ - { - "name": "various", - "counted_by": -1, - "type": { - "struct": "various" - } - } - ] + "align": 1 }, { "name": "various", "byte_size": 64, + "align": 32, "fields": [ { "name": "recursive", @@ -594,6 +798,56 @@ } ], "syscalls": [ + { + "func": "__do_sys_align_syscall", + "args": [ + { + "name": "a1", + "counted_by": -1, + "type": { + "ptr": { + "elem": { + "struct": "align1" + } + } + } + }, + { + "name": "a2", + "counted_by": -1, + "type": { + "ptr": { + "elem": { + "struct": "align2" + } + } + } + }, + { + "name": "a3", + "counted_by": -1, + "type": { + "ptr": { + "elem": { + "struct": "align3" + } + } + } + }, + { + "name": "a4", + "counted_by": -1, + "type": { + "ptr": { + "elem": { + "struct": "align4" + } + } + } + } + ], + "source_file": "types.c" + }, { "func": "__do_sys_types_syscall", "args": [ diff --git a/tools/syz-declextract/testdata/types.c.txt b/tools/syz-declextract/testdata/types.c.txt index 04a160771..ea95b4bb5 100644 --- a/tools/syz-declextract/testdata/types.c.txt +++ b/tools/syz-declextract/testdata/types.c.txt @@ -4,13 +4,42 @@ meta automatic type auto_todo int8 +type auto_aligner[N] { + void void +} [align[N]] + bitfield_enum$auto = a, b, c -types_syscall$auto(p ptr[inout, anon_struct$auto], y ptr[inout, void], b ptr[inout, bitfields$auto], pid pid, f int32, v ptr[inout, various$auto]) +align_syscall$auto(a1 ptr[inout, align1$auto], a2 ptr[inout, align2$auto], a3 ptr[inout, align3$auto], a4 ptr[inout, align4$auto]) +types_syscall$auto(p ptr[inout, anon_struct$auto], y ptr[inout, auto_aligner[1]], b ptr[inout, bitfields$auto], pid pid, f int32, v ptr[inout, various$auto]) + +align1$auto { + f1 int8 + aligner auto_aligner[8] + f2 int8 +} + +align2$auto { + f1 int8 + aligner auto_aligner[1] + f2 int8 +} + +align3$auto { + f1 int8 + aligner auto_aligner[8] + f2 int8 +} + +align4$auto { + f1 int8 + aligner auto_aligner[8] + f2 int8 +} anon_struct$auto { a anon_struct_a$auto - b void + b auto_aligner[1] anon_struct_2 anon_struct_2$auto anon_struct_3 anon_struct_3$auto foo anon_t$auto @@ -68,13 +97,9 @@ packed_t$auto { y int32 } [packed, align[32]] -recursive$auto { - various various$auto -} - various$auto { recursive ptr[inout, various$auto, opt] - next ptr[inout, recursive$auto, opt] + next ptr[inout, auto_aligner[1], opt] packed packed_t$auto } -- cgit mrf-deployment