diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2024-12-02 10:57:36 +0100 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2024-12-11 15:22:17 +0000 |
| commit | b2c5a234aeb69e981c6e7ad120b49d37a86c6cae (patch) | |
| tree | d2e575e4c5dd3f044d43a71231c50c1b1459e35a /tools | |
| parent | bfb4b3275371a3b53cd6562fa06e5a9dfb5627b7 (diff) | |
tools/syz-declextract: rewrite
syz-declextract accumulated a bunch of code health problems
so that now it's hard to change/extend it, lots of new features
can only be added in hacky ways and cause lots of code duplication.
It's also completely untested. Rewrite the tool to:
- move as much code as possible to Go (working with the clang tool
is painful for a number of reasons)
- allow testing and add unit tests (first layer of tests test
what information is produced by the clang tool, second layer
of tests test how that information is transformed to descriptions)
- allow extending the clang tool output to export arbitrary info
in a non-hacky way (now it produces arbitrary JSON instead of a mix
of incomplete descriptions and interfaces)
- remove code duplication in the clang tool and provide common
infrastructure to add new analysis w/o causing more duplication
- provide more convenient primitives in the clang tool
- improve code style consistency and stick to the LLVM code style
(in particular, variable names must start with a capital letter,
single-statement blocks are not surrounded with {})
- remove intermixing of code that works on different levels
(currently we have AST analysis + business logic + printfs
all intermixed with each other)
- provide several helper Go packages for better code structuring
(e.g. pkg/clangtool just runs the tool on source files in parallel
and returns results, this already separates a bunch of low-level
logic from the rest of the code under a simple abstraction)
I've tried to make the output match the current output as much as possible
so that the diff is manageable (in some cases at the cost of code quality,
this should be fixed in future commits). There are still some differences,
but hopefully they are manageable for review (more includes/defines,
reordered some netlink attributes).
Some minor bugs are fixed along the way, but mostly NFC:
1. Some unions were incorrectly emitted as [varlen]
(C unions are never varlen).
2. Only one of [packed], [align[N]] attributes was emitted
for struct (both couldn't be emitted).
Diffstat (limited to 'tools')
39 files changed, 2773 insertions, 1568 deletions
diff --git a/tools/syz-declextract/README.md b/tools/syz-declextract/README.md index fefc157a4..cab397192 100644 --- a/tools/syz-declextract/README.md +++ b/tools/syz-declextract/README.md @@ -17,8 +17,8 @@ make CC=clang -j`nproc` # kernel has to be built at least once for the script to LLVM=$PWD/llvm-project git clone https://github.com/llvm/llvm-project.git $LLVM cd $LLVM -git checkout 0f231567719c99caa99164d8f91bad50883dab03 # In case of any breaking changes, this commit works -echo 'add_clang_executable(syz-declextract syz-declextract/syz-declextract.cpp) +git checkout 3a31427224d4fa49d7ef737b21f6027dc4928ecf # In case of any breaking changes, this commit works +echo 'add_clang_executable(syz-declextract syz-declextract/declextract.cpp) target_link_libraries(syz-declextract PRIVATE clangTooling)' >> $LLVM/clang/CMakeLists.txt ``` @@ -26,12 +26,12 @@ target_link_libraries(syz-declextract PRIVATE clangTooling)' >> $LLVM/clang/CMak ``` mkdir $LLVM/clang/syz-declextract ``` -Download `syz-declextract.cpp` file and add it to `$LLVM/clang/syz-declextract` directory +Copy `tools/syz-declextract/clangtool/*.{cpp,h}` files to `$LLVM/clang/syz-declextract/` directory. 
``` LLVM_BUILD=$PWD/syz mkdir $LLVM_BUILD && cd $LLVM_BUILD -cmake -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ --DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ $LLVM/llvm +cmake -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=On \ +-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -GNinja $LLVM/llvm make -j`nproc` syz-declextract ``` @@ -42,6 +42,6 @@ make -j`nproc` syz-declextract ## Running on the whole kernel ``` -go run tools/syz-declextract/run.go -binary=$LLVM_BUILD/bin/syz-declextract -sourcedir=$KERNEL +go run tools/syz-declextract -binary=$LLVM_BUILD/bin/syz-declextract -config=manager.cfg syz-env make extract SOURCEDIR=$KERNEL ``` diff --git a/tools/syz-declextract/.clang-format b/tools/syz-declextract/clangtool/.clang-format index 5d1fbe84f..15868add4 100644 --- a/tools/syz-declextract/.clang-format +++ b/tools/syz-declextract/clangtool/.clang-format @@ -1,3 +1,5 @@ BasedOnStyle: LLVM ColumnLimit: 120 +DerivePointerAlignment: false +PointerAlignment: Left CommentPragmas: '^[^ ]' diff --git a/tools/syz-declextract/clangtool/declextract.cpp b/tools/syz-declextract/clangtool/declextract.cpp new file mode 100644 index 000000000..8e8632c12 --- /dev/null +++ b/tools/syz-declextract/clangtool/declextract.cpp @@ -0,0 +1,545 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +#include "json.h" +#include "output.h" + +#include "clang/AST/APValue.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Attrs.inc" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclarationName.h" +#include "clang/AST/Expr.h" +#include "clang/AST/PrettyPrinter.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/Type.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TypeTraits.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <filesystem> +#include <string> +#include <string_view> +#include <unordered_map> +#include <vector> + +using namespace clang; +using namespace clang::ast_matchers; + +class Extractor : public MatchFinder { +public: + Extractor() { + match(&Extractor::matchSyscall, + functionDecl(isExpandedFromMacro("SYSCALL_DEFINEx"), matchesName("__do_sys_.*")).bind("syscall")); + + match(&Extractor::matchIouring, + translationUnitDecl(forEachDescendant( + varDecl(hasType(constantArrayType(hasElementType(hasDeclaration(recordDecl(hasName("io_issue_def")))))), + isDefinition()) + .bind("io_issue_defs")))); + + match(&Extractor::matchNetlinkPolicy, + translationUnitDecl(forEachDescendant( + varDecl(hasType(constantArrayType(hasElementType(hasDeclaration(recordDecl(hasName("nla_policy")))))), + isDefinition()) + .bind("netlink_policy")))); + + match(&Extractor::matchNetlinkFamily, varDecl(hasType(recordDecl(hasName("genl_family")).bind("genl_family")), + has(initListExpr().bind("genl_family_init")))); + } + + void print() const { Output.print(); } + +private: + using MatchFunc = void 
(Extractor::*)(); + // Thunk that redirects MatchCallback::run method to one of the methods of the Extractor class. + struct MatchCallbackThunk : MatchFinder::MatchCallback { + Extractor& Ex; + MatchFunc Action; + MatchCallbackThunk(Extractor& Ex, MatchFunc Action) : Ex(Ex), Action(Action) {} + void run(const MatchFinder::MatchResult& Result) override { Ex.run(Result, Action); } + }; + std::vector<std::unique_ptr<MatchCallbackThunk>> Matchers; + + // These set to point to the Result of the current match (to avoid passing them through all methods). + const BoundNodes* Nodes = nullptr; + ASTContext* Context = nullptr; + SourceManager* SourceManager = nullptr; + + Output Output; + std::unordered_map<std::string, bool> EnumDedup; + std::unordered_map<std::string, bool> StructDedup; + + void matchSyscall(); + void matchIouring(); + void matchNetlinkPolicy(); + void matchNetlinkFamily(); + template <typename M> void match(MatchFunc Action, const M& Matcher); + void run(const MatchFinder::MatchResult& Result, MatchFunc Action); + template <typename T> const T* getResult(StringRef ID) const; + FieldType extractRecord(QualType QT, const RecordType* Typ, const std::string& BackupName); + std::string extractEnum(const EnumDecl* Decl); + void noteConstUse(const std::string& Name, int64_t Val, const SourceRange& Range); + std::string getDeclName(const Expr* Expr); + const ValueDecl* getValueDecl(const Expr* Expr); + std::string getDeclFileID(const Decl* Decl); + std::string policyName(const ValueDecl* Decl); + std::vector<std::pair<int, std::string>> extractDesignatedInitConsts(const VarDecl& ArrayDecl); + FieldType genType(QualType Typ, const std::string& BackupName = ""); + std::unordered_map<std::string, unsigned> structFieldIndexes(const RecordDecl* Decl); + template <typename T = int64_t> T evaluate(const Expr* E); + template <typename T, typename Node, typename Condition> + std::vector<const T*> findAllMatches(const Node* Expr, const Condition& Cond); + template <typename 
T, typename Node, typename Condition> + const T* findFirstMatch(const Node* Expr, const Condition& Cond); + std::optional<QualType> getSizeofType(const Expr* E); + int sizeofType(const Type* T); +}; + +template <typename M> void Extractor::match(MatchFunc Action, const M& Matcher) { + Matchers.emplace_back(new MatchCallbackThunk(*this, Action)); + addMatcher(Matcher, Matchers.back().get()); +} + +void Extractor::run(const MatchFinder::MatchResult& Result, MatchFunc Action) { + Nodes = &Result.Nodes; + Context = Result.Context; + SourceManager = Result.SourceManager; + (this->*Action)(); +} + +template <typename T> const T* Extractor::getResult(StringRef ID) const { return Nodes->getNodeAs<T>(ID); } + +// Top function that converts any clang type QT to our output type. +FieldType Extractor::genType(QualType QT, const std::string& BackupName) { + const Type* T = QT.IgnoreParens().getUnqualifiedType().getDesugaredType(*Context).getTypePtr(); + if (auto* Typ = llvm::dyn_cast<BuiltinType>(T)) { + return IntType{.ByteSize = sizeofType(T), .Name = QT.getAsString(), .Base = QualType(T, 0).getAsString()}; + } + if (auto* Typ = llvm::dyn_cast<EnumType>(T)) { + return IntType{.ByteSize = sizeofType(T), .Enum = extractEnum(Typ->getDecl())}; + } + if (auto* Typ = llvm::dyn_cast<FunctionProtoType>(T)) { + return PtrType{.Elem = TodoType(), .IsConst = true}; + } + if (auto* Typ = llvm::dyn_cast<IncompleteArrayType>(T)) { + return ArrType{.Elem = genType(Typ->getElementType(), BackupName)}; + } + if (auto* Typ = llvm::dyn_cast<RecordType>(T)) { + return extractRecord(QT, Typ, BackupName); + } + if (auto* Typ = llvm::dyn_cast<ConstantArrayType>(T)) { + int Size = Typ->getSize().getZExtValue(); + return ArrType{ + .Elem = genType(Typ->getElementType(), BackupName), + .MinSize = Size, + .MaxSize = Size, + }; + } + if (auto* Typ = llvm::dyn_cast<PointerType>(T)) { + FieldType Elem; + const QualType& Pointee = Typ->getPointeeType(); + if (Pointee->isAnyCharacterType()) + Elem = 
BufferType{.IsString = true}; + else if (Pointee->isVoidType()) + Elem = ArrType{.Elem = TodoType()}; + else + Elem = genType(Pointee, BackupName); // note: it may be an array as well + return PtrType{ + .Elem = std::move(Elem), + .IsConst = Pointee.isConstQualified(), + }; + } + QT.dump(); + llvm::report_fatal_error("unhandled type"); +} + +FieldType Extractor::extractRecord(QualType QT, const RecordType* Typ, const std::string& BackupName) { + auto* Decl = Typ->getDecl()->getDefinition(); + if (!Decl) + return TodoType(); // definition is in a different TU + std::string Name = Decl->getDeclName().getAsString(); + // If it's a typedef of anon struct, we want to use the typedef name: + // typedef struct {...} foo_t; + if (Name.empty() && QT->isTypedefNameType()) + Name = QualType(Typ, 0).getAsString(); + // If no other names, fallback to the parent-struct-based name. + if (Name.empty()) { + assert(!BackupName.empty()); + // The BackupName is supposed to be unique. + assert(!StructDedup[BackupName]); + Name = BackupName; + } + if (StructDedup[Name]) + return Name; + StructDedup[Name] = true; + std::vector<Field> Fields; + for (const FieldDecl* F : Decl->fields()) { + std::string FieldName = F->getNameAsString(); + std::string BackupFieldName = Name + "_" + FieldName; + bool IsAnonymous = false; + if (FieldName.empty()) { + BackupFieldName = Name + "_" + std::to_string(F->getFieldIndex()); + FieldName = BackupFieldName; + IsAnonymous = true; + } + FieldType FieldType = genType(F->getType(), BackupFieldName); + int BitWidth = F->isBitField() ? F->getBitWidthValue(*Context) : 0; + int CountedBy = F->getType()->isCountAttributedType() + ? 
llvm::dyn_cast<FieldDecl>( + F->getType()->getAs<CountAttributedType>()->getCountExpr()->getReferencedDeclOfCallee()) + ->getFieldIndex() + : -1; + Fields.push_back(Field{ + .Name = FieldName, + .IsAnonymous = IsAnonymous, + .BitWidth = BitWidth, + .CountedBy = CountedBy, + .Type = std::move(FieldType), + }); + } + int Align = 0; + bool Packed = false; + if (Decl->isStruct() && Decl->hasAttrs()) { + for (const auto& A : Decl->getAttrs()) { + if (auto* Attr = llvm::dyn_cast<AlignedAttr>(A)) + Align = Attr->getAlignment(*Context) / 8; + else if (llvm::isa<PackedAttr>(A)) + Packed = true; + } + } + Output.emit(Struct{ + .Name = Name, + .ByteSize = sizeofType(Typ), + .IsUnion = Decl->isUnion(), + .IsPacked = Packed, + .Align = Align, + .Fields = std::move(Fields), + }); + return Name; +} + +std::string Extractor::extractEnum(const EnumDecl* Decl) { + const std::string& Name = Decl->getNameAsString(); + if (EnumDedup[Name]) + return Name; + EnumDedup[Name] = true; + std::vector<std::string> Values; + for (const auto* Enumerator : Decl->enumerators()) { + const std::string& Name = Enumerator->getNameAsString(); + noteConstUse(Name, Enumerator->getInitVal().getExtValue(), Decl->getSourceRange()); + Values.push_back(Name); + } + Output.emit(Enum{ + .Name = Name, + .Values = Values, + }); + return Name; +} + +void Extractor::noteConstUse(const std::string& Name, int64_t Val, const SourceRange& Range) { + const std::string& Filename = std::filesystem::relative(SourceManager->getFilename(Range.getBegin()).str()); + // Include only uapi headers. Some ioctl commands defined in internal headers, or even in .c files. + // They have high chances of breaking compilation during const extract. + // If it's not defined in uapi, emit define with concrete value. + // Note: the value may be wrong for other arches. 
+ if (Filename.find("/uapi/") != std::string::npos && Filename.back() == 'h') { + Output.emit(Include{Filename}); + return; + } + Output.emit(Define{ + .Name = Name, + .Value = std::to_string(Val), + }); +} + +// Returns base part of the source file containing the canonical declaration. +// If the passed declaration is also a definition, then it will look for a preceeding declaration. +// This is used to generate unique names for static definitions that may have duplicate names +// across different TUs. We assume that the base part of the source file is enough +// to make them unique. +std::string Extractor::getDeclFileID(const Decl* Decl) { + std::string file = + std::filesystem::path(SourceManager->getFilename(Decl->getCanonicalDecl()->getSourceRange().getBegin()).str()) + .filename() + .stem() + .string(); + std::replace(file.begin(), file.end(), '-', '_'); + return file; +} + +template <typename Node> void matchHelper(MatchFinder& Finder, ASTContext* Context, const Node* Expr) { + Finder.match(*Expr, *Context); +} + +void matchHelper(MatchFinder& Finder, ASTContext* Context, const ASTContext* Expr) { + assert(Context == Expr); + Finder.matchAST(*Context); +} + +// Returns all matches of Cond named "res" in Expr and returns them casted to T. +// Expr can point to Context for a global match. +template <typename T, typename Node, typename Condition> +std::vector<const T*> Extractor::findAllMatches(const Node* Expr, const Condition& Cond) { + if (!Expr) + return {}; + struct Matcher : MatchFinder::MatchCallback { + std::vector<const T*> Matches; + void run(const MatchFinder::MatchResult& Result) override { + if (const T* M = Result.Nodes.getNodeAs<T>("res")) + Matches.push_back(M); + } + }; + MatchFinder Finder; + Matcher Matcher; + Finder.addMatcher(Cond, &Matcher); + matchHelper(Finder, Context, Expr); + return std::move(Matcher.Matches); +} + +// Returns the first match of Cond named "res" in Expr and returns it casted to T. 
+// If no match is found, returns nullptr. +template <typename T, typename Node, typename Condition> +const T* Extractor::findFirstMatch(const Node* Expr, const Condition& Cond) { + const auto& Matches = findAllMatches<T>(Expr, Cond); + return Matches.empty() ? nullptr : Matches[0]; +} + +// If expression refers to some identifier, returns the identifier name. +// Otherwise returns an empty string. +// For example, if the expression is `function_name`, returns "function_name" string. +// If AppendFile, then it also appends per-file suffix. +std::string Extractor::getDeclName(const Expr* Expr) { + // The expression can be complex and include casts and e.g. InitListExpr, + // to remove all of these we match the first/any DeclRefExpr. + auto* Decl = getValueDecl(Expr); + return Decl ? Decl->getNameAsString() : ""; +} + +// Returns the first ValueDecl in the expression. +const ValueDecl* Extractor::getValueDecl(const Expr* Expr) { + // The expression can be complex and include casts and e.g. InitListExpr, + // to remove all of these we match the first/any DeclRefExpr. + auto* Decl = findFirstMatch<DeclRefExpr>(Expr, stmt(forEachDescendant(declRefExpr().bind("res")))); + return Decl ? Decl->getDecl() : nullptr; +} + +// Recursively finds first sizeof in the expression and return the type passed to sizeof. +std::optional<QualType> Extractor::getSizeofType(const Expr* E) { + auto* Res = findFirstMatch<UnaryExprOrTypeTraitExpr>( + E, stmt(forEachDescendant(unaryExprOrTypeTraitExpr(ofKind(UETT_SizeOf)).bind("res")))); + if (!Res) + return {}; + if (Res->isArgumentType()) + return Res->getArgumentType(); + return Res->getArgumentExpr()->getType(); +} + +// Returns map of field name -> field index. +std::unordered_map<std::string, unsigned> Extractor::structFieldIndexes(const RecordDecl* Decl) { + // TODO: this is wrong for structs that contain unions and anonymous sub-structs (e.g. genl_split_ops). 
+ // To handle these we would need to look at InitListExpr::getInitializedFieldInUnion, and recurse + // into anonymous structs. + std::unordered_map<std::string, unsigned> Indexes; + for (const auto& F : Decl->fields()) + Indexes[F->getNameAsString()] = F->getFieldIndex(); + return Indexes; +} + +// Extracts enum info from array variable designated initialization. +// For example, for the following code: +// +// enum Foo { +// FooA = 11, +// FooB = 42, +// }; +// +// struct Bar bars[] = { +// [FooA] = {...}, +// [FooB] = {...}, +// }; +// +// it returns the following vector: {{11, "FooA"}, {42, "FooB"}}. +std::vector<std::pair<int, std::string>> Extractor::extractDesignatedInitConsts(const VarDecl& ArrayDecl) { + const auto& Matches = findAllMatches<ConstantExpr>( + &ArrayDecl, + decl(forEachDescendant(designatedInitExpr(optionally(has(constantExpr(has(declRefExpr())).bind("res"))))))); + std::vector<std::pair<int, std::string>> Inits; + for (auto* Match : Matches) { + const int64_t Val = *Match->getAPValueResult().getInt().getRawData(); + const auto& Name = Match->getEnumConstantDecl()->getNameAsString(); + const auto& SR = Match->getEnumConstantDecl()->getSourceRange(); + noteConstUse(Name, Val, SR); + Inits.emplace_back(Val, Name); + } + return Inits; +} + +int Extractor::sizeofType(const Type* T) { return static_cast<int>(Context->getTypeInfo(T).Width) / 8; } + +template <typename T> T Extractor::evaluate(const Expr* E) { + Expr::EvalResult Res; + E->EvaluateAsConstantExpr(Res, *Context); + return static_cast<T>(Res.Val.getInt().getExtValue()); +} + +void Extractor::matchNetlinkPolicy() { + const auto* PolicyArray = getResult<VarDecl>("netlink_policy"); + const auto* Init = llvm::dyn_cast_if_present<InitListExpr>(PolicyArray->getInit()); + if (!Init) + return; + const auto& InitConsts = extractDesignatedInitConsts(*PolicyArray); + auto Fields = structFieldIndexes(Init->getInit(0)->getType()->getAsRecordDecl()); + std::vector<NetlinkAttr> Attrs; + for (const 
auto& [I, Name] : InitConsts) { + const auto* AttrInit = llvm::dyn_cast<InitListExpr>(Init->getInit(I)); + const std::string& AttrKind = getDeclName(AttrInit->getInit(Fields["type"])); + if (AttrKind == "NLA_REJECT") + continue; + auto* LenExpr = AttrInit->getInit(Fields["len"]); + int MaxSize = 0; + std::string NestedPolicy; + std::unique_ptr<FieldType> Elem; + if (AttrKind == "NLA_NESTED" || AttrKind == "NLA_NESTED_ARRAY") { + if (const auto* NestedDecl = getValueDecl(AttrInit->getInit(2))) + NestedPolicy = policyName(NestedDecl); + } else { + MaxSize = evaluate<int>(LenExpr); + if (auto SizeofType = getSizeofType(LenExpr)) + Elem = std::make_unique<FieldType>(genType(*SizeofType)); + } + Attrs.push_back(NetlinkAttr{ + .Name = Name, + .Kind = AttrKind, + .MaxSize = MaxSize, + .NestedPolicy = NestedPolicy, + .Elem = std::move(Elem), + }); + } + Output.emit(NetlinkPolicy{ + .Name = policyName(PolicyArray), + .Attrs = std::move(Attrs), + }); +} + +void Extractor::matchNetlinkFamily() { + const auto* FamilyInit = getResult<InitListExpr>("genl_family_init"); + auto Fields = structFieldIndexes(getResult<RecordDecl>("genl_family")); + const std::string& FamilyName = llvm::dyn_cast<StringLiteral>(FamilyInit->getInit(Fields["name"]))->getString().str(); + std::string DefaultPolicy; + if (const auto* PolicyDecl = FamilyInit->getInit(Fields["policy"])->getAsBuiltinConstantDeclRef(*Context)) + DefaultPolicy = policyName(PolicyDecl); + std::vector<NetlinkOp> Ops; + for (const auto& OpsName : {"ops", "small_ops", "split_ops"}) { + const auto* OpsDecl = + llvm::dyn_cast_if_present<VarDecl>(FamilyInit->getInit(Fields[OpsName])->getAsBuiltinConstantDeclRef(*Context)); + const auto NumOps = FamilyInit->getInit(Fields[std::string("n_") + OpsName])->getIntegerConstantExpr(*Context); + // The ops variable may be defined in another TU. + // TODO: extract variables from another TUs. 
+ if (!OpsDecl || !OpsDecl->getInit() || !NumOps) + continue; + const auto* OpsInit = llvm::dyn_cast<InitListExpr>(OpsDecl->getInit()); + auto OpsFields = structFieldIndexes(OpsInit->getInit(0)->getType()->getAsRecordDecl()); + for (int I = 0; I < *NumOps; I++) { + const auto* OpInit = llvm::dyn_cast<InitListExpr>(OpsInit->getInit(I)); + const auto* CmdInit = OpInit->getInit(OpsFields["cmd"])->getEnumConstantDecl(); + if (!CmdInit) + continue; + const std::string& OpName = CmdInit->getNameAsString(); + noteConstUse(OpName, CmdInit->getInitVal().getExtValue(), CmdInit->getSourceRange()); + std::string Policy; + if (OpsFields.count("policy") != 0) { + if (const auto* PolicyDecl = OpInit->getInit(OpsFields["policy"])->getAsBuiltinConstantDeclRef(*Context)) + Policy = policyName(PolicyDecl); + } + if (Policy.empty()) + Policy = DefaultPolicy; + std::string Func = getDeclName(OpInit->getInit(OpsFields["doit"])); + if (Func.empty()) + Func = getDeclName(OpInit->getInit(OpsFields["dumpit"])); + int Flags = evaluate(OpInit->getInit(OpsFields["flags"])); + const char* Access = AccessUser; + constexpr int GENL_ADMIN_PERM = 0x01; + constexpr int GENL_UNS_ADMIN_PERM = 0x10; + if (Flags & GENL_ADMIN_PERM) + Access = AccessAdmin; + else if (Flags & GENL_UNS_ADMIN_PERM) + Access = AccessNsAdmin; + Ops.push_back(NetlinkOp{ + .Name = OpName, + .Func = Func, + .Access = Access, + .Policy = Policy, + }); + } + } + Output.emit(NetlinkFamily{ + .Name = FamilyName, + .Ops = std::move(Ops), + }); +} + +std::string Extractor::policyName(const ValueDecl* Decl) { + // TODO: remove appending of $ sign here. 
+ return Decl->getNameAsString() + "$auto_" + getDeclFileID(Decl); +} + +void Extractor::matchSyscall() { + const auto* Func = getResult<FunctionDecl>("syscall"); + std::vector<Field> Args; + for (const auto& Param : Func->parameters()) { + Args.push_back(Field{ + .Name = Param->getNameAsString(), + .Type = genType(Param->getType()), + }); + } + Output.emit(Syscall{ + .Func = Func->getNameAsString(), + .Args = std::move(Args), + }); +} + +void Extractor::matchIouring() { + const auto* IssueDefs = getResult<VarDecl>("io_issue_defs"); + const auto& InitConsts = extractDesignatedInitConsts(*IssueDefs); + const auto* InitList = llvm::dyn_cast<InitListExpr>(IssueDefs->getInit()); + auto Fields = structFieldIndexes(InitList->getInit(0)->getType()->getAsRecordDecl()); + for (const auto& [I, Name] : InitConsts) { + const auto& Init = llvm::dyn_cast<InitListExpr>(InitList->getInit(I)); + std::string Prep = getDeclName(Init->getInit(Fields["prep"])); + if (Prep == "io_eopnotsupp_prep") + continue; + Output.emit(IouringOp{ + .Name = Name, + .Func = getDeclName(Init->getInit(Fields["issue"])), + }); + } +} + +int main(int argc, const char** argv) { + llvm::cl::OptionCategory Options("syz-declextract options"); + auto OptionsParser = tooling::CommonOptionsParser::create(argc, argv, Options); + if (!OptionsParser) { + llvm::errs() << OptionsParser.takeError(); + return 1; + } + Extractor Ex; + tooling::ClangTool Tool(OptionsParser->getCompilations(), OptionsParser->getSourcePathList()); + if (Tool.run(tooling::newFrontendActionFactory(&Ex).get())) + return 1; + Ex.print(); + return 0; +} diff --git a/tools/syz-declextract/clangtool/json.h b/tools/syz-declextract/clangtool/json.h new file mode 100644 index 000000000..fbbcc12a1 --- /dev/null +++ b/tools/syz-declextract/clangtool/json.h @@ -0,0 +1,78 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +#ifndef SYZ_DECLEXTRACT_JSON_H +#define SYZ_DECLEXTRACT_JSON_H + +#include <cassert> +#include <cstdio> +#include <memory> +#include <string> +#include <vector> + +class JSONPrinter { +public: + JSONPrinter() : Top(*this) {} + + template <typename T> void Field(const char* Name, const T& V, bool Last = false) { + printf("%s\"%s\": ", Indent(), Name); + print(*this, V); + printf("%s\n", Last ? "" : ","); + } + + const char* Indent() const { + static std::string Indents; + while (Indents.size() < Nesting) + Indents.push_back('\t'); + return Indents.c_str() + Indents.size() - Nesting; + } + + class Scope { + public: + Scope(JSONPrinter& Printer, bool Array = false) : Printer(Printer), Array(Array) { + printf("%c\n", "{["[Array]); + Printer.Nesting++; + assert(Printer.Nesting < 1000); + } + + ~Scope() { + assert(Printer.Nesting > 0); + Printer.Nesting--; + printf("%s%c", Printer.Indent(), "}]"[Array]); + } + + private: + JSONPrinter& Printer; + const bool Array; + }; + +private: + friend class Scope; + size_t Nesting = 0; + Scope Top; +}; + +inline void print(JSONPrinter& Printer, int V) { printf("%d", V); } +inline void print(JSONPrinter& Printer, int64_t V) { printf("%ld", V); } +inline void print(JSONPrinter& Printer, bool V) { printf("%s", V ? "true" : "false"); } +inline void print(JSONPrinter& Printer, const char* V) { printf("\"%s\"", V ? V : ""); } +inline void print(JSONPrinter& Printer, const std::string& V) { print(Printer, V.c_str()); } + +template <typename E> void print(JSONPrinter& Printer, const std::unique_ptr<E>& V) { + if (!V) + printf("null"); + else + print(Printer, *V); +} + +template <typename E> void print(JSONPrinter& Printer, const std::vector<E>& V) { + JSONPrinter::Scope Scope(Printer, true); + size_t i = 0; + for (const auto& Elem : V) { + printf("%s", Printer.Indent()); + print(Printer, Elem); + printf("%s\n", ++i == V.size() ? 
"" : ","); + } +} + +#endif diff --git a/tools/syz-declextract/clangtool/output.h b/tools/syz-declextract/clangtool/output.h new file mode 100644 index 000000000..df3f290b6 --- /dev/null +++ b/tools/syz-declextract/clangtool/output.h @@ -0,0 +1,292 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +#ifndef SYZ_DECLEXTRACT_OUTPUT_H +#define SYZ_DECLEXTRACT_OUTPUT_H + +#include <memory> +#include <string> +#include <unordered_set> +#include <vector> + +const char* const AccessUnknown = ""; +const char* const AccessUser = "user"; +const char* const AccessNsAdmin = "ns_admin"; +const char* const AccessAdmin = "admin"; + +struct IntType; +struct PtrType; +struct ArrType; +struct BufferType; + +struct FieldType { + std::unique_ptr<IntType> Int; + std::unique_ptr<PtrType> Ptr; + std::unique_ptr<ArrType> Array; + std::unique_ptr<BufferType> Buffer; + std::unique_ptr<std::string> Struct; + + FieldType() = default; + FieldType(IntType&& Typ) : Int(std::make_unique<IntType>(std::move(Typ))) {} + FieldType(PtrType&& Typ) : Ptr(std::make_unique<PtrType>(std::move(Typ))) {} + FieldType(ArrType&& Typ) : Array(std::make_unique<ArrType>(std::move(Typ))) {} + FieldType(BufferType&& Typ) : Buffer(std::make_unique<BufferType>(std::move(Typ))) {} + FieldType(const std::string& Typ) : Struct(std::make_unique<std::string>(std::move(Typ))) {} +}; + +struct IntType { + int ByteSize = 0; + std::string Name; + std::string Base; + std::string Enum; +}; + +struct PtrType { + FieldType Elem; + bool IsConst = false; +}; + +struct ArrType { + FieldType Elem; + int MinSize = 0; + int MaxSize = 0; +}; + +struct BufferType { + int MinSize = 0; + int MaxSize = 0; + bool IsString = false; + bool IsNonTerminated = false; +}; + +struct Include { + std::string Filename; +}; + +struct Define { + std::string Name; + std::string Value; +}; + +struct Field { + std::string Name; + bool 
IsAnonymous = false; + int BitWidth = 0; + int CountedBy = -1; + FieldType Type; +}; + +struct Struct { + std::string Name; + int ByteSize = 0; + bool IsUnion = false; + bool IsPacked = false; + int Align = 0; + std::vector<Field> Fields; +}; + +struct Enum { + std::string Name; + std::vector<std::string> Values; +}; + +struct Syscall { + std::string Func; + std::vector<Field> Args; +}; + +struct IouringOp { + std::string Name; + std::string Func; +}; + +struct NetlinkOp { + std::string Name; + std::string Func; + const char* Access; + std::string Policy; +}; + +struct NetlinkFamily { + std::string Name; + std::vector<NetlinkOp> Ops; +}; + +struct NetlinkAttr { + std::string Name; + std::string Kind; + int MaxSize = 0; + std::string NestedPolicy; + std::unique_ptr<FieldType> Elem; +}; + +struct NetlinkPolicy { + std::string Name; + std::vector<NetlinkAttr> Attrs; +}; + +inline void print(JSONPrinter& Printer, const Define& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("value", V.Value, true); +} + +inline void print(JSONPrinter& Printer, const Field& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("is_anonymous", V.IsAnonymous); + Printer.Field("bit_width", V.BitWidth); + Printer.Field("counted_by", V.CountedBy); + Printer.Field("type", V.Type, true); +} + +inline void print(JSONPrinter& Printer, const Struct& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("byte_size", V.ByteSize); + Printer.Field("is_union", V.IsUnion); + Printer.Field("is_packed", V.IsPacked); + Printer.Field("align", V.Align); + Printer.Field("fields", V.Fields, true); +} + +inline void print(JSONPrinter& Printer, const Enum& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("values", V.Values, true); +} + +inline void print(JSONPrinter& Printer, const FieldType& V) { + JSONPrinter::Scope Scope(Printer); + if (V.Int) + 
Printer.Field("int", *V.Int, true); + else if (V.Ptr) + Printer.Field("ptr", *V.Ptr, true); + else if (V.Array) + Printer.Field("array", *V.Array, true); + else if (V.Buffer) + Printer.Field("buffer", *V.Buffer, true); + else + Printer.Field("struct", *V.Struct, true); +} + +inline void print(JSONPrinter& Printer, const IntType& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("byte_size", V.ByteSize); + Printer.Field("name", V.Name); + Printer.Field("base", V.Base); + Printer.Field("enum", V.Enum, true); +} + +inline void print(JSONPrinter& Printer, const PtrType& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("elem", V.Elem); + Printer.Field("is_const", V.IsConst, true); +} + +inline void print(JSONPrinter& Printer, const ArrType& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("elem", V.Elem); + Printer.Field("min_size", V.MinSize); + Printer.Field("max_size", V.MaxSize, true); +} + +inline void print(JSONPrinter& Printer, const BufferType& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("min_size", V.MinSize); + Printer.Field("max_size", V.MaxSize); + Printer.Field("is_string", V.IsString); + Printer.Field("is_non_terminated", V.IsNonTerminated, true); +} + +inline void print(JSONPrinter& Printer, const Syscall& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("func", V.Func); + Printer.Field("args", V.Args, true); +} + +inline void print(JSONPrinter& Printer, const IouringOp& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("func", V.Func, true); +} + +inline void print(JSONPrinter& Printer, const NetlinkOp& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("func", V.Func); + Printer.Field("access", V.Access); + Printer.Field("policy", V.Policy, true); +} + +inline void print(JSONPrinter& Printer, const NetlinkFamily& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("ops", V.Ops, true); +} 
+ +inline void print(JSONPrinter& Printer, const NetlinkAttr& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("kind", V.Kind); + Printer.Field("max_size", V.MaxSize); + Printer.Field("nested_policy", V.NestedPolicy); + Printer.Field("elem", V.Elem, true); +} + +inline void print(JSONPrinter& Printer, const NetlinkPolicy& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("attrs", V.Attrs, true); +} + +// This type is used when we can't figure out the right type, but need some type to use. +inline FieldType TodoType() { + return IntType{ + // TODO: use size 1, then arrays will be lowered to buffers. + .ByteSize = 8, + .Name = "TODO", + .Base = "long", + }; +} + +class Output { +public: + void emit(Include&& Inc) { + if (IncludesDedup.insert(Inc.Filename).second) + Includes.push_back(Inc.Filename); + } + + void emit(Define&& V) { Defines.push_back(std::move(V)); } + void emit(Struct&& V) { Structs.push_back(std::move(V)); } + void emit(Enum&& V) { Enums.push_back(std::move(V)); } + void emit(Syscall&& V) { Syscalls.push_back(std::move(V)); } + void emit(IouringOp&& V) { IouringOps.push_back(std::move(V)); } + void emit(NetlinkFamily&& V) { NetlinkFamilies.push_back(std::move(V)); } + void emit(NetlinkPolicy&& V) { NetlinkPolicies.push_back(std::move(V)); } + + void print() const { + JSONPrinter Printer; + Printer.Field("includes", Includes); + Printer.Field("defines", Defines); + Printer.Field("enums", Enums); + Printer.Field("structs", Structs); + Printer.Field("syscalls", Syscalls); + Printer.Field("iouring_ops", IouringOps); + Printer.Field("netlink_families", NetlinkFamilies); + Printer.Field("netlink_policies", NetlinkPolicies, true); + } + +private: + std::vector<std::string> Includes; + std::unordered_set<std::string> IncludesDedup; + std::vector<Define> Defines; + std::vector<Enum> Enums; + std::vector<Struct> Structs; + std::vector<Syscall> Syscalls; + std::vector<IouringOp> 
IouringOps; + std::vector<NetlinkFamily> NetlinkFamilies; + std::vector<NetlinkPolicy> NetlinkPolicies; +}; + +#endif diff --git a/tools/syz-declextract/declextract.go b/tools/syz-declextract/declextract.go new file mode 100644 index 000000000..7bf6d6081 --- /dev/null +++ b/tools/syz-declextract/declextract.go @@ -0,0 +1,260 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package main + +import ( + "bufio" + "bytes" + "flag" + "fmt" + "io/fs" + "os" + "path/filepath" + "slices" + "strings" + + "github.com/google/syzkaller/pkg/ast" + "github.com/google/syzkaller/pkg/clangtool" + "github.com/google/syzkaller/pkg/compiler" + "github.com/google/syzkaller/pkg/declextract" + "github.com/google/syzkaller/pkg/mgrconfig" + "github.com/google/syzkaller/pkg/osutil" + "github.com/google/syzkaller/pkg/subsystem" + _ "github.com/google/syzkaller/pkg/subsystem/lists" + "github.com/google/syzkaller/pkg/tool" + "github.com/google/syzkaller/sys/targets" +) + +// The target we currently assume for extracted descriptions. 
+var target = targets.Get(targets.Linux, targets.AMD64) + +func main() { + var ( + flagConfig = flag.String("config", "", "manager config file") + flagBinary = flag.String("binary", "syz-declextract", "path to syz-declextract binary") + flagCacheExtract = flag.Bool("cache-extract", false, "use cached extract results if present"+ + " (cached in manager.workdir/declextract.cache)") + ) + defer tool.Init()() + cfg, err := mgrconfig.LoadFile(*flagConfig) + if err != nil { + tool.Fail(err) + } + if err := run(filepath.FromSlash("sys/linux/auto.txt"), &clangtool.Config{ + ToolBin: *flagBinary, + KernelSrc: cfg.KernelSrc, + KernelObj: cfg.KernelObj, + CacheDir: filepath.Join(cfg.Workdir, "declextract.cache"), + ReuseCache: *flagCacheExtract, + }); err != nil { + tool.Fail(err) + } +} + +func run(autoFile string, cfg *clangtool.Config) error { + syscallRename, err := buildSyscallRenameMap(cfg.KernelSrc) + if err != nil { + return fmt.Errorf("failed to build syscall rename map: %w", err) + } + out, err := clangtool.Run(cfg) + if err != nil { + return err + } + descriptions, interfaces, err := declextract.Run(out, syscallRename) + if err != nil { + return err + } + if err := osutil.WriteFile(autoFile, descriptions); err != nil { + return err + } + if err := osutil.WriteFile(autoFile+".info", serialize(interfaces)); err != nil { + return err + } + // In order to remove unused bits of the descriptions, we need to write them out first, + // and then parse all descriptions back b/c auto descriptions use some types defined + // by manual descriptions (compiler.CollectUnused requires complete descriptions). + // This also canonicalizes them b/c new lines are added during parsing. + eh, errors := errorHandler() + desc := ast.ParseGlob(filepath.Join(filepath.Dir(autoFile), "*.txt"), eh) + if desc == nil { + return fmt.Errorf("failed to parse descriptions\n%s", errors.Bytes()) + } + // Need to clone descriptions b/c CollectUnused changes them slightly during type checking. 
+ unusedNodes, err := compiler.CollectUnused(desc.Clone(), target, eh) + if err != nil { + return fmt.Errorf("failed to typecheck descriptions: %w\n%s", err, errors.Bytes()) + } + consts := compiler.ExtractConsts(desc.Clone(), target, eh) + if consts == nil { + return fmt.Errorf("failed to typecheck descriptions: %w\n%s", err, errors.Bytes()) + } + finishInterfaces(interfaces, consts, autoFile) + if err := osutil.WriteFile(autoFile+".info", serialize(interfaces)); err != nil { + return err + } + unused := make(map[string]bool) + for _, n := range unusedNodes { + _, typ, name := n.Info() + unused[typ+name] = true + } + desc.Nodes = slices.DeleteFunc(desc.Nodes, func(n ast.Node) bool { + pos, typ, name := n.Info() + return pos.File != autoFile || unused[typ+name] + }) + // We need re-parse them again b/c new lines are fixed up during parsing. + formatted := ast.Format(ast.Parse(ast.Format(desc), autoFile, nil)) + return osutil.WriteFile(autoFile, formatted) +} + +func errorHandler() (func(pos ast.Pos, msg string), *bytes.Buffer) { + errors := new(bytes.Buffer) + eh := func(pos ast.Pos, msg string) { + pos.File = filepath.Base(pos.File) + fmt.Fprintf(errors, "%v: %v\n", pos, msg) + } + return eh, errors +} + +func serialize(interfaces []*declextract.Interface) []byte { + w := new(bytes.Buffer) + for _, iface := range interfaces { + fmt.Fprintf(w, "%v\t%v\tfunc:%v\taccess:%v\tmanual_desc:%v\tauto_desc:%v", + iface.Type, iface.Name, iface.Func, iface.Access, + iface.ManualDescriptions, iface.AutoDescriptions) + for _, file := range iface.Files { + fmt.Fprintf(w, "\tfile:%v", file) + } + for _, subsys := range iface.Subsystems { + fmt.Fprintf(w, "\tsubsystem:%v", subsys) + } + fmt.Fprintf(w, "\n") + } + return w.Bytes() +} + +func finishInterfaces(interfaces []*declextract.Interface, consts map[string]*compiler.ConstInfo, autoFile string) { + manual := make(map[string]bool) + for file, desc := range consts { + for _, c := range desc.Consts { + if file != autoFile { + 
manual[c.Name] = true + } + } + } + extractor := subsystem.MakeExtractor(subsystem.GetList(target.OS)) + for _, iface := range interfaces { + iface.ManualDescriptions = manual[iface.IdentifyingConst] + var crashes []*subsystem.Crash + for _, file := range iface.Files { + crashes = append(crashes, &subsystem.Crash{GuiltyPath: file}) + } + for _, s := range extractor.Extract(crashes) { + iface.Subsystems = append(iface.Subsystems, s.Name) + } + slices.Sort(iface.Subsystems) + } +} + +func buildSyscallRenameMap(sourceDir string) (map[string][]string, error) { + // Some syscalls have different names and entry points and thus need to be renamed. + // e.g. SYSCALL_DEFINE1(setuid16, old_uid_t, uid) is referred to in the .tbl file with setuid. + // Parse *.tbl files that map functions defined with SYSCALL_DEFINE macros to actual syscall names. + // Lines in the files look as follows: + // 288 common accept4 sys_accept4 + // Total mapping is many-to-many, so we give preference to x86 arch, then to 64-bit syscalls, + // and then just order arches by name to have deterministic result. + // Note: some syscalls may have no record in the tables for the architectures we support. 
+ syscalls := make(map[string][]tblSyscall) + tblFiles, err := findTblFiles(sourceDir) + if err != nil { + return nil, err + } + if len(tblFiles) == 0 { + return nil, fmt.Errorf("found no *.tbl files in the kernel dir %v", sourceDir) + } + for file, arches := range tblFiles { + for _, arch := range arches { + data, err := os.ReadFile(file) + if err != nil { + return nil, err + } + parseTblFile(data, arch, syscalls) + } + } + rename := make(map[string][]string) + for syscall, descs := range syscalls { + slices.SortFunc(descs, func(a, b tblSyscall) int { + if (a.arch == target.Arch) != (b.arch == target.Arch) { + if a.arch == target.Arch { + return -1 + } + return 1 + } + if a.is64bit != b.is64bit { + if a.is64bit { + return -1 + } + return 1 + } + return strings.Compare(a.arch, b.arch) + }) + fn := descs[0].fn + rename[fn] = append(rename[fn], syscall) + } + return rename, nil +} + +type tblSyscall struct { + fn string + arch string + is64bit bool +} + +func parseTblFile(data []byte, arch string, syscalls map[string][]tblSyscall) { + for s := bufio.NewScanner(bytes.NewReader(data)); s.Scan(); { + fields := strings.Fields(s.Text()) + if len(fields) < 4 || fields[0] == "#" { + continue + } + group := fields[1] + syscall := fields[2] + fn := strings.TrimPrefix(fields[3], "sys_") + if strings.HasPrefix(syscall, "unused") || fn == "-" || + // Powerpc spu group defines some syscalls (utimesat) + // that are not present on any of our arches. + group == "spu" || + // llseek does not exist, it comes from: + // arch/arm64/tools/syscall_64.tbl -> scripts/syscall.tbl + // 62 32 llseek sys_llseek + // So scripts/syscall.tbl is pulled for 64-bit arch, but the syscall + // is defined only for 32-bit arch in that file. + syscall == "llseek" || + // Don't want to test it (but see issue 5308). 
+ syscall == "reboot" { + continue + } + syscalls[syscall] = append(syscalls[syscall], tblSyscall{ + fn: fn, + arch: arch, + is64bit: group == "common" || strings.Contains(group, "64"), + }) + } +} + +func findTblFiles(sourceDir string) (map[string][]string, error) { + files := make(map[string][]string) + for _, arch := range targets.List[target.OS] { + err := filepath.Walk(filepath.Join(sourceDir, "arch", arch.KernelHeaderArch), + func(file string, info fs.FileInfo, err error) error { + if err == nil && strings.HasSuffix(file, ".tbl") { + files[file] = append(files[file], arch.VMArch) + } + return err + }) + if err != nil { + return nil, err + } + } + return files, nil +} diff --git a/tools/syz-declextract/declextract_test.go b/tools/syz-declextract/declextract_test.go new file mode 100644 index 000000000..b93a6dcbf --- /dev/null +++ b/tools/syz-declextract/declextract_test.go @@ -0,0 +1,154 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +package main + +import ( + "encoding/json" + "flag" + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/syzkaller/pkg/ast" + "github.com/google/syzkaller/pkg/clangtool" + "github.com/google/syzkaller/pkg/compiler" + "github.com/google/syzkaller/pkg/osutil" +) + +var ( + flagBin = flag.String("bin", "", "path to syz-declextract binary to use") + flagUpdate = flag.Bool("update", false, "update golden files") +) + +func TestClangTool(t *testing.T) { + if *flagBin == "" { + t.Skipf("syz-declextract path is not specified, run with -bin=syz-declextract flag") + } + testEachFile(t, func(t *testing.T, cfg *clangtool.Config, file string) { + out, err := clangtool.Run(cfg) + if err != nil { + t.Fatal(err) + } + got, err := json.MarshalIndent(out, "", "\t") + if err != nil { + t.Fatal(err) + } + compareGoldenData(t, file+".json", got) + }) +} + +func TestDeclextract(t *testing.T) { + testEachFile(t, func(t *testing.T, cfg *clangtool.Config, file string) { + // Created cache file to avoid running the clang tool. + goldenFile := file + ".json" + cacheFile := filepath.Join(cfg.KernelObj, filepath.Base(goldenFile)) + if err := os.Symlink(goldenFile, cacheFile); err != nil { + t.Fatal(err) + } + if err := os.Symlink(filepath.Join(cfg.KernelSrc, "manual.txt"), + filepath.Join(cfg.KernelObj, "manual.txt")); err != nil { + t.Fatal(err) + } + cfg.ToolBin = "this-is-not-supposed-to-run" + autoFile := filepath.Join(cfg.KernelObj, filepath.Base(file)+".txt") + if err := run(autoFile, cfg); err != nil { + if *flagUpdate { + osutil.CopyFile(autoFile, file+".txt") + osutil.CopyFile(autoFile+".info", file+".info") + } + t.Fatal(err) + } + + // Check that descriptions compile. 
+ eh, errors := errorHandler() + full := ast.ParseGlob(filepath.Join(cfg.KernelObj, "*.txt"), eh) + if full == nil { + t.Fatalf("failed to parse full descriptions:\n%s", errors) + } + constInfo := compiler.ExtractConsts(full, target, eh) + if constInfo == nil { + t.Fatalf("failed to compile full descriptions:\n%s", errors) + } + // Fabricate consts. + consts := make(map[string]uint64) + for _, info := range constInfo { + for i, c := range info.Consts { + consts[c.Name] = uint64(i + 1) + } + } + res := compiler.Compile(full, consts, target, eh) + if res == nil { + t.Fatalf("failed to compile full descriptions:\n%s", errors) + } + + // TODO: Ensure that none of the syscalls will be disabled by TransitivelyEnabledCalls. + + compareGoldenFile(t, file+".txt", autoFile) + compareGoldenFile(t, file+".info", autoFile+".info") + }) +} + +func testEachFile(t *testing.T, fn func(t *testing.T, cfg *clangtool.Config, file string)) { + testdata, err := filepath.Abs("testdata") + if err != nil { + t.Fatal(err) + } + files, err := filepath.Glob(filepath.Join(testdata, "*.c")) + if err != nil { + t.Fatal(err) + } + if len(files) == 0 { + t.Fatal("found no source files") + } + for _, file := range files { + file := file + t.Run(filepath.Base(file), func(t *testing.T) { + t.Parallel() + buildDir := t.TempDir() + commands := fmt.Sprintf(`[{ + "file": "%s", + "directory": "%s", + "command": "clang -c %s -DKBUILD_BASENAME=foo" + }]`, + file, buildDir, file) + dbFile := filepath.Join(buildDir, "compile_commands.json") + if err := os.WriteFile(dbFile, []byte(commands), 0600); err != nil { + t.Fatal(err) + } + cfg := &clangtool.Config{ + ToolBin: *flagBin, + KernelSrc: testdata, + KernelObj: buildDir, + CacheDir: buildDir, + ReuseCache: true, + } + fn(t, cfg, file) + }) + } +} + +func compareGoldenFile(t *testing.T, goldenFile, gotFile string) { + got, err := os.ReadFile(gotFile) + if err != nil { + t.Fatal(err) + } + compareGoldenData(t, goldenFile, got) +} + +func compareGoldenData(t 
*testing.T, goldenFile string, got []byte) { + if *flagUpdate { + if err := os.WriteFile(goldenFile, got, 0644); err != nil { + t.Fatal(err) + } + } + want, err := os.ReadFile(goldenFile) + if err != nil { + t.Fatal(err) + } + if diff := cmp.Diff(got, want); diff != "" { + t.Fatal(diff) + } +} diff --git a/tools/syz-declextract/run.go b/tools/syz-declextract/run.go deleted file mode 100644 index e93723c95..000000000 --- a/tools/syz-declextract/run.go +++ /dev/null @@ -1,546 +0,0 @@ -// Copyright 2024 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -package main - -import ( - "bufio" - "bytes" - "encoding/json" - "errors" - "flag" - "fmt" - "io/fs" - "math/rand" - "os" - "os/exec" - "path/filepath" - "runtime" - "slices" - "strconv" - "strings" - "time" - - "github.com/google/syzkaller/pkg/ast" - "github.com/google/syzkaller/pkg/compiler" - "github.com/google/syzkaller/pkg/mgrconfig" - "github.com/google/syzkaller/pkg/osutil" - "github.com/google/syzkaller/pkg/subsystem" - _ "github.com/google/syzkaller/pkg/subsystem/lists" - "github.com/google/syzkaller/pkg/tool" - "github.com/google/syzkaller/sys/targets" -) - -var ( - autoFile = filepath.FromSlash("sys/linux/auto.txt") - target = targets.Get(targets.Linux, targets.AMD64) -) - -func main() { - var ( - flagConfig = flag.String("config", "", "manager config file") - flagBinary = flag.String("binary", "syz-declextract", "path to syz-declextract binary") - flagCacheExtract = flag.Bool("cache-extract", false, "use cached extract results if present"+ - " (cached in manager.workdir/declextract.cache)") - ) - defer tool.Init()() - cfg, err := mgrconfig.LoadFile(*flagConfig) - if err != nil { - tool.Failf("failed to load manager config: %v", err) - } - - compilationDatabase := filepath.Join(cfg.KernelObj, "compile_commands.json") - cmds, err := loadCompileCommands(compilationDatabase) - if err != nil { - tool.Failf("failed 
to load compile commands: %v", err) - } - - ctx := &context{ - cfg: cfg, - clangTool: *flagBinary, - compilationDatabase: compilationDatabase, - compileCommands: cmds, - extractor: subsystem.MakeExtractor(subsystem.GetList(target.OS)), - syscallNameMap: readSyscallMap(cfg.KernelSrc), - interfaces: make(map[string]Interface), - } - - outputs := make(chan *output, len(cmds)) - files := make(chan string, len(cmds)) - for w := 0; w < runtime.NumCPU(); w++ { - go ctx.worker(outputs, files, *flagCacheExtract) - } - - for _, cmd := range cmds { - files <- cmd.File - } - close(files) - - for range cmds { - out := <-outputs - if out == nil { - continue - } - file, err := filepath.Rel(cfg.KernelSrc, out.file) - if err != nil { - tool.Fail(err) - } - if out.err != nil { - tool.Failf("%v: %v", file, out.err) - } - parse := ast.Parse(out.output, "", nil) - if parse == nil { - tool.Failf("%v: parsing error:\n%s", file, out.output) - } - ctx.appendNodes(parse.Nodes, file) - } - ctx.finishDescriptions() - - desc := &ast.Description{ - Nodes: ctx.nodes, - } - writeDescriptions(desc) - // In order to remove unused bits of the descriptions, we need to write them out first, - // and then parse all descriptions back b/c auto descriptions use some types defined - // by manual descriptions (compiler.CollectUnused requires complete descriptions). 
- removeUnused(desc) - writeDescriptions(desc) - - ifaces := ctx.finishInterfaces() - ifacesData := serializeInterfaces(ifaces) - if err := osutil.WriteFile(autoFile+".info", ifacesData); err != nil { - tool.Fail(err) - } -} - -type context struct { - cfg *mgrconfig.Config - clangTool string - compilationDatabase string - compileCommands []compileCommand - extractor *subsystem.Extractor - syscallNameMap map[string][]string - interfaces map[string]Interface - nodes []ast.Node -} - -type compileCommand struct { - Command string - Directory string - File string -} - -func loadCompileCommands(file string) ([]compileCommand, error) { - data, err := os.ReadFile(file) - if err != nil { - return nil, err - } - var cmds []compileCommand - if err := json.Unmarshal(data, &cmds); err != nil { - return nil, err - } - // Remove commands that don't relate to the kernel build - // (probably some host tools, etc). - cmds = slices.DeleteFunc(cmds, func(cmd compileCommand) bool { - return !strings.HasSuffix(cmd.File, ".c") || - // Files compiled with gcc are not a part of the kernel - // (assuming compile commands were generated with make CC=clang). - // They are probably a part of some host tool. - strings.HasPrefix(cmd.Command, "gcc") || - // KBUILD should add this define all kernel files. - !strings.Contains(cmd.Command, "-DKBUILD_BASENAME") - }) - // Shuffle the order to detect any non-determinism caused by the order early. - // The result should be the same regardless. 
- rand.New(rand.NewSource(time.Now().UnixNano())).Shuffle(len(cmds), func(i, j int) { - cmds[i], cmds[j] = cmds[j], cmds[i] - }) - return cmds, nil -} - -type output struct { - file string - output []byte - err error -} - -type Interface struct { - Type string - Name string - Files []string - Func string - Access string - Subsystems []string - ManualDescriptions bool - AutoDescriptions bool - - identifyingConst string -} - -func (iface *Interface) ID() string { - return fmt.Sprintf("%v/%v", iface.Type, iface.Name) -} - -func serializeInterfaces(ifaces []Interface) []byte { - w := new(bytes.Buffer) - for _, iface := range ifaces { - fmt.Fprintf(w, "%v\t%v\tfunc:%v\taccess:%v\tmanual_desc:%v\tauto_desc:%v", - iface.Type, iface.Name, iface.Func, iface.Access, - iface.ManualDescriptions, iface.AutoDescriptions) - for _, file := range iface.Files { - fmt.Fprintf(w, "\tfile:%v", file) - } - for _, subsys := range iface.Subsystems { - fmt.Fprintf(w, "\tsubsystem:%v", subsys) - } - fmt.Fprintf(w, "\n") - } - return w.Bytes() -} - -func (ctx *context) finishInterfaces() []Interface { - var interfaces []Interface - for _, iface := range ctx.interfaces { - slices.Sort(iface.Files) - iface.Files = slices.Compact(iface.Files) - var crashes []*subsystem.Crash - for _, file := range iface.Files { - crashes = append(crashes, &subsystem.Crash{GuiltyPath: file}) - } - for _, s := range ctx.extractor.Extract(crashes) { - iface.Subsystems = append(iface.Subsystems, s.Name) - } - slices.Sort(iface.Subsystems) - if iface.Access == "" { - iface.Access = "unknown" - } - interfaces = append(interfaces, iface) - } - slices.SortFunc(interfaces, func(a, b Interface) int { - return strings.Compare(a.ID(), b.ID()) - }) - checkDescriptionPresence(interfaces, autoFile) - return interfaces -} - -func (ctx *context) mergeInterface(iface Interface) { - prev, ok := ctx.interfaces[iface.ID()] - if ok { - if iface.identifyingConst != prev.identifyingConst { - tool.Failf("interface %v has different 
identifying consts: %v vs %v", - iface.ID(), iface.identifyingConst, prev.identifyingConst) - } - iface.Files = append(iface.Files, prev.Files...) - } - ctx.interfaces[iface.ID()] = iface -} - -func checkDescriptionPresence(interfaces []Interface, autoFile string) { - desc := ast.ParseGlob(filepath.Join("sys", target.OS, "*.txt"), nil) - if desc == nil { - tool.Failf("failed to parse descriptions") - } - consts := compiler.ExtractConsts(desc, target, nil) - auto := make(map[string]bool) - manual := make(map[string]bool) - for file, desc := range consts { - for _, c := range desc.Consts { - if file == autoFile { - auto[c.Name] = true - } else { - manual[c.Name] = true - } - } - } - for i := range interfaces { - iface := &interfaces[i] - if auto[iface.identifyingConst] { - iface.AutoDescriptions = true - } - if manual[iface.identifyingConst] { - iface.ManualDescriptions = true - } - } -} - -func writeDescriptions(desc *ast.Description) { - // New lines are added in the parsing step. This is why we need to Format (serialize the description), - // Parse, then Format again. 
- output := ast.Format(ast.Parse(ast.Format(desc), "", ast.LoggingHandler)) - if err := osutil.WriteFile(autoFile, output); err != nil { - tool.Fail(err) - } -} - -func (ctx *context) finishDescriptions() { - slices.SortFunc(ctx.nodes, func(a, b ast.Node) int { - return strings.Compare(ast.SerializeNode(a), ast.SerializeNode(b)) - }) - ctx.nodes = slices.CompactFunc(ctx.nodes, func(a, b ast.Node) bool { - return ast.SerializeNode(a) == ast.SerializeNode(b) - }) - slices.SortStableFunc(ctx.nodes, func(a, b ast.Node) int { - return getTypeOrder(a) - getTypeOrder(b) - }) - - prevCall, prevCallIndex := "", 0 - for _, node := range ctx.nodes { - switch n := node.(type) { - case *ast.Call: - if n.Name.Name == prevCall { - n.Name.Name += strconv.Itoa(prevCallIndex) - prevCallIndex++ - } else { - prevCall = n.Name.Name - prevCallIndex = 0 - } - } - } - - // These additional includes must be at the top (added after sorting), because other kernel headers - // are broken and won't compile without these additional ones included first. - header := `# Code generated by syz-declextract. DO NOT EDIT. - -include <include/vdso/bits.h> -include <include/linux/types.h> -` - desc := ast.Parse([]byte(header), "", nil) - ctx.nodes = append(desc.Nodes, ctx.nodes...) 
-} - -func removeUnused(desc *ast.Description) { - all := ast.ParseGlob(filepath.Join("sys", target.OS, "*.txt"), nil) - if all == nil { - tool.Failf("failed to parse descriptions") - } - unusedNodes, err := compiler.CollectUnused(all, target, nil) - if err != nil { - tool.Failf("failed to typecheck descriptions: %v", err) - } - unused := make(map[string]bool) - for _, n := range unusedNodes { - if pos, typ, name := n.Info(); pos.File == autoFile { - unused[fmt.Sprintf("%v/%v", typ, name)] = true - } - } - desc.Nodes = slices.DeleteFunc(desc.Nodes, func(n ast.Node) bool { - _, typ, name := n.Info() - return unused[fmt.Sprintf("%v/%v", typ, name)] - }) -} - -func (ctx *context) worker(outputs chan *output, files chan string, cache bool) { - for file := range files { - cacheFile := filepath.Join(ctx.cfg.Workdir, "declextract.cache", - strings.TrimPrefix(strings.TrimPrefix(filepath.Clean(file), - ctx.cfg.KernelSrc), ctx.cfg.KernelObj)) - if cache { - out, err := os.ReadFile(cacheFile) - if err == nil { - outputs <- &output{file, out, nil} - continue - } - } - // Suppress warning since we may build the tool on a different clang - // version that produces more warnings. - out, err := exec.Command(ctx.clangTool, "-p", ctx.compilationDatabase, file, "--extra-arg=-w").Output() - var exitErr *exec.ExitError - if err != nil && errors.As(err, &exitErr) && len(exitErr.Stderr) != 0 { - err = fmt.Errorf("%s", exitErr.Stderr) - } - if err == nil { - osutil.MkdirAll(filepath.Dir(cacheFile)) - osutil.WriteFile(cacheFile, out) - } - outputs <- &output{file, out, err} - } -} - -func (ctx *context) renameSyscall(syscall *ast.Call) []ast.Node { - names := ctx.syscallNameMap[syscall.CallName] - if len(names) == 0 { - // Syscall has no record in the tables for the architectures we support. 
- return nil - } - variant := strings.TrimPrefix(syscall.Name.Name, syscall.CallName) - if variant == "" { - variant = "$auto" - } - var renamed []ast.Node - for _, name := range names { - newCall := syscall.Clone().(*ast.Call) - newCall.Name.Name = name + variant - newCall.CallName = name // Not required but avoids mistakenly treating CallName as the part before the $. - renamed = append(renamed, newCall) - } - - return renamed -} - -func readSyscallMap(sourceDir string) map[string][]string { - // Parse arch/*/*.tbl files that map functions defined with SYSCALL_DEFINE macros to actual syscall names. - // Lines in the files look as follows: - // 288 common accept4 sys_accept4 - // Total mapping is many-to-many, so we give preference to x86 arch, then to 64-bit syscalls, - // and then just order arches by name to have deterministic result. - type desc struct { - fn string - arch string - is64bit bool - } - syscalls := make(map[string][]desc) - for _, arch := range targets.List[target.OS] { - filepath.Walk(filepath.Join(sourceDir, "arch", arch.KernelHeaderArch), - func(path string, info fs.FileInfo, err error) error { - if err != nil || !strings.HasSuffix(path, ".tbl") { - return err - } - f, err := os.Open(path) - if err != nil { - tool.Fail(err) - } - defer f.Close() - for s := bufio.NewScanner(f); s.Scan(); { - fields := strings.Fields(s.Text()) - if len(fields) < 4 || fields[0] == "#" { - continue - } - group := fields[1] - syscall := fields[2] - fn := strings.TrimPrefix(fields[3], "sys_") - if strings.HasPrefix(syscall, "unused") || fn == "-" || - // Powerpc spu group defines some syscalls (utimesat) - // that are not present on any of our arches. - group == "spu" || - // llseek does not exist, it comes from: - // arch/arm64/tools/syscall_64.tbl -> scripts/syscall.tbl - // 62 32 llseek sys_llseek - // So scripts/syscall.tbl is pulled for 64-bit arch, but the syscall - // is defined only for 32-bit arch in that file. 
- syscall == "llseek" || - // Don't want to test it (see issue 5308). - syscall == "reboot" { - continue - } - syscalls[syscall] = append(syscalls[syscall], desc{ - fn: fn, - arch: arch.VMArch, - is64bit: group == "common" || strings.Contains(group, "64"), - }) - } - return nil - }) - } - - rename := map[string][]string{ - "syz_genetlink_get_family_id": {"syz_genetlink_get_family_id"}, - } - for syscall, descs := range syscalls { - slices.SortFunc(descs, func(a, b desc) int { - if (a.arch == target.Arch) != (b.arch == target.Arch) { - if a.arch == target.Arch { - return -1 - } - return 1 - } - if a.is64bit != b.is64bit { - if a.is64bit { - return -1 - } - return 1 - } - return strings.Compare(a.arch, b.arch) - }) - fn := descs[0].fn - rename[fn] = append(rename[fn], syscall) - } - return rename -} - -func (ctx *context) appendNodes(nodes []ast.Node, file string) { - for _, node := range nodes { - switch node := node.(type) { - case *ast.Call: - // Some syscalls have different names and entry points and thus need to be renamed. - // e.g. SYSCALL_DEFINE1(setuid16, old_uid_t, uid) is referred to in the .tbl file with setuid. - ctx.nodes = append(ctx.nodes, ctx.renameSyscall(node)...) 
- case *ast.Include: - if file, err := filepath.Rel(ctx.cfg.KernelSrc, filepath.Join(ctx.cfg.KernelObj, node.File.Value)); err == nil { - node.File.Value = file - } - if replace := includeReplaces[node.File.Value]; replace != "" { - node.File.Value = replace - } - ctx.nodes = append(ctx.nodes, node) - case *ast.Comment: - switch { - case strings.HasPrefix(node.Text, "INTERFACE:"): - fields := strings.Fields(node.Text) - if len(fields) != 6 { - tool.Failf("%q has wrong number of fields", node.Text) - } - for i := range fields { - if fields[i] == "-" { - fields[i] = "" - } - } - iface := Interface{ - Type: fields[1], - Name: fields[2], - Files: []string{file}, - identifyingConst: fields[3], - Func: fields[4], - Access: fields[5], - } - if iface.Type == "SYSCALL" { - for _, name := range ctx.syscallNameMap[iface.Name] { - iface.Name = name - iface.identifyingConst = "__NR_" + name - ctx.mergeInterface(iface) - } - } else { - ctx.mergeInterface(iface) - } - default: - ctx.nodes = append(ctx.nodes, node) - } - default: - ctx.nodes = append(ctx.nodes, node) - } - } -} - -// Replace these includes in the tool output. -var includeReplaces = map[string]string{ - // Arches may use some includes from asm-generic and some from arch/arm. - // If the arch used for extract used asm-generic for a header, - // other arches may need arch/asm version of the header. So switch to - // a more generic file name that should resolve correctly for all arches. 
- "include/uapi/asm-generic/ioctls.h": "asm/ioctls.h", - "include/uapi/asm-generic/sockios.h": "asm/sockios.h", -} - -func getTypeOrder(a ast.Node) int { - switch a.(type) { - case *ast.Comment: - return 0 - case *ast.Include: - return 1 - case *ast.Define: - return 2 - case *ast.IntFlags: - return 3 - case *ast.Resource: - return 4 - case *ast.TypeDef: - return 5 - case *ast.Call: - return 6 - case *ast.Struct: - return 7 - case *ast.NewLine: - return 8 - default: - panic(fmt.Sprintf("unhandled type %T", a)) - } -} diff --git a/tools/syz-declextract/syz-declextract.cpp b/tools/syz-declextract/syz-declextract.cpp deleted file mode 100644 index c348eb16d..000000000 --- a/tools/syz-declextract/syz-declextract.cpp +++ /dev/null @@ -1,1016 +0,0 @@ -// Copyright 2024 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -//go:build ignore - -#include "clang/AST/APValue.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/Attr.h" -#include "clang/AST/Attrs.inc" -#include "clang/AST/Decl.h" -#include "clang/AST/DeclarationName.h" -#include "clang/AST/Expr.h" -#include "clang/AST/PrettyPrinter.h" -#include "clang/AST/Stmt.h" -#include "clang/AST/Type.h" -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/ASTMatchers/ASTMatchersInternal.h" -#include "clang/Basic/CharInfo.h" -#include "clang/Basic/LLVM.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Basic/TypeTraits.h" -#include "clang/Lex/Lexer.h" -#include "clang/Sema/Ownership.h" -#include "clang/Tooling/CommonOptionsParser.h" -#include "clang/Tooling/Tooling.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Frontend/OpenMP/OMP.h.inc" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <cstddef> -#include <cstdint> -#include <filesystem> -#include <optional> 
-#include <stdio.h> -#include <string> -#include <string_view> -#include <unordered_map> -#include <vector> - -using namespace clang; -using namespace clang::ast_matchers; - -const char *const AccessUnknown = "-"; -const char *const AccessUser = "user"; -const char *const AccessNsAdmin = "ns_admin"; -const char *const AccessAdmin = "admin"; - -struct Param { - std::string type; - std::string name; -}; - -struct NetlinkOps { - std::string cmd; - std::string func; - const char *access; - std::string policy; -}; - -struct NetlinkType { - RecordDecl *decl; - int64_t len; -}; - -struct StructMember { - std::string type; - std::string name; - unsigned int countedBy; -}; - -void emitInterface(const char *type, std::string_view name, std::string_view identifying_const, - std::string_view entry_func = "", const char *access = AccessUnknown) { - if (entry_func.empty()) - entry_func = "-"; - printf("\n#INTERFACE: %s %s %s %s %s\n\n", type, std::string(name).c_str(), std::string(identifying_const).c_str(), - std::string(entry_func).c_str(), access); -} - -std::string toIdentifier(std::string name) { - if (name == "resource" || name == "include" || name == "define" || name == "incdir" || name == "syscall" || - name == "parent") { - return "_" + name; - } - std::replace(name.begin(), name.end(), '.', '_'); - std::replace(name.begin(), name.end(), ' ', '_'); - std::replace(name.begin(), name.end(), '-', '_'); - return name; -} - -struct SyzRecordDecl { - std::string name; - std::vector<StructMember> members; - std::string attr; - bool isUnion; - bool isVarlen; - bool operator==(const SyzRecordDecl &decl) { return name == decl.name; } - bool operator<(const SyzRecordDecl &decl) { return name < decl.name; } - void print() const { - if (name.empty()) { - return; - } - const char openBracket = isUnion ? '[' : '{'; - const char closeBracket = isUnion ? 
']' : '}'; - printf("%s %c\n", name.c_str(), openBracket); - for (const auto &member : members) { - printf("\t%s %s\n", toIdentifier(member.name).c_str(), member.type.c_str()); - } - putchar(closeBracket); - if (isUnion && isVarlen) { - printf("[%s]", "varlen"); - } else if (!isUnion && !attr.empty()) { - printf("[%s]", attr.c_str()); - } - puts(""); - } -}; - -std::string getDeclFilename(const SourceManager *SM, const Decl *decl) { - return toIdentifier( - std::filesystem::path(SM->getFilename(decl->getCanonicalDecl()->getSourceRange().getBegin()).str()) - .filename() - .stem() - .string()); -} - -// If expression refers to some identifier, returns the identifier name. -// Otherwise returns an empty string. -// For example, if the expression is `function_name`, returns "function_name" string. -// If SM is passed, then it also appends per-file suffix. -std::string getDeclName(ASTContext &context, const clang::Expr *expr, const SourceManager *SM = nullptr) { - if (!expr) { - return ""; - } - // The expression can be complex and include casts and e.g. InitListExpr, - // to remove all of these we match the first/any DeclRefExpr. 
- struct Matcher : MatchFinder::MatchCallback { - const DeclRefExpr *decl = nullptr; - void run(const MatchFinder::MatchResult &Result) override { decl = Result.Nodes.getNodeAs<DeclRefExpr>("decl"); } - }; - MatchFinder finder; - Matcher matcher; - finder.addMatcher(stmt(forEachDescendant(declRefExpr().bind("decl"))), &matcher); - finder.match(*expr, context); - if (!matcher.decl) { - return ""; - } - std::string name = matcher.decl->getDecl()->getNameAsString(); - if (SM) { - name += "$auto_" + getDeclFilename(SM, matcher.decl->getDecl()); - } - return name; -} - -bool endsWith(const std::string_view &str, const std::string_view end) { - size_t substrBegin = str.rfind(end); - return substrBegin != std::string::npos && str.substr(substrBegin) == end; -} - -bool beginsWith(const std::string_view &str, const std::string_view begin) { - size_t substrBegin = str.find(begin); - return substrBegin != std::string::npos && str.substr(0, begin.size()) == begin; -} - -bool contains(const std::string_view &str, const std::string_view sub) { return str.find(sub) != std::string::npos; } - -std::string makeArray(const std::string &type, const size_t min = 0, const size_t max = -1) { - if (max != size_t(-1)) { - return "array[" + type + ", " + std::to_string(min) + ":" + std::to_string(max) + "]"; - } - if (min == 1) { - return type; - } - if (min) { - return "array[" + type + ", " + std::to_string(min) + "]"; - } - return "array[" + type + "]"; -} - -std::string makePtr(const std::string &dir, const std::string &type, bool isOpt = false) { - std::string ptr = "ptr[" + dir + ", " + type; - if (isOpt) { - return ptr + ", opt]"; - } - return ptr + "]"; -} - -std::string makeConst(bool isSyscallArg, const std::string &type, const std::string &val) { - if (isSyscallArg) { - return "const[" + val + "]"; - } - return "const[" + val + ", " + type + "]"; -} - -std::string makeFlags(bool isSyscallArg, const std::string &type, const std::string &flags) { - if (isSyscallArg) { - return 
"flags[" + flags + "]"; - } - return "flags[" + flags + ", " + type + "]"; -} - -std::string int8Subtype(const std::string &name, const bool isSyscallParam) { return "int8"; } - -std::string int16Subtype(const std::string &name, const bool isSyscallParam) { - if (contains(name, "port")) { - return "sock_port"; - } - return "int16"; -} - -std::string int32Subtype(const std::string &name, const bool isSyscallParam) { - if (contains(name, "ipv4")) { - return "ipv4_addr"; - } - if (endsWith(name, "_pid") || endsWith(name, "_tid") || endsWith(name, "_pgid") || endsWith(name, "_tgid") || - name == "pid" || name == "tid" || name == "pgid" || name == "tgid") { - return "pid"; - } - if (endsWith(name, "dfd") && !endsWith(name, "oldfd") && !endsWith(name, "pidfd")) { - return "fd_dir"; - } - if (endsWith(name, "ns_fd")) { - return "fd_namespace"; - } - if (endsWith(name, "_uid") || name == "uid" || name == "user" || name == "ruid" || name == "euid" || name == "suid") { - return "uid"; - } - if (endsWith(name, "_gid") || name == "gid" || name == "group" || name == "rgid" || name == "egid" || - name == "sgid") { - return "gid"; - } - if (endsWith(name, "fd") || beginsWith(name, "fd_") || contains(name, "fildes") || name == "fdin" || - name == "fdout") { - return "fd"; - } - if (contains(name, "ifindex") || contains(name, "dev_index")) { - return "ifindex"; - } - return "int32"; -} - -std::string int64Subtype(const std::string &name, const bool isSyscallParam) { return "int64"; } - -std::string intptrSubtype(const std::string &name, const bool isSyscallParam) { - if (name == "sigsetsize") { - return makeConst(isSyscallParam, "intptr", "8"); - } - return "intptr"; -} - -std::string stringSubtype(const std::string &name, const char *defaultName = "string") { - if (contains(name, "ifname") || endsWith(name, "dev_name")) { - return "devname"; - } - if (contains(name, "filename") || contains(name, "pathname") || contains(name, "dir_name") || name == "oldname" || - name == "newname" 
|| name == "path") { - return "filename"; - } - return defaultName; -} - -enum IntType { - INVALID_INT = 0, - INT_8 = 1, - INT_16 = 2, - INT_32 = 4, - INT_64 = 8, - INT_PTR, -}; - -IntType getIntType(const std::string &ctype, const bool isSyscallParam) { - // TODO: Handle arm32 passing 64bit arguments - if (!isSyscallParam && (contains(ctype, "long long") || contains(ctype, "64"))) { - return INT_64; - } - if (contains(ctype, "16") || contains(ctype, "short")) { - return INT_16; - } - if (contains(ctype, "8") || contains(ctype, "char") || ctype == "_Bool") { - return INT_8; - } - if (contains(ctype, "32") || contains(ctype, "int")) { - return INT_32; - } - if (contains(ctype, "long")) { - return INT_PTR; - } - fprintf(stderr, "Unhandled int length for type: %s\n", ctype.c_str()); - exit(1); -} - -const std::string intNSubtype(const std::string &name, const IntType len, const bool isSyscallParam) { - switch (len) { - case INT_8: - return int8Subtype(name, isSyscallParam); - case INT_16: - return int16Subtype(name, isSyscallParam); - case INT_32: - return int32Subtype(name, isSyscallParam); - case INT_64: - return int64Subtype(name, isSyscallParam); - case INT_PTR: - return intptrSubtype(name, isSyscallParam); - default: - fprintf(stderr, "invalid int type: %d\n", static_cast<int>(len)); - exit(1); - } -} - -bool isIntN(const std::string &type) { - return (!type.compare(0, 3, "int") && std::all_of(type.begin() + 3, type.end(), ::isDigit)) || (type == "intptr"); -} - -const std::string intSubtype(const std::string &name, const IntType len, const bool isSyscallParam = false) { - if (len == INVALID_INT) { - fprintf(stderr, "Invalid int type\n"); - exit(1); - } - - const std::string subType = intNSubtype(name, len, isSyscallParam); - if (!isIntN(subType)) { - return subType; - } - if (endsWith(name, "enabled") || endsWith(name, "enable")) { - // Replace "int" with "bool". 
- return "bool" + subType.substr(3); - } - return subType; -} - -const std::string getSyzType(const std::string &ctype, std::string name, const bool isSyscallParam, - const int bitFieldWidth = 0) { - std::transform(name.begin(), name.end(), name.begin(), ::tolower); - IntType len = getIntType(ctype, isSyscallParam); - const int byteLen = len * 8; - if (INT_8 <= len && len <= INT_64 && contains(ctype, "be")) { - return "int" + std::to_string(byteLen) + "be"; - } - - std::string type; - const bool isBitField = bitFieldWidth; - if (isBitField) { - type = "int" + std::to_string(byteLen); - if (byteLen != bitFieldWidth) { - type += ":" + std::to_string(bitFieldWidth); - } - } else { - type = intSubtype(name, len, isSyscallParam); - } - - if (isBitField || isIntN(type)) { - if (name.empty() || contains(name, "pad") || contains(name, "unused") || contains(name, "_reserved")) { - return makeConst(isSyscallParam, type, "0"); - } - } - - return type; -} - -class RecordExtractor { -private: - const SourceManager *const SM; - std::vector<std::string> includes; - std::vector<std::string> flags; - std::unordered_map<std::string, SyzRecordDecl> extractedRecords; - const std::string emptyStructType = "empty struct"; - const std::string autoTodo = "auto_todo"; - - unsigned int getCountedBy(const FieldDecl *const &field) { - return field->getType()->isCountAttributedType() - ? 
llvm::dyn_cast<FieldDecl>( - field->getType()->getAs<CountAttributedType>()->getCountExpr()->getReferencedDeclOfCallee()) - ->getFieldIndex() - : UINT_MAX; - } - - bool isFieldVarlen(const QualType &fieldType) { - return fieldType->isIncompleteArrayType() || - (fieldType->isConstantArrayType() && llvm::dyn_cast<ConstantArrayType>(fieldType)->getSize().isZero()); - } - - std::string getStructAttr(const RecordDecl *const recordDecl, ASTContext *context) { - if (recordDecl->isStruct() && recordDecl->hasAttrs()) { - for (const auto &item : recordDecl->getAttrs()) { - if (item->getKind() == clang::attr::Aligned) { - return "align[" + std::to_string(llvm::dyn_cast<AlignedAttr>(item)->getAlignment(*context) / 8) + "]"; - } else if (item->getKind() == clang::attr::Packed) { - return "packed"; - } - } - } - return ""; - } - -public: - RecordExtractor(const SourceManager *const SM) : SM(SM){}; - std::string getFieldType(const QualType &fieldType, ASTContext *context, const std::string &fieldName, - const std::string &parent = "", bool isSyscallParam = false, - const std::string &fieldTypeName = "") { - const auto &field = fieldType.IgnoreParens().getUnqualifiedType().getDesugaredType(*context); - switch (fieldType.IgnoreParens()->getTypeClass()) { - case clang::Type::Record: { - std::string backupName; - if (!parent.empty()) { - backupName = parent + "_" + fieldName; - } else if (!fieldTypeName.empty()) { - backupName = fieldTypeName; - } else { - backupName = fieldName; - } - return extractRecord(field->getAsRecordDecl(), context, backupName); - } - case clang::Type::IncompleteArray: // Defined as type[] - return makeArray(getFieldType(llvm::dyn_cast<IncompleteArrayType>(field)->getElementType(), context, fieldName)); - case clang::Type::ConstantArray: { - const auto &array = llvm::dyn_cast<ConstantArrayType>(field); - return makeArray(getFieldType(array->getElementType(), context, fieldName), array->getSize().getZExtValue()); - } - case clang::Type::Pointer: { - const auto 
&pointerType = llvm::dyn_cast<PointerType>(field); - const auto &pointeeType = pointerType->getPointeeType(); - std::string fieldType; - if (pointeeType->isAnyCharacterType()) { - fieldType = stringSubtype(fieldName); - } else if (pointeeType->isVoidType()) { - fieldType = makeArray(autoTodo); - } else { - fieldType = getFieldType(pointeeType, context, fieldName); - } - const auto &ptrDir = pointeeType.isConstQualified() ? "in" : "inout"; // TODO: Infer direction of non-const. - return makePtr(ptrDir, fieldType, - parent + "$auto_record" == fieldType); // Checks if the direct parent is the same as the node. - } - case clang::Type::Builtin: - return getSyzType(field.getAsString(), fieldName, isSyscallParam); - case clang::Type::CountAttributed: // Has the attribute counted_by. Handled by getCountedBy - case clang::Type::BTFTagAttributed: // Currently Unused - case clang::Type::Typedef: - return getFieldType(field, context, fieldName, parent, isSyscallParam, field.getAsString()); - case clang::Type::Elaborated: - return getFieldType(llvm::dyn_cast<ElaboratedType>(fieldType)->desugar(), context, fieldName, parent, - isSyscallParam); // NOTE: The fieldType contains information we need, don't use field instead. 
- case clang::Type::Enum: { - const auto &enumDecl = llvm::dyn_cast<EnumType>(field)->getDecl(); - auto name = "auto_" + enumDecl->getNameAsString(); - flags.push_back(name); - includes.push_back(std::filesystem::relative(SM->getFilename(enumDecl->getSourceRange().getBegin()).str())); - const char *sep = " = "; - for (const auto &enumerator : enumDecl->enumerators()) { - flags.back() += sep + enumerator->getNameAsString(); - sep = ", "; - } - std::string baseType = "int" + std::to_string(context->getTypeInfo(field).Width); - return makeFlags(isSyscallParam, baseType, name); - } - case clang::Type::FunctionProto: - return makePtr("in", autoTodo); - default: - field->dump(); - fprintf(stderr, "Unhandled field type %s\n", field->getTypeClassName()); - exit(1); - } - } - - std::string extractRecord(const RecordDecl *recordDecl, ASTContext *context, const std::string &backupName) { - recordDecl = recordDecl->getDefinition(); - if (!recordDecl) { // When the definition is in a different translation unit. - return autoTodo; - } - const auto &name = (recordDecl->getNameAsString().empty() ? backupName : recordDecl->getNameAsString()); - const auto &recordName = name + "$auto_record"; - if (extractedRecords.find(name) != extractedRecords.end()) { // Don't extract the same record twice. - return recordName; - } - extractedRecords[name]; - bool isVarlen = false; - std::vector<StructMember> members; - for (const auto &field : recordDecl->fields()) { - std::string fieldName; - if (field->getName().empty()) { - fieldName = name + "_" + std::to_string(field->getFieldIndex()); - } else if (field->isAnonymousStructOrUnion()) { - fieldName = name; - } else { - fieldName = field->getNameAsString(); - } - const std::string &parentName = field->isAnonymousStructOrUnion() ? "" : name; - const std::string &fieldType = - field->isBitField() ? getSyzType(field->getType().getAsString(), field->isUnnamedBitField() ? 
"" : fieldName, - false, field->getBitWidthValue(*context)) - : getFieldType(field->getType(), context, fieldName, parentName); - if (fieldType == emptyStructType) { - continue; - } - isVarlen |= isFieldVarlen(field->getType()) || - (extractedRecords.find(fieldName) != extractedRecords.end() && - !extractedRecords[fieldName].name.empty() && extractedRecords[fieldName].isVarlen); - members.push_back({fieldType, fieldName, getCountedBy(field)}); - } - if (members.empty()) { // Empty structs are not allowed in Syzlang. - return emptyStructType; - } - extractedRecords[name] = {recordName, std::move(members), getStructAttr(recordDecl, context), recordDecl->isUnion(), - isVarlen}; - return recordName; - } - - void print() { - puts("type auto_todo intptr"); - for (const auto &inc : includes) { - printf("include<%s>\n", inc.c_str()); - } - for (const auto &flag : flags) { - puts(flag.c_str()); - } - for (auto &[_, decl] : extractedRecords) { - for (auto &member : decl.members) { - if (member.countedBy != UINT_MAX) { - auto &type = decl.members[member.countedBy].type; - type = "len[" + member.name + ", " + type + "]"; - } - } - } - for (const auto &[_, decl] : extractedRecords) { - decl.print(); - } - } -}; - -struct EnumData { - std::string name; - unsigned long long value; - std::string file; -}; - -// Extracts enum info from array variable designated initialization. 
-// For example, for the following code: -// -// enum Foo { -// FooA = 11, -// FooB = 42, -// }; -// -// struct Bar bars[] = { -// [FooA] = {...}, -// [FooB] = {...}, -// }; -// -// it returns the following map: -// 11: {"FooA", 11, file.c}, -// 42: {"FooB", 42, file.c}, -std::map<int, EnumData> extractDesignatedInitConsts(ASTContext &context, const VarDecl &arrayDecl) { - struct DesignatedInitMatcher : MatchFinder::MatchCallback { - std::vector<EnumData> Inits; - - DesignatedInitMatcher(MatchFinder &Finder) { - Finder.addMatcher( - decl(forEachDescendant(designatedInitExpr(optionally(has(constantExpr(has(declRefExpr())).bind("init")))))), - this); - } - - void run(const MatchFinder::MatchResult &Result) override { - const auto *init = Result.Nodes.getNodeAs<ConstantExpr>("init"); - if (!init) { - return; - } - const auto &name = init->getEnumConstantDecl()->getNameAsString(); - const auto value = *init->getAPValueResult().getInt().getRawData(); - const auto &path = std::filesystem::relative( - Result.SourceManager->getFilename(init->getEnumConstantDecl()->getSourceRange().getBegin()).str()); - Inits.push_back({std::move(name), value, std::move(path)}); - } - }; - - MatchFinder finder; - DesignatedInitMatcher matcher(finder); - finder.match(arrayDecl, context); - std::map<int, EnumData> ordered; - for (auto &init : matcher.Inits) { - ordered[init.value] = init; - } - return ordered; -} - -class SyscallMatcher : public MatchFinder::MatchCallback { -public: - SyscallMatcher(MatchFinder &Finder) { - Finder.addMatcher(functionDecl(isExpandedFromMacro("SYSCALL_DEFINEx"), matchesName("__do_sys_.*")).bind("syscall"), - this); - } - -private: - void run(const MatchFinder::MatchResult &Result) override { - ASTContext *context = Result.Context; - const auto *syscall = Result.Nodes.getNodeAs<FunctionDecl>("syscall"); - RecordExtractor recordExtractor(Result.SourceManager); - - const char *sep = ""; - const auto func = syscall->getNameAsString(); - const auto &name = 
func.substr(9); // Remove "__do_sys_" prefix. - emitInterface("SYSCALL", name, "__NR_" + name, func); - printf("%s(", name.c_str()); - for (const auto &param : syscall->parameters()) { - const auto &type = recordExtractor.getFieldType(param->getType(), context, param->getNameAsString(), "", true); - const auto &name = param->getNameAsString(); - printf("%s%s %s", sep, toIdentifier(name).c_str(), type.c_str()); - sep = ", "; - } - printf(") (automatic)\n"); - recordExtractor.print(); - } -}; - -class NetlinkPolicyMatcher : public MatchFinder::MatchCallback { -public: - NetlinkPolicyMatcher(MatchFinder &Finder) { - Finder.addMatcher( - translationUnitDecl( - hasDescendant(enumDecl(has(enumConstantDecl(hasName("__NLA_TYPE_MAX")))).bind("NLA_ENUM")), - forEachDescendant( - varDecl(hasType(constantArrayType(hasElementType(hasDeclaration( - recordDecl(hasName("nla_policy")).bind("nla_policy")))) - .bind("nla_policy_array")), - isDefinition()) - .bind("netlink"))), - this); - Finder.addMatcher(varDecl(hasType(recordDecl(hasName("genl_family")).bind("genl_family")), - has(initListExpr().bind("genl_family_init"))) - .bind("genl_family_decl"), - this); - } - -private: - void run(const MatchFinder::MatchResult &Result) override { - nlaEnum(Result); // NOTE: Must be executed first, as it generates maps that are used in the following methods. - netlink(Result); - genlFamily(Result); - } - - // u8ToNlaEnum stores the Enum values to string conversions. This is later used to transfer types from an unnamed - // integer to a readable form. E.g. 
1 -> NLA_U8 - // See: https://elixir.bootlin.com/linux/v6.10/source/include/net/netlink.h#L172 - std::unordered_map<uint8_t, std::string> u8ToNlaEnum; - void nlaEnum(const MatchFinder::MatchResult &Result) { - const auto &num = Result.Nodes.getNodeAs<EnumDecl>("NLA_ENUM"); - if (!num || !u8ToNlaEnum.empty()) { // Don't evaluate the Enum twice - return; - } - for (const auto &enumerator : num->enumerators()) { - const auto &name = enumerator->getNameAsString(); - const auto val = uint8_t(enumerator->getValue().getZExtValue()); - u8ToNlaEnum[val] = name.substr(4); // Remove NLA_ prefix - } - } - - const std::string nlaArraySubtype(const std::string &name, const std::string &type, const size_t len, - const std::string &typeOfLen) { - if (!typeOfLen.empty()) { - return len == 0 ? typeOfLen : makeArray(typeOfLen, 0, len); - } - switch (len) { - case 0: - return makeArray("int8"); - case 1: - case 2: - case 4: - case 8: - return intSubtype(name, IntType(len)); - default: - if (contains(name, "IPV6")) { - return "ipv6_addr"; - } - if (type == "BINARY") { - return makeArray("int8", 0, len); - } - return makeArray("int8", len); - } - } - - const std::string nlaToSyz(std::string name, const std::string &type, const size_t len, - const std::string &typeOfLen) { - std::transform(name.begin(), name.end(), name.begin(), ::tolower); - // TODO:Gather information from other defined fields to better specify a type. 
- // Loosely based on https://elixir.bootlin.com/linux/v6.10/source/lib/nlattr.c - if (type == "U8" || type == "S8") { - return intSubtype(name, INT_8); - } - if (type == "U16" || type == "S16") { - return intSubtype(name, INT_16); - } - if (type == "U32" || type == "S32") { - return intSubtype(name, INT_32); - } - if (type == "U64" || type == "S64" || type == "SINT" || type == "UINT" || type == "MSECS") { - return intSubtype(name, INT_64); - } - if (type == "BINARY" || type == "UNSPEC") { - return nlaArraySubtype(name, type, len, typeOfLen); - } - if (type == "BE16") { - return "int16be"; - } - if (type == "BE32") { - return "int32be"; - } - if (type == "FLAG") { - return "void"; - } - if (type == "STRING") { - return stringSubtype(name, "stringnoz"); - } - if (type == "NUL_STRING") { - return stringSubtype(name); - } - if (type == "BITFIELD32") { // TODO:Extract valued values from NLA_POLICY_BITFIELD32 macro. - return "int32"; - } - if (type == "NESTED") { - return makeArray(typeOfLen.empty() ? "nl_generic_attr" : typeOfLen); - } - if (type == "NESTED_ARRAY") { - return "array[nlnest[0, array[" + (typeOfLen.empty() ? "nl_generic_attr" : typeOfLen) + "]]]"; - } - fprintf(stderr, "Unsupported netlink type %s\n", type.c_str()); - exit(1); - } - - RecordDecl *getStructFromSizeof(UnaryExprOrTypeTraitExpr *stmt) { - if (!stmt || stmt->getKind() != clang::UETT_SizeOf) { - return NULL; - } - return stmt->getTypeOfArgument()->getAsRecordDecl(); - } - - NetlinkType getStructAndLenFromBinary(BinaryOperator *stmt, ASTContext *context) { - const auto &lhs = stmt->getLHS(); - const auto &rhs = stmt->getRHS(); - - // NOTE: Usually happens in case of NESTED_POLICY which is not handled currently. 
- // TODO: Handle NESTED_POLICY - if (lhs->getStmtClass() == clang::Stmt::BinaryOperatorClass || - rhs->getStmtClass() == clang::Stmt::BinaryOperatorClass) { - return {NULL, 0}; - } - auto decl = getStructFromSizeof(llvm::dyn_cast<UnaryExprOrTypeTraitExpr>(lhs)); - Expr::EvalResult len; - if (!decl) { - decl = getStructFromSizeof(llvm::dyn_cast<UnaryExprOrTypeTraitExpr>(rhs)); - lhs->EvaluateAsConstantExpr(len, *context); - } else { - rhs->EvaluateAsConstantExpr(len, *context); - } - return NetlinkType{decl, len.Val.getInt().getExtValue()}; - } - - // Returns the struct type from .len field. - // e.g. if .len = sizeof(struct x * LEN), returns the declaration of struct x and LEN - NetlinkType getNetlinkStruct(clang::Expr *stmt, ASTContext *context) { - stmt = stmt->IgnoreParens(); - Expr::EvalResult len; - stmt->EvaluateAsConstantExpr(len, *context); - switch (stmt->getStmtClass()) { - case clang::Stmt::ImplicitValueInitExprClass: - return NetlinkType{NULL, 0}; - case clang::Stmt::BinaryOperatorClass: - return getStructAndLenFromBinary(llvm::dyn_cast<BinaryOperator>(stmt), context); - case clang::Stmt::UnaryExprOrTypeTraitExprClass: - return NetlinkType{getStructFromSizeof(llvm::dyn_cast<UnaryExprOrTypeTraitExpr>(stmt)), 0}; - case clang::Stmt::UnaryOperatorClass: - case clang::Stmt::DeclRefExprClass: - case clang::Stmt::CStyleCastExprClass: - case clang::Stmt::IntegerLiteralClass: - return NetlinkType{NULL, len.Val.getInt().getExtValue()}; - default: - fprintf(stderr, "Unhandled .len case %s\n", stmt->getStmtClassName()); - exit(1); - } - } - - void netlink(const MatchFinder::MatchResult &Result) { - ASTContext *context = Result.Context; - const auto *netlinkDecl = Result.Nodes.getNodeAs<VarDecl>("netlink"); - if (!netlinkDecl) { - return; - } - - const auto *init = netlinkDecl->getInit(); - if (!init) { - return; - } - std::vector<std::vector<Expr *>> fields; - for (const auto &policy : *llvm::dyn_cast<InitListExpr>(init)) { - fields.push_back({}); - for (const 
auto &member : policy->children()) { - fields.back().push_back(llvm::dyn_cast<Expr>(member)); - } - } - - auto enumData = extractDesignatedInitConsts(*context, *netlinkDecl); - if (enumData.empty()) { - // We need to emit at least some type for it. - // Ideally it should be void, but typedef to void currently does not work. - printf("type %s auto_todo\n", getPolicyName(Result, netlinkDecl).c_str()); - return; - } - for (const auto &[_, item] : enumData) { - if (!endsWith(item.file, ".h")) { - continue; - } - printf("include <%s>\n", item.file.c_str()); - } - - RecordExtractor recordExtractor(Result.SourceManager); - printf("%s [\n", getPolicyName(Result, netlinkDecl).c_str()); - for (size_t i = 0; i < fields.size(); ++i) { - // The array could have an implicitly initialized policy (i.e. empty) or an unnamed attribute - if (fields[i].empty() || enumData[i].name.empty()) { - continue; - } - - Expr::EvalResult evalResult; - fields[i][0]->EvaluateAsConstantExpr(evalResult, *context); // This contains the NLA Enum type - const auto &nlaEnum = u8ToNlaEnum[evalResult.Val.getInt().getZExtValue()]; - if (nlaEnum == "REJECT") { - continue; - } - auto [structDecl, len] = getNetlinkStruct(fields[i][2]->IgnoreCasts(), context); - std::string netlinkStruct; - if (structDecl) { - netlinkStruct = recordExtractor.extractRecord(structDecl, context, enumData[i].name); - } else { - fields[i][2]->EvaluateAsConstantExpr(evalResult, *context); - len = evalResult.Val.getInt().getExtValue(); - } - const char *nlattr = "nlattr"; - if (nlaEnum == "NESTED" || nlaEnum == "NESTED_ARRAY") { - nlattr = "nlnest"; - netlinkStruct = getDeclName(*context, fields[i][3], Result.SourceManager); - } - printf("\t%s %s[%s, %s]\n", enumData[i].name.c_str(), nlattr, enumData[i].name.c_str(), - nlaToSyz(enumData[i].name, nlaEnum, len, netlinkStruct).c_str()); - } - puts("] [varlen]"); - recordExtractor.print(); - } - - std::map<std::string, unsigned> genlFamilyMember; - - std::string getPolicyName(const 
MatchFinder::MatchResult &Result, const ValueDecl *decl) { - if (!decl) { - return ""; - } - // Filename is added to address ambiguity when multiple policies - // are named the same but have different definitions. - return decl->getNameAsString() + "$auto_" + getDeclFilename(Result.SourceManager, decl); - } - - std::vector<NetlinkOps> getOps(const MatchFinder::MatchResult &Result, const std::string &opsName, - const InitListExpr *init) { - ASTContext *context = Result.Context; - const auto n_ops = init->getInit(genlFamilyMember["n_" + opsName])->getIntegerConstantExpr(*context); - const auto &opsRef = init->getInit(genlFamilyMember[opsName])->getAsBuiltinConstantDeclRef(*context); - if (!n_ops || !opsRef) { - return {}; - } - const auto *opsDecl = llvm::dyn_cast<VarDecl>(opsRef); - if (!opsDecl->getInit()) { - // NOTE: This usually happens when the ops is defined as an extern variable - // TODO: Extract extern variables - return {}; - } - const auto *opsInit = llvm::dyn_cast<InitListExpr>(opsDecl->getInit()); - std::map<std::string, unsigned> opsMember; - for (const auto &field : opsInit->getInit(0)->getType()->getAsRecordDecl()->fields()) { - opsMember[field->getNameAsString()] = field->getFieldIndex(); - } - std::vector<NetlinkOps> ops; - for (int i = 0; i < n_ops; ++i) { - const auto &init = llvm::dyn_cast<InitListExpr>(opsInit->getInit(i)); - const auto &cmdInit = init->getInit(opsMember["cmd"])->getEnumConstantDecl(); - if (!cmdInit) { - continue; - } - const auto &cmd = cmdInit->getNameAsString(); - const ValueDecl *policyDecl = nullptr; - if (opsName != "small_ops") { - policyDecl = init->getInit(opsMember["policy"])->getAsBuiltinConstantDeclRef(*context); - } - std::string func = getDeclName(*context, init->getInit(opsMember["doit"])); - if (func.empty()) - func = getDeclName(*context, init->getInit(opsMember["dumpit"])); - const Expr *flagsDecl = init->getInit(opsMember["flags"]); - Expr::EvalResult flags; - flagsDecl->EvaluateAsConstantExpr(flags, 
*context); - auto flagsVal = flags.Val.getInt().getExtValue(); - const char *access = AccessUser; - constexpr int GENL_ADMIN_PERM = 0x01; - constexpr int GENL_UNS_ADMIN_PERM = 0x10; - if (flagsVal & GENL_ADMIN_PERM) - access = AccessAdmin; - else if (flagsVal & GENL_UNS_ADMIN_PERM) - access = AccessNsAdmin; - ops.push_back({std::move(cmd), func, access, getPolicyName(Result, policyDecl)}); - } - return ops; - } - - void genlFamily(const MatchFinder::MatchResult &Result) { - ASTContext *context = Result.Context; - const auto *genlFamilyInit = Result.Nodes.getNodeAs<InitListExpr>("genl_family_init"); - if (!genlFamilyInit) { - return; - } - if (genlFamilyMember.empty()) { - const auto *genlFamily = Result.Nodes.getNodeAs<RecordDecl>("genl_family"); - for (const auto &field : genlFamily->fields()) { - genlFamilyMember[field->getNameAsString()] = field->getFieldIndex(); - } - } - - const auto &globalPolicyName = - genlFamilyInit->getInit(genlFamilyMember["policy"])->getAsBuiltinConstantDeclRef(*context); - - std::string familyPolicyName; - if (globalPolicyName) { - familyPolicyName = getPolicyName(Result, globalPolicyName); - } - - std::string familyName = - llvm::dyn_cast<StringLiteral>(genlFamilyInit->getInit(genlFamilyMember["name"]))->getString().str(); - std::string identifierName = toIdentifier(familyName); - std::string msghdr = "msghdr_" + identifierName + "_auto"; - bool printedCmds = false; - for (const auto &opsType : {"ops", "small_ops", "split_ops"}) { - for (auto &ops : getOps(Result, opsType, genlFamilyInit)) { - const char *policyName; - if (!ops.policy.empty()) { - policyName = ops.policy.c_str(); - } else if (globalPolicyName) { - policyName = familyPolicyName.c_str(); - } else { - continue; - } - emitInterface("NETLINK", ops.cmd, ops.cmd, ops.func, ops.access); - printf("sendmsg$auto_%s(fd sock_nl_generic, msg ptr[in, %s[%s, %s]], f flags[send_flags]) (automatic)\n", - ops.cmd.c_str(), msghdr.c_str(), ops.cmd.c_str(), policyName); - printedCmds = 
true; - } - } - if (!printedCmds) { // Do not print resources and types if they're not used in any cmds - return; - } - std::string resourceName = "genl_" + identifierName + "_family_id_auto"; - printf("resource %s[int16]\n", resourceName.c_str()); - printf("type %s[CMD, POLICY] msghdr_netlink[netlink_msg_t[%s, genlmsghdr_t[CMD], POLICY]]\n", msghdr.c_str(), - resourceName.c_str()); - printf("syz_genetlink_get_family_id$auto_%s(name ptr[in, string[\"%s\"]], fd sock_nl_generic) %s (automatic)\n", - identifierName.c_str(), familyName.c_str(), resourceName.c_str()); - } -}; - -class IouringMatcher : public MatchFinder::MatchCallback { -public: - IouringMatcher(MatchFinder &Finder) { - Finder.addMatcher( - translationUnitDecl(forEachDescendant( - varDecl(hasType(constantArrayType(hasElementType(hasDeclaration(recordDecl(hasName("io_issue_def")))))), - isDefinition()) - .bind("io_issue_defs"))), - this); - } - -private: - void run(const MatchFinder::MatchResult &Result) override { - ASTContext *context = Result.Context; - const auto *ioIssueDefs = Result.Nodes.getNodeAs<VarDecl>("io_issue_defs"); - if (!ioIssueDefs) { - return; - } - auto elements = extractDesignatedInitConsts(*Result.Context, *ioIssueDefs); - const auto *initList = llvm::dyn_cast<InitListExpr>(ioIssueDefs->getInit()); - std::map<std::string, unsigned> fields; - for (const auto &field : initList->getInit(0)->getType()->getAsRecordDecl()->fields()) { - fields[field->getNameAsString()] = field->getFieldIndex(); - } - for (const auto &[i, op] : elements) { - const auto &init = llvm::dyn_cast<InitListExpr>(initList->getInit(i)); - std::string prep = getDeclName(*context, init->getInit(fields["prep"])); - if (prep == "io_eopnotsupp_prep") { - continue; - } - std::string issue = getDeclName(*context, init->getInit(fields["issue"])); - emitInterface("IOURING", op.name, op.name, issue, AccessUser); - } - } -}; - -int main(int argc, const char **argv) { - llvm::cl::OptionCategory 
SyzDeclExtractOptionCategory("syz-declextract options"); - auto ExpectedParser = clang::tooling::CommonOptionsParser::create(argc, argv, SyzDeclExtractOptionCategory); - if (!ExpectedParser) { - llvm::errs() << ExpectedParser.takeError(); - return 1; - } - - MatchFinder Finder; - SyscallMatcher SyscallMatcher(Finder); - NetlinkPolicyMatcher NetlinkPolicyMatcher(Finder); - IouringMatcher IouringMatcher(Finder); - - clang::tooling::CommonOptionsParser &OptionsParser = ExpectedParser.get(); - clang::tooling::ClangTool Tool(OptionsParser.getCompilations(), OptionsParser.getSourcePathList()); - return Tool.run(clang::tooling::newFrontendActionFactory(&Finder).get()); -} diff --git a/tools/syz-declextract/testdata/README.md b/tools/syz-declextract/testdata/README.md new file mode 100644 index 000000000..494ddac6a --- /dev/null +++ b/tools/syz-declextract/testdata/README.md @@ -0,0 +1,28 @@ +This dir contains sources of a fake kernel that resembles Linux for testing of the `syz-declextract` tool. + +For each `*.c` file there are 3 golden files: + - `*.c.json` with the expected output of the clang tool + - `*.c.txt` with the expected syzlang descriptions + - `*.c.info` with the expected kernel interface list + +Testing is done by `tools/syz-declextract` tests. + +`TestClangTool` invokes the clang tool and verifies `*.c.json` contents. +The test requires the clang tool binary specified in the `-bin` flag, otherwise it skips testing. +You also want to run it with the `-count=1` flag since the Go tool does not detect changes in the tool binary, +and will cache and reuse test results: +``` +go test -count=1 ./tools/syz-declextract -bin=llvm/build/bin/syz-declextract +``` + +`TestDeclextract` verifies `*.c.txt` and `*.c.info` using `*.c.json` files as inputs +(it does not involve the clang tool and always runs). + +All tests also support the `-update` flag, which updates the golden files. +Generally you don't need to update them manually.
+ +Since the test covers multiple packages, it's useful to run coverage as follows: +``` +go test -count=1 -coverprofile=/tmp/cover -coverpkg="github.com/google/syzkaller/tools/syz-declextract,github.com/google/syzkaller/pkg/declextract,github.com/google/syzkaller/pkg/clangtool" ./tools/syz-declextract -bin=llvm/build/bin/syz-declextract +go tool cover -html /tmp/cover +``` diff --git a/tools/syz-declextract/testdata/arch/arm/syscalls.tbl b/tools/syz-declextract/testdata/arch/arm/syscalls.tbl new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/tools/syz-declextract/testdata/arch/arm/syscalls.tbl diff --git a/tools/syz-declextract/testdata/arch/arm64/syscalls.tbl b/tools/syz-declextract/testdata/arch/arm64/syscalls.tbl new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/tools/syz-declextract/testdata/arch/arm64/syscalls.tbl diff --git a/tools/syz-declextract/testdata/arch/mips/syscalls.tbl b/tools/syz-declextract/testdata/arch/mips/syscalls.tbl new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/tools/syz-declextract/testdata/arch/mips/syscalls.tbl diff --git a/tools/syz-declextract/testdata/arch/powerpc/syscalls.tbl b/tools/syz-declextract/testdata/arch/powerpc/syscalls.tbl new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/tools/syz-declextract/testdata/arch/powerpc/syscalls.tbl diff --git a/tools/syz-declextract/testdata/arch/riscv/syscalls.tbl b/tools/syz-declextract/testdata/arch/riscv/syscalls.tbl new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/tools/syz-declextract/testdata/arch/riscv/syscalls.tbl diff --git a/tools/syz-declextract/testdata/arch/s390/syscalls.tbl b/tools/syz-declextract/testdata/arch/s390/syscalls.tbl new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/tools/syz-declextract/testdata/arch/s390/syscalls.tbl diff --git a/tools/syz-declextract/testdata/arch/x86/syscalls.tbl b/tools/syz-declextract/testdata/arch/x86/syscalls.tbl new file mode 
100644 index 000000000..19309c4ef --- /dev/null +++ b/tools/syz-declextract/testdata/arch/x86/syscalls.tbl @@ -0,0 +1,6 @@ +# Copyright 2024 syzkaller project authors. All rights reserved. +# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +1 64 open sys_open +2 64 chmod sys_chmod +3 64 types_syscall sys_types_syscall diff --git a/tools/syz-declextract/testdata/include/netlink.h b/tools/syz-declextract/testdata/include/netlink.h new file mode 100644 index 000000000..ee8091290 --- /dev/null +++ b/tools/syz-declextract/testdata/include/netlink.h @@ -0,0 +1,86 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +#include "types.h" + +enum { + NLA_UNSPEC, + NLA_U8, + NLA_U16, + NLA_U32, + NLA_U64, + NLA_STRING, + NLA_FLAG, + NLA_MSECS, + NLA_NESTED, + NLA_NESTED_ARRAY, + NLA_NUL_STRING, + NLA_BINARY, + NLA_S8, + NLA_S16, + NLA_S32, + NLA_S64, + NLA_BITFIELD32, + NLA_REJECT, + NLA_BE16, + NLA_BE32, + NLA_SINT, + NLA_UINT, + __NLA_TYPE_MAX, +}; + +struct nla_policy { + u8 type; + u8 validation_type; + u16 len; + union { + const u32 bitfield32_valid; + const u32 mask; + const struct nla_policy *nested_policy; + struct { s16 min, max; }; + }; +}; + +#define NLA_POLICY_NESTED(policy) { .type = NLA_NESTED, .nested_policy = policy, .len = sizeof(policy)/sizeof(policy[0]) } + +#define GENL_ADMIN_PERM 0x01 +#define GENL_UNS_ADMIN_PERM 0x10 + +struct genl_ops { + u8 cmd; + u8 flags; + const struct nla_policy* policy; + void (*doit)(void); + void (*dumpit)(void); +}; + +struct genl_split_ops { + u8 cmd; + u8 flags; + const struct nla_policy *policy; + union { + struct { + void (*pre_doit)(void); + void (*doit)(void); + void (*post_doit)(void); + }; + struct { + void (*start)(void); + void (*dumpit)(void); + void (*done)(void); + }; + }; +}; + +struct genl_small_ops {}; + +struct genl_family { + char name[64]; + u8 
n_ops; + u8 n_small_ops; + u8 n_split_ops; + const struct nla_policy* policy; + const struct genl_ops* ops; + const struct genl_small_ops* mall_ops; + const struct genl_split_ops* split_ops; +}; diff --git a/tools/syz-declextract/testdata/include/syscall.h b/tools/syz-declextract/testdata/include/syscall.h new file mode 100644 index 000000000..601480cb4 --- /dev/null +++ b/tools/syz-declextract/testdata/include/syscall.h @@ -0,0 +1,7 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +#define SYSCALL_DEFINE1(NAME, ...) SYSCALL_DEFINEx(1, NAME, __VA_ARGS__) +#define SYSCALL_DEFINE2(NAME, ...) SYSCALL_DEFINEx(2, NAME, __VA_ARGS__) +#define SYSCALL_DEFINEx(NARGS, NAME, ...) long __do_sys_##NAME(__VA_ARGS__); \ +long __do_sys_##NAME(__VA_ARGS__) diff --git a/tools/syz-declextract/testdata/include/types.h b/tools/syz-declextract/testdata/include/types.h new file mode 100644 index 000000000..5b1d6303c --- /dev/null +++ b/tools/syz-declextract/testdata/include/types.h @@ -0,0 +1,13 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +typedef signed char s8; +typedef short s16; +typedef int s32; +typedef long long s64; +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) diff --git a/tools/syz-declextract/testdata/include/uapi/io_uring.h b/tools/syz-declextract/testdata/include/uapi/io_uring.h new file mode 100644 index 000000000..cdc5a372a --- /dev/null +++ b/tools/syz-declextract/testdata/include/uapi/io_uring.h @@ -0,0 +1,9 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +enum io_uring_op { + IORING_OP_NOP, + IORING_OP_READV, + IORING_OP_WRITEV, + IORING_OP_NOT_SUPPORTED, +}; diff --git a/tools/syz-declextract/testdata/include/uapi/netlink_family.h b/tools/syz-declextract/testdata/include/uapi/netlink_family.h new file mode 100644 index 000000000..ffaf66bf7 --- /dev/null +++ b/tools/syz-declextract/testdata/include/uapi/netlink_family.h @@ -0,0 +1,27 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +// Description of some hypothetic netlink family. + +enum netlink_foo_cmds { + NETLINK_FOO_CMD_FOO, + NETLINK_FOO_CMD_BAR, +}; + +enum netlink_foo_attrs { + NETLINK_FOO_ATTR1, + NETLINK_FOO_ATTR2, + NETLINK_FOO_ATTR3 = NETLINK_FOO_ATTR2 + 3, // make them non-dense + NETLINK_FOO_ATTR4, + NETLINK_FOO_ATTR5, + NETLINK_FOO_ATTR6, + NETLINK_FOO_ATTR7, +}; + +struct netlink_foo_struct1 { + int a, b, c; +}; + +typedef struct { + double a, b, c; +} netlink_foo_struct2; diff --git a/tools/syz-declextract/testdata/io_uring.c b/tools/syz-declextract/testdata/io_uring.c new file mode 100644 index 000000000..20f85f0e5 --- /dev/null +++ b/tools/syz-declextract/testdata/io_uring.c @@ -0,0 +1,36 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +#include "include/uapi/io_uring.h" + +struct io_issue_def { + void (*prep)(void); + void (*issue)(void); +}; + +void io_eopnotsupp_prep() {} +void io_nop_prep() {} +void io_nop() {} +void io_readv_prep() {} +void io_read() {} +void io_writev_prep() {} +void io_write() {} + +const struct io_issue_def ops[] = { + [IORING_OP_NOP] = { + .prep = io_nop_prep, + .issue = io_nop, + }, + [IORING_OP_READV] = { + .prep = io_readv_prep, + .issue = io_read, + }, + [IORING_OP_WRITEV] = { + .prep = io_writev_prep, + .issue = io_write, + }, + [IORING_OP_NOT_SUPPORTED] = { + .prep = io_eopnotsupp_prep, + .issue = io_write, + }, +}; diff --git a/tools/syz-declextract/testdata/io_uring.c.info b/tools/syz-declextract/testdata/io_uring.c.info new file mode 100644 index 000000000..6757eca74 --- /dev/null +++ b/tools/syz-declextract/testdata/io_uring.c.info @@ -0,0 +1,3 @@ +IOURING IORING_OP_NOP func:io_nop access:user manual_desc:false auto_desc:false file:io_uring.c subsystem:kernel +IOURING IORING_OP_READV func:io_read access:user manual_desc:false auto_desc:false file:io_uring.c subsystem:kernel +IOURING IORING_OP_WRITEV func:io_write access:user manual_desc:false auto_desc:false file:io_uring.c subsystem:kernel diff --git a/tools/syz-declextract/testdata/io_uring.c.json b/tools/syz-declextract/testdata/io_uring.c.json new file mode 100644 index 000000000..927adfe26 --- /dev/null +++ b/tools/syz-declextract/testdata/io_uring.c.json @@ -0,0 +1,22 @@ +{ + "includes": [ + "include/uapi/io_uring.h" + ], + "iouring_ops": [ + { + "name": "IORING_OP_NOP", + "func": "io_nop", + "source_file": "io_uring.c" + }, + { + "name": "IORING_OP_READV", + "func": "io_read", + "source_file": "io_uring.c" + }, + { + "name": "IORING_OP_WRITEV", + "func": "io_write", + "source_file": "io_uring.c" + } + ] +}
\ No newline at end of file diff --git a/tools/syz-declextract/testdata/io_uring.c.txt b/tools/syz-declextract/testdata/io_uring.c.txt new file mode 100644 index 000000000..3ddbbcf40 --- /dev/null +++ b/tools/syz-declextract/testdata/io_uring.c.txt @@ -0,0 +1,10 @@ +# Code generated by syz-declextract. DO NOT EDIT. + +meta automatic + +type auto_todo intptr + +include <vdso/bits.h> +include <linux/types.h> +include <net/netlink.h> +include <include/uapi/io_uring.h> diff --git a/tools/syz-declextract/testdata/manual.txt b/tools/syz-declextract/testdata/manual.txt new file mode 100644 index 000000000..31ab63c9c --- /dev/null +++ b/tools/syz-declextract/testdata/manual.txt @@ -0,0 +1,50 @@ +# Copyright 2024 syzkaller project authors. All rights reserved. +# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +# This file contains manual descriptions that are required to compile auto-generated descriptions. + +resource fd[int32] +resource sock_nl_generic[fd] +resource pid[int32] + +type ifindex int32 + +create$pid() pid (automatic_helper) +create$sock_nl_generic() sock_nl_generic (automatic_helper) +sendmsg$netlink(fd sock_nl_generic, data ptr[in, msghdr_netlink[netlink_msg_t[int32, genlmsghdr_t[1], int32]]], flags flags[send_flags]) + +use(a ptr[in, use]) +use { + f0 pid + f1 ifindex + f2 auto_todo + f3 nlattr[1, int32] + f4 nlnest[1, int32] + f5 nl_generic_attr + +} + +type msghdr_netlink[MSG] { + vec ptr[in, MSG] +} + +type netlink_msg_t[TYPE, PAYLOAD, ATTRS] { + type TYPE + payload PAYLOAD + attrs array[ATTRS] +} + +type genlmsghdr_t[CMD] { + cmd const[CMD, int8] +} + +type nlattr[ATTR, TYPE] { + attr const[ATTR, int32] + data TYPE +} + +type nlnest[ATTR, TYPE] nlattr[ATTR, TYPE] + +type nl_generic_attr int32 + +send_flags = 1, 2 diff --git a/tools/syz-declextract/testdata/netlink.c b/tools/syz-declextract/testdata/netlink.c new file mode 100644 index 000000000..355b84f1f --- /dev/null +++ 
b/tools/syz-declextract/testdata/netlink.c @@ -0,0 +1,87 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +#include "include/netlink.h" +#include "include/uapi/netlink_family.h" + +// These consts are defined not in uapi .h, so the descriptions should contain +// values for them rather than includes. +enum netlink_foo_nested_attrs { + NETLINK_FOO_NESTED_ATTR1, + NETLINK_FOO_NESTED_ATTR2, +}; + +static netlink_foo_struct2 var; + +const struct nla_policy foo_genl_nested_policy[] = { + [NETLINK_FOO_NESTED_ATTR1] = { .type = NLA_U32 }, + [NETLINK_FOO_NESTED_ATTR2] = { .type = NLA_U32 }, +}; + +const struct nla_policy foo_genl_policy[] = { + [NETLINK_FOO_ATTR1] = { .type = NLA_U32 }, + [NETLINK_FOO_ATTR2] = { .type = NLA_STRING, .len = 10 }, + [NETLINK_FOO_ATTR3] = { .type = NLA_NESTED }, + [NETLINK_FOO_ATTR4] = NLA_POLICY_NESTED(foo_genl_nested_policy), + [NETLINK_FOO_ATTR5] = { .len = sizeof(struct netlink_foo_struct1) }, + [NETLINK_FOO_ATTR6] = { .len = sizeof(netlink_foo_struct2) * 10 }, + [NETLINK_FOO_ATTR7] = { .len = sizeof(var) }, +}; + +const struct nla_policy genl_policy_reject_all[] = { + { .type = NLA_REJECT }, + { .type = NLA_REJECT }, +}; + +const struct nla_policy policy_forward_decl[10]; + +static void foo_cmd() {} +static void bar_cmd() {} + +static const struct genl_ops foo_ops[] = { + { + .cmd = NETLINK_FOO_CMD_FOO, + .flags = GENL_ADMIN_PERM, + .doit = foo_cmd, + }, + { + .cmd = NETLINK_FOO_CMD_BAR, + .flags = GENL_UNS_ADMIN_PERM, + .dumpit = bar_cmd, + }, +}; + +static struct genl_family foo_family = { + .ops = foo_ops, + .n_ops = ARRAY_SIZE(foo_ops), + .name = "foo family", + .policy = foo_genl_policy, +}; + +enum { + NETLINK_BAR_CMD_FOO, +}; + +static void bar_pre_doit() {} +static void bar_doit() {} +static void bar_post_doit() {} + +static const struct genl_split_ops bar_ops[] = { + { + .cmd = NETLINK_BAR_CMD_FOO, + .pre_doit 
= bar_pre_doit, + .doit = bar_doit, + .post_doit = bar_post_doit, + }, +}; + +struct genl_family bar_family = { + .split_ops = bar_ops, + .n_split_ops = ARRAY_SIZE(bar_ops), + .name = "BAR", + .policy = foo_genl_policy, +}; + +struct genl_family noops_family = { + .name = "NOOPS", +}; diff --git a/tools/syz-declextract/testdata/netlink.c.info b/tools/syz-declextract/testdata/netlink.c.info new file mode 100644 index 000000000..e15a8e738 --- /dev/null +++ b/tools/syz-declextract/testdata/netlink.c.info @@ -0,0 +1,3 @@ +NETLINK NETLINK_BAR_CMD_FOO func:NETLINK_BAR_CMD_FOO access:user manual_desc:false auto_desc:true file:netlink.c subsystem:kernel +NETLINK NETLINK_FOO_CMD_BAR func:bar_cmd access:ns_admin manual_desc:false auto_desc:true file:netlink.c subsystem:kernel +NETLINK NETLINK_FOO_CMD_FOO func:foo_cmd access:admin manual_desc:false auto_desc:true file:netlink.c subsystem:kernel diff --git a/tools/syz-declextract/testdata/netlink.c.json b/tools/syz-declextract/testdata/netlink.c.json new file mode 100644 index 000000000..e4e485182 --- /dev/null +++ b/tools/syz-declextract/testdata/netlink.c.json @@ -0,0 +1,197 @@ +{ + "includes": [ + "include/uapi/netlink_family.h" + ], + "defines": [ + { + "name": "NETLINK_BAR_CMD_FOO", + "value": "0" + }, + { + "name": "NETLINK_FOO_NESTED_ATTR1", + "value": "0" + }, + { + "name": "NETLINK_FOO_NESTED_ATTR2", + "value": "1" + } + ], + "structs": [ + { + "name": "netlink_foo_struct1", + "byte_size": 12, + "fields": [ + { + "name": "a", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + }, + { + "name": "b", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + }, + { + "name": "c", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + } + ] + }, + { + "name": "netlink_foo_struct2", + "byte_size": 24, + "fields": [ + { + "name": "a", + "counted_by": -1, + "type": { + "int": { + 
"byte_size": 8, + "name": "double", + "base": "double" + } + } + }, + { + "name": "b", + "counted_by": -1, + "type": { + "int": { + "byte_size": 8, + "name": "double", + "base": "double" + } + } + }, + { + "name": "c", + "counted_by": -1, + "type": { + "int": { + "byte_size": 8, + "name": "double", + "base": "double" + } + } + } + ] + } + ], + "netlink_families": [ + { + "name": "BAR", + "ops": [ + { + "name": "NETLINK_BAR_CMD_FOO", + "func": "NETLINK_BAR_CMD_FOO", + "access": "user", + "policy": "foo_genl_policy$auto_netlink" + } + ], + "source_file": "netlink.c" + }, + { + "name": "NOOPS", + "source_file": "netlink.c" + }, + { + "name": "foo family", + "ops": [ + { + "name": "NETLINK_FOO_CMD_FOO", + "func": "foo_cmd", + "access": "admin", + "policy": "foo_genl_policy$auto_netlink" + }, + { + "name": "NETLINK_FOO_CMD_BAR", + "func": "bar_cmd", + "access": "ns_admin", + "policy": "foo_genl_policy$auto_netlink" + } + ], + "source_file": "netlink.c" + } + ], + "netlink_policies": [ + { + "name": "foo_genl_nested_policy$auto_netlink", + "attrs": [ + { + "name": "NETLINK_FOO_NESTED_ATTR1", + "kind": "NLA_U32" + }, + { + "name": "NETLINK_FOO_NESTED_ATTR2", + "kind": "NLA_U32" + } + ] + }, + { + "name": "foo_genl_policy$auto_netlink", + "attrs": [ + { + "name": "NETLINK_FOO_ATTR1", + "kind": "NLA_U32" + }, + { + "name": "NETLINK_FOO_ATTR2", + "kind": "NLA_STRING", + "max_size": 10 + }, + { + "name": "NETLINK_FOO_ATTR3", + "kind": "NLA_NESTED" + }, + { + "name": "NETLINK_FOO_ATTR4", + "kind": "NLA_NESTED", + "nested_policy": "foo_genl_nested_policy$auto_netlink" + }, + { + "name": "NETLINK_FOO_ATTR5", + "max_size": 12, + "elem": { + "struct": "netlink_foo_struct1" + } + }, + { + "name": "NETLINK_FOO_ATTR6", + "max_size": 240, + "elem": { + "struct": "netlink_foo_struct2" + } + }, + { + "name": "NETLINK_FOO_ATTR7", + "max_size": 24, + "elem": { + "struct": "netlink_foo_struct2" + } + } + ] + }, + { + "name": "genl_policy_reject_all$auto_netlink" + } + ] +}
\ No newline at end of file diff --git a/tools/syz-declextract/testdata/netlink.c.txt b/tools/syz-declextract/testdata/netlink.c.txt new file mode 100644 index 000000000..4b67a444d --- /dev/null +++ b/tools/syz-declextract/testdata/netlink.c.txt @@ -0,0 +1,51 @@ +# Code generated by syz-declextract. DO NOT EDIT. + +meta automatic + +type auto_todo intptr + +include <vdso/bits.h> +include <linux/types.h> +include <net/netlink.h> +include <include/uapi/netlink_family.h> + +resource genl_BAR_family_id_auto[int16] +resource genl_foo_family_family_id_auto[int16] +type msghdr_BAR_auto[CMD, POLICY] msghdr_netlink[netlink_msg_t[genl_BAR_family_id_auto, genlmsghdr_t[CMD], POLICY]] +type msghdr_foo_family_auto[CMD, POLICY] msghdr_netlink[netlink_msg_t[genl_foo_family_family_id_auto, genlmsghdr_t[CMD], POLICY]] +syz_genetlink_get_family_id$auto_BAR(name ptr[in, string["BAR"]], fd sock_nl_generic) genl_BAR_family_id_auto +syz_genetlink_get_family_id$auto_foo_family(name ptr[in, string["foo family"]], fd sock_nl_generic) genl_foo_family_family_id_auto +sendmsg$auto_NETLINK_BAR_CMD_FOO(fd sock_nl_generic, msg ptr[in, msghdr_BAR_auto[NETLINK_BAR_CMD_FOO, foo_genl_policy$auto_netlink]], f flags[send_flags]) +sendmsg$auto_NETLINK_FOO_CMD_BAR(fd sock_nl_generic, msg ptr[in, msghdr_foo_family_auto[NETLINK_FOO_CMD_BAR, foo_genl_policy$auto_netlink]], f flags[send_flags]) +sendmsg$auto_NETLINK_FOO_CMD_FOO(fd sock_nl_generic, msg ptr[in, msghdr_foo_family_auto[NETLINK_FOO_CMD_FOO, foo_genl_policy$auto_netlink]], f flags[send_flags]) + +foo_genl_nested_policy$auto_netlink [ + NETLINK_FOO_NESTED_ATTR1 nlattr[NETLINK_FOO_NESTED_ATTR1, int32] + NETLINK_FOO_NESTED_ATTR2 nlattr[NETLINK_FOO_NESTED_ATTR2, int32] +] [varlen] + +foo_genl_policy$auto_netlink [ + NETLINK_FOO_ATTR1 nlattr[NETLINK_FOO_ATTR1, int32] + NETLINK_FOO_ATTR2 nlattr[NETLINK_FOO_ATTR2, stringnoz] + NETLINK_FOO_ATTR3 nlnest[NETLINK_FOO_ATTR3, array[nl_generic_attr]] + NETLINK_FOO_ATTR4 nlnest[NETLINK_FOO_ATTR4, 
array[foo_genl_nested_policy$auto_netlink]] + NETLINK_FOO_ATTR5 nlattr[NETLINK_FOO_ATTR5, netlink_foo_struct1$auto_record] + NETLINK_FOO_ATTR6 nlattr[NETLINK_FOO_ATTR6, array[netlink_foo_struct2$auto_record, 0:10]] + NETLINK_FOO_ATTR7 nlattr[NETLINK_FOO_ATTR7, netlink_foo_struct2$auto_record] +] [varlen] + +netlink_foo_struct1$auto_record { + a int32 + b int32 + c int32 +} + +netlink_foo_struct2$auto_record { + a int64 + b int64 + c int64 +} + +define NETLINK_BAR_CMD_FOO 0 +define NETLINK_FOO_NESTED_ATTR1 0 +define NETLINK_FOO_NESTED_ATTR2 1 diff --git a/tools/syz-declextract/testdata/syscall.c b/tools/syz-declextract/testdata/syscall.c new file mode 100644 index 000000000..be247151b --- /dev/null +++ b/tools/syz-declextract/testdata/syscall.c @@ -0,0 +1,12 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +#include "include/syscall.h" + +SYSCALL_DEFINE1(open, const char* filename, int flags, int mode) { + return 0; +} + +SYSCALL_DEFINE1(chmod, const char* filename, int mode) { + return 0; +} diff --git a/tools/syz-declextract/testdata/syscall.c.info b/tools/syz-declextract/testdata/syscall.c.info new file mode 100644 index 000000000..5d204a68e --- /dev/null +++ b/tools/syz-declextract/testdata/syscall.c.info @@ -0,0 +1,2 @@ +SYSCALL chmod func:__do_sys_chmod access:unknown manual_desc:false auto_desc:true file:syscall.c subsystem:kernel +SYSCALL open func:__do_sys_open access:unknown manual_desc:false auto_desc:true file:syscall.c subsystem:kernel diff --git a/tools/syz-declextract/testdata/syscall.c.json b/tools/syz-declextract/testdata/syscall.c.json new file mode 100644 index 000000000..6e52fd57e --- /dev/null +++ b/tools/syz-declextract/testdata/syscall.c.json @@ -0,0 +1,77 @@ +{ + "syscalls": [ + { + "func": "__do_sys_chmod", + "args": [ + { + "name": "filename", + "counted_by": -1, + "type": { + "ptr": { + "elem": { + "buffer": { + 
"is_string": true + } + }, + "is_const": true + } + } + }, + { + "name": "mode", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + } + ], + "source_file": "syscall.c" + }, + { + "func": "__do_sys_open", + "args": [ + { + "name": "filename", + "counted_by": -1, + "type": { + "ptr": { + "elem": { + "buffer": { + "is_string": true + } + }, + "is_const": true + } + } + }, + { + "name": "flags", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + }, + { + "name": "mode", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + } + ], + "source_file": "syscall.c" + } + ] +}
\ No newline at end of file diff --git a/tools/syz-declextract/testdata/syscall.c.txt b/tools/syz-declextract/testdata/syscall.c.txt new file mode 100644 index 000000000..b55b077c3 --- /dev/null +++ b/tools/syz-declextract/testdata/syscall.c.txt @@ -0,0 +1,12 @@ +# Code generated by syz-declextract. DO NOT EDIT. + +meta automatic + +type auto_todo intptr + +include <vdso/bits.h> +include <linux/types.h> +include <net/netlink.h> + +chmod$auto(filename ptr[in, filename], mode int32) +open$auto(filename ptr[in, filename], flags int32, mode int32) diff --git a/tools/syz-declextract/testdata/types.c b/tools/syz-declextract/testdata/types.c new file mode 100644 index 000000000..8fc67aeb9 --- /dev/null +++ b/tools/syz-declextract/testdata/types.c @@ -0,0 +1,55 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +#include "include/syscall.h" + +typedef struct { float f; } anon_t; +struct empty_struct {}; +typedef int fd_t; +typedef struct forward forward_t; + +struct anon_struct { + // Various tricky anon cases. 
+ struct { int x; } a; + struct {} b; + struct { int y; }; + union { int q; long w; }; + anon_t foo; + forward_t* forward; + struct { int a; int b; } array[4]; + struct { int a; int b; } *ptr; + struct { int a; int b; } *ptr_array[4]; +}; + +enum bitfield_enum { a, b, c }; + +struct bitfields { + int a : 1; + int : 2; + int b : 3; + long d : 2; + long pad : 3; + enum bitfield_enum e : 10; + int l : 10; + int* p __attribute__((counted_by(l))); +} __attribute__((aligned(32))); + +struct packed_t { + char x; + int y; +} __attribute__((packed, aligned(32))); + +struct various { + struct various* recursive; + struct recursive* next; + struct packed_t packed; +}; + +struct recursive { + struct various various; +}; + +SYSCALL_DEFINE1(types_syscall, struct anon_struct* p, struct empty_struct* y, + struct bitfields* b, int pid, fd_t f, struct various* v) { + return 0; +} diff --git a/tools/syz-declextract/testdata/types.c.info b/tools/syz-declextract/testdata/types.c.info new file mode 100644 index 000000000..4e1bdf314 --- /dev/null +++ b/tools/syz-declextract/testdata/types.c.info @@ -0,0 +1 @@ +SYSCALL types_syscall func:__do_sys_types_syscall access:unknown manual_desc:false auto_desc:true file:types.c subsystem:kernel diff --git a/tools/syz-declextract/testdata/types.c.json b/tools/syz-declextract/testdata/types.c.json new file mode 100644 index 000000000..df19ab4a4 --- /dev/null +++ b/tools/syz-declextract/testdata/types.c.json @@ -0,0 +1,555 @@ +{ + "defines": [ + { + "name": "a", + "value": "0" + }, + { + "name": "b", + "value": "1" + }, + { + "name": "c", + "value": "2" + } + ], + "enums": [ + { + "name": "bitfield_enum", + "values": [ + "a", + "b", + "c" + ] + } + ], + "structs": [ + { + "name": "anon_struct", + "byte_size": 104, + "fields": [ + { + "name": "a", + "counted_by": -1, + "type": { + "struct": "anon_struct_a" + } + }, + { + "name": "b", + "counted_by": -1, + "type": { + "struct": "anon_struct_b" + } + }, + { + "name": "anon_struct_2", + "is_anonymous": 
true, + "counted_by": -1, + "type": { + "struct": "anon_struct_2" + } + }, + { + "name": "anon_struct_3", + "is_anonymous": true, + "counted_by": -1, + "type": { + "struct": "anon_struct_3" + } + }, + { + "name": "foo", + "counted_by": -1, + "type": { + "struct": "anon_t" + } + }, + { + "name": "forward", + "counted_by": -1, + "type": { + "ptr": { + "elem": { + "int": { + "byte_size": 8, + "name": "TODO", + "base": "long" + } + } + } + } + }, + { + "name": "array", + "counted_by": -1, + "type": { + "array": { + "elem": { + "struct": "anon_struct_array" + }, + "min_size": 4, + "max_size": 4 + } + } + }, + { + "name": "ptr", + "counted_by": -1, + "type": { + "ptr": { + "elem": { + "struct": "anon_struct_ptr" + } + } + } + }, + { + "name": "ptr_array", + "counted_by": -1, + "type": { + "array": { + "elem": { + "ptr": { + "elem": { + "struct": "anon_struct_ptr_array" + } + } + }, + "min_size": 4, + "max_size": 4 + } + } + } + ] + }, + { + "name": "anon_struct_2", + "byte_size": 4, + "fields": [ + { + "name": "y", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + } + ] + }, + { + "name": "anon_struct_3", + "byte_size": 8, + "is_union": true, + "fields": [ + { + "name": "q", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + }, + { + "name": "w", + "counted_by": -1, + "type": { + "int": { + "byte_size": 8, + "name": "long", + "base": "long" + } + } + } + ] + }, + { + "name": "anon_struct_a", + "byte_size": 4, + "fields": [ + { + "name": "x", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + } + ] + }, + { + "name": "anon_struct_array", + "byte_size": 8, + "fields": [ + { + "name": "a", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + }, + { + "name": "b", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + } 
+ ] + }, + { + "name": "anon_struct_b" + }, + { + "name": "anon_struct_ptr", + "byte_size": 8, + "fields": [ + { + "name": "a", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + }, + { + "name": "b", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + } + ] + }, + { + "name": "anon_struct_ptr_array", + "byte_size": 8, + "fields": [ + { + "name": "a", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + }, + { + "name": "b", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + } + ] + }, + { + "name": "anon_t", + "byte_size": 4, + "fields": [ + { + "name": "f", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "float", + "base": "float" + } + } + } + ] + }, + { + "name": "bitfields", + "byte_size": 32, + "align": 32, + "fields": [ + { + "name": "a", + "bit_width": 1, + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + }, + { + "name": "bitfields_1", + "is_anonymous": true, + "bit_width": 2, + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + }, + { + "name": "b", + "bit_width": 3, + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + }, + { + "name": "d", + "bit_width": 2, + "counted_by": -1, + "type": { + "int": { + "byte_size": 8, + "name": "long", + "base": "long" + } + } + }, + { + "name": "pad", + "bit_width": 3, + "counted_by": -1, + "type": { + "int": { + "byte_size": 8, + "name": "long", + "base": "long" + } + } + }, + { + "name": "e", + "bit_width": 10, + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "enum": "bitfield_enum" + } + } + }, + { + "name": "l", + "bit_width": 10, + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" 
+ } + } + }, + { + "name": "p", + "counted_by": 6, + "type": { + "ptr": { + "elem": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + } + } + } + ] + }, + { + "name": "empty_struct" + }, + { + "name": "packed_t", + "byte_size": 32, + "is_packed": true, + "align": 32, + "fields": [ + { + "name": "x", + "counted_by": -1, + "type": { + "int": { + "byte_size": 1, + "name": "char", + "base": "char" + } + } + }, + { + "name": "y", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + } + ] + }, + { + "name": "recursive", + "byte_size": 64, + "fields": [ + { + "name": "various", + "counted_by": -1, + "type": { + "struct": "various" + } + } + ] + }, + { + "name": "various", + "byte_size": 64, + "fields": [ + { + "name": "recursive", + "counted_by": -1, + "type": { + "ptr": { + "elem": { + "struct": "various" + } + } + } + }, + { + "name": "next", + "counted_by": -1, + "type": { + "ptr": { + "elem": { + "struct": "recursive" + } + } + } + }, + { + "name": "packed", + "counted_by": -1, + "type": { + "struct": "packed_t" + } + } + ] + } + ], + "syscalls": [ + { + "func": "__do_sys_types_syscall", + "args": [ + { + "name": "p", + "counted_by": -1, + "type": { + "ptr": { + "elem": { + "struct": "anon_struct" + } + } + } + }, + { + "name": "y", + "counted_by": -1, + "type": { + "ptr": { + "elem": { + "struct": "empty_struct" + } + } + } + }, + { + "name": "b", + "counted_by": -1, + "type": { + "ptr": { + "elem": { + "struct": "bitfields" + } + } + } + }, + { + "name": "pid", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + }, + { + "name": "f", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "fd_t", + "base": "int" + } + } + }, + { + "name": "v", + "counted_by": -1, + "type": { + "ptr": { + "elem": { + "struct": "various" + } + } + } + } + ], + "source_file": "types.c" + } + ] +}
\ No newline at end of file diff --git a/tools/syz-declextract/testdata/types.c.txt b/tools/syz-declextract/testdata/types.c.txt new file mode 100644 index 000000000..d7586effc --- /dev/null +++ b/tools/syz-declextract/testdata/types.c.txt @@ -0,0 +1,87 @@ +# Code generated by syz-declextract. DO NOT EDIT. + +meta automatic + +type auto_todo intptr + +include <vdso/bits.h> +include <linux/types.h> +include <net/netlink.h> + +auto_bitfield_enum = a, b, c + +types_syscall$auto(p ptr[inout, anon_struct$auto_record], y ptr[inout, void], b ptr[inout, bitfields$auto_record], pid pid, f int32, v ptr[inout, various$auto_record]) + +anon_struct$auto_record { + a anon_struct_a$auto_record + b void + anon_struct_2 anon_struct_2$auto_record + anon_struct_3 anon_struct_3$auto_record + foo anon_t$auto_record + forward ptr[inout, auto_todo] + array array[anon_struct_array$auto_record, 4] + ptr ptr[inout, anon_struct_ptr$auto_record] + ptr_array array[ptr[inout, anon_struct_ptr_array$auto_record], 4] +} + +anon_struct_2$auto_record { + y int32 +} + +anon_struct_3$auto_record [ + q int32 + w intptr +] + +anon_struct_a$auto_record { + x int32 +} + +anon_struct_array$auto_record { + a int32 + b int32 +} + +anon_struct_ptr$auto_record { + a int32 + b int32 +} + +anon_struct_ptr_array$auto_record { + a int32 + b int32 +} + +anon_t$auto_record { + f int32 +} + +bitfields$auto_record { + a int32:1 + bitfields_1 const[0, int32:2] + b int32:3 + d intptr:2 + pad const[0, intptr:3] + e flags[auto_bitfield_enum, int32:10] + l len[p, int32:10] + p ptr[inout, int32] +} [align[32]] + +packed_t$auto_record { + x int8 + y int32 +} [packed, align[32]] + +recursive$auto_record { + various various$auto_record +} + +various$auto_record { + recursive ptr[inout, various$auto_record, opt] + next ptr[inout, recursive$auto_record, opt] + packed packed_t$auto_record +} + +define a 0 +define b 1 +define c 2 |
