From de73685def8340e60ffed6f23ce0016718fc53f3 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 17 Nov 2025 07:50:29 +0100 Subject: tools/clang/declextract: move from tools/syz-declextract/clangtool Some of the common helpers may be reused across different Clang tools (currently json.h and .clang-format). Move the files to allow such reuse. --- tools/clang/.clang-format | 5 + tools/clang/declextract/declextract.cpp | 1009 +++++++++++++++++++++++ tools/clang/declextract/output.h | 474 +++++++++++ tools/clang/json.h | 79 ++ tools/syz-declextract/README.md | 2 +- tools/syz-declextract/clangtool/.clang-format | 5 - tools/syz-declextract/clangtool/declextract.cpp | 1009 ----------------------- tools/syz-declextract/clangtool/json.h | 79 -- tools/syz-declextract/clangtool/output.h | 474 ----------- 9 files changed, 1568 insertions(+), 1568 deletions(-) create mode 100644 tools/clang/.clang-format create mode 100644 tools/clang/declextract/declextract.cpp create mode 100644 tools/clang/declextract/output.h create mode 100644 tools/clang/json.h delete mode 100644 tools/syz-declextract/clangtool/.clang-format delete mode 100644 tools/syz-declextract/clangtool/declextract.cpp delete mode 100644 tools/syz-declextract/clangtool/json.h delete mode 100644 tools/syz-declextract/clangtool/output.h (limited to 'tools') diff --git a/tools/clang/.clang-format b/tools/clang/.clang-format new file mode 100644 index 000000000..15868add4 --- /dev/null +++ b/tools/clang/.clang-format @@ -0,0 +1,5 @@ +BasedOnStyle: LLVM +ColumnLimit: 120 +DerivePointerAlignment: false +PointerAlignment: Left +CommentPragmas: '^[^ ]' diff --git a/tools/clang/declextract/declextract.cpp b/tools/clang/declextract/declextract.cpp new file mode 100644 index 000000000..d7230a578 --- /dev/null +++ b/tools/clang/declextract/declextract.cpp @@ -0,0 +1,1009 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +#include "json.h" +#include "output.h" + +#include "clang/AST/APValue.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Attrs.inc" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclarationName.h" +#include "clang/AST/Expr.h" +#include "clang/AST/PrettyPrinter.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/Type.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TypeTraits.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace clang; +using namespace clang::ast_matchers; + +// MacroDef/MacroMap hold information about macros defined in the file. +struct MacroDef { + std::string Value; // value as written in the source + SourceRange SourceRange; // soruce range of the value +}; +using MacroMap = std::unordered_map; + +// ConstDesc describes a macro or an enum value. +struct ConstDesc { + std::string Name; + std::string Value; + SourceRange SourceRange; + int64_t IntValue; +}; + +class Extractor : public MatchFinder, public tooling::SourceFileCallbacks { +public: + Extractor() { + match(&Extractor::matchFunctionDef, functionDecl(isDefinition()).bind("function")); + + match(&Extractor::matchSyscall, + functionDecl(isExpandedFromMacro("SYSCALL_DEFINEx"), matchesName("__do_sys_.*")).bind("syscall")); + + match(&Extractor::matchIouring, + translationUnitDecl(forEachDescendant( + varDecl(hasType(constantArrayType(hasElementType(hasDeclaration(recordDecl(hasName("io_issue_def")))))), + isDefinition()) + .bind("io_issue_defs")))); + + match(&Extractor::matchNetlinkPolicy, + translationUnitDecl(forEachDescendant( + varDecl(hasType(constantArrayType(hasElementType(hasDeclaration(recordDecl(hasName("nla_policy")))))), + isDefinition()) + .bind("netlink_policy")))); + + match(&Extractor::matchNetlinkFamily, varDecl(hasType(recordDecl(hasName("genl_family")).bind("genl_family")), + has(initListExpr().bind("genl_family_init")))); + + match(&Extractor::matchFileOps, + varDecl(forEachDescendant(initListExpr(hasType(recordDecl(hasName("file_operations")))).bind("init"))) + .bind("var")); + } + + void print() const { Output.print(); } + +private: + friend struct FunctionAnalyzer; + using MatchFunc = void (Extractor::*)(); + // Thunk that redirects MatchCallback::run method to one of the methods of the Extractor class. + struct MatchCallbackThunk : MatchFinder::MatchCallback { + Extractor& Ex; + MatchFunc Action; + MatchCallbackThunk(Extractor& Ex, MatchFunc Action) : Ex(Ex), Action(Action) {} + void run(const MatchFinder::MatchResult& Result) override { Ex.run(Result, Action); } + }; + std::vector> Matchers; + + // These set to point to the Result of the current match (to avoid passing them through all methods). + const BoundNodes* Nodes = nullptr; + ASTContext* Context = nullptr; + SourceManager* SourceManager = nullptr; + + Output Output; + MacroMap Macros; + std::unordered_map EnumDedup; + std::unordered_map StructDedup; + std::unordered_map FileOpsDedup; + + void matchFunctionDef(); + void matchSyscall(); + void matchIouring(); + void matchNetlinkPolicy(); + void matchNetlinkFamily(); + void matchFileOps(); + bool handleBeginSource(CompilerInstance& CI) override; + template void match(MatchFunc Action, const M& Matcher); + void run(const MatchFinder::MatchResult& Result, MatchFunc Action); + template const T* getResult(StringRef ID) const; + FieldType extractRecord(QualType QT, const RecordType* Typ, const std::string& BackupName); + std::string extractEnum(QualType QT, const EnumDecl* Decl); + void emitConst(const std::string& Name, int64_t Val, SourceLocation Loc); + std::string getFuncName(const Expr* Expr); + std::string getDeclName(const Expr* Expr); + const ValueDecl* getValueDecl(const Expr* Expr); + std::string getDeclFileID(const Decl* Decl); + std::string getUniqueDeclName(const NamedDecl* Decl); + std::vector> extractDesignatedInitConsts(const VarDecl& ArrayDecl); + FieldType genType(QualType Typ, const std::string& BackupName = ""); + std::unordered_map structFieldIndexes(const RecordDecl* Decl); + template T evaluate(const Expr* E); + template + std::vector findAllMatches(const Node* Expr, const Condition& Cond); + template + const T* findFirstMatch(const Node* Expr, const Condition& Cond); + std::optional getSizeofType(const Expr* E); + int sizeofType(const Type* T); + int alignofType(const Type* T); + void extractIoctl(const Expr* Cmd, const ConstDesc& Const); + std::optional isMacroOrEnum(const Expr* E); + ConstDesc constDesc(const Expr* E, const std::string& Str, const std::string& Value, const SourceRange& SourceRange); +}; + +// PPCallbacksTracker records all macro definitions (name/value/source location). +class PPCallbacksTracker : public PPCallbacks { +public: + PPCallbacksTracker(Preprocessor& PP, MacroMap& Macros) : SM(PP.getSourceManager()), Macros(Macros) {} + +private: + SourceManager& SM; + MacroMap& Macros; + + void MacroDefined(const Token& MacroName, const MacroDirective* MD) override { + const char* NameBegin = SM.getCharacterData(MacroName.getLocation()); + const char* NameEnd = SM.getCharacterData(MacroName.getEndLoc()); + std::string Name(NameBegin, NameEnd - NameBegin); + const char* ValBegin = SM.getCharacterData(MD->getMacroInfo()->getDefinitionLoc()); + const char* ValEnd = SM.getCharacterData(MD->getMacroInfo()->getDefinitionEndLoc()) + 1; + // Definition includes the macro name, remove it. + ValBegin += std::min(Name.size(), ValEnd - ValBegin); + // Trim whitespace from both ends. + while (ValBegin < ValEnd && isspace(*ValBegin)) + ValBegin++; + while (ValBegin < ValEnd && isspace(*(ValEnd - 1))) + ValEnd--; + std::string Value(ValBegin, ValEnd - ValBegin); + Macros[Name] = MacroDef{ + .Value = Value, + .SourceRange = SourceRange(MD->getMacroInfo()->getDefinitionLoc(), MD->getMacroInfo()->getDefinitionEndLoc()), + }; + } +}; + +const Expr* removeCasts(const Expr* E) { + for (;;) { + if (auto* P = dyn_cast(E)) + E = P->getSubExpr(); + else if (auto* C = dyn_cast(E)) + E = C->getSubExpr(); + else + break; + } + return E; +} + +bool Extractor::handleBeginSource(CompilerInstance& CI) { + Preprocessor& PP = CI.getPreprocessor(); + PP.addPPCallbacks(std::make_unique(PP, Macros)); + return true; +} + +template void Extractor::match(MatchFunc Action, const M& Matcher) { + Matchers.emplace_back(new MatchCallbackThunk(*this, Action)); + addMatcher(Matcher, Matchers.back().get()); +} + +void Extractor::run(const MatchFinder::MatchResult& Result, MatchFunc Action) { + Nodes = &Result.Nodes; + Context = Result.Context; + SourceManager = Result.SourceManager; + (this->*Action)(); +} + +template const T* Extractor::getResult(StringRef ID) const { return Nodes->getNodeAs(ID); } + +std::string TypeName(QualType QT) { + std::string Name = QT.getAsString(); + auto Attr = Name.find(" __attribute__"); + if (Attr != std::string::npos) + Name = Name.substr(0, Attr); + return Name; +} + +// Top function that converts any clang type QT to our output type. +FieldType Extractor::genType(QualType QT, const std::string& BackupName) { + const Type* T = QT.IgnoreParens().getUnqualifiedType().getDesugaredType(*Context).getTypePtr(); + if (llvm::isa(T)) { + return IntType{.ByteSize = sizeofType(T), .Name = TypeName(QT), .Base = QualType(T, 0).getAsString()}; + } + if (auto* Typ = llvm::dyn_cast(T)) { + return IntType{.ByteSize = sizeofType(T), .Enum = extractEnum(QT, Typ->getDecl())}; + } + if (llvm::isa(T)) { + return PtrType{.Elem = TodoType(), .IsConst = true}; + } + if (auto* Typ = llvm::dyn_cast(T)) { + return ArrType{.Elem = genType(Typ->getElementType(), BackupName)}; + } + if (auto* Typ = llvm::dyn_cast(T)) { + return extractRecord(QT, Typ, BackupName); + } + if (auto* Typ = llvm::dyn_cast(T)) { + // TODO: the size may be a macro that is different for each arch, e.g.: + // long foo[FOOSIZE/sizeof(long)]; + int Size = Typ->getSize().getZExtValue(); + return ArrType{ + .Elem = genType(Typ->getElementType(), BackupName), + .MinSize = Size, + .MaxSize = Size, + .Align = alignofType(Typ), + .IsConstSize = true, + }; + } + if (auto* Typ = llvm::dyn_cast(T)) { + FieldType Elem; + const QualType& Pointee = Typ->getPointeeType(); + if (Pointee->isAnyCharacterType()) + Elem = BufferType{.IsString = true}; + else if (Pointee->isVoidType()) + Elem = ArrType{.Elem = TodoType()}; + else + Elem = genType(Pointee, BackupName); // note: it may be an array as well + return PtrType{ + .Elem = std::move(Elem), + .IsConst = Pointee.isConstQualified(), + }; + } + QT.dump(); + llvm::report_fatal_error("unhandled type"); +} + +FieldType Extractor::extractRecord(QualType QT, const RecordType* Typ, const std::string& BackupName) { + auto* Decl = Typ->getDecl()->getDefinition(); + if (!Decl) + return TodoType(); // definition is in a different TU + std::string Name = Decl->getDeclName().getAsString(); + // If it's a typedef of anon struct, we want to use the typedef name: + // typedef struct {...} foo_t; + if (Name.empty() && QT->isTypedefNameType()) + Name = QualType(Typ, 0).getAsString(); + // If no other names, fallback to the parent-struct-based name. + if (Name.empty()) { + assert(!BackupName.empty()); + // The BackupName is supposed to be unique. + assert(!StructDedup[BackupName]); + Name = BackupName; + } + if (Name.find("struct ") == 0) + Name = Name.substr(strlen("struct ")); + if (StructDedup[Name]) + return Name; + StructDedup[Name] = true; + std::vector Fields; + for (const FieldDecl* F : Decl->fields()) { + std::string FieldName = F->getNameAsString(); + std::string BackupFieldName = Name + "_" + FieldName; + bool IsAnonymous = false; + if (FieldName.empty()) { + BackupFieldName = Name + "_" + std::to_string(F->getFieldIndex()); + FieldName = BackupFieldName; + IsAnonymous = true; + } + FieldType FieldType = genType(F->getType(), BackupFieldName); + int BitWidth = F->isBitField() ? F->getBitWidthValue() : 0; + int CountedBy = F->getType()->isCountAttributedType() + ? llvm::dyn_cast( + F->getType()->getAs()->getCountExpr()->getReferencedDeclOfCallee()) + ->getFieldIndex() + : -1; + Fields.push_back(Field{ + .Name = FieldName, + .IsAnonymous = IsAnonymous, + .BitWidth = BitWidth, + .CountedBy = CountedBy, + .Type = std::move(FieldType), + }); + } + int AlignAttr = 0; + bool Packed = false; + if (Decl->isStruct() && Decl->hasAttrs()) { + for (const auto& A : Decl->getAttrs()) { + if (auto* Attr = llvm::dyn_cast(A)) + AlignAttr = Attr->getAlignment(*Context) / 8; + else if (llvm::isa(A)) + Packed = true; + } + } + Output.emit(Struct{ + .Name = Name, + .ByteSize = sizeofType(Typ), + .Align = alignofType(Typ), + .IsUnion = Decl->isUnion(), + .IsPacked = Packed, + .AlignAttr = AlignAttr, + .Fields = std::move(Fields), + }); + return Name; +} + +std::string Extractor::extractEnum(QualType QT, const EnumDecl* Decl) { + std::string Name = Decl->getNameAsString(); + if (Name.empty()) { + // This is an unnamed enum declared with a typedef: + // typedef enum {...} enum_name; + auto Typedef = dyn_cast(QT.getTypePtr()); + if (Typedef) + Name = Typedef->getDecl()->getNameAsString(); + if (Name.empty()) { + QT.dump(); + llvm::report_fatal_error("enum with empty name"); + } + } + if (EnumDedup[Name]) + return Name; + EnumDedup[Name] = true; + std::vector Values; + for (const auto* Enumerator : Decl->enumerators()) { + const std::string& Name = Enumerator->getNameAsString(); + emitConst(Name, Enumerator->getInitVal().getExtValue(), Decl->getBeginLoc()); + Values.push_back(Name); + } + Output.emit(Enum{ + .Name = Name, + .Values = Values, + }); + return Name; +} + +void Extractor::emitConst(const std::string& Name, int64_t Val, SourceLocation Loc) { + Output.emit(ConstInfo{ + .Name = Name, + .Filename = std::filesystem::relative(SourceManager->getFilename(Loc).str()), + .Value = Val, + }); +} + +// Returns base part of the source file containing the canonical declaration. +// If the passed declaration is also a definition, then it will look for a preceeding declaration. +// This is used to generate unique names for static definitions that may have duplicate names +// across different TUs. We assume that the base part of the source file is enough +// to make them unique. +std::string Extractor::getDeclFileID(const Decl* Decl) { + std::string file = + std::filesystem::path(SourceManager->getFilename(Decl->getCanonicalDecl()->getSourceRange().getBegin()).str()) + .filename() + .stem() + .string(); + std::replace(file.begin(), file.end(), '-', '_'); + return file; +} + +std::optional Extractor::isMacroOrEnum(const Expr* E) { + if (!E) + return {}; + if (auto* Enum = removeCasts(E)->getEnumConstantDecl()) + return constDesc(E, Enum->getNameAsString(), "", Enum->getSourceRange()); + auto Range = Lexer::getAsCharRange(E->getSourceRange(), *SourceManager, Context->getLangOpts()); + const std::string& Str = Lexer::getSourceText(Range, *SourceManager, Context->getLangOpts()).str(); + auto MacroDef = Macros.find(Str); + if (MacroDef == Macros.end()) + return {}; + return constDesc(E, Str, MacroDef->second.Value, MacroDef->second.SourceRange); +} + +ConstDesc Extractor::constDesc(const Expr* E, const std::string& Str, const std::string& Value, + const SourceRange& SourceRange) { + int64_t Val = evaluate(E); + emitConst(Str, Val, SourceRange.getBegin()); + return ConstDesc{ + .Name = Str, + .Value = Value, + .SourceRange = SourceRange, + .IntValue = Val, + }; +} + +template void matchHelper(MatchFinder& Finder, ASTContext* Context, const Node* Expr) { + Finder.match(*Expr, *Context); +} + +void matchHelper(MatchFinder& Finder, ASTContext* Context, const ASTContext* Expr) { + assert(Context == Expr); + Finder.matchAST(*Context); +} + +// Returns all matches of Cond named "res" in Expr and returns them casted to T. +// Expr can point to Context for a global match. +template +std::vector Extractor::findAllMatches(const Node* Expr, const Condition& Cond) { + if (!Expr) + return {}; + struct Matcher : MatchFinder::MatchCallback { + std::vector Matches; + void run(const MatchFinder::MatchResult& Result) override { + if (const T* M = Result.Nodes.getNodeAs("res")) + Matches.push_back(M); + } + }; + MatchFinder Finder; + Matcher Matcher; + Finder.addMatcher(Cond, &Matcher); + matchHelper(Finder, Context, Expr); + return std::move(Matcher.Matches); +} + +// Returns the first match of Cond named "res" in Expr and returns it casted to T. +// If no match is found, returns nullptr. +template +const T* Extractor::findFirstMatch(const Node* Expr, const Condition& Cond) { + const auto& Matches = findAllMatches(Expr, Cond); + return Matches.empty() ? nullptr : Matches[0]; +} + +// Extracts the first function reference from the expression. +// TODO: try to extract the actual function reference the expression will be evaluated to +// (the first one is not necessarily the right one). +std::string Extractor::getFuncName(const Expr* Expr) { + auto* Decl = + findFirstMatch(Expr, stmt(forEachDescendant(declRefExpr(hasType(functionType())).bind("res")))); + return Decl ? Decl->getDecl()->getNameAsString() : ""; +} + +// If expression refers to some identifier, returns the identifier name. +// Otherwise returns an empty string. +// For example, if the expression is `function_name`, returns "function_name" string. +std::string Extractor::getDeclName(const Expr* Expr) { + // The expression can be complex and include casts and e.g. InitListExpr, + // to remove all of these we match the first/any DeclRefExpr. + auto* Decl = getValueDecl(Expr); + return Decl ? Decl->getNameAsString() : ""; +} + +// Returns the first ValueDecl in the expression. +const ValueDecl* Extractor::getValueDecl(const Expr* Expr) { + // The expression can be complex and include casts and e.g. InitListExpr, + // to remove all of these we match the first/any DeclRefExpr. + auto* Decl = findFirstMatch(Expr, stmt(forEachDescendant(declRefExpr().bind("res")))); + return Decl ? Decl->getDecl() : nullptr; +} + +// Recursively finds first sizeof in the expression and return the type passed to sizeof. +std::optional Extractor::getSizeofType(const Expr* E) { + auto* Res = findFirstMatch( + E, stmt(forEachDescendant(unaryExprOrTypeTraitExpr(ofKind(UETT_SizeOf)).bind("res")))); + if (!Res) + return {}; + if (Res->isArgumentType()) + return Res->getArgumentType(); + return Res->getArgumentExpr()->getType(); +} + +// Returns map of field name -> field index. +std::unordered_map Extractor::structFieldIndexes(const RecordDecl* Decl) { + // TODO: this is wrong for structs that contain unions and anonymous sub-structs (e.g. genl_split_ops). + // To handle these we would need to look at InitListExpr::getInitializedFieldInUnion, and recurse + // into anonymous structs. + std::unordered_map Indexes; + for (const auto& F : Decl->fields()) + Indexes[F->getNameAsString()] = F->getFieldIndex(); + return Indexes; +} + +// Extracts enum info from array variable designated initialization. +// For example, for the following code: +// +// enum Foo { +// FooA = 11, +// FooB = 42, +// }; +// +// struct Bar bars[] = { +// [FooA] = {...}, +// [FooB] = {...}, +// }; +// +// it returns the following vector: {{11, "FooA"}, {42, "FooB"}}. +std::vector> Extractor::extractDesignatedInitConsts(const VarDecl& ArrayDecl) { + const auto& Matches = findAllMatches( + &ArrayDecl, + decl(forEachDescendant(designatedInitExpr(optionally(has(constantExpr(has(declRefExpr())).bind("res"))))))); + std::vector> Inits; + for (auto* Match : Matches) { + const int64_t Val = *Match->getAPValueResult().getInt().getRawData(); + const auto& Name = Match->getEnumConstantDecl()->getNameAsString(); + const auto& Loc = Match->getEnumConstantDecl()->getBeginLoc(); + emitConst(Name, Val, Loc); + Inits.emplace_back(Val, Name); + } + return Inits; +} + +int Extractor::sizeofType(const Type* T) { return static_cast(Context->getTypeInfo(T).Width) / 8; } +int Extractor::alignofType(const Type* T) { return static_cast(Context->getTypeInfo(T).Align) / 8; } + +template T Extractor::evaluate(const Expr* E) { + Expr::EvalResult Res; + E->EvaluateAsConstantExpr(Res, *Context); + // TODO: it's unclear what to do if it's not Int (in some cases we see None here). + if (Res.Val.getKind() != APValue::Int) + return 0; + auto val = Res.Val.getInt(); + if (val.isSigned()) + return val.sextOrTrunc(64).getSExtValue(); + return val.zextOrTrunc(64).getZExtValue(); +} + +void Extractor::matchNetlinkPolicy() { + const auto* PolicyArray = getResult("netlink_policy"); + const auto* Init = llvm::dyn_cast_if_present(PolicyArray->getInit()); + if (!Init) + return; + const auto& InitConsts = extractDesignatedInitConsts(*PolicyArray); + auto Fields = structFieldIndexes(Init->getInit(0)->getType()->getAsRecordDecl()); + std::vector Attrs; + for (const auto& [I, Name] : InitConsts) { + const auto* AttrInit = llvm::dyn_cast(Init->getInit(I)); + const std::string& AttrKind = getDeclName(AttrInit->getInit(Fields["type"])); + if (AttrKind == "NLA_REJECT") + continue; + auto* LenExpr = AttrInit->getInit(Fields["len"]); + int MaxSize = 0; + std::string NestedPolicy; + std::unique_ptr Elem; + if (AttrKind == "NLA_NESTED" || AttrKind == "NLA_NESTED_ARRAY") { + if (const auto* NestedDecl = getValueDecl(AttrInit->getInit(2))) + NestedPolicy = getUniqueDeclName(NestedDecl); + } else { + MaxSize = evaluate(LenExpr); + if (auto SizeofType = getSizeofType(LenExpr)) + Elem = std::make_unique(genType(*SizeofType)); + } + Attrs.push_back(NetlinkAttr{ + .Name = Name, + .Kind = AttrKind, + .MaxSize = MaxSize, + .NestedPolicy = NestedPolicy, + .Elem = std::move(Elem), + }); + } + Output.emit(NetlinkPolicy{ + .Name = getUniqueDeclName(PolicyArray), + .Attrs = std::move(Attrs), + }); +} + +void Extractor::matchNetlinkFamily() { + const auto* FamilyInit = getResult("genl_family_init"); + auto Fields = structFieldIndexes(getResult("genl_family")); + const std::string& FamilyName = llvm::dyn_cast(FamilyInit->getInit(Fields["name"]))->getString().str(); + std::string DefaultPolicy; + if (const auto* PolicyDecl = FamilyInit->getInit(Fields["policy"])->getAsBuiltinConstantDeclRef(*Context)) + DefaultPolicy = getUniqueDeclName(PolicyDecl); + std::vector Ops; + for (const auto& OpsName : {"ops", "small_ops", "split_ops"}) { + const auto* OpsDecl = + llvm::dyn_cast_if_present(FamilyInit->getInit(Fields[OpsName])->getAsBuiltinConstantDeclRef(*Context)); + const auto NumOps = FamilyInit->getInit(Fields[std::string("n_") + OpsName])->getIntegerConstantExpr(*Context); + // The ops variable may be defined in another TU. + // TODO: extract variables from another TUs. + if (!OpsDecl || !OpsDecl->getInit() || !NumOps) + continue; + const auto* OpsInit = llvm::dyn_cast(OpsDecl->getInit()); + auto OpsFields = structFieldIndexes(OpsInit->getInit(0)->getType()->getAsRecordDecl()); + for (int I = 0; I < *NumOps; I++) { + const auto* OpInit = llvm::dyn_cast(OpsInit->getInit(I)); + const auto* CmdInit = OpInit->getInit(OpsFields["cmd"])->getEnumConstantDecl(); + if (!CmdInit) + continue; + const std::string& OpName = CmdInit->getNameAsString(); + emitConst(OpName, CmdInit->getInitVal().getExtValue(), CmdInit->getBeginLoc()); + std::string Policy; + if (OpsFields.count("policy") != 0) { + if (const auto* PolicyDecl = OpInit->getInit(OpsFields["policy"])->getAsBuiltinConstantDeclRef(*Context)) + Policy = getUniqueDeclName(PolicyDecl); + } + if (Policy.empty()) + Policy = DefaultPolicy; + std::string Func = getFuncName(OpInit->getInit(OpsFields["doit"])); + if (Func.empty()) + Func = getFuncName(OpInit->getInit(OpsFields["dumpit"])); + int Flags = evaluate(OpInit->getInit(OpsFields["flags"])); + const char* Access = AccessUser; + constexpr int GENL_ADMIN_PERM = 0x01; + constexpr int GENL_UNS_ADMIN_PERM = 0x10; + if (Flags & GENL_ADMIN_PERM) + Access = AccessAdmin; + else if (Flags & GENL_UNS_ADMIN_PERM) + Access = AccessNsAdmin; + Ops.push_back(NetlinkOp{ + .Name = OpName, + .Func = Func, + .Access = Access, + .Policy = Policy, + }); + } + } + Output.emit(NetlinkFamily{ + .Name = FamilyName, + .Ops = std::move(Ops), + }); +} + +std::string Extractor::getUniqueDeclName(const NamedDecl* Decl) { + return Decl->getNameAsString() + "_" + getDeclFileID(Decl); +} + +bool isInterestingCall(const CallExpr* Call) { + auto* CalleeDecl = Call->getDirectCallee(); + // We don't handle indirect calls yet. + if (!CalleeDecl) + return false; + // Builtins are not interesting and won't have a body. + if (CalleeDecl->getBuiltinID() != Builtin::ID::NotBuiltin) + return false; + const std::string& Callee = CalleeDecl->getNameAsString(); + // There are too many of these and they should only be called at runtime in broken builds. + if (Callee.rfind("__compiletime_assert", 0) == 0 || Callee == "____wrong_branch_error" || + Callee == "__bad_size_call_parameter") + return false; + return true; +} + +struct FunctionAnalyzer : RecursiveASTVisitor { + FunctionAnalyzer(Extractor* Extractor, const FunctionDecl* Func) + : Extractor(Extractor), CurrentFunc(Func->getNameAsString()), Context(Extractor->Context), + SourceManager(Extractor->SourceManager) { + // The global function scope. + Scopes.push_back(FunctionScope{.Arg = -1}); + Current = &Scopes[0]; + TraverseStmt(Func->getBody()); + } + + bool VisitBinaryOperator(const BinaryOperator* B) { + if (B->isAssignmentOp()) + noteFact(getTypingEntity(B->getRHS()), getTypingEntity(B->getLHS())); + return true; + } + + bool VisitVarDecl(const VarDecl* D) { + if (D->getStorageDuration() == SD_Automatic) + noteFact(getTypingEntity(D->getInit()), getDeclTypingEntity(D)); + return true; + } + + bool VisitReturnStmt(const ReturnStmt* Ret) { + noteFact(getTypingEntity(Ret->getRetValue()), EntityReturn{.Func = CurrentFunc}); + return true; + } + + bool VisitCallExpr(const CallExpr* Call) { + if (isInterestingCall(Call)) { + const std::string& Callee = Call->getDirectCallee()->getNameAsString(); + Current->Calls.push_back(Callee); + for (unsigned AI = 0; AI < Call->getNumArgs(); AI++) { + noteFact(getTypingEntity(Call->getArg(AI)), EntityArgument{ + .Func = Callee, + .Arg = AI, + }); + } + } + return true; + } + + bool VisitSwitchStmt(const SwitchStmt* S) { + // We are only interested in switches on the function arguments + // with cases that mention defines from uapi headers. + // This covers ioctl/fcntl/prctl/ptrace/etc. + bool IsInteresting = false; + auto Param = getTypingEntity(S->getCond()); + if (Current == &Scopes[0] && Param && Param->Argument) { + for (auto* C = S->getSwitchCaseList(); C; C = C->getNextSwitchCase()) { + auto* Case = dyn_cast(C); + if (!Case) + continue; + auto LMacro = Extractor->isMacroOrEnum(Case->getLHS()); + auto RMacro = Extractor->isMacroOrEnum(Case->getRHS()); + if (LMacro || RMacro) { + IsInteresting = true; + break; + } + } + } + + SwitchStack.push({S, IsInteresting, IsInteresting ? static_cast(Param->Argument->Arg) : -1}); + return true; + } + + bool VisitSwitchCase(const SwitchCase* C) { + if (!SwitchStack.top().IsInteresting) + return true; + // If there are several cases with the same "body", we want to create new scope + // only for the first one: + // case FOO: + // case BAR: + // ... some code ... + if (!C->getNextSwitchCase() || C->getNextSwitchCase()->getSubStmt() != C) { + int Line = SourceManager->getExpansionLineNumber(C->getBeginLoc()); + if (Current != &Scopes[0]) + Current->EndLine = Line; + Scopes.push_back(FunctionScope{ + .Arg = SwitchStack.top().Arg, + .StartLine = Line, + }); + Current = &Scopes.back(); + } + // Otherwise it's a default case, for which we don't add any values. + if (auto* Case = dyn_cast(C)) { + int64_t LVal = Extractor->evaluate(Case->getLHS()); + auto LMacro = Extractor->isMacroOrEnum(Case->getLHS()); + if (LMacro) { + Current->Values.push_back(LMacro->Name); + Extractor->extractIoctl(Case->getLHS(), *LMacro); + } else { + Current->Values.push_back(std::to_string(LVal)); + } + if (Case->caseStmtIsGNURange()) { + // GNU range is: + // case FOO ... BAR: + // Add all values in the range. + int64_t RVal = Extractor->evaluate(Case->getRHS()); + auto RMacro = Extractor->isMacroOrEnum(Case->getRHS()); + for (int64_t V = LVal + 1; V <= RVal - (RMacro ? 1 : 0); V++) + Current->Values.push_back(std::to_string(V)); + if (RMacro) + Current->Values.push_back(RMacro->Name); + } + } + return true; + } + + bool dataTraverseStmtPost(const Stmt* S) { + if (SwitchStack.empty()) + return true; + auto Top = SwitchStack.top(); + if (Top.S != S) + return true; + if (Top.IsInteresting) { + if (Current != &Scopes[0]) + Current->EndLine = SourceManager->getExpansionLineNumber(S->getEndLoc()); + Current = &Scopes[0]; + } + SwitchStack.pop(); + return true; + } + + void noteFact(std::optional&& Src, std::optional&& Dst) { + if (Src && Dst) + Current->Facts.push_back({std::move(*Src), std::move(*Dst)}); + } + + std::optional getTypingEntity(const Expr* E); + std::optional getDeclTypingEntity(const Decl* Decl); + + struct SwitchDesc { + const SwitchStmt* S; + bool IsInteresting; + int Arg; + }; + + Extractor* Extractor; + std::string CurrentFunc; + ASTContext* Context; + SourceManager* SourceManager; + std::vector Scopes; + FunctionScope* Current = nullptr; + std::unordered_map LocalVars; + std::unordered_map LocalSeq; + std::stack SwitchStack; +}; + +void Extractor::matchFunctionDef() { + const auto* Func = getResult("function"); + if (!Func->getBody()) + return; + auto Range = Func->getSourceRange(); + const std::string& SourceFile = + std::filesystem::relative(SourceManager->getFilename(SourceManager->getExpansionLoc(Range.getBegin())).str()); + const int StartLine = SourceManager->getExpansionLineNumber(Range.getBegin()); + const int EndLine = SourceManager->getExpansionLineNumber(Range.getEnd()); + FunctionAnalyzer Analyzer(this, Func); + Output.emit(Function{ + .Name = Func->getNameAsString(), + .File = SourceFile, + .StartLine = StartLine, + .EndLine = EndLine, + .IsStatic = Func->isStatic(), + .Scopes = std::move(Analyzer.Scopes), + }); +} + +std::optional FunctionAnalyzer::getTypingEntity(const Expr* E) { + if (!E) + return {}; + E = removeCasts(E); + if (auto* DeclRef = dyn_cast(E)) { + return getDeclTypingEntity(DeclRef->getDecl()); + } else if (auto* Member = dyn_cast(E)) { + const Type* StructType = + Member->getBase()->getType().IgnoreParens().getUnqualifiedType().getDesugaredType(*Context).getTypePtr(); + if (auto* T = dyn_cast(StructType)) + StructType = T->getPointeeType().IgnoreParens().getUnqualifiedType().getDesugaredType(*Context).getTypePtr(); + auto* StructDecl = dyn_cast(StructType)->getDecl(); + std::string StructName = StructDecl->getNameAsString(); + if (StructName.empty()) { + // The struct may be anonymous, but we need some name. + // Ideally we generate the same name we generate in struct definitions, then it will be possible + // to match them between each other. However, it does not seem to be easy. We can use DeclContext::getParent + // to get declaration of the enclosing struct, but we will also need to figure out the field index + // and handle all corner cases. For now we just use the following quick hack: hash declaration file:line. + // Note: the hash must be stable across different machines (for test golden files), so we take just + // the last part of the file name. + const std::string& SourceFile = + std::filesystem::path( + SourceManager->getFilename(SourceManager->getExpansionLoc(StructDecl->getBeginLoc())).str()) + .filename() + .string(); + int Line = SourceManager->getExpansionLineNumber(StructDecl->getBeginLoc()); + StructName = std::to_string(std::hash()(SourceFile) + std::hash()(Line)); + } + return EntityField{ + .Struct = StructName, + .Field = Member->getMemberDecl()->getNameAsString(), + }; + } else if (auto* Unary = dyn_cast(E)) { + if (Unary->getOpcode() == UnaryOperatorKind::UO_AddrOf) { + if (auto* DeclRef = dyn_cast(removeCasts(Unary->getSubExpr()))) { + if (auto* Var = dyn_cast(DeclRef->getDecl())) { + if (Var->hasGlobalStorage()) { + return EntityGlobalAddr{ + .Name = Extractor->getUniqueDeclName(Var), + }; + } + } + } + } + } else if (auto* Call = dyn_cast(E)) { + if (isInterestingCall(Call)) { + return EntityReturn{ + .Func = Call->getDirectCallee()->getNameAsString(), + }; + } + } + return {}; +} + +std::optional FunctionAnalyzer::getDeclTypingEntity(const Decl* Decl) { + if (auto* Parm = dyn_cast(Decl)) { + return EntityArgument{ + .Func = CurrentFunc, + .Arg = Parm->getFunctionScopeIndex(), + }; + } else if (auto* Var = dyn_cast(Decl)) { + if (Var->hasLocalStorage()) { + std::string VarName = Var->getNameAsString(); + // Theoretically there can be several local vars with the same name. + // Give them unique suffixes if that's the case. + if (LocalVars.count(Var) == 0) + LocalVars[Var] = LocalSeq[VarName]++; + if (int Seq = LocalVars[Var]) + VarName += std::to_string(Seq); + return EntityLocal{ + .Name = VarName, + }; + } + } + return {}; +} + +void Extractor::matchSyscall() { + const auto* Func = getResult("syscall"); + std::vector Args; + for (const auto& Param : Func->parameters()) { + Args.push_back(Field{ + .Name = Param->getNameAsString(), + .Type = genType(Param->getType()), + }); + } + Output.emit(Syscall{ + .Func = Func->getNameAsString(), + .Args = std::move(Args), + }); +} + +void Extractor::matchIouring() { + const auto* IssueDefs = getResult("io_issue_defs"); + const auto& InitConsts = extractDesignatedInitConsts(*IssueDefs); + const auto* InitList = llvm::dyn_cast(IssueDefs->getInit()); + auto Fields = structFieldIndexes(InitList->getInit(0)->getType()->getAsRecordDecl()); + for (const auto& [I, Name] : InitConsts) { + const auto& Init = llvm::dyn_cast(InitList->getInit(I)); + std::string Prep = getFuncName(Init->getInit(Fields["prep"])); + if (Prep == "io_eopnotsupp_prep") + continue; + Output.emit(IouringOp{ + .Name = Name, + .Func = getFuncName(Init->getInit(Fields["issue"])), + }); + } +} + +void Extractor::matchFileOps() { + const auto* Fops = getResult("init"); + if (Fops->getNumInits() == 0 || isa(Fops->getInit(0))) { + // Some code constructs produce init list with DesignatedInitExpr. + // Unclear why, but it won't be handled by the following code, and is not necessary to handle. + return; + } + const auto* Var = getResult("var"); + std::string VarName = getUniqueDeclName(Var); + int NameSeq = FileOpsDedup[VarName]++; + if (NameSeq) + VarName += std::to_string(NameSeq); + auto Fields = structFieldIndexes(Fops->getType()->getAsRecordDecl()); + std::string Open = getFuncName(Fops->getInit(Fields["open"])); + std::string Ioctl = getFuncName(Fops->getInit(Fields["unlocked_ioctl"])); + std::string Read = getFuncName(Fops->getInit(Fields["read"])); + if (Read.empty()) + Read = getFuncName(Fops->getInit(Fields["read_iter"])); + std::string Write = getFuncName(Fops->getInit(Fields["write"])); + if (Write.empty()) + Write = getFuncName(Fops->getInit(Fields["write_iter"])); + std::string Mmap = getFuncName(Fops->getInit(Fields["mmap"])); + if (Mmap.empty()) + Mmap = getFuncName(Fops->getInit(Fields["get_unmapped_area"])); + Output.emit(FileOps{ + .Name = VarName, + .Open = std::move(Open), + .Read = std::move(Read), + .Write = std::move(Write), + .Mmap = std::move(Mmap), + .Ioctl = std::move(Ioctl), + }); +} + +void Extractor::extractIoctl(const Expr* Cmd, const ConstDesc& Const) { + // This is old style ioctl defined directly via a number. + // We can't infer anything about it. + if (Const.Value.find("_IO") != 0) + return; + FieldType Type; + auto Dir = _IOC_DIR(Const.IntValue); + if (Dir == _IOC_NONE) { + Type = IntType{.ByteSize = 1, .IsConst = true}; + } else if (std::optional Arg = getSizeofType(Cmd)) { + Type = PtrType{ + .Elem = genType(*Arg), + .IsConst = Dir == _IOC_READ, + }; + } else { + // It is an ioctl, but we failed to get the arg type. + // Let the Go part figure out a good arg type. + return; + } + Output.emit(Ioctl{ + .Name = Const.Name, + .Type = std::move(Type), + }); +} + +int main(int argc, const char** argv) { + llvm::cl::OptionCategory Options("syz-declextract options"); + auto OptionsParser = tooling::CommonOptionsParser::create(argc, argv, Options); + if (!OptionsParser) { + llvm::errs() << OptionsParser.takeError(); + return 1; + } + Extractor Ex; + tooling::ClangTool Tool(OptionsParser->getCompilations(), OptionsParser->getSourcePathList()); + if (Tool.run(tooling::newFrontendActionFactory(&Ex, &Ex).get())) + return 1; + Ex.print(); + return 0; +} diff --git a/tools/clang/declextract/output.h b/tools/clang/declextract/output.h new file mode 100644 index 000000000..56b207be1 --- /dev/null +++ b/tools/clang/declextract/output.h @@ -0,0 +1,474 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +#ifndef SYZ_DECLEXTRACT_OUTPUT_H +#define SYZ_DECLEXTRACT_OUTPUT_H + +#include +#include +#include +#include + +const char* const AccessUnknown = ""; +const char* const AccessUser = "user"; +const char* const AccessNsAdmin = "ns_admin"; +const char* const AccessAdmin = "admin"; + +struct IntType; +struct PtrType; +struct ArrType; +struct BufferType; + +struct FieldType { + std::unique_ptr Int; + std::unique_ptr Ptr; + std::unique_ptr Array; + std::unique_ptr Buffer; + std::unique_ptr Struct; + + FieldType() = default; + FieldType(IntType&& Typ) : Int(std::make_unique(std::move(Typ))) {} + FieldType(PtrType&& Typ) : Ptr(std::make_unique(std::move(Typ))) {} + FieldType(ArrType&& Typ) : Array(std::make_unique(std::move(Typ))) {} + FieldType(BufferType&& Typ) : Buffer(std::make_unique(std::move(Typ))) {} + FieldType(const std::string& Typ) : Struct(std::make_unique(std::move(Typ))) {} +}; + +struct IntType { + int ByteSize = 0; + int64_t MinValue = 0; + int64_t MaxValue = 0; + bool IsConst = false; + std::string Name; + std::string Base; + std::string Enum; +}; + +struct PtrType { + FieldType Elem; + bool IsConst = false; +}; + +struct ArrType { + FieldType Elem; + int MinSize = 0; + int MaxSize = 0; + int Align = 0; + bool IsConstSize = false; +}; + +struct BufferType { + int MinSize = 0; + int MaxSize = 0; + bool IsString = false; + bool IsNonTerminated = false; +}; + +struct ConstInfo { + std::string Name; + std::string Filename; + int64_t Value; +}; + +struct Field { + std::string Name; + bool IsAnonymous = false; + int BitWidth = 0; + int CountedBy = -1; + FieldType Type; +}; + +struct Struct { + std::string Name; + int ByteSize = 0; + int Align = 0; + bool IsUnion = false; + bool IsPacked = false; + int AlignAttr = 0; + std::vector Fields; +}; + +struct Enum { + std::string Name; + std::vector Values; +}; + +struct Ioctl { + std::string Name; + FieldType Type; +}; + +struct FileOps { + std::string Name; + std::string Open; + std::string Read; + std::string Write; + std::string Mmap; + std::string Ioctl; +}; + +struct EntityReturn { + std::string Func; +}; + +struct EntityArgument { + std::string Func; + unsigned Arg; +}; + +struct EntityField { + std::string Struct; + std::string Field; +}; + +struct EntityLocal { + std::string Name; +}; + +struct EntityGlobalAddr { + std::string Name; +}; + +struct EntityResource { + std::string Type; + std::string SubType; +}; + +struct TypingEntity { + std::unique_ptr Return; + std::unique_ptr Argument; + std::unique_ptr Field; + std::unique_ptr Local; + std::unique_ptr GlobalAddr; + std::unique_ptr Resource; + + TypingEntity() = default; + TypingEntity(EntityReturn&& E) : Return(std::make_unique(std::move(E))) {} + TypingEntity(EntityArgument&& E) : Argument(std::make_unique(std::move(E))) {} + TypingEntity(EntityField&& E) : Field(std::make_unique(std::move(E))) {} + TypingEntity(EntityLocal&& E) : Local(std::make_unique(std::move(E))) {} + TypingEntity(EntityGlobalAddr&& E) : GlobalAddr(std::make_unique(std::move(E))) {} + TypingEntity(EntityResource&& E) : Resource(std::make_unique(std::move(E))) {} +}; + +struct TypingFact { + TypingEntity Src; + TypingEntity Dst; +}; + +struct FunctionScope { + int Arg = 0; + int StartLine = 0; + int EndLine = 0; + std::vector Values; + std::vector Calls; + std::vector Facts; +}; + +struct Function { + std::string Name; + std::string File; + int StartLine = 0; + int EndLine = 0; + bool IsStatic = false; + std::vector Scopes; +}; + +struct Syscall { + std::string Func; + std::vector Args; +}; + +struct IouringOp { + std::string Name; + std::string Func; +}; + +struct NetlinkOp { + std::string Name; + std::string Func; + const char* Access = nullptr; + std::string Policy; +}; + +struct NetlinkFamily { + std::string Name; + std::vector Ops; +}; + +struct NetlinkAttr { + std::string Name; + std::string Kind; + int MaxSize = 0; + std::string NestedPolicy; + std::unique_ptr Elem; +}; + +struct NetlinkPolicy { + std::string Name; + std::vector Attrs; +}; + +inline void print(JSONPrinter& Printer, const ConstInfo& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("filename", V.Filename); + Printer.Field("value", V.Value, true); +} + +inline void print(JSONPrinter& Printer, const Field& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("is_anonymous", V.IsAnonymous); + Printer.Field("bit_width", V.BitWidth); + Printer.Field("counted_by", V.CountedBy); + Printer.Field("type", V.Type, true); +} + +inline void print(JSONPrinter& Printer, const Struct& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("align", V.Align); + Printer.Field("byte_size", V.ByteSize); + Printer.Field("is_union", V.IsUnion); + Printer.Field("is_packed", V.IsPacked); + Printer.Field("align_attr", V.AlignAttr); + Printer.Field("fields", V.Fields, true); +} + +inline void print(JSONPrinter& Printer, const Enum& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("values", V.Values, true); +} + +inline void print(JSONPrinter& Printer, const FieldType& V) { + JSONPrinter::Scope Scope(Printer); + if (V.Int) + Printer.Field("int", *V.Int, true); + else if (V.Ptr) + Printer.Field("ptr", *V.Ptr, true); + else if (V.Array) + Printer.Field("array", *V.Array, true); + else if (V.Buffer) + Printer.Field("buffer", *V.Buffer, true); + else + Printer.Field("struct", *V.Struct, true); +} + +inline void print(JSONPrinter& Printer, const IntType& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("byte_size", V.ByteSize); + Printer.Field("min_value", V.MinValue); + Printer.Field("max_value", V.MaxValue); + Printer.Field("is_const", V.IsConst); + Printer.Field("name", V.Name); + Printer.Field("base", V.Base); + Printer.Field("enum", V.Enum, true); +} + +inline void print(JSONPrinter& Printer, const PtrType& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("elem", V.Elem); + Printer.Field("is_const", V.IsConst, true); +} + +inline void print(JSONPrinter& Printer, const ArrType& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("elem", V.Elem); + Printer.Field("min_size", V.MinSize); + Printer.Field("max_size", V.MaxSize); + Printer.Field("align", V.Align); + Printer.Field("is_const_size", V.IsConstSize, true); +} + +inline void print(JSONPrinter& Printer, const BufferType& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("min_size", V.MinSize); + Printer.Field("max_size", V.MaxSize); + Printer.Field("is_string", V.IsString); + Printer.Field("is_non_terminated", V.IsNonTerminated, true); +} + +inline void print(JSONPrinter& Printer, const Ioctl& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("type", V.Type, true); +} + +inline void print(JSONPrinter& Printer, const FileOps& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("open", V.Open); + Printer.Field("read", V.Read); + Printer.Field("write", V.Write); + Printer.Field("mmap", V.Mmap); + Printer.Field("ioctl", V.Ioctl, true); +} + +inline void print(JSONPrinter& Printer, const EntityReturn& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("func", V.Func, true); +} + +inline void print(JSONPrinter& Printer, const EntityArgument& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("func", V.Func); + Printer.Field("arg", V.Arg, true); +} + +inline void print(JSONPrinter& Printer, const EntityField& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("struct", V.Struct); + Printer.Field("field", V.Field, true); +} + +inline void print(JSONPrinter& Printer, const EntityLocal& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name, true); +} + +inline void print(JSONPrinter& Printer, const EntityGlobalAddr& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name, true); +} + +inline void print(JSONPrinter& Printer, const EntityResource& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("type", V.Type); + Printer.Field("subtype", V.SubType, true); +} + +inline void print(JSONPrinter& Printer, const TypingEntity& V) { + JSONPrinter::Scope Scope(Printer); + if (V.Return) + Printer.Field("return", *V.Return, true); + else if (V.Argument) + Printer.Field("argument", *V.Argument, true); + else if (V.Field) + Printer.Field("field", *V.Field, true); + else if (V.Local) + Printer.Field("local", *V.Local, true); + else if (V.GlobalAddr) + Printer.Field("global_addr", *V.GlobalAddr, true); + else + Printer.Field("resource", *V.Resource, true); +} + +inline void print(JSONPrinter& Printer, const TypingFact& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("src", V.Src); + Printer.Field("dst", V.Dst, true); +} + +inline void print(JSONPrinter& Printer, const FunctionScope& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("arg", V.Arg); + Printer.Field("values", V.Values); + Printer.Field("start_line", V.StartLine); + Printer.Field("end_line", V.EndLine); + Printer.Field("calls", V.Calls); + Printer.Field("facts", V.Facts, true); +} + +inline void print(JSONPrinter& Printer, const Function& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("file", V.File); + Printer.Field("start_line", V.StartLine); + Printer.Field("end_line", V.EndLine); + Printer.Field("is_static", V.IsStatic); + Printer.Field("scopes", V.Scopes, true); +} + +inline void print(JSONPrinter& Printer, const Syscall& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("func", V.Func); + Printer.Field("args", V.Args, true); +} + +inline void print(JSONPrinter& Printer, const IouringOp& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("func", V.Func, true); +} + +inline void print(JSONPrinter& Printer, const NetlinkOp& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("func", V.Func); + Printer.Field("access", V.Access); + Printer.Field("policy", V.Policy, true); +} + +inline void print(JSONPrinter& Printer, const NetlinkFamily& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("ops", V.Ops, true); +} + +inline void print(JSONPrinter& Printer, const NetlinkAttr& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("kind", V.Kind); + Printer.Field("max_size", V.MaxSize); + Printer.Field("nested_policy", V.NestedPolicy); + Printer.Field("elem", V.Elem, true); +} + +inline void print(JSONPrinter& Printer, const NetlinkPolicy& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("attrs", V.Attrs, true); +} + +// This type is used when we can't figure out the right type, but need some type to use. +inline FieldType TodoType() { + return IntType{ + .ByteSize = 1, + .Name = "TODO", + .Base = "long", + }; +} + +class Output { +public: + void emit(Function&& V) { Functions.push_back(std::move(V)); } + void emit(ConstInfo&& V) { Consts.push_back(std::move(V)); } + void emit(Struct&& V) { Structs.push_back(std::move(V)); } + void emit(Enum&& V) { Enums.push_back(std::move(V)); } + void emit(Syscall&& V) { Syscalls.push_back(std::move(V)); } + void emit(FileOps&& V) { FileOps.push_back(std::move(V)); } + void emit(Ioctl&& V) { Ioctls.push_back(std::move(V)); } + void emit(IouringOp&& V) { IouringOps.push_back(std::move(V)); } + void emit(NetlinkFamily&& V) { NetlinkFamilies.push_back(std::move(V)); } + void emit(NetlinkPolicy&& V) { NetlinkPolicies.push_back(std::move(V)); } + + void print() const { + JSONPrinter Printer; + Printer.Field("functions", Functions); + Printer.Field("consts", Consts); + Printer.Field("enums", Enums); + Printer.Field("structs", Structs); + Printer.Field("syscalls", Syscalls); + Printer.Field("file_ops", FileOps); + Printer.Field("ioctls", Ioctls); + Printer.Field("iouring_ops", IouringOps); + Printer.Field("netlink_families", NetlinkFamilies); + Printer.Field("netlink_policies", NetlinkPolicies, true); + } + +private: + std::vector Functions; + std::vector Consts; + std::vector Enums; + std::vector Structs; + std::vector Syscalls; + std::vector FileOps; + std::vector Ioctls; + std::vector IouringOps; + std::vector NetlinkFamilies; + std::vector NetlinkPolicies; +}; + +#endif diff --git a/tools/clang/json.h b/tools/clang/json.h new file mode 100644 index 000000000..873313a09 --- /dev/null +++ b/tools/clang/json.h @@ -0,0 +1,79 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +#ifndef SYZ_DECLEXTRACT_JSON_H +#define SYZ_DECLEXTRACT_JSON_H + +#include +#include +#include +#include +#include + +class JSONPrinter { +public: + JSONPrinter() : Top(*this) {} + + template void Field(const char* Name, const T& V, bool Last = false) { + printf("%s\"%s\": ", Indent(), Name); + print(*this, V); + printf("%s\n", Last ? "" : ","); + } + + const char* Indent() const { + static std::string Indents; + while (Indents.size() < Nesting) + Indents.push_back('\t'); + return Indents.c_str() + Indents.size() - Nesting; + } + + class Scope { + public: + Scope(JSONPrinter& Printer, bool Array = false) : Printer(Printer), Array(Array) { + printf("%c\n", "{["[Array]); + Printer.Nesting++; + assert(Printer.Nesting < 1000); + } + + ~Scope() { + assert(Printer.Nesting > 0); + Printer.Nesting--; + printf("%s%c", Printer.Indent(), "}]"[Array]); + } + + private: + JSONPrinter& Printer; + const bool Array; + }; + +private: + friend class Scope; + size_t Nesting = 0; + Scope Top; +}; + +inline void print(JSONPrinter& Printer, int V) { printf("%d", V); } +inline void print(JSONPrinter& Printer, unsigned V) { printf("%u", V); } +inline void print(JSONPrinter& Printer, int64_t V) { printf("%ld", V); } +inline void print(JSONPrinter& Printer, bool V) { printf("%s", V ? "true" : "false"); } +inline void print(JSONPrinter& Printer, const char* V) { printf("\"%s\"", V ? V : ""); } +inline void print(JSONPrinter& Printer, const std::string& V) { print(Printer, V.c_str()); } + +template void print(JSONPrinter& Printer, const std::unique_ptr& V) { + if (!V) + printf("null"); + else + print(Printer, *V); +} + +template void print(JSONPrinter& Printer, const std::vector& V) { + JSONPrinter::Scope Scope(Printer, true); + size_t i = 0; + for (const auto& Elem : V) { + printf("%s", Printer.Indent()); + print(Printer, Elem); + printf("%s\n", ++i == V.size() ? "" : ","); + } +} + +#endif diff --git a/tools/syz-declextract/README.md b/tools/syz-declextract/README.md index cab9b5e8a..99fb8d23a 100644 --- a/tools/syz-declextract/README.md +++ b/tools/syz-declextract/README.md @@ -29,7 +29,7 @@ target_link_libraries(syz-declextract PRIVATE clangTooling) ``` mkdir $LLVM/clang/syz-declextract ``` -Copy `tools/syz-declextract/clangtool/*.{cpp,h}` files to `$LLVM/clang/syz-declextract/` directory. +Copy `tools/clang/declextract/*.{cpp,h}` and `tools/clang/*.h` files to `$LLVM/clang/syz-declextract/` directory. ``` LLVM_BUILD=$PWD/syz mkdir $LLVM_BUILD && cd $LLVM_BUILD diff --git a/tools/syz-declextract/clangtool/.clang-format b/tools/syz-declextract/clangtool/.clang-format deleted file mode 100644 index 15868add4..000000000 --- a/tools/syz-declextract/clangtool/.clang-format +++ /dev/null @@ -1,5 +0,0 @@ -BasedOnStyle: LLVM -ColumnLimit: 120 -DerivePointerAlignment: false -PointerAlignment: Left -CommentPragmas: '^[^ ]' diff --git a/tools/syz-declextract/clangtool/declextract.cpp b/tools/syz-declextract/clangtool/declextract.cpp deleted file mode 100644 index d7230a578..000000000 --- a/tools/syz-declextract/clangtool/declextract.cpp +++ /dev/null @@ -1,1009 +0,0 @@ -// Copyright 2024 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -#include "json.h" -#include "output.h" - -#include "clang/AST/APValue.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/Attr.h" -#include "clang/AST/Attrs.inc" -#include "clang/AST/Decl.h" -#include "clang/AST/DeclarationName.h" -#include "clang/AST/Expr.h" -#include "clang/AST/PrettyPrinter.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/AST/Stmt.h" -#include "clang/AST/Type.h" -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Basic/CharInfo.h" -#include "clang/Basic/LLVM.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Basic/TypeTraits.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Tooling/CommonOptionsParser.h" -#include "clang/Tooling/Tooling.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -using namespace clang; -using namespace clang::ast_matchers; - -// MacroDef/MacroMap hold information about macros defined in the file. -struct MacroDef { - std::string Value; // value as written in the source - SourceRange SourceRange; // soruce range of the value -}; -using MacroMap = std::unordered_map; - -// ConstDesc describes a macro or an enum value. -struct ConstDesc { - std::string Name; - std::string Value; - SourceRange SourceRange; - int64_t IntValue; -}; - -class Extractor : public MatchFinder, public tooling::SourceFileCallbacks { -public: - Extractor() { - match(&Extractor::matchFunctionDef, functionDecl(isDefinition()).bind("function")); - - match(&Extractor::matchSyscall, - functionDecl(isExpandedFromMacro("SYSCALL_DEFINEx"), matchesName("__do_sys_.*")).bind("syscall")); - - match(&Extractor::matchIouring, - translationUnitDecl(forEachDescendant( - varDecl(hasType(constantArrayType(hasElementType(hasDeclaration(recordDecl(hasName("io_issue_def")))))), - isDefinition()) - .bind("io_issue_defs")))); - - match(&Extractor::matchNetlinkPolicy, - translationUnitDecl(forEachDescendant( - varDecl(hasType(constantArrayType(hasElementType(hasDeclaration(recordDecl(hasName("nla_policy")))))), - isDefinition()) - .bind("netlink_policy")))); - - match(&Extractor::matchNetlinkFamily, varDecl(hasType(recordDecl(hasName("genl_family")).bind("genl_family")), - has(initListExpr().bind("genl_family_init")))); - - match(&Extractor::matchFileOps, - varDecl(forEachDescendant(initListExpr(hasType(recordDecl(hasName("file_operations")))).bind("init"))) - .bind("var")); - } - - void print() const { Output.print(); } - -private: - friend struct FunctionAnalyzer; - using MatchFunc = void (Extractor::*)(); - // Thunk that redirects MatchCallback::run method to one of the methods of the Extractor class. - struct MatchCallbackThunk : MatchFinder::MatchCallback { - Extractor& Ex; - MatchFunc Action; - MatchCallbackThunk(Extractor& Ex, MatchFunc Action) : Ex(Ex), Action(Action) {} - void run(const MatchFinder::MatchResult& Result) override { Ex.run(Result, Action); } - }; - std::vector> Matchers; - - // These set to point to the Result of the current match (to avoid passing them through all methods). - const BoundNodes* Nodes = nullptr; - ASTContext* Context = nullptr; - SourceManager* SourceManager = nullptr; - - Output Output; - MacroMap Macros; - std::unordered_map EnumDedup; - std::unordered_map StructDedup; - std::unordered_map FileOpsDedup; - - void matchFunctionDef(); - void matchSyscall(); - void matchIouring(); - void matchNetlinkPolicy(); - void matchNetlinkFamily(); - void matchFileOps(); - bool handleBeginSource(CompilerInstance& CI) override; - template void match(MatchFunc Action, const M& Matcher); - void run(const MatchFinder::MatchResult& Result, MatchFunc Action); - template const T* getResult(StringRef ID) const; - FieldType extractRecord(QualType QT, const RecordType* Typ, const std::string& BackupName); - std::string extractEnum(QualType QT, const EnumDecl* Decl); - void emitConst(const std::string& Name, int64_t Val, SourceLocation Loc); - std::string getFuncName(const Expr* Expr); - std::string getDeclName(const Expr* Expr); - const ValueDecl* getValueDecl(const Expr* Expr); - std::string getDeclFileID(const Decl* Decl); - std::string getUniqueDeclName(const NamedDecl* Decl); - std::vector> extractDesignatedInitConsts(const VarDecl& ArrayDecl); - FieldType genType(QualType Typ, const std::string& BackupName = ""); - std::unordered_map structFieldIndexes(const RecordDecl* Decl); - template T evaluate(const Expr* E); - template - std::vector findAllMatches(const Node* Expr, const Condition& Cond); - template - const T* findFirstMatch(const Node* Expr, const Condition& Cond); - std::optional getSizeofType(const Expr* E); - int sizeofType(const Type* T); - int alignofType(const Type* T); - void extractIoctl(const Expr* Cmd, const ConstDesc& Const); - std::optional isMacroOrEnum(const Expr* E); - ConstDesc constDesc(const Expr* E, const std::string& Str, const std::string& Value, const SourceRange& SourceRange); -}; - -// PPCallbacksTracker records all macro definitions (name/value/source location). -class PPCallbacksTracker : public PPCallbacks { -public: - PPCallbacksTracker(Preprocessor& PP, MacroMap& Macros) : SM(PP.getSourceManager()), Macros(Macros) {} - -private: - SourceManager& SM; - MacroMap& Macros; - - void MacroDefined(const Token& MacroName, const MacroDirective* MD) override { - const char* NameBegin = SM.getCharacterData(MacroName.getLocation()); - const char* NameEnd = SM.getCharacterData(MacroName.getEndLoc()); - std::string Name(NameBegin, NameEnd - NameBegin); - const char* ValBegin = SM.getCharacterData(MD->getMacroInfo()->getDefinitionLoc()); - const char* ValEnd = SM.getCharacterData(MD->getMacroInfo()->getDefinitionEndLoc()) + 1; - // Definition includes the macro name, remove it. - ValBegin += std::min(Name.size(), ValEnd - ValBegin); - // Trim whitespace from both ends. - while (ValBegin < ValEnd && isspace(*ValBegin)) - ValBegin++; - while (ValBegin < ValEnd && isspace(*(ValEnd - 1))) - ValEnd--; - std::string Value(ValBegin, ValEnd - ValBegin); - Macros[Name] = MacroDef{ - .Value = Value, - .SourceRange = SourceRange(MD->getMacroInfo()->getDefinitionLoc(), MD->getMacroInfo()->getDefinitionEndLoc()), - }; - } -}; - -const Expr* removeCasts(const Expr* E) { - for (;;) { - if (auto* P = dyn_cast(E)) - E = P->getSubExpr(); - else if (auto* C = dyn_cast(E)) - E = C->getSubExpr(); - else - break; - } - return E; -} - -bool Extractor::handleBeginSource(CompilerInstance& CI) { - Preprocessor& PP = CI.getPreprocessor(); - PP.addPPCallbacks(std::make_unique(PP, Macros)); - return true; -} - -template void Extractor::match(MatchFunc Action, const M& Matcher) { - Matchers.emplace_back(new MatchCallbackThunk(*this, Action)); - addMatcher(Matcher, Matchers.back().get()); -} - -void Extractor::run(const MatchFinder::MatchResult& Result, MatchFunc Action) { - Nodes = &Result.Nodes; - Context = Result.Context; - SourceManager = Result.SourceManager; - (this->*Action)(); -} - -template const T* Extractor::getResult(StringRef ID) const { return Nodes->getNodeAs(ID); } - -std::string TypeName(QualType QT) { - std::string Name = QT.getAsString(); - auto Attr = Name.find(" __attribute__"); - if (Attr != std::string::npos) - Name = Name.substr(0, Attr); - return Name; -} - -// Top function that converts any clang type QT to our output type. -FieldType Extractor::genType(QualType QT, const std::string& BackupName) { - const Type* T = QT.IgnoreParens().getUnqualifiedType().getDesugaredType(*Context).getTypePtr(); - if (llvm::isa(T)) { - return IntType{.ByteSize = sizeofType(T), .Name = TypeName(QT), .Base = QualType(T, 0).getAsString()}; - } - if (auto* Typ = llvm::dyn_cast(T)) { - return IntType{.ByteSize = sizeofType(T), .Enum = extractEnum(QT, Typ->getDecl())}; - } - if (llvm::isa(T)) { - return PtrType{.Elem = TodoType(), .IsConst = true}; - } - if (auto* Typ = llvm::dyn_cast(T)) { - return ArrType{.Elem = genType(Typ->getElementType(), BackupName)}; - } - if (auto* Typ = llvm::dyn_cast(T)) { - return extractRecord(QT, Typ, BackupName); - } - if (auto* Typ = llvm::dyn_cast(T)) { - // TODO: the size may be a macro that is different for each arch, e.g.: - // long foo[FOOSIZE/sizeof(long)]; - int Size = Typ->getSize().getZExtValue(); - return ArrType{ - .Elem = genType(Typ->getElementType(), BackupName), - .MinSize = Size, - .MaxSize = Size, - .Align = alignofType(Typ), - .IsConstSize = true, - }; - } - if (auto* Typ = llvm::dyn_cast(T)) { - FieldType Elem; - const QualType& Pointee = Typ->getPointeeType(); - if (Pointee->isAnyCharacterType()) - Elem = BufferType{.IsString = true}; - else if (Pointee->isVoidType()) - Elem = ArrType{.Elem = TodoType()}; - else - Elem = genType(Pointee, BackupName); // note: it may be an array as well - return PtrType{ - .Elem = std::move(Elem), - .IsConst = Pointee.isConstQualified(), - }; - } - QT.dump(); - llvm::report_fatal_error("unhandled type"); -} - -FieldType Extractor::extractRecord(QualType QT, const RecordType* Typ, const std::string& BackupName) { - auto* Decl = Typ->getDecl()->getDefinition(); - if (!Decl) - return TodoType(); // definition is in a different TU - std::string Name = Decl->getDeclName().getAsString(); - // If it's a typedef of anon struct, we want to use the typedef name: - // typedef struct {...} foo_t; - if (Name.empty() && QT->isTypedefNameType()) - Name = QualType(Typ, 0).getAsString(); - // If no other names, fallback to the parent-struct-based name. - if (Name.empty()) { - assert(!BackupName.empty()); - // The BackupName is supposed to be unique. - assert(!StructDedup[BackupName]); - Name = BackupName; - } - if (Name.find("struct ") == 0) - Name = Name.substr(strlen("struct ")); - if (StructDedup[Name]) - return Name; - StructDedup[Name] = true; - std::vector Fields; - for (const FieldDecl* F : Decl->fields()) { - std::string FieldName = F->getNameAsString(); - std::string BackupFieldName = Name + "_" + FieldName; - bool IsAnonymous = false; - if (FieldName.empty()) { - BackupFieldName = Name + "_" + std::to_string(F->getFieldIndex()); - FieldName = BackupFieldName; - IsAnonymous = true; - } - FieldType FieldType = genType(F->getType(), BackupFieldName); - int BitWidth = F->isBitField() ? F->getBitWidthValue() : 0; - int CountedBy = F->getType()->isCountAttributedType() - ? llvm::dyn_cast( - F->getType()->getAs()->getCountExpr()->getReferencedDeclOfCallee()) - ->getFieldIndex() - : -1; - Fields.push_back(Field{ - .Name = FieldName, - .IsAnonymous = IsAnonymous, - .BitWidth = BitWidth, - .CountedBy = CountedBy, - .Type = std::move(FieldType), - }); - } - int AlignAttr = 0; - bool Packed = false; - if (Decl->isStruct() && Decl->hasAttrs()) { - for (const auto& A : Decl->getAttrs()) { - if (auto* Attr = llvm::dyn_cast(A)) - AlignAttr = Attr->getAlignment(*Context) / 8; - else if (llvm::isa(A)) - Packed = true; - } - } - Output.emit(Struct{ - .Name = Name, - .ByteSize = sizeofType(Typ), - .Align = alignofType(Typ), - .IsUnion = Decl->isUnion(), - .IsPacked = Packed, - .AlignAttr = AlignAttr, - .Fields = std::move(Fields), - }); - return Name; -} - -std::string Extractor::extractEnum(QualType QT, const EnumDecl* Decl) { - std::string Name = Decl->getNameAsString(); - if (Name.empty()) { - // This is an unnamed enum declared with a typedef: - // typedef enum {...} enum_name; - auto Typedef = dyn_cast(QT.getTypePtr()); - if (Typedef) - Name = Typedef->getDecl()->getNameAsString(); - if (Name.empty()) { - QT.dump(); - llvm::report_fatal_error("enum with empty name"); - } - } - if (EnumDedup[Name]) - return Name; - EnumDedup[Name] = true; - std::vector Values; - for (const auto* Enumerator : Decl->enumerators()) { - const std::string& Name = Enumerator->getNameAsString(); - emitConst(Name, Enumerator->getInitVal().getExtValue(), Decl->getBeginLoc()); - Values.push_back(Name); - } - Output.emit(Enum{ - .Name = Name, - .Values = Values, - }); - return Name; -} - -void Extractor::emitConst(const std::string& Name, int64_t Val, SourceLocation Loc) { - Output.emit(ConstInfo{ - .Name = Name, - .Filename = std::filesystem::relative(SourceManager->getFilename(Loc).str()), - .Value = Val, - }); -} - -// Returns base part of the source file containing the canonical declaration. -// If the passed declaration is also a definition, then it will look for a preceeding declaration. -// This is used to generate unique names for static definitions that may have duplicate names -// across different TUs. We assume that the base part of the source file is enough -// to make them unique. -std::string Extractor::getDeclFileID(const Decl* Decl) { - std::string file = - std::filesystem::path(SourceManager->getFilename(Decl->getCanonicalDecl()->getSourceRange().getBegin()).str()) - .filename() - .stem() - .string(); - std::replace(file.begin(), file.end(), '-', '_'); - return file; -} - -std::optional Extractor::isMacroOrEnum(const Expr* E) { - if (!E) - return {}; - if (auto* Enum = removeCasts(E)->getEnumConstantDecl()) - return constDesc(E, Enum->getNameAsString(), "", Enum->getSourceRange()); - auto Range = Lexer::getAsCharRange(E->getSourceRange(), *SourceManager, Context->getLangOpts()); - const std::string& Str = Lexer::getSourceText(Range, *SourceManager, Context->getLangOpts()).str(); - auto MacroDef = Macros.find(Str); - if (MacroDef == Macros.end()) - return {}; - return constDesc(E, Str, MacroDef->second.Value, MacroDef->second.SourceRange); -} - -ConstDesc Extractor::constDesc(const Expr* E, const std::string& Str, const std::string& Value, - const SourceRange& SourceRange) { - int64_t Val = evaluate(E); - emitConst(Str, Val, SourceRange.getBegin()); - return ConstDesc{ - .Name = Str, - .Value = Value, - .SourceRange = SourceRange, - .IntValue = Val, - }; -} - -template void matchHelper(MatchFinder& Finder, ASTContext* Context, const Node* Expr) { - Finder.match(*Expr, *Context); -} - -void matchHelper(MatchFinder& Finder, ASTContext* Context, const ASTContext* Expr) { - assert(Context == Expr); - Finder.matchAST(*Context); -} - -// Returns all matches of Cond named "res" in Expr and returns them casted to T. -// Expr can point to Context for a global match. -template -std::vector Extractor::findAllMatches(const Node* Expr, const Condition& Cond) { - if (!Expr) - return {}; - struct Matcher : MatchFinder::MatchCallback { - std::vector Matches; - void run(const MatchFinder::MatchResult& Result) override { - if (const T* M = Result.Nodes.getNodeAs("res")) - Matches.push_back(M); - } - }; - MatchFinder Finder; - Matcher Matcher; - Finder.addMatcher(Cond, &Matcher); - matchHelper(Finder, Context, Expr); - return std::move(Matcher.Matches); -} - -// Returns the first match of Cond named "res" in Expr and returns it casted to T. -// If no match is found, returns nullptr. -template -const T* Extractor::findFirstMatch(const Node* Expr, const Condition& Cond) { - const auto& Matches = findAllMatches(Expr, Cond); - return Matches.empty() ? nullptr : Matches[0]; -} - -// Extracts the first function reference from the expression. -// TODO: try to extract the actual function reference the expression will be evaluated to -// (the first one is not necessarily the right one). -std::string Extractor::getFuncName(const Expr* Expr) { - auto* Decl = - findFirstMatch(Expr, stmt(forEachDescendant(declRefExpr(hasType(functionType())).bind("res")))); - return Decl ? Decl->getDecl()->getNameAsString() : ""; -} - -// If expression refers to some identifier, returns the identifier name. -// Otherwise returns an empty string. -// For example, if the expression is `function_name`, returns "function_name" string. -std::string Extractor::getDeclName(const Expr* Expr) { - // The expression can be complex and include casts and e.g. InitListExpr, - // to remove all of these we match the first/any DeclRefExpr. - auto* Decl = getValueDecl(Expr); - return Decl ? Decl->getNameAsString() : ""; -} - -// Returns the first ValueDecl in the expression. -const ValueDecl* Extractor::getValueDecl(const Expr* Expr) { - // The expression can be complex and include casts and e.g. InitListExpr, - // to remove all of these we match the first/any DeclRefExpr. - auto* Decl = findFirstMatch(Expr, stmt(forEachDescendant(declRefExpr().bind("res")))); - return Decl ? Decl->getDecl() : nullptr; -} - -// Recursively finds first sizeof in the expression and return the type passed to sizeof. -std::optional Extractor::getSizeofType(const Expr* E) { - auto* Res = findFirstMatch( - E, stmt(forEachDescendant(unaryExprOrTypeTraitExpr(ofKind(UETT_SizeOf)).bind("res")))); - if (!Res) - return {}; - if (Res->isArgumentType()) - return Res->getArgumentType(); - return Res->getArgumentExpr()->getType(); -} - -// Returns map of field name -> field index. -std::unordered_map Extractor::structFieldIndexes(const RecordDecl* Decl) { - // TODO: this is wrong for structs that contain unions and anonymous sub-structs (e.g. genl_split_ops). - // To handle these we would need to look at InitListExpr::getInitializedFieldInUnion, and recurse - // into anonymous structs. - std::unordered_map Indexes; - for (const auto& F : Decl->fields()) - Indexes[F->getNameAsString()] = F->getFieldIndex(); - return Indexes; -} - -// Extracts enum info from array variable designated initialization. -// For example, for the following code: -// -// enum Foo { -// FooA = 11, -// FooB = 42, -// }; -// -// struct Bar bars[] = { -// [FooA] = {...}, -// [FooB] = {...}, -// }; -// -// it returns the following vector: {{11, "FooA"}, {42, "FooB"}}. -std::vector> Extractor::extractDesignatedInitConsts(const VarDecl& ArrayDecl) { - const auto& Matches = findAllMatches( - &ArrayDecl, - decl(forEachDescendant(designatedInitExpr(optionally(has(constantExpr(has(declRefExpr())).bind("res"))))))); - std::vector> Inits; - for (auto* Match : Matches) { - const int64_t Val = *Match->getAPValueResult().getInt().getRawData(); - const auto& Name = Match->getEnumConstantDecl()->getNameAsString(); - const auto& Loc = Match->getEnumConstantDecl()->getBeginLoc(); - emitConst(Name, Val, Loc); - Inits.emplace_back(Val, Name); - } - return Inits; -} - -int Extractor::sizeofType(const Type* T) { return static_cast(Context->getTypeInfo(T).Width) / 8; } -int Extractor::alignofType(const Type* T) { return static_cast(Context->getTypeInfo(T).Align) / 8; } - -template T Extractor::evaluate(const Expr* E) { - Expr::EvalResult Res; - E->EvaluateAsConstantExpr(Res, *Context); - // TODO: it's unclear what to do if it's not Int (in some cases we see None here). - if (Res.Val.getKind() != APValue::Int) - return 0; - auto val = Res.Val.getInt(); - if (val.isSigned()) - return val.sextOrTrunc(64).getSExtValue(); - return val.zextOrTrunc(64).getZExtValue(); -} - -void Extractor::matchNetlinkPolicy() { - const auto* PolicyArray = getResult("netlink_policy"); - const auto* Init = llvm::dyn_cast_if_present(PolicyArray->getInit()); - if (!Init) - return; - const auto& InitConsts = extractDesignatedInitConsts(*PolicyArray); - auto Fields = structFieldIndexes(Init->getInit(0)->getType()->getAsRecordDecl()); - std::vector Attrs; - for (const auto& [I, Name] : InitConsts) { - const auto* AttrInit = llvm::dyn_cast(Init->getInit(I)); - const std::string& AttrKind = getDeclName(AttrInit->getInit(Fields["type"])); - if (AttrKind == "NLA_REJECT") - continue; - auto* LenExpr = AttrInit->getInit(Fields["len"]); - int MaxSize = 0; - std::string NestedPolicy; - std::unique_ptr Elem; - if (AttrKind == "NLA_NESTED" || AttrKind == "NLA_NESTED_ARRAY") { - if (const auto* NestedDecl = getValueDecl(AttrInit->getInit(2))) - NestedPolicy = getUniqueDeclName(NestedDecl); - } else { - MaxSize = evaluate(LenExpr); - if (auto SizeofType = getSizeofType(LenExpr)) - Elem = std::make_unique(genType(*SizeofType)); - } - Attrs.push_back(NetlinkAttr{ - .Name = Name, - .Kind = AttrKind, - .MaxSize = MaxSize, - .NestedPolicy = NestedPolicy, - .Elem = std::move(Elem), - }); - } - Output.emit(NetlinkPolicy{ - .Name = getUniqueDeclName(PolicyArray), - .Attrs = std::move(Attrs), - }); -} - -void Extractor::matchNetlinkFamily() { - const auto* FamilyInit = getResult("genl_family_init"); - auto Fields = structFieldIndexes(getResult("genl_family")); - const std::string& FamilyName = llvm::dyn_cast(FamilyInit->getInit(Fields["name"]))->getString().str(); - std::string DefaultPolicy; - if (const auto* PolicyDecl = FamilyInit->getInit(Fields["policy"])->getAsBuiltinConstantDeclRef(*Context)) - DefaultPolicy = getUniqueDeclName(PolicyDecl); - std::vector Ops; - for (const auto& OpsName : {"ops", "small_ops", "split_ops"}) { - const auto* OpsDecl = - llvm::dyn_cast_if_present(FamilyInit->getInit(Fields[OpsName])->getAsBuiltinConstantDeclRef(*Context)); - const auto NumOps = FamilyInit->getInit(Fields[std::string("n_") + OpsName])->getIntegerConstantExpr(*Context); - // The ops variable may be defined in another TU. - // TODO: extract variables from another TUs. - if (!OpsDecl || !OpsDecl->getInit() || !NumOps) - continue; - const auto* OpsInit = llvm::dyn_cast(OpsDecl->getInit()); - auto OpsFields = structFieldIndexes(OpsInit->getInit(0)->getType()->getAsRecordDecl()); - for (int I = 0; I < *NumOps; I++) { - const auto* OpInit = llvm::dyn_cast(OpsInit->getInit(I)); - const auto* CmdInit = OpInit->getInit(OpsFields["cmd"])->getEnumConstantDecl(); - if (!CmdInit) - continue; - const std::string& OpName = CmdInit->getNameAsString(); - emitConst(OpName, CmdInit->getInitVal().getExtValue(), CmdInit->getBeginLoc()); - std::string Policy; - if (OpsFields.count("policy") != 0) { - if (const auto* PolicyDecl = OpInit->getInit(OpsFields["policy"])->getAsBuiltinConstantDeclRef(*Context)) - Policy = getUniqueDeclName(PolicyDecl); - } - if (Policy.empty()) - Policy = DefaultPolicy; - std::string Func = getFuncName(OpInit->getInit(OpsFields["doit"])); - if (Func.empty()) - Func = getFuncName(OpInit->getInit(OpsFields["dumpit"])); - int Flags = evaluate(OpInit->getInit(OpsFields["flags"])); - const char* Access = AccessUser; - constexpr int GENL_ADMIN_PERM = 0x01; - constexpr int GENL_UNS_ADMIN_PERM = 0x10; - if (Flags & GENL_ADMIN_PERM) - Access = AccessAdmin; - else if (Flags & GENL_UNS_ADMIN_PERM) - Access = AccessNsAdmin; - Ops.push_back(NetlinkOp{ - .Name = OpName, - .Func = Func, - .Access = Access, - .Policy = Policy, - }); - } - } - Output.emit(NetlinkFamily{ - .Name = FamilyName, - .Ops = std::move(Ops), - }); -} - -std::string Extractor::getUniqueDeclName(const NamedDecl* Decl) { - return Decl->getNameAsString() + "_" + getDeclFileID(Decl); -} - -bool isInterestingCall(const CallExpr* Call) { - auto* CalleeDecl = Call->getDirectCallee(); - // We don't handle indirect calls yet. - if (!CalleeDecl) - return false; - // Builtins are not interesting and won't have a body. - if (CalleeDecl->getBuiltinID() != Builtin::ID::NotBuiltin) - return false; - const std::string& Callee = CalleeDecl->getNameAsString(); - // There are too many of these and they should only be called at runtime in broken builds. - if (Callee.rfind("__compiletime_assert", 0) == 0 || Callee == "____wrong_branch_error" || - Callee == "__bad_size_call_parameter") - return false; - return true; -} - -struct FunctionAnalyzer : RecursiveASTVisitor { - FunctionAnalyzer(Extractor* Extractor, const FunctionDecl* Func) - : Extractor(Extractor), CurrentFunc(Func->getNameAsString()), Context(Extractor->Context), - SourceManager(Extractor->SourceManager) { - // The global function scope. - Scopes.push_back(FunctionScope{.Arg = -1}); - Current = &Scopes[0]; - TraverseStmt(Func->getBody()); - } - - bool VisitBinaryOperator(const BinaryOperator* B) { - if (B->isAssignmentOp()) - noteFact(getTypingEntity(B->getRHS()), getTypingEntity(B->getLHS())); - return true; - } - - bool VisitVarDecl(const VarDecl* D) { - if (D->getStorageDuration() == SD_Automatic) - noteFact(getTypingEntity(D->getInit()), getDeclTypingEntity(D)); - return true; - } - - bool VisitReturnStmt(const ReturnStmt* Ret) { - noteFact(getTypingEntity(Ret->getRetValue()), EntityReturn{.Func = CurrentFunc}); - return true; - } - - bool VisitCallExpr(const CallExpr* Call) { - if (isInterestingCall(Call)) { - const std::string& Callee = Call->getDirectCallee()->getNameAsString(); - Current->Calls.push_back(Callee); - for (unsigned AI = 0; AI < Call->getNumArgs(); AI++) { - noteFact(getTypingEntity(Call->getArg(AI)), EntityArgument{ - .Func = Callee, - .Arg = AI, - }); - } - } - return true; - } - - bool VisitSwitchStmt(const SwitchStmt* S) { - // We are only interested in switches on the function arguments - // with cases that mention defines from uapi headers. - // This covers ioctl/fcntl/prctl/ptrace/etc. - bool IsInteresting = false; - auto Param = getTypingEntity(S->getCond()); - if (Current == &Scopes[0] && Param && Param->Argument) { - for (auto* C = S->getSwitchCaseList(); C; C = C->getNextSwitchCase()) { - auto* Case = dyn_cast(C); - if (!Case) - continue; - auto LMacro = Extractor->isMacroOrEnum(Case->getLHS()); - auto RMacro = Extractor->isMacroOrEnum(Case->getRHS()); - if (LMacro || RMacro) { - IsInteresting = true; - break; - } - } - } - - SwitchStack.push({S, IsInteresting, IsInteresting ? static_cast(Param->Argument->Arg) : -1}); - return true; - } - - bool VisitSwitchCase(const SwitchCase* C) { - if (!SwitchStack.top().IsInteresting) - return true; - // If there are several cases with the same "body", we want to create new scope - // only for the first one: - // case FOO: - // case BAR: - // ... some code ... - if (!C->getNextSwitchCase() || C->getNextSwitchCase()->getSubStmt() != C) { - int Line = SourceManager->getExpansionLineNumber(C->getBeginLoc()); - if (Current != &Scopes[0]) - Current->EndLine = Line; - Scopes.push_back(FunctionScope{ - .Arg = SwitchStack.top().Arg, - .StartLine = Line, - }); - Current = &Scopes.back(); - } - // Otherwise it's a default case, for which we don't add any values. - if (auto* Case = dyn_cast(C)) { - int64_t LVal = Extractor->evaluate(Case->getLHS()); - auto LMacro = Extractor->isMacroOrEnum(Case->getLHS()); - if (LMacro) { - Current->Values.push_back(LMacro->Name); - Extractor->extractIoctl(Case->getLHS(), *LMacro); - } else { - Current->Values.push_back(std::to_string(LVal)); - } - if (Case->caseStmtIsGNURange()) { - // GNU range is: - // case FOO ... BAR: - // Add all values in the range. - int64_t RVal = Extractor->evaluate(Case->getRHS()); - auto RMacro = Extractor->isMacroOrEnum(Case->getRHS()); - for (int64_t V = LVal + 1; V <= RVal - (RMacro ? 1 : 0); V++) - Current->Values.push_back(std::to_string(V)); - if (RMacro) - Current->Values.push_back(RMacro->Name); - } - } - return true; - } - - bool dataTraverseStmtPost(const Stmt* S) { - if (SwitchStack.empty()) - return true; - auto Top = SwitchStack.top(); - if (Top.S != S) - return true; - if (Top.IsInteresting) { - if (Current != &Scopes[0]) - Current->EndLine = SourceManager->getExpansionLineNumber(S->getEndLoc()); - Current = &Scopes[0]; - } - SwitchStack.pop(); - return true; - } - - void noteFact(std::optional&& Src, std::optional&& Dst) { - if (Src && Dst) - Current->Facts.push_back({std::move(*Src), std::move(*Dst)}); - } - - std::optional getTypingEntity(const Expr* E); - std::optional getDeclTypingEntity(const Decl* Decl); - - struct SwitchDesc { - const SwitchStmt* S; - bool IsInteresting; - int Arg; - }; - - Extractor* Extractor; - std::string CurrentFunc; - ASTContext* Context; - SourceManager* SourceManager; - std::vector Scopes; - FunctionScope* Current = nullptr; - std::unordered_map LocalVars; - std::unordered_map LocalSeq; - std::stack SwitchStack; -}; - -void Extractor::matchFunctionDef() { - const auto* Func = getResult("function"); - if (!Func->getBody()) - return; - auto Range = Func->getSourceRange(); - const std::string& SourceFile = - std::filesystem::relative(SourceManager->getFilename(SourceManager->getExpansionLoc(Range.getBegin())).str()); - const int StartLine = SourceManager->getExpansionLineNumber(Range.getBegin()); - const int EndLine = SourceManager->getExpansionLineNumber(Range.getEnd()); - FunctionAnalyzer Analyzer(this, Func); - Output.emit(Function{ - .Name = Func->getNameAsString(), - .File = SourceFile, - .StartLine = StartLine, - .EndLine = EndLine, - .IsStatic = Func->isStatic(), - .Scopes = std::move(Analyzer.Scopes), - }); -} - -std::optional FunctionAnalyzer::getTypingEntity(const Expr* E) { - if (!E) - return {}; - E = removeCasts(E); - if (auto* DeclRef = dyn_cast(E)) { - return getDeclTypingEntity(DeclRef->getDecl()); - } else if (auto* Member = dyn_cast(E)) { - const Type* StructType = - Member->getBase()->getType().IgnoreParens().getUnqualifiedType().getDesugaredType(*Context).getTypePtr(); - if (auto* T = dyn_cast(StructType)) - StructType = T->getPointeeType().IgnoreParens().getUnqualifiedType().getDesugaredType(*Context).getTypePtr(); - auto* StructDecl = dyn_cast(StructType)->getDecl(); - std::string StructName = StructDecl->getNameAsString(); - if (StructName.empty()) { - // The struct may be anonymous, but we need some name. - // Ideally we generate the same name we generate in struct definitions, then it will be possible - // to match them between each other. However, it does not seem to be easy. We can use DeclContext::getParent - // to get declaration of the enclosing struct, but we will also need to figure out the field index - // and handle all corner cases. For now we just use the following quick hack: hash declaration file:line. - // Note: the hash must be stable across different machines (for test golden files), so we take just - // the last part of the file name. - const std::string& SourceFile = - std::filesystem::path( - SourceManager->getFilename(SourceManager->getExpansionLoc(StructDecl->getBeginLoc())).str()) - .filename() - .string(); - int Line = SourceManager->getExpansionLineNumber(StructDecl->getBeginLoc()); - StructName = std::to_string(std::hash()(SourceFile) + std::hash()(Line)); - } - return EntityField{ - .Struct = StructName, - .Field = Member->getMemberDecl()->getNameAsString(), - }; - } else if (auto* Unary = dyn_cast(E)) { - if (Unary->getOpcode() == UnaryOperatorKind::UO_AddrOf) { - if (auto* DeclRef = dyn_cast(removeCasts(Unary->getSubExpr()))) { - if (auto* Var = dyn_cast(DeclRef->getDecl())) { - if (Var->hasGlobalStorage()) { - return EntityGlobalAddr{ - .Name = Extractor->getUniqueDeclName(Var), - }; - } - } - } - } - } else if (auto* Call = dyn_cast(E)) { - if (isInterestingCall(Call)) { - return EntityReturn{ - .Func = Call->getDirectCallee()->getNameAsString(), - }; - } - } - return {}; -} - -std::optional FunctionAnalyzer::getDeclTypingEntity(const Decl* Decl) { - if (auto* Parm = dyn_cast(Decl)) { - return EntityArgument{ - .Func = CurrentFunc, - .Arg = Parm->getFunctionScopeIndex(), - }; - } else if (auto* Var = dyn_cast(Decl)) { - if (Var->hasLocalStorage()) { - std::string VarName = Var->getNameAsString(); - // Theoretically there can be several local vars with the same name. - // Give them unique suffixes if that's the case. - if (LocalVars.count(Var) == 0) - LocalVars[Var] = LocalSeq[VarName]++; - if (int Seq = LocalVars[Var]) - VarName += std::to_string(Seq); - return EntityLocal{ - .Name = VarName, - }; - } - } - return {}; -} - -void Extractor::matchSyscall() { - const auto* Func = getResult("syscall"); - std::vector Args; - for (const auto& Param : Func->parameters()) { - Args.push_back(Field{ - .Name = Param->getNameAsString(), - .Type = genType(Param->getType()), - }); - } - Output.emit(Syscall{ - .Func = Func->getNameAsString(), - .Args = std::move(Args), - }); -} - -void Extractor::matchIouring() { - const auto* IssueDefs = getResult("io_issue_defs"); - const auto& InitConsts = extractDesignatedInitConsts(*IssueDefs); - const auto* InitList = llvm::dyn_cast(IssueDefs->getInit()); - auto Fields = structFieldIndexes(InitList->getInit(0)->getType()->getAsRecordDecl()); - for (const auto& [I, Name] : InitConsts) { - const auto& Init = llvm::dyn_cast(InitList->getInit(I)); - std::string Prep = getFuncName(Init->getInit(Fields["prep"])); - if (Prep == "io_eopnotsupp_prep") - continue; - Output.emit(IouringOp{ - .Name = Name, - .Func = getFuncName(Init->getInit(Fields["issue"])), - }); - } -} - -void Extractor::matchFileOps() { - const auto* Fops = getResult("init"); - if (Fops->getNumInits() == 0 || isa(Fops->getInit(0))) { - // Some code constructs produce init list with DesignatedInitExpr. - // Unclear why, but it won't be handled by the following code, and is not necessary to handle. - return; - } - const auto* Var = getResult("var"); - std::string VarName = getUniqueDeclName(Var); - int NameSeq = FileOpsDedup[VarName]++; - if (NameSeq) - VarName += std::to_string(NameSeq); - auto Fields = structFieldIndexes(Fops->getType()->getAsRecordDecl()); - std::string Open = getFuncName(Fops->getInit(Fields["open"])); - std::string Ioctl = getFuncName(Fops->getInit(Fields["unlocked_ioctl"])); - std::string Read = getFuncName(Fops->getInit(Fields["read"])); - if (Read.empty()) - Read = getFuncName(Fops->getInit(Fields["read_iter"])); - std::string Write = getFuncName(Fops->getInit(Fields["write"])); - if (Write.empty()) - Write = getFuncName(Fops->getInit(Fields["write_iter"])); - std::string Mmap = getFuncName(Fops->getInit(Fields["mmap"])); - if (Mmap.empty()) - Mmap = getFuncName(Fops->getInit(Fields["get_unmapped_area"])); - Output.emit(FileOps{ - .Name = VarName, - .Open = std::move(Open), - .Read = std::move(Read), - .Write = std::move(Write), - .Mmap = std::move(Mmap), - .Ioctl = std::move(Ioctl), - }); -} - -void Extractor::extractIoctl(const Expr* Cmd, const ConstDesc& Const) { - // This is old style ioctl defined directly via a number. - // We can't infer anything about it. - if (Const.Value.find("_IO") != 0) - return; - FieldType Type; - auto Dir = _IOC_DIR(Const.IntValue); - if (Dir == _IOC_NONE) { - Type = IntType{.ByteSize = 1, .IsConst = true}; - } else if (std::optional Arg = getSizeofType(Cmd)) { - Type = PtrType{ - .Elem = genType(*Arg), - .IsConst = Dir == _IOC_READ, - }; - } else { - // It is an ioctl, but we failed to get the arg type. - // Let the Go part figure out a good arg type. - return; - } - Output.emit(Ioctl{ - .Name = Const.Name, - .Type = std::move(Type), - }); -} - -int main(int argc, const char** argv) { - llvm::cl::OptionCategory Options("syz-declextract options"); - auto OptionsParser = tooling::CommonOptionsParser::create(argc, argv, Options); - if (!OptionsParser) { - llvm::errs() << OptionsParser.takeError(); - return 1; - } - Extractor Ex; - tooling::ClangTool Tool(OptionsParser->getCompilations(), OptionsParser->getSourcePathList()); - if (Tool.run(tooling::newFrontendActionFactory(&Ex, &Ex).get())) - return 1; - Ex.print(); - return 0; -} diff --git a/tools/syz-declextract/clangtool/json.h b/tools/syz-declextract/clangtool/json.h deleted file mode 100644 index 873313a09..000000000 --- a/tools/syz-declextract/clangtool/json.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2024 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -#ifndef SYZ_DECLEXTRACT_JSON_H -#define SYZ_DECLEXTRACT_JSON_H - -#include -#include -#include -#include -#include - -class JSONPrinter { -public: - JSONPrinter() : Top(*this) {} - - template void Field(const char* Name, const T& V, bool Last = false) { - printf("%s\"%s\": ", Indent(), Name); - print(*this, V); - printf("%s\n", Last ? "" : ","); - } - - const char* Indent() const { - static std::string Indents; - while (Indents.size() < Nesting) - Indents.push_back('\t'); - return Indents.c_str() + Indents.size() - Nesting; - } - - class Scope { - public: - Scope(JSONPrinter& Printer, bool Array = false) : Printer(Printer), Array(Array) { - printf("%c\n", "{["[Array]); - Printer.Nesting++; - assert(Printer.Nesting < 1000); - } - - ~Scope() { - assert(Printer.Nesting > 0); - Printer.Nesting--; - printf("%s%c", Printer.Indent(), "}]"[Array]); - } - - private: - JSONPrinter& Printer; - const bool Array; - }; - -private: - friend class Scope; - size_t Nesting = 0; - Scope Top; -}; - -inline void print(JSONPrinter& Printer, int V) { printf("%d", V); } -inline void print(JSONPrinter& Printer, unsigned V) { printf("%u", V); } -inline void print(JSONPrinter& Printer, int64_t V) { printf("%ld", V); } -inline void print(JSONPrinter& Printer, bool V) { printf("%s", V ? "true" : "false"); } -inline void print(JSONPrinter& Printer, const char* V) { printf("\"%s\"", V ? V : ""); } -inline void print(JSONPrinter& Printer, const std::string& V) { print(Printer, V.c_str()); } - -template void print(JSONPrinter& Printer, const std::unique_ptr& V) { - if (!V) - printf("null"); - else - print(Printer, *V); -} - -template void print(JSONPrinter& Printer, const std::vector& V) { - JSONPrinter::Scope Scope(Printer, true); - size_t i = 0; - for (const auto& Elem : V) { - printf("%s", Printer.Indent()); - print(Printer, Elem); - printf("%s\n", ++i == V.size() ? "" : ","); - } -} - -#endif diff --git a/tools/syz-declextract/clangtool/output.h b/tools/syz-declextract/clangtool/output.h deleted file mode 100644 index 56b207be1..000000000 --- a/tools/syz-declextract/clangtool/output.h +++ /dev/null @@ -1,474 +0,0 @@ -// Copyright 2024 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -#ifndef SYZ_DECLEXTRACT_OUTPUT_H -#define SYZ_DECLEXTRACT_OUTPUT_H - -#include -#include -#include -#include - -const char* const AccessUnknown = ""; -const char* const AccessUser = "user"; -const char* const AccessNsAdmin = "ns_admin"; -const char* const AccessAdmin = "admin"; - -struct IntType; -struct PtrType; -struct ArrType; -struct BufferType; - -struct FieldType { - std::unique_ptr Int; - std::unique_ptr Ptr; - std::unique_ptr Array; - std::unique_ptr Buffer; - std::unique_ptr Struct; - - FieldType() = default; - FieldType(IntType&& Typ) : Int(std::make_unique(std::move(Typ))) {} - FieldType(PtrType&& Typ) : Ptr(std::make_unique(std::move(Typ))) {} - FieldType(ArrType&& Typ) : Array(std::make_unique(std::move(Typ))) {} - FieldType(BufferType&& Typ) : Buffer(std::make_unique(std::move(Typ))) {} - FieldType(const std::string& Typ) : Struct(std::make_unique(std::move(Typ))) {} -}; - -struct IntType { - int ByteSize = 0; - int64_t MinValue = 0; - int64_t MaxValue = 0; - bool IsConst = false; - std::string Name; - std::string Base; - std::string Enum; -}; - -struct PtrType { - FieldType Elem; - bool IsConst = false; -}; - -struct ArrType { - FieldType Elem; - int MinSize = 0; - int MaxSize = 0; - int Align = 0; - bool IsConstSize = false; -}; - -struct BufferType { - int MinSize = 0; - int MaxSize = 0; - bool IsString = false; - bool IsNonTerminated = false; -}; - -struct ConstInfo { - std::string Name; - std::string Filename; - int64_t Value; -}; - -struct Field { - std::string Name; - bool IsAnonymous = false; - int BitWidth = 0; - int CountedBy = -1; - FieldType Type; -}; - -struct Struct { - std::string Name; - int ByteSize = 0; - int Align = 0; - bool IsUnion = false; - bool IsPacked = false; - int AlignAttr = 0; - std::vector Fields; -}; - -struct Enum { - std::string Name; - std::vector Values; -}; - -struct Ioctl { - std::string Name; - FieldType Type; -}; - -struct FileOps { - std::string Name; - std::string Open; - std::string Read; - std::string Write; - std::string Mmap; - std::string Ioctl; -}; - -struct EntityReturn { - std::string Func; -}; - -struct EntityArgument { - std::string Func; - unsigned Arg; -}; - -struct EntityField { - std::string Struct; - std::string Field; -}; - -struct EntityLocal { - std::string Name; -}; - -struct EntityGlobalAddr { - std::string Name; -}; - -struct EntityResource { - std::string Type; - std::string SubType; -}; - -struct TypingEntity { - std::unique_ptr Return; - std::unique_ptr Argument; - std::unique_ptr Field; - std::unique_ptr Local; - std::unique_ptr GlobalAddr; - std::unique_ptr Resource; - - TypingEntity() = default; - TypingEntity(EntityReturn&& E) : Return(std::make_unique(std::move(E))) {} - TypingEntity(EntityArgument&& E) : Argument(std::make_unique(std::move(E))) {} - TypingEntity(EntityField&& E) : Field(std::make_unique(std::move(E))) {} - TypingEntity(EntityLocal&& E) : Local(std::make_unique(std::move(E))) {} - TypingEntity(EntityGlobalAddr&& E) : GlobalAddr(std::make_unique(std::move(E))) {} - TypingEntity(EntityResource&& E) : Resource(std::make_unique(std::move(E))) {} -}; - -struct TypingFact { - TypingEntity Src; - TypingEntity Dst; -}; - -struct FunctionScope { - int Arg = 0; - int StartLine = 0; - int EndLine = 0; - std::vector Values; - std::vector Calls; - std::vector Facts; -}; - -struct Function { - std::string Name; - std::string File; - int StartLine = 0; - int EndLine = 0; - bool IsStatic = false; - std::vector Scopes; -}; - -struct Syscall { - std::string Func; - std::vector Args; -}; - -struct IouringOp { - std::string Name; - std::string Func; -}; - -struct NetlinkOp { - std::string Name; - std::string Func; - const char* Access = nullptr; - std::string Policy; -}; - -struct NetlinkFamily { - std::string Name; - std::vector Ops; -}; - -struct NetlinkAttr { - std::string Name; - std::string Kind; - int MaxSize = 0; - std::string NestedPolicy; - std::unique_ptr Elem; -}; - -struct NetlinkPolicy { - std::string Name; - std::vector Attrs; -}; - -inline void print(JSONPrinter& Printer, const ConstInfo& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("name", V.Name); - Printer.Field("filename", V.Filename); - Printer.Field("value", V.Value, true); -} - -inline void print(JSONPrinter& Printer, const Field& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("name", V.Name); - Printer.Field("is_anonymous", V.IsAnonymous); - Printer.Field("bit_width", V.BitWidth); - Printer.Field("counted_by", V.CountedBy); - Printer.Field("type", V.Type, true); -} - -inline void print(JSONPrinter& Printer, const Struct& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("name", V.Name); - Printer.Field("align", V.Align); - Printer.Field("byte_size", V.ByteSize); - Printer.Field("is_union", V.IsUnion); - Printer.Field("is_packed", V.IsPacked); - Printer.Field("align_attr", V.AlignAttr); - Printer.Field("fields", V.Fields, true); -} - -inline void print(JSONPrinter& Printer, const Enum& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("name", V.Name); - Printer.Field("values", V.Values, true); -} - -inline void print(JSONPrinter& Printer, const FieldType& V) { - JSONPrinter::Scope Scope(Printer); - if (V.Int) - Printer.Field("int", *V.Int, true); - else if (V.Ptr) - Printer.Field("ptr", *V.Ptr, true); - else if (V.Array) - Printer.Field("array", *V.Array, true); - else if (V.Buffer) - Printer.Field("buffer", *V.Buffer, true); - else - Printer.Field("struct", *V.Struct, true); -} - -inline void print(JSONPrinter& Printer, const IntType& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("byte_size", V.ByteSize); - Printer.Field("min_value", V.MinValue); - Printer.Field("max_value", V.MaxValue); - Printer.Field("is_const", V.IsConst); - Printer.Field("name", V.Name); - Printer.Field("base", V.Base); - Printer.Field("enum", V.Enum, true); -} - -inline void print(JSONPrinter& Printer, const PtrType& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("elem", V.Elem); - Printer.Field("is_const", V.IsConst, true); -} - -inline void print(JSONPrinter& Printer, const ArrType& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("elem", V.Elem); - Printer.Field("min_size", V.MinSize); - Printer.Field("max_size", V.MaxSize); - Printer.Field("align", V.Align); - Printer.Field("is_const_size", V.IsConstSize, true); -} - -inline void print(JSONPrinter& Printer, const BufferType& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("min_size", V.MinSize); - Printer.Field("max_size", V.MaxSize); - Printer.Field("is_string", V.IsString); - Printer.Field("is_non_terminated", V.IsNonTerminated, true); -} - -inline void print(JSONPrinter& Printer, const Ioctl& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("name", V.Name); - Printer.Field("type", V.Type, true); -} - -inline void print(JSONPrinter& Printer, const FileOps& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("name", V.Name); - Printer.Field("open", V.Open); - Printer.Field("read", V.Read); - Printer.Field("write", V.Write); - Printer.Field("mmap", V.Mmap); - Printer.Field("ioctl", V.Ioctl, true); -} - -inline void print(JSONPrinter& Printer, const EntityReturn& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("func", V.Func, true); -} - -inline void print(JSONPrinter& Printer, const EntityArgument& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("func", V.Func); - Printer.Field("arg", V.Arg, true); -} - -inline void print(JSONPrinter& Printer, const EntityField& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("struct", V.Struct); - Printer.Field("field", V.Field, true); -} - -inline void print(JSONPrinter& Printer, const EntityLocal& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("name", V.Name, true); -} - -inline void print(JSONPrinter& Printer, const EntityGlobalAddr& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("name", V.Name, true); -} - -inline void print(JSONPrinter& Printer, const EntityResource& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("type", V.Type); - Printer.Field("subtype", V.SubType, true); -} - -inline void print(JSONPrinter& Printer, const TypingEntity& V) { - JSONPrinter::Scope Scope(Printer); - if (V.Return) - Printer.Field("return", *V.Return, true); - else if (V.Argument) - Printer.Field("argument", *V.Argument, true); - else if (V.Field) - Printer.Field("field", *V.Field, true); - else if (V.Local) - Printer.Field("local", *V.Local, true); - else if (V.GlobalAddr) - Printer.Field("global_addr", *V.GlobalAddr, true); - else - Printer.Field("resource", *V.Resource, true); -} - -inline void print(JSONPrinter& Printer, const TypingFact& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("src", V.Src); - Printer.Field("dst", V.Dst, true); -} - -inline void print(JSONPrinter& Printer, const FunctionScope& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("arg", V.Arg); - Printer.Field("values", V.Values); - Printer.Field("start_line", V.StartLine); - Printer.Field("end_line", V.EndLine); - Printer.Field("calls", V.Calls); - Printer.Field("facts", V.Facts, true); -} - -inline void print(JSONPrinter& Printer, const Function& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("name", V.Name); - Printer.Field("file", V.File); - Printer.Field("start_line", V.StartLine); - Printer.Field("end_line", V.EndLine); - Printer.Field("is_static", V.IsStatic); - Printer.Field("scopes", V.Scopes, true); -} - -inline void print(JSONPrinter& Printer, const Syscall& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("func", V.Func); - Printer.Field("args", V.Args, true); -} - -inline void print(JSONPrinter& Printer, const IouringOp& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("name", V.Name); - Printer.Field("func", V.Func, true); -} - -inline void print(JSONPrinter& Printer, const NetlinkOp& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("name", V.Name); - Printer.Field("func", V.Func); - Printer.Field("access", V.Access); - Printer.Field("policy", V.Policy, true); -} - -inline void print(JSONPrinter& Printer, const NetlinkFamily& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("name", V.Name); - Printer.Field("ops", V.Ops, true); -} - -inline void print(JSONPrinter& Printer, const NetlinkAttr& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("name", V.Name); - Printer.Field("kind", V.Kind); - Printer.Field("max_size", V.MaxSize); - Printer.Field("nested_policy", V.NestedPolicy); - Printer.Field("elem", V.Elem, true); -} - -inline void print(JSONPrinter& Printer, const NetlinkPolicy& V) { - JSONPrinter::Scope Scope(Printer); - Printer.Field("name", V.Name); - Printer.Field("attrs", V.Attrs, true); -} - -// This type is used when we can't figure out the right type, but need some type to use. -inline FieldType TodoType() { - return IntType{ - .ByteSize = 1, - .Name = "TODO", - .Base = "long", - }; -} - -class Output { -public: - void emit(Function&& V) { Functions.push_back(std::move(V)); } - void emit(ConstInfo&& V) { Consts.push_back(std::move(V)); } - void emit(Struct&& V) { Structs.push_back(std::move(V)); } - void emit(Enum&& V) { Enums.push_back(std::move(V)); } - void emit(Syscall&& V) { Syscalls.push_back(std::move(V)); } - void emit(FileOps&& V) { FileOps.push_back(std::move(V)); } - void emit(Ioctl&& V) { Ioctls.push_back(std::move(V)); } - void emit(IouringOp&& V) { IouringOps.push_back(std::move(V)); } - void emit(NetlinkFamily&& V) { NetlinkFamilies.push_back(std::move(V)); } - void emit(NetlinkPolicy&& V) { NetlinkPolicies.push_back(std::move(V)); } - - void print() const { - JSONPrinter Printer; - Printer.Field("functions", Functions); - Printer.Field("consts", Consts); - Printer.Field("enums", Enums); - Printer.Field("structs", Structs); - Printer.Field("syscalls", Syscalls); - Printer.Field("file_ops", FileOps); - Printer.Field("ioctls", Ioctls); - Printer.Field("iouring_ops", IouringOps); - Printer.Field("netlink_families", NetlinkFamilies); - Printer.Field("netlink_policies", NetlinkPolicies, true); - } - -private: - std::vector Functions; - std::vector Consts; - std::vector Enums; - std::vector Structs; - std::vector Syscalls; - std::vector FileOps; - std::vector Ioctls; - std::vector IouringOps; - std::vector NetlinkFamilies; - std::vector NetlinkPolicies; -}; - -#endif -- cgit mrf-deployment