// Copyright 2024 syzkaller project authors. All rights reserved. // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. #include "json.h" #include "output.h" #include "clang/AST/APValue.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/Attrs.inc" #include "clang/AST/Decl.h" #include "clang/AST/DeclarationName.h" #include "clang/AST/Expr.h" #include "clang/AST/PrettyPrinter.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/Type.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TypeTraits.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Tooling/CommonOptionsParser.h" #include "clang/Tooling/Tooling.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include #include #include #include #include #include #include #include #include #include #include using namespace clang; using namespace clang::ast_matchers; // MacroDef/MacroMap hold information about macros defined in the file. struct MacroDef { std::string Value; // value as written in the source SourceRange SourceRange; // soruce range of the value }; using MacroMap = std::unordered_map; // ConstDesc describes a macro or an enum value. struct ConstDesc { std::string Name; std::string Value; SourceRange SourceRange; int64_t IntValue; }; class Extractor : public MatchFinder, public tooling::SourceFileCallbacks { public: Extractor() { match(&Extractor::matchFunctionDef, functionDecl(isDefinition()).bind("function")); match(&Extractor::matchSyscall, functionDecl(isExpandedFromMacro("SYSCALL_DEFINEx"), matchesName("__do_sys_.*")).bind("syscall")); match(&Extractor::matchIouring, translationUnitDecl(forEachDescendant( varDecl(hasType(constantArrayType(hasElementType(hasDeclaration(recordDecl(hasName("io_issue_def")))))), isDefinition()) .bind("io_issue_defs")))); match(&Extractor::matchNetlinkPolicy, translationUnitDecl(forEachDescendant( varDecl(hasType(constantArrayType(hasElementType(hasDeclaration(recordDecl(hasName("nla_policy")))))), isDefinition()) .bind("netlink_policy")))); match(&Extractor::matchNetlinkFamily, varDecl(hasType(recordDecl(hasName("genl_family")).bind("genl_family")), has(initListExpr().bind("genl_family_init")))); match(&Extractor::matchFileOps, varDecl(forEachDescendant(initListExpr(hasType(recordDecl(hasName("file_operations")))).bind("init"))) .bind("var")); } void print() const { Output.print(); } private: friend struct FunctionAnalyzer; using MatchFunc = void (Extractor::*)(); // Thunk that redirects MatchCallback::run method to one of the methods of the Extractor class. struct MatchCallbackThunk : MatchFinder::MatchCallback { Extractor& Ex; MatchFunc Action; MatchCallbackThunk(Extractor& Ex, MatchFunc Action) : Ex(Ex), Action(Action) {} void run(const MatchFinder::MatchResult& Result) override { Ex.run(Result, Action); } }; std::vector> Matchers; // These set to point to the Result of the current match (to avoid passing them through all methods). const BoundNodes* Nodes = nullptr; ASTContext* Context = nullptr; SourceManager* SourceManager = nullptr; Output Output; MacroMap Macros; std::unordered_map EnumDedup; std::unordered_map StructDedup; std::unordered_map FileOpsDedup; void matchFunctionDef(); void matchSyscall(); void matchIouring(); void matchNetlinkPolicy(); void matchNetlinkFamily(); void matchFileOps(); bool handleBeginSource(CompilerInstance& CI) override; template void match(MatchFunc Action, const M& Matcher); void run(const MatchFinder::MatchResult& Result, MatchFunc Action); template const T* getResult(StringRef ID) const; FieldType extractRecord(QualType QT, const RecordType* Typ, const std::string& BackupName); std::string extractEnum(QualType QT, const EnumDecl* Decl); void emitConst(const std::string& Name, int64_t Val, SourceLocation Loc); std::string getFuncName(const Expr* Expr); std::string getDeclName(const Expr* Expr); const ValueDecl* getValueDecl(const Expr* Expr); std::string getDeclFileID(const Decl* Decl); std::string getUniqueDeclName(const NamedDecl* Decl); std::vector> extractDesignatedInitConsts(const VarDecl& ArrayDecl); FieldType genType(QualType Typ, const std::string& BackupName = ""); std::unordered_map structFieldIndexes(const RecordDecl* Decl); template T evaluate(const Expr* E); template std::vector findAllMatches(const Node* Expr, const Condition& Cond); template const T* findFirstMatch(const Node* Expr, const Condition& Cond); std::optional getSizeofType(const Expr* E); int sizeofType(const Type* T); int alignofType(const Type* T); void extractIoctl(const Expr* Cmd, const ConstDesc& Const); std::optional isMacroOrEnum(const Expr* E); ConstDesc constDesc(const Expr* E, const std::string& Str, const std::string& Value, const SourceRange& SourceRange); }; // PPCallbacksTracker records all macro definitions (name/value/source location). class PPCallbacksTracker : public PPCallbacks { public: PPCallbacksTracker(Preprocessor& PP, MacroMap& Macros) : SM(PP.getSourceManager()), Macros(Macros) {} private: SourceManager& SM; MacroMap& Macros; void MacroDefined(const Token& MacroName, const MacroDirective* MD) override { const char* NameBegin = SM.getCharacterData(MacroName.getLocation()); const char* NameEnd = SM.getCharacterData(MacroName.getEndLoc()); std::string Name(NameBegin, NameEnd - NameBegin); const char* ValBegin = SM.getCharacterData(MD->getMacroInfo()->getDefinitionLoc()); const char* ValEnd = SM.getCharacterData(MD->getMacroInfo()->getDefinitionEndLoc()) + 1; // Definition includes the macro name, remove it. ValBegin += std::min(Name.size(), ValEnd - ValBegin); // Trim whitespace from both ends. while (ValBegin < ValEnd && isspace(*ValBegin)) ValBegin++; while (ValBegin < ValEnd && isspace(*(ValEnd - 1))) ValEnd--; std::string Value(ValBegin, ValEnd - ValBegin); Macros[Name] = MacroDef{ .Value = Value, .SourceRange = SourceRange(MD->getMacroInfo()->getDefinitionLoc(), MD->getMacroInfo()->getDefinitionEndLoc()), }; } }; const Expr* removeCasts(const Expr* E) { for (;;) { if (auto* P = dyn_cast(E)) E = P->getSubExpr(); else if (auto* C = dyn_cast(E)) E = C->getSubExpr(); else break; } return E; } bool Extractor::handleBeginSource(CompilerInstance& CI) { Preprocessor& PP = CI.getPreprocessor(); PP.addPPCallbacks(std::make_unique(PP, Macros)); return true; } template void Extractor::match(MatchFunc Action, const M& Matcher) { Matchers.emplace_back(new MatchCallbackThunk(*this, Action)); addMatcher(Matcher, Matchers.back().get()); } void Extractor::run(const MatchFinder::MatchResult& Result, MatchFunc Action) { Nodes = &Result.Nodes; Context = Result.Context; SourceManager = Result.SourceManager; (this->*Action)(); } template const T* Extractor::getResult(StringRef ID) const { return Nodes->getNodeAs(ID); } std::string TypeName(QualType QT) { std::string Name = QT.getAsString(); auto Attr = Name.find(" __attribute__"); if (Attr != std::string::npos) Name = Name.substr(0, Attr); return Name; } // Top function that converts any clang type QT to our output type. FieldType Extractor::genType(QualType QT, const std::string& BackupName) { const Type* T = QT.IgnoreParens().getUnqualifiedType().getDesugaredType(*Context).getTypePtr(); if (llvm::isa(T)) { return IntType{.ByteSize = sizeofType(T), .Name = TypeName(QT), .Base = QualType(T, 0).getAsString()}; } if (auto* Typ = llvm::dyn_cast(T)) { return IntType{.ByteSize = sizeofType(T), .Enum = extractEnum(QT, Typ->getDecl())}; } if (llvm::isa(T)) { return PtrType{.Elem = TodoType(), .IsConst = true}; } if (auto* Typ = llvm::dyn_cast(T)) { return ArrType{.Elem = genType(Typ->getElementType(), BackupName)}; } if (auto* Typ = llvm::dyn_cast(T)) { return extractRecord(QT, Typ, BackupName); } if (auto* Typ = llvm::dyn_cast(T)) { // TODO: the size may be a macro that is different for each arch, e.g.: // long foo[FOOSIZE/sizeof(long)]; int Size = Typ->getSize().getZExtValue(); return ArrType{ .Elem = genType(Typ->getElementType(), BackupName), .MinSize = Size, .MaxSize = Size, .Align = alignofType(Typ), .IsConstSize = true, }; } if (auto* Typ = llvm::dyn_cast(T)) { FieldType Elem; const QualType& Pointee = Typ->getPointeeType(); if (Pointee->isAnyCharacterType()) Elem = BufferType{.IsString = true}; else if (Pointee->isVoidType()) Elem = ArrType{.Elem = TodoType()}; else Elem = genType(Pointee, BackupName); // note: it may be an array as well return PtrType{ .Elem = std::move(Elem), .IsConst = Pointee.isConstQualified(), }; } QT.dump(); llvm::report_fatal_error("unhandled type"); } FieldType Extractor::extractRecord(QualType QT, const RecordType* Typ, const std::string& BackupName) { auto* Decl = Typ->getDecl()->getDefinition(); if (!Decl) return TodoType(); // definition is in a different TU std::string Name = Decl->getDeclName().getAsString(); // If it's a typedef of anon struct, we want to use the typedef name: // typedef struct {...} foo_t; if (Name.empty() && QT->isTypedefNameType()) Name = QualType(Typ, 0).getAsString(); // If no other names, fallback to the parent-struct-based name. if (Name.empty()) { assert(!BackupName.empty()); // The BackupName is supposed to be unique. assert(!StructDedup[BackupName]); Name = BackupName; } if (Name.find("struct ") == 0) Name = Name.substr(strlen("struct ")); if (StructDedup[Name]) return Name; StructDedup[Name] = true; std::vector Fields; for (const FieldDecl* F : Decl->fields()) { std::string FieldName = F->getNameAsString(); std::string BackupFieldName = Name + "_" + FieldName; bool IsAnonymous = false; if (FieldName.empty()) { BackupFieldName = Name + "_" + std::to_string(F->getFieldIndex()); FieldName = BackupFieldName; IsAnonymous = true; } FieldType FieldType = genType(F->getType(), BackupFieldName); int BitWidth = F->isBitField() ? F->getBitWidthValue() : 0; int CountedBy = F->getType()->isCountAttributedType() ? llvm::dyn_cast( F->getType()->getAs()->getCountExpr()->getReferencedDeclOfCallee()) ->getFieldIndex() : -1; Fields.push_back(Field{ .Name = FieldName, .IsAnonymous = IsAnonymous, .BitWidth = BitWidth, .CountedBy = CountedBy, .Type = std::move(FieldType), }); } int AlignAttr = 0; bool Packed = false; if (Decl->isStruct() && Decl->hasAttrs()) { for (const auto& A : Decl->getAttrs()) { if (auto* Attr = llvm::dyn_cast(A)) AlignAttr = Attr->getAlignment(*Context) / 8; else if (llvm::isa(A)) Packed = true; } } Output.emit(Struct{ .Name = Name, .ByteSize = sizeofType(Typ), .Align = alignofType(Typ), .IsUnion = Decl->isUnion(), .IsPacked = Packed, .AlignAttr = AlignAttr, .Fields = std::move(Fields), }); return Name; } std::string Extractor::extractEnum(QualType QT, const EnumDecl* Decl) { std::string Name = Decl->getNameAsString(); if (Name.empty()) { // This is an unnamed enum declared with a typedef: // typedef enum {...} enum_name; auto Typedef = dyn_cast(QT.getTypePtr()); if (Typedef) Name = Typedef->getDecl()->getNameAsString(); if (Name.empty()) { QT.dump(); llvm::report_fatal_error("enum with empty name"); } } if (EnumDedup[Name]) return Name; EnumDedup[Name] = true; std::vector Values; for (const auto* Enumerator : Decl->enumerators()) { const std::string& Name = Enumerator->getNameAsString(); emitConst(Name, Enumerator->getInitVal().getExtValue(), Decl->getBeginLoc()); Values.push_back(Name); } Output.emit(Enum{ .Name = Name, .Values = Values, }); return Name; } void Extractor::emitConst(const std::string& Name, int64_t Val, SourceLocation Loc) { Output.emit(ConstInfo{ .Name = Name, .Filename = std::filesystem::relative(SourceManager->getFilename(Loc).str()), .Value = Val, }); } // Returns base part of the source file containing the canonical declaration. // If the passed declaration is also a definition, then it will look for a preceeding declaration. // This is used to generate unique names for static definitions that may have duplicate names // across different TUs. We assume that the base part of the source file is enough // to make them unique. std::string Extractor::getDeclFileID(const Decl* Decl) { std::string file = std::filesystem::path(SourceManager->getFilename(Decl->getCanonicalDecl()->getSourceRange().getBegin()).str()) .filename() .stem() .string(); std::replace(file.begin(), file.end(), '-', '_'); return file; } std::optional Extractor::isMacroOrEnum(const Expr* E) { if (!E) return {}; if (auto* Enum = removeCasts(E)->getEnumConstantDecl()) return constDesc(E, Enum->getNameAsString(), "", Enum->getSourceRange()); auto Range = Lexer::getAsCharRange(E->getSourceRange(), *SourceManager, Context->getLangOpts()); const std::string& Str = Lexer::getSourceText(Range, *SourceManager, Context->getLangOpts()).str(); auto MacroDef = Macros.find(Str); if (MacroDef == Macros.end()) return {}; return constDesc(E, Str, MacroDef->second.Value, MacroDef->second.SourceRange); } ConstDesc Extractor::constDesc(const Expr* E, const std::string& Str, const std::string& Value, const SourceRange& SourceRange) { int64_t Val = evaluate(E); emitConst(Str, Val, SourceRange.getBegin()); return ConstDesc{ .Name = Str, .Value = Value, .SourceRange = SourceRange, .IntValue = Val, }; } template void matchHelper(MatchFinder& Finder, ASTContext* Context, const Node* Expr) { Finder.match(*Expr, *Context); } void matchHelper(MatchFinder& Finder, ASTContext* Context, const ASTContext* Expr) { assert(Context == Expr); Finder.matchAST(*Context); } // Returns all matches of Cond named "res" in Expr and returns them casted to T. // Expr can point to Context for a global match. template std::vector Extractor::findAllMatches(const Node* Expr, const Condition& Cond) { if (!Expr) return {}; struct Matcher : MatchFinder::MatchCallback { std::vector Matches; void run(const MatchFinder::MatchResult& Result) override { if (const T* M = Result.Nodes.getNodeAs("res")) Matches.push_back(M); } }; MatchFinder Finder; Matcher Matcher; Finder.addMatcher(Cond, &Matcher); matchHelper(Finder, Context, Expr); return std::move(Matcher.Matches); } // Returns the first match of Cond named "res" in Expr and returns it casted to T. // If no match is found, returns nullptr. template const T* Extractor::findFirstMatch(const Node* Expr, const Condition& Cond) { const auto& Matches = findAllMatches(Expr, Cond); return Matches.empty() ? nullptr : Matches[0]; } // Extracts the first function reference from the expression. // TODO: try to extract the actual function reference the expression will be evaluated to // (the first one is not necessarily the right one). std::string Extractor::getFuncName(const Expr* Expr) { auto* Decl = findFirstMatch(Expr, stmt(forEachDescendant(declRefExpr(hasType(functionType())).bind("res")))); return Decl ? Decl->getDecl()->getNameAsString() : ""; } // If expression refers to some identifier, returns the identifier name. // Otherwise returns an empty string. // For example, if the expression is `function_name`, returns "function_name" string. std::string Extractor::getDeclName(const Expr* Expr) { // The expression can be complex and include casts and e.g. InitListExpr, // to remove all of these we match the first/any DeclRefExpr. auto* Decl = getValueDecl(Expr); return Decl ? Decl->getNameAsString() : ""; } // Returns the first ValueDecl in the expression. const ValueDecl* Extractor::getValueDecl(const Expr* Expr) { // The expression can be complex and include casts and e.g. InitListExpr, // to remove all of these we match the first/any DeclRefExpr. auto* Decl = findFirstMatch(Expr, stmt(forEachDescendant(declRefExpr().bind("res")))); return Decl ? Decl->getDecl() : nullptr; } // Recursively finds first sizeof in the expression and return the type passed to sizeof. std::optional Extractor::getSizeofType(const Expr* E) { auto* Res = findFirstMatch( E, stmt(forEachDescendant(unaryExprOrTypeTraitExpr(ofKind(UETT_SizeOf)).bind("res")))); if (!Res) return {}; if (Res->isArgumentType()) return Res->getArgumentType(); return Res->getArgumentExpr()->getType(); } // Returns map of field name -> field index. std::unordered_map Extractor::structFieldIndexes(const RecordDecl* Decl) { // TODO: this is wrong for structs that contain unions and anonymous sub-structs (e.g. genl_split_ops). // To handle these we would need to look at InitListExpr::getInitializedFieldInUnion, and recurse // into anonymous structs. std::unordered_map Indexes; for (const auto& F : Decl->fields()) Indexes[F->getNameAsString()] = F->getFieldIndex(); return Indexes; } // Extracts enum info from array variable designated initialization. // For example, for the following code: // // enum Foo { // FooA = 11, // FooB = 42, // }; // // struct Bar bars[] = { // [FooA] = {...}, // [FooB] = {...}, // }; // // it returns the following vector: {{11, "FooA"}, {42, "FooB"}}. std::vector> Extractor::extractDesignatedInitConsts(const VarDecl& ArrayDecl) { const auto& Matches = findAllMatches( &ArrayDecl, decl(forEachDescendant(designatedInitExpr(optionally(has(constantExpr(has(declRefExpr())).bind("res"))))))); std::vector> Inits; for (auto* Match : Matches) { const int64_t Val = *Match->getAPValueResult().getInt().getRawData(); const auto& Name = Match->getEnumConstantDecl()->getNameAsString(); const auto& Loc = Match->getEnumConstantDecl()->getBeginLoc(); emitConst(Name, Val, Loc); Inits.emplace_back(Val, Name); } return Inits; } int Extractor::sizeofType(const Type* T) { return static_cast(Context->getTypeInfo(T).Width) / 8; } int Extractor::alignofType(const Type* T) { return static_cast(Context->getTypeInfo(T).Align) / 8; } template T Extractor::evaluate(const Expr* E) { Expr::EvalResult Res; E->EvaluateAsConstantExpr(Res, *Context); // TODO: it's unclear what to do if it's not Int (in some cases we see None here). if (Res.Val.getKind() != APValue::Int) return 0; auto val = Res.Val.getInt(); if (val.isSigned()) return val.sextOrTrunc(64).getSExtValue(); return val.zextOrTrunc(64).getZExtValue(); } void Extractor::matchNetlinkPolicy() { const auto* PolicyArray = getResult("netlink_policy"); const auto* Init = llvm::dyn_cast_if_present(PolicyArray->getInit()); if (!Init) return; const auto& InitConsts = extractDesignatedInitConsts(*PolicyArray); auto Fields = structFieldIndexes(Init->getInit(0)->getType()->getAsRecordDecl()); std::vector Attrs; for (const auto& [I, Name] : InitConsts) { const auto* AttrInit = llvm::dyn_cast(Init->getInit(I)); const std::string& AttrKind = getDeclName(AttrInit->getInit(Fields["type"])); if (AttrKind == "NLA_REJECT") continue; auto* LenExpr = AttrInit->getInit(Fields["len"]); int MaxSize = 0; std::string NestedPolicy; std::unique_ptr Elem; if (AttrKind == "NLA_NESTED" || AttrKind == "NLA_NESTED_ARRAY") { if (const auto* NestedDecl = getValueDecl(AttrInit->getInit(2))) NestedPolicy = getUniqueDeclName(NestedDecl); } else { MaxSize = evaluate(LenExpr); if (auto SizeofType = getSizeofType(LenExpr)) Elem = std::make_unique(genType(*SizeofType)); } Attrs.push_back(NetlinkAttr{ .Name = Name, .Kind = AttrKind, .MaxSize = MaxSize, .NestedPolicy = NestedPolicy, .Elem = std::move(Elem), }); } Output.emit(NetlinkPolicy{ .Name = getUniqueDeclName(PolicyArray), .Attrs = std::move(Attrs), }); } void Extractor::matchNetlinkFamily() { const auto* FamilyInit = getResult("genl_family_init"); auto Fields = structFieldIndexes(getResult("genl_family")); const std::string& FamilyName = llvm::dyn_cast(FamilyInit->getInit(Fields["name"]))->getString().str(); std::string DefaultPolicy; if (const auto* PolicyDecl = FamilyInit->getInit(Fields["policy"])->getAsBuiltinConstantDeclRef(*Context)) DefaultPolicy = getUniqueDeclName(PolicyDecl); std::vector Ops; for (const auto& OpsName : {"ops", "small_ops", "split_ops"}) { const auto* OpsDecl = llvm::dyn_cast_if_present(FamilyInit->getInit(Fields[OpsName])->getAsBuiltinConstantDeclRef(*Context)); const auto NumOps = FamilyInit->getInit(Fields[std::string("n_") + OpsName])->getIntegerConstantExpr(*Context); // The ops variable may be defined in another TU. // TODO: extract variables from another TUs. if (!OpsDecl || !OpsDecl->getInit() || !NumOps) continue; const auto* OpsInit = llvm::dyn_cast(OpsDecl->getInit()); auto OpsFields = structFieldIndexes(OpsInit->getInit(0)->getType()->getAsRecordDecl()); for (int I = 0; I < *NumOps; I++) { const auto* OpInit = llvm::dyn_cast(OpsInit->getInit(I)); const auto* CmdInit = OpInit->getInit(OpsFields["cmd"])->getEnumConstantDecl(); if (!CmdInit) continue; const std::string& OpName = CmdInit->getNameAsString(); emitConst(OpName, CmdInit->getInitVal().getExtValue(), CmdInit->getBeginLoc()); std::string Policy; if (OpsFields.count("policy") != 0) { if (const auto* PolicyDecl = OpInit->getInit(OpsFields["policy"])->getAsBuiltinConstantDeclRef(*Context)) Policy = getUniqueDeclName(PolicyDecl); } if (Policy.empty()) Policy = DefaultPolicy; std::string Func = getFuncName(OpInit->getInit(OpsFields["doit"])); if (Func.empty()) Func = getFuncName(OpInit->getInit(OpsFields["dumpit"])); int Flags = evaluate(OpInit->getInit(OpsFields["flags"])); const char* Access = AccessUser; constexpr int GENL_ADMIN_PERM = 0x01; constexpr int GENL_UNS_ADMIN_PERM = 0x10; if (Flags & GENL_ADMIN_PERM) Access = AccessAdmin; else if (Flags & GENL_UNS_ADMIN_PERM) Access = AccessNsAdmin; Ops.push_back(NetlinkOp{ .Name = OpName, .Func = Func, .Access = Access, .Policy = Policy, }); } } Output.emit(NetlinkFamily{ .Name = FamilyName, .Ops = std::move(Ops), }); } std::string Extractor::getUniqueDeclName(const NamedDecl* Decl) { return Decl->getNameAsString() + "_" + getDeclFileID(Decl); } bool isInterestingCall(const CallExpr* Call) { auto* CalleeDecl = Call->getDirectCallee(); // We don't handle indirect calls yet. if (!CalleeDecl) return false; // Builtins are not interesting and won't have a body. if (CalleeDecl->getBuiltinID() != Builtin::ID::NotBuiltin) return false; const std::string& Callee = CalleeDecl->getNameAsString(); // There are too many of these and they should only be called at runtime in broken builds. if (Callee.rfind("__compiletime_assert", 0) == 0 || Callee == "____wrong_branch_error" || Callee == "__bad_size_call_parameter") return false; return true; } struct FunctionAnalyzer : RecursiveASTVisitor { FunctionAnalyzer(Extractor* Extractor, const FunctionDecl* Func) : Extractor(Extractor), CurrentFunc(Func->getNameAsString()), Context(Extractor->Context), SourceManager(Extractor->SourceManager) { // The global function scope. Scopes.push_back(FunctionScope{.Arg = -1}); Current = &Scopes[0]; TraverseStmt(Func->getBody()); } bool VisitBinaryOperator(const BinaryOperator* B) { if (B->isAssignmentOp()) noteFact(getTypingEntity(B->getRHS()), getTypingEntity(B->getLHS())); return true; } bool VisitVarDecl(const VarDecl* D) { if (D->getStorageDuration() == SD_Automatic) noteFact(getTypingEntity(D->getInit()), getDeclTypingEntity(D)); return true; } bool VisitReturnStmt(const ReturnStmt* Ret) { noteFact(getTypingEntity(Ret->getRetValue()), EntityReturn{.Func = CurrentFunc}); return true; } bool VisitCallExpr(const CallExpr* Call) { if (isInterestingCall(Call)) { const std::string& Callee = Call->getDirectCallee()->getNameAsString(); Current->Calls.push_back(Callee); for (unsigned AI = 0; AI < Call->getNumArgs(); AI++) { noteFact(getTypingEntity(Call->getArg(AI)), EntityArgument{ .Func = Callee, .Arg = AI, }); } } return true; } bool VisitSwitchStmt(const SwitchStmt* S) { // We are only interested in switches on the function arguments // with cases that mention defines from uapi headers. // This covers ioctl/fcntl/prctl/ptrace/etc. bool IsInteresting = false; auto Param = getTypingEntity(S->getCond()); if (Current == &Scopes[0] && Param && Param->Argument) { for (auto* C = S->getSwitchCaseList(); C; C = C->getNextSwitchCase()) { auto* Case = dyn_cast(C); if (!Case) continue; auto LMacro = Extractor->isMacroOrEnum(Case->getLHS()); auto RMacro = Extractor->isMacroOrEnum(Case->getRHS()); if (LMacro || RMacro) { IsInteresting = true; break; } } } SwitchStack.push({S, IsInteresting, IsInteresting ? static_cast(Param->Argument->Arg) : -1}); return true; } bool VisitSwitchCase(const SwitchCase* C) { if (!SwitchStack.top().IsInteresting) return true; // If there are several cases with the same "body", we want to create new scope // only for the first one: // case FOO: // case BAR: // ... some code ... if (!C->getNextSwitchCase() || C->getNextSwitchCase()->getSubStmt() != C) { int Line = SourceManager->getExpansionLineNumber(C->getBeginLoc()); if (Current != &Scopes[0]) Current->EndLine = Line; Scopes.push_back(FunctionScope{ .Arg = SwitchStack.top().Arg, .StartLine = Line, }); Current = &Scopes.back(); } // Otherwise it's a default case, for which we don't add any values. if (auto* Case = dyn_cast(C)) { int64_t LVal = Extractor->evaluate(Case->getLHS()); auto LMacro = Extractor->isMacroOrEnum(Case->getLHS()); if (LMacro) { Current->Values.push_back(LMacro->Name); Extractor->extractIoctl(Case->getLHS(), *LMacro); } else { Current->Values.push_back(std::to_string(LVal)); } if (Case->caseStmtIsGNURange()) { // GNU range is: // case FOO ... BAR: // Add all values in the range. int64_t RVal = Extractor->evaluate(Case->getRHS()); auto RMacro = Extractor->isMacroOrEnum(Case->getRHS()); for (int64_t V = LVal + 1; V <= RVal - (RMacro ? 1 : 0); V++) Current->Values.push_back(std::to_string(V)); if (RMacro) Current->Values.push_back(RMacro->Name); } } return true; } bool dataTraverseStmtPost(const Stmt* S) { if (SwitchStack.empty()) return true; auto Top = SwitchStack.top(); if (Top.S != S) return true; if (Top.IsInteresting) { if (Current != &Scopes[0]) Current->EndLine = SourceManager->getExpansionLineNumber(S->getEndLoc()); Current = &Scopes[0]; } SwitchStack.pop(); return true; } void noteFact(std::optional&& Src, std::optional&& Dst) { if (Src && Dst) Current->Facts.push_back({std::move(*Src), std::move(*Dst)}); } std::optional getTypingEntity(const Expr* E); std::optional getDeclTypingEntity(const Decl* Decl); struct SwitchDesc { const SwitchStmt* S; bool IsInteresting; int Arg; }; Extractor* Extractor; std::string CurrentFunc; ASTContext* Context; SourceManager* SourceManager; std::vector Scopes; FunctionScope* Current = nullptr; std::unordered_map LocalVars; std::unordered_map LocalSeq; std::stack SwitchStack; }; void Extractor::matchFunctionDef() { const auto* Func = getResult("function"); if (!Func->getBody()) return; auto Range = Func->getSourceRange(); const std::string& SourceFile = std::filesystem::relative(SourceManager->getFilename(SourceManager->getExpansionLoc(Range.getBegin())).str()); const int StartLine = SourceManager->getExpansionLineNumber(Range.getBegin()); const int EndLine = SourceManager->getExpansionLineNumber(Range.getEnd()); FunctionAnalyzer Analyzer(this, Func); Output.emit(Function{ .Name = Func->getNameAsString(), .File = SourceFile, .StartLine = StartLine, .EndLine = EndLine, .IsStatic = Func->isStatic(), .Scopes = std::move(Analyzer.Scopes), }); } std::optional FunctionAnalyzer::getTypingEntity(const Expr* E) { if (!E) return {}; E = removeCasts(E); if (auto* DeclRef = dyn_cast(E)) { return getDeclTypingEntity(DeclRef->getDecl()); } else if (auto* Member = dyn_cast(E)) { const Type* StructType = Member->getBase()->getType().IgnoreParens().getUnqualifiedType().getDesugaredType(*Context).getTypePtr(); if (auto* T = dyn_cast(StructType)) StructType = T->getPointeeType().IgnoreParens().getUnqualifiedType().getDesugaredType(*Context).getTypePtr(); auto* StructDecl = dyn_cast(StructType)->getDecl(); std::string StructName = StructDecl->getNameAsString(); if (StructName.empty()) { // The struct may be anonymous, but we need some name. // Ideally we generate the same name we generate in struct definitions, then it will be possible // to match them between each other. However, it does not seem to be easy. We can use DeclContext::getParent // to get declaration of the enclosing struct, but we will also need to figure out the field index // and handle all corner cases. For now we just use the following quick hack: hash declaration file:line. // Note: the hash must be stable across different machines (for test golden files), so we take just // the last part of the file name. const std::string& SourceFile = std::filesystem::path( SourceManager->getFilename(SourceManager->getExpansionLoc(StructDecl->getBeginLoc())).str()) .filename() .string(); int Line = SourceManager->getExpansionLineNumber(StructDecl->getBeginLoc()); StructName = std::to_string(std::hash()(SourceFile) + std::hash()(Line)); } return EntityField{ .Struct = StructName, .Field = Member->getMemberDecl()->getNameAsString(), }; } else if (auto* Unary = dyn_cast(E)) { if (Unary->getOpcode() == UnaryOperatorKind::UO_AddrOf) { if (auto* DeclRef = dyn_cast(removeCasts(Unary->getSubExpr()))) { if (auto* Var = dyn_cast(DeclRef->getDecl())) { if (Var->hasGlobalStorage()) { return EntityGlobalAddr{ .Name = Extractor->getUniqueDeclName(Var), }; } } } } } else if (auto* Call = dyn_cast(E)) { if (isInterestingCall(Call)) { return EntityReturn{ .Func = Call->getDirectCallee()->getNameAsString(), }; } } return {}; } std::optional FunctionAnalyzer::getDeclTypingEntity(const Decl* Decl) { if (auto* Parm = dyn_cast(Decl)) { return EntityArgument{ .Func = CurrentFunc, .Arg = Parm->getFunctionScopeIndex(), }; } else if (auto* Var = dyn_cast(Decl)) { if (Var->hasLocalStorage()) { std::string VarName = Var->getNameAsString(); // Theoretically there can be several local vars with the same name. // Give them unique suffixes if that's the case. if (LocalVars.count(Var) == 0) LocalVars[Var] = LocalSeq[VarName]++; if (int Seq = LocalVars[Var]) VarName += std::to_string(Seq); return EntityLocal{ .Name = VarName, }; } } return {}; } void Extractor::matchSyscall() { const auto* Func = getResult("syscall"); std::vector Args; for (const auto& Param : Func->parameters()) { Args.push_back(Field{ .Name = Param->getNameAsString(), .Type = genType(Param->getType()), }); } Output.emit(Syscall{ .Func = Func->getNameAsString(), .Args = std::move(Args), }); } void Extractor::matchIouring() { const auto* IssueDefs = getResult("io_issue_defs"); const auto& InitConsts = extractDesignatedInitConsts(*IssueDefs); const auto* InitList = llvm::dyn_cast(IssueDefs->getInit()); auto Fields = structFieldIndexes(InitList->getInit(0)->getType()->getAsRecordDecl()); for (const auto& [I, Name] : InitConsts) { const auto& Init = llvm::dyn_cast(InitList->getInit(I)); std::string Prep = getFuncName(Init->getInit(Fields["prep"])); if (Prep == "io_eopnotsupp_prep") continue; Output.emit(IouringOp{ .Name = Name, .Func = getFuncName(Init->getInit(Fields["issue"])), }); } } void Extractor::matchFileOps() { const auto* Fops = getResult("init"); if (Fops->getNumInits() == 0 || isa(Fops->getInit(0))) { // Some code constructs produce init list with DesignatedInitExpr. // Unclear why, but it won't be handled by the following code, and is not necessary to handle. return; } const auto* Var = getResult("var"); std::string VarName = getUniqueDeclName(Var); int NameSeq = FileOpsDedup[VarName]++; if (NameSeq) VarName += std::to_string(NameSeq); auto Fields = structFieldIndexes(Fops->getType()->getAsRecordDecl()); std::string Open = getFuncName(Fops->getInit(Fields["open"])); std::string Ioctl = getFuncName(Fops->getInit(Fields["unlocked_ioctl"])); std::string Read = getFuncName(Fops->getInit(Fields["read"])); if (Read.empty()) Read = getFuncName(Fops->getInit(Fields["read_iter"])); std::string Write = getFuncName(Fops->getInit(Fields["write"])); if (Write.empty()) Write = getFuncName(Fops->getInit(Fields["write_iter"])); std::string Mmap = getFuncName(Fops->getInit(Fields["mmap"])); if (Mmap.empty()) Mmap = getFuncName(Fops->getInit(Fields["get_unmapped_area"])); Output.emit(FileOps{ .Name = VarName, .Open = std::move(Open), .Read = std::move(Read), .Write = std::move(Write), .Mmap = std::move(Mmap), .Ioctl = std::move(Ioctl), }); } void Extractor::extractIoctl(const Expr* Cmd, const ConstDesc& Const) { // This is old style ioctl defined directly via a number. // We can't infer anything about it. if (Const.Value.find("_IO") != 0) return; FieldType Type; auto Dir = _IOC_DIR(Const.IntValue); if (Dir == _IOC_NONE) { Type = IntType{.ByteSize = 1, .IsConst = true}; } else if (std::optional Arg = getSizeofType(Cmd)) { Type = PtrType{ .Elem = genType(*Arg), .IsConst = Dir == _IOC_READ, }; } else { // It is an ioctl, but we failed to get the arg type. // Let the Go part figure out a good arg type. return; } Output.emit(Ioctl{ .Name = Const.Name, .Type = std::move(Type), }); } int main(int argc, const char** argv) { llvm::cl::OptionCategory Options("syz-declextract options"); auto OptionsParser = tooling::CommonOptionsParser::create(argc, argv, Options); if (!OptionsParser) { llvm::errs() << OptionsParser.takeError(); return 1; } Extractor Ex; tooling::ClangTool Tool(OptionsParser->getCompilations(), OptionsParser->getSourcePathList()); if (Tool.run(tooling::newFrontendActionFactory(&Ex, &Ex).get())) return 1; Ex.print(); return 0; }