From c756ba4e975097bf74b952367e2cd1a8db466c69 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 2 Dec 2024 10:57:36 +0100 Subject: tools/syz-declextract: extract file_operations descriptions Extend the clang tool to locate file_operations variables and arrays and dump open/read/write/mmap/ioctl callbacks for each. It also tries to extract set of ioctl commands and argument types for them in a simple best-effort way (for now). It just locates switch in the ioctl callback and extracts each case as a command. --- tools/syz-declextract/clangtool/declextract.cpp | 134 ++++++++++++++++++++- tools/syz-declextract/clangtool/output.h | 41 +++++++ tools/syz-declextract/testdata/file_operations.c | 45 +++++++ .../testdata/file_operations.c.info | 0 .../testdata/file_operations.c.json | 118 ++++++++++++++++++ .../syz-declextract/testdata/file_operations.c.txt | 10 ++ tools/syz-declextract/testdata/include/fs.h | 12 ++ .../testdata/include/uapi/file_operations.h | 14 +++ .../syz-declextract/testdata/include/uapi/ioctl.h | 24 ++++ 9 files changed, 396 insertions(+), 2 deletions(-) create mode 100644 tools/syz-declextract/testdata/file_operations.c create mode 100644 tools/syz-declextract/testdata/file_operations.c.info create mode 100644 tools/syz-declextract/testdata/file_operations.c.json create mode 100644 tools/syz-declextract/testdata/file_operations.c.txt create mode 100644 tools/syz-declextract/testdata/include/fs.h create mode 100644 tools/syz-declextract/testdata/include/uapi/file_operations.h create mode 100644 tools/syz-declextract/testdata/include/uapi/ioctl.h (limited to 'tools') diff --git a/tools/syz-declextract/clangtool/declextract.cpp b/tools/syz-declextract/clangtool/declextract.cpp index ca78a3b1d..68ca91d32 100644 --- a/tools/syz-declextract/clangtool/declextract.cpp +++ b/tools/syz-declextract/clangtool/declextract.cpp @@ -20,6 +20,7 @@ #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TypeTraits.h" +#include 
"clang/Frontend/CompilerInstance.h" #include "clang/Tooling/CommonOptionsParser.h" #include "clang/Tooling/Tooling.h" #include "llvm/ADT/StringRef.h" @@ -36,10 +37,19 @@ #include #include +#include + using namespace clang; using namespace clang::ast_matchers; -class Extractor : public MatchFinder { +// MacroDef/MacroMap hold information about macros defined in the file. +struct MacroDef { + std::string Value; // value as written in the source + SourceRange SourceRange; // source range of the value +}; +using MacroMap = std::unordered_map; + +class Extractor : public MatchFinder, public tooling::SourceFileCallbacks { public: Extractor() { match(&Extractor::matchSyscall, @@ -59,6 +69,10 @@ public: match(&Extractor::matchNetlinkFamily, varDecl(hasType(recordDecl(hasName("genl_family")).bind("genl_family")), has(initListExpr().bind("genl_family_init")))); + + match(&Extractor::matchFileOps, + varDecl(forEachDescendant(initListExpr(hasType(recordDecl(hasName("file_operations")))).bind("init"))) + .bind("var")); } void print() const { Output.print(); } @@ -80,13 +94,17 @@ private: SourceManager* SourceManager = nullptr; Output Output; + MacroMap Macros; std::unordered_map EnumDedup; std::unordered_map StructDedup; + std::unordered_map FileOpsDedup; void matchSyscall(); void matchIouring(); void matchNetlinkPolicy(); void matchNetlinkFamily(); + void matchFileOps(); + bool handleBeginSource(CompilerInstance& CI) override; template void match(MatchFunc Action, const M& Matcher); void run(const MatchFinder::MatchResult& Result, MatchFunc Action); template const T* getResult(StringRef ID) const; @@ -107,8 +125,45 @@ private: const T* findFirstMatch(const Node* Expr, const Condition& Cond); std::optional getSizeofType(const Expr* E); int sizeofType(const Type* T); + std::vector extractIoctlCommands(const std::string& Ioctl); }; +// PPCallbacksTracker records all macro definitions (name/value/source location). 
+class PPCallbacksTracker : public PPCallbacks { +public: + PPCallbacksTracker(Preprocessor& PP, MacroMap& Macros) : SM(PP.getSourceManager()), Macros(Macros) {} + +private: + SourceManager& SM; + MacroMap& Macros; + + void MacroDefined(const Token& MacroName, const MacroDirective* MD) override { + const char* NameBegin = SM.getCharacterData(MacroName.getLocation()); + const char* NameEnd = SM.getCharacterData(MacroName.getEndLoc()); + std::string Name(NameBegin, NameEnd - NameBegin); + const char* ValBegin = SM.getCharacterData(MD->getMacroInfo()->getDefinitionLoc()); + const char* ValEnd = SM.getCharacterData(MD->getMacroInfo()->getDefinitionEndLoc()) + 1; + // Definition includes the macro name, remove it. + ValBegin += std::min(Name.size(), ValEnd - ValBegin); + // Trim whitespace from both ends. + while (ValBegin < ValEnd && isspace(*ValBegin)) + ValBegin++; + while (ValBegin < ValEnd && isspace(*(ValEnd - 1))) + ValEnd--; + std::string Value(ValBegin, ValEnd - ValBegin); + Macros[Name] = MacroDef{ + .Value = Value, + .SourceRange = SourceRange(MD->getMacroInfo()->getDefinitionLoc(), MD->getMacroInfo()->getDefinitionEndLoc()), + }; + } +}; + +bool Extractor::handleBeginSource(CompilerInstance& CI) { + Preprocessor& PP = CI.getPreprocessor(); + PP.addPPCallbacks(std::make_unique(PP, Macros)); + return true; +} + template void Extractor::match(MatchFunc Action, const M& Matcher) { Matchers.emplace_back(new MatchCallbackThunk(*this, Action)); addMatcher(Matcher, Matchers.back().get()); @@ -526,6 +581,81 @@ void Extractor::matchIouring() { } } +void Extractor::matchFileOps() { + const auto* Fops = getResult("init"); + if (Fops->getNumInits() == 0 || isa(Fops->getInit(0))) { + // Some code constructs produce init list with DesignatedInitExpr. + // Unclear why, but it won't be handled by the following code, and is not necessary to handle. 
+ return; + } + const auto* Var = getResult("var"); + std::string VarName = Var->getNameAsString() + "_" + getDeclFileID(Var); + int NameSeq = FileOpsDedup[VarName]++; + if (NameSeq) + VarName += std::to_string(NameSeq); + auto Fields = structFieldIndexes(Fops->getType()->getAsRecordDecl()); + std::string Open = getDeclName(Fops->getInit(Fields["open"])); + std::string Ioctl = getDeclName(Fops->getInit(Fields["unlocked_ioctl"])); + std::string Read = getDeclName(Fops->getInit(Fields["read"])); + if (Read.empty()) + Read = getDeclName(Fops->getInit(Fields["read_iter"])); + std::string Write = getDeclName(Fops->getInit(Fields["write"])); + if (Write.empty()) + Write = getDeclName(Fops->getInit(Fields["write_iter"])); + std::string Mmap = getDeclName(Fops->getInit(Fields["mmap"])); + if (Mmap.empty()) + Mmap = getDeclName(Fops->getInit(Fields["get_unmapped_area"])); + auto Cmds = extractIoctlCommands(Ioctl); + Output.emit(FileOps{ + .Name = VarName, + .Open = std::move(Open), + .Read = std::move(Read), + .Write = std::move(Write), + .Mmap = std::move(Mmap), + .Ioctl = std::move(Ioctl), + .IoctlCmds = std::move(Cmds), + }); +} + +std::vector Extractor::extractIoctlCommands(const std::string& Ioctl) { + if (Ioctl.empty()) + return {}; + // If we see the ioctl function definition, match cases of switches (very best-effort for now). 
+ const auto& Cases = findAllMatches( + Context, functionDecl(hasName(Ioctl), forEachDescendant(switchStmt(forEachSwitchCase(caseStmt().bind("res")))))); + std::vector Results; + for (auto* Case : Cases) { + const auto* Cmd = Case->getLHS(); + auto Range = Lexer::getAsCharRange(Cmd->getSourceRange(), *SourceManager, Context->getLangOpts()); + std::string CmdStr = Lexer::getSourceText(Range, *SourceManager, Context->getLangOpts()).str(); + auto MacroDef = Macros.find(CmdStr); + if (MacroDef == Macros.end()) + continue; + int64_t CmdVal = evaluate(Cmd); + noteConstUse(CmdStr, CmdVal, MacroDef->second.SourceRange); + FieldType CmdType; + const auto Dir = _IOC_DIR(CmdVal); + if (Dir == _IOC_NONE) { + CmdType = IntType{.ByteSize = 1, .IsConst = true}; + } else if (std::optional Arg = getSizeofType(Cmd)) { + CmdType = PtrType{ + .Elem = genType(*Arg), + .IsConst = Dir == _IOC_READ, + }; + } else { + CmdType = PtrType{ + .Elem = BufferType{}, + .IsConst = Dir == _IOC_READ, + }; + } + Results.push_back(IoctlCmd{ + .Name = CmdStr, + .Type = std::move(CmdType), + }); + } + return Results; +} + int main(int argc, const char** argv) { llvm::cl::OptionCategory Options("syz-declextract options"); auto OptionsParser = tooling::CommonOptionsParser::create(argc, argv, Options); @@ -535,7 +665,7 @@ int main(int argc, const char** argv) { } Extractor Ex; tooling::ClangTool Tool(OptionsParser->getCompilations(), OptionsParser->getSourcePathList()); - if (Tool.run(tooling::newFrontendActionFactory(&Ex).get())) + if (Tool.run(tooling::newFrontendActionFactory(&Ex, &Ex).get())) return 1; Ex.print(); return 0; diff --git a/tools/syz-declextract/clangtool/output.h b/tools/syz-declextract/clangtool/output.h index df3f290b6..c8e9741fd 100644 --- a/tools/syz-declextract/clangtool/output.h +++ b/tools/syz-declextract/clangtool/output.h @@ -36,6 +36,9 @@ struct FieldType { struct IntType { int ByteSize = 0; + int64_t MinValue = 0; + int64_t MaxValue = 0; + bool IsConst = false; std::string 
Name; std::string Base; std::string Enum; @@ -90,6 +93,21 @@ struct Enum { std::vector Values; }; +struct IoctlCmd { + std::string Name; + FieldType Type; +}; + +struct FileOps { + std::string Name; + std::string Open; + std::string Read; + std::string Write; + std::string Mmap; + std::string Ioctl; + std::vector IoctlCmds; +}; + struct Syscall { std::string Func; std::vector Args; @@ -173,6 +191,9 @@ inline void print(JSONPrinter& Printer, const FieldType& V) { inline void print(JSONPrinter& Printer, const IntType& V) { JSONPrinter::Scope Scope(Printer); Printer.Field("byte_size", V.ByteSize); + Printer.Field("min_value", V.MinValue); + Printer.Field("max_value", V.MaxValue); + Printer.Field("is_const", V.IsConst); Printer.Field("name", V.Name); Printer.Field("base", V.Base); Printer.Field("enum", V.Enum, true); @@ -199,6 +220,23 @@ inline void print(JSONPrinter& Printer, const BufferType& V) { Printer.Field("is_non_terminated", V.IsNonTerminated, true); } +inline void print(JSONPrinter& Printer, const IoctlCmd& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("type", V.Type, true); +} + +inline void print(JSONPrinter& Printer, const FileOps& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("name", V.Name); + Printer.Field("open", V.Open); + Printer.Field("read", V.Read); + Printer.Field("write", V.Write); + Printer.Field("mmap", V.Mmap); + Printer.Field("ioctl", V.Ioctl); + Printer.Field("ioctl_cmds", V.IoctlCmds, true); +} + inline void print(JSONPrinter& Printer, const Syscall& V) { JSONPrinter::Scope Scope(Printer); Printer.Field("func", V.Func); @@ -261,6 +299,7 @@ public: void emit(Struct&& V) { Structs.push_back(std::move(V)); } void emit(Enum&& V) { Enums.push_back(std::move(V)); } void emit(Syscall&& V) { Syscalls.push_back(std::move(V)); } + void emit(FileOps&& V) { FileOps.push_back(std::move(V)); } void emit(IouringOp&& V) { IouringOps.push_back(std::move(V)); } void emit(NetlinkFamily&& V) { 
NetlinkFamilies.push_back(std::move(V)); } void emit(NetlinkPolicy&& V) { NetlinkPolicies.push_back(std::move(V)); } @@ -272,6 +311,7 @@ public: Printer.Field("enums", Enums); Printer.Field("structs", Structs); Printer.Field("syscalls", Syscalls); + Printer.Field("file_ops", FileOps); Printer.Field("iouring_ops", IouringOps); Printer.Field("netlink_families", NetlinkFamilies); Printer.Field("netlink_policies", NetlinkPolicies, true); @@ -284,6 +324,7 @@ private: std::vector Enums; std::vector Structs; std::vector Syscalls; + std::vector FileOps; std::vector IouringOps; std::vector NetlinkFamilies; std::vector NetlinkPolicies; diff --git a/tools/syz-declextract/testdata/file_operations.c b/tools/syz-declextract/testdata/file_operations.c new file mode 100644 index 000000000..04d548f98 --- /dev/null +++ b/tools/syz-declextract/testdata/file_operations.c @@ -0,0 +1,45 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +#include "include/fs.h" +#include "include/uapi/file_operations.h" + +static void foo_open() {} +static void foo_read() {} +static void foo_write() {} +static void foo_mmap() {} + +static void foo_ioctl(unsigned int cmd) { + switch (cmd) { + case FOO_IOCTL1: + case FOO_IOCTL2: + case FOO_IOCTL3: + case FOO_IOCTL4: + case FOO_IOCTL5: + } +} + +const struct file_operations foo = { + .open = foo_open, + .read = foo_read, + .write = foo_write, + .unlocked_ioctl = foo_ioctl, + .mmap = foo_mmap, +}; + +static void proc_open() {} +static void proc_read() {} +static void proc_write() {} +static void proc_ioctl(unsigned int cmd) {} + +const struct file_operations proc_ops[] = { + { + .open = proc_open, + .read_iter = proc_read, + .write_iter = proc_write, + }, + { + .open = proc_open, + .unlocked_ioctl = proc_ioctl, + }, +}; diff --git a/tools/syz-declextract/testdata/file_operations.c.info b/tools/syz-declextract/testdata/file_operations.c.info new file mode 100644 index 000000000..e69de29bb diff --git a/tools/syz-declextract/testdata/file_operations.c.json b/tools/syz-declextract/testdata/file_operations.c.json new file mode 100644 index 000000000..e5ddad2b2 --- /dev/null +++ b/tools/syz-declextract/testdata/file_operations.c.json @@ -0,0 +1,118 @@ +{ + "includes": [ + "include/uapi/file_operations.h" + ], + "structs": [ + { + "name": "foo_ioctl_arg", + "byte_size": 8, + "fields": [ + { + "name": "a", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + }, + { + "name": "b", + "counted_by": -1, + "type": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + } + } + ] + } + ], + "file_ops": [ + { + "name": "foo_file_operations", + "open": "foo_open", + "read": "foo_read", + "write": "foo_write", + "mmap": "foo_mmap", + "ioctl": "foo_ioctl", + "ioctl_cmds": [ + { + "name": "FOO_IOCTL5", + "type": { + "ptr": { + "elem": { + "struct": "foo_ioctl_arg" + } + } + } + }, + { + "name": "FOO_IOCTL4", + "type": { 
+ "ptr": { + "elem": { + "struct": "foo_ioctl_arg" + } + } + } + }, + { + "name": "FOO_IOCTL3", + "type": { + "ptr": { + "elem": { + "struct": "foo_ioctl_arg" + }, + "is_const": true + } + } + }, + { + "name": "FOO_IOCTL2", + "type": { + "ptr": { + "elem": { + "int": { + "byte_size": 4, + "name": "int", + "base": "int" + } + }, + "is_const": true + } + } + }, + { + "name": "FOO_IOCTL1", + "type": { + "int": { + "byte_size": 1, + "is_const": true + } + } + } + ], + "source_file": "file_operations.c" + }, + { + "name": "proc_ops_file_operations", + "open": "proc_open", + "read": "proc_read", + "write": "proc_write", + "mmap": "proc_open", + "source_file": "file_operations.c" + }, + { + "name": "proc_ops_file_operations1", + "open": "proc_open", + "mmap": "proc_open", + "ioctl": "proc_ioctl", + "source_file": "file_operations.c" + } + ] +} \ No newline at end of file diff --git a/tools/syz-declextract/testdata/file_operations.c.txt b/tools/syz-declextract/testdata/file_operations.c.txt new file mode 100644 index 000000000..f2fb3ed1c --- /dev/null +++ b/tools/syz-declextract/testdata/file_operations.c.txt @@ -0,0 +1,10 @@ +# Code generated by syz-declextract. DO NOT EDIT. + +meta automatic + +type auto_todo intptr + +include +include +include +include diff --git a/tools/syz-declextract/testdata/include/fs.h b/tools/syz-declextract/testdata/include/fs.h new file mode 100644 index 000000000..a5c838595 --- /dev/null +++ b/tools/syz-declextract/testdata/include/fs.h @@ -0,0 +1,12 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +struct file_operations { + void (*open)(void); + void (*read)(void); + void (*write)(void); + void (*read_iter)(void); + void (*write_iter)(void); + void (*unlocked_ioctl)(unsigned int); + void (*mmap)(void); +}; diff --git a/tools/syz-declextract/testdata/include/uapi/file_operations.h b/tools/syz-declextract/testdata/include/uapi/file_operations.h new file mode 100644 index 000000000..6a2a8d259 --- /dev/null +++ b/tools/syz-declextract/testdata/include/uapi/file_operations.h @@ -0,0 +1,14 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +#include "ioctl.h" + +#define FOO_IOCTL1 _IO('c', 1) +#define FOO_IOCTL2 _IOR('c', 2, int) +#define FOO_IOCTL3 _IOR('c', 3, struct foo_ioctl_arg) +#define FOO_IOCTL4 _IOW('c', 4, struct foo_ioctl_arg) +#define FOO_IOCTL5 _IOWR('c', 5, struct foo_ioctl_arg) + +struct foo_ioctl_arg { + int a, b; +}; diff --git a/tools/syz-declextract/testdata/include/uapi/ioctl.h b/tools/syz-declextract/testdata/include/uapi/ioctl.h new file mode 100644 index 000000000..fae14a74e --- /dev/null +++ b/tools/syz-declextract/testdata/include/uapi/ioctl.h @@ -0,0 +1,24 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +#define _IOC_NONE 0U +#define _IOC_WRITE 1U +#define _IOC_READ 2U + +#define _IOC_NRBITS 8 +#define _IOC_TYPEBITS 8 +#define _IOC_SIZEBITS 14 +#define _IOC_DIRBITS 2 + +#define _IOC_NRSHIFT 0 +#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) +#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) +#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) + +#define _IOC(dir, type, nr, size) (((dir) << _IOC_DIRSHIFT) | ((type) << _IOC_TYPESHIFT) | \ + ((nr) << _IOC_NRSHIFT) | ((size) << _IOC_SIZESHIFT)) + +#define _IO(type, nr) _IOC(_IOC_NONE, (type), (nr), 0) +#define _IOR(type, nr, arg) _IOC(_IOC_READ, (type), (nr), (sizeof(arg))) +#define _IOW(type, nr, arg) _IOC(_IOC_WRITE, (type), (nr), (sizeof(arg))) +#define _IOWR(type, nr, arg) _IOC(_IOC_READ|_IOC_WRITE, (type), (nr), (sizeof(arg))) -- cgit mrf-deployment