From c7e92da6cb06679b04062786481f50e42c585bfc Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 14 Apr 2025 08:03:22 +0200 Subject: tools/syz-declextract: extract function references more precisely Currently we misparse some function references, e.g. for: .write = (foo) ? bar : baz, we extract "foo". Extract first function reference from such expressions. --- tools/syz-declextract/clangtool/declextract.cpp | 35 ++++++----- tools/syz-declextract/testdata/file_operations.c | 9 ++- .../testdata/file_operations.c.json | 68 +++++++++++++--------- 3 files changed, 70 insertions(+), 42 deletions(-) (limited to 'tools') diff --git a/tools/syz-declextract/clangtool/declextract.cpp b/tools/syz-declextract/clangtool/declextract.cpp index f1f1ad0a7..e026b3788 100644 --- a/tools/syz-declextract/clangtool/declextract.cpp +++ b/tools/syz-declextract/clangtool/declextract.cpp @@ -126,6 +126,7 @@ private: FieldType extractRecord(QualType QT, const RecordType* Typ, const std::string& BackupName); std::string extractEnum(QualType QT, const EnumDecl* Decl); void emitConst(const std::string& Name, int64_t Val, SourceLocation Loc); + std::string getFuncName(const Expr* Expr); std::string getDeclName(const Expr* Expr); const ValueDecl* getValueDecl(const Expr* Expr); std::string getDeclFileID(const Decl* Decl); @@ -447,10 +448,18 @@ const T* Extractor::findFirstMatch(const Node* Expr, const Condition& Cond) { return Matches.empty() ? nullptr : Matches[0]; } +// Extracts the first function reference from the expression. +// TODO: try to extract the actual function reference the expression will be evaluated to +// (the first one is not necessarily the right one). +std::string Extractor::getFuncName(const Expr* Expr) { + auto* Decl = + findFirstMatch(Expr, stmt(forEachDescendant(declRefExpr(hasType(functionType())).bind("res")))); + return Decl ? Decl->getDecl()->getNameAsString() : ""; +} + // If expression refers to some identifier, returns the identifier name. // Otherwise returns an empty string. // For example, if the expression is `function_name`, returns "function_name" string. -// If AppendFile, then it also appends per-file suffix. std::string Extractor::getDeclName(const Expr* Expr) { // The expression can be complex and include casts and e.g. InitListExpr, // to remove all of these we match the first/any DeclRefExpr. @@ -603,9 +612,9 @@ void Extractor::matchNetlinkFamily() { } if (Policy.empty()) Policy = DefaultPolicy; - std::string Func = getDeclName(OpInit->getInit(OpsFields["doit"])); + std::string Func = getFuncName(OpInit->getInit(OpsFields["doit"])); if (Func.empty()) - Func = getDeclName(OpInit->getInit(OpsFields["dumpit"])); + Func = getFuncName(OpInit->getInit(OpsFields["dumpit"])); int Flags = evaluate(OpInit->getInit(OpsFields["flags"])); const char* Access = AccessUser; constexpr int GENL_ADMIN_PERM = 0x01; @@ -916,12 +925,12 @@ void Extractor::matchIouring() { auto Fields = structFieldIndexes(InitList->getInit(0)->getType()->getAsRecordDecl()); for (const auto& [I, Name] : InitConsts) { const auto& Init = llvm::dyn_cast(InitList->getInit(I)); - std::string Prep = getDeclName(Init->getInit(Fields["prep"])); + std::string Prep = getFuncName(Init->getInit(Fields["prep"])); if (Prep == "io_eopnotsupp_prep") continue; Output.emit(IouringOp{ .Name = Name, - .Func = getDeclName(Init->getInit(Fields["issue"])), + .Func = getFuncName(Init->getInit(Fields["issue"])), }); } } @@ -939,17 +948,17 @@ void Extractor::matchFileOps() { if (NameSeq) VarName += std::to_string(NameSeq); auto Fields = structFieldIndexes(Fops->getType()->getAsRecordDecl()); - std::string Open = getDeclName(Fops->getInit(Fields["open"])); - std::string Ioctl = getDeclName(Fops->getInit(Fields["unlocked_ioctl"])); - std::string Read = getDeclName(Fops->getInit(Fields["read"])); + std::string Open = getFuncName(Fops->getInit(Fields["open"])); + std::string Ioctl = getFuncName(Fops->getInit(Fields["unlocked_ioctl"])); + std::string Read = getFuncName(Fops->getInit(Fields["read"])); if (Read.empty()) - Read = getDeclName(Fops->getInit(Fields["read_iter"])); - std::string Write = getDeclName(Fops->getInit(Fields["write"])); + Read = getFuncName(Fops->getInit(Fields["read_iter"])); + std::string Write = getFuncName(Fops->getInit(Fields["write"])); if (Write.empty()) - Write = getDeclName(Fops->getInit(Fields["write_iter"])); - std::string Mmap = getDeclName(Fops->getInit(Fields["mmap"])); + Write = getFuncName(Fops->getInit(Fields["write_iter"])); + std::string Mmap = getFuncName(Fops->getInit(Fields["mmap"])); if (Mmap.empty()) - Mmap = getDeclName(Fops->getInit(Fields["get_unmapped_area"])); + Mmap = getFuncName(Fops->getInit(Fields["get_unmapped_area"])); Output.emit(FileOps{ .Name = VarName, .Open = std::move(Open), diff --git a/tools/syz-declextract/testdata/file_operations.c b/tools/syz-declextract/testdata/file_operations.c index 0e62a6f16..a27fa55f5 100644 --- a/tools/syz-declextract/testdata/file_operations.c +++ b/tools/syz-declextract/testdata/file_operations.c @@ -9,10 +9,15 @@ enum { FOO_IOCTL12 = _IOR('c', 12, int), }; +enum { + config_foo +}; + static void foo_open() {} static void foo_read() {} static void foo_write() {} static void foo_mmap() {} +static void foo_mmap2() {} static void foo_ioctl2(unsigned int cmd, unsigned long arg) { switch (cmd) { @@ -41,7 +46,9 @@ const struct file_operations foo = { .read = foo_read, .write = foo_write, .unlocked_ioctl = foo_ioctl, - .mmap = foo_mmap, + // Such code happens after macro expansion, + // we want to extract the first function name. + .mmap = ((config_foo) ? foo_mmap : foo_mmap2), }; static void proc_open() {} diff --git a/tools/syz-declextract/testdata/file_operations.c.json b/tools/syz-declextract/testdata/file_operations.c.json index 0c1706767..a63a5f2fc 100644 --- a/tools/syz-declextract/testdata/file_operations.c.json +++ b/tools/syz-declextract/testdata/file_operations.c.json @@ -27,8 +27,8 @@ { "name": "foo_ioctl", "file": "file_operations.c", - "start_line": 25, - "end_line": 37, + "start_line": 30, + "end_line": 42, "is_static": true, "scopes": [ { @@ -79,16 +79,16 @@ "FOO_IOCTL11", "FOO_IOCTL12" ], - "start_line": 27, - "end_line": 35 + "start_line": 32, + "end_line": 40 } ] }, { "name": "foo_ioctl2", "file": "file_operations.c", - "start_line": 17, - "end_line": 23, + "start_line": 22, + "end_line": 28, "is_static": true, "scopes": [ { @@ -100,16 +100,28 @@ "FOO_IOCTL6", "FOO_IOCTL7" ], - "start_line": 19, - "end_line": 22 + "start_line": 24, + "end_line": 27 } ] }, { "name": "foo_mmap", "file": "file_operations.c", - "start_line": 15, - "end_line": 15, + "start_line": 19, + "end_line": 19, + "is_static": true, + "scopes": [ + { + "arg": -1 + } + ] + }, + { + "name": "foo_mmap2", + "file": "file_operations.c", + "start_line": 20, + "end_line": 20, "is_static": true, "scopes": [ { @@ -120,8 +132,8 @@ { "name": "foo_open", "file": "file_operations.c", - "start_line": 12, - "end_line": 12, + "start_line": 16, + "end_line": 16, "is_static": true, "scopes": [ { @@ -132,8 +144,8 @@ { "name": "foo_read", "file": "file_operations.c", - "start_line": 13, - "end_line": 13, + "start_line": 17, + "end_line": 17, "is_static": true, "scopes": [ { @@ -144,8 +156,8 @@ { "name": "foo_write", "file": "file_operations.c", - "start_line": 14, - "end_line": 14, + "start_line": 18, + "end_line": 18, "is_static": true, "scopes": [ { @@ -168,8 +180,8 @@ { "name": "proc_ioctl", "file": "file_operations.c", - "start_line": 50, - "end_line": 50, + "start_line": 57, + "end_line": 57, "is_static": true, "scopes": [ { @@ -180,8 +192,8 @@ { "name": "proc_open", "file": "file_operations.c", - "start_line": 47, - "end_line": 47, + "start_line": 54, + "end_line": 54, "is_static": true, "scopes": [ { @@ -192,8 +204,8 @@ { "name": "proc_read", "file": "file_operations.c", - "start_line": 48, - "end_line": 48, + "start_line": 55, + "end_line": 55, "is_static": true, "scopes": [ { @@ -204,8 +216,8 @@ { "name": "proc_write", "file": "file_operations.c", - "start_line": 49, - "end_line": 49, + "start_line": 56, + "end_line": 56, "is_static": true, "scopes": [ { @@ -216,8 +228,8 @@ { "name": "unused_ioctl", "file": "file_operations.c", - "start_line": 66, - "end_line": 71, + "start_line": 73, + "end_line": 78, "is_static": true, "scopes": [ { @@ -229,8 +241,8 @@ "UNUSED_IOCTL1", "UNUSED_IOCTL2" ], - "start_line": 68, - "end_line": 70 + "start_line": 75, + "end_line": 77 } ] } -- cgit mrf-deployment