diff options
| author | Pimyn Girgis <bemenboshra2001@gmail.com> | 2024-07-29 13:28:55 +0000 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2024-07-30 12:26:59 +0000 |
| commit | 3d5fb491e177a71d306d7c3dff2bda1995c34a1c (patch) | |
| tree | 9cb9004d7a6ecebf130e422b9b47aa4f1ad0dd99 | |
| parent | a4e01e1e70da6b4954a12cae3495f7d754f4b1da (diff) | |
tools/syz-declextract: parse Linux Headers to extract metadata about syscalls
A Clang tool that uses AST matchers to extract relevant metadata about Linux system calls. The tool is to be extended
later to support more interfaces, collect more relevant metadata, and automatically generate syzkaller descriptions.
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | tools/syz-declextract/.clang-format | 2 | ||||
| -rw-r--r-- | tools/syz-declextract/README.md | 37 | ||||
| -rw-r--r-- | tools/syz-declextract/syz-declextract.cpp | 329 |
4 files changed, 111 insertions, 259 deletions
@@ -259,7 +259,7 @@ format_go: format_cpp: clang-format --style=file -i executor/*.cc executor/*.h \ executor/android/android_seccomp.h \ - tools/kcovtrace/*.c tools/kcovfuzzer/*.c tools/fops_probe/*.cc + tools/kcovtrace/*.c tools/kcovfuzzer/*.c tools/fops_probe/*.cc tools/syz-declextract/syz-declextract.cpp format_sys: bin/syz-fmt bin/syz-fmt all diff --git a/tools/syz-declextract/.clang-format b/tools/syz-declextract/.clang-format new file mode 100644 index 000000000..173f8a6a8 --- /dev/null +++ b/tools/syz-declextract/.clang-format @@ -0,0 +1,2 @@ +BasedOnStyle: LLVM +ColumnLimit: 120 diff --git a/tools/syz-declextract/README.md b/tools/syz-declextract/README.md new file mode 100644 index 000000000..ec28959ba --- /dev/null +++ b/tools/syz-declextract/README.md @@ -0,0 +1,37 @@ +# syz-declextract +## Linux Kernel (For testing purposes) +``` +export KERNEL=$PWD/linux-stable +git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git $KERNEL +cd $KERNEL +make CC=clang defconfig # Having clang as the compiler is optional but removes erros later on +./scripts/config -e FTRACE_SYSCALLS +make CC=clang olddefconfig +make CC=clang -j`nproc` # kernel has to be built at least once for the script to work +./scripts/clang/gen_compile_commands.py +``` +## LLVM Project +``` +LLVM=$PWD/llvm-project +git clone https://github.com/llvm/llvm-project.git $LLVM +cd $LLVM +git checkout 0f231567719c99caa99164d8f91bad50883dab03 # In case of any breaking changes, this commit works +echo 'add_clang_executable(syz-declextract syz-declextract/syz-declextract.cpp) +target_link_libraries(syz-declextract PRIVATE clangTooling)' >> $LLVM/clang/CMakeLists.txt +``` +## syz-declextract +``` +mkdir $LLVM/clang/syz-declextract +``` +Download `syz-declextract.cpp` file and add it to `$LLVM/clang/syz-declextract` directory +``` +SYZ=$PWD/syz +mkdir $SYZ && cd $SYZ +cmake -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ 
+-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ $LLVM/llvm +make -j`nproc` syz-declextract +``` +## Example +``` +./bin/syz-declextract $KERNEL/fs/read_write.c | less # or any other .c file +``` diff --git a/tools/syz-declextract/syz-declextract.cpp b/tools/syz-declextract/syz-declextract.cpp index cb29175b1..78162f648 100644 --- a/tools/syz-declextract/syz-declextract.cpp +++ b/tools/syz-declextract/syz-declextract.cpp @@ -1,279 +1,92 @@ -// Copyright 2017 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -// This is a very rough prototype of an utility that extracts syscall descriptions from header files. -// It needs to extract struct/union descriptions, better analyze types, -// analyze pointer directions (in, out), figure out len types (usually marked with sal). -// The easiest way to build it is to build it as part of clang. Add the following lines to CMakeLists.txt: -// +add_clang_executable(syz-declextract syz-declextract/syz-declextract.cpp) -// +target_link_libraries(syz-declextract clangTooling) -// It was used to extract windows descriptions: -// syz-declextract -extra-arg="--driver-mode=cl" -extra-arg="-I/path/to/windows/headers" Windows.h - -#include "clang/AST/AST.h" -#include "clang/AST/ASTConsumer.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/Driver/Options.h" -#include "clang/Frontend/ASTConsumers.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/FrontendActions.h" -#include "clang/Rewrite/Core/Rewriter.h" +#include "clang/AST/APValue.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/Type.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/ASTMatchers/ASTMatchersInternal.h" +#include "clang/Basic/LLVM.h" +#include "clang/Sema/Ownership.h" #include 
"clang/Tooling/CommonOptionsParser.h" #include "clang/Tooling/Tooling.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include <stdio.h> +#include <string> +#include <vector> using namespace clang; -using namespace clang::tooling; +using namespace clang::ast_matchers; -std::string convertType(ASTContext &C, QualType T) { - auto name = T.getAsString(); - if (name == "HANDLE") - return name; - if (T->isIntegralOrEnumerationType()) { - int size = C.getTypeSize(T); - char buf[10]; - sprintf(buf, "int%d", size); - return buf; - } - if (T->isVoidPointerType()) { - return "ptr[inout, array[int8]]"; - } - if (T->isPointerType()) { - auto inner = convertType(C, T->getPointeeType()); - if (inner == "") - return "ptr[inout, array[int8]]"; - char buf[1024]; - sprintf(buf, "ptr[inout, %s]", inner.c_str()); - return buf; - } - return "intptr"; -} +struct Param { + std::string type; + std::string name; +}; -class DeclExtractCallVisitor : public RecursiveASTVisitor<DeclExtractCallVisitor> { - public: - explicit DeclExtractCallVisitor(ASTContext *Context) - : Context(*Context) {} +class Printer : public MatchFinder::MatchCallback { +public: + virtual void run(const MatchFinder::MatchResult &Result) override { + const auto *varDecl = Result.Nodes.getNodeAs<VarDecl>("Struct"); + auto *context = Result.Context; + if (!varDecl || !varDecl->getInit()) + return; - bool VisitFunctionDecl(const FunctionDecl *D) { - if (D->doesThisDeclarationHaveABody()) - return true; - // TODO(dvyukov): need to select only stdcall (WINAPI) functions. - // But the following 2 approaches do not work. 
- if (false) { - if (auto *FPT = D->getType()->getAs<FunctionProtoType>()) { - if (FPT->getExtInfo().getCC() != CC_X86StdCall) - return true; + // values contains the initializer list for the struct `syscall_metadata` + auto values = llvm::dyn_cast<InitListExpr>(varDecl->getInit())->inits(); + if (values.empty()) + return; + + int argc = *values[2]->getIntegerConstantExpr(*context).value().getRawData(); + + std::vector<Param> args(argc); + if (argc) { + int i = 0; + for (const auto *type : // get parameter types + llvm::dyn_cast<InitListExpr>( + llvm::dyn_cast<VarDecl>(values[3]->getAsBuiltinConstantDeclRef(*context)->getUnderlyingDecl()) + ->getInit()) + ->inits()) { + args[i++].type = std::move(*type->tryEvaluateString(*context)); } - } - if (false) { - if (!D->hasAttr<StdCallAttr>()) - return true; - } - // Tons of functions are bulk ignored below because they cause - // static/dynamic link failures, reboot machine, etc. - auto fn = D->getNameInfo().getAsString(); - if (fn.empty()) return true; - if (*fn.rbegin() == 'W') return true; // Unicode versions. 
- const char *ignore_prefixes[] { - "_", - "Rtl", - "IBind", - "Ndr", - "NDR", - "SCard", - }; - for (auto prefix: ignore_prefixes) { - if (strncmp(fn.c_str(), prefix, strlen(prefix)) == 0) return true; - } - const char *ignore_functions[] { - "IEnum", - "IStream", - "IType", - "IService", - "IProperty", - "ISequential", - "IDispatch", - "I_RPC", - "I_Rpc", - "CLEANLOCAL", - "WinMain", - "PropertySheet", - "LookupAccountNameLocalA", - "LookupAccountSidLocalA", - "WTSGetServiceSessionId", - "WTSIsServerContainer", - "GetDisplayAutoRotationPreferencesByProcessId", - "LoadStringByReference", - "IdnToNameprepUnicode", - "VerFindFileA", - "VerInstallFileA", - "GetFileVersionInfoSizeA", - "GetFileVersionInfoA", - "GetFileVersionInfoSizeExA", - "GetFileVersionInfoExA", - "VerQueryValueA", - "sndOpenSound", - "Netbios", - "RpcBindingGetTrainingContextHandle", - "RpcAsyncCleanupThread", - "ShellMessageBoxA", - "SHEnumerateUnreadMailAccountsA", - "SHGetUnreadMailCountA", - "SHSetUnreadMailCountA", - "GetEncSChannel", - "CryptExportPKCS8Ex", - "FindCertsByIssuer", - "CryptCancelAsyncRetrieval", - "CryptGetTimeValidObject", - "CryptFlushTimeValidObject", - "CryptProtectDataNoUI", - "CryptUnprotectDataNoUI", - "NsServerBindSearch", - "NsClientBindSearch", - "NsClientBindDone", - "GetOpenCardNameA", - "SubscribeServiceChangeNotifications", - "UnsubscribeServiceChangeNotifications", - "GetThreadDescription", - "SetThreadDescription", - "DialogControlDpi", - "SetDialogDpiChangeBehavior", - "GetDialogDpiChangeBehavior", - "RpcServer", - "DecodePointer", - "DecodeRemotePointer", - "DecodeSystemPointer", - "EncodePointer", - "EncodeRemotePointer", - "EncodeSystemPointer", - "UnmapViewOfFile2", - "MapViewOfFileNuma2", - "DeriveCapabilitySidsFromName", - "QueryAuxiliaryCounterFrequency", - "ConvertPerformanceCounterToAuxiliaryCounter", - "ConvertAuxiliaryCounterToPerformanceCounter", - "FreePropVariantArray", - "PropVariantCopy", - "PropVariantClear", - "InitiateShutdown", - 
"ExitWindowsEx", - "LockWorkStation", - "InitiateSystemShutdown", - "InitiateSystemShutdownEx", - "shutdown", - }; - for (auto func: ignore_functions) { - if (strstr(fn.c_str(), func)) return true; - } - // These are already described: - const char *ignore_exact[] { - "CreateFileA", - "CloseHandle", - "VirtualAlloc", - }; - for (auto func: ignore_exact) { - if (strcmp(fn.c_str(), func) == 0) return true; - } - const char *ignore_files[] { - "/um/ole", - "htiface.h", - "objbase.h", - "HLink.h", - "urlmon.h", - "HlGuids.h", - "unknwn.h", - "unknwnbase.h", - "coguid.h", - "MsHtmHst.h", - "msime.h", - "ComSvcs.h", - "combaseapi.h", - "WbemGlue.h", - "OCIdl.h", - "mfapi.h", - "CompPkgSup.h", - "ole2.h", - "Ole2.h", - "oleidl.h", - "ObjIdl.h", - "WabDefs.h", - "objidl.h", - }; - auto src = D->getSourceRange().getBegin().printToString(Context.getSourceManager()); - if (strstr(src.c_str(), "/um/") == 0) return true; - for (auto file: ignore_files) { - if (strstr(src.c_str(), file)) return true; - } - for (const ParmVarDecl *P : D->parameters()) { - auto typ = convertType(Context, P->getType()); - if (typ == "") { - llvm::outs() << D->getNameInfo().getAsString() << ": UNKNOWN TYPE: " << - QualType(P->getType()).getAsString() << "\n"; - return true; + + i = 0; + for (const auto *name : // get parameter names + llvm::dyn_cast<InitListExpr>( + llvm::dyn_cast<VarDecl>(values[4]->getAsBuiltinConstantDeclRef(*context)->getUnderlyingDecl()) + ->getInit()) + ->inits()) { + args[i++].name = std::move(*name->tryEvaluateString(*context)); } } - if (Generated[D->getNameInfo().getAsString()]) - return true; - Generated[D->getNameInfo().getAsString()] = true; - llvm::outs() << D->getNameInfo().getAsString() << "("; - int i = 0; - for (const ParmVarDecl *P : D->parameters()) { - if (i) - llvm::outs() << ", "; - auto name = P->getNameAsString(); - if (name == "") { - char buf[10]; - sprintf(buf, "arg%d", i); - name = buf; - } - llvm::outs() << name << " " << convertType(Context, 
P->getType()); - i++; - if (i == 9) - break; + printf("==========SYSCALL Found==========\n"); + printf("%s\n", values[0]->tryEvaluateString(*context).value().c_str()); + for (const auto &arg : args) { + printf("%s %s\n", arg.type.c_str(), arg.name.c_str()); } - llvm::outs() << ")"; - auto ret = convertType(Context, D->getReturnType()); - if (ret == "HANDLE") - llvm::outs() << " " << ret; - llvm::outs() << "\n"; - return true; } - - private: - ASTContext &Context; - std::map<std::string, bool> Generated; }; -class DeclExtractCallConsumer : public clang::ASTConsumer { - public: - explicit DeclExtractCallConsumer(ASTContext *Context) - : Visitor(Context) {} +int main(int argc, const char **argv) { + llvm::cl::OptionCategory SyzDeclExtractOptionCategory("SyzDeclExtract options"); + auto ExpectedParser = clang::tooling::CommonOptionsParser::create(argc, argv, SyzDeclExtractOptionCategory); - virtual void HandleTranslationUnit(clang::ASTContext &Context) { - Visitor.TraverseDecl(Context.getTranslationUnitDecl()); + if (!ExpectedParser) { + llvm::errs() << ExpectedParser.takeError(); + return 1; } - private: - DeclExtractCallVisitor Visitor; -}; - -class DeclExtractCallAction : public clang::ASTFrontendAction { - public: - DeclExtractCallAction() {} - - virtual std::unique_ptr<clang::ASTConsumer> CreateASTConsumer( - clang::CompilerInstance &Compiler, llvm::StringRef InFile) { - return std::unique_ptr<clang::ASTConsumer>( - new DeclExtractCallConsumer(&Compiler.getASTContext())); - } -}; + clang::tooling::CommonOptionsParser &OptionsParser = ExpectedParser.get(); + clang::tooling::ClangTool Tool(OptionsParser.getCompilations(), OptionsParser.getSourcePathList()); -static llvm::cl::OptionCategory MyToolCategory("my-tool options"); + DeclarationMatcher MetaDataMatcher = + varDecl(isExpandedFromMacro("SYSCALL_METADATA"), hasType(recordDecl(hasName("syscall_metadata")))).bind("Struct"); -int main(int argc, const char **argv) { - CommonOptionsParser OptionsParser(argc, argv, 
MyToolCategory); - ClangTool Tool(OptionsParser.getCompilations(), - OptionsParser.getSourcePathList()); - return Tool.run(newFrontendActionFactory<DeclExtractCallAction>().get()); + Printer Printer; + MatchFinder Finder; + Finder.addMatcher(MetaDataMatcher, &Printer); + return Tool.run(clang::tooling::newFrontendActionFactory(&Finder).get()); } |
