diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2025-11-17 11:17:23 +0100 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2025-11-20 10:10:05 +0000 |
| commit | 280ea308c321115445df610f1a75b05bbadca5f3 (patch) | |
| tree | c195c76723c4a08986d74edbfc9e15a4f07fa6c1 /tools | |
| parent | 94d1e3f8b1838e8a04074464a957e979a5c5e36b (diff) | |
pkg/codesearch: add skeleton for code searching tool
Add a clang tool that is used for code indexing (tools/clang/codesearch/).
It follows conventions and build procedure of the declextract tool.
Add pkg/codesearch package that aggregates the info exposed by the clang tools,
and allows doing simple queries:
- show source code of an entity (function, struct, etc)
- show entity comment
- show all entities defined in a source file
Add the tools/syz-codesearch wrapper tool, which creates an index for a kernel build
and then runs code queries against it.
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/clang/codesearch/codesearch.cpp | 153 | ||||
| -rw-r--r-- | tools/clang/codesearch/output.h | 64 | ||||
| -rw-r--r-- | tools/syz-codesearch/codesearch.go | 66 |
3 files changed, 283 insertions, 0 deletions
diff --git a/tools/clang/codesearch/codesearch.cpp b/tools/clang/codesearch/codesearch.cpp new file mode 100644 index 000000000..8895d5307 --- /dev/null +++ b/tools/clang/codesearch/codesearch.cpp @@ -0,0 +1,153 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +#include "json.h" +#include "output.h" + +#include "clang/AST/ASTContext.h" +#include "clang/AST/Comment.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclarationName.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" + +#include <algorithm> +#include <filesystem> +#include <string> +#include <unordered_map> + +using namespace clang; + +// MacroDef/MacroMap hold information about macros defined in the file. +struct MacroDef { + std::string Value; // value as written in the source + SourceRange SourceRange; // soruce range of the value +}; +using MacroMap = std::unordered_map<std::string, MacroDef>; + +class Instance : public tooling::SourceFileCallbacks { +public: + Instance(Output& Output) : Output(Output) {} + std::unique_ptr<ASTConsumer> newASTConsumer(); + +private: + Output& Output; + MacroMap Macros; + + bool handleBeginSource(CompilerInstance& CI) override; +}; + +// PPCallbacksTracker records all macro definitions (name/value/source location). 
+class PPCallbacksTracker : public PPCallbacks { +public: + PPCallbacksTracker(Preprocessor& PP, MacroMap& Macros) : SM(PP.getSourceManager()), Macros(Macros) {} + +private: + SourceManager& SM; + MacroMap& Macros; + + void MacroDefined(const Token& MacroName, const MacroDirective* MD) override { (void)Macros; } +}; + +class IndexerAstConsumer : public ASTConsumer { +public: + IndexerAstConsumer(Output& Output, const MacroMap& Macros) : Output(Output), Macros(Macros) {} + +private: + Output& Output; + const MacroMap& Macros; + + void HandleTranslationUnit(ASTContext& context) override; +}; + +class Indexer : public RecursiveASTVisitor<Indexer> { +public: + Indexer(ASTContext& Context, Output& Output, const MacroMap& Macros) + : Context(Context), SM(Context.getSourceManager()), Output(Output) {} + + bool VisitFunctionDecl(const FunctionDecl*); + +private: + ASTContext& Context; + SourceManager& SM; + Output& Output; +}; + +bool Instance::handleBeginSource(CompilerInstance& CI) { + Preprocessor& PP = CI.getPreprocessor(); + PP.addPPCallbacks(std::make_unique<PPCallbacksTracker>(PP, Macros)); + return true; +} + +std::unique_ptr<ASTConsumer> Instance::newASTConsumer() { return std::make_unique<IndexerAstConsumer>(Output, Macros); } + +void IndexerAstConsumer::HandleTranslationUnit(ASTContext& Context) { + Indexer Indexer(Context, Output, Macros); + Indexer.TraverseDecl(Context.getTranslationUnitDecl()); +} + +bool Indexer::VisitFunctionDecl(const FunctionDecl* Func) { + if (!Func->doesThisDeclarationHaveABody()) + return true; + auto Range = Func->getSourceRange(); + const std::string& SourceFile = std::filesystem::relative(SM.getFilename(SM.getExpansionLoc(Range.getBegin())).str()); + int StartLine = SM.getExpansionLineNumber(Range.getBegin()); + int EndLine = SM.getExpansionLineNumber(Range.getEnd()); + std::string CommentSourceFile; + int CommentStartLine = 0; + int CommentEndLine = 0; + if (auto Comment = Context.getRawCommentForDeclNoCache(Func)) { + const auto& 
begin = Comment->getBeginLoc(); + const auto& end = Comment->getEndLoc(); + CommentSourceFile = std::filesystem::relative(SM.getFilename(SM.getExpansionLoc(begin)).str()); + CommentStartLine = SM.getExpansionLineNumber(begin); + CommentEndLine = SM.getExpansionLineNumber(end); + // Expand body range to include the comment, if they intersect. + if (SourceFile == CommentSourceFile && + std::max(StartLine, CommentStartLine) <= std::min(EndLine, CommentEndLine) + 1) { + StartLine = std::min(StartLine, CommentStartLine); + EndLine = std::max(EndLine, CommentEndLine); + } + } + Output.emit(Definition{ + .Kind = KindFunction, + .Name = Func->getNameAsString(), + .Type = Func->getType().getAsString(), + .IsStatic = Func->isStatic(), + .Body = + LineRange{ + .File = SourceFile, + .StartLine = StartLine, + .EndLine = EndLine, + }, + .Comment = + LineRange{ + .File = CommentSourceFile, + .StartLine = CommentStartLine, + .EndLine = CommentEndLine, + }, + }); + return true; +} + +int main(int argc, const char** argv) { + llvm::cl::OptionCategory Options("syz-indexer options"); + auto OptionsParser = tooling::CommonOptionsParser::create(argc, argv, Options); + if (!OptionsParser) { + llvm::errs() << OptionsParser.takeError(); + return 1; + } + Output Output; + Instance Instance(Output); + tooling::ClangTool Tool(OptionsParser->getCompilations(), OptionsParser->getSourcePathList()); + if (Tool.run(tooling::newFrontendActionFactory(&Instance, &Instance).get())) + return 1; + Output.print(); + return 0; +} diff --git a/tools/clang/codesearch/output.h b/tools/clang/codesearch/output.h new file mode 100644 index 000000000..ac490bb91 --- /dev/null +++ b/tools/clang/codesearch/output.h @@ -0,0 +1,64 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +#ifndef SYZ_INDEXER_OUTPUT_H +#define SYZ_INDEXER_OUTPUT_H + +#include "json.h" +#include <vector> + +constexpr char KindFunction[] = "function"; +constexpr char KindStruct[] = "struct"; +constexpr char KindVariable[] = "variable"; +constexpr char KindMacro[] = "macro"; +constexpr char KindEnum[] = "enum"; + +struct LineRange { + std::string File; + int StartLine = 0; + int EndLine = 0; +}; + +struct Definition { + const char* Kind; // one of Kind* consts + std::string Name; + std::string Type; // raw C type + bool IsStatic = false; + // If the kernel-doc comment is placed around the body, + // then it's included in the body range. + LineRange Body; + // Location of the kernel-doc comment. + LineRange Comment; +}; + +inline void print(JSONPrinter& Printer, const LineRange& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("file", V.File); + Printer.Field("start_line", V.StartLine); + Printer.Field("end_line", V.EndLine, true); +} + +inline void print(JSONPrinter& Printer, const Definition& V) { + JSONPrinter::Scope Scope(Printer); + Printer.Field("kind", V.Kind); + Printer.Field("name", V.Name); + Printer.Field("type", V.Type); + Printer.Field("is_static", V.IsStatic); + Printer.Field("body", V.Body); + Printer.Field("comment", V.Comment, true); +} + +class Output { +public: + void emit(Definition&& V) { Definitions.push_back(std::move(V)); } + + void print() const { + JSONPrinter Printer; + Printer.Field("definitions", Definitions, true); + } + +private: + std::vector<Definition> Definitions; +}; + +#endif diff --git a/tools/syz-codesearch/codesearch.go b/tools/syz-codesearch/codesearch.go new file mode 100644 index 000000000..afd3840c7 --- /dev/null +++ b/tools/syz-codesearch/codesearch.go @@ -0,0 +1,66 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +package main + +import ( + "flag" + "fmt" + "os" + + "github.com/google/syzkaller/pkg/clangtool" + "github.com/google/syzkaller/pkg/codesearch" + "github.com/google/syzkaller/pkg/tool" +) + +func main() { + var ( + flagDatabase = flag.String("database", "", "path to input/output database file (mandatory)") + flagKernelSrc = flag.String("kernel-src", "", "path to kernel source directory (mandatory)") + flagKernelObj = flag.String("kernel-obj", "", "path to kernel build directory (mandatory)") + ) + flag.Parse() + if len(flag.Args()) == 0 || *flagDatabase == "" || *flagKernelSrc == "" || *flagKernelObj == "" { + printUsageAndExit() + } + cmd, args := flag.Args()[0], flag.Args()[1:] + if cmd == "index" { + if len(args) != 1 { + printUsageAndExit() + } + cfg := &clangtool.Config{ + ToolBin: args[0], + KernelSrc: *flagKernelSrc, + KernelObj: *flagKernelObj, + CacheFile: *flagDatabase, + DebugTrace: os.Stderr, + } + + if _, err := clangtool.Run[codesearch.Database](cfg); err != nil { + tool.Fail(err) + } + return + } + index, err := codesearch.NewIndex(*flagDatabase, []string{*flagKernelSrc, *flagKernelObj}) + if err != nil { + tool.Fail(err) + } + res, err := index.Command(cmd, args) + if err != nil { + tool.Fail(err) + } + os.Stdout.WriteString(res) +} + +func printUsageAndExit() { + fmt.Printf(`syz-codesearch usage: +syz-codesearch [flags] command [command arguments] +commands and their arguments: +`) + for _, cmd := range codesearch.Commands { + fmt.Printf(" - %v [%v args]\n", cmd.Name, cmd.NArgs) + } + fmt.Printf("\nflags:\n") + flag.PrintDefaults() + os.Exit(1) +} |
