aboutsummaryrefslogtreecommitdiffstats
path: root/tools/clang/codesearch/codesearch.cpp
diff options
context:
space:
mode:
author Dmitry Vyukov <dvyukov@google.com> 2025-11-17 11:17:23 +0100
committer Dmitry Vyukov <dvyukov@google.com> 2025-11-20 10:10:05 +0000
commit 280ea308c321115445df610f1a75b05bbadca5f3 (patch)
tree c195c76723c4a08986d74edbfc9e15a4f07fa6c1 /tools/clang/codesearch/codesearch.cpp
parent 94d1e3f8b1838e8a04074464a957e979a5c5e36b (diff)
pkg/codesearch: add skeleton for code searching tool
Add a clang tool that is used for code indexing (tools/clang/codesearch/). It follows the conventions and build procedure of the declextract tool.
Add the pkg/codesearch package, which aggregates the info exposed by the clang tools and allows doing simple queries:
- show the source code of an entity (function, struct, etc.)
- show the comment of an entity
- show all entities defined in a source file
Add the tools/syz-codesearch wrapper tool, which allows creating an index for a kernel build and then running code queries on it.
Diffstat (limited to 'tools/clang/codesearch/codesearch.cpp')
-rw-r--r-- tools/clang/codesearch/codesearch.cpp 153
1 files changed, 153 insertions, 0 deletions
diff --git a/tools/clang/codesearch/codesearch.cpp b/tools/clang/codesearch/codesearch.cpp
new file mode 100644
index 000000000..8895d5307
--- /dev/null
+++ b/tools/clang/codesearch/codesearch.cpp
@@ -0,0 +1,153 @@
+// Copyright 2025 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+#include "json.h"
+#include "output.h"
+
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Comment.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclarationName.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include <algorithm>
+#include <filesystem>
+#include <string>
+#include <unordered_map>
+
+using namespace clang;
+
+// MacroDef/MacroMap hold information about macros defined in the file.
+struct MacroDef {
+ std::string Value; // value as written in the source
+ SourceRange SourceRange; // source range of the value
+};
+using MacroMap = std::unordered_map<std::string, MacroDef>; // presumably keyed by macro name (nothing inserts into it yet in this file)
+
+class Instance : public tooling::SourceFileCallbacks { // per-run state; main() passes it as both consumer factory and callbacks
+public:
+ Instance(Output& Output) : Output(Output) {} // stores a reference only, so Output must outlive this object
+ std::unique_ptr<ASTConsumer> newASTConsumer(); // creates an IndexerAstConsumer bound to Output and Macros
+
+private:
+ Output& Output; // sink that is handed on to the AST consumer
+ MacroMap Macros; // owned here; shared by reference with PPCallbacksTracker and IndexerAstConsumer
+
+ bool handleBeginSource(CompilerInstance& CI) override; // installs a PPCallbacksTracker on CI's preprocessor
+};
+
+// PPCallbacksTracker is intended to record all macro definitions (name/value/source location); it is a stub for now.
+class PPCallbacksTracker : public PPCallbacks {
+public:
+ PPCallbacksTracker(Preprocessor& PP, MacroMap& Macros) : SM(PP.getSourceManager()), Macros(Macros) {}
+
+private:
+ SourceManager& SM; // not used yet
+ MacroMap& Macros; // destination map; refers to the map passed to the constructor
+
+ void MacroDefined(const Token& MacroName, const MacroDirective* MD) override { (void)Macros; } // stub: records nothing; the cast only marks Macros as used
+};
+
+class IndexerAstConsumer : public ASTConsumer { // AST consumer that runs an Indexer over the translation unit
+public:
+ IndexerAstConsumer(Output& Output, const MacroMap& Macros) : Output(Output), Macros(Macros) {} // keeps references only
+
+private:
+ Output& Output; // forwarded to the Indexer
+ const MacroMap& Macros; // forwarded to the Indexer (read-only)
+
+ void HandleTranslationUnit(ASTContext& context) override; // builds an Indexer and traverses the TU with it
+};
+
+class Indexer : public RecursiveASTVisitor<Indexer> { // AST visitor that emits a Definition for each function that has a body
+public:
+ Indexer(ASTContext& Context, Output& Output, const MacroMap& Macros)
+ : Context(Context), SM(Context.getSourceManager()), Output(Output) {} // NOTE: Macros is accepted but not stored or used yet
+
+ bool VisitFunctionDecl(const FunctionDecl*); // always returns true
+
+private:
+ ASTContext& Context; // used to look up the raw comment attached to a declaration
+ SourceManager& SM; // used to resolve file names and line numbers
+ Output& Output; // receives the emitted Definition records
+};
+
+bool Instance::handleBeginSource(CompilerInstance& CI) { // hooks macro tracking into the compiler instance's preprocessor
+ Preprocessor& PP = CI.getPreprocessor();
+ PP.addPPCallbacks(std::make_unique<PPCallbacksTracker>(PP, Macros)); // the tracker holds a reference to this->Macros
+ return true; // unconditionally returns true
+}
+
+std::unique_ptr<ASTConsumer> Instance::newASTConsumer() { return std::make_unique<IndexerAstConsumer>(Output, Macros); } // new consumer shares Output and Macros by reference
+
+void IndexerAstConsumer::HandleTranslationUnit(ASTContext& Context) { // indexes one translation unit
+ Indexer Indexer(Context, Output, Macros); // local is named like its type and shadows it for the rest of the scope
+ Indexer.TraverseDecl(Context.getTranslationUnitDecl()); // start the traversal at the translation unit root
+}
+
+bool Indexer::VisitFunctionDecl(const FunctionDecl* Func) { // emits one Definition per function that has a body
+ if (!Func->doesThisDeclarationHaveABody())
+ return true; // no body on this declaration: emit nothing
+ auto Range = Func->getSourceRange();
+ const std::string& SourceFile = std::filesystem::relative(SM.getFilename(SM.getExpansionLoc(Range.getBegin())).str());
+ int StartLine = SM.getExpansionLineNumber(Range.getBegin()); // line numbers are taken at the expansion location
+ int EndLine = SM.getExpansionLineNumber(Range.getEnd());
+ std::string CommentSourceFile; // stays empty when no comment is found
+ int CommentStartLine = 0; // stays 0 when no comment is found
+ int CommentEndLine = 0; // stays 0 when no comment is found
+ if (auto Comment = Context.getRawCommentForDeclNoCache(Func)) {
+ const auto& begin = Comment->getBeginLoc();
+ const auto& end = Comment->getEndLoc();
+ CommentSourceFile = std::filesystem::relative(SM.getFilename(SM.getExpansionLoc(begin)).str());
+ CommentStartLine = SM.getExpansionLineNumber(begin);
+ CommentEndLine = SM.getExpansionLineNumber(end);
+ // Expand body range to include the comment, if they are in the same file and overlap or are directly adjacent (hence the + 1).
+ if (SourceFile == CommentSourceFile &&
+ std::max(StartLine, CommentStartLine) <= std::min(EndLine, CommentEndLine) + 1) {
+ StartLine = std::min(StartLine, CommentStartLine);
+ EndLine = std::max(EndLine, CommentEndLine);
+ }
+ }
+ Output.emit(Definition{
+ .Kind = KindFunction,
+ .Name = Func->getNameAsString(),
+ .Type = Func->getType().getAsString(),
+ .IsStatic = Func->isStatic(),
+ .Body =
+ LineRange{
+ .File = SourceFile,
+ .StartLine = StartLine, // may already have been widened to cover the comment
+ .EndLine = EndLine,
+ },
+ .Comment =
+ LineRange{
+ .File = CommentSourceFile,
+ .StartLine = CommentStartLine,
+ .EndLine = CommentEndLine,
+ },
+ });
+ return true; // same result as the early-return path above
+}
+
+int main(int argc, const char** argv) { // parses tool options, runs the indexer over the sources, prints the output
+ llvm::cl::OptionCategory Options("syz-indexer options");
+ auto OptionsParser = tooling::CommonOptionsParser::create(argc, argv, Options);
+ if (!OptionsParser) {
+ llvm::errs() << OptionsParser.takeError(); // report why option parsing failed
+ return 1;
+ }
+ Output Output;
+ Instance Instance(Output); // Instance keeps a reference to Output
+ tooling::ClangTool Tool(OptionsParser->getCompilations(), OptionsParser->getSourcePathList());
+ if (Tool.run(tooling::newFrontendActionFactory(&Instance, &Instance).get())) // Instance is both consumer factory and source-file callbacks
+ return 1; // non-zero result from Tool.run: exit without printing
+ Output.print(); // reached only when Tool.run returned 0
+ return 0;
+}