aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
author Dmitry Vyukov <dvyukov@google.com> 2025-11-17 11:17:23 +0100
committer Dmitry Vyukov <dvyukov@google.com> 2025-11-20 10:10:05 +0000
commit 280ea308c321115445df610f1a75b05bbadca5f3 (patch)
tree c195c76723c4a08986d74edbfc9e15a4f07fa6c1 /tools
parent 94d1e3f8b1838e8a04074464a957e979a5c5e36b (diff)
pkg/codesearch: add skeleton for code searching tool
Add a clang tool that is used for code indexing (tools/clang/codesearch/). It follows the conventions and build procedure of the declextract tool. Add the pkg/codesearch package, which aggregates the info exposed by the clang tools and allows doing simple queries: show the source code of an entity (function, struct, etc.); show an entity's comment; show all entities defined in a source file. Add the tools/syz-codesearch wrapper tool, which allows creating an index for a kernel build and then running code queries on it.
Diffstat (limited to 'tools')
-rw-r--r-- tools/clang/codesearch/codesearch.cpp 153
-rw-r--r-- tools/clang/codesearch/output.h 64
-rw-r--r-- tools/syz-codesearch/codesearch.go 66
3 files changed, 283 insertions, 0 deletions
diff --git a/tools/clang/codesearch/codesearch.cpp b/tools/clang/codesearch/codesearch.cpp
new file mode 100644
index 000000000..8895d5307
--- /dev/null
+++ b/tools/clang/codesearch/codesearch.cpp
@@ -0,0 +1,153 @@
+// Copyright 2025 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+#include "json.h"
+#include "output.h"
+
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Comment.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclarationName.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include <algorithm>
+#include <filesystem>
+#include <string>
+#include <unordered_map>
+
+using namespace clang;
+
+// MacroDef describes one macro defined in the file; MacroMap is a collection of them.
+struct MacroDef {
+ std::string Value; // value as written in the source
+ SourceRange SourceRange; // source range of the value
+};
+using MacroMap = std::unordered_map<std::string, MacroDef>; // string key is presumably the macro name (TODO confirm: nothing populates the map yet)
+
+// Instance ties one tool run together: it installs the preprocessor callbacks
+// (handleBeginSource) and creates the AST consumers (newASTConsumer), handing
+// both the same Output sink and the same macro map.
+class Instance : public tooling::SourceFileCallbacks {
+public:
+	// explicit: an Output must not be implicitly convertible into an Instance.
+	// Only a reference to Output is stored, so it has to outlive this object.
+	explicit Instance(Output& Output) : Output(Output) {}
+	// Returns a new IndexerAstConsumer bound to this instance's Output and Macros.
+	std::unique_ptr<ASTConsumer> newASTConsumer();
+
+private:
+	Output& Output; // sink for emitted definitions (not owned)
+	MacroMap Macros; // passed by reference to PPCallbacksTracker and IndexerAstConsumer
+
+	// Registers a PPCallbacksTracker on CI's preprocessor; always returns true.
+	bool handleBeginSource(CompilerInstance& CI) override;
+};
+
+// PPCallbacksTracker is the preprocessor hook intended to record all macro
+// definitions (name/value/source location). In this skeleton the MacroDefined
+// callback does not record anything yet.
+class PPCallbacksTracker : public PPCallbacks {
+public:
+	PPCallbacksTracker(Preprocessor& PP, MacroMap& Macros)
+	    : SM(PP.getSourceManager()),
+	      Macros(Macros) {}
+
+private:
+	SourceManager& SM;
+	MacroMap& Macros;
+
+	// Stub: merely references Macros (presumably to keep unused-member warnings quiet).
+	void MacroDefined(const Token& MacroName, const MacroDirective* MD) override {
+		(void)Macros;
+	}
+};
+
+// IndexerAstConsumer is the AST consumer created by Instance::newASTConsumer.
+// It keeps references to the shared Output sink and macro map and, once a
+// translation unit is available, runs an Indexer over it.
+class IndexerAstConsumer : public ASTConsumer {
+public:
+	// Both arguments are stored by reference and must outlive the consumer.
+	IndexerAstConsumer(Output& Output, const MacroMap& Macros) : Output(Output), Macros(Macros) {}
+
+private:
+	Output& Output;
+	const MacroMap& Macros;
+
+	// Traverses the translation unit declaration of Context with an Indexer.
+	// The parameter is spelled Context to match the out-of-line definition
+	// and the capitalized naming used throughout this file.
+	void HandleTranslationUnit(ASTContext& Context) override;
+};
+
+// Indexer walks the AST and reports what it finds to Output.
+// At this stage only function definitions are handled (VisitFunctionDecl).
+class Indexer : public RecursiveASTVisitor<Indexer> {
+public:
+	// Macros is accepted but neither stored nor used by this skeleton.
+	Indexer(ASTContext& Context, Output& Output, const MacroMap& Macros)
+	    : Context(Context),
+	      SM(Context.getSourceManager()),
+	      Output(Output) {}
+
+	// Visitor hook for function declarations; see the out-of-line definition.
+	bool VisitFunctionDecl(const FunctionDecl*);
+
+private:
+	ASTContext& Context;
+	SourceManager& SM; // obtained from Context in the constructor
+	Output& Output;
+};
+
+// handleBeginSource attaches a fresh PPCallbacksTracker, bound to this
+// instance's macro map, to the preprocessor of CI. It always returns true.
+bool Instance::handleBeginSource(CompilerInstance& CI) {
+	auto& Preproc = CI.getPreprocessor();
+	auto Tracker = std::make_unique<PPCallbacksTracker>(Preproc, Macros);
+	Preproc.addPPCallbacks(std::move(Tracker));
+	return true;
+}
+
+// newASTConsumer creates an IndexerAstConsumer that shares this instance's
+// Output sink and macro map.
+std::unique_ptr<ASTConsumer> Instance::newASTConsumer() {
+	return std::make_unique<IndexerAstConsumer>(Output, Macros);
+}
+
+// HandleTranslationUnit builds an Indexer for Context and traverses the whole
+// translation unit with it, starting from the translation unit declaration.
+void IndexerAstConsumer::HandleTranslationUnit(ASTContext& Context) {
+	Indexer Visitor(Context, Output, Macros);
+	Visitor.TraverseDecl(Context.getTranslationUnitDecl());
+}
+
+// VisitFunctionDecl emits a function Definition for every function declaration
+// that has a body; declarations without a body are skipped.
+// The emitted body range is widened to cover the attached raw comment when the
+// comment is in the same file and overlaps or directly adjoins the body.
+// Always returns true.
+bool Indexer::VisitFunctionDecl(const FunctionDecl* Func) {
+	if (!Func->doesThisDeclarationHaveABody())
+		return true;
+	// Name of the file holding the expansion location of Loc, made relative
+	// via std::filesystem::relative (i.e. against the current directory).
+	auto RelativeFile = [this](SourceLocation Loc) -> std::string {
+		return std::filesystem::relative(SM.getFilename(SM.getExpansionLoc(Loc)).str());
+	};
+	const SourceRange FuncRange = Func->getSourceRange();
+	const std::string BodyFile = RelativeFile(FuncRange.getBegin());
+	int BodyFirst = SM.getExpansionLineNumber(FuncRange.getBegin());
+	int BodyLast = SM.getExpansionLineNumber(FuncRange.getEnd());
+	// Stays empty (no file, lines 0..0) when the function has no comment.
+	LineRange Doc;
+	if (const auto* Raw = Context.getRawCommentForDeclNoCache(Func)) {
+		const SourceLocation DocBegin = Raw->getBeginLoc();
+		const SourceLocation DocEnd = Raw->getEndLoc();
+		Doc.File = RelativeFile(DocBegin);
+		Doc.StartLine = SM.getExpansionLineNumber(DocBegin);
+		Doc.EndLine = SM.getExpansionLineNumber(DocEnd);
+		// The "+ 1" also accepts a comment that ends on the line right
+		// before the body starts (or starts right after it ends).
+		const int OverlapFirst = std::max(BodyFirst, Doc.StartLine);
+		const int OverlapLast = std::min(BodyLast, Doc.EndLine);
+		if (BodyFile == Doc.File && OverlapFirst <= OverlapLast + 1) {
+			BodyFirst = std::min(BodyFirst, Doc.StartLine);
+			BodyLast = std::max(BodyLast, Doc.EndLine);
+		}
+	}
+	Output.emit(Definition{
+	    .Kind = KindFunction,
+	    .Name = Func->getNameAsString(),
+	    .Type = Func->getType().getAsString(),
+	    .IsStatic = Func->isStatic(),
+	    .Body =
+		LineRange{
+		    .File = BodyFile,
+		    .StartLine = BodyFirst,
+		    .EndLine = BodyLast,
+		},
+	    .Comment = Doc,
+	});
+	return true;
+}
+
+// Entry point: parses the common clang tooling options, runs the indexer over
+// the requested sources and prints the collected output.
+// Returns 1 if option parsing or the tool run fails, 0 otherwise.
+int main(int argc, const char** argv) {
+	llvm::cl::OptionCategory Category("syz-indexer options");
+	auto Parsed = tooling::CommonOptionsParser::create(argc, argv, Category);
+	if (!Parsed) {
+		llvm::errs() << Parsed.takeError();
+		return 1;
+	}
+	Output Out;
+	Instance Inst(Out);
+	tooling::ClangTool Tool(Parsed->getCompilations(), Parsed->getSourcePathList());
+	// Inst serves both as the consumer factory and as the source file callbacks.
+	const auto Factory = tooling::newFrontendActionFactory(&Inst, &Inst);
+	if (Tool.run(Factory.get()) != 0)
+		return 1;
+	Out.print();
+	return 0;
+}
diff --git a/tools/clang/codesearch/output.h b/tools/clang/codesearch/output.h
new file mode 100644
index 000000000..ac490bb91
--- /dev/null
+++ b/tools/clang/codesearch/output.h
@@ -0,0 +1,64 @@
+// Copyright 2025 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+#ifndef SYZ_INDEXER_OUTPUT_H
+#define SYZ_INDEXER_OUTPUT_H
+
+#include "json.h"
+#include <vector>
+
+// Values for Definition::Kind.
+// Declared inline so that every translation unit including this header refers
+// to the same objects: Definition stores the pointer, and a plain namespace-scope
+// constexpr array would be a separate internal-linkage copy per translation unit.
+inline constexpr char KindFunction[] = "function";
+inline constexpr char KindStruct[] = "struct";
+inline constexpr char KindVariable[] = "variable";
+inline constexpr char KindMacro[] = "macro";
+inline constexpr char KindEnum[] = "enum";
+
+struct LineRange { // a span of lines StartLine..EndLine within File
+ std::string File; // file the span belongs to; empty when unset
+ int StartLine = 0; // first line of the span; 0 when unset
+ int EndLine = 0; // last line of the span; 0 when unset
+};
+
+// Definition describes one indexed entity.
+struct Definition {
+	// One of Kind* consts. Defaults to nullptr so that a default-constructed
+	// Definition never carries an indeterminate pointer (all other members
+	// already have defaults).
+	const char* Kind = nullptr;
+	std::string Name;
+	std::string Type; // raw C type
+	bool IsStatic = false;
+	// If the kernel-doc comment is placed around the body,
+	// then it's included in the body range.
+	LineRange Body;
+	// Location of the kernel-doc comment.
+	LineRange Comment;
+};
+
+// Prints V through Printer as the fields "file", "start_line" and "end_line",
+// in that order, inside a JSONPrinter::Scope.
+inline void print(JSONPrinter& Printer, const LineRange& V) {
+	JSONPrinter::Scope ObjectScope(Printer);
+	Printer.Field("file", V.File);
+	Printer.Field("start_line", V.StartLine);
+	// The extra `true` presumably marks the last field of the scope
+	// (JSONPrinter is declared in json.h, not shown here).
+	Printer.Field("end_line", V.EndLine, true);
+}
+
+// Prints V through Printer as the fields "kind", "name", "type", "is_static",
+// "body" and "comment", in that order, inside a JSONPrinter::Scope.
+inline void print(JSONPrinter& Printer, const Definition& V) {
+	JSONPrinter::Scope ObjectScope(Printer);
+	Printer.Field("kind", V.Kind);
+	Printer.Field("name", V.Name);
+	Printer.Field("type", V.Type);
+	Printer.Field("is_static", V.IsStatic);
+	Printer.Field("body", V.Body);
+	// The extra `true` presumably marks the last field of the scope
+	// (JSONPrinter is declared in json.h, not shown here).
+	Printer.Field("comment", V.Comment, true);
+}
+
+// Output accumulates the definitions reported by the indexer and prints them
+// all at once through a JSONPrinter.
+class Output {
+public:
+	// Appends V to the accumulated definitions, moving from it.
+	void emit(Definition&& V) {
+		Definitions.emplace_back(std::move(V));
+	}
+
+	// Prints every accumulated definition as the single field "definitions".
+	void print() const {
+		JSONPrinter Json;
+		Json.Field("definitions", Definitions, true);
+	}
+
+private:
+	std::vector<Definition> Definitions;
+};
+
+#endif
diff --git a/tools/syz-codesearch/codesearch.go b/tools/syz-codesearch/codesearch.go
new file mode 100644
index 000000000..afd3840c7
--- /dev/null
+++ b/tools/syz-codesearch/codesearch.go
@@ -0,0 +1,66 @@
+// Copyright 2025 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package main
+
+import (
+ "flag"
+ "fmt"
+ "os"
+
+ "github.com/google/syzkaller/pkg/clangtool"
+ "github.com/google/syzkaller/pkg/codesearch"
+ "github.com/google/syzkaller/pkg/tool"
+)
+
+// main parses the flags and dispatches on the first positional argument:
+// "index" runs the clang tool (its path is the only command argument) with the
+// database file as cache; any other command is passed to the codesearch index
+// and its result is written to stdout.
+// All three flags and a command are mandatory; otherwise usage is printed.
+func main() {
+	var (
+		flagDatabase  = flag.String("database", "", "path to input/output database file (mandatory)")
+		flagKernelSrc = flag.String("kernel-src", "", "path to kernel source directory (mandatory)")
+		flagKernelObj = flag.String("kernel-obj", "", "path to kernel build directory (mandatory)")
+	)
+	flag.Parse()
+	positional := flag.Args()
+	missingFlag := *flagDatabase == "" || *flagKernelSrc == "" || *flagKernelObj == ""
+	if len(positional) == 0 || missingFlag {
+		printUsageAndExit()
+	}
+	cmd, args := positional[0], positional[1:]
+	if cmd != "index" {
+		// Query mode: everything except "index" is a command for the index.
+		index, err := codesearch.NewIndex(*flagDatabase, []string{*flagKernelSrc, *flagKernelObj})
+		if err != nil {
+			tool.Fail(err)
+		}
+		res, err := index.Command(cmd, args)
+		if err != nil {
+			tool.Fail(err)
+		}
+		os.Stdout.WriteString(res)
+		return
+	}
+	// Index mode: exactly one argument, the clang tool binary.
+	if len(args) != 1 {
+		printUsageAndExit()
+	}
+	cfg := &clangtool.Config{
+		ToolBin:    args[0],
+		KernelSrc:  *flagKernelSrc,
+		KernelObj:  *flagKernelObj,
+		CacheFile:  *flagDatabase,
+		DebugTrace: os.Stderr,
+	}
+	if _, err := clangtool.Run[codesearch.Database](cfg); err != nil {
+		tool.Fail(err)
+	}
+}
+
+// printUsageAndExit prints the usage line, the supported commands with their
+// argument counts and the flag defaults, then exits with status 1.
+// Everything is written to the flag package's output (stderr unless it has been
+// reconfigured), which is where flag.PrintDefaults writes as well, so the whole
+// message ends up on a single stream instead of being split between stdout and stderr.
+func printUsageAndExit() {
+	out := flag.CommandLine.Output()
+	fmt.Fprintf(out, `syz-codesearch usage:
+syz-codesearch [flags] command [command arguments]
+commands and their arguments:
+`)
+	for _, cmd := range codesearch.Commands {
+		fmt.Fprintf(out, " - %v [%v args]\n", cmd.Name, cmd.NArgs)
+	}
+	fmt.Fprintf(out, "\nflags:\n")
+	flag.PrintDefaults()
+	os.Exit(1)
+}