aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2026-02-05 13:59:08 +0100
committerDmitry Vyukov <dvyukov@google.com>2026-02-06 09:29:18 +0000
commitfd36daf55d7f9e8ab412fcbf0441658a8d8be727 (patch)
tree6d0a982741820007d1a15212e0227df85eb1b501 /tools
parentf03c419189ef8ed823e306a342ee4d330fb2c394 (diff)
tools/clang: compile clang tools into the binary
Compiled clang tools into Go binaries using cgo. This significantly simplifies building and deployment. This also enables unit testing of clang tools. Now raw go test for clang tools will build them, run, and verify output. Each clang tool is still started as a subprocess. I've experimented with running them in-process, but this makes stdout/stderr interception extremely complicated, and it seems that clang tools still use unsynchronized global state, which breaks when invoked multiple times. Subprocesses also make it safer in the face of potential memory leaks, or memory corruptions in clang tools. Fixes #6645
Diffstat (limited to 'tools')
-rw-r--r--tools/clang/build.go33
-rw-r--r--tools/clang/codesearch/README.md9
l---------tools/clang/codesearch/build.go1
-rw-r--r--tools/clang/codesearch/codesearch.cpp21
-rw-r--r--tools/clang/codesearch/codesearch.go6
l---------tools/clang/declextract/build.go1
-rw-r--r--tools/clang/declextract/declextract.cpp32
-rw-r--r--tools/clang/declextract/declextract.go6
-rw-r--r--tools/clang/json.h4
-rw-r--r--tools/syz-codesearch/codesearch.go5
-rw-r--r--tools/syz-declextract/README.md32
-rw-r--r--tools/syz-declextract/declextract.go4
-rw-r--r--tools/syz-declextract/declextract_test.go7
13 files changed, 104 insertions, 57 deletions
diff --git a/tools/clang/build.go b/tools/clang/build.go
new file mode 100644
index 000000000..35a4affd3
--- /dev/null
+++ b/tools/clang/build.go
@@ -0,0 +1,33 @@
+// Copyright 2026 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package clangtoolimpl
+
+//// Common build flags for all C++ clang tools.
+//// We install this file into all tool subdirs as a symbolic link.
+//
+// #cgo CXXFLAGS: -std=c++23 -O2 -fno-exceptions -I..
+// #cgo CXXFLAGS: -Wno-changes-meaning -Wno-deprecated-enum-enum-conversion
+//
+// #cgo LDFLAGS: -lclangTooling -lclangFrontend -lclangSerialization -lclangDriver
+// #cgo LDFLAGS: -lclangToolingCore -lclangParse -lclangSema -lclangAPINotes -lclangAnalysis
+// #cgo LDFLAGS: -lclangASTMatchers -lclangRewrite -lclangEdit -lclangAST -lclangLex
+// #cgo LDFLAGS: -lclangBasic -lclangSupport -lLLVM
+//
+//// These flags are distro/version specific.
+//// Cgo does not support running shell commands to produce flags.
+//// We would need to run:
+//// llvm-config --cxxflags
+//// llvm-config --ldflags --libs --system-libs
+//// There are some work-arounds like exporting CGO_CXXFLAGS/LDFLAGS in the Makefile,
+//// or using go generate, but these won't work for bare go test runs.
+//// For now, we hardcode typical paths for the several supported llvm versions.
+//// The compiler will search in all of them in order, and pick the first
+//// that is actually present and contains files.
+//
+// #cgo CXXFLAGS: -I/usr/include/llvm-21 -I/usr/lib/llvm-21/include -I/usr/include/llvm-c-21
+// #cgo LDFLAGS: -L/usr/lib/llvm-21/lib
+//
+// #cgo CXXFLAGS: -I/usr/lib/llvm-19/include
+// #cgo LDFLAGS: -L/usr/lib/llvm-19/lib
+import "C"
diff --git a/tools/clang/codesearch/README.md b/tools/clang/codesearch/README.md
deleted file mode 100644
index 6d876c78e..000000000
--- a/tools/clang/codesearch/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# syz-codesearch
-
-Clang-based tool that indexes kernel source code to power
-[pkg/aflow/tool/codesearcher](/pkg/aflow/tool/codesearcher/codesearcher.go)
-agentic tool.
-
-The tool can be built following the procedure described for
-[syz-declextract tool](/tools/syz-declextract/README.md) or with `make
-codesearch`.
diff --git a/tools/clang/codesearch/build.go b/tools/clang/codesearch/build.go
new file mode 120000
index 000000000..0308541aa
--- /dev/null
+++ b/tools/clang/codesearch/build.go
@@ -0,0 +1 @@
+../build.go \ No newline at end of file
diff --git a/tools/clang/codesearch/codesearch.cpp b/tools/clang/codesearch/codesearch.cpp
index 047edad10..a2d6883a2 100644
--- a/tools/clang/codesearch/codesearch.cpp
+++ b/tools/clang/codesearch/codesearch.cpp
@@ -1,6 +1,9 @@
// Copyright 2025 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+// Clang-based tool that indexes kernel source code to power
+// pkg/aflow/tool/codesearcher/codesearcher.go agentic tool.
+
#include "json.h"
#include "output.h"
@@ -12,6 +15,7 @@
#include "clang/AST/RecordLayout.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Version.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Tooling.h"
@@ -311,7 +315,11 @@ bool Indexer::TraverseRecordDecl(RecordDecl* Decl) {
uint64_t OffsetInBits = Layout.getFieldOffset(Field->getFieldIndex());
uint64_t SizeInBits;
if (Field->isBitField()) {
- SizeInBits = Field->getBitWidthValue(Context);
+ SizeInBits = Field->getBitWidthValue(
+#if CLANG_VERSION_MAJOR == 19
+ Context
+#endif
+ );
} else {
TypeInfo Info = Context.getTypeInfo(Field->getType());
SizeInBits = Info.Width;
@@ -338,8 +346,8 @@ bool Indexer::TraverseTypedefDecl(TypedefDecl* Decl) {
return Base::TraverseTypedefDecl(Decl);
}
-int main(int argc, const char** argv) {
- llvm::cl::OptionCategory Options("syz-indexer options");
+static int Main(int argc, const char** argv) {
+ llvm::cl::OptionCategory Options("codesearch options");
auto OptionsParser = tooling::CommonOptionsParser::create(argc, argv, Options);
if (!OptionsParser) {
llvm::errs() << OptionsParser.takeError();
@@ -351,5 +359,12 @@ int main(int argc, const char** argv) {
if (Tool.run(tooling::newFrontendActionFactory(&Instance, &Instance).get()))
return 1;
Output.print();
+ fflush(stdout);
return 0;
}
+
+__attribute__((constructor(1000))) static void ctor(int argc, const char** argv) {
+ const char* run = getenv("SYZ_RUN_CLANGTOOL");
+ if (run && !strcmp(run, "codesearch"))
+ exit(Main(argc, argv));
+}
diff --git a/tools/clang/codesearch/codesearch.go b/tools/clang/codesearch/codesearch.go
new file mode 100644
index 000000000..6fa5d7de0
--- /dev/null
+++ b/tools/clang/codesearch/codesearch.go
@@ -0,0 +1,6 @@
+// Copyright 2026 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package clangtoolimpl
+
+const Tool = "codesearch"
diff --git a/tools/clang/declextract/build.go b/tools/clang/declextract/build.go
new file mode 120000
index 000000000..0308541aa
--- /dev/null
+++ b/tools/clang/declextract/build.go
@@ -0,0 +1 @@
+../build.go \ No newline at end of file
diff --git a/tools/clang/declextract/declextract.cpp b/tools/clang/declextract/declextract.cpp
index d7230a578..620c81f17 100644
--- a/tools/clang/declextract/declextract.cpp
+++ b/tools/clang/declextract/declextract.cpp
@@ -21,6 +21,7 @@
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TypeTraits.h"
+#include "clang/Basic/Version.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Tooling.h"
@@ -297,7 +298,12 @@ FieldType Extractor::extractRecord(QualType QT, const RecordType* Typ, const std
IsAnonymous = true;
}
FieldType FieldType = genType(F->getType(), BackupFieldName);
- int BitWidth = F->isBitField() ? F->getBitWidthValue() : 0;
+ int BitWidth = F->isBitField() ? F->getBitWidthValue(
+#if CLANG_VERSION_MAJOR == 19
+ *Context
+#endif
+ )
+ : 0;
int CountedBy = F->getType()->isCountAttributedType()
? llvm::dyn_cast<FieldDecl>(
F->getType()->getAs<CountAttributedType>()->getCountExpr()->getReferencedDeclOfCallee())
@@ -338,9 +344,16 @@ std::string Extractor::extractEnum(QualType QT, const EnumDecl* Decl) {
if (Name.empty()) {
// This is an unnamed enum declared with a typedef:
// typedef enum {...} enum_name;
- auto Typedef = dyn_cast<TypedefType>(QT.getTypePtr());
- if (Typedef)
- Name = Typedef->getDecl()->getNameAsString();
+ auto Elaborated = dyn_cast<ElaboratedType>(QT.getTypePtr());
+ if (Elaborated) {
+ auto Typedef = dyn_cast<TypedefType>(Elaborated->getNamedType().getTypePtr());
+ if (Typedef)
+ Name = Typedef->getDecl()->getNameAsString();
+ }
+ // This is the code we will need for one of the future versions (past 21).
+ // auto Typedef = dyn_cast<TypedefType>(QT.getTypePtr());
+ // if (Typedef)
+ // Name = Typedef->getDecl()->getNameAsString();
if (Name.empty()) {
QT.dump();
llvm::report_fatal_error("enum with empty name");
@@ -993,8 +1006,8 @@ void Extractor::extractIoctl(const Expr* Cmd, const ConstDesc& Const) {
});
}
-int main(int argc, const char** argv) {
- llvm::cl::OptionCategory Options("syz-declextract options");
+static int Main(int argc, const char** argv) {
+ llvm::cl::OptionCategory Options("declextract options");
auto OptionsParser = tooling::CommonOptionsParser::create(argc, argv, Options);
if (!OptionsParser) {
llvm::errs() << OptionsParser.takeError();
@@ -1005,5 +1018,12 @@ int main(int argc, const char** argv) {
if (Tool.run(tooling::newFrontendActionFactory(&Ex, &Ex).get()))
return 1;
Ex.print();
+ fflush(stdout);
return 0;
}
+
+__attribute__((constructor(1000))) static void ctor(int argc, const char** argv) {
+ const char* run = getenv("SYZ_RUN_CLANGTOOL");
+ if (run && !strcmp(run, "declextract"))
+ exit(Main(argc, argv));
+}
diff --git a/tools/clang/declextract/declextract.go b/tools/clang/declextract/declextract.go
new file mode 100644
index 000000000..f419604c9
--- /dev/null
+++ b/tools/clang/declextract/declextract.go
@@ -0,0 +1,6 @@
+// Copyright 2026 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package clangtoolimpl
+
+const Tool = "declextract"
diff --git a/tools/clang/json.h b/tools/clang/json.h
index 596868da3..93c726fd3 100644
--- a/tools/clang/json.h
+++ b/tools/clang/json.h
@@ -1,8 +1,8 @@
// Copyright 2024 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
-#ifndef SYZ_DECLEXTRACT_JSON_H
-#define SYZ_DECLEXTRACT_JSON_H
+#ifndef SYZ_CLANGTOOL_JSON_H
+#define SYZ_CLANGTOOL_JSON_H
#include <cassert>
#include <cstdint>
diff --git a/tools/syz-codesearch/codesearch.go b/tools/syz-codesearch/codesearch.go
index be1efbba5..6042c643e 100644
--- a/tools/syz-codesearch/codesearch.go
+++ b/tools/syz-codesearch/codesearch.go
@@ -11,6 +11,7 @@ import (
"github.com/google/syzkaller/pkg/clangtool"
"github.com/google/syzkaller/pkg/codesearch"
"github.com/google/syzkaller/pkg/tool"
+ "github.com/google/syzkaller/tools/clang/codesearch"
)
func main() {
@@ -25,11 +26,11 @@ func main() {
}
cmd, args := flag.Args()[0], flag.Args()[1:]
if cmd == "index" {
- if len(args) != 1 {
+ if len(args) != 0 {
printUsageAndExit()
}
cfg := &clangtool.Config{
- ToolBin: args[0],
+ Tool: clangtoolimpl.Tool,
KernelSrc: *flagKernelSrc,
KernelObj: *flagKernelObj,
CacheFile: *flagDatabase,
diff --git a/tools/syz-declextract/README.md b/tools/syz-declextract/README.md
index 99fb8d23a..23a0bb56b 100644
--- a/tools/syz-declextract/README.md
+++ b/tools/syz-declextract/README.md
@@ -8,34 +8,7 @@ cd $KERNEL
make CC=clang defconfig
./scripts/config -e FTRACE_SYSCALLS
make CC=clang olddefconfig
-make CC=clang -j`nproc` # kernel has to be built at least once for the script to work
-./scripts/clang-tools/gen_compile_commands.py
-```
-
-## LLVM Project
-```
-LLVM=$PWD/llvm-project
-git clone https://github.com/llvm/llvm-project.git $LLVM
-cd $LLVM
-git checkout d9dfe7540f81663f75350bb5ceb66d2f94dac078 # In case of any breaking changes, this commit works
-echo '
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++20-designator -Wno-missing-designated-field-initializers")
-add_clang_executable(syz-declextract syz-declextract/declextract.cpp)
-target_link_libraries(syz-declextract PRIVATE clangTooling)
-' >> $LLVM/clang/CMakeLists.txt
-```
-
-## syz-declextract
-```
-mkdir $LLVM/clang/syz-declextract
-```
-Copy `tools/clang/declextract/*.{cpp,h}` and `tools/clang/*.h` files to `$LLVM/clang/syz-declextract/` directory.
-```
-LLVM_BUILD=$PWD/syz
-mkdir $LLVM_BUILD && cd $LLVM_BUILD
-cmake -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=On \
--DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -GNinja $LLVM/llvm
-ninja syz-declextract
+make CC=clang -j`nproc` vmlinux compile_commands.json # kernel has to be built at least once for the script to work
```
## Running on a single source file
@@ -54,8 +27,7 @@ should be used for the rest of the process as well.
## Running on the whole kernel
```
-go run ./tools/syz-declextract -binary=$LLVM_BUILD/bin/syz-declextract -config=manager.cfg \
- -coverage coverage.jsonl
+go run ./tools/syz-declextract -config=manager.cfg -coverage coverage.jsonl
syz-env make extract SOURCEDIR=$KERNEL
```
diff --git a/tools/syz-declextract/declextract.go b/tools/syz-declextract/declextract.go
index d2bb505e4..cc47330e8 100644
--- a/tools/syz-declextract/declextract.go
+++ b/tools/syz-declextract/declextract.go
@@ -29,6 +29,7 @@ import (
_ "github.com/google/syzkaller/pkg/subsystem/lists"
"github.com/google/syzkaller/pkg/tool"
"github.com/google/syzkaller/sys/targets"
+ "github.com/google/syzkaller/tools/clang/declextract"
"golang.org/x/sync/errgroup"
)
@@ -38,7 +39,6 @@ var target = targets.Get(targets.Linux, targets.AMD64)
func main() {
var (
flagConfig = flag.String("config", "", "manager config file")
- flagBinary = flag.String("binary", "syz-declextract", "path to syz-declextract binary")
flagCoverage = flag.String("coverage", "", "syzbot coverage jsonl file")
flagArches = flag.String("arches", "", "comma-separated list of arches to extract (all if empty)")
)
@@ -56,7 +56,7 @@ func main() {
coverFile: *flagCoverage,
loadProbeInfo: loadProbeInfo,
Config: &clangtool.Config{
- ToolBin: *flagBinary,
+ Tool: clangtoolimpl.Tool,
KernelSrc: mgrcfg.KernelSrc,
KernelObj: mgrcfg.KernelObj,
CacheFile: filepath.Join(mgrcfg.Workdir, "declextract.cache"),
diff --git a/tools/syz-declextract/declextract_test.go b/tools/syz-declextract/declextract_test.go
index bf1af7bdf..794844ffe 100644
--- a/tools/syz-declextract/declextract_test.go
+++ b/tools/syz-declextract/declextract_test.go
@@ -15,14 +15,15 @@ import (
"github.com/google/syzkaller/pkg/declextract"
"github.com/google/syzkaller/pkg/ifaceprobe"
"github.com/google/syzkaller/pkg/osutil"
+ "github.com/google/syzkaller/tools/clang/declextract"
)
func TestClangTool(t *testing.T) {
- tooltest.TestClangTool[declextract.Output](t)
+ tooltest.TestClangTool[declextract.Output](t, clangtoolimpl.Tool)
}
func TestDeclextract(t *testing.T) {
- tooltest.ForEachTestFile(t, func(t *testing.T, cfg *clangtool.Config, file string) {
+ tooltest.ForEachTestFile(t, clangtoolimpl.Tool, func(t *testing.T, cfg *clangtool.Config, file string) {
// Created cache file to avoid running the clang tool.
goldenFile := file + ".json"
cacheFile := filepath.Join(cfg.KernelObj, filepath.Base(goldenFile))
@@ -33,7 +34,7 @@ func TestDeclextract(t *testing.T) {
filepath.Join(cfg.KernelObj, "manual.txt")); err != nil {
t.Fatal(err)
}
- cfg.ToolBin = "this-is-not-supposed-to-run"
+ cfg.Tool = "this-is-not-supposed-to-run"
probeInfo := new(ifaceprobe.Info)
probeFile := filepath.Join(cfg.KernelSrc, filepath.Base(file)+".probe")
if osutil.IsExist(probeFile) {