aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2025-11-17 11:17:23 +0100
committerDmitry Vyukov <dvyukov@google.com>2025-11-20 10:10:05 +0000
commit280ea308c321115445df610f1a75b05bbadca5f3 (patch)
treec195c76723c4a08986d74edbfc9e15a4f07fa6c1
parent94d1e3f8b1838e8a04074464a957e979a5c5e36b (diff)
pkg/codesearch: add skeleton for code searching tool
Add a clang tool that is used for code indexing (tools/clang/codesearch/). It follows conventions and build procedure of the declextract tool. Add pkg/codesearch package that aggregates the info exposed by the clang tools, and allows doing simple queries: - show source code of an entity (function, struct, etc) - show entity comment - show all entities defined in a source file Add tools/syz-codesearch wrapper tool that allows to create index for a kernel build, and then run code queries on it.
-rw-r--r--Makefile3
-rw-r--r--pkg/clangtool/clangtool.go5
-rw-r--r--pkg/codesearch/codesearch.go190
-rw-r--r--pkg/codesearch/codesearch_test.go61
-rw-r--r--pkg/codesearch/database.go56
-rw-r--r--pkg/codesearch/testdata/query-def-comment-close3
-rw-r--r--pkg/codesearch/testdata/query-def-comment-header3
-rw-r--r--pkg/codesearch/testdata/query-def-comment-open7
-rw-r--r--pkg/codesearch/testdata/query-def-source-close8
-rw-r--r--pkg/codesearch/testdata/query-def-source-header8
-rw-r--r--pkg/codesearch/testdata/query-def-source-missing3
-rw-r--r--pkg/codesearch/testdata/query-def-source-open11
-rw-r--r--pkg/codesearch/testdata/query-def-source-same-name-non-static8
-rw-r--r--pkg/codesearch/testdata/query-def-source-same-name-static8
-rw-r--r--pkg/codesearch/testdata/query-file-index-missing3
-rw-r--r--pkg/codesearch/testdata/query-file-index-source7
-rw-r--r--pkg/codesearch/testdata/source0.c22
-rw-r--r--pkg/codesearch/testdata/source0.c.json41
-rw-r--r--pkg/codesearch/testdata/source0.h10
-rw-r--r--pkg/codesearch/testdata/source1.c7
-rw-r--r--pkg/codesearch/testdata/source1.c.json20
-rw-r--r--pkg/codesearch/testdata/source2.c7
-rw-r--r--pkg/codesearch/testdata/source2.c.json19
-rw-r--r--tools/clang/codesearch/codesearch.cpp153
-rw-r--r--tools/clang/codesearch/output.h64
-rw-r--r--tools/syz-codesearch/codesearch.go66
26 files changed, 791 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index 65cd8082a..9e215b357 100644
--- a/Makefile
+++ b/Makefile
@@ -273,7 +273,8 @@ format_cpp:
executor/android/android_seccomp.h \
tools/kcovtrace/*.c tools/kcovfuzzer/*.c tools/fops_probe/*.cc \
tools/clang/*.h \
- tools/clang/declextract/*.h tools/clang/declextract/*.cpp
+ tools/clang/declextract/*.h tools/clang/declextract/*.cpp \
+ tools/clang/codesearch/*.h tools/clang/codesearch/*.cpp
format_sys: bin/syz-fmt
bin/syz-fmt all
diff --git a/pkg/clangtool/clangtool.go b/pkg/clangtool/clangtool.go
index 4e3914cf8..8711b5411 100644
--- a/pkg/clangtool/clangtool.go
+++ b/pkg/clangtool/clangtool.go
@@ -155,7 +155,10 @@ func runTool[Output any, OutputPtr OutputDataPtr[Output]](cfg *Config, dbFile, f
cfg.KernelSrc), cfg.KernelObj), "/")
// Suppress warning since we may build the tool on a different clang
// version that produces more warnings.
- data, err := exec.Command(cfg.ToolBin, "-p", dbFile, "--extra-arg=-w", file).Output()
+ // Comments are needed for codesearch tool, but may be useful for declextract
+ // in the future if we try to parse them with LLMs.
+ data, err := exec.Command(cfg.ToolBin, "-p", dbFile,
+ "--extra-arg=-w", "--extra-arg=-fparse-all-comments", file).Output()
if err != nil {
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
diff --git a/pkg/codesearch/codesearch.go b/pkg/codesearch/codesearch.go
new file mode 100644
index 000000000..c1e99a174
--- /dev/null
+++ b/pkg/codesearch/codesearch.go
@@ -0,0 +1,190 @@
+// Copyright 2025 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package codesearch
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "github.com/google/syzkaller/pkg/osutil"
+)
+
+// Index answers code queries over a Database of definitions.
+// Source text is read from files located under srcDirs.
+type Index struct {
+ db *Database // definitions to search
+ srcDirs []string // directories probed in order when locating a source file
+}
+
+// Command describes one textual query that Index.Command can dispatch.
+type Command struct {
+ Name string // name used to select the command
+ NArgs int // exact number of arguments Index.Command requires for it
+ Func func(*Index, []string) (string, error) // implementation, returns the reply text
+}
+
+// Commands lists every query understood by Index.Command.
+// The same table drives the unit tests and the syz-codesearch tool.
+var Commands = []Command{
+	{
+		// file-index <file>: list the entities defined in the file.
+		Name:  "file-index",
+		NArgs: 1,
+		Func: func(index *Index, args []string) (string, error) {
+			file := args[0]
+			found, entities, err := index.FileIndex(file)
+			if err != nil || !found {
+				return notFound, err
+			}
+			var out strings.Builder
+			fmt.Fprintf(&out, "file %v defines the following entities:\n\n", file)
+			for _, entity := range entities {
+				fmt.Fprintf(&out, "%v %v\n", entity.Kind, entity.Name)
+			}
+			return out.String(), nil
+		},
+	},
+	{
+		// def-comment <context file> <name>: show the comment of the entity.
+		Name:  "def-comment",
+		NArgs: 2,
+		Func: func(index *Index, args []string) (string, error) {
+			contextFile, name := args[0], args[1]
+			info, err := index.DefinitionComment(contextFile, name)
+			if err != nil || info == nil {
+				return notFound, err
+			}
+			if info.Body != "" {
+				return fmt.Sprintf("%v %v is defined in %v and commented as:\n\n%v",
+					info.Kind, name, info.File, info.Body), nil
+			}
+			return fmt.Sprintf("%v %v is defined in %v and is not commented\n",
+				info.Kind, name, info.File), nil
+		},
+	},
+	{
+		// def-source <context file> <name> <yes|no>: show the source of the entity,
+		// prefixed with line numbers if the last argument is "yes".
+		Name:  "def-source",
+		NArgs: 3,
+		Func: func(index *Index, args []string) (string, error) {
+			contextFile, name := args[0], args[1]
+			withLines := args[2] == "yes"
+			info, err := index.DefinitionSource(contextFile, name, withLines)
+			if err != nil || info == nil {
+				return notFound, err
+			}
+			return fmt.Sprintf("%v %v is defined in %v:\n\n%v", info.Kind, name, info.File, info.Body), nil
+		},
+	},
+}
+
+// notFound is the reply text commands return when the query matches nothing.
+const notFound = "not found\n"
+
+// NewIndex loads the definitions database from databaseFile and returns
+// an Index that looks for source files under srcDirs.
+func NewIndex(databaseFile string, srcDirs []string) (*Index, error) {
+	database, err := osutil.ReadJSON[*Database](databaseFile)
+	if err != nil {
+		return nil, err
+	}
+	index := &Index{db: database, srcDirs: srcDirs}
+	return index, nil
+}
+
+// Command runs the command named cmd from the Commands table with args.
+// It fails if the command is unknown or the number of arguments differs
+// from what the command requires.
+func (index *Index) Command(cmd string, args []string) (string, error) {
+	for i := range Commands {
+		meta := &Commands[i]
+		if meta.Name != cmd {
+			continue
+		}
+		if got := len(args); got != meta.NArgs {
+			return "", fmt.Errorf("codesearch command %v requires %v args, but %v provided",
+				cmd, meta.NArgs, got)
+		}
+		return meta.Func(index, args)
+	}
+	return "", fmt.Errorf("unknown codesearch command %v", cmd)
+}
+
+// Entity is a brief description of a definition, as returned by FileIndex.
+type Entity struct {
+ Kind string // copied from Definition.Kind
+ Name string // copied from Definition.Name
+}
+
+// FileIndex returns all entities whose body is located in the given file.
+// The bool result says if at least one entity was found.
+// The error result is always nil in the current implementation.
+func (index *Index) FileIndex(file string) (bool, []Entity, error) {
+	var found []Entity
+	for _, def := range index.db.Definitions {
+		if def.Body.File != file {
+			continue
+		}
+		found = append(found, Entity{Kind: def.Kind, Name: def.Name})
+	}
+	return len(found) > 0, found, nil
+}
+
+// EntityInfo is the result of DefinitionComment and DefinitionSource.
+type EntityInfo struct {
+ File string // file that contains the body of the definition
+ Kind string // copied from Definition.Kind
+ Body string // requested text: the comment or the source of the definition
+}
+
+// DefinitionComment looks up the definition of name as seen from contextFile
+// and returns its comment in EntityInfo.Body (without line numbers).
+// Body is empty if no comment is recorded for the definition.
+// It returns nil, nil if no definition is found.
+func (index *Index) DefinitionComment(contextFile, name string) (*EntityInfo, error) {
+ return index.definitionSource(contextFile, name, true, false)
+}
+
+// DefinitionSource looks up the definition of name as seen from contextFile
+// and returns its source text in EntityInfo.Body.
+// If includeLines is set, each line is prefixed with a line number.
+// It returns nil, nil if no definition is found.
+func (index *Index) DefinitionSource(contextFile, name string, includeLines bool) (*EntityInfo, error) {
+ return index.definitionSource(contextFile, name, false, includeLines)
+}
+
+// definitionSource is the common implementation of DefinitionComment and
+// DefinitionSource. It formats either the comment range (comment == true)
+// or the body range of the definition found for name.
+// EntityInfo.File always refers to the file of the body, even for comments.
+func (index *Index) definitionSource(contextFile, name string, comment, includeLines bool) (*EntityInfo, error) {
+	def := index.findDefinition(contextFile, name)
+	if def == nil {
+		return nil, nil
+	}
+	selected := &def.Body
+	if comment {
+		selected = &def.Comment
+	}
+	text, err := index.formatSource(*selected, includeLines)
+	if err != nil {
+		return nil, err
+	}
+	info := &EntityInfo{File: def.Body.File, Kind: def.Kind, Body: text}
+	return info, nil
+}
+
+// findDefinition returns the definition called name that is visible from contextFile.
+// A definition located in contextFile itself wins immediately.
+// Otherwise the last non-static definition with that name is returned,
+// or nil if there is none.
+func (index *Index) findDefinition(contextFile, name string) *Definition {
+	var fallback *Definition
+	for _, candidate := range index.db.Definitions {
+		switch {
+		case candidate.Name != name:
+			// Not the entity we are looking for.
+		case candidate.Body.File == contextFile:
+			return candidate
+		case !candidate.IsStatic:
+			fallback = candidate
+		}
+	}
+	return fallback
+}
+
+// formatSource returns the text of the given line range.
+// An empty file name yields an empty string without an error.
+// The file is taken from the first source directory that contains it,
+// it is an error if none of the directories does.
+func (index *Index) formatSource(lines LineRange, includeLines bool) (string, error) {
+	if lines.File == "" {
+		return "", nil
+	}
+	for _, dir := range index.srcDirs {
+		if path := filepath.Join(dir, lines.File); osutil.IsExist(path) {
+			return formatSourceFile(path, lines.StartLine, lines.EndLine, includeLines)
+		}
+	}
+	return "", fmt.Errorf("codesearch: can't find %q file in any of %v", lines.File, index.srcDirs)
+}
+
+// formatSourceFile returns lines [start, end] of file, each terminated with a newline.
+// start and end are 1-based and inclusive.
+// If includeLines is set, every line is prefixed with a number and a tab.
+// It fails if the file can't be read or if the range does not fit into the file.
+func formatSourceFile(file string, start, end int, includeLines bool) (string, error) {
+	data, err := os.ReadFile(file)
+	if err != nil {
+		return "", err
+	}
+	lines := bytes.Split(data, []byte{'\n'})
+	// Convert to zero-based indexes into lines.
+	first, last := start-1, end-1
+	// last == len(lines) must be rejected as well,
+	// otherwise lines[last] below is out of range and panics.
+	if first < 0 || last < first || last >= len(lines) {
+		return "", fmt.Errorf("codesearch: bad line range [%v-%v] for file %v with %v lines",
+			start, end, file, len(lines))
+	}
+	b := new(strings.Builder)
+	for line := first; line <= last; line++ {
+		if includeLines {
+			// NOTE(review): the printed number is the zero-based index, i.e. one less
+			// than the 1-based line number used in the database. The golden files in
+			// testdata encode this numbering, changing it requires regenerating them.
+			fmt.Fprintf(b, "%4v:\t%s\n", line, lines[line])
+		} else {
+			fmt.Fprintf(b, "%s\n", lines[line])
+		}
+	}
+	return b.String(), nil
+}
diff --git a/pkg/codesearch/codesearch_test.go b/pkg/codesearch/codesearch_test.go
new file mode 100644
index 000000000..7af509294
--- /dev/null
+++ b/pkg/codesearch/codesearch_test.go
@@ -0,0 +1,61 @@
+// Copyright 2025 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package codesearch
+
+import (
+ "bytes"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+
+ "github.com/google/syzkaller/pkg/clangtool/tooltest"
+ "github.com/google/syzkaller/pkg/osutil"
+)
+
+// TestClangTool runs the shared clang tool test from tooltest
+// with Database as the tool output type.
+func TestClangTool(t *testing.T) {
+ tooltest.TestClangTool[Database](t)
+}
+
+// TestCommands runs every testdata/query* file through Index.Command and
+// compares the reply with the golden contents of the file (see testCommand).
+// It also requires that each entry of Commands is used by at least one query file.
+func TestCommands(t *testing.T) {
+	db := tooltest.LoadOutput[Database](t)
+	index := &Index{db, []string{"testdata"}}
+	files, err := filepath.Glob(filepath.Join(osutil.Abs("testdata"), "query*"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(files) == 0 {
+		t.Fatal("found no query files")
+	}
+	// Names of the commands that were run successfully by some query file.
+	covered := make(map[string]bool)
+	for _, file := range files {
+		t.Run(filepath.Base(file), func(t *testing.T) {
+			testCommand(t, index, covered, file)
+		})
+	}
+	for _, cmd := range Commands {
+		if !covered[cmd.Name] {
+			t.Errorf("command %v is not covered, add at least one test", cmd.Name)
+		}
+	}
+}
+
+// testCommand executes the query stored in the first line of file and compares
+// "<query>\n\n<reply>" with the golden contents of the same file.
+// On success the command name is recorded in covered.
+func testCommand(t *testing.T, index *Index, covered map[string]bool, file string) {
+	contents, err := os.ReadFile(file)
+	if err != nil {
+		t.Fatal(err)
+	}
+	firstLine, _, _ := bytes.Cut(contents, []byte("\n"))
+	fields := strings.Fields(string(firstLine))
+	if len(fields) == 0 {
+		t.Fatal("no command found")
+	}
+	cmd, cmdArgs := fields[0], fields[1:]
+	reply, err := index.Command(cmd, cmdArgs)
+	if err != nil {
+		t.Fatal(err)
+	}
+	header := strings.Join(fields, " ") + "\n\n"
+	tooltest.CompareGoldenData(t, file, []byte(header+reply))
+	covered[cmd] = true
+}
diff --git a/pkg/codesearch/database.go b/pkg/codesearch/database.go
new file mode 100644
index 000000000..4757935e9
--- /dev/null
+++ b/pkg/codesearch/database.go
@@ -0,0 +1,56 @@
+// Copyright 2025 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package codesearch
+
+import (
+ "strings"
+
+ "github.com/google/syzkaller/pkg/clangtool"
+)
+
+// Database is the code index: the list of definitions produced by the clang tool.
+type Database struct {
+ Definitions []*Definition `json:"definitions,omitempty"`
+}
+
+// Definition describes a single defined entity.
+type Definition struct {
+ Kind string `json:"kind,omitempty"`
+ Name string `json:"name,omitempty"`
+ Type string `json:"type,omitempty"`
+ IsStatic bool `json:"is_static,omitempty"`
+ // Location of the source code of the definition.
+ Body LineRange `json:"body,omitempty"`
+ // Location of the comment, File is empty if there is no comment.
+ Comment LineRange `json:"comment,omitempty"`
+}
+
+// LineRange is a range of lines in a file.
+// formatSourceFile treats StartLine/EndLine as 1-based and inclusive.
+type LineRange struct {
+ File string `json:"file,omitempty"`
+ StartLine int `json:"start_line,omitempty"`
+ EndLine int `json:"end_line,omitempty"`
+}
+
+// Merge appends all definitions of other to db.
+// Duplicates are not removed here, Finalize sorts and dedups the list.
+func (db *Database) Merge(other *Database) {
+ db.Definitions = append(db.Definitions, other.Definitions...)
+}
+
+// Finalize sorts and deduplicates the definitions, and passes the body range
+// and the comment range (if present) of every definition to the verifier.
+func (db *Database) Finalize(v *clangtool.Verifier) {
+	db.Definitions = clangtool.SortAndDedupSlice(db.Definitions)
+
+	for _, def := range db.Definitions {
+		body, comment := def.Body, def.Comment
+		v.LineRange(body.File, body.StartLine, body.EndLine)
+		if comment.File == "" {
+			// The definition has no comment.
+			continue
+		}
+		v.LineRange(comment.File, comment.StartLine, comment.EndLine)
+	}
+}
+
+// SetSourceFile attaches the source file to the entities that need it.
+// The clang tool could do it, but it looks easier to do it here.
+// File names of the body and the comment are rewritten with updatePath.
+// IsStatic is reset for definitions whose body is in a .c file other than file.
+// NOTE(review): presumably this covers .c files included into another
+// translation unit - confirm the intent.
+func (db *Database) SetSourceFile(file string, updatePath func(string) string) {
+ for _, def := range db.Definitions {
+ def.Body.File = updatePath(def.Body.File)
+ def.Comment.File = updatePath(def.Comment.File)
+ if strings.HasSuffix(def.Body.File, ".c") && def.Body.File != file {
+ def.IsStatic = false
+ }
+ }
+}
diff --git a/pkg/codesearch/testdata/query-def-comment-close b/pkg/codesearch/testdata/query-def-comment-close
new file mode 100644
index 000000000..df6c1c2af
--- /dev/null
+++ b/pkg/codesearch/testdata/query-def-comment-close
@@ -0,0 +1,3 @@
+def-comment source0.c close
+
+function close is defined in source0.c and is not commented
diff --git a/pkg/codesearch/testdata/query-def-comment-header b/pkg/codesearch/testdata/query-def-comment-header
new file mode 100644
index 000000000..a940938b8
--- /dev/null
+++ b/pkg/codesearch/testdata/query-def-comment-header
@@ -0,0 +1,3 @@
+def-comment source0.c function_with_comment_in_header
+
+function function_with_comment_in_header is defined in source0.c and is not commented
diff --git a/pkg/codesearch/testdata/query-def-comment-open b/pkg/codesearch/testdata/query-def-comment-open
new file mode 100644
index 000000000..64bd21812
--- /dev/null
+++ b/pkg/codesearch/testdata/query-def-comment-open
@@ -0,0 +1,7 @@
+def-comment source0.c open
+
+function open is defined in source0.c and commented as:
+
+/*
+ * Comment about open.
+ */
diff --git a/pkg/codesearch/testdata/query-def-source-close b/pkg/codesearch/testdata/query-def-source-close
new file mode 100644
index 000000000..2a9dcefad
--- /dev/null
+++ b/pkg/codesearch/testdata/query-def-source-close
@@ -0,0 +1,8 @@
+def-source source0.c close no
+
+function close is defined in source0.c:
+
+int close()
+{
+ return 0;
+}
diff --git a/pkg/codesearch/testdata/query-def-source-header b/pkg/codesearch/testdata/query-def-source-header
new file mode 100644
index 000000000..fd3ba300b
--- /dev/null
+++ b/pkg/codesearch/testdata/query-def-source-header
@@ -0,0 +1,8 @@
+def-source source0.c function_with_comment_in_header yes
+
+function function_with_comment_in_header is defined in source0.c:
+
+ 18: void function_with_comment_in_header()
+ 19: {
+ 20: same_name_in_several_files();
+ 21: }
diff --git a/pkg/codesearch/testdata/query-def-source-missing b/pkg/codesearch/testdata/query-def-source-missing
new file mode 100644
index 000000000..0b60003c7
--- /dev/null
+++ b/pkg/codesearch/testdata/query-def-source-missing
@@ -0,0 +1,3 @@
+def-source source0.c some_non_existent_function no
+
+not found
diff --git a/pkg/codesearch/testdata/query-def-source-open b/pkg/codesearch/testdata/query-def-source-open
new file mode 100644
index 000000000..bdcec72fd
--- /dev/null
+++ b/pkg/codesearch/testdata/query-def-source-open
@@ -0,0 +1,11 @@
+def-source source0.c open yes
+
+function open is defined in source0.c:
+
+ 5: /*
+ 6: * Comment about open.
+ 7: */
+ 8: int open()
+ 9: {
+ 10: return 0;
+ 11: }
diff --git a/pkg/codesearch/testdata/query-def-source-same-name-non-static b/pkg/codesearch/testdata/query-def-source-same-name-non-static
new file mode 100644
index 000000000..ae09d3313
--- /dev/null
+++ b/pkg/codesearch/testdata/query-def-source-same-name-non-static
@@ -0,0 +1,8 @@
+def-source source0.c same_name_in_several_files no
+
+function same_name_in_several_files is defined in source2.c:
+
+void same_name_in_several_files()
+{
+ // This is non-static version in in source2.c.
+}
diff --git a/pkg/codesearch/testdata/query-def-source-same-name-static b/pkg/codesearch/testdata/query-def-source-same-name-static
new file mode 100644
index 000000000..3d87c010c
--- /dev/null
+++ b/pkg/codesearch/testdata/query-def-source-same-name-static
@@ -0,0 +1,8 @@
+def-source source1.c same_name_in_several_files yes
+
+function same_name_in_several_files is defined in source1.c:
+
+ 3: static void same_name_in_several_files()
+ 4: {
+ 5: // This is static version in source1.c.
+ 6: }
diff --git a/pkg/codesearch/testdata/query-file-index-missing b/pkg/codesearch/testdata/query-file-index-missing
new file mode 100644
index 000000000..1be486378
--- /dev/null
+++ b/pkg/codesearch/testdata/query-file-index-missing
@@ -0,0 +1,3 @@
+file-index some-non-existent-file.c
+
+not found
diff --git a/pkg/codesearch/testdata/query-file-index-source b/pkg/codesearch/testdata/query-file-index-source
new file mode 100644
index 000000000..c238079d0
--- /dev/null
+++ b/pkg/codesearch/testdata/query-file-index-source
@@ -0,0 +1,7 @@
+file-index source0.c
+
+file source0.c defines the following entities:
+
+function close
+function function_with_comment_in_header
+function open
diff --git a/pkg/codesearch/testdata/source0.c b/pkg/codesearch/testdata/source0.c
new file mode 100644
index 000000000..384c4c119
--- /dev/null
+++ b/pkg/codesearch/testdata/source0.c
@@ -0,0 +1,22 @@
+// Copyright 2025 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+#include "source0.h"
+
+/*
+ * Comment about open.
+ */
+int open()
+{
+ return 0;
+}
+
+int close()
+{
+ return 0;
+}
+
+void function_with_comment_in_header()
+{
+ same_name_in_several_files();
+}
diff --git a/pkg/codesearch/testdata/source0.c.json b/pkg/codesearch/testdata/source0.c.json
new file mode 100644
index 000000000..d33aa360c
--- /dev/null
+++ b/pkg/codesearch/testdata/source0.c.json
@@ -0,0 +1,41 @@
+{
+ "definitions": [
+ {
+ "kind": "function",
+ "name": "close",
+ "type": "int ()",
+ "body": {
+ "file": "source0.c",
+ "start_line": 14,
+ "end_line": 17
+ },
+ "comment": {}
+ },
+ {
+ "kind": "function",
+ "name": "function_with_comment_in_header",
+ "type": "void ()",
+ "body": {
+ "file": "source0.c",
+ "start_line": 19,
+ "end_line": 22
+ },
+ "comment": {}
+ },
+ {
+ "kind": "function",
+ "name": "open",
+ "type": "int ()",
+ "body": {
+ "file": "source0.c",
+ "start_line": 6,
+ "end_line": 12
+ },
+ "comment": {
+ "file": "source0.c",
+ "start_line": 6,
+ "end_line": 8
+ }
+ }
+ ]
+} \ No newline at end of file
diff --git a/pkg/codesearch/testdata/source0.h b/pkg/codesearch/testdata/source0.h
new file mode 100644
index 000000000..339975b2e
--- /dev/null
+++ b/pkg/codesearch/testdata/source0.h
@@ -0,0 +1,10 @@
+// Copyright 2025 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+/*
+ * Comment about the function in header.
+ * Multi-line just in case.
+ */
+void function_with_comment_in_header();
+
+void same_name_in_several_files();
diff --git a/pkg/codesearch/testdata/source1.c b/pkg/codesearch/testdata/source1.c
new file mode 100644
index 000000000..ad7d5792c
--- /dev/null
+++ b/pkg/codesearch/testdata/source1.c
@@ -0,0 +1,7 @@
+// Copyright 2025 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+static void same_name_in_several_files()
+{
+ // This is static version in source1.c.
+}
diff --git a/pkg/codesearch/testdata/source1.c.json b/pkg/codesearch/testdata/source1.c.json
new file mode 100644
index 000000000..72278a191
--- /dev/null
+++ b/pkg/codesearch/testdata/source1.c.json
@@ -0,0 +1,20 @@
+{
+ "definitions": [
+ {
+ "kind": "function",
+ "name": "same_name_in_several_files",
+ "type": "void ()",
+ "is_static": true,
+ "body": {
+ "file": "source1.c",
+ "start_line": 4,
+ "end_line": 7
+ },
+ "comment": {
+ "file": "source1.c",
+ "start_line": 1,
+ "end_line": 2
+ }
+ }
+ ]
+} \ No newline at end of file
diff --git a/pkg/codesearch/testdata/source2.c b/pkg/codesearch/testdata/source2.c
new file mode 100644
index 000000000..f7ef3d810
--- /dev/null
+++ b/pkg/codesearch/testdata/source2.c
@@ -0,0 +1,7 @@
+// Copyright 2025 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+void same_name_in_several_files()
+{
+ // This is non-static version in in source2.c.
+}
diff --git a/pkg/codesearch/testdata/source2.c.json b/pkg/codesearch/testdata/source2.c.json
new file mode 100644
index 000000000..4407152db
--- /dev/null
+++ b/pkg/codesearch/testdata/source2.c.json
@@ -0,0 +1,19 @@
+{
+ "definitions": [
+ {
+ "kind": "function",
+ "name": "same_name_in_several_files",
+ "type": "void ()",
+ "body": {
+ "file": "source2.c",
+ "start_line": 4,
+ "end_line": 7
+ },
+ "comment": {
+ "file": "source2.c",
+ "start_line": 1,
+ "end_line": 2
+ }
+ }
+ ]
+} \ No newline at end of file
diff --git a/tools/clang/codesearch/codesearch.cpp b/tools/clang/codesearch/codesearch.cpp
new file mode 100644
index 000000000..8895d5307
--- /dev/null
+++ b/tools/clang/codesearch/codesearch.cpp
@@ -0,0 +1,153 @@
+// Copyright 2025 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+#include "json.h"
+#include "output.h"
+
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Comment.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclarationName.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include <algorithm>
+#include <filesystem>
+#include <string>
+#include <unordered_map>
+
+using namespace clang;
+
+// MacroDef/MacroMap hold information about macros defined in the file.
+struct MacroDef {
+ std::string Value; // value as written in the source
+ SourceRange SourceRange; // source range of the value
+};
+using MacroMap = std::unordered_map<std::string, MacroDef>;
+
+// Instance is the entry point for the clang tooling machinery: main passes it to
+// newFrontendActionFactory both as the consumer factory (newASTConsumer)
+// and as the source file callbacks (handleBeginSource).
+// It owns the macro map that is handed to the preprocessor tracker and to the AST consumer.
+class Instance : public tooling::SourceFileCallbacks {
+public:
+ Instance(Output& Output) : Output(Output) {}
+ std::unique_ptr<ASTConsumer> newASTConsumer();
+
+private:
+ Output& Output;
+ MacroMap Macros;
+
+ bool handleBeginSource(CompilerInstance& CI) override;
+};
+
+// PPCallbacksTracker is meant to record all macro definitions (name/value/source location).
+// In this skeleton MacroDefined is a stub and nothing is stored in Macros yet.
+class PPCallbacksTracker : public PPCallbacks {
+public:
+ PPCallbacksTracker(Preprocessor& PP, MacroMap& Macros) : SM(PP.getSourceManager()), Macros(Macros) {}
+
+private:
+ SourceManager& SM;
+ MacroMap& Macros;
+
+ // The cast to void only marks Macros as used.
+ void MacroDefined(const Token& MacroName, const MacroDirective* MD) override { (void)Macros; }
+};
+
+// IndexerAstConsumer runs Indexer over the whole translation unit
+// once it is parsed (see HandleTranslationUnit).
+class IndexerAstConsumer : public ASTConsumer {
+public:
+ IndexerAstConsumer(Output& Output, const MacroMap& Macros) : Output(Output), Macros(Macros) {}
+
+private:
+ Output& Output;
+ const MacroMap& Macros;
+
+ void HandleTranslationUnit(ASTContext& context) override;
+};
+
+// Indexer visits function declarations and emits a Definition
+// for each one that has a body (see VisitFunctionDecl).
+class Indexer : public RecursiveASTVisitor<Indexer> {
+public:
+ // Macros is accepted but not stored or used at the moment.
+ Indexer(ASTContext& Context, Output& Output, const MacroMap& Macros)
+ : Context(Context), SM(Context.getSourceManager()), Output(Output) {}
+
+ bool VisitFunctionDecl(const FunctionDecl*);
+
+private:
+ ASTContext& Context;
+ SourceManager& SM;
+ Output& Output;
+};
+
+// handleBeginSource registers PPCallbacksTracker with the preprocessor
+// of the compiler instance. It always returns true.
+bool Instance::handleBeginSource(CompilerInstance& CI) {
+ Preprocessor& PP = CI.getPreprocessor();
+ PP.addPPCallbacks(std::make_unique<PPCallbacksTracker>(PP, Macros));
+ return true;
+}
+
+// newASTConsumer creates a consumer that shares Output and Macros with this Instance.
+std::unique_ptr<ASTConsumer> Instance::newASTConsumer() { return std::make_unique<IndexerAstConsumer>(Output, Macros); }
+
+// HandleTranslationUnit traverses all declarations of the translation unit with Indexer.
+void IndexerAstConsumer::HandleTranslationUnit(ASTContext& Context) {
+ Indexer Indexer(Context, Output, Macros);
+ Indexer.TraverseDecl(Context.getTranslationUnitDecl());
+}
+
+// VisitFunctionDecl emits a Definition for every function declaration that has a body.
+// Declarations without a body are skipped. It always returns true.
+bool Indexer::VisitFunctionDecl(const FunctionDecl* Func) {
+ if (!Func->doesThisDeclarationHaveABody())
+ return true;
+ auto Range = Func->getSourceRange();
+ // File names are made relative to the current working directory.
+ const std::string& SourceFile = std::filesystem::relative(SM.getFilename(SM.getExpansionLoc(Range.getBegin())).str());
+ int StartLine = SM.getExpansionLineNumber(Range.getBegin());
+ int EndLine = SM.getExpansionLineNumber(Range.getEnd());
+ // The comment range stays empty (no file, zero lines) if no comment is found.
+ std::string CommentSourceFile;
+ int CommentStartLine = 0;
+ int CommentEndLine = 0;
+ if (auto Comment = Context.getRawCommentForDeclNoCache(Func)) {
+ const auto& begin = Comment->getBeginLoc();
+ const auto& end = Comment->getEndLoc();
+ CommentSourceFile = std::filesystem::relative(SM.getFilename(SM.getExpansionLoc(begin)).str());
+ CommentStartLine = SM.getExpansionLineNumber(begin);
+ CommentEndLine = SM.getExpansionLineNumber(end);
+ // Expand body range to include the comment, if they intersect.
+ // The +1 also merges ranges that are directly adjacent,
+ // e.g. a comment that ends on the line right before the function.
+ if (SourceFile == CommentSourceFile &&
+ std::max(StartLine, CommentStartLine) <= std::min(EndLine, CommentEndLine) + 1) {
+ StartLine = std::min(StartLine, CommentStartLine);
+ EndLine = std::max(EndLine, CommentEndLine);
+ }
+ }
+ Output.emit(Definition{
+ .Kind = KindFunction,
+ .Name = Func->getNameAsString(),
+ .Type = Func->getType().getAsString(),
+ .IsStatic = Func->isStatic(),
+ .Body =
+ LineRange{
+ .File = SourceFile,
+ .StartLine = StartLine,
+ .EndLine = EndLine,
+ },
+ .Comment =
+ LineRange{
+ .File = CommentSourceFile,
+ .StartLine = CommentStartLine,
+ .EndLine = CommentEndLine,
+ },
+ });
+ return true;
+}
+
+// main parses the common clang tooling options, runs the indexer over the
+// given source files and prints the collected output.
+// It returns 1 if option parsing or the tool run fails, nothing is printed in that case.
+int main(int argc, const char** argv) {
+ llvm::cl::OptionCategory Options("syz-indexer options");
+ auto OptionsParser = tooling::CommonOptionsParser::create(argc, argv, Options);
+ if (!OptionsParser) {
+ llvm::errs() << OptionsParser.takeError();
+ return 1;
+ }
+ Output Output;
+ Instance Instance(Output);
+ tooling::ClangTool Tool(OptionsParser->getCompilations(), OptionsParser->getSourcePathList());
+ // Instance serves both as the AST consumer factory and as the source file callbacks.
+ if (Tool.run(tooling::newFrontendActionFactory(&Instance, &Instance).get()))
+ return 1;
+ Output.print();
+ return 0;
+}
diff --git a/tools/clang/codesearch/output.h b/tools/clang/codesearch/output.h
new file mode 100644
index 000000000..ac490bb91
--- /dev/null
+++ b/tools/clang/codesearch/output.h
@@ -0,0 +1,64 @@
+// Copyright 2025 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+#ifndef SYZ_INDEXER_OUTPUT_H
+#define SYZ_INDEXER_OUTPUT_H
+
+#include "json.h"
+#include <vector>
+
+// Possible values of Definition::Kind.
+constexpr char KindFunction[] = "function";
+constexpr char KindStruct[] = "struct";
+constexpr char KindVariable[] = "variable";
+constexpr char KindMacro[] = "macro";
+constexpr char KindEnum[] = "enum";
+
+// LineRange is a range of lines in a source file.
+// A default-constructed value (empty File, zero lines) stands for "no range".
+struct LineRange {
+ std::string File;
+ int StartLine = 0;
+ int EndLine = 0;
+};
+
+// Definition is a single entry of the tool output.
+struct Definition {
+ const char* Kind; // one of Kind* consts
+ std::string Name;
+ std::string Type; // raw C type
+ bool IsStatic = false;
+ // If the kernel-doc comment is placed around the body,
+ // then it's included in the body range.
+ LineRange Body;
+ // Location of the kernel-doc comment.
+ LineRange Comment;
+};
+
+// Prints LineRange as a JSON object with file/start_line/end_line fields.
+// NOTE(review): the trailing true presumably marks the last field of the object - confirm in json.h.
+inline void print(JSONPrinter& Printer, const LineRange& V) {
+ JSONPrinter::Scope Scope(Printer);
+ Printer.Field("file", V.File);
+ Printer.Field("start_line", V.StartLine);
+ Printer.Field("end_line", V.EndLine, true);
+}
+
+// Prints Definition as a JSON object; the field names match the json tags
+// of the Definition struct in pkg/codesearch/database.go.
+// NOTE(review): the trailing true presumably marks the last field of the object - confirm in json.h.
+inline void print(JSONPrinter& Printer, const Definition& V) {
+ JSONPrinter::Scope Scope(Printer);
+ Printer.Field("kind", V.Kind);
+ Printer.Field("name", V.Name);
+ Printer.Field("type", V.Type);
+ Printer.Field("is_static", V.IsStatic);
+ Printer.Field("body", V.Body);
+ Printer.Field("comment", V.Comment, true);
+}
+
+// Output accumulates the definitions found by the tool and prints them as JSON.
+class Output {
+public:
+ // emit appends the definition to the output.
+ void emit(Definition&& V) { Definitions.push_back(std::move(V)); }
+
+ // print outputs all accumulated definitions as the "definitions" field.
+ void print() const {
+ JSONPrinter Printer;
+ Printer.Field("definitions", Definitions, true);
+ }
+
+private:
+ std::vector<Definition> Definitions;
+};
+
+#endif
diff --git a/tools/syz-codesearch/codesearch.go b/tools/syz-codesearch/codesearch.go
new file mode 100644
index 000000000..afd3840c7
--- /dev/null
+++ b/tools/syz-codesearch/codesearch.go
@@ -0,0 +1,66 @@
+// Copyright 2025 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package main
+
+import (
+ "flag"
+ "fmt"
+ "os"
+
+ "github.com/google/syzkaller/pkg/clangtool"
+ "github.com/google/syzkaller/pkg/codesearch"
+ "github.com/google/syzkaller/pkg/tool"
+)
+
+// main implements the syz-codesearch tool.
+// The "index" command runs the clang tool binary given as its only argument
+// and uses the database file as the cache file of the run.
+// Any other command is treated as a query: it is run against the existing
+// database and the reply is written to stdout.
+func main() {
+	var (
+		flagDatabase  = flag.String("database", "", "path to input/output database file (mandatory)")
+		flagKernelSrc = flag.String("kernel-src", "", "path to kernel source directory (mandatory)")
+		flagKernelObj = flag.String("kernel-obj", "", "path to kernel build directory (mandatory)")
+	)
+	flag.Parse()
+	positional := flag.Args()
+	if len(positional) == 0 || *flagDatabase == "" || *flagKernelSrc == "" || *flagKernelObj == "" {
+		printUsageAndExit()
+	}
+	cmd, args := positional[0], positional[1:]
+	if cmd != "index" {
+		// Query mode.
+		index, err := codesearch.NewIndex(*flagDatabase, []string{*flagKernelSrc, *flagKernelObj})
+		if err != nil {
+			tool.Fail(err)
+		}
+		res, err := index.Command(cmd, args)
+		if err != nil {
+			tool.Fail(err)
+		}
+		os.Stdout.WriteString(res)
+		return
+	}
+	// Indexing mode.
+	if len(args) != 1 {
+		printUsageAndExit()
+	}
+	cfg := &clangtool.Config{
+		ToolBin:    args[0],
+		KernelSrc:  *flagKernelSrc,
+		KernelObj:  *flagKernelObj,
+		CacheFile:  *flagDatabase,
+		DebugTrace: os.Stderr,
+	}
+	if _, err := clangtool.Run[codesearch.Database](cfg); err != nil {
+		tool.Fail(err)
+	}
+}
+
+// printUsageAndExit prints the tool usage, the supported commands
+// and the flag defaults, and exits with status 1.
+// Everything is written to the output of the flag package (stderr by default),
+// which is where flag.PrintDefaults writes, so the text is not split
+// between stdout and stderr.
+func printUsageAndExit() {
+	out := flag.CommandLine.Output()
+	fmt.Fprint(out, `syz-codesearch usage:
+syz-codesearch [flags] command [command arguments]
+commands and their arguments:
+`)
+	// The index command is handled by main directly and is not part of codesearch.Commands.
+	fmt.Fprintf(out, " - %v [%v args]\n", "index", 1)
+	for _, cmd := range codesearch.Commands {
+		fmt.Fprintf(out, " - %v [%v args]\n", cmd.Name, cmd.NArgs)
+	}
+	fmt.Fprint(out, "\nflags:\n")
+	flag.PrintDefaults()
+	os.Exit(1)
+}