aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/codesearch
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2026-01-19 15:31:50 +0100
committerDmitry Vyukov <dvyukov@google.com>2026-01-21 13:38:45 +0000
commitd720635adb8965149cd75a3da692d3a0480c36c9 (patch)
treed9896eabd70abd266028624702bc74609ba1c9e4 /pkg/codesearch
parent6984f21fcb77bdd034a489c0e552aa1d910e852f (diff)
pkg/codesearch: support searching for references
Extend codesearch clang tool to export info about function references (calls, takes-address-of). Add pkg/codesearch command find-references. Export find-references in pkg/aflow/tools/codesearcher to LLMs. Update #6469
Diffstat (limited to 'pkg/codesearch')
-rw-r--r--pkg/codesearch/codesearch.go88
-rw-r--r--pkg/codesearch/codesearch_test.go18
-rw-r--r--pkg/codesearch/database.go20
-rw-r--r--pkg/codesearch/testdata/mm/refs.c6
-rw-r--r--pkg/codesearch/testdata/mm/refs.c.json23
-rw-r--r--pkg/codesearch/testdata/mm/slub.c.json1
-rw-r--r--pkg/codesearch/testdata/query-def-source-header8
-rw-r--r--pkg/codesearch/testdata/query-def-source-open14
-rw-r--r--pkg/codesearch/testdata/query-def-source-same-name-static8
-rw-r--r--pkg/codesearch/testdata/query-dir-index-mm1
-rw-r--r--pkg/codesearch/testdata/query-dir-index-root1
-rw-r--r--pkg/codesearch/testdata/query-dir-index-root21
-rw-r--r--pkg/codesearch/testdata/query-find-references-context-limit42
-rw-r--r--pkg/codesearch/testdata/query-find-references-missing3
-rw-r--r--pkg/codesearch/testdata/query-find-references-multiple28
-rw-r--r--pkg/codesearch/testdata/query-find-references-no-context16
-rw-r--r--pkg/codesearch/testdata/query-find-references-normal11
-rw-r--r--pkg/codesearch/testdata/query-find-references-output-limit10
-rw-r--r--pkg/codesearch/testdata/query-find-references-path-limit10
-rw-r--r--pkg/codesearch/testdata/refs.c34
-rw-r--r--pkg/codesearch/testdata/refs.c.json157
-rw-r--r--pkg/codesearch/testdata/source0.c.json10
22 files changed, 482 insertions, 28 deletions
diff --git a/pkg/codesearch/codesearch.go b/pkg/codesearch/codesearch.go
index 746984369..396df4f82 100644
--- a/pkg/codesearch/codesearch.go
+++ b/pkg/codesearch/codesearch.go
@@ -10,6 +10,7 @@ import (
"os"
"path/filepath"
"slices"
+ "strconv"
"strings"
"syscall"
@@ -80,6 +81,28 @@ var Commands = []Command{
}
return fmt.Sprintf("%v %v is defined in %v:\n\n%v", info.Kind, args[1], info.File, info.Body), nil
}},
+ {"find-references", 5, func(index *Index, args []string) (string, error) {
+ contextLines, err := strconv.Atoi(args[3])
+ if err != nil {
+ return "", fmt.Errorf("failed to parse number of context lines %q: %w", args[3], err)
+ }
+ outputLimit, err := strconv.Atoi(args[4])
+ if err != nil {
+ return "", fmt.Errorf("failed to parse output limit %q: %w", args[4], err)
+ }
+ refs, totalCount, err := index.FindReferences(args[0], args[1], args[2], contextLines, outputLimit)
+ if err != nil {
+ return "", err
+ }
+ b := new(strings.Builder)
+ fmt.Fprintf(b, "%v has %v references:\n\n", args[1], totalCount)
+ for _, ref := range refs {
+ fmt.Fprintf(b, "%v %v %v it at %v:%v\n%v\n\n",
+ ref.ReferencingEntityKind, ref.ReferencingEntityName, ref.ReferenceKind,
+ ref.SourceFile, ref.SourceLine, ref.SourceSnippet)
+ }
+ return b.String(), nil
+ }},
}
func IsSourceFile(file string) bool {
@@ -225,6 +248,69 @@ func (index *Index) definitionSource(contextFile, name string, comment, includeL
}, nil
}
// ReferenceInfo describes one reference to an entity, as returned by
// FindReferences. The jsonschema tags hold the human-readable description of
// each field; SourceSnippet is left out of the JSON encoding when it is empty.
type ReferenceInfo struct {
	// Entity that contains the reference.
	ReferencingEntityKind string `jsonschema:"Kind of the referencing entity (function, struct, etc)."`
	ReferencingEntityName string `jsonschema:"Name of the referencing entity."`

	// How the entity is referenced.
	ReferenceKind string `jsonschema:"Kind of the reference (calls, takes-address, reads, writes-to, etc)."`

	// Where the reference is located.
	SourceFile    string `jsonschema:"Source file of the reference."`
	SourceLine    int    `jsonschema:"Source line of the reference."`
	SourceSnippet string `jsonschema:"Surrounding code snippet, if requested." json:",omitempty"`
}
+
+func (index *Index) FindReferences(contextFile, name, srcPrefix string, contextLines, outputLimit int) (
+ []ReferenceInfo, int, error) {
+ target := index.findDefinition(contextFile, name)
+ if target == nil {
+ return nil, 0, aflow.BadCallError("requested entity does not exist")
+ }
+ if srcPrefix != "" {
+ srcPrefix = filepath.Clean(srcPrefix)
+ }
+ totalCount := 0
+ var results []ReferenceInfo
+ for _, def := range index.db.Definitions {
+ if !strings.HasPrefix(def.Body.File, srcPrefix) {
+ continue
+ }
+ for _, ref := range def.Refs {
+ // TODO: this mis-handles the following case:
+ // the target is a non-static 'foo' in some file,
+ // the reference is in another file and refers to a static 'foo'
+ // defined in that file (which is not the target 'foo').
+ if ref.EntityKind != target.Kind || ref.Name != target.Name ||
+ target.IsStatic && target.Body.File != def.Body.File {
+ continue
+ }
+ totalCount++
+ if totalCount > outputLimit {
+ continue
+ }
+ snippet := ""
+ if contextLines > 0 {
+ lines := LineRange{
+ File: def.Body.File,
+ StartLine: max(def.Body.StartLine, ref.Line-contextLines),
+ EndLine: min(def.Body.EndLine, ref.Line+contextLines),
+ }
+ var err error
+ snippet, err = index.formatSource(lines, true)
+ if err != nil {
+ return nil, 0, err
+ }
+ }
+ results = append(results, ReferenceInfo{
+ ReferencingEntityKind: def.Kind,
+ ReferencingEntityName: def.Name,
+ ReferenceKind: ref.Kind,
+ SourceFile: def.Body.File,
+ SourceLine: ref.Line,
+ SourceSnippet: snippet,
+ })
+ }
+ }
+ return results, totalCount, nil
+}
+
func (index *Index) findDefinition(contextFile, name string) *Definition {
var weakMatch *Definition
for _, def := range index.db.Definitions {
@@ -269,7 +355,7 @@ func formatSourceFile(file string, start, end int, includeLines bool) (string, e
b := new(strings.Builder)
for line := start; line <= end; line++ {
if includeLines {
- fmt.Fprintf(b, "%4v:\t%s\n", line, lines[line])
+ fmt.Fprintf(b, "%4v:\t%s\n", line+1, lines[line])
} else {
fmt.Fprintf(b, "%s\n", lines[line])
}
diff --git a/pkg/codesearch/codesearch_test.go b/pkg/codesearch/codesearch_test.go
index 1f353c804..85dc90e00 100644
--- a/pkg/codesearch/codesearch_test.go
+++ b/pkg/codesearch/codesearch_test.go
@@ -47,16 +47,24 @@ func testCommand(t *testing.T, index *Index, covered map[string]bool, file strin
t.Fatal(err)
}
query, _, _ := bytes.Cut(data, []byte{'\n'})
- args := strings.Fields(string(query))
- if len(args) == 0 {
+ fields := strings.Fields(string(query))
+ if len(fields) == 0 {
t.Fatal("no command found")
}
- result, err := index.Command(args[0], args[1:])
+ cmd := fields[0]
+ var args []string
+ for _, arg := range fields[1:] {
+ if arg == `""` {
+ arg = ""
+ }
+ args = append(args, arg)
+ }
+ result, err := index.Command(cmd, args)
if err != nil {
// This is supposed to test aflow.BadCallError messages.
result = err.Error() + "\n"
}
- got := append([]byte(strings.Join(args, " ")+"\n\n"), result...)
+ got := append([]byte(strings.Join(fields, " ")+"\n\n"), result...)
tooltest.CompareGoldenData(t, file, got)
- covered[args[0]] = true
+ covered[cmd] = true
}
diff --git a/pkg/codesearch/database.go b/pkg/codesearch/database.go
index 4757935e9..499fc73cb 100644
--- a/pkg/codesearch/database.go
+++ b/pkg/codesearch/database.go
@@ -14,12 +14,20 @@ type Database struct {
}
type Definition struct {
- Kind string `json:"kind,omitempty"`
- Name string `json:"name,omitempty"`
- Type string `json:"type,omitempty"`
- IsStatic bool `json:"is_static,omitempty"`
- Body LineRange `json:"body,omitempty"`
- Comment LineRange `json:"comment,omitempty"`
+ Kind string `json:"kind,omitempty"`
+ Name string `json:"name,omitempty"`
+ Type string `json:"type,omitempty"`
+ IsStatic bool `json:"is_static,omitempty"`
+ Body LineRange `json:"body,omitempty"`
+ Comment LineRange `json:"comment,omitempty"`
+ Refs []Reference `json:"refs,omitempty"`
+}
+
// Reference is one element of Definition.Refs: a single use of another entity
// inside the body of a definition. All fields are omitted from the JSON
// encoding when they hold their zero value.
type Reference struct {
	// Kind is how the entity is used, e.g. "calls" or "takes-address-of".
	Kind string `json:"kind,omitempty"`
	// EntityKind is the kind of the referenced entity, e.g. "function".
	EntityKind string `json:"entity_kind,omitempty"`
	// Name is the name of the referenced entity.
	Name string `json:"name,omitempty"`
	// Line is the source line of the reference.
	Line int `json:"line,omitempty"`
}
type LineRange struct {
diff --git a/pkg/codesearch/testdata/mm/refs.c b/pkg/codesearch/testdata/mm/refs.c
new file mode 100644
index 000000000..709871ab7
--- /dev/null
+++ b/pkg/codesearch/testdata/mm/refs.c
@@ -0,0 +1,6 @@
+int refs2();
+
+void ref_in_mm()
+{
+ refs2();
+}
diff --git a/pkg/codesearch/testdata/mm/refs.c.json b/pkg/codesearch/testdata/mm/refs.c.json
new file mode 100644
index 000000000..09ac87f98
--- /dev/null
+++ b/pkg/codesearch/testdata/mm/refs.c.json
@@ -0,0 +1,23 @@
+{
+ "definitions": [
+ {
+ "kind": "function",
+ "name": "ref_in_mm",
+ "type": "void ()",
+ "body": {
+ "file": "mm/refs.c",
+ "start_line": 3,
+ "end_line": 6
+ },
+ "comment": {},
+ "refs": [
+ {
+ "kind": "calls",
+ "entity_kind": "function",
+ "name": "refs2",
+ "line": 5
+ }
+ ]
+ }
+ ]
+} \ No newline at end of file
diff --git a/pkg/codesearch/testdata/mm/slub.c.json b/pkg/codesearch/testdata/mm/slub.c.json
new file mode 100644
index 000000000..9e26dfeeb
--- /dev/null
+++ b/pkg/codesearch/testdata/mm/slub.c.json
@@ -0,0 +1 @@
+{} \ No newline at end of file
diff --git a/pkg/codesearch/testdata/query-def-source-header b/pkg/codesearch/testdata/query-def-source-header
index fd3ba300b..9e221f57b 100644
--- a/pkg/codesearch/testdata/query-def-source-header
+++ b/pkg/codesearch/testdata/query-def-source-header
@@ -2,7 +2,7 @@ def-source source0.c function_with_comment_in_header yes
function function_with_comment_in_header is defined in source0.c:
- 18: void function_with_comment_in_header()
- 19: {
- 20: same_name_in_several_files();
- 21: }
+ 19: void function_with_comment_in_header()
+ 20: {
+ 21: same_name_in_several_files();
+ 22: }
diff --git a/pkg/codesearch/testdata/query-def-source-open b/pkg/codesearch/testdata/query-def-source-open
index bdcec72fd..c8c11b8ef 100644
--- a/pkg/codesearch/testdata/query-def-source-open
+++ b/pkg/codesearch/testdata/query-def-source-open
@@ -2,10 +2,10 @@ def-source source0.c open yes
function open is defined in source0.c:
- 5: /*
- 6: * Comment about open.
- 7: */
- 8: int open()
- 9: {
- 10: return 0;
- 11: }
+ 6: /*
+ 7: * Comment about open.
+ 8: */
+ 9: int open()
+ 10: {
+ 11: return 0;
+ 12: }
diff --git a/pkg/codesearch/testdata/query-def-source-same-name-static b/pkg/codesearch/testdata/query-def-source-same-name-static
index 3d87c010c..635bd845c 100644
--- a/pkg/codesearch/testdata/query-def-source-same-name-static
+++ b/pkg/codesearch/testdata/query-def-source-same-name-static
@@ -2,7 +2,7 @@ def-source source1.c same_name_in_several_files yes
function same_name_in_several_files is defined in source1.c:
- 3: static void same_name_in_several_files()
- 4: {
- 5: // This is static version in source1.c.
- 6: }
+ 4: static void same_name_in_several_files()
+ 5: {
+ 6: // This is static version in source1.c.
+ 7: }
diff --git a/pkg/codesearch/testdata/query-dir-index-mm b/pkg/codesearch/testdata/query-dir-index-mm
index 554270e4e..806ef7b58 100644
--- a/pkg/codesearch/testdata/query-dir-index-mm
+++ b/pkg/codesearch/testdata/query-dir-index-mm
@@ -3,5 +3,6 @@ dir-index /mm/
directory /mm/ subdirs:
directory /mm/ files:
+ - refs.c
- slub.c
- slub.h
diff --git a/pkg/codesearch/testdata/query-dir-index-root b/pkg/codesearch/testdata/query-dir-index-root
index d14d37392..5e72b64c1 100644
--- a/pkg/codesearch/testdata/query-dir-index-root
+++ b/pkg/codesearch/testdata/query-dir-index-root
@@ -4,6 +4,7 @@ directory / subdirs:
- mm
directory / files:
+ - refs.c
- source0.c
- source0.h
- source1.c
diff --git a/pkg/codesearch/testdata/query-dir-index-root2 b/pkg/codesearch/testdata/query-dir-index-root2
index c82c79609..804bdce54 100644
--- a/pkg/codesearch/testdata/query-dir-index-root2
+++ b/pkg/codesearch/testdata/query-dir-index-root2
@@ -4,6 +4,7 @@ directory /mm/.. subdirs:
- mm
directory /mm/.. files:
+ - refs.c
- source0.c
- source0.h
- source1.c
diff --git a/pkg/codesearch/testdata/query-find-references-context-limit b/pkg/codesearch/testdata/query-find-references-context-limit
new file mode 100644
index 000000000..5d230c28a
--- /dev/null
+++ b/pkg/codesearch/testdata/query-find-references-context-limit
@@ -0,0 +1,42 @@
+find-references refs.c refs2 "" 10000 10
+
+refs2 has 4 references:
+
+function long_func_with_ref calls it at refs.c:29
+ 23: void long_func_with_ref()
+ 24: {
+ 25: refs0();
+ 26: refs1();
+ 27: refs0();
+ 28: refs1();
+ 29: refs2(refs1, refs0());
+ 30: refs0();
+ 31: refs1();
+ 32: refs0();
+ 33: refs1();
+ 34: }
+
+
+function ref_in_mm calls it at mm/refs.c:5
+ 3: void ref_in_mm()
+ 4: {
+ 5: refs2();
+ 6: }
+
+
+function refs3 calls it at refs.c:19
+ 17: void refs3()
+ 18: {
+ 19: refs2(refs1, refs0());
+ 20: (void)refs2;
+ 21: }
+
+
+function refs3 takes-address-of it at refs.c:20
+ 17: void refs3()
+ 18: {
+ 19: refs2(refs1, refs0());
+ 20: (void)refs2;
+ 21: }
+
+
diff --git a/pkg/codesearch/testdata/query-find-references-missing b/pkg/codesearch/testdata/query-find-references-missing
new file mode 100644
index 000000000..5c0d07e93
--- /dev/null
+++ b/pkg/codesearch/testdata/query-find-references-missing
@@ -0,0 +1,3 @@
+find-references source0.c something_that_does_not_exist "" 3 10
+
+requested entity does not exist
diff --git a/pkg/codesearch/testdata/query-find-references-multiple b/pkg/codesearch/testdata/query-find-references-multiple
new file mode 100644
index 000000000..99602ec47
--- /dev/null
+++ b/pkg/codesearch/testdata/query-find-references-multiple
@@ -0,0 +1,28 @@
+find-references refs.c refs2 "" 1 10
+
+refs2 has 4 references:
+
+function long_func_with_ref calls it at refs.c:29
+ 28: refs1();
+ 29: refs2(refs1, refs0());
+ 30: refs0();
+
+
+function ref_in_mm calls it at mm/refs.c:5
+ 4: {
+ 5: refs2();
+ 6: }
+
+
+function refs3 calls it at refs.c:19
+ 18: {
+ 19: refs2(refs1, refs0());
+ 20: (void)refs2;
+
+
+function refs3 takes-address-of it at refs.c:20
+ 19: refs2(refs1, refs0());
+ 20: (void)refs2;
+ 21: }
+
+
diff --git a/pkg/codesearch/testdata/query-find-references-no-context b/pkg/codesearch/testdata/query-find-references-no-context
new file mode 100644
index 000000000..dc38cb875
--- /dev/null
+++ b/pkg/codesearch/testdata/query-find-references-no-context
@@ -0,0 +1,16 @@
+find-references refs.c refs2 "" 0 10
+
+refs2 has 4 references:
+
+function long_func_with_ref calls it at refs.c:29
+
+
+function ref_in_mm calls it at mm/refs.c:5
+
+
+function refs3 calls it at refs.c:19
+
+
+function refs3 takes-address-of it at refs.c:20
+
+
diff --git a/pkg/codesearch/testdata/query-find-references-normal b/pkg/codesearch/testdata/query-find-references-normal
new file mode 100644
index 000000000..1bc1a4be1
--- /dev/null
+++ b/pkg/codesearch/testdata/query-find-references-normal
@@ -0,0 +1,11 @@
+find-references source0.c same_name_in_several_files "" 3 10
+
+same_name_in_several_files has 1 references:
+
+function function_with_comment_in_header calls it at source0.c:21
+ 19: void function_with_comment_in_header()
+ 20: {
+ 21: same_name_in_several_files();
+ 22: }
+
+
diff --git a/pkg/codesearch/testdata/query-find-references-output-limit b/pkg/codesearch/testdata/query-find-references-output-limit
new file mode 100644
index 000000000..aa81868fb
--- /dev/null
+++ b/pkg/codesearch/testdata/query-find-references-output-limit
@@ -0,0 +1,10 @@
+find-references refs.c refs2 "" 1 1
+
+refs2 has 4 references:
+
+function long_func_with_ref calls it at refs.c:29
+ 28: refs1();
+ 29: refs2(refs1, refs0());
+ 30: refs0();
+
+
diff --git a/pkg/codesearch/testdata/query-find-references-path-limit b/pkg/codesearch/testdata/query-find-references-path-limit
new file mode 100644
index 000000000..82a9cfa35
--- /dev/null
+++ b/pkg/codesearch/testdata/query-find-references-path-limit
@@ -0,0 +1,10 @@
+find-references refs.c refs2 mm 1 1
+
+refs2 has 1 references:
+
+function ref_in_mm calls it at mm/refs.c:5
+ 4: {
+ 5: refs2();
+ 6: }
+
+
diff --git a/pkg/codesearch/testdata/refs.c b/pkg/codesearch/testdata/refs.c
new file mode 100644
index 000000000..9b6b2633f
--- /dev/null
+++ b/pkg/codesearch/testdata/refs.c
@@ -0,0 +1,34 @@
+// Copyright 2025 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+int refs0()
+{
+ return 0;
+}
+
+void refs1()
+{
+}
+
+void refs2(void (*)(), int)
+{
+}
+
+void refs3()
+{
+ refs2(refs1, refs0());
+ (void)refs2;
+}
+
+void long_func_with_ref()
+{
+ refs0();
+ refs1();
+ refs0();
+ refs1();
+ refs2(refs1, refs0());
+ refs0();
+ refs1();
+ refs0();
+ refs1();
+}
diff --git a/pkg/codesearch/testdata/refs.c.json b/pkg/codesearch/testdata/refs.c.json
new file mode 100644
index 000000000..289ce7c30
--- /dev/null
+++ b/pkg/codesearch/testdata/refs.c.json
@@ -0,0 +1,157 @@
+{
+ "definitions": [
+ {
+ "kind": "function",
+ "name": "long_func_with_ref",
+ "type": "void ()",
+ "body": {
+ "file": "refs.c",
+ "start_line": 23,
+ "end_line": 34
+ },
+ "comment": {},
+ "refs": [
+ {
+ "kind": "calls",
+ "entity_kind": "function",
+ "name": "refs0",
+ "line": 25
+ },
+ {
+ "kind": "calls",
+ "entity_kind": "function",
+ "name": "refs1",
+ "line": 26
+ },
+ {
+ "kind": "calls",
+ "entity_kind": "function",
+ "name": "refs0",
+ "line": 27
+ },
+ {
+ "kind": "calls",
+ "entity_kind": "function",
+ "name": "refs1",
+ "line": 28
+ },
+ {
+ "kind": "calls",
+ "entity_kind": "function",
+ "name": "refs2",
+ "line": 29
+ },
+ {
+ "kind": "takes-address-of",
+ "entity_kind": "function",
+ "name": "refs1",
+ "line": 29
+ },
+ {
+ "kind": "calls",
+ "entity_kind": "function",
+ "name": "refs0",
+ "line": 29
+ },
+ {
+ "kind": "calls",
+ "entity_kind": "function",
+ "name": "refs0",
+ "line": 30
+ },
+ {
+ "kind": "calls",
+ "entity_kind": "function",
+ "name": "refs1",
+ "line": 31
+ },
+ {
+ "kind": "calls",
+ "entity_kind": "function",
+ "name": "refs0",
+ "line": 32
+ },
+ {
+ "kind": "calls",
+ "entity_kind": "function",
+ "name": "refs1",
+ "line": 33
+ }
+ ]
+ },
+ {
+ "kind": "function",
+ "name": "refs0",
+ "type": "int ()",
+ "body": {
+ "file": "refs.c",
+ "start_line": 4,
+ "end_line": 7
+ },
+ "comment": {
+ "file": "refs.c",
+ "start_line": 1,
+ "end_line": 2
+ }
+ },
+ {
+ "kind": "function",
+ "name": "refs1",
+ "type": "void ()",
+ "body": {
+ "file": "refs.c",
+ "start_line": 9,
+ "end_line": 11
+ },
+ "comment": {}
+ },
+ {
+ "kind": "function",
+ "name": "refs2",
+ "type": "void (void (*)(), int)",
+ "body": {
+ "file": "refs.c",
+ "start_line": 13,
+ "end_line": 15
+ },
+ "comment": {}
+ },
+ {
+ "kind": "function",
+ "name": "refs3",
+ "type": "void ()",
+ "body": {
+ "file": "refs.c",
+ "start_line": 17,
+ "end_line": 21
+ },
+ "comment": {},
+ "refs": [
+ {
+ "kind": "calls",
+ "entity_kind": "function",
+ "name": "refs2",
+ "line": 19
+ },
+ {
+ "kind": "takes-address-of",
+ "entity_kind": "function",
+ "name": "refs1",
+ "line": 19
+ },
+ {
+ "kind": "calls",
+ "entity_kind": "function",
+ "name": "refs0",
+ "line": 19
+ },
+ {
+ "kind": "takes-address-of",
+ "entity_kind": "function",
+ "name": "refs2",
+ "line": 20
+ }
+ ]
+ }
+ ]
+} \ No newline at end of file
diff --git a/pkg/codesearch/testdata/source0.c.json b/pkg/codesearch/testdata/source0.c.json
index d33aa360c..5eea7aba9 100644
--- a/pkg/codesearch/testdata/source0.c.json
+++ b/pkg/codesearch/testdata/source0.c.json
@@ -20,7 +20,15 @@
"start_line": 19,
"end_line": 22
},
- "comment": {}
+ "comment": {},
+ "refs": [
+ {
+ "kind": "calls",
+ "entity_kind": "function",
+ "name": "same_name_in_several_files",
+ "line": 21
+ }
+ ]
},
{
"kind": "function",