diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2026-01-19 15:31:50 +0100 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2026-01-21 13:38:45 +0000 |
| commit | d720635adb8965149cd75a3da692d3a0480c36c9 (patch) | |
| tree | d9896eabd70abd266028624702bc74609ba1c9e4 /pkg/codesearch | |
| parent | 6984f21fcb77bdd034a489c0e552aa1d910e852f (diff) | |
pkg/codesearch: support searching for references
Extend codesearch clang tool to export info about function references
(calls, takes-address-of).
Add pkg/codesearch command find-references.
Export find-references in pkg/aflow/tools/codesearcher to LLMs.
Update #6469
Diffstat (limited to 'pkg/codesearch')
22 files changed, 482 insertions, 28 deletions
diff --git a/pkg/codesearch/codesearch.go b/pkg/codesearch/codesearch.go index 746984369..396df4f82 100644 --- a/pkg/codesearch/codesearch.go +++ b/pkg/codesearch/codesearch.go @@ -10,6 +10,7 @@ import ( "os" "path/filepath" "slices" + "strconv" "strings" "syscall" @@ -80,6 +81,28 @@ var Commands = []Command{ } return fmt.Sprintf("%v %v is defined in %v:\n\n%v", info.Kind, args[1], info.File, info.Body), nil }}, + {"find-references", 5, func(index *Index, args []string) (string, error) { + contextLines, err := strconv.Atoi(args[3]) + if err != nil { + return "", fmt.Errorf("failed to parse number of context lines %q: %w", args[3], err) + } + outputLimit, err := strconv.Atoi(args[4]) + if err != nil { + return "", fmt.Errorf("failed to parse output limit %q: %w", args[4], err) + } + refs, totalCount, err := index.FindReferences(args[0], args[1], args[2], contextLines, outputLimit) + if err != nil { + return "", err + } + b := new(strings.Builder) + fmt.Fprintf(b, "%v has %v references:\n\n", args[1], totalCount) + for _, ref := range refs { + fmt.Fprintf(b, "%v %v %v it at %v:%v\n%v\n\n", + ref.ReferencingEntityKind, ref.ReferencingEntityName, ref.ReferenceKind, + ref.SourceFile, ref.SourceLine, ref.SourceSnippet) + } + return b.String(), nil + }}, } func IsSourceFile(file string) bool { @@ -225,6 +248,69 @@ func (index *Index) definitionSource(contextFile, name string, comment, includeL }, nil } +type ReferenceInfo struct { + ReferencingEntityKind string `jsonschema:"Kind of the referencing entity (function, struct, etc)."` + ReferencingEntityName string `jsonschema:"Name of the referencing entity."` + ReferenceKind string `jsonschema:"Kind of the reference (calls, takes-address, reads, writes-to, etc)."` + SourceFile string `jsonschema:"Source file of the reference."` + SourceLine int `jsonschema:"Source line of the reference."` + SourceSnippet string `jsonschema:"Surrounding code snippet, if requested." json:",omitempty"` +} + +func (index *Index) FindReferences(contextFile, name, srcPrefix string, contextLines, outputLimit int) ( + []ReferenceInfo, int, error) { + target := index.findDefinition(contextFile, name) + if target == nil { + return nil, 0, aflow.BadCallError("requested entity does not exist") + } + if srcPrefix != "" { + srcPrefix = filepath.Clean(srcPrefix) + } + totalCount := 0 + var results []ReferenceInfo + for _, def := range index.db.Definitions { + if !strings.HasPrefix(def.Body.File, srcPrefix) { + continue + } + for _, ref := range def.Refs { + // TODO: this mis-handles the following case: + // the target is a non-static 'foo' in some file, + // the reference is in another file and refers to a static 'foo' + // defined in that file (which is not the target 'foo'). + if ref.EntityKind != target.Kind || ref.Name != target.Name || + target.IsStatic && target.Body.File != def.Body.File { + continue + } + totalCount++ + if totalCount > outputLimit { + continue + } + snippet := "" + if contextLines > 0 { + lines := LineRange{ + File: def.Body.File, + StartLine: max(def.Body.StartLine, ref.Line-contextLines), + EndLine: min(def.Body.EndLine, ref.Line+contextLines), + } + var err error + snippet, err = index.formatSource(lines, true) + if err != nil { + return nil, 0, err + } + } + results = append(results, ReferenceInfo{ + ReferencingEntityKind: def.Kind, + ReferencingEntityName: def.Name, + ReferenceKind: ref.Kind, + SourceFile: def.Body.File, + SourceLine: ref.Line, + SourceSnippet: snippet, + }) + } + } + return results, totalCount, nil +} + func (index *Index) findDefinition(contextFile, name string) *Definition { var weakMatch *Definition for _, def := range index.db.Definitions { @@ -269,7 +355,7 @@ func formatSourceFile(file string, start, end int, includeLines bool) (string, e b := new(strings.Builder) for line := start; line <= end; line++ { if includeLines { - fmt.Fprintf(b, "%4v:\t%s\n", line, lines[line]) + fmt.Fprintf(b, "%4v:\t%s\n", line+1, lines[line]) } else { fmt.Fprintf(b, "%s\n", lines[line]) } diff --git a/pkg/codesearch/codesearch_test.go b/pkg/codesearch/codesearch_test.go index 1f353c804..85dc90e00 100644 --- a/pkg/codesearch/codesearch_test.go +++ b/pkg/codesearch/codesearch_test.go @@ -47,16 +47,24 @@ func testCommand(t *testing.T, index *Index, covered map[string]bool, file strin t.Fatal(err) } query, _, _ := bytes.Cut(data, []byte{'\n'}) - args := strings.Fields(string(query)) - if len(args) == 0 { + fields := strings.Fields(string(query)) + if len(fields) == 0 { t.Fatal("no command found") } - result, err := index.Command(args[0], args[1:]) + cmd := fields[0] + var args []string + for _, arg := range fields[1:] { + if arg == `""` { + arg = "" + } + args = append(args, arg) + } + result, err := index.Command(cmd, args) if err != nil { // This is supposed to test aflow.BadCallError messages. result = err.Error() + "\n" } - got := append([]byte(strings.Join(args, " ")+"\n\n"), result...) + got := append([]byte(strings.Join(fields, " ")+"\n\n"), result...) tooltest.CompareGoldenData(t, file, got) - covered[args[0]] = true + covered[cmd] = true } diff --git a/pkg/codesearch/database.go b/pkg/codesearch/database.go index 4757935e9..499fc73cb 100644 --- a/pkg/codesearch/database.go +++ b/pkg/codesearch/database.go @@ -14,12 +14,20 @@ type Database struct { } type Definition struct { - Kind string `json:"kind,omitempty"` - Name string `json:"name,omitempty"` - Type string `json:"type,omitempty"` - IsStatic bool `json:"is_static,omitempty"` - Body LineRange `json:"body,omitempty"` - Comment LineRange `json:"comment,omitempty"` + Kind string `json:"kind,omitempty"` + Name string `json:"name,omitempty"` + Type string `json:"type,omitempty"` + IsStatic bool `json:"is_static,omitempty"` + Body LineRange `json:"body,omitempty"` + Comment LineRange `json:"comment,omitempty"` + Refs []Reference `json:"refs,omitempty"` +} + +type Reference struct { + Kind string `json:"kind,omitempty"` + EntityKind string `json:"entity_kind,omitempty"` + Name string `json:"name,omitempty"` + Line int `json:"line,omitempty"` } type LineRange struct { diff --git a/pkg/codesearch/testdata/mm/refs.c b/pkg/codesearch/testdata/mm/refs.c new file mode 100644 index 000000000..709871ab7 --- /dev/null +++ b/pkg/codesearch/testdata/mm/refs.c @@ -0,0 +1,6 @@ +int refs2(); + +void ref_in_mm() +{ + refs2(); +} diff --git a/pkg/codesearch/testdata/mm/refs.c.json b/pkg/codesearch/testdata/mm/refs.c.json new file mode 100644 index 000000000..09ac87f98 --- /dev/null +++ b/pkg/codesearch/testdata/mm/refs.c.json @@ -0,0 +1,23 @@ +{ + "definitions": [ + { + "kind": "function", + "name": "ref_in_mm", + "type": "void ()", + "body": { + "file": "mm/refs.c", + "start_line": 3, + "end_line": 6 + }, + "comment": {}, + "refs": [ + { + "kind": "calls", + "entity_kind": "function", + "name": "refs2", + "line": 5 + } + ] + } + ] +}
\ No newline at end of file diff --git a/pkg/codesearch/testdata/mm/slub.c.json b/pkg/codesearch/testdata/mm/slub.c.json new file mode 100644 index 000000000..9e26dfeeb --- /dev/null +++ b/pkg/codesearch/testdata/mm/slub.c.json @@ -0,0 +1 @@ +{}
\ No newline at end of file diff --git a/pkg/codesearch/testdata/query-def-source-header b/pkg/codesearch/testdata/query-def-source-header index fd3ba300b..9e221f57b 100644 --- a/pkg/codesearch/testdata/query-def-source-header +++ b/pkg/codesearch/testdata/query-def-source-header @@ -2,7 +2,7 @@ def-source source0.c function_with_comment_in_header yes function function_with_comment_in_header is defined in source0.c: - 18: void function_with_comment_in_header() - 19: { - 20: same_name_in_several_files(); - 21: } + 19: void function_with_comment_in_header() + 20: { + 21: same_name_in_several_files(); + 22: } diff --git a/pkg/codesearch/testdata/query-def-source-open b/pkg/codesearch/testdata/query-def-source-open index bdcec72fd..c8c11b8ef 100644 --- a/pkg/codesearch/testdata/query-def-source-open +++ b/pkg/codesearch/testdata/query-def-source-open @@ -2,10 +2,10 @@ def-source source0.c open yes function open is defined in source0.c: - 5: /* - 6: * Comment about open. - 7: */ - 8: int open() - 9: { - 10: return 0; - 11: } + 6: /* + 7: * Comment about open. + 8: */ + 9: int open() + 10: { + 11: return 0; + 12: } diff --git a/pkg/codesearch/testdata/query-def-source-same-name-static b/pkg/codesearch/testdata/query-def-source-same-name-static index 3d87c010c..635bd845c 100644 --- a/pkg/codesearch/testdata/query-def-source-same-name-static +++ b/pkg/codesearch/testdata/query-def-source-same-name-static @@ -2,7 +2,7 @@ def-source source1.c same_name_in_several_files yes function same_name_in_several_files is defined in source1.c: - 3: static void same_name_in_several_files() - 4: { - 5: // This is static version in source1.c. - 6: } + 4: static void same_name_in_several_files() + 5: { + 6: // This is static version in source1.c. + 7: } diff --git a/pkg/codesearch/testdata/query-dir-index-mm b/pkg/codesearch/testdata/query-dir-index-mm index 554270e4e..806ef7b58 100644 --- a/pkg/codesearch/testdata/query-dir-index-mm +++ b/pkg/codesearch/testdata/query-dir-index-mm @@ -3,5 +3,6 @@ dir-index /mm/ directory /mm/ subdirs: directory /mm/ files: + - refs.c - slub.c - slub.h diff --git a/pkg/codesearch/testdata/query-dir-index-root b/pkg/codesearch/testdata/query-dir-index-root index d14d37392..5e72b64c1 100644 --- a/pkg/codesearch/testdata/query-dir-index-root +++ b/pkg/codesearch/testdata/query-dir-index-root @@ -4,6 +4,7 @@ directory / subdirs: - mm directory / files: + - refs.c - source0.c - source0.h - source1.c diff --git a/pkg/codesearch/testdata/query-dir-index-root2 b/pkg/codesearch/testdata/query-dir-index-root2 index c82c79609..804bdce54 100644 --- a/pkg/codesearch/testdata/query-dir-index-root2 +++ b/pkg/codesearch/testdata/query-dir-index-root2 @@ -4,6 +4,7 @@ directory /mm/.. subdirs: - mm directory /mm/.. files: + - refs.c - source0.c - source0.h - source1.c diff --git a/pkg/codesearch/testdata/query-find-references-context-limit b/pkg/codesearch/testdata/query-find-references-context-limit new file mode 100644 index 000000000..5d230c28a --- /dev/null +++ b/pkg/codesearch/testdata/query-find-references-context-limit @@ -0,0 +1,42 @@ +find-references refs.c refs2 "" 10000 10 + +refs2 has 4 references: + +function long_func_with_ref calls it at refs.c:29 + 23: void long_func_with_ref() + 24: { + 25: refs0(); + 26: refs1(); + 27: refs0(); + 28: refs1(); + 29: refs2(refs1, refs0()); + 30: refs0(); + 31: refs1(); + 32: refs0(); + 33: refs1(); + 34: } + + +function ref_in_mm calls it at mm/refs.c:5 + 3: void ref_in_mm() + 4: { + 5: refs2(); + 6: } + + +function refs3 calls it at refs.c:19 + 17: void refs3() + 18: { + 19: refs2(refs1, refs0()); + 20: (void)refs2; + 21: } + + +function refs3 takes-address-of it at refs.c:20 + 17: void refs3() + 18: { + 19: refs2(refs1, refs0()); + 20: (void)refs2; + 21: } + + diff --git a/pkg/codesearch/testdata/query-find-references-missing b/pkg/codesearch/testdata/query-find-references-missing new file mode 100644 index 000000000..5c0d07e93 --- /dev/null +++ b/pkg/codesearch/testdata/query-find-references-missing @@ -0,0 +1,3 @@ +find-references source0.c something_that_does_not_exist "" 3 10 + +requested entity does not exist diff --git a/pkg/codesearch/testdata/query-find-references-multiple b/pkg/codesearch/testdata/query-find-references-multiple new file mode 100644 index 000000000..99602ec47 --- /dev/null +++ b/pkg/codesearch/testdata/query-find-references-multiple @@ -0,0 +1,28 @@ +find-references refs.c refs2 "" 1 10 + +refs2 has 4 references: + +function long_func_with_ref calls it at refs.c:29 + 28: refs1(); + 29: refs2(refs1, refs0()); + 30: refs0(); + + +function ref_in_mm calls it at mm/refs.c:5 + 4: { + 5: refs2(); + 6: } + + +function refs3 calls it at refs.c:19 + 18: { + 19: refs2(refs1, refs0()); + 20: (void)refs2; + + +function refs3 takes-address-of it at refs.c:20 + 19: refs2(refs1, refs0()); + 20: (void)refs2; + 21: } + + diff --git a/pkg/codesearch/testdata/query-find-references-no-context b/pkg/codesearch/testdata/query-find-references-no-context new file mode 100644 index 000000000..dc38cb875 --- /dev/null +++ b/pkg/codesearch/testdata/query-find-references-no-context @@ -0,0 +1,16 @@ +find-references refs.c refs2 "" 0 10 + +refs2 has 4 references: + +function long_func_with_ref calls it at refs.c:29 + + +function ref_in_mm calls it at mm/refs.c:5 + + +function refs3 calls it at refs.c:19 + + +function refs3 takes-address-of it at refs.c:20 + + diff --git a/pkg/codesearch/testdata/query-find-references-normal b/pkg/codesearch/testdata/query-find-references-normal new file mode 100644 index 000000000..1bc1a4be1 --- /dev/null +++ b/pkg/codesearch/testdata/query-find-references-normal @@ -0,0 +1,11 @@ +find-references source0.c same_name_in_several_files "" 3 10 + +same_name_in_several_files has 1 references: + +function function_with_comment_in_header calls it at source0.c:21 + 19: void function_with_comment_in_header() + 20: { + 21: same_name_in_several_files(); + 22: } + + diff --git a/pkg/codesearch/testdata/query-find-references-output-limit b/pkg/codesearch/testdata/query-find-references-output-limit new file mode 100644 index 000000000..aa81868fb --- /dev/null +++ b/pkg/codesearch/testdata/query-find-references-output-limit @@ -0,0 +1,10 @@ +find-references refs.c refs2 "" 1 1 + +refs2 has 4 references: + +function long_func_with_ref calls it at refs.c:29 + 28: refs1(); + 29: refs2(refs1, refs0()); + 30: refs0(); + + diff --git a/pkg/codesearch/testdata/query-find-references-path-limit b/pkg/codesearch/testdata/query-find-references-path-limit new file mode 100644 index 000000000..82a9cfa35 --- /dev/null +++ b/pkg/codesearch/testdata/query-find-references-path-limit @@ -0,0 +1,10 @@ +find-references refs.c refs2 mm 1 1 + +refs2 has 1 references: + +function ref_in_mm calls it at mm/refs.c:5 + 4: { + 5: refs2(); + 6: } + + diff --git a/pkg/codesearch/testdata/refs.c b/pkg/codesearch/testdata/refs.c new file mode 100644 index 000000000..9b6b2633f --- /dev/null +++ b/pkg/codesearch/testdata/refs.c @@ -0,0 +1,34 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +int refs0() +{ + return 0; +} + +void refs1() +{ +} + +void refs2(void (*)(), int) +{ +} + +void refs3() +{ + refs2(refs1, refs0()); + (void)refs2; +} + +void long_func_with_ref() +{ + refs0(); + refs1(); + refs0(); + refs1(); + refs2(refs1, refs0()); + refs0(); + refs1(); + refs0(); + refs1(); +} diff --git a/pkg/codesearch/testdata/refs.c.json b/pkg/codesearch/testdata/refs.c.json new file mode 100644 index 000000000..289ce7c30 --- /dev/null +++ b/pkg/codesearch/testdata/refs.c.json @@ -0,0 +1,157 @@ +{ + "definitions": [ + { + "kind": "function", + "name": "long_func_with_ref", + "type": "void ()", + "body": { + "file": "refs.c", + "start_line": 23, + "end_line": 34 + }, + "comment": {}, + "refs": [ + { + "kind": "calls", + "entity_kind": "function", + "name": "refs0", + "line": 25 + }, + { + "kind": "calls", + "entity_kind": "function", + "name": "refs1", + "line": 26 + }, + { + "kind": "calls", + "entity_kind": "function", + "name": "refs0", + "line": 27 + }, + { + "kind": "calls", + "entity_kind": "function", + "name": "refs1", + "line": 28 + }, + { + "kind": "calls", + "entity_kind": "function", + "name": "refs2", + "line": 29 + }, + { + "kind": "takes-address-of", + "entity_kind": "function", + "name": "refs1", + "line": 29 + }, + { + "kind": "calls", + "entity_kind": "function", + "name": "refs0", + "line": 29 + }, + { + "kind": "calls", + "entity_kind": "function", + "name": "refs0", + "line": 30 + }, + { + "kind": "calls", + "entity_kind": "function", + "name": "refs1", + "line": 31 + }, + { + "kind": "calls", + "entity_kind": "function", + "name": "refs0", + "line": 32 + }, + { + "kind": "calls", + "entity_kind": "function", + "name": "refs1", + "line": 33 + } + ] + }, + { + "kind": "function", + "name": "refs0", + "type": "int ()", + "body": { + "file": "refs.c", + "start_line": 4, + "end_line": 7 + }, + "comment": { + "file": "refs.c", + "start_line": 1, + "end_line": 2 + } + }, + { + "kind": "function", + "name": "refs1", + "type": "void ()", + "body": { + "file": "refs.c", + "start_line": 9, + "end_line": 11 + }, + "comment": {} + }, + { + "kind": "function", + "name": "refs2", + "type": "void (void (*)(), int)", + "body": { + "file": "refs.c", + "start_line": 13, + "end_line": 15 + }, + "comment": {} + }, + { + "kind": "function", + "name": "refs3", + "type": "void ()", + "body": { + "file": "refs.c", + "start_line": 17, + "end_line": 21 + }, + "comment": {}, + "refs": [ + { + "kind": "calls", + "entity_kind": "function", + "name": "refs2", + "line": 19 + }, + { + "kind": "takes-address-of", + "entity_kind": "function", + "name": "refs1", + "line": 19 + }, + { + "kind": "calls", + "entity_kind": "function", + "name": "refs0", + "line": 19 + }, + { + "kind": "takes-address-of", + "entity_kind": "function", + "name": "refs2", + "line": 20 + } + ] + } + ] +}
\ No newline at end of file diff --git a/pkg/codesearch/testdata/source0.c.json b/pkg/codesearch/testdata/source0.c.json index d33aa360c..5eea7aba9 100644 --- a/pkg/codesearch/testdata/source0.c.json +++ b/pkg/codesearch/testdata/source0.c.json @@ -20,7 +20,15 @@ "start_line": 19, "end_line": 22 }, - "comment": {} + "comment": {}, + "refs": [ + { + "kind": "calls", + "entity_kind": "function", + "name": "same_name_in_several_files", + "line": 21 + } + ] }, { "kind": "function", |
