1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
|
// Copyright 2025 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
package codesearcher
import (
"fmt"
"path/filepath"
"github.com/google/syzkaller/pkg/aflow"
"github.com/google/syzkaller/pkg/clangtool"
"github.com/google/syzkaller/pkg/codesearch"
"github.com/google/syzkaller/pkg/hash"
"github.com/google/syzkaller/tools/clang/codesearch"
)
// Code search tools exposed to agents. The function behind each tool takes the
// prepareResult state, so the PrepareIndex action has to run before they are used.
var (
	// ToolDirIndex lists subdirectories and source files of a directory in the source tree.
	ToolDirIndex = aflow.NewFuncTool("codesearch-dir-index", dirIndex, `
Tool provides list of source files and subdirectories in the given directory in the source tree.
`)
	// ToolReadFile returns the full contents of a single source file.
	ToolReadFile = aflow.NewFuncTool("read-file", readFile, `
Tool provides full contents of a single source file as is. Avoid using this tool if there are better
and more specialized tools for the job, because source files may be large and contain lots
of unrelated information.
`)
	// ToolFileIndex lists entities defined in a source file.
	ToolFileIndex = aflow.NewFuncTool("codesearch-file-index", fileIndex, `
Tool provides list of entities defined in the given source file.
Entity can be function, struct, or global variable.
Use it to understand what other things of interest exist in a file.
For example, to locate some initialization function that sets up invariants,
or to find a group of similar functions to later assess similarities/differences
in their implementations.
`)
	// ToolDefinitionComment returns the source comment of a named entity.
	ToolDefinitionComment = aflow.NewFuncTool("codesearch-definition-comment", definitionComment, `
Tool provides source code comment for an entity with the given name.
Entity can be function, struct, or global variable.
Use it to understand how an entity is supposed to be used.
For example, what a function does, or if it may be invoked with NULL pointer argument or not.
But an entity may not have a comment, in which case an empty comment is returned.
In such case, you may consider using codesearch-definition-source tool to look
at the full source code of the entity.
`)
	// ToolDefinitionSource returns the source code of a named entity.
	ToolDefinitionSource = aflow.NewFuncTool("codesearch-definition-source", definitionSource, `
Tool provides full source code for an entity with the given name.
Entity can be function, struct, or global variable.
Use it to understand implementation details of an entity.
For example, how a function works, what precondition error checks it has, etc.
`)
	// ToolFindReferences lists references to a named entity.
	ToolFindReferences = aflow.NewFuncTool("codesearch-find-references", findReferences, `
Tool finds and lists all references to (uses of) the given entity.
Entity can be function, struct, or global variable.
It can be used to find all calls or other uses of the given function,
definition of the given struct/union/enum,
or all reads/writes of the given struct/union field.
To find field references use 'struct_name::field_name' syntax.
`)
	// ToolStructLayout returns the field layout of a struct/union.
	ToolStructLayout = aflow.NewFuncTool("codesearch-struct-layout", structLayout, `
Tool provides layout of a struct/union (fields, offsets, sizes).
It can be used to understand the full memory layout of a struct,
or to find which field is located at a specific offset.
The response contains ALL fields of the struct. If you don't see
a field in the output, it is NOT present in the struct definition
(e.g. due to #ifdefs).
You can strictly trust the response to be complete and accurate.
`)
	// Tools lists all of the tools declared above.
	Tools = []aflow.Tool{
		ToolDirIndex,
		ToolReadFile,
		ToolFileIndex,
		ToolDefinitionComment,
		ToolDefinitionSource,
		ToolFindReferences,
		ToolStructLayout,
	}
)
// PrepareIndex is the "codesearch-prepare" action backed by the prepare function.
// This action needs to run before any agents that use codesearch tools.
var PrepareIndex = aflow.NewFuncAction("codesearch-prepare", prepare)
// prepareArgs are the inputs of the codesearch-prepare action.
// KernelCommit and a hash of KernelConfig are part of the cache description built by prepare;
// KernelSrc and KernelObj are passed to the clang tool config and to the index as source dirs.
type prepareArgs struct {
KernelCommit string
KernelConfig string
KernelSrc string
KernelObj string
}
// prepareResult is the output of the codesearch-prepare action.
// The tool functions in this file receive it as their state argument.
type prepareResult struct {
Index index
}
// dirIndexArgs are the arguments of the codesearch-dir-index tool.
// nolint: lll
type dirIndexArgs struct {
Dir string `jsonschema:"Relative directory in the source tree. Use an empty string for the root of the tree, or paths like 'net/ipv4/' for subdirs."`
}
// dirIndexResult is the output of the codesearch-dir-index tool.
type dirIndexResult struct {
Subdirs []string `jsonschema:"List of direct subdirectories."`
Files []string `jsonschema:"List of source files."`
}
// readFileArgs are the arguments of the read-file tool.
type readFileArgs struct {
File string `jsonschema:"Source file path."`
}
// readFileResult is the output of the read-file tool.
type readFileResult struct {
Contents string `jsonschema:"File contents."`
}
// fileIndexArgs are the arguments of the codesearch-file-index tool.
type fileIndexArgs struct {
SourceFile string `jsonschema:"Source file path."`
}
// fileIndexResult is the output of the codesearch-file-index tool.
type fileIndexResult struct {
	Entities []indexEntity `jsonschema:"List of entities defined in the file."`
}
// indexEntity describes one entity in the codesearch-file-index tool output.
type indexEntity struct {
Kind string `jsonschema:"Kind of the entity: function, struct, variable."`
Name string `jsonschema:"Name of the entity."`
}
// defCommentArgs are the arguments of the codesearch-definition-comment tool.
// nolint: lll
type defCommentArgs struct {
ContextFile string `jsonschema:"Source file path that references the entity. It helps to restrict scope of the search, if there are different definitions with the same name in different source files."`
Name string `jsonschema:"Name of the entity of interest."`
}
// defCommentResult is the output of the codesearch-definition-comment tool.
type defCommentResult struct {
Kind string `jsonschema:"Kind of the entity: function, struct, variable."`
Comment string `jsonschema:"Source comment for the entity."`
}
// defSourceArgs are the arguments of the codesearch-definition-source tool.
// nolint: lll
type defSourceArgs struct {
ContextFile string `jsonschema:"Source file path that references the entity. It helps to restrict scope of the search, if there are different definitions with the same name in different source files."`
Name string `jsonschema:"Name of the entity of interest."`
IncludeLines bool `jsonschema:"Whether to include line numbers in the output or not. Line numbers may distract you, so ask for them only if you need to match lines elsewhere with the source code."`
}
// defSourceResult is the output of the codesearch-definition-source tool.
// Line numbers in SourceCode are requested with defSourceArgs.IncludeLines,
// so the schema description must not promise them unconditionally.
// nolint: lll
type defSourceResult struct {
	SourceFile string `jsonschema:"Source file path where the entity is defined."`
	SourceCode string `jsonschema:"Source code of the entity definition. If IncludeLines is set, it is prefixed with line numbers, so that they can be referenced in other tool invocations."`
}
// index wraps *codesearch.Index so that the index contents are never serialized:
// marshalling yields a fixed placeholder (keeping the index out of logs/journal),
// and unmarshalling always fails, so the index cannot pass a JSON round-trip.
type index struct {
	*codesearch.Index
}

// MarshalJSON emits a constant placeholder string instead of the index contents.
func (index) MarshalJSON() ([]byte, error) {
	const placeholder = `"codesearch-index"`
	return []byte(placeholder), nil
}

// UnmarshalJSON always fails, the index cannot be restored from JSON.
func (index) UnmarshalJSON([]byte) error {
	err := fmt.Errorf("codesearch-index cannot be unmarshalled")
	return err
}
// prepare implements the codesearch-prepare action: it obtains the codesearch
// database for the given kernel via ctx.Cache and loads the index from it.
//
// The cache description is built from the kernel commit, the hash of the kernel
// config and the database format hash. The callback passed to ctx.Cache runs the
// clang tool over KernelSrc/KernelObj with index.json in the cache dir as its
// cache file (presumably it is invoked only when the entry is not cached yet).
// On any error an empty prepareResult is returned along with the error.
func prepare(ctx *aflow.Context, args prepareArgs) (prepareResult, error) {
	// The same file name is used by the clang tool run and by the index loading below.
	const indexFile = "index.json"
	// NOTE(review): "databash" looks like a typo for "database". It is kept as is,
	// because the string is passed to ctx.Cache as the entry description and changing it
	// presumably invalidates already cached entries — confirm before fixing.
	desc := fmt.Sprintf("kernel commit %v, config hash %v, databash hash %v",
		args.KernelCommit, hash.String(args.KernelConfig), codesearch.DatabaseFormatHash)
	dir, err := ctx.Cache("codesearch", desc, func(dir string) error {
		cfg := &clangtool.Config{
			Tool:      clangtoolimpl.Tool,
			KernelSrc: args.KernelSrc,
			KernelObj: args.KernelObj,
			CacheFile: filepath.Join(dir, indexFile),
		}
		_, err := clangtool.Run[codesearch.Database](cfg)
		return err
	})
	if err != nil {
		return prepareResult{}, err
	}
	srcDirs := []string{args.KernelSrc, args.KernelObj}
	csIndex, err := codesearch.NewIndex(filepath.Join(dir, indexFile), srcDirs)
	if err != nil {
		// Do not hand out a result together with an error.
		return prepareResult{}, err
	}
	return prepareResult{Index: index{csIndex}}, nil
}
// dirIndex implements the codesearch-dir-index tool.
// It forwards args.Dir to the index and returns the subdirectories and files
// it reports together with the error from the index.
func dirIndex(ctx *aflow.Context, state prepareResult, args dirIndexArgs) (dirIndexResult, error) {
	var (
		res dirIndexResult
		err error
	)
	res.Subdirs, res.Files, err = state.Index.DirIndex(args.Dir)
	return res, err
}
// readFile implements the read-file tool.
// It asks the index for the contents of args.File and returns them
// together with the error from the index.
func readFile(ctx *aflow.Context, state prepareResult, args readFileArgs) (readFileResult, error) {
	var res readFileResult
	text, err := state.Index.ReadFile(args.File)
	res.Contents = text
	return res, err
}
// fileIndex implements the codesearch-file-index tool.
// It asks the index for the entities defined in args.SourceFile and converts
// them to the tool output format (kind and name of each entity).
// If the index lookup fails, an empty result is returned along with the error,
// the same way the other tool functions in this file report errors.
func fileIndex(ctx *aflow.Context, state prepareResult, args fileIndexArgs) (fileIndexResult, error) {
	entities, err := state.Index.FileIndex(args.SourceFile)
	if err != nil {
		return fileIndexResult{}, err
	}
	// Entities is intentionally left nil when the index reports no entities.
	var res fileIndexResult
	for _, ent := range entities {
		res.Entities = append(res.Entities, indexEntity{
			Kind: ent.Kind,
			Name: ent.Name,
		})
	}
	return res, nil
}
// definitionComment implements the codesearch-definition-comment tool.
// It looks up the entity args.Name (in the context of args.ContextFile) in the index
// and returns its kind and comment body. On lookup error the result is left empty.
func definitionComment(ctx *aflow.Context, state prepareResult, args defCommentArgs) (defCommentResult, error) {
	var res defCommentResult
	def, err := state.Index.DefinitionComment(args.ContextFile, args.Name)
	if err == nil {
		res.Kind = def.Kind
		res.Comment = def.Body
	}
	return res, err
}
// definitionSource implements the codesearch-definition-source tool.
// It looks up the entity args.Name (in the context of args.ContextFile) in the index,
// passing args.IncludeLines through, and returns the defining file and the source body.
func definitionSource(ctx *aflow.Context, state prepareResult, args defSourceArgs) (defSourceResult, error) {
	def, err := state.Index.DefinitionSource(args.ContextFile, args.Name, args.IncludeLines)
	if err != nil {
		var none defSourceResult
		return none, err
	}
	res := defSourceResult{SourceFile: def.File, SourceCode: def.Body}
	return res, nil
}
// findReferencesArgs are the arguments of the codesearch-find-references tool.
// nolint: lll
type findReferencesArgs struct {
ContextFile string `jsonschema:"Source file path that references the entity. It helps to restrict scope of the search, if there are different definitions with the same name in different source files." json:",omitempty"`
Name string `jsonschema:"Name of the entity of interest."`
SourceTreePrefix string `jsonschema:"Prefix of the source tree where to search for references. Can be used to restrict search to e.g. net/ipv4/. Pass an empty string to find all references." json:",omitempty"`
IncludeSnippetLines uint `jsonschema:"If set to non-0, output will include source code snippets with that many lines of context. If set to 0, no source snippets will be included. Snippets only show the referencing entity, so to see e.g. whole referencing functions pass a large value, e.g. 10000" json:",omitempty"`
}
// findReferencesResult is the output of the codesearch-find-references tool.
// nolint: lll
type findReferencesResult struct {
TruncatedOutput bool `jsonschema:"Set if there were too many references, and the output is truncated. If you get truncated output, you may try to either request w/o source code snippets by passing IncludeSnippetLines=0 (which has higher limit on the number of output references), or restrict search to some prefix of the source tree with SourceTreePrefix argument."`
References []codesearch.ReferenceInfo `jsonschema:"List of requested references."`
}
// findReferences implements the codesearch-find-references tool.
// The cap on the number of returned references depends on the requested snippet size:
// 1000 without snippets, 100 for snippets shorter than 10 lines, and 20 otherwise.
// TruncatedOutput is set when the index reports more references than it returned.
func findReferences(ctx *aflow.Context, state prepareResult, args findReferencesArgs) (findReferencesResult, error) {
	// TODO: consider limiting output based on the total number of lines in code snippets.
	// In the end we care about total number of consumed tokens.
	var limit int
	switch snippet := args.IncludeSnippetLines; {
	case snippet == 0:
		limit = 1000
	case snippet < 10:
		limit = 100
	default:
		limit = 20
	}
	found, total, err := state.Index.FindReferences(
		args.ContextFile, args.Name, args.SourceTreePrefix,
		int(args.IncludeSnippetLines), limit)
	if err != nil {
		return findReferencesResult{}, err
	}
	res := findReferencesResult{References: found}
	res.TruncatedOutput = total > len(found)
	return res, nil
}
// structLayoutArgs are the arguments of the codesearch-struct-layout tool.
// nolint: lll
type structLayoutArgs struct {
ContextFile string `jsonschema:"Source file path that references the entity. It helps to restrict scope of the search, if there are different definitions with the same name in different source files." json:",omitempty"`
Name string `jsonschema:"Name of the struct/union."`
FieldOffset *uint `jsonschema:"Byte offset to query. If set to null (or missing), the tool returns the whole struct layout. Otherwise, it returns only the field(s) overlapping with this byte." json:",omitempty"`
}
// structLayoutResult is the output of the codesearch-struct-layout tool.
type structLayoutResult struct {
Fields []structLayoutField `jsonschema:"List of fields."`
}
// structLayoutField describes one field in the codesearch-struct-layout tool output.
// Offset and size are expressed in bits.
type structLayoutField struct {
Name string `jsonschema:"Name of the field."`
OffsetBits uint64 `jsonschema:"Offset of the field in bits."`
SizeBits uint64 `jsonschema:"Size of the field in bits."`
}
// structLayout implements the codesearch-struct-layout tool.
// It queries the index for the layout of the struct/union args.Name (optionally
// restricted by args.FieldOffset) and converts every reported field to the tool
// output format (name, offset and size in bits).
func structLayout(ctx *aflow.Context, state prepareResult, args structLayoutArgs) (structLayoutResult, error) {
	layout, err := state.Index.GetStructLayout(args.ContextFile, args.Name, args.FieldOffset)
	if err != nil {
		return structLayoutResult{}, err
	}
	var res structLayoutResult
	for _, field := range layout {
		converted := structLayoutField{
			Name:       field.Name,
			OffsetBits: field.OffsetBits,
			SizeBits:   field.SizeBits,
		}
		res.Fields = append(res.Fields, converted)
	}
	return res, nil
}
|