aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/aflow/tool/codesearcher/codesearcher.go
blob: c336a0ca4e274b8a4de92c2e9ee23d3da55ec54a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
// Copyright 2025 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package codesearcher

import (
	"fmt"
	"path/filepath"

	"github.com/google/syzkaller/pkg/aflow"
	"github.com/google/syzkaller/pkg/clangtool"
	"github.com/google/syzkaller/pkg/codesearch"
	"github.com/google/syzkaller/pkg/hash"
)

// Tools is the list of codesearch tools provided by this package.
// Each entry is created with aflow.NewFuncTool from a tool name,
// the function implementing the tool, and a free-form description text.
// All tool functions take prepareResult as their state argument,
// so PrepareIndex has to run before the tools are used.
var Tools = []aflow.Tool{
	aflow.NewFuncTool("codesearch-dir-index", dirIndex, `
Tool provides list of source files and subdirectories in the given directory in the source tree.
`),
	aflow.NewFuncTool("codesearch-file-index", fileIndex, `
Tool provides list of entities defined in the given source file.
Entity can be function, struct, or global variable.
Use it to understand what other things of interest exist in a file.
For example, to locate some initialization function that sets up invariants,
or to find a group of similar functions to later assess similarities/differences
in their implementations.
`),
	aflow.NewFuncTool("codesearch-definition-comment", definitionComment, `
Tool provides source code comment for an entity with the given name.
Entity can be function, struct, or global variable.
Use it to understand how an entity is supposed to be used.
For example, what a function does, or if it may be invoked with NULL pointer argument or not.
But an entity may not have a comment, in which case an empty comment is returned.
In such case, you may consider using codesearch-definition-source tool to look
at the full source code of the entity.
`),
	aflow.NewFuncTool("codesearch-definition-source", definitionSource, `
Tool provides full source code for an entity with the given name.
Entity can be function, struct, or global variable.
Use it to understand implementation details of an entity.
For example, how a function works, what precondition error checks it has, etc.
`),
}

// PrepareIndex is the "codesearch-prepare" action implemented by the prepare function.
// It produces the prepareResult (the code index) that the codesearch tools take as state.
// This action needs to run before any agents that use codesearch tools.
var PrepareIndex = aflow.NewFuncAction("codesearch-prepare", prepare)

// prepareArgs are the inputs of the codesearch-prepare action (see prepare).
// KernelCommit and a hash of KernelConfig form the description string passed to the cache.
// KernelSrc and KernelObj are passed to the clang tool config and are also used
// as the source directories of the resulting index.
// CodesearchToolBin is passed to the clang tool config as the tool binary.
type prepareArgs struct {
	KernelCommit      string
	KernelConfig      string
	KernelSrc         string
	KernelObj         string
	CodesearchToolBin string
}

// prepareResult is the output of the codesearch-prepare action.
// It carries the code index and is accepted as the state argument
// by all tool functions in this package.
type prepareResult struct {
	Index index
}

// dirIndexArgs are the arguments of the codesearch-dir-index tool.
// nolint: lll
type dirIndexArgs struct {
	Dir string `jsonschema:"Relative directory in the source tree. Use an empty string for the root of the tree, or paths like 'net/ipv4/' for subdirs."`
}

// dirIndexResult is the result of the codesearch-dir-index tool.
type dirIndexResult struct {
	Missing bool     `jsonschema:"Set to true if the requested directory does not exist."`
	Subdirs []string `jsonschema:"List of direct subdirectories."`
	Files   []string `jsonschema:"List of source files."`
}

// fileIndexArgs are the arguments of the codesearch-file-index tool.
type fileIndexArgs struct {
	SourceFile string `jsonschema:"Source file path."`
}

// fileIndexResult is the result of the codesearch-file-index tool.
type fileIndexResult struct {
	Missing  bool          `jsonschema:"Set to true if the file with the given name does not exist."`
	Entities []indexEntity `jsonschema:"List of entities defined in the file."`
}

// indexEntity describes a single entity listed in fileIndexResult.
type indexEntity struct {
	Kind string `jsonschema:"Kind of the entity: function, struct, variable."`
	Name string `jsonschema:"Name of the entity."`
}

// defCommentArgs are the arguments of the codesearch-definition-comment tool.
// nolint: lll
type defCommentArgs struct {
	SourceFile string `jsonschema:"Source file path that references the entity. It helps to restrict scope of the search, if there are different definitions with the same name in different source files."`
	Name       string `jsonschema:"Name of the entity of interest."`
}

// defCommentResult is the result of the codesearch-definition-comment tool.
type defCommentResult struct {
	Missing bool   `jsonschema:"Set to true if the entity with the given name does not exist."`
	Kind    string `jsonschema:"Kind of the entity: function, struct, variable."`
	Comment string `jsonschema:"Source comment for the entity."`
}

// defSourceArgs are the arguments of the codesearch-definition-source tool.
// nolint: lll
type defSourceArgs struct {
	SourceFile   string `jsonschema:"Source file path that references the entity. It helps to restrict scope of the search, if there are different definitions with the same name in different source files."`
	Name         string `jsonschema:"Name of the entity of interest."`
	IncludeLines bool   `jsonschema:"Whether to include line numbers in the output or not. Line numbers may distract you, so ask for them only if you need to match lines elsewhere with the source code."`
}

// defSourceResult is the result of the codesearch-definition-source tool.
// The SourceCode description is conditional on IncludeLines, because
// line numbers are requested via the optional defSourceArgs.IncludeLines argument.
// nolint: lll
type defSourceResult struct {
	Missing    bool   `jsonschema:"Set to true if the entity with the given name does not exist."`
	SourceFile string `jsonschema:"Source file path where the entity is defined."`
	SourceCode string `jsonschema:"Source code of the entity definition. If IncludeLines was set, it is prefixed with line numbers, so that they can be referenced in other tool invocations."`
}

// index wraps *codesearch.Index and overrides its JSON (un)marshalling.
// Marshalling yields only a fixed placeholder string, so that the index
// contents do not appear in logs/journal. Unmarshalling always fails,
// which ensures that the index does not pass a JSON marshalling round-trip.
type index struct {
	*codesearch.Index
}

// MarshalJSON emits a fixed placeholder JSON string instead of the index contents.
// It never returns an error.
func (index) MarshalJSON() ([]byte, error) {
	const placeholder = `"codesearch-index"`
	return []byte(placeholder), nil
}

// UnmarshalJSON unconditionally fails: the placeholder produced by MarshalJSON
// carries no data, so an index cannot be restored from JSON.
func (index) UnmarshalJSON([]byte) error {
	err := fmt.Errorf("codesearch-index cannot be unmarshalled")
	return err
}

// prepare implements the codesearch-prepare action.
//
// It asks ctx.Cache for a "codesearch" directory described by the kernel commit
// and the hash of the kernel config. The callback given to the cache runs
// the clang tool so that its output is stored as index.json in that directory
// (presumably the callback is invoked only when the cache entry is absent;
// confirm against aflow.Context.Cache).
// The index is then loaded from that file with the kernel source and object
// directories as source dirs.
//
// If the cache step fails, a zero result and the error are returned.
// The result of codesearch.NewIndex is returned together with its error as is.
func prepare(ctx *aflow.Context, args prepareArgs) (prepareResult, error) {
	const indexFile = "index.json"
	cacheDesc := fmt.Sprintf("kernel commit %v, config hash %v",
		args.KernelCommit, hash.String(args.KernelConfig))
	buildIndex := func(dir string) error {
		_, err := clangtool.Run[codesearch.Database](&clangtool.Config{
			ToolBin:   args.CodesearchToolBin,
			KernelSrc: args.KernelSrc,
			KernelObj: args.KernelObj,
			CacheFile: filepath.Join(dir, indexFile),
		})
		return err
	}
	cacheDir, err := ctx.Cache("codesearch", cacheDesc, buildIndex)
	if err != nil {
		return prepareResult{}, err
	}
	loaded, err := codesearch.NewIndex(
		filepath.Join(cacheDir, indexFile),
		[]string{args.KernelSrc, args.KernelObj},
	)
	return prepareResult{Index: index{loaded}}, err
}

// dirIndex implements the codesearch-dir-index tool.
// It queries the index for args.Dir and repackages the answer:
// Missing is the negation of the "found" flag reported by the index,
// Subdirs and Files are passed through unchanged.
// The error from the index is returned together with the result.
func dirIndex(ctx *aflow.Context, state prepareResult, args dirIndexArgs) (dirIndexResult, error) {
	found, dirs, srcFiles, err := state.Index.DirIndex(args.Dir)
	return dirIndexResult{
		Missing: !found,
		Subdirs: dirs,
		Files:   srcFiles,
	}, err
}

// fileIndex implements the codesearch-file-index tool.
// It queries the index for args.SourceFile and copies the kind and name
// of every reported entity into the result.
// Missing is the negation of the "found" flag reported by the index.
// Entities stays nil when the index reports no entities.
// The error from the index is returned together with the result.
func fileIndex(ctx *aflow.Context, state prepareResult, args fileIndexArgs) (fileIndexResult, error) {
	found, defs, err := state.Index.FileIndex(args.SourceFile)
	var listed []indexEntity
	for _, def := range defs {
		listed = append(listed, indexEntity{Kind: def.Kind, Name: def.Name})
	}
	return fileIndexResult{
		Missing:  !found,
		Entities: listed,
	}, err
}

// definitionComment implements the codesearch-definition-comment tool.
// It looks up the entity args.Name in the context of args.SourceFile.
// On lookup error the error is returned, with Missing set if no info came back.
// If the lookup succeeds but yields no info, only Missing is set.
// Otherwise the entity kind and its comment body are returned.
func definitionComment(ctx *aflow.Context, state prepareResult, args defCommentArgs) (defCommentResult, error) {
	info, err := state.Index.DefinitionComment(args.SourceFile, args.Name)
	switch {
	case err != nil:
		return defCommentResult{Missing: info == nil}, err
	case info == nil:
		return defCommentResult{Missing: true}, nil
	}
	return defCommentResult{
		Kind:    info.Kind,
		Comment: info.Body,
	}, nil
}

// definitionSource implements the codesearch-definition-source tool.
// It looks up the entity args.Name in the context of args.SourceFile,
// forwarding args.IncludeLines to the index.
// If no info is returned, Missing is set and the lookup error (if any) is returned.
// If info is returned together with an error, an empty result and the error are returned.
// Otherwise the defining file and the source body are returned.
func definitionSource(ctx *aflow.Context, state prepareResult, args defSourceArgs) (defSourceResult, error) {
	var res defSourceResult
	info, err := state.Index.DefinitionSource(args.SourceFile, args.Name, args.IncludeLines)
	if info == nil {
		res.Missing = true
		return res, err
	}
	if err != nil {
		return res, err
	}
	res.SourceFile = info.File
	res.SourceCode = info.Body
	return res, nil
}