1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
// Copyright 2025 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
package assessmenet
import (
"github.com/google/syzkaller/pkg/aflow"
"github.com/google/syzkaller/pkg/aflow/action/kernel"
"github.com/google/syzkaller/pkg/aflow/ai"
"github.com/google/syzkaller/pkg/aflow/tool/codesearcher"
)
// kcsanInputs is the input type the KCSAN assessment workflow is registered
// with (first type argument of aflow.Register in init).
type kcsanInputs struct {
	// CrashReport is the report text; kcsanPrompt references it
	// as {{.CrashReport}}.
	CrashReport string
	// KernelRepo, KernelCommit and KernelConfig are presumably consumed by the
	// kernel.Checkout/kernel.Build actions — TODO confirm against those actions.
	KernelRepo   string
	KernelCommit string
	KernelConfig string
	// CodesearchToolBin is presumably the code search tool binary used by the
	// codesearcher actions/tools — TODO confirm.
	CodesearchToolBin string
}
// kcsanOutputs is the output type the KCSAN assessment workflow is registered
// with (second type argument of aflow.Register in init).
type kcsanOutputs struct {
	// Confident and Benign share their names with the fields of the "expert"
	// agent's Outputs struct declared in init.
	Confident bool
	Benign    bool
	// Explanation shares its name with the agent's Reply setting ("Explanation").
	Explanation string
}
// init registers the KCSAN assessment workflow under ai.WorkflowAssessmentKCSAN.
//
// The flow's root is a pipeline whose actions are listed in this order:
// kernel.Checkout, kernel.Build, codesearcher.PrepareIndex and finally the
// "expert" LLM agent that is asked for the verdict.
func init() {
	// The agent reports two structured booleans; its reply is named
	// "Explanation", matching the kcsanOutputs field of the same name.
	expert := &aflow.LLMAgent{
		Name:  "expert",
		Reply: "Explanation",
		Outputs: aflow.LLMOutputs[struct {
			Confident bool `jsonschema:"If you are confident in the verdict of the analysis or not."`
			Benign    bool `jsonschema:"If the data race is benign or not."`
		}](),
		Temperature: 1,
		Instruction: kcsanInstruction,
		Prompt:      kcsanPrompt,
		Tools:       codesearcher.Tools,
	}

	pipeline := &aflow.Pipeline{
		Actions: []aflow.Action{
			kernel.Checkout,
			kernel.Build,
			codesearcher.PrepareIndex,
			expert,
		},
	}

	aflow.Register[kcsanInputs, kcsanOutputs](
		ai.WorkflowAssessmentKCSAN,
		"assess if a KCSAN report is about a benign race that only needs annotations or not",
		&aflow.Flow{Root: pipeline},
	)
}
// kcsanInstruction is the Instruction text given to the "expert" LLM agent.
// It describes what the report contains, defines "benign" versus "harmful"
// data races, and tells the model to verify its assumptions with the code
// search tools rather than guessing about the kernel source.
const kcsanInstruction = `
You are an experienced Linux kernel developer tasked with determining if the given kernel
data race is benign or not. The data race report is from KCSAN tool.
It contains 2 stack traces of the memory accesses that constitute a data race.
A "benign" data races are on a simple int/bool variable or similar field,
and the accesses are not supposed to be protected by any mutual exclusion primitives.
Common examples of such "benign" data races are accesses to various flags fields,
statistics counters, and similar. A "benign" data race does not lead to memory corruption/crash
with a conservative compiler that compiles memory accesses to primitive types
effectively as atomic.
A non-benign (or "harmful" data race) can lead to corruption/crash even with
a conservative compiler that compiles memory accesses to primitive types
effectively as atomic. A common example of a "harmful" data races is race on
a complex container (list/hashmap/etc), where accesses are supposed to be protected
by a mutual exclusion primitive.
In the final reply explain why you think the given data race is benign or is harmful.
Use the provided tools to confirm any assumptions, what variables/fields being accessed, etc.
In particular, don't make assumptions about the kernel source code,
use codesearch tools to read the actual source code.
`
// kcsanPrompt is the Prompt text given to the "expert" LLM agent.
// The {{.CrashReport}} placeholder presumably expands to the CrashReport
// workflow input via template substitution — TODO confirm in aflow.
const kcsanPrompt = `
The data race report is:
{{.CrashReport}}
`
|