1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
// Copyright 2023 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
package subsystem
import "math"
// Extractor deduces the subsystems from the list of crashes.
type Extractor struct {
raw rawExtractorInterface
}
// Crash represents the subset of the available crash information that's required for
// subsystem inference.
type Crash struct {
GuiltyPath string
SyzRepro []byte
}
// rawExtractorInterface simplifies testing.
type rawExtractorInterface interface {
FromPath(path string) []*Subsystem
FromProg(progBytes []byte) []*Subsystem
}
func MakeExtractor(list []*Subsystem) *Extractor {
return &Extractor{raw: makeRawExtractor(list)}
}
func (e *Extractor) Extract(crashes []*Crash) []*Subsystem {
// First put all subsystems to the same list.
subsystems := []*Subsystem{}
reproCount := 0
for _, crash := range crashes {
if crash.GuiltyPath != "" {
subsystems = append(subsystems, e.raw.FromPath(crash.GuiltyPath)...)
}
if len(crash.SyzRepro) != 0 {
reproCount++
}
}
subsystems = removeParents(subsystems)
counts := make(map[*Subsystem]int)
for _, entry := range subsystems {
counts[entry]++
}
// If all reproducers hint at the same subsystem, take it as well.
reproCounts := map[*Subsystem]int{}
fromRepro := []*Subsystem{}
for _, crash := range crashes {
if len(crash.SyzRepro) == 0 {
continue
}
for _, subsystem := range e.raw.FromProg(crash.SyzRepro) {
reproCounts[subsystem]++
if reproCounts[subsystem] == reproCount {
fromRepro = append(fromRepro, subsystem)
}
}
}
// It can be the case that guilty paths point to several subsystems, but the reproducer
// can clearly point to one of them.
// Let's consider it to be the strongest singal.
if len(fromRepro) > 0 {
fromRepro = removeParents(fromRepro)
newSubsystems := []*Subsystem{}
for _, reproSubsystem := range fromRepro {
parents := reproSubsystem.ReachableParents()
parents[reproSubsystem] = struct{}{} // also include the subsystem itself
for _, subsystem := range subsystems {
if _, ok := parents[subsystem]; ok {
newSubsystems = append(newSubsystems, reproSubsystem)
break
}
}
}
if len(newSubsystems) > 0 {
// Just pick those subsystems.
return newSubsystems
}
// If there are sufficiently many reproducers that point to subsystems other than
// those from guilty paths, there's a chance we just didn't parse report correctly.
const cutOff = 3
if reproCount >= cutOff {
// But if the guilty paths are non-controversial, also take the leading candidate.
return append(fromRepro, mostVoted(counts, 0.66)...)
}
}
// Take subsystems from reproducers into account.
for _, entry := range fromRepro {
counts[entry] += reproCount
}
// Let's pick all subsystems that received >= 33% of votes (thus no more than 3).
return removeParents(mostVoted(counts, 0.33))
}
// mostVoted picks subsystems that have received >= share votes.
func mostVoted(counts map[*Subsystem]int, share float64) []*Subsystem {
total := 0
for _, count := range counts {
total += count
}
cutOff := int(math.Ceil(share * float64(total)))
ret := []*Subsystem{}
for entry, count := range counts {
if count < cutOff {
continue
}
ret = append(ret, entry)
}
return ret
}
func removeParents(subsystems []*Subsystem) []*Subsystem {
// If there are both parents and children, remove parents.
ignore := make(map[*Subsystem]struct{})
for _, entry := range subsystems {
for p := range entry.ReachableParents() {
ignore[p] = struct{}{}
}
}
var ret []*Subsystem
for _, entry := range subsystems {
if _, ok := ignore[entry]; ok {
continue
}
ret = append(ret, entry)
}
return ret
}
|