aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/subsystem/extractor.go
blob: 996560a31a930d5b53b92ad4fe335dd57d9321cb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
// Copyright 2023 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package subsystem

// Extractor deduces the subsystems from the list of crashes.
type Extractor struct {
	raw rawExtractorInterface
}

// Crash represents the subset of the available crash information that's required for
// subsystem inference.
type Crash struct {
	GuiltyPath string
	SyzRepro   []byte
}

// rawExtractorInterface simplifies testing.
type rawExtractorInterface interface {
	FromPath(path string) []*Subsystem
	FromProg(progBytes []byte) []*Subsystem
}

func MakeExtractor(list []*Subsystem) *Extractor {
	return &Extractor{raw: makeRawExtractor(list)}
}

func (e *Extractor) Extract(crashes []*Crash) []*Subsystem {
	// First put all subsystems to the same list.
	subsystems := []*Subsystem{}
	reproCount := 0
	for _, crash := range crashes {
		if crash.GuiltyPath != "" {
			subsystems = append(subsystems, e.raw.FromPath(crash.GuiltyPath)...)
		}
		if len(crash.SyzRepro) != 0 {
			reproCount++
		}
	}

	// If all reproducers hint at the same subsystem, take it as well.
	reproCounts := map[*Subsystem]int{}
	fromRepro := []*Subsystem{}
	for _, crash := range crashes {
		if len(crash.SyzRepro) == 0 {
			continue
		}
		for _, subsystem := range e.raw.FromProg(crash.SyzRepro) {
			reproCounts[subsystem]++
			if reproCounts[subsystem] == reproCount {
				fromRepro = append(fromRepro, subsystem)
			}
		}
	}

	// It can be the case that guilty paths point to several subsystems, but the reproducer
	// can clearly point to one of them.
	if len(fromRepro) > 0 {
		// If we do not exclude parents, there'll always be a root subsystem in there
		// and, as a result, subsystems reproducers will always replace all other ones.
		withoutParents := removeParents(subsystems)
		newSubsystems := []*Subsystem{}
		for _, reproSubsystem := range fromRepro {
			parents := reproSubsystem.ReachableParents()
			for _, subsystem := range withoutParents {
				if _, ok := parents[subsystem]; ok {
					newSubsystems = append(newSubsystems, reproSubsystem)
					break
				}
			}
		}
		if len(newSubsystems) > 0 {
			subsystems = newSubsystems
		}
	}

	// And calculate counts.
	counts := make(map[*Subsystem]int)
	maxCount := 0
	for _, entry := range removeParents(append(subsystems, fromRepro...)) {
		counts[entry]++
		if counts[entry] > maxCount {
			maxCount = counts[entry]
		}
	}

	// Pick the most prevalent ones.
	ret := []*Subsystem{}
	for entry, count := range counts {
		if count < maxCount {
			continue
		}
		ret = append(ret, entry)
	}
	return ret
}

func removeParents(subsystems []*Subsystem) []*Subsystem {
	// If there are both parents and children, remove parents.
	ignore := make(map[*Subsystem]struct{})
	for _, entry := range subsystems {
		for p := range entry.ReachableParents() {
			ignore[p] = struct{}{}
		}
	}
	var ret []*Subsystem
	for _, entry := range subsystems {
		if _, ok := ignore[entry]; ok {
			continue
		}
		ret = append(ret, entry)
	}
	return ret
}