aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/subsystem
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2023-04-05 18:53:43 +0200
committerAleksandr Nogikh <wp32pw@gmail.com>2023-04-06 10:14:04 +0200
commit0870752005424bd8737bdea302071167f4f0026d (patch)
treea2e2b5837696437f05fc1b1fad10f6729b190e8c /pkg/subsystem
parente962f59f57afaccd1d28bfdbb2b91d0110acd285 (diff)
pkg/subsystem: rewrite the inference logic
Let's just accept that we cannot fully trust guilty paths and try to increase the weight of subsystems extracted from reproducers. Instead of taking all subsystems that have received the highest number of votes, take all which have received >= 33%. This will reduce noise and in almost all cases limit the number of assigned subsystems to 2. If there are >= 3 reproducers that point to exactly the same set of subsystems, give them a preference. But still take one subsystem from guilty paths if there's one that's mentioned >= 66% times. The numbers themselves are somewhat arbitrary, but hopefully this will improve the quality of subsystem inference. Add some more tests.
Diffstat (limited to 'pkg/subsystem')
-rw-r--r--pkg/subsystem/extractor.go50
-rw-r--r--pkg/subsystem/lists/linux_test.go74
2 files changed, 108 insertions, 16 deletions
diff --git a/pkg/subsystem/extractor.go b/pkg/subsystem/extractor.go
index 9794642d3..fe1c720aa 100644
--- a/pkg/subsystem/extractor.go
+++ b/pkg/subsystem/extractor.go
@@ -3,6 +3,8 @@
package subsystem
+import "math"
+
// Extractor deduces the subsystems from the list of crashes.
type Extractor struct {
raw rawExtractorInterface
@@ -37,6 +39,11 @@ func (e *Extractor) Extract(crashes []*Crash) []*Subsystem {
reproCount++
}
}
+ subsystems = removeParents(subsystems)
+ counts := make(map[*Subsystem]int)
+ for _, entry := range subsystems {
+ counts[entry]++
+ }
// If all reproducers hint at the same subsystem, take it as well.
reproCounts := map[*Subsystem]int{}
@@ -54,15 +61,14 @@ func (e *Extractor) Extract(crashes []*Crash) []*Subsystem {
}
// It can be the case that guilty paths point to several subsystems, but the reproducer
// can clearly point to one of them.
+ // Let's consider it to be the strongest singal.
if len(fromRepro) > 0 {
- // If we do not exclude parents, there'll always be a root subsystem in there
- // and, as a result, subsystems reproducers will always replace all other ones.
- withoutParents := removeParents(subsystems)
+ fromRepro = removeParents(fromRepro)
newSubsystems := []*Subsystem{}
for _, reproSubsystem := range fromRepro {
parents := reproSubsystem.ReachableParents()
parents[reproSubsystem] = struct{}{} // also include the subsystem itself
- for _, subsystem := range withoutParents {
+ for _, subsystem := range subsystems {
if _, ok := parents[subsystem]; ok {
newSubsystems = append(newSubsystems, reproSubsystem)
break
@@ -70,24 +76,38 @@ func (e *Extractor) Extract(crashes []*Crash) []*Subsystem {
}
}
if len(newSubsystems) > 0 {
- subsystems = newSubsystems
+ // Just pick those subsystems.
+ return newSubsystems
}
- }
- // And calculate counts.
- counts := make(map[*Subsystem]int)
- maxCount := 0
- for _, entry := range removeParents(append(subsystems, fromRepro...)) {
- counts[entry]++
- if counts[entry] > maxCount {
- maxCount = counts[entry]
+ // If there are sufficiently many reproducers that point to subsystems other than
+ // those from guilty paths, there's a chance we just didn't parse report correctly.
+ const cutOff = 3
+ if reproCount >= cutOff {
+ // But if the guilty paths are non-controversial, also take the leading candidate.
+ return append(fromRepro, mostVoted(counts, 0.66)...)
}
}
- // Pick the most prevalent ones.
+ // Take subsystems from reproducers into account.
+ for _, entry := range fromRepro {
+ counts[entry] += reproCount
+ }
+
+ // Let's pick all subsystems that received >= 33% of votes (thus no more than 3).
+ return removeParents(mostVoted(counts, 0.33))
+}
+
+// mostVoted picks subsystems that have received >= share votes.
+func mostVoted(counts map[*Subsystem]int, share float64) []*Subsystem {
+ total := 0
+ for _, count := range counts {
+ total += count
+ }
+ cutOff := int(math.Ceil(share * float64(total)))
ret := []*Subsystem{}
for entry, count := range counts {
- if count < maxCount {
+ if count < cutOff {
continue
}
ret = append(ret, entry)
diff --git a/pkg/subsystem/lists/linux_test.go b/pkg/subsystem/lists/linux_test.go
index 075a3d26d..57cbc8c22 100644
--- a/pkg/subsystem/lists/linux_test.go
+++ b/pkg/subsystem/lists/linux_test.go
@@ -95,7 +95,7 @@ syz_usb_connect(0x0, 0x24, &(0x7f0000000140)=ANY=[@ANYBLOB="12010000abbe6740e917
`),
},
},
- expect: []string{"dri"},
+ expect: []string{"dri", "usb"},
},
{
name: "ntfs bug with one ntfs guilty path",
@@ -131,6 +131,78 @@ syz_mount_image$ntfs3(&(0x7f000001f740), &(0x7f000001f780)='./file0\x00', 0x0, &
},
expect: []string{"ntfs3"},
},
+ {
+ name: "many repros point to ntfs, all guilty files are wrong",
+ crashes: []*subsystem.Crash{
+ {
+ GuiltyPath: `arch/arm64/kernel/stacktrace.c`,
+ },
+ {
+ GuiltyPath: `arch/arm64/kernel/stacktrace.c`,
+ },
+ {
+ GuiltyPath: `arch/arm64/kernel/stacktrace.c`,
+ },
+ {
+ GuiltyPath: `arch/arm64/kernel/stacktrace.c`,
+ SyzRepro: []byte(`
+syz_mount_image$ntfs3(&(0x7f000001f740), &(0x7f000001f780)='./file0\x00', 0x0, &(0x7f0000000200)=ANY=[@ANYBLOB="64697363==")
+`),
+ },
+ {
+ GuiltyPath: `arch/arm64/kernel/stacktrace.c`,
+ SyzRepro: []byte(`
+syz_mount_image$ntfs3(&(0x7f000001f740), &(0x7f000001f780)='./file0\x00', 0x0, &(0x7f0000000200)=ANY=[@ANYBLOB="64697363==")
+`),
+ },
+ {
+ GuiltyPath: `arch/arm64/kernel/stacktrace.c`,
+ SyzRepro: []byte(`
+syz_mount_image$ntfs3(&(0x7f000001f740), &(0x7f000001f780)='./file0\x00', 0x0, &(0x7f0000000200)=ANY=[@ANYBLOB="64697363==")
+`),
+ },
+ },
+ // "arm" is just too omnipresent, we have to mention it.
+ expect: []string{"ntfs3", "arm"},
+ },
+ {
+ name: "guilty files point to media, many repros to usb",
+ crashes: []*subsystem.Crash{
+ {
+ // Suppose, in one case report parsing failed.
+ GuiltyPath: `arch/arm64/kernel/stacktrace.c`,
+ },
+ {
+ GuiltyPath: `drivers/media/mc/mc-entity.c`,
+ },
+ {
+ GuiltyPath: `drivers/media/mc/mc-entity.c`,
+ },
+ {
+ GuiltyPath: `drivers/media/mc/mc-entity.c`,
+ },
+ {
+ GuiltyPath: `drivers/media/mc/mc-entity.c`,
+ SyzRepro: []byte(`
+syz_usb_connect(0x0, 0x58, &(0x7f0000000100)=ANY=[@ANYBLOB="1201000036ee3808d30b55056"])
+`),
+ },
+ {
+ GuiltyPath: `drivers/media/mc/mc-entity.c`,
+ SyzRepro: []byte(`
+syz_usb_connect(0x0, 0x58, &(0x7f0000000100)=ANY=[@ANYBLOB="1201000036ee3808d30b55056"])
+`),
+ },
+ {
+ GuiltyPath: `drivers/media/mc/mc-entity.c`,
+ SyzRepro: []byte(`
+syz_usb_connect(0x0, 0x58, &(0x7f0000000100)=ANY=[@ANYBLOB="1201000036ee3808d30b55056"])
+`),
+ },
+ },
+ // "media" is picked because it's in >= 2/3 guilty paths.
+ expect: []string{"media", "usb"},
+ },
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {