diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2023-04-05 18:53:43 +0200 |
|---|---|---|
| committer | Aleksandr Nogikh <wp32pw@gmail.com> | 2023-04-06 10:14:04 +0200 |
| commit | 0870752005424bd8737bdea302071167f4f0026d (patch) | |
| tree | a2e2b5837696437f05fc1b1fad10f6729b190e8c /pkg/subsystem | |
| parent | e962f59f57afaccd1d28bfdbb2b91d0110acd285 (diff) | |
pkg/subsystem: rewrite the inference logic
Let's just accept that we cannot fully trust guilty paths and try to
increase the weight of subsystems extracted from reproducers.
Instead of taking all subsystems that have received the highest number
of votes, take all which have received >= 33%. This will reduce noise
and in almost all cases limit the number of assigned subsystems to 2.
If there are >= 3 reproducers that point to exactly the same set of
subsystems, give them a preference. But still take one subsystem from
guilty paths if there's one that's mentioned >= 66% times.
The numbers themselves are somewhat arbitrary, but hopefully this will
improve the quality of subsystem inference.
Add some more tests.
Diffstat (limited to 'pkg/subsystem')
| -rw-r--r-- | pkg/subsystem/extractor.go | 50 | ||||
| -rw-r--r-- | pkg/subsystem/lists/linux_test.go | 74 |
2 files changed, 108 insertions, 16 deletions
diff --git a/pkg/subsystem/extractor.go b/pkg/subsystem/extractor.go index 9794642d3..fe1c720aa 100644 --- a/pkg/subsystem/extractor.go +++ b/pkg/subsystem/extractor.go @@ -3,6 +3,8 @@ package subsystem +import "math" + // Extractor deduces the subsystems from the list of crashes. type Extractor struct { raw rawExtractorInterface @@ -37,6 +39,11 @@ func (e *Extractor) Extract(crashes []*Crash) []*Subsystem { reproCount++ } } + subsystems = removeParents(subsystems) + counts := make(map[*Subsystem]int) + for _, entry := range subsystems { + counts[entry]++ + } // If all reproducers hint at the same subsystem, take it as well. reproCounts := map[*Subsystem]int{} @@ -54,15 +61,14 @@ func (e *Extractor) Extract(crashes []*Crash) []*Subsystem { } // It can be the case that guilty paths point to several subsystems, but the reproducer // can clearly point to one of them. + // Let's consider it to be the strongest singal. if len(fromRepro) > 0 { - // If we do not exclude parents, there'll always be a root subsystem in there - // and, as a result, subsystems reproducers will always replace all other ones. - withoutParents := removeParents(subsystems) + fromRepro = removeParents(fromRepro) newSubsystems := []*Subsystem{} for _, reproSubsystem := range fromRepro { parents := reproSubsystem.ReachableParents() parents[reproSubsystem] = struct{}{} // also include the subsystem itself - for _, subsystem := range withoutParents { + for _, subsystem := range subsystems { if _, ok := parents[subsystem]; ok { newSubsystems = append(newSubsystems, reproSubsystem) break @@ -70,24 +76,38 @@ func (e *Extractor) Extract(crashes []*Crash) []*Subsystem { } } if len(newSubsystems) > 0 { - subsystems = newSubsystems + // Just pick those subsystems. + return newSubsystems } - } - // And calculate counts. - counts := make(map[*Subsystem]int) - maxCount := 0 - for _, entry := range removeParents(append(subsystems, fromRepro...)) { - counts[entry]++ - if counts[entry] > maxCount { - maxCount = counts[entry] + // If there are sufficiently many reproducers that point to subsystems other than + // those from guilty paths, there's a chance we just didn't parse report correctly. + const cutOff = 3 + if reproCount >= cutOff { + // But if the guilty paths are non-controversial, also take the leading candidate. + return append(fromRepro, mostVoted(counts, 0.66)...) } } - // Pick the most prevalent ones. + // Take subsystems from reproducers into account. + for _, entry := range fromRepro { + counts[entry] += reproCount + } + + // Let's pick all subsystems that received >= 33% of votes (thus no more than 3). + return removeParents(mostVoted(counts, 0.33)) +} + +// mostVoted picks subsystems that have received >= share votes. +func mostVoted(counts map[*Subsystem]int, share float64) []*Subsystem { + total := 0 + for _, count := range counts { + total += count + } + cutOff := int(math.Ceil(share * float64(total))) ret := []*Subsystem{} for entry, count := range counts { - if count < maxCount { + if count < cutOff { continue } ret = append(ret, entry) diff --git a/pkg/subsystem/lists/linux_test.go b/pkg/subsystem/lists/linux_test.go index 075a3d26d..57cbc8c22 100644 --- a/pkg/subsystem/lists/linux_test.go +++ b/pkg/subsystem/lists/linux_test.go @@ -95,7 +95,7 @@ syz_usb_connect(0x0, 0x24, &(0x7f0000000140)=ANY=[@ANYBLOB="12010000abbe6740e917 `), }, }, - expect: []string{"dri"}, + expect: []string{"dri", "usb"}, }, { name: "ntfs bug with one ntfs guilty path", @@ -131,6 +131,78 @@ syz_mount_image$ntfs3(&(0x7f000001f740), &(0x7f000001f780)='./file0\x00', 0x0, & }, expect: []string{"ntfs3"}, }, + { + name: "many repros point to ntfs, all guilty files are wrong", + crashes: []*subsystem.Crash{ + { + GuiltyPath: `arch/arm64/kernel/stacktrace.c`, + }, + { + GuiltyPath: `arch/arm64/kernel/stacktrace.c`, + }, + { + GuiltyPath: `arch/arm64/kernel/stacktrace.c`, + }, + { + GuiltyPath: `arch/arm64/kernel/stacktrace.c`, + SyzRepro: []byte(` +syz_mount_image$ntfs3(&(0x7f000001f740), &(0x7f000001f780)='./file0\x00', 0x0, &(0x7f0000000200)=ANY=[@ANYBLOB="64697363==") +`), + }, + { + GuiltyPath: `arch/arm64/kernel/stacktrace.c`, + SyzRepro: []byte(` +syz_mount_image$ntfs3(&(0x7f000001f740), &(0x7f000001f780)='./file0\x00', 0x0, &(0x7f0000000200)=ANY=[@ANYBLOB="64697363==") +`), + }, + { + GuiltyPath: `arch/arm64/kernel/stacktrace.c`, + SyzRepro: []byte(` +syz_mount_image$ntfs3(&(0x7f000001f740), &(0x7f000001f780)='./file0\x00', 0x0, &(0x7f0000000200)=ANY=[@ANYBLOB="64697363==") +`), + }, + }, + // "arm" is just too omnipresent, we have to mention it. + expect: []string{"ntfs3", "arm"}, + }, + { + name: "guilty files point to media, many repros to usb", + crashes: []*subsystem.Crash{ + { + // Suppose, in one case report parsing failed. + GuiltyPath: `arch/arm64/kernel/stacktrace.c`, + }, + { + GuiltyPath: `drivers/media/mc/mc-entity.c`, + }, + { + GuiltyPath: `drivers/media/mc/mc-entity.c`, + }, + { + GuiltyPath: `drivers/media/mc/mc-entity.c`, + }, + { + GuiltyPath: `drivers/media/mc/mc-entity.c`, + SyzRepro: []byte(` +syz_usb_connect(0x0, 0x58, &(0x7f0000000100)=ANY=[@ANYBLOB="1201000036ee3808d30b55056"]) +`), + }, + { + GuiltyPath: `drivers/media/mc/mc-entity.c`, + SyzRepro: []byte(` +syz_usb_connect(0x0, 0x58, &(0x7f0000000100)=ANY=[@ANYBLOB="1201000036ee3808d30b55056"]) +`), + }, + { + GuiltyPath: `drivers/media/mc/mc-entity.c`, + SyzRepro: []byte(` +syz_usb_connect(0x0, 0x58, &(0x7f0000000100)=ANY=[@ANYBLOB="1201000036ee3808d30b55056"]) +`), + }, + }, + // "media" is picked because it's in >= 2/3 guilty paths. + expect: []string{"media", "usb"}, + }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { |
