From 3a4c5e2da302d43152f2e8b1362d8568c0d57e6e Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Wed, 18 Jan 2023 18:45:55 +0100 Subject: pkg/subsystem/linux: determine parent-child relations For that, extract a coincidence count matrix from a path coverage, then apply the following rule. Subsystem A is a child of B if both hold true: 1) More than 2/3 of paths that relate to A also relate to B. 2) B covers more directory tree entities than A. --- pkg/subsystem/linux/parents.go | 104 +++++++++++++++++++++++++++++++++ pkg/subsystem/linux/parents_test.go | 90 ++++++++++++++++++++++++++++ pkg/subsystem/linux/subsystems.go | 9 +++ pkg/subsystem/linux/subsystems_test.go | 54 +++++++++++++---- 4 files changed, 247 insertions(+), 10 deletions(-) create mode 100644 pkg/subsystem/linux/parents.go create mode 100644 pkg/subsystem/linux/parents_test.go (limited to 'pkg/subsystem/linux') diff --git a/pkg/subsystem/linux/parents.go b/pkg/subsystem/linux/parents.go new file mode 100644 index 000000000..40ba10b7f --- /dev/null +++ b/pkg/subsystem/linux/parents.go @@ -0,0 +1,104 @@ +// Copyright 2023 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package linux + +import ( + "fmt" + + "github.com/google/syzkaller/pkg/subsystem/entity" + "github.com/google/syzkaller/pkg/subsystem/match" +) + +// SetParents attempts to determine the parent-child relations among the extracted subsystems. +// We assume A is a child of B if: +// 1) B covers more paths than A. +// 2) Most of the paths that relate to A also relate to B. +func SetParents(matrix *match.CoincidenceMatrix, list []*entity.Subsystem) error { + matrix.NonEmptyPairs(func(a, b *entity.Subsystem, count int) { + // Demand that > 2/3 paths are related. + if 3*count/matrix.Count(a) >= 2 && matrix.Count(a) < matrix.Count(b) { + a.Parents = append(a.Parents, b) + } + }) + // Just in case. + if loopsExist(list) { + return fmt.Errorf("there are loops in the parents relation") + } + transitiveReduction(list) + return nil +} + +// The algorithm runs in O(E * (E + V)). +// We expect that E is quite low here, so it should be fine. +func transitiveReduction(list []*entity.Subsystem) { + for _, s := range list { + removeParents := map[*entity.Subsystem]bool{} + for _, p := range s.Parents { + reachable := reachableParents(p) + for otherP := range reachable { + removeParents[otherP] = true + } + } + newParents := []*entity.Subsystem{} + for _, p := range s.Parents { + if !removeParents[p] { + newParents = append(newParents, p) + } + } + s.Parents = newParents + } +} + +// reachableParents lists all entity.Subsystem entries reachable from the given one. +func reachableParents(start *entity.Subsystem) map[*entity.Subsystem]bool { + ret := make(map[*entity.Subsystem]bool) + var dfs func(node *entity.Subsystem) + dfs = func(node *entity.Subsystem) { + for _, p := range node.Parents { + ret[p] = true + dfs(p) + } + } + dfs(start) + return ret +} + +// loopsExist is a helper method that verifies that the resulting graph has no loops. +func loopsExist(list []*entity.Subsystem) bool { + type graphNode struct { + obj *entity.Subsystem + entered bool + left bool + } + nodes := []*graphNode{} + objToNode := map[*entity.Subsystem]*graphNode{} + for _, obj := range list { + node := &graphNode{obj: obj} + nodes = append(nodes, node) + objToNode[obj] = node + } + var dfs func(*graphNode) bool + dfs = func(node *graphNode) bool { + if node.left { + return false + } + if node.entered { + // We've found a cycle. + return true + } + node.entered = true + anyLoop := false + for _, parent := range node.obj.Parents { + anyLoop = anyLoop || dfs(objToNode[parent]) + } + node.left = true + return anyLoop + } + for _, node := range nodes { + if dfs(node) { + return true + } + } + return false +} diff --git a/pkg/subsystem/linux/parents_test.go b/pkg/subsystem/linux/parents_test.go new file mode 100644 index 000000000..2ef6e2b07 --- /dev/null +++ b/pkg/subsystem/linux/parents_test.go @@ -0,0 +1,90 @@ +// Copyright 2023 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package linux + +import ( + "testing" + "testing/fstest" + + "github.com/google/syzkaller/pkg/subsystem/entity" + "github.com/google/syzkaller/pkg/subsystem/match" + "github.com/stretchr/testify/assert" +) + +func TestLoopsDoExist(t *testing.T) { + a := &entity.Subsystem{} + b := &entity.Subsystem{Parents: []*entity.Subsystem{a}} + c := &entity.Subsystem{Parents: []*entity.Subsystem{b}} + a.Parents = []*entity.Subsystem{c} + assert.True(t, loopsExist([]*entity.Subsystem{a, b, c})) +} + +func TestLoopsDoNotExist(t *testing.T) { + a := &entity.Subsystem{} + b := &entity.Subsystem{Parents: []*entity.Subsystem{a}} + c := &entity.Subsystem{Parents: []*entity.Subsystem{b}} + assert.False(t, loopsExist([]*entity.Subsystem{a, b, c})) +} + +func TestTransitiveReduction(t *testing.T) { + // (d, c), (c, b), (b, a) + // (d, a) + // (d, b) + // (d, e) + // (c, a) + a := &entity.Subsystem{} + b := &entity.Subsystem{Parents: []*entity.Subsystem{a}} + c := &entity.Subsystem{Parents: []*entity.Subsystem{a, b}} + e := &entity.Subsystem{} + d := &entity.Subsystem{Parents: []*entity.Subsystem{a, b, c, e}} + transitiveReduction([]*entity.Subsystem{a, b, c, d, e}) + + // The result should be: + // (d, c), (c, b), (b, a) + // (d, e) + assert.ElementsMatch(t, d.Parents, []*entity.Subsystem{c, e}) + assert.ElementsMatch(t, c.Parents, []*entity.Subsystem{b}) +} + +func TestSetParents(t *testing.T) { + kernel := &entity.Subsystem{PathRules: []entity.PathRule{{ + IncludeRegexp: `.*`, + }}} + net := &entity.Subsystem{PathRules: []entity.PathRule{{ + IncludeRegexp: `^net/`, + }}} + wireless := &entity.Subsystem{PathRules: []entity.PathRule{{ + IncludeRegexp: `^net/wireless`, + }}} + drivers := &entity.Subsystem{PathRules: []entity.PathRule{{ + IncludeRegexp: `^drivers/`, + }}} + + tree := fstest.MapFS{ + "include/net/cfg80211.h": {}, + "net/socket.c": {}, + "net/nfc/core.c": {}, + "net/wireless/nl80211.c": {}, + "net/wireless/sysfs.c": {}, + "net/ipv4/arp.c": {}, + "drivers/usb/host/xhci.c": {}, + "drivers/android/binder.c": {}, + } + + matrix, err := match.BuildCoincidenceMatrix(tree, + []*entity.Subsystem{kernel, net, wireless, drivers}, nil) + assert.NoError(t, err) + + // Calculate parents. + err = SetParents(matrix, []*entity.Subsystem{kernel, net, wireless, drivers}) + if err != nil { + t.Fatal(err) + } + + // Verify parents. + assert.ElementsMatch(t, net.Parents, []*entity.Subsystem{kernel}) + assert.ElementsMatch(t, wireless.Parents, []*entity.Subsystem{net}) + assert.ElementsMatch(t, drivers.Parents, []*entity.Subsystem{kernel}) + assert.ElementsMatch(t, kernel.Parents, []*entity.Subsystem{}) +} diff --git a/pkg/subsystem/linux/subsystems.go b/pkg/subsystem/linux/subsystems.go index 8d8716bf9..abda295b4 100644 --- a/pkg/subsystem/linux/subsystems.go +++ b/pkg/subsystem/linux/subsystems.go @@ -10,6 +10,7 @@ import ( "sort" "github.com/google/syzkaller/pkg/subsystem/entity" + "github.com/google/syzkaller/pkg/subsystem/match" ) func ListFromRepo(repo string) ([]*entity.Subsystem, error) { @@ -86,6 +87,14 @@ func (ctx *linuxCtx) getSubsystems() ([]*entity.Subsystem, error) { return nil, fmt.Errorf("failed to set names: %w", err) } ctx.applyExtraRules(ret) + matrix, err := match.BuildCoincidenceMatrix(ctx.root, ret, nil) + if err != nil { + return nil, err + } + err = SetParents(matrix, ret) + if err != nil { + return nil, err + } return ret, nil } diff --git a/pkg/subsystem/linux/subsystems_test.go b/pkg/subsystem/linux/subsystems_test.go index 09d835e3f..52cf8540c 100644 --- a/pkg/subsystem/linux/subsystems_test.go +++ b/pkg/subsystem/linux/subsystems_test.go @@ -20,9 +20,11 @@ func TestGroupLinuxSubsystems(t *testing.T) { if err != nil { t.Fatal(err) } - // The regexps used for matching rules may change later, so let's not compare them here. for _, s := range subsystems { + // The regexps used for matching rules may change later, so let's not compare them here. s.PathRules = nil + // It complicates the test, so let's skip it here. + s.Parents = nil } expected := []*entity.Subsystem{ { @@ -36,8 +38,14 @@ func TestGroupLinuxSubsystems(t *testing.T) { Maintainers: []string{"email_ext4@email.com", "email_ext4_2@email.com"}, }, { - Name: "mm", - Lists: []string{"linux-mm@kvack.org"}, + Name: "mm", + Lists: []string{"linux-mm@kvack.org"}, + Maintainers: []string{"email_mm@email.com"}, + }, + { + Name: "tmpfs", + Lists: []string{"tmpfs@kvack.org"}, + Maintainers: []string{"email_shmem@email.com"}, }, { Name: "kernel", @@ -57,7 +65,8 @@ func TestCustomCallRules(t *testing.T) { t.Fatal(err) } expectCalls := map[string][]string{ - "ext4": []string{"syz_mount_image$ext4"}, + "ext4": {"syz_mount_image$ext4"}, + "tmpfs": {"syz_mount_image$tmpfs"}, } gotCalls := map[string][]string{} for _, s := range subsystems { @@ -106,9 +115,8 @@ func TestLinuxSubsystemPaths(t *testing.T) { list: []string{"kernel", "mm"}, }, { - // Shmem has no mailing list. path: `mm/shmem.c`, - list: []string{"kernel", "mm"}, + list: []string{"kernel", "mm", "tmpfs"}, }, { path: `include/net/ah.h`, @@ -133,6 +141,32 @@ func TestLinuxSubsystemPaths(t *testing.T) { } } +func TestLinuxSubsystemParents(t *testing.T) { + // For the list of subsystems, see TestLinuxSubsystemsList. + // Here we rely on the same ones. + repo := prepareTestLinuxRepo(t, []byte(testMaintainers)) + subsystems, err := listFromRepoInner(repo, nil) + if err != nil { + t.Fatal(err) + } + + expectParents := map[string][]string{ + "ext4": {"fs"}, + "mm": {"kernel"}, + "fs": {"kernel"}, + "tmpfs": {"mm"}, + "freevxfs": {"fs"}, + } + for _, s := range subsystems { + names := []string{} + for _, p := range s.Parents { + names = append(names, p.Name) + } + assert.ElementsMatch(t, names, expectParents[s.Name], + "wrong parents for %#v", s.Name) + } +} + func prepareTestLinuxRepo(t *testing.T, maintainers []byte) fs.FS { return fstest.MapFS{ `fs/ext4/fsync.c`: {}, @@ -152,9 +186,9 @@ func prepareTestLinuxRepo(t *testing.T, maintainers []byte) fs.FS { var ( testRules = &customRules{ subsystemCalls: map[string][]string{ - "ext4": []string{"syz_mount_image$ext4"}, - "vxfs": []string{"syz_mount_image$vxfs"}, - "tmpfs": []string{"syz_mount_image$tmpfs"}, + "ext4": {"syz_mount_image$ext4"}, + "vxfs": {"syz_mount_image$vxfs"}, + "tmpfs": {"syz_mount_image$tmpfs"}, }, } testMaintainers = ` @@ -212,7 +246,7 @@ F: tools/testing/selftests/vm/ TMPFS (SHMEM FILESYSTEM) M: Developer -L: linux-mm@kvack.org +L: tmpfs@kvack.org S: Maintained F: include/linux/shmem_fs.h F: mm/shmem.c -- cgit mrf-deployment