From 95871dcc45f6531b4c692ff892aad56bdd95e16f Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Fri, 10 Feb 2023 12:14:36 +0100 Subject: pkg/subsystem: restructure the package Remove the entity and match subpackages. Regenerate the linux.go file. --- pkg/subsystem/entities.go | 44 ++++++++++++++++ pkg/subsystem/entity/entities.go | 44 ---------------- pkg/subsystem/extractor.go | 20 +++----- pkg/subsystem/extractor_test.go | 31 ++++++------ pkg/subsystem/linux/coincidence.go | 55 ++++++++++++++++++++ pkg/subsystem/linux/coincidence_test.go | 40 +++++++++++++++ pkg/subsystem/linux/maintainers.go | 6 +-- pkg/subsystem/linux/maintainers_test.go | 7 ++- pkg/subsystem/linux/names.go | 4 +- pkg/subsystem/linux/names_test.go | 8 +-- pkg/subsystem/linux/parents.go | 35 ++++++------- pkg/subsystem/linux/parents_test.go | 69 +++++++++++++------------ pkg/subsystem/linux/path_coincidence.go | 68 +++++++++++++++++++++++++ pkg/subsystem/linux/path_coincidence_test.go | 49 ++++++++++++++++++ pkg/subsystem/linux/subsystems.go | 19 ++++--- pkg/subsystem/linux/subsystems_test.go | 7 ++- pkg/subsystem/list.go | 10 ++-- pkg/subsystem/lists/linux.go | 31 ++---------- pkg/subsystem/match.go | 73 +++++++++++++++++++++++++++ pkg/subsystem/match/coincidence.go | 55 -------------------- pkg/subsystem/match/coincidence_test.go | 40 --------------- pkg/subsystem/match/match.go | 75 ---------------------------- pkg/subsystem/match/match_test.go | 52 ------------------- pkg/subsystem/match/path_coincidence.go | 68 ------------------------- pkg/subsystem/match/path_coincidence_test.go | 49 ------------------ pkg/subsystem/match_test.go | 51 +++++++++++++++++++ pkg/subsystem/raw_extractor.go | 20 ++++---- pkg/subsystem/raw_extractor_test.go | 27 +++++----- 28 files changed, 506 insertions(+), 551 deletions(-) create mode 100644 pkg/subsystem/entities.go delete mode 100644 pkg/subsystem/entity/entities.go create mode 100644 pkg/subsystem/linux/coincidence.go create mode 100644 pkg/subsystem/linux/coincidence_test.go create mode 100644 pkg/subsystem/linux/path_coincidence.go create mode 100644 pkg/subsystem/linux/path_coincidence_test.go create mode 100644 pkg/subsystem/match.go delete mode 100644 pkg/subsystem/match/coincidence.go delete mode 100644 pkg/subsystem/match/coincidence_test.go delete mode 100644 pkg/subsystem/match/match.go delete mode 100644 pkg/subsystem/match/match_test.go delete mode 100644 pkg/subsystem/match/path_coincidence.go delete mode 100644 pkg/subsystem/match/path_coincidence_test.go create mode 100644 pkg/subsystem/match_test.go (limited to 'pkg') diff --git a/pkg/subsystem/entities.go b/pkg/subsystem/entities.go new file mode 100644 index 000000000..b74217bcd --- /dev/null +++ b/pkg/subsystem/entities.go @@ -0,0 +1,44 @@ +// Copyright 2023 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package subsystem + +type Subsystem struct { + Name string + PathRules []PathRule + Syscalls []string + Lists []string + Maintainers []string + Parents []*Subsystem +} + +// ReachableParents returns the set of subsystems reachable from the current one. +func (subsystem *Subsystem) ReachableParents() map[*Subsystem]struct{} { + ret := make(map[*Subsystem]struct{}) + var dfs func(node *Subsystem) + dfs = func(node *Subsystem) { + if _, visited := ret[node]; visited { + return + } + for _, p := range node.Parents { + if p == subsystem { + panic("loop in the parents relation") + } + ret[p] = struct{}{} + dfs(p) + } + } + dfs(subsystem) + return ret +} + +// PathRule describes the part of the directory tree belonging to a single subsystem. +type PathRule struct { + IncludeRegexp string + // ExcludeRegexps are tested before IncludeRegexp. + ExcludeRegexp string +} + +func (pr *PathRule) IsEmpty() bool { + return pr.IncludeRegexp == "" && pr.ExcludeRegexp == "" +} diff --git a/pkg/subsystem/entity/entities.go b/pkg/subsystem/entity/entities.go deleted file mode 100644 index e2b29a7f3..000000000 --- a/pkg/subsystem/entity/entities.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2023 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -package entity - -type Subsystem struct { - Name string - PathRules []PathRule - Syscalls []string - Lists []string - Maintainers []string - Parents []*Subsystem -} - -// ReachableParents returns the set of subsystems reachable from the current one. -func (subsystem *Subsystem) ReachableParents() map[*Subsystem]struct{} { - ret := make(map[*Subsystem]struct{}) - var dfs func(node *Subsystem) - dfs = func(node *Subsystem) { - if _, visited := ret[node]; visited { - return - } - for _, p := range node.Parents { - if p == subsystem { - panic("loop in the parents relation") - } - ret[p] = struct{}{} - dfs(p) - } - } - dfs(subsystem) - return ret -} - -// PathRule describes the part of the directory tree belonging to a single subsystem. -type PathRule struct { - IncludeRegexp string - // ExcludeRegexps are tested before IncludeRegexp. - ExcludeRegexp string -} - -func (pr *PathRule) IsEmpty() bool { - return pr.IncludeRegexp == "" && pr.ExcludeRegexp == "" -} diff --git a/pkg/subsystem/extractor.go b/pkg/subsystem/extractor.go index e54086b29..075c8079d 100644 --- a/pkg/subsystem/extractor.go +++ b/pkg/subsystem/extractor.go @@ -3,10 +3,6 @@ package subsystem -import ( - "github.com/google/syzkaller/pkg/subsystem/entity" -) - // Extractor deduces the subsystems from the list of crashes. type Extractor struct { raw rawExtractorInterface @@ -21,17 +17,17 @@ type Crash struct { // rawExtractorInterface simplifies testing. type rawExtractorInterface interface { - FromPath(path string) []*entity.Subsystem - FromProg(progBytes []byte) []*entity.Subsystem + FromPath(path string) []*Subsystem + FromProg(progBytes []byte) []*Subsystem } -func MakeExtractor(list []*entity.Subsystem) *Extractor { +func MakeExtractor(list []*Subsystem) *Extractor { return &Extractor{raw: makeRawExtractor(list)} } -func (e *Extractor) Extract(crashes []*Crash) []*entity.Subsystem { +func (e *Extractor) Extract(crashes []*Crash) []*Subsystem { // First put all subsystems to the same list. - subsystems := []*entity.Subsystem{} + subsystems := []*Subsystem{} for _, crash := range crashes { if crash.GuiltyPath != "" { subsystems = append(subsystems, e.raw.FromPath(crash.GuiltyPath)...) @@ -42,7 +38,7 @@ func (e *Extractor) Extract(crashes []*Crash) []*entity.Subsystem { } // If there are both parents and children, remove parents. - ignore := make(map[*entity.Subsystem]struct{}) + ignore := make(map[*Subsystem]struct{}) for _, entry := range subsystems { for p := range entry.ReachableParents() { ignore[p] = struct{}{} @@ -50,7 +46,7 @@ func (e *Extractor) Extract(crashes []*Crash) []*entity.Subsystem { } // And calculate counts. - counts := make(map[*entity.Subsystem]int) + counts := make(map[*Subsystem]int) maxCount := 0 for _, entry := range subsystems { if _, ok := ignore[entry]; ok { @@ -63,7 +59,7 @@ func (e *Extractor) Extract(crashes []*Crash) []*entity.Subsystem { } // Pick the most prevalent ones. - ret := []*entity.Subsystem{} + ret := []*Subsystem{} for entry, count := range counts { if count < maxCount { continue diff --git a/pkg/subsystem/extractor_test.go b/pkg/subsystem/extractor_test.go index 5dedd0b1a..c2d165973 100644 --- a/pkg/subsystem/extractor_test.go +++ b/pkg/subsystem/extractor_test.go @@ -7,7 +7,6 @@ import ( "reflect" "testing" - "github.com/google/syzkaller/pkg/subsystem/entity" "github.com/stretchr/testify/assert" ) @@ -15,14 +14,14 @@ func TestExtractor(t *testing.T) { // Objects used in tests. fsPath := "fs/" extProg, nfsProg := []byte("ext prog"), []byte("nfs prog") - fs := &entity.Subsystem{Name: "fs"} - ext := &entity.Subsystem{Name: "ext", Parents: []*entity.Subsystem{fs}} - nfs := &entity.Subsystem{Name: "nfs", Parents: []*entity.Subsystem{fs}} + fs := &Subsystem{Name: "fs"} + ext := &Subsystem{Name: "ext", Parents: []*Subsystem{fs}} + nfs := &Subsystem{Name: "nfs", Parents: []*Subsystem{fs}} // Tests themselves. tests := []struct { name string crashes []*Crash - want []*entity.Subsystem + want []*Subsystem }{ { name: `Make sure it works fine with just a single path`, @@ -31,7 +30,7 @@ func TestExtractor(t *testing.T) { GuiltyPath: fsPath, }, }, - want: []*entity.Subsystem{fs}, + want: []*Subsystem{fs}, }, { name: `Make sure a child shadows its parent`, @@ -44,7 +43,7 @@ func TestExtractor(t *testing.T) { SyzRepro: extProg, }, }, - want: []*entity.Subsystem{ext}, + want: []*Subsystem{ext}, }, { name: `Two equally present children`, @@ -61,7 +60,7 @@ func TestExtractor(t *testing.T) { SyzRepro: nfsProg, }, }, - want: []*entity.Subsystem{nfs, ext}, + want: []*Subsystem{nfs, ext}, }, { name: `One child is more present than another`, @@ -82,17 +81,17 @@ func TestExtractor(t *testing.T) { SyzRepro: extProg, }, }, - want: []*entity.Subsystem{ext}, + want: []*Subsystem{ext}, }, } extractor := &Extractor{ raw: &testRawExtractor{ - perPath: map[string][]*entity.Subsystem{ + perPath: map[string][]*Subsystem{ fsPath: {fs}, }, perProg: []progSubsystems{ - {extProg, []*entity.Subsystem{ext}}, - {nfsProg, []*entity.Subsystem{nfs}}, + {extProg, []*Subsystem{ext}}, + {nfsProg, []*Subsystem{nfs}}, }, }, } @@ -103,20 +102,20 @@ func TestExtractor(t *testing.T) { } type testRawExtractor struct { - perPath map[string][]*entity.Subsystem + perPath map[string][]*Subsystem perProg []progSubsystems } type progSubsystems struct { prog []byte - ret []*entity.Subsystem + ret []*Subsystem } -func (e *testRawExtractor) FromPath(path string) []*entity.Subsystem { +func (e *testRawExtractor) FromPath(path string) []*Subsystem { return e.perPath[path] } -func (e *testRawExtractor) FromProg(progBytes []byte) []*entity.Subsystem { +func (e *testRawExtractor) FromProg(progBytes []byte) []*Subsystem { for _, obj := range e.perProg { if reflect.DeepEqual(progBytes, obj.prog) { return obj.ret diff --git a/pkg/subsystem/linux/coincidence.go b/pkg/subsystem/linux/coincidence.go new file mode 100644 index 000000000..86ee0794f --- /dev/null +++ b/pkg/subsystem/linux/coincidence.go @@ -0,0 +1,55 @@ +// Copyright 2023 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package linux + +import "github.com/google/syzkaller/pkg/subsystem" + +// CoincidenceMatrix represents a matrix that, for every pair of subsystems +// A and B, stores the number of times A and B have coincided in the input data. +// So far we only need it for subsystem.Subsystem, so its interface is tailored to it. +type CoincidenceMatrix struct { + matrix map[*subsystem.Subsystem]map[*subsystem.Subsystem]int +} + +func MakeCoincidenceMatrix() *CoincidenceMatrix { + return &CoincidenceMatrix{ + matrix: make(map[*subsystem.Subsystem]map[*subsystem.Subsystem]int), + } +} + +func (cm *CoincidenceMatrix) Record(items ...*subsystem.Subsystem) { + for i := 0; i < len(items); i++ { + for j := 0; j < len(items); j++ { + cm.inc(items[i], items[j]) + } + } +} + +func (cm *CoincidenceMatrix) Count(a *subsystem.Subsystem) int { + return cm.Get(a, a) +} + +func (cm *CoincidenceMatrix) Get(a, b *subsystem.Subsystem) int { + return cm.matrix[a][b] +} + +func (cm *CoincidenceMatrix) NonEmptyPairs(cb func(a, b *subsystem.Subsystem, val int)) { + for a, sub := range cm.matrix { + for b, val := range sub { + if a == b { + continue + } + cb(a, b, val) + } + } +} + +func (cm *CoincidenceMatrix) inc(a, b *subsystem.Subsystem) { + subMatrix, ok := cm.matrix[a] + if !ok { + subMatrix = make(map[*subsystem.Subsystem]int) + cm.matrix[a] = subMatrix + } + subMatrix[b]++ +} diff --git a/pkg/subsystem/linux/coincidence_test.go b/pkg/subsystem/linux/coincidence_test.go new file mode 100644 index 000000000..8aaf5ed0a --- /dev/null +++ b/pkg/subsystem/linux/coincidence_test.go @@ -0,0 +1,40 @@ +// Copyright 2023 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package linux + +import ( + "testing" + + "github.com/google/syzkaller/pkg/subsystem" + "github.com/stretchr/testify/assert" +) + +func TestCoincidenceMatrix(t *testing.T) { + cm := MakeCoincidenceMatrix() + a, b, c := &subsystem.Subsystem{}, &subsystem.Subsystem{}, &subsystem.Subsystem{} + cm.Record(a, b) + cm.Record(b, c) + + // Test counts. + assert.Equal(t, cm.Count(a), 1) + assert.Equal(t, cm.Count(b), 2) + assert.Equal(t, cm.Count(c), 1) + + // Test pairwise counts. + assert.Equal(t, cm.Get(a, b), 1) + assert.Equal(t, cm.Get(b, c), 1) + assert.Equal(t, cm.Get(a, c), 0) + + // Test the iterator. + type pair struct { + a *subsystem.Subsystem + b *subsystem.Subsystem + } + expected := []pair{{a, b}, {b, a}, {b, c}, {c, b}} + got := []pair{} + cm.NonEmptyPairs(func(a, b *subsystem.Subsystem, _ int) { + got = append(got, pair{a, b}) + }) + assert.ElementsMatch(t, expected, got) +} diff --git a/pkg/subsystem/linux/maintainers.go b/pkg/subsystem/linux/maintainers.go index 7c60a840a..2f4d63003 100644 --- a/pkg/subsystem/linux/maintainers.go +++ b/pkg/subsystem/linux/maintainers.go @@ -13,7 +13,7 @@ import ( "strings" "unicode" - "github.com/google/syzkaller/pkg/subsystem/entity" + "github.com/google/syzkaller/pkg/subsystem" ) // maintainersRecord represents a single raw record in the MAINTAINERS file. @@ -146,7 +146,7 @@ func parseEmail(value string) (string, error) { return addr.Address, nil } -func (r maintainersRecord) ToPathRule() entity.PathRule { +func (r maintainersRecord) ToPathRule() subsystem.PathRule { inclRe := strings.Builder{} for i, wildcard := range r.includePatterns { if i > 0 { @@ -167,7 +167,7 @@ func (r maintainersRecord) ToPathRule() entity.PathRule { } wildcardToRegexp(wildcard, &exclRe) } - return entity.PathRule{ + return subsystem.PathRule{ IncludeRegexp: inclRe.String(), ExcludeRegexp: exclRe.String(), } diff --git a/pkg/subsystem/linux/maintainers_test.go b/pkg/subsystem/linux/maintainers_test.go index dc3299dbf..676bbcda1 100644 --- a/pkg/subsystem/linux/maintainers_test.go +++ b/pkg/subsystem/linux/maintainers_test.go @@ -8,8 +8,7 @@ import ( "testing" "github.com/google/go-cmp/cmp" - "github.com/google/syzkaller/pkg/subsystem/entity" - "github.com/google/syzkaller/pkg/subsystem/match" + "github.com/google/syzkaller/pkg/subsystem" ) func TestRecordToPathRule(t *testing.T) { @@ -127,8 +126,8 @@ func TestRecordToPathRule(t *testing.T) { for _, loopTest := range tests { test := loopTest t.Run(test.name, func(t *testing.T) { - pm := match.MakePathMatcher([]*entity.Subsystem{ - {PathRules: []entity.PathRule{test.record.ToPathRule()}}, + pm := subsystem.MakePathMatcher([]*subsystem.Subsystem{ + {PathRules: []subsystem.PathRule{test.record.ToPathRule()}}, }) for _, path := range test.match { if len(pm.Match(path)) != 1 { diff --git a/pkg/subsystem/linux/names.go b/pkg/subsystem/linux/names.go index be57a5ab3..3dcc2deed 100644 --- a/pkg/subsystem/linux/names.go +++ b/pkg/subsystem/linux/names.go @@ -7,12 +7,12 @@ import ( "fmt" "regexp" - "github.com/google/syzkaller/pkg/subsystem/entity" + "github.com/google/syzkaller/pkg/subsystem" ) // setSubsystemNames assigns unique names to the presented subsystems. // If it failed to assign a name to a subsystem, the Name field remains empty. -func setSubsystemNames(list []*entity.Subsystem) error { +func setSubsystemNames(list []*subsystem.Subsystem) error { dups := map[string]bool{} for _, item := range list { // For now, we can only infer name from the list email. diff --git a/pkg/subsystem/linux/names_test.go b/pkg/subsystem/linux/names_test.go index 0b2352411..03f9a86d3 100644 --- a/pkg/subsystem/linux/names_test.go +++ b/pkg/subsystem/linux/names_test.go @@ -6,7 +6,7 @@ package linux import ( "testing" - "github.com/google/syzkaller/pkg/subsystem/entity" + "github.com/google/syzkaller/pkg/subsystem" ) func TestEmailToName(t *testing.T) { @@ -33,8 +33,8 @@ type subsystemTestInput struct { outName string } -func (sti subsystemTestInput) ToSubsystem() *entity.Subsystem { - s := &entity.Subsystem{} +func (sti subsystemTestInput) ToSubsystem() *subsystem.Subsystem { + s := &subsystem.Subsystem{} if sti.email != "" { s.Lists = append(s.Lists, sti.email) } @@ -92,7 +92,7 @@ func TestSetSubsystemNames(t *testing.T) { for _, test := range tests { curr := test t.Run(curr.name, func(t *testing.T) { - list := []*entity.Subsystem{} + list := []*subsystem.Subsystem{} for _, i := range curr.inputs { list = append(list, i.ToSubsystem()) } diff --git a/pkg/subsystem/linux/parents.go b/pkg/subsystem/linux/parents.go index fdea4a62f..344bc0db2 100644 --- a/pkg/subsystem/linux/parents.go +++ b/pkg/subsystem/linux/parents.go @@ -3,14 +3,11 @@ package linux -import ( - "github.com/google/syzkaller/pkg/subsystem/entity" - "github.com/google/syzkaller/pkg/subsystem/match" -) +import "github.com/google/syzkaller/pkg/subsystem" // parentTransformations applies all subsystem list transformations that have been implemented. -func parentTransformations(matrix *match.CoincidenceMatrix, - list []*entity.Subsystem) ([]*entity.Subsystem, error) { +func parentTransformations(matrix *CoincidenceMatrix, + list []*subsystem.Subsystem) ([]*subsystem.Subsystem, error) { list = dropSmallSubsystems(matrix, list) list = dropDuplicateSubsystems(matrix, list) err := setParents(matrix, list) @@ -24,13 +21,13 @@ func parentTransformations(matrix *match.CoincidenceMatrix, // We assume A is a child of B if: // 1) B covers more paths than A. // 2) Most of the paths that relate to A also relate to B. -func setParents(matrix *match.CoincidenceMatrix, list []*entity.Subsystem) error { +func setParents(matrix *CoincidenceMatrix, list []*subsystem.Subsystem) error { // Some subsystems might have already been dropeed. - inInput := map[*entity.Subsystem]bool{} + inInput := map[*subsystem.Subsystem]bool{} for _, item := range list { inInput[item] = true } - matrix.NonEmptyPairs(func(a, b *entity.Subsystem, count int) { + matrix.NonEmptyPairs(func(a, b *subsystem.Subsystem, count int) { if !inInput[a] || !inInput[b] { return } @@ -45,10 +42,10 @@ func setParents(matrix *match.CoincidenceMatrix, list []*entity.Subsystem) error } // dropSmallSubsystems removes subsystems for which we have found only a few matches in the filesystem tree. -func dropSmallSubsystems(matrix *match.CoincidenceMatrix, list []*entity.Subsystem) []*entity.Subsystem { +func dropSmallSubsystems(matrix *CoincidenceMatrix, list []*subsystem.Subsystem) []*subsystem.Subsystem { const cutOffCount = 2 - newList := []*entity.Subsystem{} + newList := []*subsystem.Subsystem{} for _, item := range list { if matrix.Count(item) > cutOffCount || len(item.Syscalls) > 0 { newList = append(newList, item) @@ -61,9 +58,9 @@ func dropSmallSubsystems(matrix *match.CoincidenceMatrix, list []*entity.Subsyst // First, if subsystems A and B 100% overlap, we prefer the one that's alphabetically first. // Second, if subsystem A is fully enclosed in subsystem B and constitutes more than 75% of B, // we drop A, since it brings little value. -func dropDuplicateSubsystems(matrix *match.CoincidenceMatrix, list []*entity.Subsystem) []*entity.Subsystem { - drop := map[*entity.Subsystem]struct{}{} - firstIsBetter := func(first, second *entity.Subsystem) bool { +func dropDuplicateSubsystems(matrix *CoincidenceMatrix, list []*subsystem.Subsystem) []*subsystem.Subsystem { + drop := map[*subsystem.Subsystem]struct{}{} + firstIsBetter := func(first, second *subsystem.Subsystem) bool { firstEmail, secondEmail := "", "" if len(first.Lists) > 0 { firstEmail = first.Lists[0] @@ -73,7 +70,7 @@ func dropDuplicateSubsystems(matrix *match.CoincidenceMatrix, list []*entity.Sub } return firstEmail < secondEmail } - matrix.NonEmptyPairs(func(a, b *entity.Subsystem, count int) { + matrix.NonEmptyPairs(func(a, b *subsystem.Subsystem, count int) { // Only consider cases when A is fully enclosed in B, i.e. M[A][B] == M[A][A]. if count != matrix.Count(a) { return @@ -91,7 +88,7 @@ func dropDuplicateSubsystems(matrix *match.CoincidenceMatrix, list []*entity.Sub drop[a] = struct{}{} } }) - newList := []*entity.Subsystem{} + newList := []*subsystem.Subsystem{} for _, item := range list { if _, exists := drop[item]; !exists { newList = append(newList, item) @@ -102,15 +99,15 @@ func dropDuplicateSubsystems(matrix *match.CoincidenceMatrix, list []*entity.Sub // The algorithm runs in O(E * (E + V)). // We expect that E is quite low here, so it should be fine. -func transitiveReduction(list []*entity.Subsystem) { +func transitiveReduction(list []*subsystem.Subsystem) { for _, s := range list { - removeParents := map[*entity.Subsystem]bool{} + removeParents := map[*subsystem.Subsystem]bool{} for _, p := range s.Parents { for otherP := range p.ReachableParents() { removeParents[otherP] = true } } - newParents := []*entity.Subsystem{} + newParents := []*subsystem.Subsystem{} for _, p := range s.Parents { if !removeParents[p] { newParents = append(newParents, p) diff --git a/pkg/subsystem/linux/parents_test.go b/pkg/subsystem/linux/parents_test.go index dde13177d..444e4ff33 100644 --- a/pkg/subsystem/linux/parents_test.go +++ b/pkg/subsystem/linux/parents_test.go @@ -7,40 +7,39 @@ import ( "testing" "testing/fstest" - "github.com/google/syzkaller/pkg/subsystem/entity" - "github.com/google/syzkaller/pkg/subsystem/match" + "github.com/google/syzkaller/pkg/subsystem" "github.com/stretchr/testify/assert" ) func TestDropSmallSubsystems(t *testing.T) { - kernel := &entity.Subsystem{} - net := &entity.Subsystem{} - fs := &entity.Subsystem{} - legal := &entity.Subsystem{} + kernel := &subsystem.Subsystem{} + net := &subsystem.Subsystem{} + fs := &subsystem.Subsystem{} + legal := &subsystem.Subsystem{} - matrix := match.MakeCoincidenceMatrix() + matrix := MakeCoincidenceMatrix() matrix.Record(kernel, net) matrix.Record(kernel, fs) matrix.Record(kernel, net, fs) matrix.Record(kernel, net, fs) matrix.Record(kernel, net, fs) - ret := dropSmallSubsystems(matrix, []*entity.Subsystem{kernel, net, fs, legal}) - assert.ElementsMatch(t, []*entity.Subsystem{kernel, net, fs}, ret) + ret := dropSmallSubsystems(matrix, []*subsystem.Subsystem{kernel, net, fs, legal}) + assert.ElementsMatch(t, []*subsystem.Subsystem{kernel, net, fs}, ret) } func TestDropDuplicateSubsystems(t *testing.T) { - input, expected := []*entity.Subsystem{}, []*entity.Subsystem{} - matrix := match.MakeCoincidenceMatrix() + input, expected := []*subsystem.Subsystem{}, []*subsystem.Subsystem{} + matrix := MakeCoincidenceMatrix() // Always present. - kernel := &entity.Subsystem{Name: "kernel"} + kernel := &subsystem.Subsystem{Name: "kernel"} input = append(input, kernel) expected = append(expected, kernel) // Fully overlap. - sameA := &entity.Subsystem{Lists: []string{"SameA@gmail.com"}} - sameB := &entity.Subsystem{Lists: []string{"SameB@gmail.com"}} + sameA := &subsystem.Subsystem{Lists: []string{"SameA@gmail.com"}} + sameB := &subsystem.Subsystem{Lists: []string{"SameB@gmail.com"}} matrix.Record(kernel, sameA, sameB) matrix.Record(kernel, sameA, sameB) matrix.Record(kernel, sameA, sameB) @@ -48,7 +47,7 @@ func TestDropDuplicateSubsystems(t *testing.T) { expected = append(expected, sameA) // Overlap, but the smaller one is not so significant. - ext4, fs := &entity.Subsystem{Name: "ext4"}, &entity.Subsystem{Name: "fs"} + ext4, fs := &subsystem.Subsystem{Name: "ext4"}, &subsystem.Subsystem{Name: "fs"} matrix.Record(kernel, ext4, fs) matrix.Record(kernel, ext4, fs) matrix.Record(kernel, fs) // 66%. @@ -56,7 +55,7 @@ func TestDropDuplicateSubsystems(t *testing.T) { expected = append(expected, ext4, fs) // Overlap, and the smaller one takes a big part. - toDrop, stays := &entity.Subsystem{Name: "to-drop"}, &entity.Subsystem{Name: "stays"} + toDrop, stays := &subsystem.Subsystem{Name: "to-drop"}, &subsystem.Subsystem{Name: "stays"} for i := 0; i < 5; i++ { matrix.Record(kernel, toDrop, stays) } @@ -75,31 +74,31 @@ func TestTransitiveReduction(t *testing.T) { // (d, b) // (d, e) // (c, a) - a := &entity.Subsystem{} - b := &entity.Subsystem{Parents: []*entity.Subsystem{a}} - c := &entity.Subsystem{Parents: []*entity.Subsystem{a, b}} - e := &entity.Subsystem{} - d := &entity.Subsystem{Parents: []*entity.Subsystem{a, b, c, e}} - transitiveReduction([]*entity.Subsystem{a, b, c, d, e}) + a := &subsystem.Subsystem{} + b := &subsystem.Subsystem{Parents: []*subsystem.Subsystem{a}} + c := &subsystem.Subsystem{Parents: []*subsystem.Subsystem{a, b}} + e := &subsystem.Subsystem{} + d := &subsystem.Subsystem{Parents: []*subsystem.Subsystem{a, b, c, e}} + transitiveReduction([]*subsystem.Subsystem{a, b, c, d, e}) // The result should be: // (d, c), (c, b), (b, a) // (d, e) - assert.ElementsMatch(t, d.Parents, []*entity.Subsystem{c, e}) - assert.ElementsMatch(t, c.Parents, []*entity.Subsystem{b}) + assert.ElementsMatch(t, d.Parents, []*subsystem.Subsystem{c, e}) + assert.ElementsMatch(t, c.Parents, []*subsystem.Subsystem{b}) } func TestSetParents(t *testing.T) { - kernel := &entity.Subsystem{PathRules: []entity.PathRule{{ + kernel := &subsystem.Subsystem{PathRules: []subsystem.PathRule{{ IncludeRegexp: `.*`, }}} - net := &entity.Subsystem{PathRules: []entity.PathRule{{ + net := &subsystem.Subsystem{PathRules: []subsystem.PathRule{{ IncludeRegexp: `^net/`, }}} - wireless := &entity.Subsystem{PathRules: []entity.PathRule{{ + wireless := &subsystem.Subsystem{PathRules: []subsystem.PathRule{{ IncludeRegexp: `^net/wireless`, }}} - drivers := &entity.Subsystem{PathRules: []entity.PathRule{{ + drivers := &subsystem.Subsystem{PathRules: []subsystem.PathRule{{ IncludeRegexp: `^drivers/`, }}} @@ -114,20 +113,20 @@ func TestSetParents(t *testing.T) { "drivers/android/binder.c": {}, } - matrix, err := match.BuildCoincidenceMatrix(tree, - []*entity.Subsystem{kernel, net, wireless, drivers}, nil) + matrix, err := BuildCoincidenceMatrix(tree, + []*subsystem.Subsystem{kernel, net, wireless, drivers}, nil) assert.NoError(t, err) // Calculate parents. - err = setParents(matrix, []*entity.Subsystem{kernel, net, wireless, drivers}) + err = setParents(matrix, []*subsystem.Subsystem{kernel, net, wireless, drivers}) if err != nil { t.Fatal(err) } // Verify parents. - assert.ElementsMatch(t, net.Parents, []*entity.Subsystem{kernel}) - assert.ElementsMatch(t, wireless.Parents, []*entity.Subsystem{net}) - assert.ElementsMatch(t, drivers.Parents, []*entity.Subsystem{kernel}) - assert.ElementsMatch(t, kernel.Parents, []*entity.Subsystem{}) + assert.ElementsMatch(t, net.Parents, []*subsystem.Subsystem{kernel}) + assert.ElementsMatch(t, wireless.Parents, []*subsystem.Subsystem{net}) + assert.ElementsMatch(t, drivers.Parents, []*subsystem.Subsystem{kernel}) + assert.ElementsMatch(t, kernel.Parents, []*subsystem.Subsystem{}) } diff --git a/pkg/subsystem/linux/path_coincidence.go b/pkg/subsystem/linux/path_coincidence.go new file mode 100644 index 000000000..44182bd6a --- /dev/null +++ b/pkg/subsystem/linux/path_coincidence.go @@ -0,0 +1,68 @@ +// Copyright 2023 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package linux + +import ( + "io/fs" + "regexp" + "runtime" + "sync" + + "github.com/google/syzkaller/pkg/subsystem" +) + +func BuildCoincidenceMatrix(root fs.FS, list []*subsystem.Subsystem, + excludeRe *regexp.Regexp) (*CoincidenceMatrix, error) { + // Create a matcher. + matcher := subsystem.MakePathMatcher(list) + chPaths, chResult := extractSubsystems(matcher) + // The final consumer goroutine. + cm := MakeCoincidenceMatrix() + ready := make(chan struct{}) + go func() { + for items := range chResult { + cm.Record(items...) + } + ready <- struct{}{} + }() + // Source of data. + err := fs.WalkDir(root, ".", func(path string, info fs.DirEntry, err error) error { + if err != nil || info.IsDir() { + return err + } + if !includePathRe.MatchString(path) || + (excludeRe != nil && excludeRe.MatchString(path)) { + return nil + } + chPaths <- path + return nil + }) + close(chPaths) + <-ready + return cm, err +} + +var ( + includePathRe = regexp.MustCompile(`(?:/|\.(?:c|h|S))$`) +) + +func extractSubsystems(matcher *subsystem.PathMatcher) (chan<- string, <-chan []*subsystem.Subsystem) { + procs := runtime.NumCPU() + paths, output := make(chan string, procs), make(chan []*subsystem.Subsystem, procs) + var wg sync.WaitGroup + for i := 0; i < procs; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for path := range paths { + output <- matcher.Match(path) + } + }() + } + go func() { + wg.Wait() + close(output) + }() + return paths, output +} diff --git a/pkg/subsystem/linux/path_coincidence_test.go b/pkg/subsystem/linux/path_coincidence_test.go new file mode 100644 index 000000000..ac32dcd31 --- /dev/null +++ b/pkg/subsystem/linux/path_coincidence_test.go @@ -0,0 +1,49 @@ +// Copyright 2023 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package linux + +import ( + "testing" + "testing/fstest" + + "github.com/google/syzkaller/pkg/subsystem" + "github.com/stretchr/testify/assert" +) + +func TestBuildCoincidenceMatrix(t *testing.T) { + vfs := &subsystem.Subsystem{PathRules: []subsystem.PathRule{ + {IncludeRegexp: `^fs/`}, + }} + ext4 := &subsystem.Subsystem{PathRules: []subsystem.PathRule{ + {IncludeRegexp: `^fs/ext4/`}, + }} + ntfs := &subsystem.Subsystem{PathRules: []subsystem.PathRule{ + {IncludeRegexp: `^fs/ntfs/`}, + }} + kernel := &subsystem.Subsystem{PathRules: []subsystem.PathRule{ + {IncludeRegexp: `.*`}, + }} + + fs := fstest.MapFS{ + ".git/obj/12345": {}, + "fs/inode.c": {}, + "fs/ext4/file.c": {}, + "fs/ntfs/file.c": {}, + "fs/fat/file.c": {}, + "net/socket.c": {}, + } + matrix, err := BuildCoincidenceMatrix(fs, []*subsystem.Subsystem{vfs, ntfs, ext4, kernel}, nil) + assert.NoError(t, err) + + // Test total counts. + assert.Equal(t, 5, matrix.Count(kernel)) + assert.Equal(t, 4, matrix.Count(vfs)) + assert.Equal(t, 1, matrix.Count(ext4)) + + // Test pairwise counts. + assert.Equal(t, 1, matrix.Get(vfs, ext4)) + assert.Equal(t, 1, matrix.Get(vfs, ntfs)) + assert.Equal(t, 0, matrix.Get(ext4, ntfs)) + assert.Equal(t, 4, matrix.Get(kernel, vfs)) +} diff --git a/pkg/subsystem/linux/subsystems.go b/pkg/subsystem/linux/subsystems.go index 5ca57a1be..18ac3864e 100644 --- a/pkg/subsystem/linux/subsystems.go +++ b/pkg/subsystem/linux/subsystems.go @@ -10,16 +10,15 @@ import ( "regexp" "sort" - "github.com/google/syzkaller/pkg/subsystem/entity" - "github.com/google/syzkaller/pkg/subsystem/match" + "github.com/google/syzkaller/pkg/subsystem" ) -func ListFromRepo(repo string) ([]*entity.Subsystem, error) { +func ListFromRepo(repo string) ([]*subsystem.Subsystem, error) { return listFromRepoInner(os.DirFS(repo), linuxSubsystemRules) } // listFromRepoInner allows for better testing. -func listFromRepoInner(root fs.FS, rules *customRules) ([]*entity.Subsystem, error) { +func listFromRepoInner(root fs.FS, rules *customRules) ([]*subsystem.Subsystem, error) { records, err := getMaintainers(root) if err != nil { return nil, err @@ -31,7 +30,7 @@ func listFromRepoInner(root fs.FS, rules *customRules) ([]*entity.Subsystem, err extraRules: rules, } list := ctx.groupByList() - matrix, err := match.BuildCoincidenceMatrix(root, list, dropPatterns) + matrix, err := BuildCoincidenceMatrix(root, list, dropPatterns) if err != nil { return nil, err } @@ -71,7 +70,7 @@ var ( dropPatterns = regexp.MustCompile(`^(Documentation|scripts|samples|tools)|Makefile`) ) -func (ctx *linuxCtx) groupByList() []*entity.Subsystem { +func (ctx *linuxCtx) groupByList() []*subsystem.Subsystem { perList := make(map[string][]*maintainersRecord) for _, record := range ctx.rawRecords { for _, list := range record.lists { @@ -82,7 +81,7 @@ func (ctx *linuxCtx) groupByList() []*entity.Subsystem { if ctx.extraRules != nil { exclude = ctx.extraRules.notSubsystemEmails } - ret := []*entity.Subsystem{} + ret := []*subsystem.Subsystem{} for email, list := range perList { if _, skip := exclude[email]; skip { continue @@ -96,7 +95,7 @@ func (ctx *linuxCtx) groupByList() []*entity.Subsystem { return ret } -func (ctx *linuxCtx) applyExtraRules(list []*entity.Subsystem) { +func (ctx *linuxCtx) applyExtraRules(list []*subsystem.Subsystem) { if ctx.extraRules == nil { return } @@ -105,7 +104,7 @@ func (ctx *linuxCtx) applyExtraRules(list []*entity.Subsystem) { } } -func mergeRawRecords(email string, records []*maintainersRecord) *entity.Subsystem { +func mergeRawRecords(email string, records []*maintainersRecord) *subsystem.Subsystem { unique := func(list []string) []string { m := make(map[string]struct{}) for _, s := range list { @@ -119,7 +118,7 @@ func mergeRawRecords(email string, records []*maintainersRecord) *entity.Subsyst return ret } var maintainers []string - subsystem := &entity.Subsystem{Lists: []string{email}} + subsystem := &subsystem.Subsystem{Lists: []string{email}} for _, record := range records { rule := record.ToPathRule() if !rule.IsEmpty() { diff --git a/pkg/subsystem/linux/subsystems_test.go b/pkg/subsystem/linux/subsystems_test.go index f13009fad..cbc4b9699 100644 --- a/pkg/subsystem/linux/subsystems_test.go +++ b/pkg/subsystem/linux/subsystems_test.go @@ -8,8 +8,7 @@ import ( "testing" "testing/fstest" - "github.com/google/syzkaller/pkg/subsystem/entity" - "github.com/google/syzkaller/pkg/subsystem/match" + "github.com/google/syzkaller/pkg/subsystem" "github.com/stretchr/testify/assert" ) @@ -26,7 +25,7 @@ func TestGroupLinuxSubsystems(t *testing.T) { // It complicates the test, so let's skip it here. s.Parents = nil } - expected := []*entity.Subsystem{ + expected := []*subsystem.Subsystem{ { Name: "fs", Lists: []string{"linux-fsdevel@vger.kernel.org"}, @@ -88,7 +87,7 @@ func TestLinuxSubsystemPaths(t *testing.T) { if err != nil { t.Fatal(err) } - matcher := match.MakePathMatcher(subsystems) + matcher := subsystem.MakePathMatcher(subsystems) tests := []struct { path string list []string diff --git a/pkg/subsystem/list.go b/pkg/subsystem/list.go index 737ab859e..7842f8620 100644 --- a/pkg/subsystem/list.go +++ b/pkg/subsystem/list.go @@ -3,10 +3,6 @@ package subsystem -import ( - "github.com/google/syzkaller/pkg/subsystem/entity" -) - // In general, it's not correct to assume that subsystems are only determined by target.OS, // because subsystems are related not to the user interface of the OS kernel, but rather to // the OS kernel implementation. @@ -17,16 +13,16 @@ import ( // Therefore, subsystem lists have to be a completely different entity. var ( - lists = make(map[string][]*entity.Subsystem) + lists = make(map[string][]*Subsystem) ) -func RegisterList(name string, list []*entity.Subsystem) { +func RegisterList(name string, list []*Subsystem) { if _, ok := lists[name]; ok { panic(name + " subsystem list already exists!") } lists[name] = list } -func GetList(name string) []*entity.Subsystem { +func GetList(name string) []*Subsystem { return lists[name] } diff --git a/pkg/subsystem/lists/linux.go b/pkg/subsystem/lists/linux.go index bd03428d3..708fbe67b 100644 --- a/pkg/subsystem/lists/linux.go +++ b/pkg/subsystem/lists/linux.go @@ -3,11 +3,10 @@ package lists -import . "github.com/google/syzkaller/pkg/subsystem/entity" -import "github.com/google/syzkaller/pkg/subsystem" +import . "github.com/google/syzkaller/pkg/subsystem" func init() { - subsystem.RegisterList("linux", subsystems) + RegisterList("linux", subsystems) } // The subsystem list: @@ -123,7 +122,6 @@ func init() { // - kexec // - keyrings // - kgdb-bugreport -// - kselftest // - kunit // - kvm // - kvm-riscv @@ -179,7 +177,6 @@ func init() { // - ath10k // - ath11k // - b43 -// - brcm80211 // - libertas // - wcn36xx // - zd1211 @@ -243,7 +240,7 @@ func init() { // - xtensa var subsystems = []*Subsystem{ - shacyfmac, ac100, accelerators, acpi, acpi4asus, acpica, acrn, actions, afs, alpha, alsa, amdgfx, amlogic, apparmor, arch, arm, armmsm, asahi, aspeed, ath10k, ath11k, atm, audit, autofs, axis, batman, b43, bcache, block, bluetooth, bpf, brcm80211, bridge, btrfs, cachefs, can, ceph, cgroups, chromeplatform, cifs, cirrus, clk, cluster, codalist, coresight, coreteam, crypto, csky, cxl, damon, dccp, dell, devicetree, dm, dmaengine, drbd, dri, ecryptfs, edac, efi, erofs, etnaviv, ext4, f2fs, fbdev, fpga, freedreno, fs, fscrypt, fsi, fsverity, geode, gpio, greybus, hams, hardening, hexagon, hippi, hwmon, hyperv, i2c, i3c, ia64, ide, iio, imx, industrypack, input, integrity, intelgfx, intelgvt, intelwiredlan, iouring, iommu, isdn4linux, jfs, karma, kasan, kernel, kexec, keyrings, kgdbbugreport, kselftest, kunit, kvm, kvmriscv, kvmarm, leds, libertas, lima, linux1394, linuxppc, linuxpps, livepatching, llvm, loongarch, lsm, lvs, m68k, malidp, media, mediatek, megaraid, mhi, mips, mjpeg, mm, mmc, modules, mpi3, mptfusion, mptcp, mtd, nbd, net, nfc, nfs, nilfs, nitro, nouveau, ntb, ntfs, ntfs3, nvdimm, nvme, ocfs2, omap, optee, openiscsi, openbmc, openipmi, openrisc, openvswitch, openwrt, orangefs, ossdrivers, overlayfs, oxnas, parisc, parport, pci, perf, phy, pm, ppp, pvrusb2, pwm, qat, raid, rcu, rdma, rds, reiserfs, remoteproc, renesassoc, riscv, rockchip, rpi, rttools, rtc, rustfor, s390, samsungsoc, scsi, sctp, selinux, serial, sgx, sh, snpsarc, sof, sparclinux, speakup, spi, spice, squashfs, staging, stm32, sunxi, target, tegra, tipc, tomoyo, trace, uclinux, um, unisoc, usb, usbstorage, v9fs, video, virt, watchdog, wcn36xx, wireguard, wireless, wpan, x25, x86, x86drivers, xen, xfs, xtensa, zd1211, + shacyfmac, ac100, accelerators, acpi, acpi4asus, acpica, acrn, actions, afs, alpha, alsa, amdgfx, amlogic, apparmor, arch, arm, armmsm, asahi, aspeed, ath10k, ath11k, atm, audit, autofs, axis, batman, b43, bcache, block, bluetooth, bpf, bridge, btrfs, cachefs, can, ceph, cgroups, chromeplatform, cifs, cirrus, clk, cluster, codalist, coresight, coreteam, crypto, csky, cxl, damon, dccp, dell, devicetree, dm, dmaengine, drbd, dri, ecryptfs, edac, efi, erofs, etnaviv, ext4, f2fs, fbdev, fpga, freedreno, fs, fscrypt, fsi, fsverity, geode, gpio, greybus, hams, hardening, hexagon, hippi, hwmon, hyperv, i2c, i3c, ia64, ide, iio, imx, industrypack, input, integrity, intelgfx, intelgvt, intelwiredlan, iouring, iommu, isdn4linux, jfs, karma, kasan, kernel, kexec, keyrings, kgdbbugreport, kunit, kvm, kvmriscv, kvmarm, leds, libertas, lima, linux1394, linuxppc, linuxpps, livepatching, llvm, loongarch, lsm, lvs, m68k, malidp, media, mediatek, megaraid, mhi, mips, mjpeg, mm, mmc, modules, mpi3, mptfusion, mptcp, mtd, nbd, net, nfc, nfs, nilfs, nitro, nouveau, ntb, ntfs, ntfs3, nvdimm, nvme, ocfs2, omap, optee, openiscsi, openbmc, openipmi, openrisc, openvswitch, openwrt, orangefs, ossdrivers, overlayfs, oxnas, parisc, parport, pci, perf, phy, pm, ppp, pvrusb2, pwm, qat, raid, rcu, rdma, rds, reiserfs, remoteproc, renesassoc, riscv, rockchip, rpi, rttools, rtc, rustfor, s390, samsungsoc, scsi, sctp, selinux, serial, sgx, sh, snpsarc, sof, sparclinux, speakup, spi, spice, squashfs, staging, stm32, sunxi, target, tegra, tipc, tomoyo, trace, uclinux, um, unisoc, usb, usbstorage, v9fs, video, virt, watchdog, wcn36xx, wireguard, wireless, wpan, x25, x86, x86drivers, xen, xfs, xtensa, zd1211, } // Subsystem info. @@ -818,16 +815,6 @@ var bpf = &Subsystem{ }, } -var brcm80211 = &Subsystem{ - Name: "brcm80211", - Lists: []string{"brcm80211-dev-list.pdl@broadcom.com"}, - Maintainers: []string{"aspriel@gmail.com", "franky.lin@broadcom.com", "hante.meuleman@broadcom.com"}, - Parents: []*Subsystem{wireless}, - PathRules: []PathRule{ - {IncludeRegexp: "^drivers/net/wireless/broadcom/brcm80211/"}, - }, -} - var bridge = &Subsystem{ Name: "bridge", Lists: []string{"bridge@lists.linux-foundation.org"}, @@ -2106,16 +2093,6 @@ var kgdbbugreport = &Subsystem{ }, } -var kselftest = &Subsystem{ - Name: "kselftest", - Lists: []string{"linux-kselftest@vger.kernel.org"}, - Parents: []*Subsystem{kernel}, - PathRules: []PathRule{ - {IncludeRegexp: "^include/kunit/|^lib/kunit/"}, - {IncludeRegexp: "^lib/list-test\\.c$"}, - }, -} - var kunit = &Subsystem{ Name: "kunit", Lists: []string{"kunit-dev@googlegroups.com"}, @@ -2168,7 +2145,7 @@ var kvmriscv = &Subsystem{ var kvmarm = &Subsystem{ Name: "kvmarm", - Lists: []string{"kvmarm@lists.linux.dev"}, + Lists: []string{"kvmarm@lists.cs.columbia.edu"}, Maintainers: []string{"maz@kernel.org"}, Parents: []*Subsystem{arm}, PathRules: []PathRule{ diff --git a/pkg/subsystem/match.go b/pkg/subsystem/match.go new file mode 100644 index 000000000..ad5d3a257 --- /dev/null +++ b/pkg/subsystem/match.go @@ -0,0 +1,73 @@ +// Copyright 2023 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package subsystem + +import ( + "regexp" + "strings" +) + +type PathMatcher struct { + matches []*match +} + +type match struct { + include *regexp.Regexp + exclude *regexp.Regexp + object *Subsystem +} + +func MakePathMatcher(list []*Subsystem) *PathMatcher { + m := &PathMatcher{} + for _, item := range list { + m.register(item) + } + return m +} + +func (p *PathMatcher) register(item *Subsystem) { + onlyInclude := []string{} + list := []PathRule{} + for _, r := range item.PathRules { + if r.ExcludeRegexp == "" { + // It's expected that almost everything will go to this branch. + onlyInclude = append(onlyInclude, r.IncludeRegexp) + } else { + list = append(list, r) + } + } + if len(onlyInclude) > 0 { + list = append(list, PathRule{ + IncludeRegexp: strings.Join(onlyInclude, "|"), + }) + } + for _, rule := range list { + p.matches = append(p.matches, buildMatch(rule, item)) + } +} + +func (p *PathMatcher) Match(path string) []*Subsystem { + ret := []*Subsystem{} + for _, m := range p.matches { + if m.exclude != nil && m.exclude.MatchString(path) { + continue + } + if m.include != nil && !m.include.MatchString(path) { + continue + } + ret = append(ret, m.object) + } + return ret +} + +func buildMatch(rule PathRule, item *Subsystem) *match { + m := &match{object: item} + if rule.IncludeRegexp != "" { + m.include = regexp.MustCompile(rule.IncludeRegexp) + } + if rule.ExcludeRegexp != "" { + m.exclude = regexp.MustCompile(rule.ExcludeRegexp) + } + return m +} diff --git a/pkg/subsystem/match/coincidence.go b/pkg/subsystem/match/coincidence.go deleted file mode 100644 index 45bd98884..000000000 --- a/pkg/subsystem/match/coincidence.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2023 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -package match - -import "github.com/google/syzkaller/pkg/subsystem/entity" - -// CoincidenceMatrix represents a matrix that, for every pair of subsystems -// A and B, stores the number of times A and B have coincided in the input data. -// So far we only need it for entity.Subsystem, so its interface is tailored to it. -type CoincidenceMatrix struct { - matrix map[*entity.Subsystem]map[*entity.Subsystem]int -} - -func MakeCoincidenceMatrix() *CoincidenceMatrix { - return &CoincidenceMatrix{ - matrix: make(map[*entity.Subsystem]map[*entity.Subsystem]int), - } -} - -func (cm *CoincidenceMatrix) Record(items ...*entity.Subsystem) { - for i := 0; i < len(items); i++ { - for j := 0; j < len(items); j++ { - cm.inc(items[i], items[j]) - } - } -} - -func (cm *CoincidenceMatrix) Count(a *entity.Subsystem) int { - return cm.Get(a, a) -} - -func (cm *CoincidenceMatrix) Get(a, b *entity.Subsystem) int { - return cm.matrix[a][b] -} - -func (cm *CoincidenceMatrix) NonEmptyPairs(cb func(a, b *entity.Subsystem, val int)) { - for a, sub := range cm.matrix { - for b, val := range sub { - if a == b { - continue - } - cb(a, b, val) - } - } -} - -func (cm *CoincidenceMatrix) inc(a, b *entity.Subsystem) { - subMatrix, ok := cm.matrix[a] - if !ok { - subMatrix = make(map[*entity.Subsystem]int) - cm.matrix[a] = subMatrix - } - subMatrix[b]++ -} diff --git a/pkg/subsystem/match/coincidence_test.go b/pkg/subsystem/match/coincidence_test.go deleted file mode 100644 index 15f0dd241..000000000 --- a/pkg/subsystem/match/coincidence_test.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2023 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -package match - -import ( - "testing" - - "github.com/google/syzkaller/pkg/subsystem/entity" - "github.com/stretchr/testify/assert" -) - -func TestCoincidenceMatrix(t *testing.T) { - cm := MakeCoincidenceMatrix() - a, b, c := &entity.Subsystem{}, &entity.Subsystem{}, &entity.Subsystem{} - cm.Record(a, b) - cm.Record(b, c) - - // Test counts. - assert.Equal(t, cm.Count(a), 1) - assert.Equal(t, cm.Count(b), 2) - assert.Equal(t, cm.Count(c), 1) - - // Test pairwise counts. - assert.Equal(t, cm.Get(a, b), 1) - assert.Equal(t, cm.Get(b, c), 1) - assert.Equal(t, cm.Get(a, c), 0) - - // Test the iterator. - type pair struct { - a *entity.Subsystem - b *entity.Subsystem - } - expected := []pair{{a, b}, {b, a}, {b, c}, {c, b}} - got := []pair{} - cm.NonEmptyPairs(func(a, b *entity.Subsystem, _ int) { - got = append(got, pair{a, b}) - }) - assert.ElementsMatch(t, expected, got) -} diff --git a/pkg/subsystem/match/match.go b/pkg/subsystem/match/match.go deleted file mode 100644 index 31874f978..000000000 --- a/pkg/subsystem/match/match.go +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2023 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -package match - -import ( - "regexp" - "strings" - - "github.com/google/syzkaller/pkg/subsystem/entity" -) - -type PathMatcher struct { - matches []*match -} - -type match struct { - include *regexp.Regexp - exclude *regexp.Regexp - object *entity.Subsystem -} - -func MakePathMatcher(list []*entity.Subsystem) *PathMatcher { - m := &PathMatcher{} - for _, item := range list { - m.register(item) - } - return m -} - -func (p *PathMatcher) register(item *entity.Subsystem) { - onlyInclude := []string{} - list := []entity.PathRule{} - for _, r := range item.PathRules { - if r.ExcludeRegexp == "" { - // It's expected that almost everything will go to this branch. - onlyInclude = append(onlyInclude, r.IncludeRegexp) - } else { - list = append(list, r) - } - } - if len(onlyInclude) > 0 { - list = append(list, entity.PathRule{ - IncludeRegexp: strings.Join(onlyInclude, "|"), - }) - } - for _, rule := range list { - p.matches = append(p.matches, buildMatch(rule, item)) - } -} - -func (p *PathMatcher) Match(path string) []*entity.Subsystem { - ret := []*entity.Subsystem{} - for _, m := range p.matches { - if m.exclude != nil && m.exclude.MatchString(path) { - continue - } - if m.include != nil && !m.include.MatchString(path) { - continue - } - ret = append(ret, m.object) - } - return ret -} - -func buildMatch(rule entity.PathRule, item *entity.Subsystem) *match { - m := &match{object: item} - if rule.IncludeRegexp != "" { - m.include = regexp.MustCompile(rule.IncludeRegexp) - } - if rule.ExcludeRegexp != "" { - m.exclude = regexp.MustCompile(rule.ExcludeRegexp) - } - return m -} diff --git a/pkg/subsystem/match/match_test.go b/pkg/subsystem/match/match_test.go deleted file mode 100644 index 2f7440e19..000000000 --- a/pkg/subsystem/match/match_test.go +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2023 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -package match - -import ( - "testing" - - "github.com/google/syzkaller/pkg/subsystem/entity" - "github.com/stretchr/testify/assert" -) - -func TestPathMatcher(t *testing.T) { - arm := &entity.Subsystem{ - PathRules: []entity.PathRule{ - { - IncludeRegexp: `^arch/arm/.*$`, - ExcludeRegexp: `^arch/arm/boot/dts/.*$`, - }, - {IncludeRegexp: `^drivers/spi/spi-pl022\.c$`}, - { - // nolint:lll - IncludeRegexp: `^drivers/irqchip/irq-vic\.c$|^Documentation/devicetree/bindings/interrupt-controller/arm,vic\.yaml$`, - }, - }, - } - docs := &entity.Subsystem{ - PathRules: []entity.PathRule{ - {IncludeRegexp: `^Documentation/.*$`}, - }, - } - m := MakePathMatcher([]*entity.Subsystem{arm, docs}) - assert.ElementsMatch(t, []*entity.Subsystem{arm, docs}, - m.Match(`Documentation/devicetree/bindings/interrupt-controller/arm,vic.yaml`)) - assert.ElementsMatch(t, []*entity.Subsystem{arm}, m.Match(`arch/arm/a.c`)) - assert.ElementsMatch(t, []*entity.Subsystem{docs}, m.Match(`Documentation/a/b/c.md`)) - assert.Empty(t, m.Match(`arch/boot/dts/a.c`)) -} - -func TestPathMatchOrder(t *testing.T) { - s := &entity.Subsystem{ - PathRules: []entity.PathRule{ - { - IncludeRegexp: `^a/b/.*$`, - ExcludeRegexp: `^a/.*$`, - }, - }, - } - m := MakePathMatcher([]*entity.Subsystem{s}) - // If we first exclude a/, then a/b/c never matches. - assert.Empty(t, m.Match("a/b/c")) -} diff --git a/pkg/subsystem/match/path_coincidence.go b/pkg/subsystem/match/path_coincidence.go deleted file mode 100644 index 944546490..000000000 --- a/pkg/subsystem/match/path_coincidence.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2023 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -package match - -import ( - "io/fs" - "regexp" - "runtime" - "sync" - - "github.com/google/syzkaller/pkg/subsystem/entity" -) - -func BuildCoincidenceMatrix(root fs.FS, list []*entity.Subsystem, - excludeRe *regexp.Regexp) (*CoincidenceMatrix, error) { - // Create a matcher. - matcher := MakePathMatcher(list) - chPaths, chResult := extractSubsystems(matcher) - // The final consumer goroutine. - cm := MakeCoincidenceMatrix() - ready := make(chan struct{}) - go func() { - for items := range chResult { - cm.Record(items...) - } - ready <- struct{}{} - }() - // Source of data. - err := fs.WalkDir(root, ".", func(path string, info fs.DirEntry, err error) error { - if err != nil || info.IsDir() { - return err - } - if !includePathRe.MatchString(path) || - (excludeRe != nil && excludeRe.MatchString(path)) { - return nil - } - chPaths <- path - return nil - }) - close(chPaths) - <-ready - return cm, err -} - -var ( - includePathRe = regexp.MustCompile(`(?:/|\.(?:c|h|S))$`) -) - -func extractSubsystems(matcher *PathMatcher) (chan<- string, <-chan []*entity.Subsystem) { - procs := runtime.NumCPU() - paths, output := make(chan string, procs), make(chan []*entity.Subsystem, procs) - var wg sync.WaitGroup - for i := 0; i < procs; i++ { - wg.Add(1) - go func() { - defer wg.Done() - for path := range paths { - output <- matcher.Match(path) - } - }() - } - go func() { - wg.Wait() - close(output) - }() - return paths, output -} diff --git a/pkg/subsystem/match/path_coincidence_test.go b/pkg/subsystem/match/path_coincidence_test.go deleted file mode 100644 index 2d1969d7f..000000000 --- a/pkg/subsystem/match/path_coincidence_test.go +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2023 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -package match - -import ( - "testing" - "testing/fstest" - - "github.com/google/syzkaller/pkg/subsystem/entity" - "github.com/stretchr/testify/assert" -) - -func TestBuildCoincidenceMatrix(t *testing.T) { - vfs := &entity.Subsystem{PathRules: []entity.PathRule{ - {IncludeRegexp: `^fs/`}, - }} - ext4 := &entity.Subsystem{PathRules: []entity.PathRule{ - {IncludeRegexp: `^fs/ext4/`}, - }} - ntfs := &entity.Subsystem{PathRules: []entity.PathRule{ - {IncludeRegexp: `^fs/ntfs/`}, - }} - kernel := &entity.Subsystem{PathRules: []entity.PathRule{ - {IncludeRegexp: `.*`}, - }} - - fs := fstest.MapFS{ - ".git/obj/12345": {}, - "fs/inode.c": {}, - "fs/ext4/file.c": {}, - "fs/ntfs/file.c": {}, - "fs/fat/file.c": {}, - "net/socket.c": {}, - } - matrix, err := BuildCoincidenceMatrix(fs, []*entity.Subsystem{vfs, ntfs, ext4, kernel}, nil) - assert.NoError(t, err) - - // Test total counts. - assert.Equal(t, 5, matrix.Count(kernel)) - assert.Equal(t, 4, matrix.Count(vfs)) - assert.Equal(t, 1, matrix.Count(ext4)) - - // Test pairwise counts. - assert.Equal(t, 1, matrix.Get(vfs, ext4)) - assert.Equal(t, 1, matrix.Get(vfs, ntfs)) - assert.Equal(t, 0, matrix.Get(ext4, ntfs)) - assert.Equal(t, 4, matrix.Get(kernel, vfs)) -} diff --git a/pkg/subsystem/match_test.go b/pkg/subsystem/match_test.go new file mode 100644 index 000000000..7c96b0bcd --- /dev/null +++ b/pkg/subsystem/match_test.go @@ -0,0 +1,51 @@ +// Copyright 2023 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package subsystem + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestPathMatcher(t *testing.T) { + arm := &Subsystem{ + PathRules: []PathRule{ + { + IncludeRegexp: `^arch/arm/.*$`, + ExcludeRegexp: `^arch/arm/boot/dts/.*$`, + }, + {IncludeRegexp: `^drivers/spi/spi-pl022\.c$`}, + { + // nolint:lll + IncludeRegexp: `^drivers/irqchip/irq-vic\.c$|^Documentation/devicetree/bindings/interrupt-controller/arm,vic\.yaml$`, + }, + }, + } + docs := &Subsystem{ + PathRules: []PathRule{ + {IncludeRegexp: `^Documentation/.*$`}, + }, + } + m := MakePathMatcher([]*Subsystem{arm, docs}) + assert.ElementsMatch(t, []*Subsystem{arm, docs}, + m.Match(`Documentation/devicetree/bindings/interrupt-controller/arm,vic.yaml`)) + assert.ElementsMatch(t, []*Subsystem{arm}, m.Match(`arch/arm/a.c`)) + assert.ElementsMatch(t, []*Subsystem{docs}, m.Match(`Documentation/a/b/c.md`)) + assert.Empty(t, m.Match(`arch/boot/dts/a.c`)) +} + +func TestPathMatchOrder(t *testing.T) { + s := &Subsystem{ + PathRules: []PathRule{ + { + IncludeRegexp: `^a/b/.*$`, + ExcludeRegexp: `^a/.*$`, + }, + }, + } + m := MakePathMatcher([]*Subsystem{s}) + // If we first exclude a/, then a/b/c never matches. + assert.Empty(t, m.Match("a/b/c")) +} diff --git a/pkg/subsystem/raw_extractor.go b/pkg/subsystem/raw_extractor.go index 4a86b1f6b..fa964fd25 100644 --- a/pkg/subsystem/raw_extractor.go +++ b/pkg/subsystem/raw_extractor.go @@ -4,21 +4,19 @@ package subsystem import ( - "github.com/google/syzkaller/pkg/subsystem/entity" - "github.com/google/syzkaller/pkg/subsystem/match" "github.com/google/syzkaller/prog" ) // rawExtractor performs low-level subsystem matching (directly by a path or a syscall). type rawExtractor struct { - matcher *match.PathMatcher - perCall map[string][]*entity.Subsystem + matcher *PathMatcher + perCall map[string][]*Subsystem } -func makeRawExtractor(list []*entity.Subsystem) *rawExtractor { +func makeRawExtractor(list []*Subsystem) *rawExtractor { ret := &rawExtractor{ - matcher: match.MakePathMatcher(list), - perCall: make(map[string][]*entity.Subsystem), + matcher: MakePathMatcher(list), + perCall: make(map[string][]*Subsystem), } for _, subsystem := range list { for _, call := range subsystem.Syscalls { @@ -28,19 +26,19 @@ func makeRawExtractor(list []*entity.Subsystem) *rawExtractor { return ret } -func (e *rawExtractor) FromPath(path string) []*entity.Subsystem { +func (e *rawExtractor) FromPath(path string) []*Subsystem { return e.matcher.Match(path) } -func (e *rawExtractor) FromProg(progBytes []byte) []*entity.Subsystem { - calls := make(map[*entity.Subsystem]struct{}) +func (e *rawExtractor) FromProg(progBytes []byte) []*Subsystem { + calls := make(map[*Subsystem]struct{}) progCalls, _, _ := prog.CallSet(progBytes) for call := range progCalls { for _, subsystem := range e.perCall[call] { calls[subsystem] = struct{}{} } } - list := []*entity.Subsystem{} + list := []*Subsystem{} for key := range calls { list = append(list, key) } diff --git a/pkg/subsystem/raw_extractor_test.go b/pkg/subsystem/raw_extractor_test.go index 9d221af86..44273be2e 100644 --- a/pkg/subsystem/raw_extractor_test.go +++ b/pkg/subsystem/raw_extractor_test.go @@ -6,23 +6,22 @@ package subsystem import ( "testing" - "github.com/google/syzkaller/pkg/subsystem/entity" "github.com/stretchr/testify/assert" ) func TestSubsystemExtractor(t *testing.T) { - ioUring := &entity.Subsystem{ + ioUring := &Subsystem{ Name: "io_uring", - PathRules: []entity.PathRule{ + PathRules: []PathRule{ { IncludeRegexp: `io_uring/.*`, }, }, Syscalls: []string{"syz_io_uring_setup"}, } - security := &entity.Subsystem{ + security := &Subsystem{ Name: "security", - PathRules: []entity.PathRule{ + PathRules: []PathRule{ { IncludeRegexp: `security/.*`, ExcludeRegexp: `security/selinux/.*`, @@ -32,21 +31,21 @@ func TestSubsystemExtractor(t *testing.T) { }, }, } - net := &entity.Subsystem{ + net := &Subsystem{ Name: "net", - PathRules: []entity.PathRule{ + PathRules: []PathRule{ { IncludeRegexp: `net/.*`, }, }, } - obj := makeRawExtractor([]*entity.Subsystem{ioUring, security, net}) + obj := makeRawExtractor([]*Subsystem{ioUring, security, net}) // Verify path matching. - assert.ElementsMatch(t, obj.FromPath(`io_uring/file.c`), []*entity.Subsystem{ioUring}) - assert.ElementsMatch(t, obj.FromPath(`security/file.c`), []*entity.Subsystem{security}) - assert.ElementsMatch(t, obj.FromPath(`security/selinux/file.c`), []*entity.Subsystem{}) - assert.ElementsMatch(t, obj.FromPath(`net/ipv6/calipso.c`), []*entity.Subsystem{net, security}) + assert.ElementsMatch(t, obj.FromPath(`io_uring/file.c`), []*Subsystem{ioUring}) + assert.ElementsMatch(t, obj.FromPath(`security/file.c`), []*Subsystem{security}) + assert.ElementsMatch(t, obj.FromPath(`security/selinux/file.c`), []*Subsystem{}) + assert.ElementsMatch(t, obj.FromPath(`net/ipv6/calipso.c`), []*Subsystem{net, security}) // Verify prog matching. // nolint: lll @@ -60,10 +59,10 @@ r3 = openat$ptmx(0xffffffffffffff9c, &(0x7f0000000040), 0x8a04, 0x0) syz_io_uring_submit(r1, r2, &(0x7f0000000000)=@IORING_OP_READ=@pass_buffer={0x16, 0x0, 0x0, @fd_index=0x5, 0x0, 0x0}, 0x0) ioctl$TIOCSETD(r3, 0x5423, &(0x7f0000000580)=0x3) io_uring_enter(r0, 0x2ff, 0x0, 0x0, 0x0, 0x0)`)), - []*entity.Subsystem{ioUring}) + []*Subsystem{ioUring}) // nolint: lll assert.ElementsMatch(t, obj.FromProg([]byte( `syz_mount_image$nilfs2(&(0x7f0000000000), &(0x7f0000000100)='./file0\x00', 0x100000, 0x3b, &(0x7f0000000200)=[{&(0x7f0000011240)="02", 0x1}, {&(0x7f0000012a40)="03000000", 0x4, 0x1}], 0x0, &(0x7f00000131c0), 0x1) openat$incfs(0xffffffffffffff9c, &(0x7f0000000000)='.pending_reads\x00', 0x4040, 0x0)`)), - []*entity.Subsystem{}) + []*Subsystem{}) } -- cgit mrf-deployment