aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/subsystem/linux
diff options
context:
space:
mode:
author Aleksandr Nogikh <nogikh@google.com> 2023-03-29 10:49:17 +0200
committer Aleksandr Nogikh <wp32pw@gmail.com> 2023-03-29 11:23:53 +0200
commit f325deb023e4e2fb9197004be1b3da738680429c (patch)
tree 6b9e23b5f1c0afdeb72e4112d430017e19d113c0 /pkg/subsystem/linux
parent c01f77498598ff3d147830906c4ca42e3fb1b8ff (diff)
pkg/subsystem: recognize more maintainers
Earlier we only took maintainers if there was just one MAINTAINERS record, but that was a very severe limitation. Let's try a more elaborate approach. It's also not perfect, but it allows us to extract many more maintainers while keeping false positives at zero. Group the raw MAINTAINERS records by their T: entries. If there's just one set of T: values among the records grouped under a mailing list, take the intersection of the M: entries of the records in that set.
Diffstat (limited to 'pkg/subsystem/linux')
-rw-r--r-- pkg/subsystem/linux/maintainers.go 3
-rw-r--r-- pkg/subsystem/linux/maintainers_test.go 3
-rw-r--r-- pkg/subsystem/linux/subsystems.go 74
3 files changed, 62 insertions, 18 deletions
diff --git a/pkg/subsystem/linux/maintainers.go b/pkg/subsystem/linux/maintainers.go
index 2f4d63003..90ca79bef 100644
--- a/pkg/subsystem/linux/maintainers.go
+++ b/pkg/subsystem/linux/maintainers.go
@@ -24,6 +24,7 @@ type maintainersRecord struct {
regexps []string
lists []string
maintainers []string
+ trees []string
}
func parseLinuxMaintainers(content io.Reader) ([]*maintainersRecord, error) {
@@ -128,6 +129,8 @@ func applyProperty(record *maintainersRecord, property *recordProperty) error {
return err
}
record.lists = append(record.lists, value)
+ case "T":
+ record.trees = append(record.trees, property.value)
}
return nil
}
diff --git a/pkg/subsystem/linux/maintainers_test.go b/pkg/subsystem/linux/maintainers_test.go
index 676bbcda1..f69485eae 100644
--- a/pkg/subsystem/linux/maintainers_test.go
+++ b/pkg/subsystem/linux/maintainers_test.go
@@ -188,6 +188,7 @@ func TestLinuxMaintainers(t *testing.T) {
"linux-media@vger.kernel.org",
},
maintainers: []string{"email2@kernel.org"},
+ trees: []string{"git git://linuxtv.org/media_tree.git"},
},
{
name: "RISC-V ARCHITECTURE",
@@ -199,12 +200,14 @@ func TestLinuxMaintainers(t *testing.T) {
"email4@kernel.org",
"email5@kernel.org",
},
+ trees: []string{"git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git"},
},
{
name: "THE REST",
includePatterns: []string{"*", "*/"},
lists: []string{"linux-kernel@vger.kernel.org"},
maintainers: []string{"email6@kernel.org"},
+ trees: []string{"git git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git"},
},
}
if diff := cmp.Diff(targetResult, result,
diff --git a/pkg/subsystem/linux/subsystems.go b/pkg/subsystem/linux/subsystems.go
index b14a3cbe5..880b1fcaf 100644
--- a/pkg/subsystem/linux/subsystems.go
+++ b/pkg/subsystem/linux/subsystems.go
@@ -134,19 +134,7 @@ func (ctx *linuxCtx) applyExtraRules(list []*subsystem.Subsystem) {
}
func mergeRawRecords(records []*maintainersRecord, email string) *subsystem.Subsystem {
- unique := func(list []string) []string {
- m := make(map[string]struct{})
- for _, s := range list {
- m[s] = struct{}{}
- }
- ret := []string{}
- for s := range m {
- ret = append(ret, s)
- }
- sort.Strings(ret)
- return ret
- }
- var lists, maintainers []string
+ var lists []string
subsystem := &subsystem.Subsystem{}
for _, record := range records {
rule := record.ToPathRule()
@@ -154,19 +142,69 @@ func mergeRawRecords(records []*maintainersRecord, email string) *subsystem.Subs
subsystem.PathRules = append(subsystem.PathRules, rule)
}
lists = append(lists, record.lists...)
- maintainers = append(maintainers, record.maintainers...)
}
if email != "" {
subsystem.Lists = []string{email}
} else if len(lists) > 0 {
subsystem.Lists = unique(lists)
}
- // There's a risk that we collect too many unrelated maintainers, so
- // let's only merge them if there are no lists.
+ subsystem.Maintainers = maintainersFromRecords(records)
+ return subsystem
+}
+
+func unique(list []string) []string {
+ m := make(map[string]struct{})
+ for _, s := range list {
+ m[s] = struct{}{}
+ }
+ ret := []string{}
+ for s := range m {
+ ret = append(ret, s)
+ }
+ sort.Strings(ret)
+ return ret
+}
+
+func maintainersFromRecords(records []*maintainersRecord) []string {
+ // Generally we avoid merging maintainers from too many MAINTAINERS records,
+ // as we may end up pinging too many unrelated people.
+ // But in some cases we can still reliably collect the information.
if len(records) <= 1 {
- subsystem.Maintainers = unique(maintainers)
+ // First of all, we're fine if there was just one record.
+ return unique(records[0].maintainers)
}
- return subsystem
+ // Also let's take a look at the entries that have tree information.
+ // They seem to be present only in the most important entries.
+ perTrees := map[string][][]string{}
+ for _, record := range records {
+ if len(record.trees) == 0 {
+ continue
+ }
+ sort.Strings(record.trees)
+ key := fmt.Sprintf("%v", record.trees)
+ perTrees[key] = append(perTrees[key], record.maintainers)
+ }
+ if len(perTrees) > 1 {
+ // There are several sets of trees, no way to determine the most important.
+ return nil
+ }
+ var maintainerLists [][]string
+ for _, value := range perTrees {
+ maintainerLists = value
+ }
+ // Now let's take the intersection of lists.
+ counts := map[string]int{}
+ var retList []string
+ for _, list := range maintainerLists {
+ list = unique(list)
+ for _, email := range list {
+ counts[email]++
+ if counts[email] == len(maintainerLists) {
+ retList = append(retList, email)
+ }
+ }
+ }
+ return retList
}
func getMaintainers(root fs.FS) ([]*maintainersRecord, error) {