diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2023-03-29 10:49:17 +0200 |
|---|---|---|
| committer | Aleksandr Nogikh <wp32pw@gmail.com> | 2023-03-29 11:23:53 +0200 |
| commit | f325deb023e4e2fb9197004be1b3da738680429c (patch) | |
| tree | 6b9e23b5f1c0afdeb72e4112d430017e19d113c0 /pkg/subsystem/linux | |
| parent | c01f77498598ff3d147830906c4ca42e3fb1b8ff (diff) | |
pkg/subsystem: recognize more maintainers
Earlier we only took maintainers if there was just one MAINTAINERS
record, but that was a very severe limitation.
Let's try a more elaborate approach. It's also not perfect, but it allows
us to extract many more maintainers, while keeping false positives at
zero.
Group raw MAINTAINERS records by their T: entries. If the records that
share a mailing list have just one distinct set of T: values, take the
intersection of the M: entries of those records.
Diffstat (limited to 'pkg/subsystem/linux')
| -rw-r--r-- | pkg/subsystem/linux/maintainers.go | 3 | ||||
| -rw-r--r-- | pkg/subsystem/linux/maintainers_test.go | 3 | ||||
| -rw-r--r-- | pkg/subsystem/linux/subsystems.go | 74 |
3 files changed, 62 insertions, 18 deletions
diff --git a/pkg/subsystem/linux/maintainers.go b/pkg/subsystem/linux/maintainers.go index 2f4d63003..90ca79bef 100644 --- a/pkg/subsystem/linux/maintainers.go +++ b/pkg/subsystem/linux/maintainers.go @@ -24,6 +24,7 @@ type maintainersRecord struct { regexps []string lists []string maintainers []string + trees []string } func parseLinuxMaintainers(content io.Reader) ([]*maintainersRecord, error) { @@ -128,6 +129,8 @@ func applyProperty(record *maintainersRecord, property *recordProperty) error { return err } record.lists = append(record.lists, value) + case "T": + record.trees = append(record.trees, property.value) } return nil } diff --git a/pkg/subsystem/linux/maintainers_test.go b/pkg/subsystem/linux/maintainers_test.go index 676bbcda1..f69485eae 100644 --- a/pkg/subsystem/linux/maintainers_test.go +++ b/pkg/subsystem/linux/maintainers_test.go @@ -188,6 +188,7 @@ func TestLinuxMaintainers(t *testing.T) { "linux-media@vger.kernel.org", }, maintainers: []string{"email2@kernel.org"}, + trees: []string{"git git://linuxtv.org/media_tree.git"}, }, { name: "RISC-V ARCHITECTURE", @@ -199,12 +200,14 @@ func TestLinuxMaintainers(t *testing.T) { "email4@kernel.org", "email5@kernel.org", }, + trees: []string{"git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git"}, }, { name: "THE REST", includePatterns: []string{"*", "*/"}, lists: []string{"linux-kernel@vger.kernel.org"}, maintainers: []string{"email6@kernel.org"}, + trees: []string{"git git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git"}, }, } if diff := cmp.Diff(targetResult, result, diff --git a/pkg/subsystem/linux/subsystems.go b/pkg/subsystem/linux/subsystems.go index b14a3cbe5..880b1fcaf 100644 --- a/pkg/subsystem/linux/subsystems.go +++ b/pkg/subsystem/linux/subsystems.go @@ -134,19 +134,7 @@ func (ctx *linuxCtx) applyExtraRules(list []*subsystem.Subsystem) { } func mergeRawRecords(records []*maintainersRecord, email string) *subsystem.Subsystem { - unique := func(list []string) []string { 
- m := make(map[string]struct{}) - for _, s := range list { - m[s] = struct{}{} - } - ret := []string{} - for s := range m { - ret = append(ret, s) - } - sort.Strings(ret) - return ret - } - var lists, maintainers []string + var lists []string subsystem := &subsystem.Subsystem{} for _, record := range records { rule := record.ToPathRule() @@ -154,19 +142,69 @@ func mergeRawRecords(records []*maintainersRecord, email string) *subsystem.Subs subsystem.PathRules = append(subsystem.PathRules, rule) } lists = append(lists, record.lists...) - maintainers = append(maintainers, record.maintainers...) } if email != "" { subsystem.Lists = []string{email} } else if len(lists) > 0 { subsystem.Lists = unique(lists) } - // There's a risk that we collect too many unrelated maintainers, so - // let's only merge them if there are no lists. + subsystem.Maintainers = maintainersFromRecords(records) + return subsystem +} + +func unique(list []string) []string { + m := make(map[string]struct{}) + for _, s := range list { + m[s] = struct{}{} + } + ret := []string{} + for s := range m { + ret = append(ret, s) + } + sort.Strings(ret) + return ret +} + +func maintainersFromRecords(records []*maintainersRecord) []string { + // Generally we avoid merging maintainers from too many MAINTAINERS records, + // as we may end up pinging too many unrelated people. + // But in some cases we can still reliably collect the information. if len(records) <= 1 { - subsystem.Maintainers = unique(maintainers) + // First of all, we're fine if there was just on record. + return unique(records[0].maintainers) } - return subsystem + // Also let's take a look at the entries that have tree information. + // They seem to be present only in the most important entries. 
+ perTrees := map[string][][]string{} + for _, record := range records { + if len(record.trees) == 0 { + continue + } + sort.Strings(record.trees) + key := fmt.Sprintf("%v", record.trees) + perTrees[key] = append(perTrees[key], record.maintainers) + } + if len(perTrees) > 1 { + // There are several sets of trees, no way to determine the most important. + return nil + } + var maintainerLists [][]string + for _, value := range perTrees { + maintainerLists = value + } + // Now let's take the intersection of lists. + counts := map[string]int{} + var retList []string + for _, list := range maintainerLists { + list = unique(list) + for _, email := range list { + counts[email]++ + if counts[email] == len(maintainerLists) { + retList = append(retList, email) + } + } + } + return retList } func getMaintainers(root fs.FS) ([]*maintainersRecord, error) { |
