From f325deb023e4e2fb9197004be1b3da738680429c Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Wed, 29 Mar 2023 10:49:17 +0200 Subject: pkg/subsystem: recognize more maintainers Earlier we only took maintainers if there was just one MAINTAINERS record, but that was a very severe limitation. Let's try a more elaborate approach. It's also not perfect, but allows us to extract many more maintainers, while keeping false positives at zero. Group raw MAINTAINER records by their T: entries. If there's just one set of T: values per group mailing list, take the intersection of M: entries from there. --- pkg/subsystem/linux/subsystems.go | 74 +++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 18 deletions(-) (limited to 'pkg/subsystem/linux/subsystems.go') diff --git a/pkg/subsystem/linux/subsystems.go b/pkg/subsystem/linux/subsystems.go index b14a3cbe5..880b1fcaf 100644 --- a/pkg/subsystem/linux/subsystems.go +++ b/pkg/subsystem/linux/subsystems.go @@ -134,19 +134,7 @@ func (ctx *linuxCtx) applyExtraRules(list []*subsystem.Subsystem) { } func mergeRawRecords(records []*maintainersRecord, email string) *subsystem.Subsystem { - unique := func(list []string) []string { - m := make(map[string]struct{}) - for _, s := range list { - m[s] = struct{}{} - } - ret := []string{} - for s := range m { - ret = append(ret, s) - } - sort.Strings(ret) - return ret - } - var lists, maintainers []string + var lists []string subsystem := &subsystem.Subsystem{} for _, record := range records { rule := record.ToPathRule() @@ -154,19 +142,69 @@ func mergeRawRecords(records []*maintainersRecord, email string) *subsystem.Subs subsystem.PathRules = append(subsystem.PathRules, rule) } lists = append(lists, record.lists...) - maintainers = append(maintainers, record.maintainers...) 
} if email != "" { subsystem.Lists = []string{email} } else if len(lists) > 0 { subsystem.Lists = unique(lists) } - // There's a risk that we collect too many unrelated maintainers, so - // let's only merge them if there are no lists. + subsystem.Maintainers = maintainersFromRecords(records) + return subsystem +} + +func unique(list []string) []string { + m := make(map[string]struct{}) + for _, s := range list { + m[s] = struct{}{} + } + ret := []string{} + for s := range m { + ret = append(ret, s) + } + sort.Strings(ret) + return ret +} + +func maintainersFromRecords(records []*maintainersRecord) []string { + // Generally we avoid merging maintainers from too many MAINTAINERS records, + // as we may end up pinging too many unrelated people. + // But in some cases we can still reliably collect the information. if len(records) <= 1 { - subsystem.Maintainers = unique(maintainers) + // First of all, we're fine if there was just one record. + return unique(records[0].maintainers) } - return subsystem + // Also let's take a look at the entries that have tree information. + // They seem to be present only in the most important entries. + perTrees := map[string][][]string{} + for _, record := range records { + if len(record.trees) == 0 { + continue + } + sort.Strings(record.trees) + key := fmt.Sprintf("%v", record.trees) + perTrees[key] = append(perTrees[key], record.maintainers) + } + if len(perTrees) > 1 { + // There are several sets of trees, no way to determine the most important. + return nil + } + var maintainerLists [][]string + for _, value := range perTrees { + maintainerLists = value + } + // Now let's take the intersection of lists. 
+ counts := map[string]int{} + var retList []string + for _, list := range maintainerLists { + list = unique(list) + for _, email := range list { + counts[email]++ + if counts[email] == len(maintainerLists) { + retList = append(retList, email) + } + } + } + return retList } func getMaintainers(root fs.FS) ([]*maintainersRecord, error) { -- cgit mrf-deployment