From 249ceea9b77a9eda26fb696c50b673d6f295d7f4 Mon Sep 17 00:00:00 2001
From: Aleksandr Nogikh
Date: Tue, 17 Dec 2024 15:38:19 +0100
Subject: pkg/email/lore: extract patch series

Refactor the code to make it more reusable.
Add a method to extract specifically the list of new patch series.
---
 pkg/email/lore/parse.go | 184 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 178 insertions(+), 6 deletions(-)

(limited to 'pkg/email/lore/parse.go')

diff --git a/pkg/email/lore/parse.go b/pkg/email/lore/parse.go
index d0e4d4fe2..98152b408 100644
--- a/pkg/email/lore/parse.go
+++ b/pkg/email/lore/parse.go
@@ -4,14 +4,17 @@
 package lore
 
 import (
+	"fmt"
 	"regexp"
 	"sort"
+	"strconv"
 	"strings"
 
 	"github.com/google/syzkaller/dashboard/dashapi"
 	"github.com/google/syzkaller/pkg/email"
 )
 
+// Thread is a generic representation of a single discussion in the mailing list.
 type Thread struct {
 	Subject   string
 	MessageID string
@@ -20,9 +23,31 @@ type Thread struct {
 	Messages  []*email.Email
 }
 
+// Series represents a single patch series sent over email.
+type Series struct {
+	Subject   string
+	MessageID string
+	Version   int
+	Corrupted string // If non-empty, contains the reason why the series should be ignored.
+	Patches   []Patch
+}
+
+// Patch is a single patch email of a series together with its sequence number.
+type Patch struct {
+	Seq int
+	*email.Email
+}
+
 // Threads extracts individual threads from a list of emails.
 func Threads(emails []*email.Email) []*Thread {
+	return listThreads(emails, 0)
+}
+
+// listThreads extracts threads like Threads does. If maxDepth > 0, replies nested more
+// than maxDepth levels below the email the traversal started from are not visited.
+func listThreads(emails []*email.Email, maxDepth int) []*Thread {
 	ctx := &parseCtx{
+		maxDepth: maxDepth,
 		messages: map[string]*email.Email{},
 		next:     map[*email.Email][]*email.Email{},
 	}
@@ -33,6 +58,69 @@ func Threads(emails []*email.Email) []*Thread {
 	return ctx.threads
 }
 
+// PatchSeries is similar to Threads, but returns only the patch series submitted to the mailing lists.
+// A series that looks broken is still returned, with Corrupted explaining what is wrong with it.
+func PatchSeries(emails []*email.Email) []*Series {
+	var ret []*Series
+	// Normally, all following series patches are sent in response to the first email sent.
+	// So there's no sense to look at deeper replies.
+	for _, thread := range listThreads(emails, 1) {
+		if thread.Type != dashapi.DiscussionPatch {
+			continue
+		}
+		patch, ok := parsePatchSubject(thread.Subject)
+		if !ok {
+			// This must never happen.
+			panic("DiscussionPatch is set, but we fail to parse the thread subject")
+		}
+		total := patch.Total.ValueOr(1)
+		series := &Series{
+			Subject:   patch.Title,
+			MessageID: thread.MessageID,
+			Version:   patch.Version.ValueOr(1),
+		}
+		ret = append(ret, series)
+		if patch.Seq.IsSet() && patch.Seq.Value() > 1 {
+			series.Corrupted = "the first patch has seq>1"
+			continue
+		}
+		hasSeq := map[int]bool{}
+		for _, email := range thread.Messages {
+			patch, ok := parsePatchSubject(email.Subject)
+			if !ok {
+				continue
+			}
+			seq := patch.Seq.ValueOr(1)
+			if seq == 0 {
+				// The cover email is not of interest.
+				continue
+			}
+			if hasSeq[seq] {
+				// It's weird if that really happens, but let's skip for now.
+				continue
+			}
+			hasSeq[seq] = true
+			series.Patches = append(series.Patches, Patch{
+				Seq:   seq,
+				Email: email,
+			})
+		}
+		if len(hasSeq) != total {
+			series.Corrupted = fmt.Sprintf("the subject mentions %d patches, %d are found",
+				total, len(hasSeq))
+			continue
+		}
+		if len(series.Patches) == 0 {
+			series.Corrupted = "0 patches"
+			continue
+		}
+		sort.Slice(series.Patches, func(i, j int) bool {
+			return series.Patches[i].Seq < series.Patches[j].Seq
+		})
+	}
+	return ret
+}
+
 // DiscussionType extracts the specific discussion type from an email.
 func DiscussionType(msg *email.Email) dashapi.DiscussionType {
 	discType := dashapi.DiscussionMention
@@ -40,7 +128,7 @@ func DiscussionType(msg *email.Email) dashapi.DiscussionType {
 		discType = dashapi.DiscussionReport
 	}
 	// This is very crude, but should work for now.
-	if patchSubjectRe.MatchString(strings.ToLower(msg.Subject)) {
+	if _, ok := parsePatchSubject(msg.Subject); ok {
 		discType = dashapi.DiscussionPatch
 	} else if strings.Contains(msg.Subject, "Monthly") {
 		discType = dashapi.DiscussionReminder
@@ -48,9 +136,55 @@ func DiscussionType(msg *email.Email) dashapi.DiscussionType {
 	return discType
 }
 
-var patchSubjectRe = regexp.MustCompile(`\[(?:(?:rfc|resend)\s+)*patch`)
+// PatchSubject is the parsed form of a "[PATCH ...] title" email subject.
+type PatchSubject struct {
+	Title   string
+	Tags    []string // Sometimes there's e.g. "net" or "net-next" in the subject.
+	Version Optional[int]
+	Seq     Optional[int] // The "Seq/Total" part.
+	Total   Optional[int]
+}
+
+// nolint: lll
+var patchSubjectRe = regexp.MustCompile(`(?mi)^\[(?:([\w\s-]+)\s)?PATCH(?:\s([\w\s-]+))??(?:\s0*(\d+)\/(\d+))?\]\s*(.+)`)
+
+// parsePatchSubject splits a patch email subject into the title, the tags, the version and
+// the Seq/Total counters. The second result is false if the subject is not a patch subject.
+func parsePatchSubject(subject string) (PatchSubject, bool) {
+	var ret PatchSubject
+	groups := patchSubjectRe.FindStringSubmatch(subject)
+	if len(groups) == 0 {
+		return ret, false
+	}
+	tags := strings.Fields(groups[1])
+	for _, tag := range append(tags, strings.Fields(groups[2])...) {
+		// The regexp is case-insensitive, so "V2" must be recognized as a version just like "v2".
+		if lower := strings.ToLower(tag); strings.HasPrefix(lower, "v") {
+			val, err := strconv.Atoi(strings.TrimPrefix(lower, "v"))
+			if err == nil {
+				ret.Version.Set(val)
+				continue
+			}
+		}
+		ret.Tags = append(ret.Tags, tag)
+	}
+	sort.Strings(ret.Tags)
+	if groups[3] != "" {
+		if val, err := strconv.Atoi(groups[3]); err == nil {
+			ret.Seq.Set(val)
+		}
+	}
+	if groups[4] != "" {
+		if val, err := strconv.Atoi(groups[4]); err == nil {
+			ret.Total.Set(val)
+		}
+	}
+	ret.Title = groups[5]
+	return ret, true
+}
 
 type parseCtx struct {
+	maxDepth int
 	threads  []*Thread
 	messages map[string]*email.Email
 	next     map[*email.Email][]*email.Email
@@ -73,7 +207,7 @@ func (c *parseCtx) process() {
 	}
 	// Iterate starting from these tree nodes.
 	for _, node := range nodes {
-		c.visit(node, nil)
+		c.visit(node, nil, 0)
 	}
 	// Collect BugIDs.
 	for _, thread := range c.threads {
@@ -92,7 +226,7 @@ func (c *parseCtx) process() {
 	}
 }
 
-func (c *parseCtx) visit(msg *email.Email, thread *Thread) {
+func (c *parseCtx) visit(msg *email.Email, thread *Thread, depth int) {
 	var oldInfo *email.OldThreadInfo
 	if thread != nil {
 		oldInfo = &email.OldThreadInfo{
@@ -114,7 +248,45 @@ func (c *parseCtx) visit(msg *email.Email, thread *Thread) {
 		}
 		c.threads = append(c.threads, thread)
 	}
-	for _, nextMsg := range c.next[msg] {
-		c.visit(nextMsg, thread)
+	// A zero maxDepth means that the depth of the traversal is not limited.
+	if c.maxDepth == 0 || depth < c.maxDepth {
+		for _, nextMsg := range c.next[msg] {
+			c.visit(nextMsg, thread, depth+1)
+		}
 	}
 }
+
+// Optional holds a value of type T together with a flag that tells whether it was set.
+type Optional[T any] struct {
+	val T
+	set bool
+}
+
+// value returns an Optional that is set to val.
+func value[T any](val T) Optional[T] {
+	return Optional[T]{val: val, set: true}
+}
+
+// IsSet reports whether a value was set.
+func (o Optional[T]) IsSet() bool {
+	return o.set
+}
+
+// Value returns the stored value, which is the zero value of T if nothing was set.
+func (o Optional[T]) Value() T {
+	return o.val
+}
+
+// ValueOr returns the stored value if it was set and def otherwise.
+func (o Optional[T]) ValueOr(def T) T {
+	if o.set {
+		return o.val
+	}
+	return def
+}
+
+// Set stores val and marks the Optional as set.
+func (o *Optional[T]) Set(val T) {
+	o.val = val
+	o.set = true
+}
-- 
cgit mrf-deployment