aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/email/lore/parse.go
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2024-12-17 15:38:19 +0100
committerAleksandr Nogikh <nogikh@google.com>2025-01-13 08:44:57 +0000
commit249ceea9b77a9eda26fb696c50b673d6f295d7f4 (patch)
treec2236298dc0b9b07f5450c63b5bad0f3a35f19f3 /pkg/email/lore/parse.go
parent6dbc6a9bc76e06852841ed5c5bdbb78409b17f53 (diff)
pkg/email/lore: extract patch series
Refactor the thread-listing code to make it more reusable. Add a function (PatchSeries) that extracts only the new patch series from a list of emails.
Diffstat (limited to 'pkg/email/lore/parse.go')
-rw-r--r--pkg/email/lore/parse.go169
1 files changed, 163 insertions, 6 deletions
diff --git a/pkg/email/lore/parse.go b/pkg/email/lore/parse.go
index d0e4d4fe2..98152b408 100644
--- a/pkg/email/lore/parse.go
+++ b/pkg/email/lore/parse.go
@@ -4,14 +4,17 @@
package lore
import (
+ "fmt"
"regexp"
"sort"
+ "strconv"
"strings"
"github.com/google/syzkaller/dashboard/dashapi"
"github.com/google/syzkaller/pkg/email"
)
+// Thread is a generic representation of a single discussion in the mailing list.
type Thread struct {
Subject string
MessageID string
@@ -20,9 +23,28 @@ type Thread struct {
Messages []*email.Email
}
+// Series represents a single patch series sent over email.
+type Series struct {
+ Subject string
+ MessageID string
+ Version int
+ Corrupted string // If non-empty, contains a reason why the series better be ignored.
+ Patches []Patch
+}
+
+type Patch struct {
+ Seq int
+ *email.Email
+}
+
// Threads extracts individual threads from a list of emails.
func Threads(emails []*email.Email) []*Thread {
+ return listThreads(emails, 0)
+}
+
+func listThreads(emails []*email.Email, maxDepth int) []*Thread {
ctx := &parseCtx{
+ maxDepth: maxDepth,
messages: map[string]*email.Email{},
next: map[*email.Email][]*email.Email{},
}
@@ -33,6 +55,68 @@ func Threads(emails []*email.Email) []*Thread {
return ctx.threads
}
+// PatchSeries is similar to Threads, but returns only the patch series submitted to the mailing lists.
+func PatchSeries(emails []*email.Email) []*Series {
+ var ret []*Series
+ // Normally, all following series patches are sent in response to the first email sent.
+ // So there's no sense to look at deeper replies.
+ for _, thread := range listThreads(emails, 1) {
+ if thread.Type != dashapi.DiscussionPatch {
+ continue
+ }
+ patch, ok := parsePatchSubject(thread.Subject)
+ if !ok {
+ // It must never be happening.
+ panic("DiscussionPatch is set, but we fail to parse the thread subject")
+ }
+ total := patch.Total.ValueOr(1)
+ series := &Series{
+ Subject: patch.Title,
+ MessageID: thread.MessageID,
+ Version: patch.Version.ValueOr(1),
+ }
+ ret = append(ret, series)
+ if patch.Seq.IsSet() && patch.Seq.Value() > 1 {
+ series.Corrupted = "the first patch has seq>1"
+ continue
+ }
+ hasSeq := map[int]bool{}
+ for _, email := range thread.Messages {
+ patch, ok := parsePatchSubject(email.Subject)
+ if !ok {
+ continue
+ }
+ seq := patch.Seq.ValueOr(1)
+ if seq == 0 {
+ // The cover email is not of interest.
+ continue
+ }
+ if hasSeq[seq] {
+ // It's weird if that really happens, but let's skip for now.
+ continue
+ }
+ hasSeq[seq] = true
+ series.Patches = append(series.Patches, Patch{
+ Seq: seq,
+ Email: email,
+ })
+ }
+ if len(hasSeq) != total {
+ series.Corrupted = fmt.Sprintf("the subject mentions %d patches, %d are found",
+ total, len(hasSeq))
+ continue
+ }
+ if len(series.Patches) == 0 {
+ series.Corrupted = "0 patches"
+ continue
+ }
+ sort.Slice(series.Patches, func(i, j int) bool {
+ return series.Patches[i].Seq < series.Patches[j].Seq
+ })
+ }
+ return ret
+}
+
// DiscussionType extracts the specific discussion type from an email.
func DiscussionType(msg *email.Email) dashapi.DiscussionType {
discType := dashapi.DiscussionMention
@@ -40,7 +124,7 @@ func DiscussionType(msg *email.Email) dashapi.DiscussionType {
discType = dashapi.DiscussionReport
}
// This is very crude, but should work for now.
- if patchSubjectRe.MatchString(strings.ToLower(msg.Subject)) {
+ if _, ok := parsePatchSubject(msg.Subject); ok {
discType = dashapi.DiscussionPatch
} else if strings.Contains(msg.Subject, "Monthly") {
discType = dashapi.DiscussionReminder
@@ -48,9 +132,51 @@ func DiscussionType(msg *email.Email) dashapi.DiscussionType {
return discType
}
-var patchSubjectRe = regexp.MustCompile(`\[(?:(?:rfc|resend)\s+)*patch`)
+type PatchSubject struct {
+ Title string
+ Tags []string // Sometimes there's e.g. "net" or "next-next" in the subject.
+ Version Optional[int]
+ Seq Optional[int] // The "Seq/Total" part.
+ Total Optional[int]
+}
+
+// nolint: lll
+var patchSubjectRe = regexp.MustCompile(`(?mi)^\[(?:([\w\s-]+)\s)?PATCH(?:\s([\w\s-]+))??(?:\s0*(\d+)\/(\d+))?\]\s*(.+)`)
+
+func parsePatchSubject(subject string) (PatchSubject, bool) {
+ var ret PatchSubject
+ groups := patchSubjectRe.FindStringSubmatch(subject)
+ if len(groups) == 0 {
+ return ret, false
+ }
+ tags := strings.Fields(groups[1])
+ for _, tag := range append(tags, strings.Fields(groups[2])...) {
+ if strings.HasPrefix(tag, "v") {
+ val, err := strconv.Atoi(strings.TrimPrefix(tag, "v"))
+ if err == nil {
+ ret.Version.Set(val)
+ continue
+ }
+ }
+ ret.Tags = append(ret.Tags, tag)
+ }
+ sort.Strings(ret.Tags)
+ if groups[3] != "" {
+ if val, err := strconv.Atoi(groups[3]); err == nil {
+ ret.Seq.Set(val)
+ }
+ }
+ if groups[4] != "" {
+ if val, err := strconv.Atoi(groups[4]); err == nil {
+ ret.Total.Set(val)
+ }
+ }
+ ret.Title = groups[5]
+ return ret, true
+}
type parseCtx struct {
+ maxDepth int
threads []*Thread
messages map[string]*email.Email
next map[*email.Email][]*email.Email
@@ -73,7 +199,7 @@ func (c *parseCtx) process() {
}
// Iterate starting from these tree nodes.
for _, node := range nodes {
- c.visit(node, nil)
+ c.visit(node, nil, 0)
}
// Collect BugIDs.
for _, thread := range c.threads {
@@ -92,7 +218,7 @@ func (c *parseCtx) process() {
}
}
-func (c *parseCtx) visit(msg *email.Email, thread *Thread) {
+func (c *parseCtx) visit(msg *email.Email, thread *Thread, depth int) {
var oldInfo *email.OldThreadInfo
if thread != nil {
oldInfo = &email.OldThreadInfo{
@@ -114,7 +240,38 @@ func (c *parseCtx) visit(msg *email.Email, thread *Thread) {
}
c.threads = append(c.threads, thread)
}
- for _, nextMsg := range c.next[msg] {
- c.visit(nextMsg, thread)
+ if c.maxDepth == 0 || depth < c.maxDepth {
+ for _, nextMsg := range c.next[msg] {
+ c.visit(nextMsg, thread, depth+1)
+ }
}
}
+
// Optional holds a value of type T together with a flag that tells whether
// the value has been assigned. The zero Optional is unset.
type Optional[T any] struct {
	val T
	set bool
}

// value returns an Optional that already holds val.
func value[T any](val T) Optional[T] {
	var opt Optional[T]
	opt.Set(val)
	return opt
}

// IsSet reports whether a value has been stored.
func (o Optional[T]) IsSet() bool { return o.set }

// Value returns the stored value. For an unset Optional it is the zero value of T.
func (o Optional[T]) Value() T { return o.val }

// ValueOr returns the stored value, or def if nothing has been stored.
func (o Optional[T]) ValueOr(def T) T {
	if !o.set {
		return def
	}
	return o.val
}

// Set stores val and marks the Optional as set.
func (o *Optional[T]) Set(val T) {
	o.val, o.set = val, true
}