diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2023-04-12 11:17:35 +0200 |
|---|---|---|
| committer | Aleksandr Nogikh <wp32pw@gmail.com> | 2023-04-12 13:55:59 +0200 |
| commit | 5d17667bb6ecde104eff665d3c096ac2b7984648 (patch) | |
| tree | 6915c7b58d1d19ab402d78f8fec6878d51cf074e /pkg/email/lore | |
| parent | 02abcd096cbcce690366f853c8bc065d22aa205c (diff) | |
all: refactor discussion processing code
Now that too much logic seems to be duplicated in tools/syz-lore and
dahsboard/app/discussion.go, it's time to refactor the code.
Factor out the code that decides whether an incoming email message
should start a new thread or be appended to the previous one.
Restructure pkg/email/lore so that email processing matches the
one-by-one approach of reporting_email.go.
Diffstat (limited to 'pkg/email/lore')
| -rw-r--r-- | pkg/email/lore/parse.go | 125 | ||||
| -rw-r--r-- | pkg/email/lore/parse_test.go | 37 |
2 files changed, 103 insertions, 59 deletions
diff --git a/pkg/email/lore/parse.go b/pkg/email/lore/parse.go index 84bb56352..d2dc9c04b 100644 --- a/pkg/email/lore/parse.go +++ b/pkg/email/lore/parse.go @@ -4,12 +4,17 @@ package lore import ( + "sort" + "strings" + + "github.com/google/syzkaller/dashboard/dashapi" "github.com/google/syzkaller/pkg/email" ) type Thread struct { Subject string MessageID string + Type dashapi.DiscussionType BugIDs []string Messages []*email.Email } @@ -18,90 +23,92 @@ type Thread struct { func Threads(emails []*email.Email) []*Thread { ctx := &parseCtx{ messages: map[string]*email.Email{}, + next: map[*email.Email][]*email.Email{}, } for _, email := range emails { ctx.record(email) } - return ctx.threads() + ctx.process() + return ctx.threads +} + +// DiscussionType extracts the specific discussion type from an email. +func DiscussionType(msg *email.Email) dashapi.DiscussionType { + discType := dashapi.DiscussionReport + // This is very crude, but should work for now. + if strings.Contains(msg.Subject, "PATCH") { + discType = dashapi.DiscussionPatch + } else if strings.Contains(msg.Subject, "Monthly") { + discType = dashapi.DiscussionReminder + } + return discType } type parseCtx struct { + threads []*Thread messages map[string]*email.Email + next map[*email.Email][]*email.Email } func (c *parseCtx) record(msg *email.Email) { c.messages[msg.MessageID] = msg } -func (c *parseCtx) threads() []*Thread { - threads := map[string]*Thread{} - threadsList := []*Thread{} - - newThread := func(msg *email.Email) { - thread := &Thread{ - MessageID: msg.MessageID, - Subject: msg.Subject, - } - threads[msg.MessageID] = thread - threadsList = append(threadsList, thread) - } - - // Detect threads, i.e. messages without In-Reply-To. +func (c *parseCtx) process() { + // List messages for which we dont't have ancestors. + nodes := []*email.Email{} for _, msg := range c.messages { - if msg.InReplyTo == "" { - newThread(msg) + if msg.InReplyTo == "" || c.messages[msg.InReplyTo] == nil { + nodes = append(nodes, msg) + } else { + parent := c.messages[msg.InReplyTo] + c.next[parent] = append(c.next[parent], msg) } } - // Assign messages to threads. - for _, msg := range c.messages { - base := c.first(msg) - if base == nil { - if msg.OwnEmail { - // Likely bot's reply to a non-public command. Ignore. - continue - } - // Pretend it's a separate discussion. - newThread(msg) - base = msg - } - thread := threads[base.MessageID] - thread.BugIDs = append(thread.BugIDs, msg.BugIDs...) - thread.Messages = append(threads[base.MessageID].Messages, msg) + // Iterate starting from these tree nodes. + for _, node := range nodes { + c.visit(node, nil) } - // Deduplicate BugIDs lists. - for _, thread := range threads { - if len(thread.BugIDs) == 0 { - continue - } + // Collect BugIDs. + for _, thread := range c.threads { unique := map[string]struct{}{} - newList := []string{} - for _, id := range thread.BugIDs { - if _, ok := unique[id]; !ok { - newList = append(newList, id) + for _, msg := range thread.Messages { + for _, id := range msg.BugIDs { + unique[id] = struct{}{} } - unique[id] = struct{}{} } - thread.BugIDs = newList + var ids []string + for id := range unique { + ids = append(ids, id) + } + sort.Strings(ids) + thread.BugIDs = ids } - return threadsList } -// first finds the firt message of an email thread. -func (c *parseCtx) first(msg *email.Email) *email.Email { - visited := map[*email.Email]struct{}{} - for { - // There have been a few cases when we'd otherwise get an infinite loop. - if _, ok := visited[msg]; ok { - return nil +func (c *parseCtx) visit(msg *email.Email, thread *Thread) { + var oldInfo *email.OldThreadInfo + if thread != nil { + oldInfo = &email.OldThreadInfo{ + ThreadType: thread.Type, } - visited[msg] = struct{}{} - if msg.InReplyTo == "" { - return msg - } - msg = c.messages[msg.InReplyTo] - if msg == nil { - // Probably we just didn't load the message. - return nil + } + msgType := DiscussionType(msg) + switch email.NewMessageAction(msg, msgType, oldInfo) { + case email.ActionIgnore: + thread = nil + case email.ActionAppend: + thread.Messages = append(thread.Messages, msg) + case email.ActionNewThread: + thread = &Thread{ + MessageID: msg.MessageID, + Subject: msg.Subject, + Type: msgType, + Messages: []*email.Email{msg}, } + c.threads = append(c.threads, thread) + } + for _, nextMsg := range c.next[msg] { + c.visit(nextMsg, thread) } } diff --git a/pkg/email/lore/parse_test.go b/pkg/email/lore/parse_test.go index 86e2b51b7..2343a7c26 100644 --- a/pkg/email/lore/parse_test.go +++ b/pkg/email/lore/parse_test.go @@ -10,6 +10,7 @@ import ( "time" "github.com/google/go-cmp/cmp" + "github.com/google/syzkaller/dashboard/dashapi" "github.com/google/syzkaller/pkg/email" ) @@ -112,6 +113,7 @@ Bug report`, "<A-Base>": { Subject: "Thread A", MessageID: "<A-Base>", + Type: dashapi.DiscussionReport, Messages: []*email.Email{ { MessageID: "<A-Base>", @@ -144,6 +146,7 @@ Bug report`, "<Bug>": { Subject: "[syzbot] Some bug", MessageID: "<Bug>", + Type: dashapi.DiscussionReport, BugIDs: []string{"4564456"}, Messages: []*email.Email{ { @@ -180,6 +183,7 @@ Bug report`, "<Patch>": { Subject: "[PATCH] Some bug fixed", MessageID: "<Patch>", + Type: dashapi.DiscussionPatch, BugIDs: []string{"12345"}, Messages: []*email.Email{ { @@ -196,6 +200,7 @@ Bug report`, "<Sub-Discussion>": { Subject: "[syzbot] Some bug 2", MessageID: "<Sub-Discussion>", + Type: dashapi.DiscussionReport, BugIDs: []string{"4564456"}, Messages: []*email.Email{ { @@ -244,3 +249,35 @@ Bug report`, t.Fatalf("Expected %d threads, got %d", len(expected), len(threads)) } } + +func TestDiscussionType(t *testing.T) { + tests := []struct { + msg *email.Email + ret dashapi.DiscussionType + }{ + { + msg: &email.Email{ + Subject: "[PATCH] Bla-bla", + }, + ret: dashapi.DiscussionPatch, + }, + { + msg: &email.Email{ + Subject: "[syzbot] Monthly ext4 report", + }, + ret: dashapi.DiscussionReminder, + }, + { + msg: &email.Email{ + Subject: "[syzbot] WARNING in abcd", + }, + ret: dashapi.DiscussionReport, + }, + } + for _, test := range tests { + got := DiscussionType(test.msg) + if got != test.ret { + t.Fatalf("expected %v got %v for %v", test.ret, got, test.msg) + } + } +} |
