diff options
| -rw-r--r-- | dashboard/app/discussion.go | 62 | ||||
| -rw-r--r-- | dashboard/app/reporting_email.go | 18 | ||||
| -rw-r--r-- | pkg/email/action.go | 38 | ||||
| -rw-r--r-- | pkg/email/action_test.go | 78 | ||||
| -rw-r--r-- | pkg/email/lore/parse.go | 125 | ||||
| -rw-r--r-- | pkg/email/lore/parse_test.go | 37 | ||||
| -rw-r--r-- | tools/syz-lore/query_lkml.go | 12 |
7 files changed, 254 insertions, 116 deletions
diff --git a/dashboard/app/discussion.go b/dashboard/app/discussion.go index b586d151d..04f0370b8 100644 --- a/dashboard/app/discussion.go +++ b/dashboard/app/discussion.go @@ -7,55 +7,47 @@ import ( "fmt" "sort" "strings" - "time" "github.com/google/syzkaller/dashboard/dashapi" + "github.com/google/syzkaller/pkg/email" "golang.org/x/net/context" db "google.golang.org/appengine/v2/datastore" ) -type newDiscussionMessage struct { - id string - subject string - msgSource dashapi.DiscussionSource - msgType dashapi.DiscussionType - bugIDs []string - inReplyTo string - external bool - time time.Time -} - // saveDiscussionMessage is meant to be called after each received E-mail message, // for which we know the BugID. -func saveDiscussionMessage(c context.Context, msg *newDiscussionMessage) error { +func saveDiscussionMessage(c context.Context, msg *email.Email, + msgSource dashapi.DiscussionSource, msgType dashapi.DiscussionType) error { discUpdate := &dashapi.Discussion{ - Source: msg.msgSource, - Type: msg.msgType, - BugIDs: msg.bugIDs, - } - if msg.inReplyTo != "" { - d, err := discussionByMessageID(c, msg.msgSource, msg.inReplyTo) - if err == nil { - discUpdate.ID = d.ID - discUpdate.Type = dashapi.DiscussionType(d.Type) - } else if !msg.external { - // Most likely it's a public bot's reply to a non-public - // patch testing request. Ignore it. - return nil + Source: msgSource, + Type: msgType, + BugIDs: msg.BugIDs, + } + var parent *Discussion + var oldThreadInfo *email.OldThreadInfo + if msg.InReplyTo != "" { + parent, _ = discussionByMessageID(c, msgSource, msg.InReplyTo) + if parent != nil { + oldThreadInfo = &email.OldThreadInfo{ + ThreadType: dashapi.DiscussionType(parent.Type), + } } - // If the original discussion is not in the DB, it means we - // were likely only mentioned in some further discussion. - // Remember then only the sub-thread visible to us. } - if discUpdate.ID == "" { + switch email.NewMessageAction(msg, msgType, oldThreadInfo) { + case email.ActionIgnore: + return nil + case email.ActionAppend: + discUpdate.ID = parent.ID + discUpdate.Type = oldThreadInfo.ThreadType + case email.ActionNewThread: // Use the current message as the discussion's head. - discUpdate.ID = msg.id - discUpdate.Subject = msg.subject + discUpdate.ID = msg.MessageID + discUpdate.Subject = msg.Subject } discUpdate.Messages = append(discUpdate.Messages, dashapi.DiscussionMessage{ - ID: msg.id, - Time: msg.time, - External: msg.external, + ID: msg.MessageID, + Time: msg.Date, + External: !msg.OwnEmail, }) return mergeDiscussion(c, discUpdate) } diff --git a/dashboard/app/reporting_email.go b/dashboard/app/reporting_email.go index 56ea74bcb..5982cb526 100644 --- a/dashboard/app/reporting_email.go +++ b/dashboard/app/reporting_email.go @@ -20,6 +20,7 @@ import ( "github.com/google/syzkaller/dashboard/dashapi" "github.com/google/syzkaller/pkg/email" + "github.com/google/syzkaller/pkg/email/lore" "github.com/google/syzkaller/pkg/html" "github.com/google/syzkaller/pkg/subsystem" "golang.org/x/net/context" @@ -554,10 +555,9 @@ func processDiscussionEmail(c context.Context, msg *email.Email, source dashapi. msg.BugIDs = msg.BugIDs[:limitIDs] } log.Infof(c, "saving to discussions for %q", msg.BugIDs) - // This is very crude, but should work for now. dType := dashapi.DiscussionReport - if strings.Contains(msg.Subject, "PATCH") { - dType = dashapi.DiscussionPatch + if source == dashapi.DiscussionLore { + dType = lore.DiscussionType(msg) } extIDs := []string{} for _, id := range msg.BugIDs { @@ -570,20 +570,12 @@ func processDiscussionEmail(c context.Context, msg *email.Email, source dashapi. extIDs = append(extIDs, id) } } + msg.BugIDs = extIDs if len(extIDs) == 0 { log.Infof(c, "filtered all extIDs out") return nil } - err := saveDiscussionMessage(c, &newDiscussionMessage{ - id: msg.MessageID, - subject: msg.Subject, - msgSource: source, - msgType: dType, - bugIDs: extIDs, - inReplyTo: msg.InReplyTo, - external: !msg.OwnEmail, - time: msg.Date, - }) + err := saveDiscussionMessage(c, msg, source, dType) if err != nil { return fmt.Errorf("failed to save in discussions: %v", err) } diff --git a/pkg/email/action.go b/pkg/email/action.go new file mode 100644 index 000000000..1d9fb22ff --- /dev/null +++ b/pkg/email/action.go @@ -0,0 +1,38 @@ +// Copyright 2023 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package email + +import "github.com/google/syzkaller/dashboard/dashapi" + +type OldThreadInfo struct { + ThreadType dashapi.DiscussionType +} + +type MessageAction string + +const ( + ActionIgnore MessageAction = "ignore" + ActionAppend MessageAction = "append" + ActionNewThread MessageAction = "new-thread" +) + +func NewMessageAction(msg *Email, msgType dashapi.DiscussionType, oldThread *OldThreadInfo) MessageAction { + if msg.InReplyTo == "" { + // If it's not a reply, always start a new thread. + return ActionNewThread + } + if oldThread != nil { + // Otherwise just append the message. + return ActionAppend + } + if msg.OwnEmail { + // Most likely it's a bot's public reply to a non-public + // patch testing request. Ignore it. + return ActionIgnore + } + // If the original discussion is not recorded anywhere, it means + // we were likely only mentioned in some further discussion. + // Remember then only the sub-thread visible to us. + return ActionNewThread +} diff --git a/pkg/email/action_test.go b/pkg/email/action_test.go new file mode 100644 index 000000000..51b0505f8 --- /dev/null +++ b/pkg/email/action_test.go @@ -0,0 +1,78 @@ +// Copyright 2023 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package email + +import ( + "testing" + + "github.com/google/syzkaller/dashboard/dashapi" +) + +func TestMessageActions(t *testing.T) { + tests := []struct { + name string + msg *Email + msgType dashapi.DiscussionType + oldThread *OldThreadInfo + result MessageAction + }{ + { + name: "plain new thread", + msg: &Email{}, + msgType: dashapi.DiscussionReport, + oldThread: nil, + result: ActionNewThread, + }, + { + name: "plain reply to a report", + msg: &Email{ + InReplyTo: "<abcd>", + }, + msgType: dashapi.DiscussionReport, + oldThread: &OldThreadInfo{ + ThreadType: dashapi.DiscussionReport, + }, + result: ActionAppend, + }, + { + name: "plain reply to a patch", + msg: &Email{ + InReplyTo: "<abcd>", + }, + msgType: dashapi.DiscussionReport, + oldThread: &OldThreadInfo{ + ThreadType: dashapi.DiscussionPatch, + }, + result: ActionAppend, + }, + { + name: "sudden syzbot reply", + msg: &Email{ + OwnEmail: true, + InReplyTo: "<abcd>", + }, + msgType: dashapi.DiscussionReport, + oldThread: nil, + result: ActionIgnore, + }, + { + name: "legit subdiscussion", + msg: &Email{ + InReplyTo: "<abcd>", + }, + msgType: dashapi.DiscussionReport, + oldThread: nil, + result: ActionNewThread, + }, + } + for _, _test := range tests { + test := _test + t.Run(test.name, func(tt *testing.T) { + got := NewMessageAction(test.msg, test.msgType, test.oldThread) + if got != test.result { + t.Fatalf("wanted %v, got %v", test.result, got) + } + }) + } +} diff --git a/pkg/email/lore/parse.go b/pkg/email/lore/parse.go index 84bb56352..d2dc9c04b 100644 --- a/pkg/email/lore/parse.go +++ b/pkg/email/lore/parse.go @@ -4,12 +4,17 @@ package lore import ( + "sort" + "strings" + + "github.com/google/syzkaller/dashboard/dashapi" "github.com/google/syzkaller/pkg/email" ) type Thread struct { Subject string MessageID string + Type dashapi.DiscussionType BugIDs []string Messages []*email.Email } @@ -18,90 +23,92 @@ type Thread struct { func Threads(emails []*email.Email) []*Thread { ctx := &parseCtx{ messages: map[string]*email.Email{}, + next: map[*email.Email][]*email.Email{}, } for _, email := range emails { ctx.record(email) } - return ctx.threads() + ctx.process() + return ctx.threads +} + +// DiscussionType extracts the specific discussion type from an email. +func DiscussionType(msg *email.Email) dashapi.DiscussionType { + discType := dashapi.DiscussionReport + // This is very crude, but should work for now. + if strings.Contains(msg.Subject, "PATCH") { + discType = dashapi.DiscussionPatch + } else if strings.Contains(msg.Subject, "Monthly") { + discType = dashapi.DiscussionReminder + } + return discType } type parseCtx struct { + threads []*Thread messages map[string]*email.Email + next map[*email.Email][]*email.Email } func (c *parseCtx) record(msg *email.Email) { c.messages[msg.MessageID] = msg } -func (c *parseCtx) threads() []*Thread { - threads := map[string]*Thread{} - threadsList := []*Thread{} - - newThread := func(msg *email.Email) { - thread := &Thread{ - MessageID: msg.MessageID, - Subject: msg.Subject, - } - threads[msg.MessageID] = thread - threadsList = append(threadsList, thread) - } - - // Detect threads, i.e. messages without In-Reply-To. +func (c *parseCtx) process() { + // List messages for which we dont't have ancestors. + nodes := []*email.Email{} for _, msg := range c.messages { - if msg.InReplyTo == "" { - newThread(msg) + if msg.InReplyTo == "" || c.messages[msg.InReplyTo] == nil { + nodes = append(nodes, msg) + } else { + parent := c.messages[msg.InReplyTo] + c.next[parent] = append(c.next[parent], msg) } } - // Assign messages to threads. - for _, msg := range c.messages { - base := c.first(msg) - if base == nil { - if msg.OwnEmail { - // Likely bot's reply to a non-public command. Ignore. - continue - } - // Pretend it's a separate discussion. - newThread(msg) - base = msg - } - thread := threads[base.MessageID] - thread.BugIDs = append(thread.BugIDs, msg.BugIDs...) - thread.Messages = append(threads[base.MessageID].Messages, msg) + // Iterate starting from these tree nodes. + for _, node := range nodes { + c.visit(node, nil) } - // Deduplicate BugIDs lists. - for _, thread := range threads { - if len(thread.BugIDs) == 0 { - continue - } + // Collect BugIDs. + for _, thread := range c.threads { unique := map[string]struct{}{} - newList := []string{} - for _, id := range thread.BugIDs { - if _, ok := unique[id]; !ok { - newList = append(newList, id) + for _, msg := range thread.Messages { + for _, id := range msg.BugIDs { + unique[id] = struct{}{} } - unique[id] = struct{}{} } - thread.BugIDs = newList + var ids []string + for id := range unique { + ids = append(ids, id) + } + sort.Strings(ids) + thread.BugIDs = ids } - return threadsList } -// first finds the firt message of an email thread. -func (c *parseCtx) first(msg *email.Email) *email.Email { - visited := map[*email.Email]struct{}{} - for { - // There have been a few cases when we'd otherwise get an infinite loop. - if _, ok := visited[msg]; ok { - return nil +func (c *parseCtx) visit(msg *email.Email, thread *Thread) { + var oldInfo *email.OldThreadInfo + if thread != nil { + oldInfo = &email.OldThreadInfo{ + ThreadType: thread.Type, } - visited[msg] = struct{}{} - if msg.InReplyTo == "" { - return msg - } - msg = c.messages[msg.InReplyTo] - if msg == nil { - // Probably we just didn't load the message. - return nil + } + msgType := DiscussionType(msg) + switch email.NewMessageAction(msg, msgType, oldInfo) { + case email.ActionIgnore: + thread = nil + case email.ActionAppend: + thread.Messages = append(thread.Messages, msg) + case email.ActionNewThread: + thread = &Thread{ + MessageID: msg.MessageID, + Subject: msg.Subject, + Type: msgType, + Messages: []*email.Email{msg}, } + c.threads = append(c.threads, thread) + } + for _, nextMsg := range c.next[msg] { + c.visit(nextMsg, thread) } } diff --git a/pkg/email/lore/parse_test.go b/pkg/email/lore/parse_test.go index 86e2b51b7..2343a7c26 100644 --- a/pkg/email/lore/parse_test.go +++ b/pkg/email/lore/parse_test.go @@ -10,6 +10,7 @@ import ( "time" "github.com/google/go-cmp/cmp" + "github.com/google/syzkaller/dashboard/dashapi" "github.com/google/syzkaller/pkg/email" ) @@ -112,6 +113,7 @@ Bug report`, "<A-Base>": { Subject: "Thread A", MessageID: "<A-Base>", + Type: dashapi.DiscussionReport, Messages: []*email.Email{ { MessageID: "<A-Base>", @@ -144,6 +146,7 @@ Bug report`, "<Bug>": { Subject: "[syzbot] Some bug", MessageID: "<Bug>", + Type: dashapi.DiscussionReport, BugIDs: []string{"4564456"}, Messages: []*email.Email{ { @@ -180,6 +183,7 @@ Bug report`, "<Patch>": { Subject: "[PATCH] Some bug fixed", MessageID: "<Patch>", + Type: dashapi.DiscussionPatch, BugIDs: []string{"12345"}, Messages: []*email.Email{ { @@ -196,6 +200,7 @@ Bug report`, "<Sub-Discussion>": { Subject: "[syzbot] Some bug 2", MessageID: "<Sub-Discussion>", + Type: dashapi.DiscussionReport, BugIDs: []string{"4564456"}, Messages: []*email.Email{ { @@ -244,3 +249,35 @@ Bug report`, t.Fatalf("Expected %d threads, got %d", len(expected), len(threads)) } } + +func TestDiscussionType(t *testing.T) { + tests := []struct { + msg *email.Email + ret dashapi.DiscussionType + }{ + { + msg: &email.Email{ + Subject: "[PATCH] Bla-bla", + }, + ret: dashapi.DiscussionPatch, + }, + { + msg: &email.Email{ + Subject: "[syzbot] Monthly ext4 report", + }, + ret: dashapi.DiscussionReminder, + }, + { + msg: &email.Email{ + Subject: "[syzbot] WARNING in abcd", + }, + ret: dashapi.DiscussionReport, + }, + } + for _, test := range tests { + got := DiscussionType(test.msg) + if got != test.ret { + t.Fatalf("expected %v got %v for %v", test.ret, got, test.msg) + } + } +} diff --git a/tools/syz-lore/query_lkml.go b/tools/syz-lore/query_lkml.go index fed69eede..324dee7be 100644 --- a/tools/syz-lore/query_lkml.go +++ b/tools/syz-lore/query_lkml.go @@ -56,18 +56,12 @@ func main() { Time: m.Date, }) } - discType := dashapi.DiscussionReport - if strings.Contains(thread.Subject, "PATCH") { - discType = dashapi.DiscussionPatch - } else if strings.Contains(thread.Subject, "Monthly") { - discType = dashapi.DiscussionReminder - } log.Printf("saving %d/%d", i+1, len(threads)) err := dash.SaveDiscussion(&dashapi.SaveDiscussionReq{ Discussion: &dashapi.Discussion{ ID: thread.MessageID, Source: dashapi.DiscussionLore, - Type: discType, + Type: thread.Type, Subject: thread.Subject, BugIDs: thread.BugIDs, Messages: messages, @@ -146,8 +140,8 @@ func processArchives(dir string, emails, domains []string) []*lore.Thread { } ret = append(ret, d) if *flagVerbose { - log.Printf("discussion ID=%s BugID=%s Subject=%s Messages=%d", - d.MessageID, d.BugIDs, d.Subject, len(d.Messages)) + log.Printf("discussion ID=%s BugID=%s Type=%s Subject=%s Messages=%d", + d.MessageID, d.BugIDs, d.Subject, d.Type, len(d.Messages)) } } log.Printf("%d threads are related to syzbot", len(ret)) |
