aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/email
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/email')
-rw-r--r--pkg/email/lore/parse.go96
-rw-r--r--pkg/email/lore/parse_test.go189
-rw-r--r--pkg/email/lore/read.go32
3 files changed, 317 insertions, 0 deletions
diff --git a/pkg/email/lore/parse.go b/pkg/email/lore/parse.go
new file mode 100644
index 000000000..6d90f9ee5
--- /dev/null
+++ b/pkg/email/lore/parse.go
@@ -0,0 +1,96 @@
+// Copyright 2023 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package lore
+
+import (
+	"sort"
+
+	"github.com/google/syzkaller/pkg/email"
+)
+
+// Thread is a group of emails that belong to one conversation.
+type Thread struct {
+	// Subject is the subject of the message that started the thread.
+	Subject string
+	// MessageID is the Message-ID of the message that started the thread.
+	MessageID string
+	// BugIDs lists, without duplicates, the bug IDs of all messages in the thread.
+	BugIDs []string
+	// Messages holds every message assigned to the thread, the first one included.
+	Messages []*email.Email
+}
+
+// Threads groups the given emails into threads.
+// Every email without an In-Reply-To header starts a thread; the remaining emails
+// are attached to the thread their chain of In-Reply-To references leads to.
+func Threads(emails []*email.Email) []*Thread {
+	ctx := &parseCtx{messages: make(map[string]*email.Email)}
+	for _, msg := range emails {
+		ctx.record(msg)
+	}
+	return ctx.threads()
+}
+
+// parseCtx accumulates the emails from which threads are assembled.
+type parseCtx struct {
+	// messages indexes every recorded email by its Message-ID.
+	messages map[string]*email.Email
+}
+
+// record stores msg in the context under its Message-ID.
+// A message recorded earlier under the same Message-ID is replaced.
+func (c *parseCtx) record(msg *email.Email) {
+	id := msg.MessageID
+	c.messages[id] = msg
+}
+
+// threads groups the recorded messages into threads and returns them.
+//
+// Every message without an In-Reply-To header starts a thread. Each message is
+// then attached to the thread of the root reached by following its In-Reply-To
+// chain (see first); messages for which no root is found are left out.
+// Thread.BugIDs is the union of the BugIDs of the thread's messages, without
+// duplicates; it stays nil when no message of the thread has bug IDs.
+//
+// Go does not define the iteration order of maps, so the messages are
+// processed in sorted Message-ID order. This makes the order of the returned
+// threads, of Thread.Messages and of Thread.BugIDs the same on every call.
+func (c *parseCtx) threads() []*Thread {
+	ids := make([]string, 0, len(c.messages))
+	for id := range c.messages {
+		ids = append(ids, id)
+	}
+	sort.Strings(ids)
+
+	byRoot := map[string]*Thread{}
+	threadsList := []*Thread{}
+	// Detect threads, i.e. messages without In-Reply-To.
+	for _, id := range ids {
+		msg := c.messages[id]
+		if msg.InReplyTo != "" {
+			continue
+		}
+		thread := &Thread{
+			MessageID: msg.MessageID,
+			Subject:   msg.Subject,
+		}
+		byRoot[msg.MessageID] = thread
+		threadsList = append(threadsList, thread)
+	}
+	// Assign messages to threads.
+	for _, id := range ids {
+		msg := c.messages[id]
+		base := c.first(msg)
+		if base == nil {
+			// The chain is broken or circular, the message belongs to no known thread.
+			continue
+		}
+		thread := byRoot[base.MessageID]
+		thread.BugIDs = append(thread.BugIDs, msg.BugIDs...)
+		thread.Messages = append(thread.Messages, msg)
+	}
+	// Deduplicate BugIDs lists, keeping the first occurrence of every ID.
+	for _, thread := range threadsList {
+		if len(thread.BugIDs) == 0 {
+			continue
+		}
+		seen := make(map[string]struct{}, len(thread.BugIDs))
+		unique := []string{}
+		for _, id := range thread.BugIDs {
+			if _, ok := seen[id]; ok {
+				continue
+			}
+			seen[id] = struct{}{}
+			unique = append(unique, id)
+		}
+		thread.BugIDs = unique
+	}
+	return threadsList
+}
+
+// first returns the first message of the thread that msg belongs to, i.e. the
+// message reached by following In-Reply-To references until one has none.
+// It returns nil if a referenced message was not recorded or if the
+// references form a loop.
+func (c *parseCtx) first(msg *email.Email) *email.Email {
+	seen := map[*email.Email]bool{}
+	for cur := msg; !seen[cur]; {
+		seen[cur] = true
+		if cur.InReplyTo == "" {
+			return cur
+		}
+		// A missing parent most likely means that the message was never loaded.
+		parent := c.messages[cur.InReplyTo]
+		if parent == nil {
+			return nil
+		}
+		cur = parent
+	}
+	// The same message was reached twice: the In-Reply-To chain is circular.
+	return nil
+}
diff --git a/pkg/email/lore/parse_test.go b/pkg/email/lore/parse_test.go
new file mode 100644
index 000000000..90be05200
--- /dev/null
+++ b/pkg/email/lore/parse_test.go
@@ -0,0 +1,189 @@
+// Copyright 2023 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package lore
+
+import (
+ "sort"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/google/go-cmp/cmp"
+ "github.com/google/syzkaller/pkg/email"
+)
+
+func TestThreadsCollection(t *testing.T) { // Parses the raw emails below, groups them with Threads and compares every thread with expected.
+ messages := []string{ // Raw email texts that are fed to email.Parse.
+ // <A-Base> <-- <A-Child-1> <-- <A-Child-1-1>.
+ `Date: Sun, 7 May 2017 19:54:00 -0700
+Subject: Thread A
+Message-ID: <A-Base>
+From: UserA <a@user.com>
+Content-Type: text/plain
+
+
+Some text`,
+ `Date: Sun, 7 May 2017 19:55:00 -0700
+Subject: Re: Thread A
+Message-ID: <A-Child-1>
+From: UserB <b@user.com>
+To: UserA <a@user.com>
+Content-Type: text/plain
+In-Reply-To: <A-Base>
+
+
+Some reply`,
+ `Date: Sun, 7 May 2017 19:56:00 -0700
+Subject: Re: Re: Thread A
+Message-ID: <A-Child-1-1>
+From: UserC <c@user.com>
+To: UserA <a@user.com>, UserB <b@user.com>
+Content-Type: text/plain
+In-Reply-To: <A-Child-1>
+
+
+Some reply (2)`,
+ // <Bug> with two replies: <Bug-Reply1>, <Bug-Reply2>. NOTE(review): <Bug-Reply2> carries "In-Reply-To: <Bug>B" and the expected <Bug> thread below lists only <Bug-Reply1> - confirm that the trailing "B" is intentional.
+ `Date: Sun, 7 May 2017 19:57:00 -0700
+Subject: [syzbot] Some bug
+Message-ID: <Bug>
+From: syzbot <syzbot+4564456@bar.com>
+Content-Type: text/plain
+
+
+Bug report`,
+ `Date: Sun, 7 May 2017 19:58:00 -0700
+Subject: Re: [syzbot] Some bug
+Message-ID: <Bug-Reply1>
+From: UserC <c@user.com>
+To: syzbot <syzbot+4564456@bar.com>
+In-Reply-To: <Bug>
+Content-Type: text/plain
+
+
+Bug report reply`,
+ `Date: Sun, 7 May 2017 19:58:01 -0700
+Subject: Re: [syzbot] Some bug
+Message-ID: <Bug-Reply2>
+From: UserD <d@user.com>
+To: syzbot <syzbot+4564456@bar.com>
+In-Reply-To: <Bug>B
+Content-Type: text/plain
+
+
+Bug report reply 2`,
+ // And one PATCH without replies.
+ `Date: Sun, 7 May 2017 19:58:01 -0700
+Subject: [PATCH] Some bug fixed
+Message-ID: <Patch>
+From: UserE <e@user.com>
+Cc: syzbot <syzbot+12345@bar.com>
+Content-Type: text/plain
+
+
+Patch`,
+ }
+
+ zone := time.FixedZone("", -7*60*60) // Same offset as the -0700 used in the Date headers above.
+ expected := map[string]*Thread{ // Expected threads, keyed by the Message-ID of the thread's first message.
+ "<A-Base>": {
+ Subject: "Thread A",
+ MessageID: "<A-Base>",
+ Messages: []*email.Email{
+ {
+ MessageID: "<A-Base>",
+ Subject: "Thread A",
+ Date: time.Date(2017, time.May, 7, 19, 54, 0, 0, zone),
+ Author: "a@user.com",
+ Cc: []string{"a@user.com"},
+ Command: email.CmdNone,
+ },
+ {
+ MessageID: "<A-Child-1>",
+ Subject: "Re: Thread A",
+ Date: time.Date(2017, time.May, 7, 19, 55, 0, 0, zone),
+ Author: "b@user.com",
+ Cc: []string{"a@user.com", "b@user.com"},
+ InReplyTo: "<A-Base>",
+ Command: email.CmdNone,
+ },
+ {
+ MessageID: "<A-Child-1-1>",
+ Subject: "Re: Re: Thread A",
+ Date: time.Date(2017, time.May, 7, 19, 56, 0, 0, zone),
+ Author: "c@user.com",
+ Cc: []string{"a@user.com", "b@user.com", "c@user.com"},
+ InReplyTo: "<A-Child-1>",
+ Command: email.CmdNone,
+ },
+ },
+ },
+ "<Bug>": {
+ Subject: "[syzbot] Some bug",
+ MessageID: "<Bug>",
+ BugIDs: []string{"4564456"},
+ Messages: []*email.Email{
+ {
+ MessageID: "<Bug>",
+ BugIDs: []string{"4564456"},
+ Subject: "[syzbot] Some bug",
+ Date: time.Date(2017, time.May, 7, 19, 57, 0, 0, zone),
+ Author: "syzbot@bar.com",
+ Command: email.CmdNone,
+ },
+ {
+ MessageID: "<Bug-Reply1>",
+ BugIDs: []string{"4564456"},
+ Subject: "Re: [syzbot] Some bug",
+ Date: time.Date(2017, time.May, 7, 19, 58, 0, 0, zone),
+ Author: "c@user.com",
+ Cc: []string{"c@user.com"},
+ InReplyTo: "<Bug>",
+ Command: email.CmdNone,
+ },
+ },
+ },
+ "<Patch>": {
+ Subject: "[PATCH] Some bug fixed",
+ MessageID: "<Patch>",
+ BugIDs: []string{"12345"},
+ Messages: []*email.Email{
+ {
+ MessageID: "<Patch>",
+ BugIDs: []string{"12345"},
+ Subject: "[PATCH] Some bug fixed",
+ Date: time.Date(2017, time.May, 7, 19, 58, 1, 0, zone),
+ Author: "e@user.com",
+ Cc: []string{"e@user.com"},
+ Command: email.CmdNone,
+ },
+ },
+ },
+ }
+
+ emails := []*email.Email{}
+ for _, m := range messages {
+ msg, err := email.Parse(strings.NewReader(m), []string{"syzbot@bar.com"},
+ []string{}, []string{"bar.com"})
+ if err != nil {
+ t.Fatal(err)
+ }
+ msg.Body = "" // The expected values carry no Body, so drop it before comparing.
+ emails = append(emails, msg)
+ }
+
+ threads := Threads(emails)
+ for _, d := range threads {
+ sort.Slice(d.Messages, func(i, j int) bool { // Order by date so the comparison does not depend on the order produced by Threads.
+ return d.Messages[i].Date.Before(d.Messages[j].Date)
+ })
+ if diff := cmp.Diff(expected[d.MessageID], d); diff != "" {
+ t.Fatalf("%s: %s", d.MessageID, diff)
+ }
+ }
+
+ if len(threads) != len(expected) { // Catches expected threads that Threads did not return at all.
+ t.Fatalf("Expected %d threads, got %d", len(expected), len(threads))
+ }
+}
diff --git a/pkg/email/lore/read.go b/pkg/email/lore/read.go
new file mode 100644
index 000000000..1cfa680b6
--- /dev/null
+++ b/pkg/email/lore/read.go
@@ -0,0 +1,32 @@
+// Copyright 2023 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package lore
+
+import (
+ "fmt"
+
+ "github.com/google/syzkaller/pkg/vcs"
+)
+
+// EmailReader gives deferred access to a single message of an archive.
+type EmailReader struct {
+	// Extract returns the raw bytes of the message, or an error.
+	Extract func() ([]byte, error)
+}
+
+// ReadArchive lists the commits of the LKML message archive in dir and sends
+// one EmailReader per commit to messages.
+//
+// The commits are those returned by ListCommitHashes("HEAD"). Nothing is read
+// up front: the Extract callback of every reader calls repo.Object("m", hash)
+// for its commit when invoked. ReadArchive does not close messages.
+func ReadArchive(dir string, messages chan<- *EmailReader) error {
+	repo := vcs.NewLKMLRepo(dir)
+	hashes, err := repo.ListCommitHashes("HEAD")
+	if err != nil {
+		return fmt.Errorf("failed to get recent commits: %w", err)
+	}
+	for _, hash := range hashes {
+		// Give every closure its own copy of the loop variable.
+		hash := hash
+		reader := &EmailReader{}
+		reader.Extract = func() ([]byte, error) {
+			return repo.Object("m", hash)
+		}
+		messages <- reader
+	}
+	return nil
+}