aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/email/lore/parse.go
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2023-03-30 16:47:18 +0200
committerAleksandr Nogikh <wp32pw@gmail.com>2023-04-06 13:59:25 +0200
commitb78f6ba060f2182be9259be23a8c07d2d16671fa (patch)
tree4b065e825c8425d6193df3356372ecaf3ead1755 /pkg/email/lore/parse.go
parent7c8c334ec07d3333dacd150dc389ba3f3db649df (diff)
pkg/email/lore: add Lore archives parsing routines
We need to: 1) query raw messages from an LKML git archive; 2) group raw messages into threads.
Diffstat (limited to 'pkg/email/lore/parse.go')
-rw-r--r--pkg/email/lore/parse.go96
1 files changed, 96 insertions, 0 deletions
diff --git a/pkg/email/lore/parse.go b/pkg/email/lore/parse.go
new file mode 100644
index 000000000..6d90f9ee5
--- /dev/null
+++ b/pkg/email/lore/parse.go
@@ -0,0 +1,96 @@
+// Copyright 2023 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package lore
+
+import (
+ "github.com/google/syzkaller/pkg/email"
+)
+
+// Thread is a group of emails that all descend from the same root message,
+// i.e. a message that has no In-Reply-To.
+type Thread struct {
+ // Subject is copied from the root message of the thread.
+ Subject string
+ // MessageID is the MessageID of the root message of the thread.
+ MessageID string
+ // BugIDs is the deduplicated list of BugIDs gathered from all messages of the thread.
+ BugIDs []string
+ // Messages holds every message attributed to the thread, the root message included.
+ Messages []*email.Email
+}
+
+// Threads groups the given emails into threads and returns the result.
+// Every email is first registered in a fresh parsing context under its
+// MessageID; the context then reconstructs the threads from those records.
+func Threads(emails []*email.Email) []*Thread {
+	ctx := &parseCtx{messages: make(map[string]*email.Email, len(emails))}
+	// The loop variable is deliberately not called "email" so that it does
+	// not hide the imported package of the same name.
+	for _, msg := range emails {
+		ctx.record(msg)
+	}
+	return ctx.threads()
+}
+
+// parseCtx accumulates emails and reconstructs threads out of them.
+type parseCtx struct {
+	// messages maps a MessageID to the most recently recorded email with that ID.
+	messages map[string]*email.Email
+	// order lists the recorded MessageIDs in the order they were first seen.
+	// Iterating it instead of ranging over messages keeps the output stable:
+	// Go randomizes map iteration order.
+	order []string
+}
+
+// record remembers msg under its MessageID.
+// If a message with the same MessageID was recorded before, msg replaces it
+// but keeps the position of the earlier one in the processing order.
+func (c *parseCtx) record(msg *email.Email) {
+	if _, seen := c.messages[msg.MessageID]; !seen {
+		c.order = append(c.order, msg.MessageID)
+	}
+	c.messages[msg.MessageID] = msg
+}
+
+// threads groups all recorded messages into threads.
+//
+// Every recorded message with an empty In-Reply-To starts a thread. Each
+// message is then attached to the thread of the root message found by first;
+// messages for which first returns nil are left out. Finally, the BugIDs of
+// every thread are deduplicated, keeping the first occurrence of each ID.
+//
+// Messages are processed in the order they were recorded, so the order of the
+// returned threads, and of the Messages and BugIDs inside each of them, is the
+// same for the same input. The returned slice is never nil.
+func (c *parseCtx) threads() []*Thread {
+	threads := map[string]*Thread{}
+	threadsList := []*Thread{}
+	// Detect threads, i.e. messages without In-Reply-To.
+	for _, id := range c.order {
+		msg := c.messages[id]
+		if msg.InReplyTo != "" {
+			continue
+		}
+		thread := &Thread{
+			MessageID: msg.MessageID,
+			Subject:   msg.Subject,
+		}
+		threads[msg.MessageID] = thread
+		threadsList = append(threadsList, thread)
+	}
+	// Assign messages to threads.
+	for _, id := range c.order {
+		msg := c.messages[id]
+		base := c.first(msg)
+		if base == nil {
+			// The root message is unknown or the reply chain is cyclic.
+			continue
+		}
+		thread := threads[base.MessageID]
+		thread.BugIDs = append(thread.BugIDs, msg.BugIDs...)
+		thread.Messages = append(thread.Messages, msg)
+	}
+	// Deduplicate BugIDs lists.
+	for _, thread := range threadsList {
+		if len(thread.BugIDs) == 0 {
+			continue
+		}
+		seen := make(map[string]struct{}, len(thread.BugIDs))
+		unique := []string{}
+		for _, id := range thread.BugIDs {
+			if _, ok := seen[id]; ok {
+				continue
+			}
+			seen[id] = struct{}{}
+			unique = append(unique, id)
+		}
+		thread.BugIDs = unique
+	}
+	return threadsList
+}
+
+// first returns the first message of the email thread that msg belongs to,
+// i.e. the message without In-Reply-To that is reached by following the
+// In-Reply-To references starting from msg.
+// It returns nil if the chain refers to a message that was not recorded or
+// if the chain loops back onto itself.
+func (c *parseCtx) first(msg *email.Email) *email.Email {
+	cur := msg
+	// Track every message already walked through: there have been a few
+	// cases when we'd otherwise get an infinite loop.
+	seen := map[*email.Email]struct{}{cur: {}}
+	for cur.InReplyTo != "" {
+		parent := c.messages[cur.InReplyTo]
+		if parent == nil {
+			// Probably we just didn't load the message.
+			return nil
+		}
+		if _, looped := seen[parent]; looped {
+			return nil
+		}
+		seen[parent] = struct{}{}
+		cur = parent
+	}
+	return cur
+}