diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2023-03-30 16:47:18 +0200 |
|---|---|---|
| committer | Aleksandr Nogikh <wp32pw@gmail.com> | 2023-04-06 13:59:25 +0200 |
| commit | b78f6ba060f2182be9259be23a8c07d2d16671fa (patch) | |
| tree | 4b065e825c8425d6193df3356372ecaf3ead1755 /pkg/email/lore/parse.go | |
| parent | 7c8c334ec07d3333dacd150dc389ba3f3db649df (diff) | |
pkg/email/lore: add Lore archives parsing routines
We need to
1) Query raw messages from an LKML git archive.
2) Group raw messages into threads.
Diffstat (limited to 'pkg/email/lore/parse.go')
| -rw-r--r-- | pkg/email/lore/parse.go | 96 |
1 files changed, 96 insertions, 0 deletions
diff --git a/pkg/email/lore/parse.go b/pkg/email/lore/parse.go new file mode 100644 index 000000000..6d90f9ee5 --- /dev/null +++ b/pkg/email/lore/parse.go @@ -0,0 +1,96 @@ +// Copyright 2023 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package lore + +import ( + "github.com/google/syzkaller/pkg/email" +) + +type Thread struct { + Subject string + MessageID string + BugIDs []string + Messages []*email.Email +} + +// Threads extracts individual threads from a list of emails. +func Threads(emails []*email.Email) []*Thread { + ctx := &parseCtx{ + messages: map[string]*email.Email{}, + } + for _, email := range emails { + ctx.record(email) + } + return ctx.threads() +} + +type parseCtx struct { + messages map[string]*email.Email +} + +func (c *parseCtx) record(msg *email.Email) { + c.messages[msg.MessageID] = msg +} + +func (c *parseCtx) threads() []*Thread { + threads := map[string]*Thread{} + threadsList := []*Thread{} + // Detect threads, i.e. messages without In-Reply-To. + for _, msg := range c.messages { + if msg.InReplyTo == "" { + thread := &Thread{ + MessageID: msg.MessageID, + Subject: msg.Subject, + } + threads[msg.MessageID] = thread + threadsList = append(threadsList, thread) + } + } + // Assign messages to threads. + for _, msg := range c.messages { + base := c.first(msg) + if base == nil { + continue + } + thread := threads[base.MessageID] + thread.BugIDs = append(thread.BugIDs, msg.BugIDs...) + thread.Messages = append(threads[base.MessageID].Messages, msg) + } + // Deduplicate BugIDs lists. + for _, thread := range threads { + if len(thread.BugIDs) == 0 { + continue + } + unique := map[string]struct{}{} + newList := []string{} + for _, id := range thread.BugIDs { + if _, ok := unique[id]; !ok { + newList = append(newList, id) + } + unique[id] = struct{}{} + } + thread.BugIDs = newList + } + return threadsList +} + +// first finds the firt message of an email thread. +func (c *parseCtx) first(msg *email.Email) *email.Email { + visited := map[*email.Email]struct{}{} + for { + // There have been a few cases when we'd otherwise get an infinite loop. + if _, ok := visited[msg]; ok { + return nil + } + visited[msg] = struct{}{} + if msg.InReplyTo == "" { + return msg + } + msg = c.messages[msg.InReplyTo] + if msg == nil { + // Probably we just didn't load the message. + return nil + } + } +} |
