diff options
| author | Aleksandr Nogikh <nogikh@google.com> | 2022-11-17 18:47:15 +0000 |
|---|---|---|
| committer | Aleksandr Nogikh <wp32pw@gmail.com> | 2022-11-21 16:36:47 +0100 |
| commit | b42d86eb82846fe9a1d830bc5df4869a5d285be3 (patch) | |
| tree | 2851c3bec207c88603935828e87122e2223d8ade /pkg/email | |
| parent | 6989c64a48428b1563c52419d978dac64c53434f (diff) | |
pkg/email: detect author and mailing list
Handle two types of mailing list responses:
* From contains the original author, and the mailing list only sets Sender.
* From is replaced by the mailing list, and the original author is moved to X-Original-From.
Diffstat (limited to 'pkg/email')
| -rw-r--r-- | pkg/email/parser.go | 66 | ||||
| -rw-r--r-- | pkg/email/parser_test.go | 82 |
2 files changed, 112 insertions, 36 deletions
diff --git a/pkg/email/parser.go b/pkg/email/parser.go index 6c0fb22d4..ff7eeb7ec 100644 --- a/pkg/email/parser.go +++ b/pkg/email/parser.go @@ -22,9 +22,9 @@ type Email struct { MessageID string Link string Subject string - From string + MailingList string + Author string Cc []string - Sender string Body string // text/plain part Patch string // attached patch, if any Command Command // command to bot @@ -52,7 +52,18 @@ const ( var groupsLinkRe = regexp.MustCompile("\nTo view this discussion on the web visit" + " (https://groups\\.google\\.com/.*?)\\.(?:\r)?\n") -func Parse(r io.Reader, ownEmails []string) (*Email, error) { +func prepareEmails(list []string) map[string]bool { + ret := make(map[string]bool) + for _, email := range list { + ret[email] = true + if addr, err := mail.ParseAddress(email); err == nil { + ret[addr.Address] = true + } + } + return ret +} + +func Parse(r io.Reader, ownEmails []string, goodLists []string) (*Email, error) { msg, err := mail.ReadMessage(r) if err != nil { return nil, fmt.Errorf("failed to read email: %v", err) @@ -70,13 +81,7 @@ func Parse(r io.Reader, ownEmails []string) (*Email, error) { cc, _ := msg.Header.AddressList("Cc") bugID := "" var ccList []string - ownAddrs := make(map[string]bool) - for _, email := range ownEmails { - ownAddrs[email] = true - if addr, err := mail.ParseAddress(email); err == nil { - ownAddrs[addr.Address] = true - } - } + ownAddrs := prepareEmails(ownEmails) fromMe := false for _, addr := range from { cleaned, _, _ := RemoveAddrContext(addr.Address) @@ -84,7 +89,16 @@ func Parse(r io.Reader, ownEmails []string) (*Email, error) { fromMe = true } } - for _, addr := range append(append(cc, to...), from...) { + + originalFrom := "" + // Ignore error since the header might not be present. + originalFroms, _ := msg.Header.AddressList("X-Original-From") + if len(originalFroms) > 0 { + originalFrom = originalFroms[0].String() + } + + rawCcList := append(append(append(cc, to...), from...), originalFroms...) 
+ for _, addr := range rawCcList { cleaned, context, _ := RemoveAddrContext(addr.Address) if addr, err := mail.ParseAddress(cleaned); err == nil { cleaned = addr.Address @@ -100,12 +114,9 @@ func Parse(r io.Reader, ownEmails []string) (*Email, error) { ccList = MergeEmailLists(ccList) sender := "" - senders, err := msg.Header.AddressList("Sender") - if err != nil { - if err != mail.ErrHeaderNotPresent { - return nil, err - } - } else if len(senders) > 0 { + // Ignore error since the header might not be present. + senders, _ := msg.Header.AddressList("Sender") + if len(senders) > 0 { sender = senders[0].Address } @@ -133,14 +144,31 @@ func Parse(r io.Reader, ownEmails []string) (*Email, error) { if match := groupsLinkRe.FindStringSubmatchIndex(bodyStr); match != nil { link = bodyStr[match[2]:match[3]] } + + author := CanonicalEmail(from[0].Address) + mailingList := "" + + goodListsMap := prepareEmails(goodLists) + if goodListsMap[author] { + // In some cases, the mailing list would change From and introduce X-Original-From. + mailingList = author + if originalFrom != "" { + author = CanonicalEmail(originalFrom) + } + // Not sure if `else` can happen here, but let it be mailingList == author in this case. + } else if goodListsMap[CanonicalEmail(sender)] { + // In other cases, the mailing list would preserve From and just change Sender. 
+ mailingList = CanonicalEmail(sender) + } + email := &Email{ BugID: bugID, MessageID: msg.Header.Get("Message-ID"), Link: link, + Author: author, + MailingList: mailingList, Subject: subject, - From: CanonicalEmail(from[0].Address), Cc: ccList, - Sender: sender, Body: bodyStr, Patch: patch, Command: cmd, diff --git a/pkg/email/parser_test.go b/pkg/email/parser_test.go index d50e86cff..22fbdbbea 100644 --- a/pkg/email/parser_test.go +++ b/pkg/email/parser_test.go @@ -120,7 +120,8 @@ func TestCanonicalEmail(t *testing.T) { func TestParse(t *testing.T) { for i, test := range parseTests { body := func(t *testing.T, test ParseTest) { - email, err := Parse(strings.NewReader(test.email), []string{"bot <foo@bar.com>"}) + email, err := Parse(strings.NewReader(test.email), + []string{"bot <foo@bar.com>"}, []string{"list@googlegroups.com"}) if err != nil { t.Fatal(err) } @@ -158,8 +159,8 @@ line1 #syz fix bar baz line 2 `, - cmd: CmdFix, - str: "fix", + cmd: CmdFix, + str: "fix", args: "bar baz", }, { @@ -357,7 +358,7 @@ For more options, visit https://groups.google.com/d/optout.`, MessageID: "<123>", Link: "https://groups.google.com/d/msgid/syzkaller/abcdef@google.com", Subject: "test subject", - From: "bob@example.com", + Author: "bob@example.com", Cc: []string{"bob@example.com"}, Body: `text body second line @@ -388,7 +389,7 @@ last line`, BugID: "4564456", MessageID: "<123>", Subject: "test subject", - From: "foo@bar.com", + Author: "foo@bar.com", Cc: []string{"bob@example.com"}, Body: `text body last line`, @@ -409,7 +410,7 @@ last line`, Email{ MessageID: "<123>", Subject: "test subject", - From: "bob@example.com", + Author: "bob@example.com", Cc: []string{"alice@example.com", "bob@example.com", "bot@example.com"}, Body: `#syz invalid text body @@ -435,7 +436,7 @@ last line Email{ MessageID: "<123>", Subject: "test subject", - From: "bob@example.com", + Author: "bob@example.com", Cc: []string{"alice@example.com", "bob@example.com", "bot@example.com"}, Body: `text body 
second line @@ -475,7 +476,7 @@ IHQpKSB7CiAJCXNwaW5fdW5sb2NrKCZrY292LT5sb2NrKTsKIAkJcmV0dXJuOwo= Email{ MessageID: "<123>", Subject: "test subject", - From: "bob@example.com", + Author: "bob@example.com", Cc: []string{"bob@example.com", "bot@example.com"}, Body: `body text >#syz test @@ -563,7 +564,7 @@ or)</div></div></div> Email{ MessageID: "<123>", Subject: "test subject", - From: "bob@example.com", + Author: "bob@example.com", Cc: []string{"bob@example.com", "bot@example.com"}, Body: `On Mon, May 8, 2017 at 6:47 PM, Bob wrote: > body text @@ -640,9 +641,8 @@ d `, Email{ MessageID: "<1250334f-7220-2bff-5d87-b87573758d81@bar.com>", Subject: "Re: BUG: unable to handle kernel NULL pointer dereference in sock_poll", - From: "bar@foo.com", + Author: "bar@foo.com", Cc: []string{"bar@foo.com", "syzbot@syzkaller.appspotmail.com"}, - Sender: "syzkaller-bugs@googlegroups.com", Body: `On 2018/06/10 4:57, syzbot wrote: > Hello, > @@ -667,9 +667,8 @@ From: bar@foo.com #syz dup: BUG: unable to handle kernel NULL pointer dereference in corrupted `, Email{ - From: "bar@foo.com", + Author: "bar@foo.com", Cc: []string{"bar@foo.com", "syzbot@syzkaller.appspotmail.com"}, - Sender: "syzkaller-bugs@googlegroups.com", Body: `#syz dup: BUG: unable to handle kernel NULL pointer dereference in corrupted `, @@ -685,9 +684,8 @@ From: bar@foo.com #syz fix: When freeing a lockf struct that already is part of a linked list, make sure to `, Email{ - From: "bar@foo.com", + Author: "bar@foo.com", Cc: []string{"bar@foo.com", "syzbot@syzkaller.appspotmail.com"}, - Sender: "syzkaller-bugs@googlegroups.com", Body: `#syz fix: When freeing a lockf struct that already is part of a linked list, make sure to `, @@ -695,7 +693,6 @@ When freeing a lockf struct that already is part of a linked list, make sure to CommandStr: "fix:", CommandArgs: "When freeing a lockf struct that already is part of a linked list, make sure to", }}, - {`Date: Sun, 7 May 2017 19:54:00 -0700 Message-ID: <123> Subject: #syz test: 
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git master @@ -707,11 +704,62 @@ nothing to see here`, BugID: "4564456", MessageID: "<123>", Subject: "#syz test: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git master", - From: "bob@example.com", + Author: "bob@example.com", Cc: []string{"bob@example.com"}, Body: `nothing to see here`, Command: CmdTest, CommandStr: "test:", CommandArgs: "git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git master", }}, + {`Date: Sun, 7 May 2017 19:54:00 -0700 +Message-ID: <123> +Sender: list@googlegroups.com +Subject: Subject +From: user@mail.com +To: syzbot <list@googlegroups.com> + +nothing to see here`, + Email{ + MessageID: "<123>", + Subject: "Subject", + Author: "user@mail.com", + MailingList: "list@googlegroups.com", + Cc: []string{"list@googlegroups.com", "user@mail.com"}, + Body: `nothing to see here`, + Command: CmdNone, + }}, + {`Date: Sun, 7 May 2017 19:54:00 -0700 +Message-ID: <123> +From: list@googlegroups.com +X-Original-From: user@mail.com +Subject: Subject +To: <user2@mail.com> + +nothing to see here`, + Email{ + MessageID: "<123>", + Subject: "Subject", + Author: "user@mail.com", + MailingList: "list@googlegroups.com", + Cc: []string{"list@googlegroups.com", "user2@mail.com", "user@mail.com"}, + Body: `nothing to see here`, + Command: CmdNone, + }}, + // A faulty case, just check we handle it normally. + {`Date: Sun, 7 May 2017 19:54:00 -0700 +Message-ID: <123> +From: list@googlegroups.com +Subject: Subject +To: <user2@mail.com> + +nothing to see here`, + Email{ + MessageID: "<123>", + Subject: "Subject", + Author: "list@googlegroups.com", + MailingList: "list@googlegroups.com", + Cc: []string{"list@googlegroups.com", "user2@mail.com"}, + Body: `nothing to see here`, + Command: CmdNone, + }}, } |
