diff options
| -rw-r--r-- | dashboard/app/reporting_email.go | 36 | ||||
| -rw-r--r-- | pkg/email/parser.go | 68 | ||||
| -rw-r--r-- | pkg/email/parser_test.go | 123 |
3 files changed, 202 insertions, 25 deletions
diff --git a/dashboard/app/reporting_email.go b/dashboard/app/reporting_email.go index 71a12b7b2..42dfec75e 100644 --- a/dashboard/app/reporting_email.go +++ b/dashboard/app/reporting_email.go @@ -428,7 +428,9 @@ func handleIncomingMail(w http.ResponseWriter, r *http.Request) { // nolint: gocyclo func incomingMail(c context.Context, r *http.Request) error { - msg, err := email.Parse(r.Body, ownEmails(c), ownMailingLists()) + msg, err := email.Parse(r.Body, ownEmails(c), ownMailingLists(), []string{ + appURL(c), + }) if err != nil { // Malformed emails constantly appear from spammers. // But we have not seen errors parsing legit emails. @@ -441,8 +443,8 @@ func incomingMail(c context.Context, r *http.Request) error { // But we still want to remember the id of our own message, so just neutralize the command. msg.Command, msg.CommandArgs = email.CmdNone, "" } - log.Infof(c, "received email: subject %q, author %q, cc %q, msg %q, bug %q, cmd %q, link %q, list %q", - msg.Subject, msg.Author, msg.Cc, msg.MessageID, msg.BugID, msg.Command, msg.Link, msg.MailingList) + log.Infof(c, "received email: subject %q, author %q, cc %q, msg %q, bug %v, cmd %q, link %q, list %q", + msg.Subject, msg.Author, msg.Cc, msg.MessageID, msg.BugIDs, msg.Command, msg.Link, msg.MailingList) if msg.Command == email.CmdFix && msg.CommandArgs == "exact-commit-title" { // Sometimes it happens that somebody sends us our own text back, ignore it. msg.Command, msg.CommandArgs = email.CmdNone, "" @@ -457,7 +459,7 @@ func incomingMail(c context.Context, r *http.Request) error { mailingList := email.CanonicalEmail(emailConfig.Email) mailingListInCC := checkMailingListInCC(c, msg, mailingList) log.Infof(c, "from/cc mailing list: %v/%v", fromMailingList, mailingListInCC) - if fromMailingList && msg.BugID != "" && msg.Command != email.CmdNone { + if fromMailingList && len(msg.BugIDs) > 0 && msg.Command != email.CmdNone { // Note that if syzbot was not directly mentioned in To or Cc, this is not really // a duplicate message, so it must be processed. We detect it by looking at BugID. @@ -678,14 +680,19 @@ type bugListInfoResult struct { func identifyEmail(c context.Context, msg *email.Email) ( *bugInfoResult, *bugListInfoResult, *EmailConfig) { - if isBugListHash(msg.BugID) { - subsystem, _, stage, err := findSubsystemReportByID(c, msg.BugID) + bugID := "" + if len(msg.BugIDs) > 0 { + // For now let's only consider one of them. + bugID = msg.BugIDs[0] + } + if isBugListHash(bugID) { + subsystem, _, stage, err := findSubsystemReportByID(c, bugID) if err != nil { log.Errorf(c, "findBugListByID failed: %s", err) return nil, nil, nil } if subsystem == nil { - log.Errorf(c, "no bug list with the %v ID found", msg.BugID) + log.Errorf(c, "no bug list with the %v ID found", bugID) return nil, nil, nil } reminderConfig := config.Namespaces[subsystem.Namespace].Subsystems.Reminder @@ -695,10 +702,10 @@ func identifyEmail(c context.Context, msg *email.Email) ( } emailConfig, ok := bugListReportingConfig(subsystem.Namespace, stage).(*EmailConfig) if !ok { - log.Errorf(c, "bug list's reporting config is not EmailConfig (id=%v)", msg.BugID) + log.Errorf(c, "bug list's reporting config is not EmailConfig (id=%v)", bugID) return nil, nil, nil } - return nil, &bugListInfoResult{id: msg.BugID, config: emailConfig}, emailConfig + return nil, &bugListInfoResult{id: bugID, config: emailConfig}, emailConfig } bugInfo := loadBugInfo(c, msg) if bugInfo == nil { @@ -715,7 +722,12 @@ type bugInfoResult struct { } func loadBugInfo(c context.Context, msg *email.Email) *bugInfoResult { - if msg.BugID == "" { + bugID := "" + if len(msg.BugIDs) > 0 { + // For now let's only consider one of them. + bugID = msg.BugIDs[0] + } + if bugID == "" { var matchingErr error // Give it one more try -- maybe we can determine the bug from the subject + mailing list. if msg.MailingList != "" { @@ -746,7 +758,7 @@ func loadBugInfo(c context.Context, msg *email.Email) *bugInfoResult { } return nil } - bug, bugKey, err := findBugByReportingID(c, msg.BugID) + bug, bugKey, err := findBugByReportingID(c, bugID) if err != nil { log.Errorf(c, "can't find bug: %v", err) from, err := email.AddAddrContext(ownEmail(c), "HASH") @@ -759,7 +771,7 @@ func loadBugInfo(c context.Context, msg *email.Email) *bugInfoResult { } return nil } - bugReporting, _ := bugReportingByID(bug, msg.BugID) + bugReporting, _ := bugReportingByID(bug, bugID) if bugReporting == nil { log.Errorf(c, "can't find bug reporting: %v", err) if err := replyTo(c, msg, "", "Can't find the corresponding bug."); err != nil { diff --git a/pkg/email/parser.go b/pkg/email/parser.go index af044791d..795cc6fed 100644 --- a/pkg/email/parser.go +++ b/pkg/email/parser.go @@ -14,12 +14,15 @@ import ( "regexp" "sort" "strings" + "time" "unicode" ) type Email struct { - BugID string + BugIDs []string MessageID string + InReplyTo string + Date time.Time Link string Subject string MailingList string @@ -65,7 +68,7 @@ func prepareEmails(list []string) map[string]bool { return ret } -func Parse(r io.Reader, ownEmails, goodLists []string) (*Email, error) { +func Parse(r io.Reader, ownEmails, goodLists, domains []string) (*Email, error) { msg, err := mail.ReadMessage(r) if err != nil { return nil, fmt.Errorf("failed to read email: %v", err) @@ -81,7 +84,6 @@ func Parse(r io.Reader, ownEmails, goodLists []string) (*Email, error) { to, _ := msg.Header.AddressList("To") // AddressList fails if the header is not present. cc, _ := msg.Header.AddressList("Cc") - bugID := "" var ccList []string ownAddrs := prepareEmails(ownEmails) fromMe := false @@ -99,6 +101,7 @@ func Parse(r io.Reader, ownEmails, goodLists []string) (*Email, error) { originalFrom = originalFroms[0].String() } + bugIDs := []string{} rawCcList := append(append(append(cc, to...), from...), originalFroms...) for _, addr := range rawCcList { cleaned, context, _ := RemoveAddrContext(addr.Address) @@ -106,9 +109,7 @@ func Parse(r io.Reader, ownEmails, goodLists []string) (*Email, error) { cleaned = addr.Address } if ownAddrs[cleaned] { - if bugID == "" { - bugID = context - } + bugIDs = append(bugIDs, context) } else { ccList = append(ccList, CanonicalEmail(cleaned)) } @@ -142,6 +143,8 @@ func Parse(r io.Reader, ownEmails, goodLists []string) (*Email, error) { } cmd, cmdStr, cmdArgs = extractCommand(subject + "\n" + bodyStr) } + bugIDs = append(bugIDs, extractBodyBugIDs(bodyStr, ownAddrs, domains)...) + link := "" if match := groupsLinkRe.FindStringSubmatchIndex(bodyStr); match != nil { link = bodyStr[match[2]:match[3]] @@ -162,10 +165,12 @@ func Parse(r io.Reader, ownEmails, goodLists []string) (*Email, error) { // In other cases, the mailing list would preserve From and just change Sender. mailingList = CanonicalEmail(sender) } - + date, _ := mail.ParseDate(msg.Header.Get("Date")) email := &Email{ - BugID: bugID, + BugIDs: dedupBugIDs(bugIDs), MessageID: msg.Header.Get("Message-ID"), + InReplyTo: msg.Header.Get("In-Reply-To"), + Date: date, Link: link, Author: author, MailingList: mailingList, @@ -412,6 +417,53 @@ func parseBody(r io.Reader, headers mail.Header) ([]byte, [][]byte, error) { } } +func extractBodyBugIDs(body string, ownEmailMap map[string]bool, domains []string) []string { + // Let's build a regular expression. + var rb strings.Builder + for email := range ownEmailMap { + escaped := regexp.QuoteMeta(email) + part := strings.ReplaceAll(escaped, `@`, `\+(\w+?)@`) + if rb.Len() > 0 { + rb.WriteString(`|`) + } + rb.WriteString(part) + } + for _, domain := range domains { + escaped := regexp.QuoteMeta(domain + "/bug?extid=") + if rb.Len() > 0 { + rb.WriteString(`|`) + } + rb.WriteString(escaped) + rb.WriteString(`([\w]+)`) + } + rg := regexp.MustCompile(rb.String()) + ids := []string{} + for _, match := range rg.FindAllStringSubmatch(body, -1) { + // Take all non-empty group matches. + for i := 1; i < len(match); i++ { + if match[i] == "" { + continue + } + ids = append(ids, match[i]) + } + } + return ids +} + +func dedupBugIDs(list []string) []string { + // We should preserve the original order of IDs. + var ret []string + dup := map[string]struct{}{} + for _, v := range list { + if _, ok := dup[v]; ok { + continue + } + dup[v] = struct{}{} + ret = append(ret, v) + } + return ret +} + // MergeEmailLists merges several email lists removing duplicates and invalid entries. func MergeEmailLists(lists ...[]string) []string { const ( diff --git a/pkg/email/parser_test.go b/pkg/email/parser_test.go index cde028a02..151d259b8 100644 --- a/pkg/email/parser_test.go +++ b/pkg/email/parser_test.go @@ -8,6 +8,7 @@ import ( "reflect" "strings" "testing" + "time" "github.com/google/go-cmp/cmp" ) @@ -121,7 +122,10 @@ func TestParse(t *testing.T) { for i, test := range parseTests { body := func(t *testing.T, test ParseTest) { email, err := Parse(strings.NewReader(test.email), - []string{"bot <foo@bar.com>"}, []string{"list@googlegroups.com"}) + []string{"bot <foo@bar.com>"}, + []string{"list@googlegroups.com"}, + []string{"bar.com"}, + ) if err != nil { t.Fatal(err) } @@ -342,6 +346,8 @@ type ParseTest struct { res Email } +var parseTestZone = time.FixedZone("", -7*60*60) + // nolint: lll var parseTests = []ParseTest{ {`Date: Sun, 7 May 2017 19:54:00 -0700 @@ -362,8 +368,9 @@ To post to this group, send email to syzkaller@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/syzkaller/abcdef@google.com. For more options, visit https://groups.google.com/d/optout.`, Email{ - BugID: "4564456", + BugIDs: []string{"4564456"}, MessageID: "<123>", + Date: time.Date(2017, time.May, 7, 19, 54, 0, 0, parseTestZone), Link: "https://groups.google.com/d/msgid/syzkaller/abcdef@google.com", Subject: "test subject", Author: "bob@example.com", @@ -394,8 +401,9 @@ Content-Type: text/plain; charset="UTF-8" text body last line`, Email{ - BugID: "4564456", + BugIDs: []string{"4564456"}, MessageID: "<123>", + Date: time.Date(2017, time.May, 7, 19, 54, 0, 0, parseTestZone), Subject: "test subject", Author: "foo@bar.com", Cc: []string{"bob@example.com"}, @@ -417,6 +425,7 @@ second line last line`, Email{ MessageID: "<123>", + Date: time.Date(2017, time.May, 7, 19, 54, 0, 0, parseTestZone), Subject: "test subject", Author: "bob@example.com", Cc: []string{"alice@example.com", "bob@example.com", "bot@example.com"}, @@ -443,6 +452,7 @@ last line #syz command`, Email{ MessageID: "<123>", + Date: time.Date(2017, time.May, 7, 19, 54, 0, 0, parseTestZone), Subject: "test subject", Author: "bob@example.com", Cc: []string{"alice@example.com", "bob@example.com", "bot@example.com"}, @@ -483,6 +493,7 @@ IHQpKSB7CiAJCXNwaW5fdW5sb2NrKCZrY292LT5sb2NrKTsKIAkJcmV0dXJuOwo= --001a114ce0b01684a6054f0d8b81--`, Email{ MessageID: "<123>", + Date: time.Date(2017, time.May, 7, 19, 54, 0, 0, parseTestZone), Subject: "test subject", Author: "bob@example.com", Cc: []string{"bob@example.com", "bot@example.com"}, @@ -571,6 +582,7 @@ or)</div></div></div> --f403043eee70018593054f0d9f1f--`, Email{ MessageID: "<123>", + Date: time.Date(2017, time.May, 7, 19, 54, 0, 0, parseTestZone), Subject: "test subject", Author: "bob@example.com", Cc: []string{"bob@example.com", "bot@example.com"}, @@ -648,6 +660,7 @@ On 2018/06/10 4:57, syzbot wrote: d `, Email{ MessageID: "<1250334f-7220-2bff-5d87-b87573758d81@bar.com>", + Date: time.Date(2018, time.June, 10, 10, 38, 20, 0, time.FixedZone("", 9*60*60)), Subject: "Re: BUG: unable to handle kernel NULL pointer dereference in sock_poll", Author: "bar@foo.com", Cc: []string{"bar@foo.com", "syzbot@syzkaller.appspotmail.com"}, @@ -709,8 +722,9 @@ To: syzbot <foo+4564456@bar.com> nothing to see here`, Email{ - BugID: "4564456", + BugIDs: []string{"4564456"}, MessageID: "<123>", + Date: time.Date(2017, time.May, 7, 19, 54, 0, 0, parseTestZone), Subject: "#syz test: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git master", Author: "bob@example.com", Cc: []string{"bob@example.com"}, @@ -729,6 +743,7 @@ To: syzbot <list@googlegroups.com> nothing to see here`, Email{ MessageID: "<123>", + Date: time.Date(2017, time.May, 7, 19, 54, 0, 0, parseTestZone), Subject: "Subject", Author: "user@mail.com", MailingList: "list@googlegroups.com", @@ -746,6 +761,7 @@ To: <user2@mail.com> nothing to see here`, Email{ MessageID: "<123>", + Date: time.Date(2017, time.May, 7, 19, 54, 0, 0, parseTestZone), Subject: "Subject", Author: "user@mail.com", MailingList: "list@googlegroups.com", @@ -763,6 +779,7 @@ To: <user2@mail.com> nothing to see here`, Email{ MessageID: "<123>", + Date: time.Date(2017, time.May, 7, 19, 54, 0, 0, parseTestZone), Subject: "Subject", Author: "list@googlegroups.com", MailingList: "list@googlegroups.com", @@ -776,7 +793,7 @@ Subject: Re: BUG: unable to handle kernel NULL pointer dereference in To: syzbot <syzbot+344bb0f46d7719cd9483@syzkaller.appspotmail.com> From: bar <bar@foo.com> Message-ID: <1250334f-7220-2bff-5d87-b87573758d81@bar.com> -Date: Sun, 10 Jun 2018 10:38:20 +0900 +Date: Sun, 7 May 2017 19:54:00 -0700 MIME-Version: 1.0 Content-Type: text/plain; charset="UTF-8" Content-Language: en-US @@ -787,6 +804,7 @@ test: https://github.com/torvalds/linux.git 7b5bb460defa107dd2e82= f950fddb9ea6bdb5e39 `, Email{ MessageID: "<1250334f-7220-2bff-5d87-b87573758d81@bar.com>", + Date: time.Date(2017, time.May, 7, 19, 54, 0, 0, parseTestZone), Subject: "Re: BUG: unable to handle kernel NULL pointer dereference in sock_poll", Author: "bar@foo.com", Cc: []string{"bar@foo.com", "syzbot@syzkaller.appspotmail.com"}, @@ -797,4 +815,99 @@ test: https://github.com/torvalds/linux.git 7b5bb460defa107dd2e82f950fddb9ea6bdb CommandStr: "test:", CommandArgs: "https://github.com/torvalds/linux.git 7b5bb460defa107dd2e82f950fddb9ea6bdb5e39", }}, + {`Sender: syzkaller-bugs@googlegroups.com +Subject: [PATCH] Some patch +To: <someone@foo.com> +From: bar <bar@foo.com> +Message-ID: <1250334f-7220-2bff-5d87-b87573758d81@bar.com> +Date: Sun, 7 May 2017 19:54:00 -0700 +MIME-Version: 1.0 +Content-Type: text/plain; charset="UTF-8" +Content-Language: en-US +Content-Transfer-Encoding: quoted-printable + +Reported-by: syzbot <foo+223c7461c58c58a4cb10@bar.com> +`, Email{ + BugIDs: []string{"223c7461c58c58a4cb10"}, + MessageID: "<1250334f-7220-2bff-5d87-b87573758d81@bar.com>", + Date: time.Date(2017, time.May, 7, 19, 54, 0, 0, parseTestZone), + Subject: "[PATCH] Some patch", + Author: "bar@foo.com", + Cc: []string{"bar@foo.com", "someone@foo.com"}, + Body: `Reported-by: syzbot <foo+223c7461c58c58a4cb10@bar.com> +`, + Command: CmdNone, + }}, + {`Sender: syzkaller-bugs@googlegroups.com +Subject: [PATCH] Some patch +To: <someone@foo.com> +From: bar <bar@foo.com> +Message-ID: <1250334f-7220-2bff-5d87-b87573758d81@bar.com> +Date: Sun, 7 May 2017 19:54:00 -0700 +MIME-Version: 1.0 +Content-Type: text/plain; charset="UTF-8" +Content-Language: en-US + +Link: https://bar.com/bug?extid=223c7461c58c58a4cb10@bar.com +`, Email{ + BugIDs: []string{"223c7461c58c58a4cb10"}, + MessageID: "<1250334f-7220-2bff-5d87-b87573758d81@bar.com>", + Date: time.Date(2017, time.May, 7, 19, 54, 0, 0, parseTestZone), + Subject: "[PATCH] Some patch", + Author: "bar@foo.com", + Cc: []string{"bar@foo.com", "someone@foo.com"}, + Body: `Link: https://bar.com/bug?extid=223c7461c58c58a4cb10@bar.com +`, + Command: CmdNone, + }}, + + {`Sender: syzkaller-bugs@googlegroups.com +Subject: [PATCH] Some patch +To: <someone@foo.com> +From: bar <bar@foo.com> +Message-ID: <1250334f-7220-2bff-5d87-b87573758d81@bar.com> +Date: Sun, 7 May 2017 19:54:00 -0700 +MIME-Version: 1.0 +Content-Type: text/plain; charset="UTF-8" +Content-Language: en-US +Content-Transfer-Encoding: quoted-printable + +Reported-by: syzbot <foo+223c7461c58c58a4cb10@bar.com> +Reported-by: syzbot <foo+9909090909090909@bar.com> +`, Email{ + BugIDs: []string{"223c7461c58c58a4cb10", "9909090909090909"}, + MessageID: "<1250334f-7220-2bff-5d87-b87573758d81@bar.com>", + Date: time.Date(2017, time.May, 7, 19, 54, 0, 0, parseTestZone), + Subject: "[PATCH] Some patch", + Author: "bar@foo.com", + Cc: []string{"bar@foo.com", "someone@foo.com"}, + Body: `Reported-by: syzbot <foo+223c7461c58c58a4cb10@bar.com> +Reported-by: syzbot <foo+9909090909090909@bar.com> +`, + Command: CmdNone, + }}, + {`Sender: syzkaller-bugs@googlegroups.com +Subject: [PATCH] Some patch +To: <someone@foo.com>, <foo+9909090909090909@bar.com> +From: bar <bar@foo.com> +Message-ID: <1250334f-7220-2bff-5d87-b87573758d81@bar.com> +Date: Sun, 7 May 2017 19:54:00 -0700 +MIME-Version: 1.0 +Content-Type: text/plain; charset="UTF-8" +Content-Language: en-US +Content-Transfer-Encoding: quoted-printable + +Reported-by: syzbot <foo+223c7461c58c58a4cb10@bar.com> +`, Email{ + // First come BugIDs from header, then from the body. + BugIDs: []string{"9909090909090909", "223c7461c58c58a4cb10"}, + MessageID: "<1250334f-7220-2bff-5d87-b87573758d81@bar.com>", + Date: time.Date(2017, time.May, 7, 19, 54, 0, 0, parseTestZone), + Subject: "[PATCH] Some patch", + Author: "bar@foo.com", + Cc: []string{"bar@foo.com", "someone@foo.com"}, + Body: `Reported-by: syzbot <foo+223c7461c58c58a4cb10@bar.com> +`, + Command: CmdNone, + }}, } |
