aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Dmitry Vyukov <dvyukov@google.com> 2017-07-02 15:40:24 +0200
committer: Dmitry Vyukov <dvyukov@google.com> 2017-07-02 15:40:24 +0200
commit: 233bc790fcdf4bc657a5949bd63d0a9db019cf42 (patch)
tree: f13ce8c78e52a42bdfa0f9bfc8815ad6c4c455bf
parent: 1b20342f25dda771055fe93749190719733c4d0a (diff)
pkg/email: add email parsing functionality
Parse extracts all potentially interesting info from an incoming email.
-rw-r--r-- pkg/email/parser.go 183
-rw-r--r-- pkg/email/parser_test.go 344
2 files changed, 527 insertions, 0 deletions
diff --git a/pkg/email/parser.go b/pkg/email/parser.go
new file mode 100644
index 000000000..42a890472
--- /dev/null
+++ b/pkg/email/parser.go
@@ -0,0 +1,183 @@
+// Copyright 2017 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package email
+
+import (
+ "bytes"
+ "encoding/base64"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "mime"
+ "mime/multipart"
+ "net/mail"
+ "strings"
+)
+
// Email holds the pieces of an incoming email message that Parse extracts.
type Email struct {
	// BugID is the bug ID hash found in one of the receiver addresses, if any.
	BugID string
	// MessageID is the value of the Message-ID header.
	MessageID string
	// Subject is the value of the Subject header.
	Subject string
	// From is the first sender address.
	From string
	// Cc lists all addresses from the Cc and To headers.
	Cc []string
	// Body is the text/plain part of the message.
	Body string
	// Patch is the attached patch, if any.
	Patch string
	// Command is the command to the bot (#syzbot is stripped).
	Command string
	// CommandArgs are the arguments for the command.
	CommandArgs []string
}
+
+func Parse(r io.Reader, ownEmail string) (*Email, error) {
+ msg, err := mail.ReadMessage(r)
+ if err != nil {
+ return nil, fmt.Errorf("failed to read email: %v", err)
+ }
+ from, err := msg.Header.AddressList("From")
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse email header 'From': %v", err)
+ }
+ if len(from) == 0 {
+ return nil, fmt.Errorf("failed to parse email header 'To': no senders")
+ }
+ to, err := msg.Header.AddressList("To")
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse email header 'To': %v", err)
+ }
+ // AddressList fails if the header is not present.
+ cc, _ := msg.Header.AddressList("Cc")
+ bugID := ""
+ var ccList []string
+ for _, addr := range append(cc, to...) {
+ if bugID == "" {
+ bugID = extractBugID(addr.Address, ownEmail)
+ }
+ ccList = append(ccList, addr.String())
+ }
+ body, attachments, err := parseBody(msg.Body, msg.Header)
+ if err != nil {
+ return nil, err
+ }
+ patch := ""
+ for _, a := range attachments {
+ _, patch, _ = ParsePatch(string(a))
+ if patch != "" {
+ break
+ }
+ }
+ if patch == "" {
+ _, patch, _ = ParsePatch(string(body))
+ }
+ cmd, cmdArgs := extractCommand(body)
+ email := &Email{
+ BugID: bugID,
+ MessageID: msg.Header.Get("Message-ID"),
+ Subject: msg.Header.Get("Subject"),
+ From: from[0].String(),
+ Cc: ccList,
+ Body: string(body),
+ Patch: patch,
+ Command: cmd,
+ CommandArgs: cmdArgs,
+ }
+ return email, nil
+}
+
// extractBugID extracts the bug ID encoded in a receiver email address.
// We send emails from <something+BUG_ID_HASH@something.com>.
// from is potentially such an address; canonical is <something@something.com>
// and may be given with a display name ("Name" <addr>), in which case only the
// bare address is used.
// The function returns BUG_ID_HASH, or an empty string if from does not
// contain a hash or does not match canonical once the hash is removed.
// The comparison is case-insensitive.
func extractBugID(from, canonical string) string {
	if addr, err := mail.ParseAddress(canonical); err == nil {
		canonical = addr.Address
	}
	plusPos := strings.IndexByte(from, '+')
	if plusPos == -1 {
		return ""
	}
	atPos := strings.IndexByte(from[plusPos:], '@')
	if atPos == -1 {
		return ""
	}
	atPos += plusPos // make the '@' offset absolute within from
	user, hash, domain := from[:plusPos], from[plusPos+1:atPos], from[atPos:]
	// EqualFold compares case-insensitively without allocating lowered copies.
	if !strings.EqualFold(user+domain, canonical) {
		return ""
	}
	return hash
}
+
// extractCommand extracts a command to syzbot from the email body.
// Commands are of the following form:
// ^#syzbot cmd args...
// Only the first line that starts with "#syzbot " is considered; quoted lines
// such as "> #syzbot ..." do not match. The command and its arguments may be
// separated by any run of whitespace. If no command is found, cmd is empty and
// args is nil; args is also nil when the command has no arguments.
func extractCommand(body []byte) (cmd string, args []string) {
	const prefix = "#syzbot "
	var line []byte
	if bytes.HasPrefix(body, []byte(prefix)) {
		// The command is on the very first line of the body.
		line = body[len(prefix):]
	} else {
		// Otherwise the prefix must directly follow a newline.
		pos := bytes.Index(body, []byte("\n"+prefix))
		if pos == -1 {
			return "", nil
		}
		line = body[pos+1+len(prefix):]
	}
	if end := bytes.IndexByte(line, '\n'); end != -1 {
		line = line[:end]
	}
	// Fields splits on any whitespace and drops empty tokens (and a trailing '\r').
	fields := strings.Fields(string(line))
	if len(fields) == 0 {
		return "", nil
	}
	cmd = fields[0]
	if len(fields) > 1 {
		args = fields[1:]
	}
	return cmd, args
}
+
// parseBody extracts the text body and the attachments from a message (or a
// MIME part) with the given headers, whose content is read from r.
//
// A part with Content-Disposition "attachment" is returned as a single
// attachment. A text/plain part is returned as the body. For both,
// base64 Content-Transfer-Encoding is decoded. multipart/* containers are
// walked recursively: the first body found is kept and all attachments are
// collected. Any other media type yields no body and no attachments.
//
// A missing Content-Type header is treated as text/plain, which is the
// default mandated by RFC 2045 (plain-text mails are often sent without it).
func parseBody(r io.Reader, headers mail.Header) (body []byte, attachments [][]byte, err error) {
	mediaType := "text/plain"
	var params map[string]string
	if contentType := headers.Get("Content-Type"); contentType != "" {
		mediaType, params, err = mime.ParseMediaType(contentType)
		if err != nil {
			return nil, nil, fmt.Errorf("failed to parse email header 'Content-Type': %v", err)
		}
	}
	// A missing or malformed Content-Disposition simply means "not an attachment".
	disp, _, _ := mime.ParseMediaType(headers.Get("Content-Disposition"))
	isAttachment := disp == "attachment"
	if isAttachment || mediaType == "text/plain" {
		// Note: mime package handles quoted-printable internally.
		if strings.EqualFold(headers.Get("Content-Transfer-Encoding"), "base64") {
			r = base64.NewDecoder(base64.StdEncoding, r)
		}
		data, readErr := ioutil.ReadAll(r)
		if readErr != nil {
			return nil, nil, fmt.Errorf("failed to read email body: %v", readErr)
		}
		if isAttachment {
			return nil, [][]byte{data}, nil
		}
		return data, nil, nil
	}
	if !strings.HasPrefix(mediaType, "multipart/") {
		return nil, nil, nil
	}
	mr := multipart.NewReader(r, params["boundary"])
	for {
		part, partErr := mr.NextPart()
		if partErr == io.EOF {
			return body, attachments, nil
		}
		if partErr != nil {
			return nil, nil, fmt.Errorf("failed to parse MIME parts: %v", partErr)
		}
		partBody, partAttachments, partErr := parseBody(part, mail.Header(part.Header))
		if partErr != nil {
			return nil, nil, partErr
		}
		// Keep only the first body (e.g. text/plain of multipart/alternative).
		if body == nil {
			body = partBody
		}
		attachments = append(attachments, partAttachments...)
	}
}
diff --git a/pkg/email/parser_test.go b/pkg/email/parser_test.go
new file mode 100644
index 000000000..8d8e675e4
--- /dev/null
+++ b/pkg/email/parser_test.go
@@ -0,0 +1,344 @@
+// Copyright 2017 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package email
+
+import (
+ "fmt"
+ "reflect"
+ "strings"
+ "testing"
+)
+
+func TestExtractCommand(t *testing.T) {
+ for i, test := range extractCommandTests {
+ t.Run(fmt.Sprint(i), func(t *testing.T) {
+ cmd, args := extractCommand([]byte(test.body))
+ if cmd != test.cmd || !reflect.DeepEqual(args, test.args) {
+ t.Logf("expect: %q %q", test.cmd, test.args)
+ t.Logf("got : %q %q", cmd, args)
+ t.Fail()
+ }
+ })
+ }
+}
+
+func TestExtractBugID(t *testing.T) {
+ for i, test := range extractBugIDTests {
+ t.Run(fmt.Sprint(i), func(t *testing.T) {
+ bugID := extractBugID(test.email, `"Foo Bar" <foo@bar.com>`)
+ if bugID != test.bugID {
+ t.Logf("expect: %q", test.bugID)
+ t.Logf("got : %q", bugID)
+ t.Fail()
+ }
+ })
+ }
+}
+
+func TestParse(t *testing.T) {
+ for i, test := range parseTests {
+ t.Run(fmt.Sprint(i), func(t *testing.T) {
+ email, err := Parse(strings.NewReader(test.email), "")
+ if err != nil {
+ t.Fatal(err)
+ }
+ if !reflect.DeepEqual(email, test.res) {
+ t.Logf("expect:\n%#v", test.res)
+ t.Logf("got:\n%#v", email)
+ t.Fail()
+ }
+ })
+ }
+}
+
// extractCommandTests lists email bodies together with the command and the
// arguments that extractCommand is expected to return for them.
var extractCommandTests = []struct {
	body string
	cmd  string
	args []string
}{
	{
		// Command on the last line, no trailing newline.
		body: "Hello,\n\nline1\n#syzbot foo bar baz",
		cmd:  "foo",
		args: []string{"bar", "baz"},
	},
	{
		// Command followed by more text.
		body: "Hello,\n\nline1\n#syzbot foo bar baz\nline 2\n",
		cmd:  "foo",
		args: []string{"bar", "baz"},
	},
	{
		// A quoted command is not a command: cmd stays empty and args nil.
		body: "\nline1\n> #syzbot foo bar baz\nline 2\n",
	},
}
+
// extractBugIDTests lists receiver addresses and the bug ID that extractBugID
// is expected to return for each of them.
var extractBugIDTests = []struct {
	email string
	bugID string
}{
	// No hash in the address.
	{email: "foo@bar.com", bugID: ""},
	// Hash present, but the domain differs from the one TestExtractBugID uses.
	{email: "foo+123@baz.com", bugID: ""},
	// Hash present and the address matches.
	{email: "foo+123@bar.com", bugID: "123"},
}
+
+var parseTests = []struct { // parseTests pairs raw email messages with the Email that TestParse expects Parse to return for them.
+ email string // raw message text handed to Parse
+ res *Email // expected result; compared with reflect.DeepEqual in TestParse
+}{
+ {`Date: Sun, 7 May 2017 19:54:00 -0700
+Message-ID: <123>
+Subject: test subject
+From: Bob <bob@example.com>
+To: syzbot <bot@example.com>
+Content-Type: text/plain; charset="UTF-8"
+
+text body
+second line
+#syzbot command arg1 arg2 arg3
+last line`,
+ &Email{
+ MessageID: "<123>",
+ Subject: "test subject",
+ From: "\"Bob\" <bob@example.com>",
+ Cc: []string{"\"syzbot\" <bot@example.com>"},
+ Body: `text body
+second line
+#syzbot command arg1 arg2 arg3
+last line`,
+ Patch: "",
+ Command: "command",
+ CommandArgs: []string{"arg1", "arg2", "arg3"},
+ }},
+
+ {`Date: Sun, 7 May 2017 19:54:00 -0700
+Message-ID: <123>
+Subject: test subject
+From: Bob <bob@example.com>
+To: syzbot <bot@example.com>, Alice <alice@example.com>
+Content-Type: text/plain
+
+#syzbot command
+text body
+second line
+last line`,
+ &Email{
+ MessageID: "<123>",
+ Subject: "test subject",
+ From: "\"Bob\" <bob@example.com>",
+ Cc: []string{"\"syzbot\" <bot@example.com>", "\"Alice\" <alice@example.com>"},
+ Body: `#syzbot command
+text body
+second line
+last line`,
+ Patch: "",
+ Command: "command",
+ CommandArgs: nil,
+ }},
+
+ {`Date: Sun, 7 May 2017 19:54:00 -0700
+Message-ID: <123>
+Subject: test subject
+From: Bob <bob@example.com>
+To: syzbot <bot@example.com>, Alice <alice@example.com>
+Content-Type: text/plain
+
+text body
+second line
+last line
+#syzbot command`,
+ &Email{
+ MessageID: "<123>",
+ Subject: "test subject",
+ From: "\"Bob\" <bob@example.com>",
+ Cc: []string{"\"syzbot\" <bot@example.com>", "\"Alice\" <alice@example.com>"},
+ Body: `text body
+second line
+last line
+#syzbot command`,
+ Patch: "",
+ Command: "command",
+ CommandArgs: nil,
+ }},
+
+ {`Date: Sun, 7 May 2017 19:54:00 -0700
+Message-ID: <123>
+Subject: test subject
+From: Bob <bob@example.com>
+To: syzbot <bot@example.com>
+Content-Type: multipart/mixed; boundary="001a114ce0b01684a6054f0d8b81"
+
+--001a114ce0b01684a6054f0d8b81
+Content-Type: text/plain; charset="UTF-8"
+
+body text
+>#syzbot test
+
+--001a114ce0b01684a6054f0d8b81
+Content-Type: text/x-patch; charset="US-ASCII"; name="patch.patch"
+Content-Disposition: attachment; filename="patch.patch"
+Content-Transfer-Encoding: base64
+X-Attachment-Id: f_j2gwcdoa1
+
+ZGlmZiAtLWdpdCBhL2tlcm5lbC9rY292LmMgYi9rZXJuZWwva2Nvdi5jCmluZGV4IDg1ZTU1NDZj
+ZDc5MS4uOTQ5ZWE0NTc0NDEyIDEwMDY0NAotLS0gYS9rZXJuZWwva2Nvdi5jCisrKyBiL2tlcm5l
+bC9rY292LmMKQEAgLTEyNyw3ICsxMjcsNiBAQCB2b2lkIGtjb3ZfdGFza19leGl0KHN0cnVjdCB0
+YXNrX3N0cnVjdCAqdCkKIAlrY292ID0gdC0+a2NvdjsKIAlpZiAoa2NvdiA9PSBOVUxMKQogCQly
+ZXR1cm47Ci0Jc3Bpbl9sb2NrKCZrY292LT5sb2NrKTsKIAlpZiAoV0FSTl9PTihrY292LT50ICE9
+IHQpKSB7CiAJCXNwaW5fdW5sb2NrKCZrY292LT5sb2NrKTsKIAkJcmV0dXJuOwo=
+--001a114ce0b01684a6054f0d8b81--`,
+ &Email{
+ MessageID: "<123>",
+ Subject: "test subject",
+ From: "\"Bob\" <bob@example.com>",
+ Cc: []string{"\"syzbot\" <bot@example.com>"},
+ Body: `body text
+>#syzbot test
+`,
+ Patch: `--- a/kernel/kcov.c
++++ b/kernel/kcov.c
+@@ -127,7 +127,6 @@ void kcov_task_exit(struct task_struct *t)
+ kcov = t->kcov;
+ if (kcov == NULL)
+ return;
+- spin_lock(&kcov->lock);
+ if (WARN_ON(kcov->t != t)) {
+ spin_unlock(&kcov->lock);
+ return;
+`,
+ Command: "",
+ CommandArgs: nil,
+ }},
+
+ {`Date: Sun, 7 May 2017 19:54:00 -0700
+Message-ID: <123>
+Subject: test subject
+From: Bob <bob@example.com>
+To: syzbot <bot@example.com>
+Content-Type: multipart/alternative; boundary="f403043eee70018593054f0d9f1f"
+
+--f403043eee70018593054f0d9f1f
+Content-Type: text/plain; charset="UTF-8"
+
+On Mon, May 8, 2017 at 6:47 PM, Bob wrote:
+> body text
+
+#syzbot test
+
+commit 59372bbf3abd5b24a7f6f676a3968685c280f955
+Date: Thu Apr 27 13:54:11 2017 +0200
+
+ statx: correct error handling of NULL pathname
+
+ test patch.
+
+diff --git a/fs/stat.c b/fs/stat.c
+index 3d85747bd86e..a257b872a53d 100644
+--- a/fs/stat.c
++++ b/fs/stat.c
+@@ -567,8 +567,6 @@ SYSCALL_DEFINE5(statx,
+ return -EINVAL;
+ if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
+ return -EINVAL;
+- if (!filename)
+- return -EINVAL;
+
+ error = vfs_statx(dfd, filename, flags, &stat, mask);
+ if (error)
+
+--f403043eee70018593054f0d9f1f
+Content-Type: text/html; charset="UTF-8"
+Content-Transfer-Encoding: quoted-printable
+
+<div dir=3D"ltr">On Mon, May 8, 2017 at 6:47 PM, Dmitry Vyukov &lt;<a href=
+=3D"mailto:bob@example.com">bob@example.com</a>&gt; wrote:<br>&gt; bo=
+dy text<br><br>#syzbot test<br><br><div><div>commit 59372bbf3abd5b24a7f6f67=
+6a3968685c280f955</div><div>Date: =C2=A0 Thu Apr 27 13:54:11 2017 +0200</di=
+v><div><br></div><div>=C2=A0 =C2=A0 statx: correct error handling of NULL p=
+athname</div><div>=C2=A0 =C2=A0=C2=A0</div><div>=C2=A0 =C2=A0 test patch.</=
+div><div><br></div><div>diff --git a/fs/stat.c b/fs/stat.c</div><div>index =
+3d85747bd86e..a257b872a53d 100644</div><div>--- a/fs/stat.c</div><div>+++ b=
+/fs/stat.c</div><div>@@ -567,8 +567,6 @@ SYSCALL_DEFINE5(statx,</div><div>=
+=C2=A0<span class=3D"gmail-Apple-tab-span" style=3D"white-space:pre">=09=09=
+</span>return -EINVAL;</div><div>=C2=A0<span class=3D"gmail-Apple-tab-span"=
+ style=3D"white-space:pre">=09</span>if ((flags &amp; AT_STATX_SYNC_TYPE) =
+=3D=3D AT_STATX_SYNC_TYPE)</div><div>=C2=A0<span class=3D"gmail-Apple-tab-s=
+pan" style=3D"white-space:pre">=09=09</span>return -EINVAL;</div><div>-<spa=
+n class=3D"gmail-Apple-tab-span" style=3D"white-space:pre">=09</span>if (!f=
+ilename)</div><div>-<span class=3D"gmail-Apple-tab-span" style=3D"white-spa=
+ce:pre">=09=09</span>return -EINVAL;</div><div>=C2=A0</div><div>=C2=A0<span=
+ class=3D"gmail-Apple-tab-span" style=3D"white-space:pre">=09</span>error =
+=3D vfs_statx(dfd, filename, flags, &amp;stat, mask);</div><div>=C2=A0<span=
+ class=3D"gmail-Apple-tab-span" style=3D"white-space:pre">=09</span>if (err=
+or)</div></div></div>
+
+--f403043eee70018593054f0d9f1f--`,
+ &Email{
+ MessageID: "<123>",
+ Subject: "test subject",
+ From: "\"Bob\" <bob@example.com>",
+ Cc: []string{"\"syzbot\" <bot@example.com>"},
+ Body: `On Mon, May 8, 2017 at 6:47 PM, Bob wrote:
+> body text
+
+#syzbot test
+
+commit 59372bbf3abd5b24a7f6f676a3968685c280f955
+Date: Thu Apr 27 13:54:11 2017 +0200
+
+ statx: correct error handling of NULL pathname
+
+ test patch.
+
+diff --git a/fs/stat.c b/fs/stat.c
+index 3d85747bd86e..a257b872a53d 100644
+--- a/fs/stat.c
++++ b/fs/stat.c
+@@ -567,8 +567,6 @@ SYSCALL_DEFINE5(statx,
+ return -EINVAL;
+ if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
+ return -EINVAL;
+- if (!filename)
+- return -EINVAL;
+
+ error = vfs_statx(dfd, filename, flags, &stat, mask);
+ if (error)
+`,
+ Patch: `--- a/fs/stat.c
++++ b/fs/stat.c
+@@ -567,8 +567,6 @@ SYSCALL_DEFINE5(statx,
+ return -EINVAL;
+ if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
+ return -EINVAL;
+- if (!filename)
+- return -EINVAL;
+
+ error = vfs_statx(dfd, filename, flags, &stat, mask);
+ if (error)
+`,
+ Command: "test",
+ CommandArgs: nil,
+ }},
+}