aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/BurntSushi/toml/lex.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/BurntSushi/toml/lex.go')
-rw-r--r--vendor/github.com/BurntSushi/toml/lex.go768
1 files changed, 524 insertions, 244 deletions
diff --git a/vendor/github.com/BurntSushi/toml/lex.go b/vendor/github.com/BurntSushi/toml/lex.go
index e0a742a88..28ed4dd35 100644
--- a/vendor/github.com/BurntSushi/toml/lex.go
+++ b/vendor/github.com/BurntSushi/toml/lex.go
@@ -2,6 +2,8 @@ package toml
import (
"fmt"
+ "reflect"
+ "runtime"
"strings"
"unicode"
"unicode/utf8"
@@ -29,33 +31,20 @@ const (
itemArrayTableStart
itemArrayTableEnd
itemKeyStart
+ itemKeyEnd
itemCommentStart
itemInlineTableStart
itemInlineTableEnd
)
-const (
- eof = 0
- comma = ','
- tableStart = '['
- tableEnd = ']'
- arrayTableStart = '['
- arrayTableEnd = ']'
- tableSep = '.'
- keySep = '='
- arrayStart = '['
- arrayEnd = ']'
- commentStart = '#'
- stringStart = '"'
- stringEnd = '"'
- rawStringStart = '\''
- rawStringEnd = '\''
- inlineTableStart = '{'
- inlineTableEnd = '}'
-)
+const eof = 0
type stateFn func(lx *lexer) stateFn
+func (p Position) String() string {
+ return fmt.Sprintf("at line %d; start %d; length %d", p.Line, p.Start, p.Len)
+}
+
type lexer struct {
input string
start int
@@ -64,26 +53,26 @@ type lexer struct {
state stateFn
items chan item
- // Allow for backing up up to three runes.
- // This is necessary because TOML contains 3-rune tokens (""" and ''').
- prevWidths [3]int
- nprev int // how many of prevWidths are in use
- // If we emit an eof, we can still back up, but it is not OK to call
- // next again.
- atEOF bool
+ // Allow for backing up up to 4 runes. This is necessary because TOML
+ // contains 3-rune tokens (""" and ''').
+ prevWidths [4]int
+ nprev int // how many of prevWidths are in use
+ atEOF bool // If we emit an eof, we can still back up, but it is not OK to call next again.
// A stack of state functions used to maintain context.
- // The idea is to reuse parts of the state machine in various places.
- // For example, values can appear at the top level or within arbitrarily
- // nested arrays. The last state on the stack is used after a value has
- // been lexed. Similarly for comments.
+ //
+ // The idea is to reuse parts of the state machine in various places. For
+ // example, values can appear at the top level or within arbitrarily nested
+ // arrays. The last state on the stack is used after a value has been lexed.
+ // Similarly for comments.
stack []stateFn
}
type item struct {
- typ itemType
- val string
- line int
+ typ itemType
+ val string
+ err error
+ pos Position
}
func (lx *lexer) nextItem() item {
@@ -93,6 +82,7 @@ func (lx *lexer) nextItem() item {
return item
default:
lx.state = lx.state(lx)
+ //fmt.Printf(" STATE %-24s current: %-10s stack: %s\n", lx.state, lx.current(), lx.stack)
}
}
}
@@ -101,9 +91,9 @@ func lex(input string) *lexer {
lx := &lexer{
input: input,
state: lexTop,
- line: 1,
items: make(chan item, 10),
stack: make([]stateFn, 0, 10),
+ line: 1,
}
return lx
}
@@ -125,19 +115,36 @@ func (lx *lexer) current() string {
return lx.input[lx.start:lx.pos]
}
+func (lx lexer) getPos() Position {
+ p := Position{
+ Line: lx.line,
+ Start: lx.start,
+ Len: lx.pos - lx.start,
+ }
+ if p.Len <= 0 {
+ p.Len = 1
+ }
+ return p
+}
+
func (lx *lexer) emit(typ itemType) {
- lx.items <- item{typ, lx.current(), lx.line}
+ // Needed for multiline strings ending with an incomplete UTF-8 sequence.
+ if lx.start > lx.pos {
+ lx.error(errLexUTF8{lx.input[lx.pos]})
+ return
+ }
+ lx.items <- item{typ: typ, pos: lx.getPos(), val: lx.current()}
lx.start = lx.pos
}
func (lx *lexer) emitTrim(typ itemType) {
- lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line}
+ lx.items <- item{typ: typ, pos: lx.getPos(), val: strings.TrimSpace(lx.current())}
lx.start = lx.pos
}
func (lx *lexer) next() (r rune) {
if lx.atEOF {
- panic("next called after EOF")
+ panic("BUG in lexer: next called after EOF")
}
if lx.pos >= len(lx.input) {
lx.atEOF = true
@@ -147,12 +154,25 @@ func (lx *lexer) next() (r rune) {
if lx.input[lx.pos] == '\n' {
lx.line++
}
+ lx.prevWidths[3] = lx.prevWidths[2]
lx.prevWidths[2] = lx.prevWidths[1]
lx.prevWidths[1] = lx.prevWidths[0]
- if lx.nprev < 3 {
+ if lx.nprev < 4 {
lx.nprev++
}
+
r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
+ if r == utf8.RuneError {
+ lx.error(errLexUTF8{lx.input[lx.pos]})
+ return utf8.RuneError
+ }
+
+ // Note: don't use peek() here, as this calls next().
+ if isControl(r) || (r == '\r' && (len(lx.input)-1 == lx.pos || lx.input[lx.pos+1] != '\n')) {
+ lx.errorControlChar(r)
+ return utf8.RuneError
+ }
+
lx.prevWidths[0] = w
lx.pos += w
return r
@@ -163,19 +183,21 @@ func (lx *lexer) ignore() {
lx.start = lx.pos
}
-// backup steps back one rune. Can be called only twice between calls to next.
+// backup steps back one rune. Can be called 4 times between calls to next.
func (lx *lexer) backup() {
if lx.atEOF {
lx.atEOF = false
return
}
if lx.nprev < 1 {
- panic("backed up too far")
+ panic("BUG in lexer: backed up too far")
}
w := lx.prevWidths[0]
lx.prevWidths[0] = lx.prevWidths[1]
lx.prevWidths[1] = lx.prevWidths[2]
+ lx.prevWidths[2] = lx.prevWidths[3]
lx.nprev--
+
lx.pos -= w
if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
lx.line--
@@ -211,18 +233,58 @@ func (lx *lexer) skip(pred func(rune) bool) {
}
}
-// errorf stops all lexing by emitting an error and returning `nil`.
+// error stops all lexing by emitting an error and returning `nil`.
+//
// Note that any value that is a character is escaped if it's a special
// character (newlines, tabs, etc.).
+func (lx *lexer) error(err error) stateFn {
+ if lx.atEOF {
+ return lx.errorPrevLine(err)
+ }
+ lx.items <- item{typ: itemError, pos: lx.getPos(), err: err}
+ return nil
+}
+
+// errorfPrevline is like error(), but sets the position to the last column of
+// the previous line.
+//
+// This is so that unexpected EOF or NL errors don't show on a new blank line.
+func (lx *lexer) errorPrevLine(err error) stateFn {
+ pos := lx.getPos()
+ pos.Line--
+ pos.Len = 1
+ pos.Start = lx.pos - 1
+ lx.items <- item{typ: itemError, pos: pos, err: err}
+ return nil
+}
+
+// errorPos is like error(), but allows explicitly setting the position.
+func (lx *lexer) errorPos(start, length int, err error) stateFn {
+ pos := lx.getPos()
+ pos.Start = start
+ pos.Len = length
+ lx.items <- item{typ: itemError, pos: pos, err: err}
+ return nil
+}
+
+// errorf is like error, and creates a new error.
func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
- lx.items <- item{
- itemError,
- fmt.Sprintf(format, values...),
- lx.line,
+ if lx.atEOF {
+ pos := lx.getPos()
+ pos.Line--
+ pos.Len = 1
+ pos.Start = lx.pos - 1
+ lx.items <- item{typ: itemError, pos: pos, err: fmt.Errorf(format, values...)}
+ return nil
}
+ lx.items <- item{typ: itemError, pos: lx.getPos(), err: fmt.Errorf(format, values...)}
return nil
}
+func (lx *lexer) errorControlChar(cc rune) stateFn {
+ return lx.errorPos(lx.pos-1, 1, errLexControl{cc})
+}
+
// lexTop consumes elements at the top level of TOML data.
func lexTop(lx *lexer) stateFn {
r := lx.next()
@@ -230,10 +292,10 @@ func lexTop(lx *lexer) stateFn {
return lexSkip(lx, lexTop)
}
switch r {
- case commentStart:
+ case '#':
lx.push(lexTop)
return lexCommentStart
- case tableStart:
+ case '[':
return lexTableStart
case eof:
if lx.pos > lx.start {
@@ -256,7 +318,7 @@ func lexTop(lx *lexer) stateFn {
func lexTopEnd(lx *lexer) stateFn {
r := lx.next()
switch {
- case r == commentStart:
+ case r == '#':
// a comment will read to a newline for us.
lx.push(lexTop)
return lexCommentStart
@@ -269,8 +331,9 @@ func lexTopEnd(lx *lexer) stateFn {
lx.emit(itemEOF)
return nil
}
- return lx.errorf("expected a top-level item to end with a newline, "+
- "comment, or EOF, but got %q instead", r)
+ return lx.errorf(
+ "expected a top-level item to end with a newline, comment, or EOF, but got %q instead",
+ r)
}
// lexTable lexes the beginning of a table. Namely, it makes sure that
@@ -279,7 +342,7 @@ func lexTopEnd(lx *lexer) stateFn {
// It also handles the case that this is an item in an array of tables.
// e.g., '[[name]]'.
func lexTableStart(lx *lexer) stateFn {
- if lx.peek() == arrayTableStart {
+ if lx.peek() == '[' {
lx.next()
lx.emit(itemArrayTableStart)
lx.push(lexArrayTableEnd)
@@ -296,9 +359,8 @@ func lexTableEnd(lx *lexer) stateFn {
}
func lexArrayTableEnd(lx *lexer) stateFn {
- if r := lx.next(); r != arrayTableEnd {
- return lx.errorf("expected end of table array name delimiter %q, "+
- "but got %q instead", arrayTableEnd, r)
+ if r := lx.next(); r != ']' {
+ return lx.errorf("expected end of table array name delimiter ']', but got %q instead", r)
}
lx.emit(itemArrayTableEnd)
return lexTopEnd
@@ -307,31 +369,18 @@ func lexArrayTableEnd(lx *lexer) stateFn {
func lexTableNameStart(lx *lexer) stateFn {
lx.skip(isWhitespace)
switch r := lx.peek(); {
- case r == tableEnd || r == eof:
- return lx.errorf("unexpected end of table name " +
- "(table names cannot be empty)")
- case r == tableSep:
- return lx.errorf("unexpected table separator " +
- "(table names cannot be empty)")
- case r == stringStart || r == rawStringStart:
+ case r == ']' || r == eof:
+ return lx.errorf("unexpected end of table name (table names cannot be empty)")
+ case r == '.':
+ return lx.errorf("unexpected table separator (table names cannot be empty)")
+ case r == '"' || r == '\'':
lx.ignore()
lx.push(lexTableNameEnd)
- return lexValue // reuse string lexing
+ return lexQuotedName
default:
- return lexBareTableName
- }
-}
-
-// lexBareTableName lexes the name of a table. It assumes that at least one
-// valid character for the table has already been read.
-func lexBareTableName(lx *lexer) stateFn {
- r := lx.next()
- if isBareKeyChar(r) {
- return lexBareTableName
+ lx.push(lexTableNameEnd)
+ return lexBareName
}
- lx.backup()
- lx.emit(itemText)
- return lexTableNameEnd
}
// lexTableNameEnd reads the end of a piece of a table name, optionally
@@ -341,69 +390,107 @@ func lexTableNameEnd(lx *lexer) stateFn {
switch r := lx.next(); {
case isWhitespace(r):
return lexTableNameEnd
- case r == tableSep:
+ case r == '.':
lx.ignore()
return lexTableNameStart
- case r == tableEnd:
+ case r == ']':
return lx.pop()
default:
- return lx.errorf("expected '.' or ']' to end table name, "+
- "but got %q instead", r)
+ return lx.errorf("expected '.' or ']' to end table name, but got %q instead", r)
}
}
-// lexKeyStart consumes a key name up until the first non-whitespace character.
-// lexKeyStart will ignore whitespace.
-func lexKeyStart(lx *lexer) stateFn {
- r := lx.peek()
+// lexBareName lexes one part of a key or table.
+//
+// It assumes that at least one valid character for the table has already been
+// read.
+//
+// Lexes only one part, e.g. only 'a' inside 'a.b'.
+func lexBareName(lx *lexer) stateFn {
+ r := lx.next()
+ if isBareKeyChar(r) {
+ return lexBareName
+ }
+ lx.backup()
+ lx.emit(itemText)
+ return lx.pop()
+}
+
+// lexBareName lexes one part of a key or table.
+//
+// It assumes that at least one valid character for the table has already been
+// read.
+//
+// Lexes only one part, e.g. only '"a"' inside '"a".b'.
+func lexQuotedName(lx *lexer) stateFn {
+ r := lx.next()
switch {
- case r == keySep:
- return lx.errorf("unexpected key separator %q", keySep)
- case isWhitespace(r) || isNL(r):
- lx.next()
- return lexSkip(lx, lexKeyStart)
- case r == stringStart || r == rawStringStart:
- lx.ignore()
- lx.emit(itemKeyStart)
- lx.push(lexKeyEnd)
- return lexValue // reuse string lexing
+ case isWhitespace(r):
+ return lexSkip(lx, lexValue)
+ case r == '"':
+ lx.ignore() // ignore the '"'
+ return lexString
+ case r == '\'':
+ lx.ignore() // ignore the "'"
+ return lexRawString
+ case r == eof:
+ return lx.errorf("unexpected EOF; expected value")
default:
+ return lx.errorf("expected value but found %q instead", r)
+ }
+}
+
+// lexKeyStart consumes all key parts until a '='.
+func lexKeyStart(lx *lexer) stateFn {
+ lx.skip(isWhitespace)
+ switch r := lx.peek(); {
+ case r == '=' || r == eof:
+ return lx.errorf("unexpected '=': key name appears blank")
+ case r == '.':
+ return lx.errorf("unexpected '.': keys cannot start with a '.'")
+ case r == '"' || r == '\'':
lx.ignore()
+ fallthrough
+ default: // Bare key
lx.emit(itemKeyStart)
- return lexBareKey
+ return lexKeyNameStart
}
}
-// lexBareKey consumes the text of a bare key. Assumes that the first character
-// (which is not whitespace) has not yet been consumed.
-func lexBareKey(lx *lexer) stateFn {
- switch r := lx.next(); {
- case isBareKeyChar(r):
- return lexBareKey
- case isWhitespace(r):
- lx.backup()
- lx.emit(itemText)
- return lexKeyEnd
- case r == keySep:
- lx.backup()
- lx.emit(itemText)
- return lexKeyEnd
+func lexKeyNameStart(lx *lexer) stateFn {
+ lx.skip(isWhitespace)
+ switch r := lx.peek(); {
+ case r == '=' || r == eof:
+ return lx.errorf("unexpected '='")
+ case r == '.':
+ return lx.errorf("unexpected '.'")
+ case r == '"' || r == '\'':
+ lx.ignore()
+ lx.push(lexKeyEnd)
+ return lexQuotedName
default:
- return lx.errorf("bare keys cannot contain %q", r)
+ lx.push(lexKeyEnd)
+ return lexBareName
}
}
// lexKeyEnd consumes the end of a key and trims whitespace (up to the key
// separator).
func lexKeyEnd(lx *lexer) stateFn {
+ lx.skip(isWhitespace)
switch r := lx.next(); {
- case r == keySep:
- return lexSkip(lx, lexValue)
case isWhitespace(r):
return lexSkip(lx, lexKeyEnd)
+ case r == eof:
+ return lx.errorf("unexpected EOF; expected key separator '='")
+ case r == '.':
+ lx.ignore()
+ return lexKeyNameStart
+ case r == '=':
+ lx.emit(itemKeyEnd)
+ return lexSkip(lx, lexValue)
default:
- return lx.errorf("expected key separator %q, but got %q instead",
- keySep, r)
+ return lx.errorf("expected '.' or '=', but got %q instead", r)
}
}
@@ -422,17 +509,17 @@ func lexValue(lx *lexer) stateFn {
return lexNumberOrDateStart
}
switch r {
- case arrayStart:
+ case '[':
lx.ignore()
lx.emit(itemArray)
return lexArrayValue
- case inlineTableStart:
+ case '{':
lx.ignore()
lx.emit(itemInlineTableStart)
return lexInlineTableValue
- case stringStart:
- if lx.accept(stringStart) {
- if lx.accept(stringStart) {
+ case '"':
+ if lx.accept('"') {
+ if lx.accept('"') {
lx.ignore() // Ignore """
return lexMultilineString
}
@@ -440,9 +527,9 @@ func lexValue(lx *lexer) stateFn {
}
lx.ignore() // ignore the '"'
return lexString
- case rawStringStart:
- if lx.accept(rawStringStart) {
- if lx.accept(rawStringStart) {
+ case '\'':
+ if lx.accept('\'') {
+ if lx.accept('\'') {
lx.ignore() // Ignore """
return lexMultilineRawString
}
@@ -450,10 +537,15 @@ func lexValue(lx *lexer) stateFn {
}
lx.ignore() // ignore the "'"
return lexRawString
- case '+', '-':
- return lexNumberStart
case '.': // special error case, be kind to users
return lx.errorf("floats must start with a digit, not '.'")
+ case 'i', 'n':
+ if (lx.accept('n') && lx.accept('f')) || (lx.accept('a') && lx.accept('n')) {
+ lx.emit(itemFloat)
+ return lx.pop()
+ }
+ case '-', '+':
+ return lexDecimalNumberStart
}
if unicode.IsLetter(r) {
// Be permissive here; lexBool will give a nice error if the
@@ -463,6 +555,9 @@ func lexValue(lx *lexer) stateFn {
lx.backup()
return lexBool
}
+ if r == eof {
+ return lx.errorf("unexpected EOF; expected value")
+ }
return lx.errorf("expected value but found %q instead", r)
}
@@ -473,14 +568,12 @@ func lexArrayValue(lx *lexer) stateFn {
switch {
case isWhitespace(r) || isNL(r):
return lexSkip(lx, lexArrayValue)
- case r == commentStart:
+ case r == '#':
lx.push(lexArrayValue)
return lexCommentStart
- case r == comma:
+ case r == ',':
return lx.errorf("unexpected comma")
- case r == arrayEnd:
- // NOTE(caleb): The spec isn't clear about whether you can have
- // a trailing comma or not, so we'll allow it.
+ case r == ']':
return lexArrayEnd
}
@@ -493,23 +586,20 @@ func lexArrayValue(lx *lexer) stateFn {
// the next value (or the end of the array): it ignores whitespace and newlines
// and expects either a ',' or a ']'.
func lexArrayValueEnd(lx *lexer) stateFn {
- r := lx.next()
- switch {
+ switch r := lx.next(); {
case isWhitespace(r) || isNL(r):
return lexSkip(lx, lexArrayValueEnd)
- case r == commentStart:
+ case r == '#':
lx.push(lexArrayValueEnd)
return lexCommentStart
- case r == comma:
+ case r == ',':
lx.ignore()
return lexArrayValue // move on to the next value
- case r == arrayEnd:
+ case r == ']':
return lexArrayEnd
+ default:
+ return lx.errorf("expected a comma (',') or array terminator (']'), but got %s", runeOrEOF(r))
}
- return lx.errorf(
- "expected a comma or array terminator %q, but got %q instead",
- arrayEnd, r,
- )
}
// lexArrayEnd finishes the lexing of an array.
@@ -528,13 +618,13 @@ func lexInlineTableValue(lx *lexer) stateFn {
case isWhitespace(r):
return lexSkip(lx, lexInlineTableValue)
case isNL(r):
- return lx.errorf("newlines not allowed within inline tables")
- case r == commentStart:
+ return lx.errorPrevLine(errLexInlineTableNL{})
+ case r == '#':
lx.push(lexInlineTableValue)
return lexCommentStart
- case r == comma:
+ case r == ',':
return lx.errorf("unexpected comma")
- case r == inlineTableEnd:
+ case r == '}':
return lexInlineTableEnd
}
lx.backup()
@@ -546,23 +636,33 @@ func lexInlineTableValue(lx *lexer) stateFn {
// key/value pair and the next pair (or the end of the table):
// it ignores whitespace and expects either a ',' or a '}'.
func lexInlineTableValueEnd(lx *lexer) stateFn {
- r := lx.next()
- switch {
+ switch r := lx.next(); {
case isWhitespace(r):
return lexSkip(lx, lexInlineTableValueEnd)
case isNL(r):
- return lx.errorf("newlines not allowed within inline tables")
- case r == commentStart:
+ return lx.errorPrevLine(errLexInlineTableNL{})
+ case r == '#':
lx.push(lexInlineTableValueEnd)
return lexCommentStart
- case r == comma:
+ case r == ',':
lx.ignore()
+ lx.skip(isWhitespace)
+ if lx.peek() == '}' {
+ return lx.errorf("trailing comma not allowed in inline tables")
+ }
return lexInlineTableValue
- case r == inlineTableEnd:
+ case r == '}':
return lexInlineTableEnd
+ default:
+ return lx.errorf("expected a comma or an inline table terminator '}', but got %s instead", runeOrEOF(r))
}
- return lx.errorf("expected a comma or an inline table terminator %q, "+
- "but got %q instead", inlineTableEnd, r)
+}
+
+func runeOrEOF(r rune) string {
+ if r == eof {
+ return "end of file"
+ }
+ return "'" + string(r) + "'"
}
// lexInlineTableEnd finishes the lexing of an inline table.
@@ -579,13 +679,13 @@ func lexString(lx *lexer) stateFn {
r := lx.next()
switch {
case r == eof:
- return lx.errorf("unexpected EOF")
+ return lx.errorf(`unexpected EOF; expected '"'`)
case isNL(r):
- return lx.errorf("strings cannot contain newlines")
+ return lx.errorPrevLine(errLexStringNL{})
case r == '\\':
lx.push(lexString)
return lexStringEscape
- case r == stringEnd:
+ case r == '"':
lx.backup()
lx.emit(itemString)
lx.next()
@@ -598,19 +698,47 @@ func lexString(lx *lexer) stateFn {
// lexMultilineString consumes the inner contents of a string. It assumes that
// the beginning '"""' has already been consumed and ignored.
func lexMultilineString(lx *lexer) stateFn {
- switch lx.next() {
+ r := lx.next()
+ switch r {
+ default:
+ return lexMultilineString
case eof:
- return lx.errorf("unexpected EOF")
+ return lx.errorf(`unexpected EOF; expected '"""'`)
case '\\':
return lexMultilineStringEscape
- case stringEnd:
- if lx.accept(stringEnd) {
- if lx.accept(stringEnd) {
- lx.backup()
+ case '"':
+ /// Found " → try to read two more "".
+ if lx.accept('"') {
+ if lx.accept('"') {
+ /// Peek ahead: the string can contain " and "", including at the
+ /// end: """str"""""
+ /// 6 or more at the end, however, is an error.
+ if lx.peek() == '"' {
+ /// Check if we already lexed 5 's; if so we have 6 now, and
+ /// that's just too many man!
+ ///
+ /// Second check is for the edge case:
+ ///
+ /// two quotes allowed.
+ /// vv
+ /// """lol \""""""
+ /// ^^ ^^^---- closing three
+ /// escaped
+ ///
+ /// But ugly, but it works
+ if strings.HasSuffix(lx.current(), `"""""`) && !strings.HasSuffix(lx.current(), `\"""""`) {
+ return lx.errorf(`unexpected '""""""'`)
+ }
+ lx.backup()
+ lx.backup()
+ return lexMultilineString
+ }
+
+ lx.backup() /// backup: don't include the """ in the item.
lx.backup()
lx.backup()
lx.emit(itemMultilineString)
- lx.next()
+ lx.next() /// Read over ''' again and discard it.
lx.next()
lx.next()
lx.ignore()
@@ -618,8 +746,8 @@ func lexMultilineString(lx *lexer) stateFn {
}
lx.backup()
}
+ return lexMultilineString
}
- return lexMultilineString
}
// lexRawString consumes a raw string. Nothing can be escaped in such a string.
@@ -627,35 +755,54 @@ func lexMultilineString(lx *lexer) stateFn {
func lexRawString(lx *lexer) stateFn {
r := lx.next()
switch {
+ default:
+ return lexRawString
case r == eof:
- return lx.errorf("unexpected EOF")
+ return lx.errorf(`unexpected EOF; expected "'"`)
case isNL(r):
- return lx.errorf("strings cannot contain newlines")
- case r == rawStringEnd:
+ return lx.errorPrevLine(errLexStringNL{})
+ case r == '\'':
lx.backup()
lx.emit(itemRawString)
lx.next()
lx.ignore()
return lx.pop()
}
- return lexRawString
}
// lexMultilineRawString consumes a raw string. Nothing can be escaped in such
// a string. It assumes that the beginning "'''" has already been consumed and
// ignored.
func lexMultilineRawString(lx *lexer) stateFn {
- switch lx.next() {
+ r := lx.next()
+ switch r {
+ default:
+ return lexMultilineRawString
case eof:
- return lx.errorf("unexpected EOF")
- case rawStringEnd:
- if lx.accept(rawStringEnd) {
- if lx.accept(rawStringEnd) {
- lx.backup()
+ return lx.errorf(`unexpected EOF; expected "'''"`)
+ case '\'':
+ /// Found ' → try to read two more ''.
+ if lx.accept('\'') {
+ if lx.accept('\'') {
+ /// Peek ahead: the string can contain ' and '', including at the
+ /// end: '''str'''''
+ /// 6 or more at the end, however, is an error.
+ if lx.peek() == '\'' {
+ /// Check if we already lexed 5 's; if so we have 6 now, and
+ /// that's just too many man!
+ if strings.HasSuffix(lx.current(), "'''''") {
+ return lx.errorf(`unexpected "''''''"`)
+ }
+ lx.backup()
+ lx.backup()
+ return lexMultilineRawString
+ }
+
+ lx.backup() /// backup: don't include the ''' in the item.
lx.backup()
lx.backup()
lx.emit(itemRawMultilineString)
- lx.next()
+ lx.next() /// Read over ''' again and discard it.
lx.next()
lx.next()
lx.ignore()
@@ -663,15 +810,14 @@ func lexMultilineRawString(lx *lexer) stateFn {
}
lx.backup()
}
+ return lexMultilineRawString
}
- return lexMultilineRawString
}
// lexMultilineStringEscape consumes an escaped character. It assumes that the
// preceding '\\' has already been consumed.
func lexMultilineStringEscape(lx *lexer) stateFn {
- // Handle the special case first:
- if isNL(lx.next()) {
+ if isNL(lx.next()) { /// \ escaping newline.
return lexMultilineString
}
lx.backup()
@@ -694,6 +840,10 @@ func lexStringEscape(lx *lexer) stateFn {
fallthrough
case '"':
fallthrough
+ case ' ', '\t':
+ // Inside """ .. """ strings you can use \ to escape newlines, and any
+ // amount of whitespace can be between the \ and \n.
+ fallthrough
case '\\':
return lx.pop()
case 'u':
@@ -701,9 +851,7 @@ func lexStringEscape(lx *lexer) stateFn {
case 'U':
return lexLongUnicodeEscape
}
- return lx.errorf("invalid escape character %q; only the following "+
- "escape characters are allowed: "+
- `\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r)
+ return lx.error(errLexEscape{r})
}
func lexShortUnicodeEscape(lx *lexer) stateFn {
@@ -711,8 +859,9 @@ func lexShortUnicodeEscape(lx *lexer) stateFn {
for i := 0; i < 4; i++ {
r = lx.next()
if !isHexadecimal(r) {
- return lx.errorf(`expected four hexadecimal digits after '\u', `+
- "but got %q instead", lx.current())
+ return lx.errorf(
+ `expected four hexadecimal digits after '\u', but got %q instead`,
+ lx.current())
}
}
return lx.pop()
@@ -723,28 +872,33 @@ func lexLongUnicodeEscape(lx *lexer) stateFn {
for i := 0; i < 8; i++ {
r = lx.next()
if !isHexadecimal(r) {
- return lx.errorf(`expected eight hexadecimal digits after '\U', `+
- "but got %q instead", lx.current())
+ return lx.errorf(
+ `expected eight hexadecimal digits after '\U', but got %q instead`,
+ lx.current())
}
}
return lx.pop()
}
-// lexNumberOrDateStart consumes either an integer, a float, or datetime.
+// lexNumberOrDateStart processes the first character of a value which begins
+// with a digit. It exists to catch values starting with '0', so that
+// lexBaseNumberOrDate can differentiate base prefixed integers from other
+// types.
func lexNumberOrDateStart(lx *lexer) stateFn {
r := lx.next()
- if isDigit(r) {
- return lexNumberOrDate
- }
switch r {
- case '_':
- return lexNumber
- case 'e', 'E':
- return lexFloat
- case '.':
- return lx.errorf("floats must start with a digit, not '.'")
+ case '0':
+ return lexBaseNumberOrDate
}
- return lx.errorf("expected a digit but got %q", r)
+
+ if !isDigit(r) {
+ // The only way to reach this state is if the value starts
+ // with a digit, so specifically treat anything else as an
+ // error.
+ return lx.errorf("expected a digit but got %q", r)
+ }
+
+ return lexNumberOrDate
}
// lexNumberOrDate consumes either an integer, float or datetime.
@@ -754,10 +908,10 @@ func lexNumberOrDate(lx *lexer) stateFn {
return lexNumberOrDate
}
switch r {
- case '-':
+ case '-', ':':
return lexDatetime
case '_':
- return lexNumber
+ return lexDecimalNumber
case '.', 'e', 'E':
return lexFloat
}
@@ -775,41 +929,156 @@ func lexDatetime(lx *lexer) stateFn {
return lexDatetime
}
switch r {
- case '-', 'T', ':', '.', 'Z', '+':
+ case '-', ':', 'T', 't', ' ', '.', 'Z', 'z', '+':
return lexDatetime
}
lx.backup()
- lx.emit(itemDatetime)
+ lx.emitTrim(itemDatetime)
return lx.pop()
}
-// lexNumberStart consumes either an integer or a float. It assumes that a sign
-// has already been read, but that *no* digits have been consumed.
-// lexNumberStart will move to the appropriate integer or float states.
-func lexNumberStart(lx *lexer) stateFn {
- // We MUST see a digit. Even floats have to start with a digit.
+// lexHexInteger consumes a hexadecimal integer after seeing the '0x' prefix.
+func lexHexInteger(lx *lexer) stateFn {
r := lx.next()
- if !isDigit(r) {
- if r == '.' {
- return lx.errorf("floats must start with a digit, not '.'")
+ if isHexadecimal(r) {
+ return lexHexInteger
+ }
+ switch r {
+ case '_':
+ return lexHexInteger
+ }
+
+ lx.backup()
+ lx.emit(itemInteger)
+ return lx.pop()
+}
+
+// lexOctalInteger consumes an octal integer after seeing the '0o' prefix.
+func lexOctalInteger(lx *lexer) stateFn {
+ r := lx.next()
+ if isOctal(r) {
+ return lexOctalInteger
+ }
+ switch r {
+ case '_':
+ return lexOctalInteger
+ }
+
+ lx.backup()
+ lx.emit(itemInteger)
+ return lx.pop()
+}
+
+// lexBinaryInteger consumes a binary integer after seeing the '0b' prefix.
+func lexBinaryInteger(lx *lexer) stateFn {
+ r := lx.next()
+ if isBinary(r) {
+ return lexBinaryInteger
+ }
+ switch r {
+ case '_':
+ return lexBinaryInteger
+ }
+
+ lx.backup()
+ lx.emit(itemInteger)
+ return lx.pop()
+}
+
+// lexDecimalNumber consumes a decimal float or integer.
+func lexDecimalNumber(lx *lexer) stateFn {
+ r := lx.next()
+ if isDigit(r) {
+ return lexDecimalNumber
+ }
+ switch r {
+ case '.', 'e', 'E':
+ return lexFloat
+ case '_':
+ return lexDecimalNumber
+ }
+
+ lx.backup()
+ lx.emit(itemInteger)
+ return lx.pop()
+}
+
+// lexDecimalNumber consumes the first digit of a number beginning with a sign.
+// It assumes the sign has already been consumed. Values which start with a sign
+// are only allowed to be decimal integers or floats.
+//
+// The special "nan" and "inf" values are also recognized.
+func lexDecimalNumberStart(lx *lexer) stateFn {
+ r := lx.next()
+
+ // Special error cases to give users better error messages
+ switch r {
+ case 'i':
+ if !lx.accept('n') || !lx.accept('f') {
+ return lx.errorf("invalid float: '%s'", lx.current())
}
- return lx.errorf("expected a digit but got %q", r)
+ lx.emit(itemFloat)
+ return lx.pop()
+ case 'n':
+ if !lx.accept('a') || !lx.accept('n') {
+ return lx.errorf("invalid float: '%s'", lx.current())
+ }
+ lx.emit(itemFloat)
+ return lx.pop()
+ case '0':
+ p := lx.peek()
+ switch p {
+ case 'b', 'o', 'x':
+ return lx.errorf("cannot use sign with non-decimal numbers: '%s%c'", lx.current(), p)
+ }
+ case '.':
+ return lx.errorf("floats must start with a digit, not '.'")
}
- return lexNumber
+
+ if isDigit(r) {
+ return lexDecimalNumber
+ }
+
+ return lx.errorf("expected a digit but got %q", r)
}
-// lexNumber consumes an integer or a float after seeing the first digit.
-func lexNumber(lx *lexer) stateFn {
+// lexBaseNumberOrDate differentiates between the possible values which
+// start with '0'. It assumes that before reaching this state, the initial '0'
+// has been consumed.
+func lexBaseNumberOrDate(lx *lexer) stateFn {
r := lx.next()
+ // Note: All datetimes start with at least two digits, so we don't
+ // handle date characters (':', '-', etc.) here.
if isDigit(r) {
- return lexNumber
+ return lexNumberOrDate
}
switch r {
case '_':
- return lexNumber
+ // Can only be decimal, because there can't be an underscore
+ // between the '0' and the base designator, and dates can't
+ // contain underscores.
+ return lexDecimalNumber
case '.', 'e', 'E':
return lexFloat
+ case 'b':
+ r = lx.peek()
+ if !isBinary(r) {
+ lx.errorf("not a binary number: '%s%c'", lx.current(), r)
+ }
+ return lexBinaryInteger
+ case 'o':
+ r = lx.peek()
+ if !isOctal(r) {
+ lx.errorf("not an octal number: '%s%c'", lx.current(), r)
+ }
+ return lexOctalInteger
+ case 'x':
+ r = lx.peek()
+ if !isHexadecimal(r) {
+ lx.errorf("not a hexidecimal number: '%s%c'", lx.current(), r)
+ }
+ return lexHexInteger
}
lx.backup()
@@ -867,49 +1136,31 @@ func lexCommentStart(lx *lexer) stateFn {
// It will consume *up to* the first newline character, and pass control
// back to the last state on the stack.
func lexComment(lx *lexer) stateFn {
- r := lx.peek()
- if isNL(r) || r == eof {
+ switch r := lx.next(); {
+ case isNL(r) || r == eof:
+ lx.backup()
lx.emit(itemText)
return lx.pop()
+ default:
+ return lexComment
}
- lx.next()
- return lexComment
}
// lexSkip ignores all slurped input and moves on to the next state.
func lexSkip(lx *lexer, nextState stateFn) stateFn {
- return func(lx *lexer) stateFn {
- lx.ignore()
- return nextState
- }
-}
-
-// isWhitespace returns true if `r` is a whitespace character according
-// to the spec.
-func isWhitespace(r rune) bool {
- return r == '\t' || r == ' '
-}
-
-func isNL(r rune) bool {
- return r == '\n' || r == '\r'
-}
-
-func isDigit(r rune) bool {
- return r >= '0' && r <= '9'
-}
-
-func isHexadecimal(r rune) bool {
- return (r >= '0' && r <= '9') ||
- (r >= 'a' && r <= 'f') ||
- (r >= 'A' && r <= 'F')
+ lx.ignore()
+ return nextState
}
-func isBareKeyChar(r rune) bool {
- return (r >= 'A' && r <= 'Z') ||
- (r >= 'a' && r <= 'z') ||
- (r >= '0' && r <= '9') ||
- r == '_' ||
- r == '-'
+func (s stateFn) String() string {
+ name := runtime.FuncForPC(reflect.ValueOf(s).Pointer()).Name()
+ if i := strings.LastIndexByte(name, '.'); i > -1 {
+ name = name[i+1:]
+ }
+ if s == nil {
+ name = "<nil>"
+ }
+ return name + "()"
}
func (itype itemType) String() string {
@@ -938,12 +1189,18 @@ func (itype itemType) String() string {
return "TableEnd"
case itemKeyStart:
return "KeyStart"
+ case itemKeyEnd:
+ return "KeyEnd"
case itemArray:
return "Array"
case itemArrayEnd:
return "ArrayEnd"
case itemCommentStart:
return "CommentStart"
+ case itemInlineTableStart:
+ return "InlineTableStart"
+ case itemInlineTableEnd:
+ return "InlineTableEnd"
}
panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
}
@@ -951,3 +1208,26 @@ func (itype itemType) String() string {
func (item item) String() string {
return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
}
+
+func isWhitespace(r rune) bool { return r == '\t' || r == ' ' }
+func isNL(r rune) bool { return r == '\n' || r == '\r' }
+func isControl(r rune) bool { // Control characters except \t, \r, \n
+ switch r {
+ case '\t', '\r', '\n':
+ return false
+ default:
+ return (r >= 0x00 && r <= 0x1f) || r == 0x7f
+ }
+}
+func isDigit(r rune) bool { return r >= '0' && r <= '9' }
+func isBinary(r rune) bool { return r == '0' || r == '1' }
+func isOctal(r rune) bool { return r >= '0' && r <= '7' }
+func isHexadecimal(r rune) bool {
+ return (r >= '0' && r <= '9') || (r >= 'a' && r <= 'f') || (r >= 'A' && r <= 'F')
+}
+func isBareKeyChar(r rune) bool {
+ return (r >= 'A' && r <= 'Z') ||
+ (r >= 'a' && r <= 'z') ||
+ (r >= '0' && r <= '9') ||
+ r == '_' || r == '-'
+}