aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/ast
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/ast')
-rw-r--r--pkg/ast/scanner.go163
1 files changed, 93 insertions, 70 deletions
diff --git a/pkg/ast/scanner.go b/pkg/ast/scanner.go
index 343c3b9ab..67e7f50bd 100644
--- a/pkg/ast/scanner.go
+++ b/pkg/ast/scanner.go
@@ -134,14 +134,7 @@ func (s *scanner) Scan() (tok token, lit string, pos Pos) {
s.next()
case s.ch == '`':
tok = tokCExpr
- for s.next(); s.ch != '`' && s.ch != '\n'; s.next() {
- }
- if s.ch == '\n' {
- s.Error(pos, "C expression is not terminated")
- } else {
- lit = string(s.data[pos.Off+1 : s.off])
- s.next()
- }
+ lit = s.scanCExpr(pos)
case s.prev2 == tokDefine && s.prev1 == tokIdent:
// Note: the old form for C expressions, not really lexable.
// TODO(dvyukov): get rid of this eventually.
@@ -155,74 +148,16 @@ func (s *scanner) Scan() (tok token, lit string, pos Pos) {
}
lit = string(s.data[pos.Off+1 : s.off])
case s.ch == '"' || s.ch == '<':
- // TODO(dvyukov): get rid of <...> strings, that's only includes
tok = tokString
- closing := byte('"')
- if s.ch == '<' {
- closing = '>'
- }
- for s.next(); s.ch != closing; s.next() {
- if s.ch == 0 || s.ch == '\n' {
- s.Error(pos, "string literal is not terminated")
- return
- }
- }
- lit = string(s.data[pos.Off+1 : s.off])
- for i := 0; i < len(lit); i++ {
- if lit[i] < 0x20 || lit[i] >= 0x80 {
- pos1 := pos
- pos1.Col += i + 1
- pos1.Off += i + 1
- s.Error(pos1, "illegal character %#U in string literal", lit[i])
- break
- }
- }
- s.next()
+ lit = s.scanStr(pos)
case s.ch >= '0' && s.ch <= '9':
tok = tokInt
- for s.ch >= '0' && s.ch <= '9' ||
- s.ch >= 'a' && s.ch <= 'f' ||
- s.ch >= 'A' && s.ch <= 'F' || s.ch == 'x' {
- s.next()
- }
- lit = string(s.data[pos.Off:s.off])
- bad := false
- if _, err := strconv.ParseUint(lit, 10, 64); err != nil {
- if len(lit) > 2 && lit[0] == '0' && lit[1] == 'x' {
- if _, err := strconv.ParseUint(lit[2:], 16, 64); err != nil {
- bad = true
- }
- } else {
- bad = true
- }
- }
- if bad {
- s.Error(pos, fmt.Sprintf("bad integer %q", lit))
- lit = "0"
- }
+ lit = s.scanInt(pos)
case s.ch == '\'':
tok = tokInt
- lit = "0"
- s.next()
- s.next()
- if s.ch != '\'' {
- s.Error(pos, "char literal is not terminated")
- return
- }
- s.next()
- lit = string(s.data[pos.Off : pos.Off+3])
+ lit = s.scanChar(pos)
case s.ch == '_' || s.ch >= 'a' && s.ch <= 'z' || s.ch >= 'A' && s.ch <= 'Z':
- tok = tokIdent
- for s.ch == '_' || s.ch == '$' ||
- s.ch >= 'a' && s.ch <= 'z' ||
- s.ch >= 'A' && s.ch <= 'Z' ||
- s.ch >= '0' && s.ch <= '9' {
- s.next()
- }
- lit = string(s.data[pos.Off:s.off])
- if key, ok := keywords[lit]; ok {
- tok = key
- }
+ tok, lit = s.scanIdent(pos)
default:
tok = punctuation[s.ch]
if tok == tokIllegal {
@@ -235,6 +170,94 @@ func (s *scanner) Scan() (tok token, lit string, pos Pos) {
return
}
+func (s *scanner) scanCExpr(pos Pos) string {
+ for s.next(); s.ch != '`' && s.ch != '\n'; s.next() {
+ }
+ if s.ch == '\n' {
+ s.Error(pos, "C expression is not terminated")
+ return ""
+ }
+ lit := string(s.data[pos.Off+1 : s.off])
+ s.next()
+ return lit
+}
+
+func (s *scanner) scanStr(pos Pos) string {
+ // TODO(dvyukov): get rid of <...> strings, that's only includes
+ closing := byte('"')
+ if s.ch == '<' {
+ closing = '>'
+ }
+ for s.next(); s.ch != closing; s.next() {
+ if s.ch == 0 || s.ch == '\n' {
+ s.Error(pos, "string literal is not terminated")
+ return ""
+ }
+ }
+ lit := string(s.data[pos.Off+1 : s.off])
+ for i := 0; i < len(lit); i++ {
+ if lit[i] < 0x20 || lit[i] >= 0x80 {
+ pos1 := pos
+ pos1.Col += i + 1
+ pos1.Off += i + 1
+ s.Error(pos1, "illegal character %#U in string literal", lit[i])
+ break
+ }
+ }
+ s.next()
+ return lit
+}
+
+func (s *scanner) scanInt(pos Pos) string {
+ for s.ch >= '0' && s.ch <= '9' ||
+ s.ch >= 'a' && s.ch <= 'f' ||
+ s.ch >= 'A' && s.ch <= 'F' || s.ch == 'x' {
+ s.next()
+ }
+ lit := string(s.data[pos.Off:s.off])
+ bad := false
+ if _, err := strconv.ParseUint(lit, 10, 64); err != nil {
+ if len(lit) > 2 && lit[0] == '0' && lit[1] == 'x' {
+ if _, err := strconv.ParseUint(lit[2:], 16, 64); err != nil {
+ bad = true
+ }
+ } else {
+ bad = true
+ }
+ }
+ if bad {
+ s.Error(pos, fmt.Sprintf("bad integer %q", lit))
+ lit = "0"
+ }
+ return lit
+}
+
+func (s *scanner) scanChar(pos Pos) string {
+ s.next()
+ s.next()
+ if s.ch != '\'' {
+ s.Error(pos, "char literal is not terminated")
+ return "0"
+ }
+ s.next()
+ return string(s.data[pos.Off : pos.Off+3])
+}
+
+func (s *scanner) scanIdent(pos Pos) (tok token, lit string) {
+ tok = tokIdent
+ for s.ch == '_' || s.ch == '$' ||
+ s.ch >= 'a' && s.ch <= 'z' ||
+ s.ch >= 'A' && s.ch <= 'Z' ||
+ s.ch >= '0' && s.ch <= '9' {
+ s.next()
+ }
+ lit = string(s.data[pos.Off:s.off])
+ if key, ok := keywords[lit]; ok {
+ tok = key
+ }
+ return
+}
+
func (s *scanner) Error(pos Pos, msg string, args ...interface{}) {
s.errors++
s.errorHandler(pos, fmt.Sprintf(msg, args...))