aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/ast/parser.go
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2017-05-22 05:28:31 +0200
committerDmitry Vyukov <dvyukov@google.com>2017-08-18 11:26:50 +0200
commit127a9c2b65ae07f309e839c3b8e5ab2ee7983e56 (patch)
tree3a4dd2af0a2fc09b2bba1dad738c7657d1b0de1d /pkg/ast/parser.go
parent5809a8e05714bda367f3fd57f9b983a3403f04b0 (diff)
pkg/ast: new parser for sys descriptions
The old parser in sys/sysparser is too hacky, difficult to extend and drops debug info too early, so that we can't produce proper error messages. Add a new parser that is built like a proper language parser and preserves full debug info for every token.
Diffstat (limited to 'pkg/ast/parser.go')
-rw-r--r--pkg/ast/parser.go423
1 files changed, 423 insertions, 0 deletions
diff --git a/pkg/ast/parser.go b/pkg/ast/parser.go
new file mode 100644
index 000000000..737d08068
--- /dev/null
+++ b/pkg/ast/parser.go
@@ -0,0 +1,423 @@
+// Copyright 2017 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package ast
+
+import (
+ "errors"
+ "fmt"
+ "strconv"
+ "strings"
+)
+
+// Parse parses sys description into AST and returns top-level nodes.
+func Parse(data []byte, filename string, errorHandler func(pos Pos, msg string)) (top []interface{}, ok bool) {
+ p := &parser{s: newScanner(data, filename, errorHandler)}
+ prevNewLine, prevComment := false, false
+ for p.next(); p.tok != tokEOF; {
+ decl := p.parseTopRecover()
+ if decl == nil {
+ continue
+ }
+ // Add new lines around structs, remove duplicate new lines.
+ if _, ok := decl.(*NewLine); ok && prevNewLine {
+ continue
+ }
+ if str, ok := decl.(*Struct); ok && !prevNewLine && !prevComment {
+ top = append(top, &NewLine{Pos: str.Pos})
+ }
+ top = append(top, decl)
+ if str, ok := decl.(*Struct); ok {
+ decl = &NewLine{Pos: str.Pos}
+ top = append(top, decl)
+ }
+ _, prevNewLine = decl.(*NewLine)
+ _, prevComment = decl.(*Comment)
+ }
+ if prevNewLine {
+ top = top[:len(top)-1]
+ }
+ ok = p.s.Ok()
+ return
+}
+
+// parser holds the scanner being read from and the most recently scanned token.
+type parser struct {
+ // s is the token source; next() pulls tokens from it and expect() reports errors through it.
+ s *scanner
+
+ // Current token:
+ // tok is the token kind, lit its literal text and pos its source position,
+ // all three as returned by the last s.Scan() call.
+ tok token
+ lit string
+ pos Pos
+}
+
+// Skip parsing till the next NEWLINE, for error recovery.
+// skipLine is a sentinel value used with panic: expect() and parseTop() panic
+// with it, and parseTopRecover() recovers it and skips the rest of the line.
+var skipLine = errors.New("")
+
+// parseTopRecover parses one top-level declaration together with the NEWLINE
+// that terminates it and returns the resulting node.
+//
+// If parsing panics with skipLine (a syntax error that has already been
+// reported), the rest of the current line is discarded and nil is returned so
+// that the caller can continue with the next line. Any other panic is re-raised.
+func (p *parser) parseTopRecover() interface{} {
+	defer func() {
+		switch err := recover(); err {
+		case nil:
+		case skipLine:
+			// Try to recover by consuming everything until next NEWLINE.
+			// Also stop at EOF: if the input ends before a NEWLINE is seen,
+			// waiting only for tokNewLine would never terminate.
+			for p.tok != tokNewLine && p.tok != tokEOF {
+				p.next()
+			}
+			// Use tryConsume rather than consume: at EOF there is no NEWLINE,
+			// and consume would report a second error and panic again from
+			// inside this deferred function.
+			p.tryConsume(tokNewLine)
+		default:
+			panic(err)
+		}
+	}()
+	decl := p.parseTop()
+	if decl == nil {
+		panic("not reachable")
+	}
+	p.consume(tokNewLine)
+	return decl
+}
+
+// parseTop parses a single top-level declaration, dispatching on the current
+// token. It does not consume the terminating NEWLINE (for a tokNewLine token it
+// returns a NewLine node without advancing).
+//
+// An identifier must be followed by '(' (call), '{' or '[' (struct/union) or
+// '=' (flags). On an unexpected token an error is reported via expect, which
+// panics with skipLine; an illegal token panics with skipLine directly.
+func (p *parser) parseTop() interface{} {
+	switch p.tok {
+	case tokNewLine:
+		return &NewLine{Pos: p.pos}
+	case tokComment:
+		return p.parseComment()
+	case tokDefine:
+		return p.parseDefine()
+	case tokInclude:
+		return p.parseInclude()
+	case tokIncdir:
+		return p.parseIncdir()
+	case tokResource:
+		return p.parseResource()
+	case tokIdent:
+		name := p.parseIdent()
+		switch p.tok {
+		case tokLParen:
+			return p.parseCall(name)
+		case tokLBrace, tokLBrack:
+			return p.parseStruct(name)
+		case tokEq:
+			return p.parseFlags(name)
+		default:
+			// None of the listed tokens match here, so expect always reports and panics.
+			p.expect(tokLParen, tokLBrace, tokLBrack, tokEq)
+		}
+	case tokIllegal:
+		// Scanner has already produced an error for this one.
+		panic(skipLine)
+	default:
+		// List every keyword accepted above (including incdir) in the error message.
+		p.expect(tokComment, tokDefine, tokInclude, tokIncdir, tokResource, tokIdent)
+	}
+	panic("not reachable")
+}
+
+// next advances to the following token, storing its kind, literal and position.
+func (p *parser) next() {
+	tok, lit, pos := p.s.Scan()
+	p.tok = tok
+	p.lit = lit
+	p.pos = pos
+}
+
+// consume requires the current token to be tok (see expect) and then advances
+// to the following token.
+func (p *parser) consume(tok token) {
+	p.expect(tok)
+	// Advance past the token that was just verified.
+	p.tok, p.lit, p.pos = p.s.Scan()
+}
+
+// tryConsume advances past the current token if it is tok and reports whether
+// it did so. A non-matching token is left in place and no error is reported.
+func (p *parser) tryConsume(tok token) bool {
+	matched := p.tok == tok
+	if matched {
+		p.next()
+	}
+	return matched
+}
+
+func (p *parser) expect(tokens ...token) {
+ for _, tok := range tokens {
+ if p.tok == tok {
+ return
+ }
+ }
+ var str []string
+ for _, tok := range tokens {
+ str = append(str, tok.String())
+ }
+ p.s.Error(p.pos, fmt.Sprintf("unexpected %v, expecting %v", p.tok, strings.Join(str, ", ")))
+ panic(skipLine)
+}
+
+// parseComment turns the current comment token into a Comment node and
+// advances past it.
+func (p *parser) parseComment() *Comment {
+	pos, text := p.pos, p.lit
+	p.consume(tokComment)
+	return &Comment{Pos: pos, Text: text}
+}
+
+func (p *parser) parseDefine() *Define {
+ pos0 := p.pos
+ p.consume(tokDefine)
+ name := p.parseIdent()
+ p.expect(tokInt, tokIdent, tokCExpr)
+ var val *Int
+ if p.tok == tokCExpr {
+ val = p.parseCExpr()
+ } else {
+ val = p.parseInt()
+ }
+ return &Define{
+ Pos: pos0,
+ Name: name,
+ Value: val,
+ }
+}
+
+// parseInclude parses an include declaration: the include token followed by a
+// string naming the file.
+func (p *parser) parseInclude() *Include {
+	inc := &Include{Pos: p.pos}
+	p.consume(tokInclude)
+	inc.File = p.parseString()
+	return inc
+}
+
+// parseIncdir parses an incdir declaration: the incdir token followed by a
+// string naming the directory.
+func (p *parser) parseIncdir() *Incdir {
+	dir := &Incdir{Pos: p.pos}
+	p.consume(tokIncdir)
+	dir.Dir = p.parseString()
+	return dir
+}
+
+func (p *parser) parseResource() *Resource {
+ pos0 := p.pos
+ p.consume(tokResource)
+ name := p.parseIdent()
+ p.consume(tokLBrack)
+ base := p.parseIdent()
+ p.consume(tokRBrack)
+ var values []*Int
+ if p.tryConsume(tokColon) {
+ values = append(values, p.parseInt())
+ for p.tryConsume(tokComma) {
+ values = append(values, p.parseInt())
+ }
+ }
+ return &Resource{
+ Pos: pos0,
+ Name: name,
+ Base: base,
+ Values: values,
+ }
+}
+
+// parseCall parses the remainder of a call declaration whose name has already
+// been parsed: a parenthesized, comma-separated list of fields and an optional
+// return type before the end of the line.
+func (p *parser) parseCall(name *Ident) *Call {
+	call := new(Call)
+	call.Pos, call.Name = name.Pos, name
+	p.consume(tokLParen)
+	for p.tok != tokRParen {
+		call.Args = append(call.Args, p.parseField())
+		// Each argument must be followed by ',' or ')'; a ',' is skipped.
+		p.expect(tokComma, tokRParen)
+		if p.tok == tokComma {
+			p.next()
+		}
+	}
+	p.consume(tokRParen)
+	if p.tok == tokNewLine {
+		// No return type.
+		return call
+	}
+	call.Ret = p.parseType()
+	return call
+}
+
+// parseFlags parses the remainder of a flags declaration after its name: '='
+// followed by either integer/identifier values (IntFlags) or string values
+// (StrFlags). The first value token decides which kind is parsed.
+func (p *parser) parseFlags(name *Ident) interface{} {
+	p.consume(tokEq)
+	if p.tok == tokString {
+		return p.parseStrFlags(name)
+	}
+	if p.tok == tokInt || p.tok == tokIdent {
+		return p.parseIntFlags(name)
+	}
+	// Neither kind of value: expect reports the error and panics.
+	p.expect(tokInt, tokIdent, tokString)
+	return nil
+}
+
+func (p *parser) parseIntFlags(name *Ident) *IntFlags {
+ values := []*Int{p.parseInt()}
+ for p.tryConsume(tokComma) {
+ values = append(values, p.parseInt())
+ }
+ return &IntFlags{
+ Pos: name.Pos,
+ Name: name,
+ Values: values,
+ }
+}
+
+func (p *parser) parseStrFlags(name *Ident) *StrFlags {
+ values := []*String{p.parseString()}
+ for p.tryConsume(tokComma) {
+ values = append(values, p.parseString())
+ }
+ return &StrFlags{
+ Pos: name.Pos,
+ Name: name,
+ Values: values,
+ }
+}
+
+// parseStruct parses the remainder of a struct or union declaration whose name
+// has already been parsed. The current token is the opening bracket: '[' makes
+// it a union closed by ']', otherwise it is closed by '}'. Fields are one per
+// line; an optional [attr, ...] list may follow the closing bracket.
+func (p *parser) parseStruct(name *Ident) *Struct {
+	isUnion := p.tok == tokLBrack
+	closing := tokRBrace
+	if isUnion {
+		closing = tokRBrack
+	}
+	st := &Struct{
+		Pos:     name.Pos,
+		Name:    name,
+		IsUnion: isUnion,
+	}
+	// Skip the opening bracket; it must be followed by a new line.
+	p.next()
+	p.consume(tokNewLine)
+	for {
+		// Blank lines before a field mark the start of a new block.
+		blankBefore := p.tok == tokNewLine
+		for p.tok == tokNewLine {
+			p.next()
+		}
+		leading := p.parseCommentBlock()
+		if p.tryConsume(closing) {
+			// Comments right before the closing bracket belong to the struct itself.
+			st.Comments = leading
+			break
+		}
+		fld := p.parseField()
+		fld.NewBlock, fld.Comments = blankBefore, leading
+		st.Fields = append(st.Fields, fld)
+		p.consume(tokNewLine)
+	}
+	if !p.tryConsume(tokLBrack) {
+		// No attribute list.
+		return st
+	}
+	for {
+		st.Attrs = append(st.Attrs, p.parseIdent())
+		if !p.tryConsume(tokComma) {
+			break
+		}
+	}
+	p.consume(tokRBrack)
+	return st
+}
+
+// parseCommentBlock parses a run of consecutive comment lines (each comment
+// followed by a new line) and returns them; the result is nil if the current
+// token is not a comment.
+func (p *parser) parseCommentBlock() []*Comment {
+	var block []*Comment
+	for {
+		if p.tok != tokComment {
+			return block
+		}
+		block = append(block, p.parseComment())
+		p.consume(tokNewLine)
+	}
+}
+
+// parseField parses a field: an identifier followed by its type. The field's
+// position is the position of its name.
+func (p *parser) parseField() *Field {
+	fld := &Field{Name: p.parseIdent()}
+	fld.Pos = fld.Name.Pos
+	fld.Type = p.parseType()
+	return fld
+}
+
+// parseType parses a type expression: an integer, identifier or string,
+// optionally followed (for integers and identifiers only) by ':' and a second
+// integer or identifier, and then by an optional bracketed list of type
+// arguments.
+func (p *parser) parseType() *Type {
+	typ := &Type{Pos: p.pos}
+	switch p.tok {
+	case tokInt:
+		typ.Value, typ.ValueHex = p.parseIntValue()
+	case tokIdent:
+		typ.Ident = p.lit
+	case tokString:
+		typ.String = p.lit
+	default:
+		p.expect(tokInt, tokIdent, tokString)
+	}
+	// Only integers and identifiers may carry a ":value" suffix.
+	colonOK := p.tok == tokInt || p.tok == tokIdent
+	p.next()
+	if colonOK && p.tryConsume(tokColon) {
+		if p.tok == tokInt {
+			typ.Value2, typ.Value2Hex = p.parseIntValue()
+		} else if p.tok == tokIdent {
+			typ.Ident2 = p.lit
+		} else {
+			p.expect(tokInt, tokIdent)
+		}
+		p.next()
+	}
+	typ.Args = p.parseTypeList()
+	return typ
+}
+
+func (p *parser) parseTypeList() []*Type {
+ var args []*Type
+ if p.tryConsume(tokLBrack) {
+ args = append(args, p.parseType())
+ for p.tryConsume(tokComma) {
+ args = append(args, p.parseType())
+ }
+ p.consume(tokRBrack)
+ }
+ return args
+}
+
+// parseIdent requires the current token to be an identifier, returns it as an
+// Ident node and advances past it.
+func (p *parser) parseIdent() *Ident {
+	p.expect(tokIdent)
+	pos, name := p.pos, p.lit
+	p.next()
+	return &Ident{Pos: pos, Name: name}
+}
+
+// parseString requires the current token to be a string, returns it as a
+// String node and advances past it.
+func (p *parser) parseString() *String {
+	p.expect(tokString)
+	s := new(String)
+	s.Pos, s.Value = p.pos, p.lit
+	p.next()
+	return s
+}
+
+// parseInt parses an integer value given either as an integer literal or as an
+// identifier, and advances past it. Any other token is reported via expect.
+func (p *parser) parseInt() *Int {
+	n := &Int{Pos: p.pos}
+	if p.tok == tokInt {
+		n.Value, n.ValueHex = p.parseIntValue()
+	} else if p.tok == tokIdent {
+		n.Ident = p.lit
+	} else {
+		p.expect(tokInt, tokIdent)
+	}
+	p.next()
+	return n
+}
+
+// parseIntValue converts the current literal to a uint64. It first tries
+// decimal, then hexadecimal with a "0x" prefix; the bool result reports whether
+// the hexadecimal form was used. It does not advance the token stream and
+// panics if the literal is neither.
+func (p *parser) parseIntValue() (uint64, bool) {
+	lit := p.lit
+	dec, err := strconv.ParseUint(lit, 10, 64)
+	if err == nil {
+		return dec, false
+	}
+	if len(lit) > 2 && lit[:2] == "0x" {
+		hex, err := strconv.ParseUint(lit[2:], 16, 64)
+		if err == nil {
+			return hex, true
+		}
+	}
+	panic(fmt.Sprintf("scanner returned bad integer %q", p.lit))
+}
+
+// parseCExpr turns the current C expression token into an Int node that
+// carries the expression text in CExpr, and advances past the token.
+func (p *parser) parseCExpr() *Int {
+	pos, expr := p.pos, p.lit
+	p.consume(tokCExpr)
+	return &Int{Pos: pos, CExpr: expr}
+}