pkg/ast: new parser for sys descriptions

The old parser in sys/sysparser is too hacky, difficult to extend and drops debug info too early, so that we can't produce proper error messages. Add a new parser that is build like a proper language parser and preserves full debug info for every token.
author: Dmitry Vyukov <dvyukov@google.com> 2017-05-22 05:28:31 +0200
committer: Dmitry Vyukov <dvyukov@google.com> 2017-08-18 11:26:50 +0200
commit: 127a9c2b65ae07f309e839c3b8e5ab2ee7983e56 (patch)
tree: 3a4dd2af0a2fc09b2bba1dad738c7657d1b0de1d /pkg/ast/parser.go
parent: 5809a8e05714bda367f3fd57f9b983a3403f04b0 (diff)
1 files changed, 423 insertions, 0 deletions
diff --git a/pkg/ast/parser.go b/pkg/ast/parser.go
new file mode 100644
index 000000000..737d08068
--- /dev/null
+++ b/pkg/ast/parser.go
@@ -0,0 +1,423 @@
+// Copyright 2017 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package ast
+
+import (
+	"errors"
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+// Parse parses sys description into AST and returns top-level nodes.
+func Parse(data []byte, filename string, errorHandler func(pos Pos, msg string)) (top []interface{}, ok bool) {
+	p := &parser{s: newScanner(data, filename, errorHandler)}
+	prevNewLine, prevComment := false, false
+	for p.next(); p.tok != tokEOF; {
+		decl := p.parseTopRecover()
+		if decl == nil {
+			continue
+		}
+		// Add new lines around structs, remove duplicate new lines.
+		if _, ok := decl.(*NewLine); ok && prevNewLine {
+			continue
+		}
+		if str, ok := decl.(*Struct); ok && !prevNewLine && !prevComment {
+			top = append(top, &NewLine{Pos: str.Pos})
+		}
+		top = append(top, decl)
+		if str, ok := decl.(*Struct); ok {
+			decl = &NewLine{Pos: str.Pos}
+			top = append(top, decl)
+		}
+		_, prevNewLine = decl.(*NewLine)
+		_, prevComment = decl.(*Comment)
+	}
+	if prevNewLine {
+		top = top[:len(top)-1]
+	}
+	ok = p.s.Ok()
+	return
+}
+
+type parser struct {
+	s *scanner
+
+	// Current token:
+	tok token
+	lit string
+	pos Pos
+}
+
+// Skip parsing till the next NEWLINE, for error recovery.
+var skipLine = errors.New("")
+
+func (p *parser) parseTopRecover() interface{} {
+	defer func() {
+		switch err := recover(); err {
+		case nil:
+		case skipLine:
+			// Try to recover by consuming everything until next NEWLINE.
+			for p.tok != tokNewLine {
+				p.next()
+			}
+			p.consume(tokNewLine)
+		default:
+			panic(err)
+		}
+	}()
+	decl := p.parseTop()
+	if decl == nil {
+		panic("not reachable")
+	}
+	p.consume(tokNewLine)
+	return decl
+}
+
+func (p *parser) parseTop() interface{} {
+	switch p.tok {
+	case tokNewLine:
+		return &NewLine{Pos: p.pos}
+	case tokComment:
+		return p.parseComment()
+	case tokDefine:
+		return p.parseDefine()
+	case tokInclude:
+		return p.parseInclude()
+	case tokIncdir:
+		return p.parseIncdir()
+	case tokResource:
+		return p.parseResource()
+	case tokIdent:
+		name := p.parseIdent()
+		switch p.tok {
+		case tokLParen:
+			return p.parseCall(name)
+		case tokLBrace, tokLBrack:
+			return p.parseStruct(name)
+		case tokEq:
+			return p.parseFlags(name)
+		default:
+			p.expect(tokLParen, tokLBrace, tokLBrack, tokEq)
+		}
+	case tokIllegal:
+		// Scanner has already producer an error for this one.
+		panic(skipLine)
+	default:
+		p.expect(tokComment, tokDefine, tokInclude, tokResource, tokIdent)
+	}
+	panic("not reachable")
+}
+
+func (p *parser) next() {
+	p.tok, p.lit, p.pos = p.s.Scan()
+}
+
+func (p *parser) consume(tok token) {
+	p.expect(tok)
+	p.next()
+}
+
+func (p *parser) tryConsume(tok token) bool {
+	if p.tok != tok {
+		return false
+	}
+	p.next()
+	return true
+}
+
+func (p *parser) expect(tokens ...token) {
+	for _, tok := range tokens {
+		if p.tok == tok {
+			return
+		}
+	}
+	var str []string
+	for _, tok := range tokens {
+		str = append(str, tok.String())
+	}
+	p.s.Error(p.pos, fmt.Sprintf("unexpected %v, expecting %v", p.tok, strings.Join(str, ", ")))
+	panic(skipLine)
+}
+
+func (p *parser) parseComment() *Comment {
+	c := &Comment{
+		Pos:  p.pos,
+		Text: p.lit,
+	}
+	p.consume(tokComment)
+	return c
+}
+
+func (p *parser) parseDefine() *Define {
+	pos0 := p.pos
+	p.consume(tokDefine)
+	name := p.parseIdent()
+	p.expect(tokInt, tokIdent, tokCExpr)
+	var val *Int
+	if p.tok == tokCExpr {
+		val = p.parseCExpr()
+	} else {
+		val = p.parseInt()
+	}
+	return &Define{
+		Pos:   pos0,
+		Name:  name,
+		Value: val,
+	}
+}
+
+func (p *parser) parseInclude() *Include {
+	pos0 := p.pos
+	p.consume(tokInclude)
+	return &Include{
+		Pos:  pos0,
+		File: p.parseString(),
+	}
+}
+
+func (p *parser) parseIncdir() *Incdir {
+	pos0 := p.pos
+	p.consume(tokIncdir)
+	return &Incdir{
+		Pos: pos0,
+		Dir: p.parseString(),
+	}
+}
+
+func (p *parser) parseResource() *Resource {
+	pos0 := p.pos
+	p.consume(tokResource)
+	name := p.parseIdent()
+	p.consume(tokLBrack)
+	base := p.parseIdent()
+	p.consume(tokRBrack)
+	var values []*Int
+	if p.tryConsume(tokColon) {
+		values = append(values, p.parseInt())
+		for p.tryConsume(tokComma) {
+			values = append(values, p.parseInt())
+		}
+	}
+	return &Resource{
+		Pos:    pos0,
+		Name:   name,
+		Base:   base,
+		Values: values,
+	}
+}
+
+func (p *parser) parseCall(name *Ident) *Call {
+	c := &Call{
+		Pos:  name.Pos,
+		Name: name,
+	}
+	p.consume(tokLParen)
+	for p.tok != tokRParen {
+		c.Args = append(c.Args, p.parseField())
+		p.expect(tokComma, tokRParen)
+		p.tryConsume(tokComma)
+	}
+	p.consume(tokRParen)
+	if p.tok != tokNewLine {
+		c.Ret = p.parseType()
+	}
+	return c
+}
+
+func (p *parser) parseFlags(name *Ident) interface{} {
+	p.consume(tokEq)
+	switch p.tok {
+	case tokInt, tokIdent:
+		return p.parseIntFlags(name)
+	case tokString:
+		return p.parseStrFlags(name)
+	default:
+		p.expect(tokInt, tokIdent, tokString)
+		return nil
+	}
+}
+
+func (p *parser) parseIntFlags(name *Ident) *IntFlags {
+	values := []*Int{p.parseInt()}
+	for p.tryConsume(tokComma) {
+		values = append(values, p.parseInt())
+	}
+	return &IntFlags{
+		Pos:    name.Pos,
+		Name:   name,
+		Values: values,
+	}
+}
+
+func (p *parser) parseStrFlags(name *Ident) *StrFlags {
+	values := []*String{p.parseString()}
+	for p.tryConsume(tokComma) {
+		values = append(values, p.parseString())
+	}
+	return &StrFlags{
+		Pos:    name.Pos,
+		Name:   name,
+		Values: values,
+	}
+}
+
+func (p *parser) parseStruct(name *Ident) *Struct {
+	str := &Struct{
+		Pos:  name.Pos,
+		Name: name,
+	}
+	closing := tokRBrace
+	if p.tok == tokLBrack {
+		str.IsUnion = true
+		closing = tokRBrack
+	}
+	p.next()
+	p.consume(tokNewLine)
+	for {
+		newBlock := false
+		for p.tok == tokNewLine {
+			newBlock = true
+			p.next()
+		}
+		comments := p.parseCommentBlock()
+		if p.tryConsume(closing) {
+			str.Comments = comments
+			break
+		}
+		fld := p.parseField()
+		fld.NewBlock = newBlock
+		fld.Comments = comments
+		str.Fields = append(str.Fields, fld)
+		p.consume(tokNewLine)
+	}
+	if p.tryConsume(tokLBrack) {
+		str.Attrs = append(str.Attrs, p.parseIdent())
+		for p.tryConsume(tokComma) {
+			str.Attrs = append(str.Attrs, p.parseIdent())
+		}
+		p.consume(tokRBrack)
+	}
+	return str
+}
+
+func (p *parser) parseCommentBlock() []*Comment {
+	var comments []*Comment
+	for p.tok == tokComment {
+		comments = append(comments, p.parseComment())
+		p.consume(tokNewLine)
+	}
+	return comments
+}
+
+func (p *parser) parseField() *Field {
+	name := p.parseIdent()
+	return &Field{
+		Pos:  name.Pos,
+		Name: name,
+		Type: p.parseType(),
+	}
+}
+
+func (p *parser) parseType() *Type {
+	arg := &Type{
+		Pos: p.pos,
+	}
+	allowColon := false
+	switch p.tok {
+	case tokInt:
+		allowColon = true
+		arg.Value, arg.ValueHex = p.parseIntValue()
+	case tokIdent:
+		allowColon = true
+		arg.Ident = p.lit
+	case tokString:
+		arg.String = p.lit
+	default:
+		p.expect(tokInt, tokIdent, tokString)
+	}
+	p.next()
+	if allowColon && p.tryConsume(tokColon) {
+		switch p.tok {
+		case tokInt:
+			arg.Value2, arg.Value2Hex = p.parseIntValue()
+		case tokIdent:
+			arg.Ident2 = p.lit
+		default:
+			p.expect(tokInt, tokIdent)
+		}
+		p.next()
+	}
+	arg.Args = p.parseTypeList()
+	return arg
+}
+
+func (p *parser) parseTypeList() []*Type {
+	var args []*Type
+	if p.tryConsume(tokLBrack) {
+		args = append(args, p.parseType())
+		for p.tryConsume(tokComma) {
+			args = append(args, p.parseType())
+		}
+		p.consume(tokRBrack)
+	}
+	return args
+}
+
+func (p *parser) parseIdent() *Ident {
+	p.expect(tokIdent)
+	ident := &Ident{
+		Pos:  p.pos,
+		Name: p.lit,
+	}
+	p.next()
+	return ident
+}
+
+func (p *parser) parseString() *String {
+	p.expect(tokString)
+	str := &String{
+		Pos:   p.pos,
+		Value: p.lit,
+	}
+	p.next()
+	return str
+}
+
+func (p *parser) parseInt() *Int {
+	i := &Int{
+		Pos: p.pos,
+	}
+	switch p.tok {
+	case tokInt:
+		i.Value, i.ValueHex = p.parseIntValue()
+	case tokIdent:
+		i.Ident = p.lit
+	default:
+		p.expect(tokInt, tokIdent)
+	}
+	p.next()
+	return i
+}
+
+func (p *parser) parseIntValue() (uint64, bool) {
+	if v, err := strconv.ParseUint(p.lit, 10, 64); err == nil {
+		return v, false
+	}
+	if len(p.lit) > 2 && p.lit[0] == '0' && p.lit[1] == 'x' {
+		if v, err := strconv.ParseUint(p.lit[2:], 16, 64); err == nil {
+			return v, true
+		}
+	}
+	panic(fmt.Sprintf("scanner returned bad integer %q", p.lit))
+}
+
+func (p *parser) parseCExpr() *Int {
+	i := &Int{
+		Pos:   p.pos,
+		CExpr: p.lit,
+	}
+	p.consume(tokCExpr)
+	return i
+}
author	Dmitry Vyukov <dvyukov@google.com>	2017-05-22 05:28:31 +0200
committer	Dmitry Vyukov <dvyukov@google.com>	2017-08-18 11:26:50 +0200
commit	127a9c2b65ae07f309e839c3b8e5ab2ee7983e56 (patch)
tree	3a4dd2af0a2fc09b2bba1dad738c7657d1b0de1d /pkg/ast/parser.go
parent	5809a8e05714bda367f3fd57f9b983a3403f04b0 (diff)