From 712de1c63d9db97c81af68cd0dc4372c53d2e57a Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 15 Sep 2020 18:05:35 +0200 Subject: vendor/github.com/golangci/golangci-lint: update to v1.31 --- vendor/github.com/quasilyte/regex/syntax/parser.go | 503 +++++++++++++++++++++ 1 file changed, 503 insertions(+) create mode 100644 vendor/github.com/quasilyte/regex/syntax/parser.go (limited to 'vendor/github.com/quasilyte/regex/syntax/parser.go') diff --git a/vendor/github.com/quasilyte/regex/syntax/parser.go b/vendor/github.com/quasilyte/regex/syntax/parser.go new file mode 100644 index 000000000..c540ac593 --- /dev/null +++ b/vendor/github.com/quasilyte/regex/syntax/parser.go @@ -0,0 +1,503 @@ +package syntax + +import ( + "errors" + "strings" +) + +type ParserOptions struct { + // NoLiterals disables OpChar merging into OpLiteral. + NoLiterals bool +} + +func NewParser(opts *ParserOptions) *Parser { + return newParser(opts) +} + +type Parser struct { + out Regexp + lexer lexer + exprPool []Expr + + prefixParselets [256]prefixParselet + infixParselets [256]infixParselet + + charClass []Expr + allocated uint + + opts ParserOptions +} + +// ParsePCRE parses PHP-style pattern with delimiters. +// An example of such pattern is `/foo/i`. +func (p *Parser) ParsePCRE(pattern string) (*RegexpPCRE, error) { + pcre, err := p.newPCRE(pattern) + if err != nil { + return nil, err + } + if pcre.HasModifier('x') { + return nil, errors.New("'x' modifier is not supported") + } + re, err := p.Parse(pcre.Pattern) + if re != nil { + pcre.Expr = re.Expr + } + return pcre, err +} + +func (p *Parser) Parse(pattern string) (result *Regexp, err error) { + defer func() { + r := recover() + if r == nil { + return + } + if err2, ok := r.(ParseError); ok { + err = err2 + return + } + panic(r) + }() + + p.lexer.Init(pattern) + p.allocated = 0 + p.out.Pattern = pattern + if pattern == "" { + p.out.Expr = *p.newExpr(OpConcat, Position{}) + } else { + p.out.Expr = *p.parseExpr(0) + } + + if !p.opts.NoLiterals { + p.mergeChars(&p.out.Expr) + } + p.setValues(&p.out.Expr) + + return &p.out, nil +} + +type prefixParselet func(token) *Expr + +type infixParselet func(*Expr, token) *Expr + +func newParser(opts *ParserOptions) *Parser { + var p Parser + + if opts != nil { + p.opts = *opts + } + p.exprPool = make([]Expr, 256) + + for tok, op := range tok2op { + if op != 0 { + p.prefixParselets[tokenKind(tok)] = p.parsePrefixElementary + } + } + + p.prefixParselets[tokQ] = func(tok token) *Expr { + litPos := tok.pos + litPos.Begin += uint16(len(`\Q`)) + form := FormQuoteUnclosed + if strings.HasSuffix(p.tokenValue(tok), `\E`) { + litPos.End -= uint16(len(`\E`)) + form = FormDefault + } + lit := p.newExpr(OpString, litPos) + return p.newExprForm(OpQuote, form, tok.pos, lit) + } + + p.prefixParselets[tokEscapeHexFull] = func(tok token) *Expr { + litPos := tok.pos + litPos.Begin += uint16(len(`\x{`)) + litPos.End -= uint16(len(`}`)) + lit := p.newExpr(OpString, litPos) + return p.newExprForm(OpEscapeHex, FormEscapeHexFull, tok.pos, lit) + } + p.prefixParselets[tokEscapeUniFull] = func(tok token) *Expr { + litPos := tok.pos + litPos.Begin += uint16(len(`\p{`)) + litPos.End -= uint16(len(`}`)) + lit := p.newExpr(OpString, litPos) + return p.newExprForm(OpEscapeUni, FormEscapeUniFull, tok.pos, lit) + } + + p.prefixParselets[tokEscapeHex] = func(tok token) *Expr { return p.parseEscape(OpEscapeHex, `\x`, tok) } + p.prefixParselets[tokEscapeOctal] = func(tok token) *Expr { return p.parseEscape(OpEscapeOctal, `\`, tok) } + p.prefixParselets[tokEscapeChar] = func(tok token) *Expr { return p.parseEscape(OpEscapeChar, `\`, tok) } + p.prefixParselets[tokEscapeMeta] = func(tok token) *Expr { return p.parseEscape(OpEscapeMeta, `\`, tok) } + p.prefixParselets[tokEscapeUni] = func(tok token) *Expr { return p.parseEscape(OpEscapeUni, `\p`, tok) } + + p.prefixParselets[tokLparen] = func(tok token) *Expr { return p.parseGroup(OpCapture, tok) } + p.prefixParselets[tokLparenAtomic] = func(tok token) *Expr { return p.parseGroup(OpAtomicGroup, tok) } + p.prefixParselets[tokLparenPositiveLookahead] = func(tok token) *Expr { return p.parseGroup(OpPositiveLookahead, tok) } + p.prefixParselets[tokLparenNegativeLookahead] = func(tok token) *Expr { return p.parseGroup(OpNegativeLookahead, tok) } + p.prefixParselets[tokLparenPositiveLookbehind] = func(tok token) *Expr { return p.parseGroup(OpPositiveLookbehind, tok) } + p.prefixParselets[tokLparenNegativeLookbehind] = func(tok token) *Expr { return p.parseGroup(OpNegativeLookbehind, tok) } + + p.prefixParselets[tokLparenName] = func(tok token) *Expr { + return p.parseNamedCapture(FormDefault, tok) + } + p.prefixParselets[tokLparenNameAngle] = func(tok token) *Expr { + return p.parseNamedCapture(FormNamedCaptureAngle, tok) + } + p.prefixParselets[tokLparenNameQuote] = func(tok token) *Expr { + return p.parseNamedCapture(FormNamedCaptureQuote, tok) + } + + p.prefixParselets[tokLparenFlags] = p.parseGroupWithFlags + + p.prefixParselets[tokPipe] = func(tok token) *Expr { + // We need prefix pipe parselet to handle `(|x)` syntax. + right := p.parseExpr(1) + return p.newExpr(OpAlt, tok.pos, p.newEmpty(tok.pos), right) + } + p.prefixParselets[tokLbracket] = func(tok token) *Expr { + return p.parseCharClass(OpCharClass, tok) + } + p.prefixParselets[tokLbracketCaret] = func(tok token) *Expr { + return p.parseCharClass(OpNegCharClass, tok) + } + + p.infixParselets[tokRepeat] = func(left *Expr, tok token) *Expr { + repeatLit := p.newExpr(OpString, tok.pos) + return p.newExpr(OpRepeat, combinePos(left.Pos, tok.pos), left, repeatLit) + } + p.infixParselets[tokStar] = func(left *Expr, tok token) *Expr { + return p.newExpr(OpStar, combinePos(left.Pos, tok.pos), left) + } + p.infixParselets[tokConcat] = func(left *Expr, tok token) *Expr { + right := p.parseExpr(2) + if left.Op == OpConcat { + left.Args = append(left.Args, *right) + left.Pos.End = right.End() + return left + } + return p.newExpr(OpConcat, combinePos(left.Pos, right.Pos), left, right) + } + p.infixParselets[tokPipe] = p.parseAlt + p.infixParselets[tokMinus] = p.parseMinus + p.infixParselets[tokPlus] = p.parsePlus + p.infixParselets[tokQuestion] = p.parseQuestion + + return &p +} + +func (p *Parser) setValues(e *Expr) { + for i := range e.Args { + p.setValues(&e.Args[i]) + } + e.Value = p.exprValue(e) +} + +func (p *Parser) tokenValue(tok token) string { + return p.out.Pattern[tok.pos.Begin:tok.pos.End] +} + +func (p *Parser) exprValue(e *Expr) string { + return p.out.Pattern[e.Begin():e.End()] +} + +func (p *Parser) mergeChars(e *Expr) { + for i := range e.Args { + p.mergeChars(&e.Args[i]) + } + if e.Op != OpConcat || len(e.Args) < 2 { + return + } + + args := e.Args[:0] + i := 0 + for i < len(e.Args) { + first := i + chars := 0 + for j := i; j < len(e.Args) && e.Args[j].Op == OpChar; j++ { + chars++ + } + if chars > 1 { + c1 := e.Args[first] + c2 := e.Args[first+chars-1] + lit := p.newExpr(OpLiteral, combinePos(c1.Pos, c2.Pos)) + for j := 0; j < chars; j++ { + lit.Args = append(lit.Args, e.Args[first+j]) + } + args = append(args, *lit) + i += chars + } else { + args = append(args, e.Args[i]) + i++ + } + } + if len(args) == 1 { + *e = args[0] // Turn OpConcat into OpLiteral + } else { + e.Args = args + } +} + +func (p *Parser) newEmpty(pos Position) *Expr { + return p.newExpr(OpConcat, pos) +} + +func (p *Parser) newExprForm(op Operation, form Form, pos Position, args ...*Expr) *Expr { + e := p.newExpr(op, pos, args...) + e.Form = form + return e +} + +func (p *Parser) newExpr(op Operation, pos Position, args ...*Expr) *Expr { + e := p.allocExpr() + *e = Expr{ + Op: op, + Pos: pos, + Args: e.Args[:0], + } + for _, arg := range args { + e.Args = append(e.Args, *arg) + } + return e +} + +func (p *Parser) allocExpr() *Expr { + i := p.allocated + if i < uint(len(p.exprPool)) { + p.allocated++ + return &p.exprPool[i] + } + return &Expr{} +} + +func (p *Parser) expect(kind tokenKind) Position { + tok := p.lexer.NextToken() + if tok.kind != kind { + throwExpectedFound(tok.pos, kind.String(), tok.kind.String()) + } + return tok.pos +} + +func (p *Parser) parseExpr(precedence int) *Expr { + tok := p.lexer.NextToken() + prefix := p.prefixParselets[tok.kind] + if prefix == nil { + throwUnexpectedToken(tok.pos, tok.String()) + } + left := prefix(tok) + + for precedence < p.precedenceOf(p.lexer.Peek()) { + tok := p.lexer.NextToken() + infix := p.infixParselets[tok.kind] + left = infix(left, tok) + } + + return left +} + +func (p *Parser) parsePrefixElementary(tok token) *Expr { + return p.newExpr(tok2op[tok.kind], tok.pos) +} + +func (p *Parser) parseCharClass(op Operation, tok token) *Expr { + var endPos Position + p.charClass = p.charClass[:0] + for { + p.charClass = append(p.charClass, *p.parseExpr(0)) + next := p.lexer.Peek() + if next.kind == tokRbracket { + endPos = next.pos + p.lexer.NextToken() + break + } + if next.kind == tokNone { + throw(tok.pos, "unterminated '['") + } + } + + result := p.newExpr(op, combinePos(tok.pos, endPos)) + result.Args = append(result.Args, p.charClass...) + return result +} + +func (p *Parser) parseMinus(left *Expr, tok token) *Expr { + if p.isValidCharRangeOperand(left) { + if p.lexer.Peek().kind != tokRbracket { + right := p.parseExpr(2) + return p.newExpr(OpCharRange, combinePos(left.Pos, right.Pos), left, right) + } + } + p.charClass = append(p.charClass, *left) + return p.newExpr(OpChar, tok.pos) +} + +func (p *Parser) isValidCharRangeOperand(e *Expr) bool { + switch e.Op { + case OpEscapeHex, OpEscapeOctal, OpEscapeMeta, OpChar: + return true + case OpEscapeChar: + switch p.exprValue(e) { + case `\\`, `\|`, `\*`, `\+`, `\?`, `\.`, `\[`, `\^`, `\$`, `\(`, `\)`: + return true + } + } + return false +} + +func (p *Parser) parsePlus(left *Expr, tok token) *Expr { + op := OpPlus + switch left.Op { + case OpPlus, OpStar, OpQuestion, OpRepeat: + op = OpPossessive + } + return p.newExpr(op, combinePos(left.Pos, tok.pos), left) +} + +func (p *Parser) parseQuestion(left *Expr, tok token) *Expr { + op := OpQuestion + switch left.Op { + case OpPlus, OpStar, OpQuestion, OpRepeat: + op = OpNonGreedy + } + return p.newExpr(op, combinePos(left.Pos, tok.pos), left) +} + +func (p *Parser) parseAlt(left *Expr, tok token) *Expr { + var right *Expr + switch p.lexer.Peek().kind { + case tokRparen, tokNone: + // This is needed to handle `(x|)` syntax. + right = p.newEmpty(tok.pos) + default: + right = p.parseExpr(1) + } + if left.Op == OpAlt { + left.Args = append(left.Args, *right) + left.Pos.End = right.End() + return left + } + return p.newExpr(OpAlt, combinePos(left.Pos, right.Pos), left, right) +} + +func (p *Parser) parseGroupItem(tok token) *Expr { + if p.lexer.Peek().kind == tokRparen { + // This is needed to handle `() syntax.` + return p.newEmpty(tok.pos) + } + return p.parseExpr(0) +} + +func (p *Parser) parseGroup(op Operation, tok token) *Expr { + x := p.parseGroupItem(tok) + result := p.newExpr(op, tok.pos, x) + result.Pos.End = p.expect(tokRparen).End + return result +} + +func (p *Parser) parseNamedCapture(form Form, tok token) *Expr { + prefixLen := len("(?<") + if form == FormDefault { + prefixLen = len("(?P<") + } + name := p.newExpr(OpString, Position{ + Begin: tok.pos.Begin + uint16(prefixLen), + End: tok.pos.End - uint16(len(">")), + }) + x := p.parseGroupItem(tok) + result := p.newExprForm(OpNamedCapture, form, tok.pos, x, name) + result.Pos.End = p.expect(tokRparen).End + return result +} + +func (p *Parser) parseGroupWithFlags(tok token) *Expr { + var result *Expr + val := p.out.Pattern[tok.pos.Begin+1 : tok.pos.End] + switch { + case !strings.HasSuffix(val, ":"): + flags := p.newExpr(OpString, Position{ + Begin: tok.pos.Begin + uint16(len("(?")), + End: tok.pos.End, + }) + result = p.newExpr(OpFlagOnlyGroup, tok.pos, flags) + case val == "?:": + x := p.parseGroupItem(tok) + result = p.newExpr(OpGroup, tok.pos, x) + default: + flags := p.newExpr(OpString, Position{ + Begin: tok.pos.Begin + uint16(len("(?")), + End: tok.pos.End - uint16(len(":")), + }) + x := p.parseGroupItem(tok) + result = p.newExpr(OpGroupWithFlags, tok.pos, x, flags) + } + result.Pos.End = p.expect(tokRparen).End + return result +} + +func (p *Parser) parseEscape(op Operation, prefix string, tok token) *Expr { + litPos := tok.pos + litPos.Begin += uint16(len(prefix)) + lit := p.newExpr(OpString, litPos) + return p.newExpr(op, tok.pos, lit) +} + +func (p *Parser) precedenceOf(tok token) int { + switch tok.kind { + case tokPipe: + return 1 + case tokConcat, tokMinus: + return 2 + case tokPlus, tokStar, tokQuestion, tokRepeat: + return 3 + default: + return 0 + } +} + +func (p *Parser) newPCRE(source string) (*RegexpPCRE, error) { + if source == "" { + return nil, errors.New("empty pattern: can't find delimiters") + } + + delim := source[0] + endDelim := delim + switch delim { + case '(': + endDelim = ')' + case '{': + endDelim = '}' + case '[': + endDelim = ']' + case '<': + endDelim = '>' + case '\\': + return nil, errors.New("'\\' is not a valid delimiter") + default: + if isSpace(delim) { + return nil, errors.New("whitespace is not a valid delimiter") + } + if isAlphanumeric(delim) { + return nil, errors.New("'" + string(delim) + "' is not a valid delimiter") + } + } + + const delimLen = 1 + j := strings.LastIndexByte(source[delimLen:], endDelim) + if j == -1 { + return nil, errors.New("can't find '" + string(endDelim) + "' ending delimiter") + } + j += delimLen + + pcre := &RegexpPCRE{ + Pattern: source[1:j], + Source: source, + Delim: [2]byte{delim, endDelim}, + Modifiers: source[j+1:], + } + return pcre, nil +} + +var tok2op = [256]Operation{ + tokDollar: OpDollar, + tokCaret: OpCaret, + tokDot: OpDot, + tokChar: OpChar, + tokMinus: OpChar, + tokPosixClass: OpPosixClass, + tokComment: OpComment, +} -- cgit mrf-deployment