aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/quasilyte/regex/syntax/parser.go
diff options
context:
space:
mode:
authorTaras Madan <tarasmadan@google.com>2025-01-22 16:07:17 +0100
committerTaras Madan <tarasmadan@google.com>2025-01-23 10:42:36 +0000
commit7b4377ad9d8a7205416df8d6217ef2b010f89481 (patch)
treee6fec4fd12ff807a16d847923f501075bf71d16c /vendor/github.com/quasilyte/regex/syntax/parser.go
parent475a4c203afb8b7d3af51c4fd32bb170ff32a45e (diff)
vendor: delete
Diffstat (limited to 'vendor/github.com/quasilyte/regex/syntax/parser.go')
-rw-r--r--vendor/github.com/quasilyte/regex/syntax/parser.go503
1 files changed, 0 insertions, 503 deletions
diff --git a/vendor/github.com/quasilyte/regex/syntax/parser.go b/vendor/github.com/quasilyte/regex/syntax/parser.go
deleted file mode 100644
index f1c154f31..000000000
--- a/vendor/github.com/quasilyte/regex/syntax/parser.go
+++ /dev/null
@@ -1,503 +0,0 @@
-package syntax
-
-import (
- "errors"
- "strings"
-)
-
-type ParserOptions struct {
- // NoLiterals disables OpChar merging into OpLiteral.
- NoLiterals bool
-}
-
-func NewParser(opts *ParserOptions) *Parser {
- return newParser(opts)
-}
-
-type Parser struct {
- out Regexp
- lexer lexer
- exprPool []Expr
-
- prefixParselets [256]prefixParselet
- infixParselets [256]infixParselet
-
- charClass []Expr
- allocated uint
-
- opts ParserOptions
-}
-
-// ParsePCRE parses PHP-style pattern with delimiters.
-// An example of such pattern is `/foo/i`.
-func (p *Parser) ParsePCRE(pattern string) (*RegexpPCRE, error) {
- pcre, err := p.newPCRE(pattern)
- if err != nil {
- return nil, err
- }
- if pcre.HasModifier('x') {
- return nil, errors.New("'x' modifier is not supported")
- }
- re, err := p.Parse(pcre.Pattern)
- if re != nil {
- pcre.Expr = re.Expr
- }
- return pcre, err
-}
-
-func (p *Parser) Parse(pattern string) (result *Regexp, err error) {
- defer func() {
- r := recover()
- if r == nil {
- return
- }
- if err2, ok := r.(ParseError); ok {
- err = err2
- return
- }
- panic(r)
- }()
-
- p.lexer.Init(pattern)
- p.allocated = 0
- p.out.Pattern = pattern
- if pattern == "" {
- p.out.Expr = *p.newExpr(OpConcat, Position{})
- } else {
- p.out.Expr = *p.parseExpr(0)
- }
-
- if !p.opts.NoLiterals {
- p.mergeChars(&p.out.Expr)
- }
- p.setValues(&p.out.Expr)
-
- return &p.out, nil
-}
-
-type prefixParselet func(token) *Expr
-
-type infixParselet func(*Expr, token) *Expr
-
-func newParser(opts *ParserOptions) *Parser {
- var p Parser
-
- if opts != nil {
- p.opts = *opts
- }
- p.exprPool = make([]Expr, 256)
-
- for tok, op := range tok2op {
- if op != 0 {
- p.prefixParselets[tokenKind(tok)] = p.parsePrefixElementary
- }
- }
-
- p.prefixParselets[tokQ] = func(tok token) *Expr {
- litPos := tok.pos
- litPos.Begin += uint16(len(`\Q`))
- form := FormQuoteUnclosed
- if strings.HasSuffix(p.tokenValue(tok), `\E`) {
- litPos.End -= uint16(len(`\E`))
- form = FormDefault
- }
- lit := p.newExpr(OpString, litPos)
- return p.newExprForm(OpQuote, form, tok.pos, lit)
- }
-
- p.prefixParselets[tokEscapeHexFull] = func(tok token) *Expr {
- litPos := tok.pos
- litPos.Begin += uint16(len(`\x{`))
- litPos.End -= uint16(len(`}`))
- lit := p.newExpr(OpString, litPos)
- return p.newExprForm(OpEscapeHex, FormEscapeHexFull, tok.pos, lit)
- }
- p.prefixParselets[tokEscapeUniFull] = func(tok token) *Expr {
- litPos := tok.pos
- litPos.Begin += uint16(len(`\p{`))
- litPos.End -= uint16(len(`}`))
- lit := p.newExpr(OpString, litPos)
- return p.newExprForm(OpEscapeUni, FormEscapeUniFull, tok.pos, lit)
- }
-
- p.prefixParselets[tokEscapeHex] = func(tok token) *Expr { return p.parseEscape(OpEscapeHex, `\x`, tok) }
- p.prefixParselets[tokEscapeOctal] = func(tok token) *Expr { return p.parseEscape(OpEscapeOctal, `\`, tok) }
- p.prefixParselets[tokEscapeChar] = func(tok token) *Expr { return p.parseEscape(OpEscapeChar, `\`, tok) }
- p.prefixParselets[tokEscapeMeta] = func(tok token) *Expr { return p.parseEscape(OpEscapeMeta, `\`, tok) }
- p.prefixParselets[tokEscapeUni] = func(tok token) *Expr { return p.parseEscape(OpEscapeUni, `\p`, tok) }
-
- p.prefixParselets[tokLparen] = func(tok token) *Expr { return p.parseGroup(OpCapture, tok) }
- p.prefixParselets[tokLparenAtomic] = func(tok token) *Expr { return p.parseGroup(OpAtomicGroup, tok) }
- p.prefixParselets[tokLparenPositiveLookahead] = func(tok token) *Expr { return p.parseGroup(OpPositiveLookahead, tok) }
- p.prefixParselets[tokLparenNegativeLookahead] = func(tok token) *Expr { return p.parseGroup(OpNegativeLookahead, tok) }
- p.prefixParselets[tokLparenPositiveLookbehind] = func(tok token) *Expr { return p.parseGroup(OpPositiveLookbehind, tok) }
- p.prefixParselets[tokLparenNegativeLookbehind] = func(tok token) *Expr { return p.parseGroup(OpNegativeLookbehind, tok) }
-
- p.prefixParselets[tokLparenName] = func(tok token) *Expr {
- return p.parseNamedCapture(FormDefault, tok)
- }
- p.prefixParselets[tokLparenNameAngle] = func(tok token) *Expr {
- return p.parseNamedCapture(FormNamedCaptureAngle, tok)
- }
- p.prefixParselets[tokLparenNameQuote] = func(tok token) *Expr {
- return p.parseNamedCapture(FormNamedCaptureQuote, tok)
- }
-
- p.prefixParselets[tokLparenFlags] = p.parseGroupWithFlags
-
- p.prefixParselets[tokPipe] = func(tok token) *Expr {
- // We need prefix pipe parselet to handle `(|x)` syntax.
- right := p.parseExpr(1)
- return p.newExpr(OpAlt, tok.pos, p.newEmpty(tok.pos), right)
- }
- p.prefixParselets[tokLbracket] = func(tok token) *Expr {
- return p.parseCharClass(OpCharClass, tok)
- }
- p.prefixParselets[tokLbracketCaret] = func(tok token) *Expr {
- return p.parseCharClass(OpNegCharClass, tok)
- }
-
- p.infixParselets[tokRepeat] = func(left *Expr, tok token) *Expr {
- repeatLit := p.newExpr(OpString, tok.pos)
- return p.newExpr(OpRepeat, combinePos(left.Pos, tok.pos), left, repeatLit)
- }
- p.infixParselets[tokStar] = func(left *Expr, tok token) *Expr {
- return p.newExpr(OpStar, combinePos(left.Pos, tok.pos), left)
- }
- p.infixParselets[tokConcat] = func(left *Expr, tok token) *Expr {
- right := p.parseExpr(2)
- if left.Op == OpConcat {
- left.Args = append(left.Args, *right)
- left.Pos.End = right.End()
- return left
- }
- return p.newExpr(OpConcat, combinePos(left.Pos, right.Pos), left, right)
- }
- p.infixParselets[tokPipe] = p.parseAlt
- p.infixParselets[tokMinus] = p.parseMinus
- p.infixParselets[tokPlus] = p.parsePlus
- p.infixParselets[tokQuestion] = p.parseQuestion
-
- return &p
-}
-
-func (p *Parser) setValues(e *Expr) {
- for i := range e.Args {
- p.setValues(&e.Args[i])
- }
- e.Value = p.exprValue(e)
-}
-
-func (p *Parser) tokenValue(tok token) string {
- return p.out.Pattern[tok.pos.Begin:tok.pos.End]
-}
-
-func (p *Parser) exprValue(e *Expr) string {
- return p.out.Pattern[e.Begin():e.End()]
-}
-
-func (p *Parser) mergeChars(e *Expr) {
- for i := range e.Args {
- p.mergeChars(&e.Args[i])
- }
- if e.Op != OpConcat || len(e.Args) < 2 {
- return
- }
-
- args := e.Args[:0]
- i := 0
- for i < len(e.Args) {
- first := i
- chars := 0
- for j := i; j < len(e.Args) && e.Args[j].Op == OpChar; j++ {
- chars++
- }
- if chars > 1 {
- c1 := e.Args[first]
- c2 := e.Args[first+chars-1]
- lit := p.newExpr(OpLiteral, combinePos(c1.Pos, c2.Pos))
- for j := 0; j < chars; j++ {
- lit.Args = append(lit.Args, e.Args[first+j])
- }
- args = append(args, *lit)
- i += chars
- } else {
- args = append(args, e.Args[i])
- i++
- }
- }
- if len(args) == 1 {
- *e = args[0] // Turn OpConcat into OpLiteral
- } else {
- e.Args = args
- }
-}
-
-func (p *Parser) newEmpty(pos Position) *Expr {
- return p.newExpr(OpConcat, pos)
-}
-
-func (p *Parser) newExprForm(op Operation, form Form, pos Position, args ...*Expr) *Expr {
- e := p.newExpr(op, pos, args...)
- e.Form = form
- return e
-}
-
-func (p *Parser) newExpr(op Operation, pos Position, args ...*Expr) *Expr {
- e := p.allocExpr()
- *e = Expr{
- Op: op,
- Pos: pos,
- Args: e.Args[:0],
- }
- for _, arg := range args {
- e.Args = append(e.Args, *arg)
- }
- return e
-}
-
-func (p *Parser) allocExpr() *Expr {
- i := p.allocated
- if i < uint(len(p.exprPool)) {
- p.allocated++
- return &p.exprPool[i]
- }
- return &Expr{}
-}
-
-func (p *Parser) expect(kind tokenKind) Position {
- tok := p.lexer.NextToken()
- if tok.kind != kind {
- throwExpectedFound(tok.pos, kind.String(), tok.kind.String())
- }
- return tok.pos
-}
-
-func (p *Parser) parseExpr(precedence int) *Expr {
- tok := p.lexer.NextToken()
- prefix := p.prefixParselets[tok.kind]
- if prefix == nil {
- throwUnexpectedToken(tok.pos, tok.String())
- }
- left := prefix(tok)
-
- for precedence < p.precedenceOf(p.lexer.Peek()) {
- tok := p.lexer.NextToken()
- infix := p.infixParselets[tok.kind]
- left = infix(left, tok)
- }
-
- return left
-}
-
-func (p *Parser) parsePrefixElementary(tok token) *Expr {
- return p.newExpr(tok2op[tok.kind], tok.pos)
-}
-
-func (p *Parser) parseCharClass(op Operation, tok token) *Expr {
- var endPos Position
- p.charClass = p.charClass[:0]
- for {
- p.charClass = append(p.charClass, *p.parseExpr(0))
- next := p.lexer.Peek()
- if next.kind == tokRbracket {
- endPos = next.pos
- p.lexer.NextToken()
- break
- }
- if next.kind == tokNone {
- throw(tok.pos, "unterminated '['")
- }
- }
-
- result := p.newExpr(op, combinePos(tok.pos, endPos))
- result.Args = append(result.Args, p.charClass...)
- return result
-}
-
-func (p *Parser) parseMinus(left *Expr, tok token) *Expr {
- if p.isValidCharRangeOperand(left) {
- if p.lexer.Peek().kind != tokRbracket {
- right := p.parseExpr(2)
- return p.newExpr(OpCharRange, combinePos(left.Pos, right.Pos), left, right)
- }
- }
- p.charClass = append(p.charClass, *left)
- return p.newExpr(OpChar, tok.pos)
-}
-
-func (p *Parser) isValidCharRangeOperand(e *Expr) bool {
- switch e.Op {
- case OpEscapeHex, OpEscapeOctal, OpEscapeMeta, OpChar:
- return true
- case OpEscapeChar:
- switch p.exprValue(e) {
- case `\\`, `\|`, `\*`, `\+`, `\?`, `\.`, `\[`, `\^`, `\$`, `\(`, `\)`:
- return true
- }
- }
- return false
-}
-
-func (p *Parser) parsePlus(left *Expr, tok token) *Expr {
- op := OpPlus
- switch left.Op {
- case OpPlus, OpStar, OpQuestion, OpRepeat:
- op = OpPossessive
- }
- return p.newExpr(op, combinePos(left.Pos, tok.pos), left)
-}
-
-func (p *Parser) parseQuestion(left *Expr, tok token) *Expr {
- op := OpQuestion
- switch left.Op {
- case OpPlus, OpStar, OpQuestion, OpRepeat:
- op = OpNonGreedy
- }
- return p.newExpr(op, combinePos(left.Pos, tok.pos), left)
-}
-
-func (p *Parser) parseAlt(left *Expr, tok token) *Expr {
- var right *Expr
- switch p.lexer.Peek().kind {
- case tokRparen, tokNone:
- // This is needed to handle `(x|)` syntax.
- right = p.newEmpty(tok.pos)
- default:
- right = p.parseExpr(1)
- }
- if left.Op == OpAlt {
- left.Args = append(left.Args, *right)
- left.Pos.End = right.End()
- return left
- }
- return p.newExpr(OpAlt, combinePos(left.Pos, right.Pos), left, right)
-}
-
-func (p *Parser) parseGroupItem(tok token) *Expr {
- if p.lexer.Peek().kind == tokRparen {
- // This is needed to handle `() syntax.`
- return p.newEmpty(tok.pos)
- }
- return p.parseExpr(0)
-}
-
-func (p *Parser) parseGroup(op Operation, tok token) *Expr {
- x := p.parseGroupItem(tok)
- result := p.newExpr(op, tok.pos, x)
- result.Pos.End = p.expect(tokRparen).End
- return result
-}
-
-func (p *Parser) parseNamedCapture(form Form, tok token) *Expr {
- prefixLen := len("(?<")
- if form == FormDefault {
- prefixLen = len("(?P<")
- }
- name := p.newExpr(OpString, Position{
- Begin: tok.pos.Begin + uint16(prefixLen),
- End: tok.pos.End - uint16(len(">")),
- })
- x := p.parseGroupItem(tok)
- result := p.newExprForm(OpNamedCapture, form, tok.pos, x, name)
- result.Pos.End = p.expect(tokRparen).End
- return result
-}
-
-func (p *Parser) parseGroupWithFlags(tok token) *Expr {
- var result *Expr
- val := p.out.Pattern[tok.pos.Begin+1 : tok.pos.End]
- switch {
- case !strings.HasSuffix(val, ":"):
- flags := p.newExpr(OpString, Position{
- Begin: tok.pos.Begin + uint16(len("(?")),
- End: tok.pos.End,
- })
- result = p.newExpr(OpFlagOnlyGroup, tok.pos, flags)
- case val == "?:":
- x := p.parseGroupItem(tok)
- result = p.newExpr(OpGroup, tok.pos, x)
- default:
- flags := p.newExpr(OpString, Position{
- Begin: tok.pos.Begin + uint16(len("(?")),
- End: tok.pos.End - uint16(len(":")),
- })
- x := p.parseGroupItem(tok)
- result = p.newExpr(OpGroupWithFlags, tok.pos, x, flags)
- }
- result.Pos.End = p.expect(tokRparen).End
- return result
-}
-
-func (p *Parser) parseEscape(op Operation, prefix string, tok token) *Expr {
- litPos := tok.pos
- litPos.Begin += uint16(len(prefix))
- lit := p.newExpr(OpString, litPos)
- return p.newExpr(op, tok.pos, lit)
-}
-
-func (p *Parser) precedenceOf(tok token) int {
- switch tok.kind {
- case tokPipe:
- return 1
- case tokConcat, tokMinus:
- return 2
- case tokPlus, tokStar, tokQuestion, tokRepeat:
- return 3
- default:
- return 0
- }
-}
-
-func (p *Parser) newPCRE(source string) (*RegexpPCRE, error) {
- if source == "" {
- return nil, errors.New("empty pattern: can't find delimiters")
- }
-
- delim := source[0]
- endDelim := delim
- switch delim {
- case '(':
- endDelim = ')'
- case '{':
- endDelim = '}'
- case '[':
- endDelim = ']'
- case '<':
- endDelim = '>'
- case '\\':
- return nil, errors.New("'\\' is not a valid delimiter")
- default:
- if isSpace(delim) {
- return nil, errors.New("whitespace is not a valid delimiter")
- }
- if isAlphanumeric(delim) {
- return nil, errors.New("'" + string(delim) + "' is not a valid delimiter")
- }
- }
-
- const delimLen = 1
- j := strings.LastIndexByte(source[delimLen:], endDelim)
- if j == -1 {
- return nil, errors.New("can't find '" + string(endDelim) + "' ending delimiter")
- }
- j += delimLen
-
- pcre := &RegexpPCRE{
- Pattern: source[delimLen:j],
- Source: source,
- Delim: [2]byte{delim, endDelim},
- Modifiers: source[j+delimLen:],
- }
- return pcre, nil
-}
-
-var tok2op = [256]Operation{
- tokDollar: OpDollar,
- tokCaret: OpCaret,
- tokDot: OpDot,
- tokChar: OpChar,
- tokMinus: OpChar,
- tokPosixClass: OpPosixClass,
- tokComment: OpComment,
-}