From 7b4377ad9d8a7205416df8d6217ef2b010f89481 Mon Sep 17 00:00:00 2001 From: Taras Madan Date: Wed, 22 Jan 2025 16:07:17 +0100 Subject: vendor: delete --- vendor/github.com/quasilyte/regex/syntax/parser.go | 503 --------------------- 1 file changed, 503 deletions(-) delete mode 100644 vendor/github.com/quasilyte/regex/syntax/parser.go (limited to 'vendor/github.com/quasilyte/regex/syntax/parser.go') diff --git a/vendor/github.com/quasilyte/regex/syntax/parser.go b/vendor/github.com/quasilyte/regex/syntax/parser.go deleted file mode 100644 index f1c154f31..000000000 --- a/vendor/github.com/quasilyte/regex/syntax/parser.go +++ /dev/null @@ -1,503 +0,0 @@ -package syntax - -import ( - "errors" - "strings" -) - -type ParserOptions struct { - // NoLiterals disables OpChar merging into OpLiteral. - NoLiterals bool -} - -func NewParser(opts *ParserOptions) *Parser { - return newParser(opts) -} - -type Parser struct { - out Regexp - lexer lexer - exprPool []Expr - - prefixParselets [256]prefixParselet - infixParselets [256]infixParselet - - charClass []Expr - allocated uint - - opts ParserOptions -} - -// ParsePCRE parses PHP-style pattern with delimiters. -// An example of such pattern is `/foo/i`. -func (p *Parser) ParsePCRE(pattern string) (*RegexpPCRE, error) { - pcre, err := p.newPCRE(pattern) - if err != nil { - return nil, err - } - if pcre.HasModifier('x') { - return nil, errors.New("'x' modifier is not supported") - } - re, err := p.Parse(pcre.Pattern) - if re != nil { - pcre.Expr = re.Expr - } - return pcre, err -} - -func (p *Parser) Parse(pattern string) (result *Regexp, err error) { - defer func() { - r := recover() - if r == nil { - return - } - if err2, ok := r.(ParseError); ok { - err = err2 - return - } - panic(r) - }() - - p.lexer.Init(pattern) - p.allocated = 0 - p.out.Pattern = pattern - if pattern == "" { - p.out.Expr = *p.newExpr(OpConcat, Position{}) - } else { - p.out.Expr = *p.parseExpr(0) - } - - if !p.opts.NoLiterals { - p.mergeChars(&p.out.Expr) - } - p.setValues(&p.out.Expr) - - return &p.out, nil -} - -type prefixParselet func(token) *Expr - -type infixParselet func(*Expr, token) *Expr - -func newParser(opts *ParserOptions) *Parser { - var p Parser - - if opts != nil { - p.opts = *opts - } - p.exprPool = make([]Expr, 256) - - for tok, op := range tok2op { - if op != 0 { - p.prefixParselets[tokenKind(tok)] = p.parsePrefixElementary - } - } - - p.prefixParselets[tokQ] = func(tok token) *Expr { - litPos := tok.pos - litPos.Begin += uint16(len(`\Q`)) - form := FormQuoteUnclosed - if strings.HasSuffix(p.tokenValue(tok), `\E`) { - litPos.End -= uint16(len(`\E`)) - form = FormDefault - } - lit := p.newExpr(OpString, litPos) - return p.newExprForm(OpQuote, form, tok.pos, lit) - } - - p.prefixParselets[tokEscapeHexFull] = func(tok token) *Expr { - litPos := tok.pos - litPos.Begin += uint16(len(`\x{`)) - litPos.End -= uint16(len(`}`)) - lit := p.newExpr(OpString, litPos) - return p.newExprForm(OpEscapeHex, FormEscapeHexFull, tok.pos, lit) - } - p.prefixParselets[tokEscapeUniFull] = func(tok token) *Expr { - litPos := tok.pos - litPos.Begin += uint16(len(`\p{`)) - litPos.End -= uint16(len(`}`)) - lit := p.newExpr(OpString, litPos) - return p.newExprForm(OpEscapeUni, FormEscapeUniFull, tok.pos, lit) - } - - p.prefixParselets[tokEscapeHex] = func(tok token) *Expr { return p.parseEscape(OpEscapeHex, `\x`, tok) } - p.prefixParselets[tokEscapeOctal] = func(tok token) *Expr { return p.parseEscape(OpEscapeOctal, `\`, tok) } - p.prefixParselets[tokEscapeChar] = func(tok token) *Expr { return p.parseEscape(OpEscapeChar, `\`, tok) } - p.prefixParselets[tokEscapeMeta] = func(tok token) *Expr { return p.parseEscape(OpEscapeMeta, `\`, tok) } - p.prefixParselets[tokEscapeUni] = func(tok token) *Expr { return p.parseEscape(OpEscapeUni, `\p`, tok) } - - p.prefixParselets[tokLparen] = func(tok token) *Expr { return p.parseGroup(OpCapture, tok) } - p.prefixParselets[tokLparenAtomic] = func(tok token) *Expr { return p.parseGroup(OpAtomicGroup, tok) } - p.prefixParselets[tokLparenPositiveLookahead] = func(tok token) *Expr { return p.parseGroup(OpPositiveLookahead, tok) } - p.prefixParselets[tokLparenNegativeLookahead] = func(tok token) *Expr { return p.parseGroup(OpNegativeLookahead, tok) } - p.prefixParselets[tokLparenPositiveLookbehind] = func(tok token) *Expr { return p.parseGroup(OpPositiveLookbehind, tok) } - p.prefixParselets[tokLparenNegativeLookbehind] = func(tok token) *Expr { return p.parseGroup(OpNegativeLookbehind, tok) } - - p.prefixParselets[tokLparenName] = func(tok token) *Expr { - return p.parseNamedCapture(FormDefault, tok) - } - p.prefixParselets[tokLparenNameAngle] = func(tok token) *Expr { - return p.parseNamedCapture(FormNamedCaptureAngle, tok) - } - p.prefixParselets[tokLparenNameQuote] = func(tok token) *Expr { - return p.parseNamedCapture(FormNamedCaptureQuote, tok) - } - - p.prefixParselets[tokLparenFlags] = p.parseGroupWithFlags - - p.prefixParselets[tokPipe] = func(tok token) *Expr { - // We need prefix pipe parselet to handle `(|x)` syntax. - right := p.parseExpr(1) - return p.newExpr(OpAlt, tok.pos, p.newEmpty(tok.pos), right) - } - p.prefixParselets[tokLbracket] = func(tok token) *Expr { - return p.parseCharClass(OpCharClass, tok) - } - p.prefixParselets[tokLbracketCaret] = func(tok token) *Expr { - return p.parseCharClass(OpNegCharClass, tok) - } - - p.infixParselets[tokRepeat] = func(left *Expr, tok token) *Expr { - repeatLit := p.newExpr(OpString, tok.pos) - return p.newExpr(OpRepeat, combinePos(left.Pos, tok.pos), left, repeatLit) - } - p.infixParselets[tokStar] = func(left *Expr, tok token) *Expr { - return p.newExpr(OpStar, combinePos(left.Pos, tok.pos), left) - } - p.infixParselets[tokConcat] = func(left *Expr, tok token) *Expr { - right := p.parseExpr(2) - if left.Op == OpConcat { - left.Args = append(left.Args, *right) - left.Pos.End = right.End() - return left - } - return p.newExpr(OpConcat, combinePos(left.Pos, right.Pos), left, right) - } - p.infixParselets[tokPipe] = p.parseAlt - p.infixParselets[tokMinus] = p.parseMinus - p.infixParselets[tokPlus] = p.parsePlus - p.infixParselets[tokQuestion] = p.parseQuestion - - return &p -} - -func (p *Parser) setValues(e *Expr) { - for i := range e.Args { - p.setValues(&e.Args[i]) - } - e.Value = p.exprValue(e) -} - -func (p *Parser) tokenValue(tok token) string { - return p.out.Pattern[tok.pos.Begin:tok.pos.End] -} - -func (p *Parser) exprValue(e *Expr) string { - return p.out.Pattern[e.Begin():e.End()] -} - -func (p *Parser) mergeChars(e *Expr) { - for i := range e.Args { - p.mergeChars(&e.Args[i]) - } - if e.Op != OpConcat || len(e.Args) < 2 { - return - } - - args := e.Args[:0] - i := 0 - for i < len(e.Args) { - first := i - chars := 0 - for j := i; j < len(e.Args) && e.Args[j].Op == OpChar; j++ { - chars++ - } - if chars > 1 { - c1 := e.Args[first] - c2 := e.Args[first+chars-1] - lit := p.newExpr(OpLiteral, combinePos(c1.Pos, c2.Pos)) - for j := 0; j < chars; j++ { - lit.Args = append(lit.Args, e.Args[first+j]) - } - args = append(args, *lit) - i += chars - } else { - args = append(args, e.Args[i]) - i++ - } - } - if len(args) == 1 { - *e = args[0] // Turn OpConcat into OpLiteral - } else { - e.Args = args - } -} - -func (p *Parser) newEmpty(pos Position) *Expr { - return p.newExpr(OpConcat, pos) -} - -func (p *Parser) newExprForm(op Operation, form Form, pos Position, args ...*Expr) *Expr { - e := p.newExpr(op, pos, args...) - e.Form = form - return e -} - -func (p *Parser) newExpr(op Operation, pos Position, args ...*Expr) *Expr { - e := p.allocExpr() - *e = Expr{ - Op: op, - Pos: pos, - Args: e.Args[:0], - } - for _, arg := range args { - e.Args = append(e.Args, *arg) - } - return e -} - -func (p *Parser) allocExpr() *Expr { - i := p.allocated - if i < uint(len(p.exprPool)) { - p.allocated++ - return &p.exprPool[i] - } - return &Expr{} -} - -func (p *Parser) expect(kind tokenKind) Position { - tok := p.lexer.NextToken() - if tok.kind != kind { - throwExpectedFound(tok.pos, kind.String(), tok.kind.String()) - } - return tok.pos -} - -func (p *Parser) parseExpr(precedence int) *Expr { - tok := p.lexer.NextToken() - prefix := p.prefixParselets[tok.kind] - if prefix == nil { - throwUnexpectedToken(tok.pos, tok.String()) - } - left := prefix(tok) - - for precedence < p.precedenceOf(p.lexer.Peek()) { - tok := p.lexer.NextToken() - infix := p.infixParselets[tok.kind] - left = infix(left, tok) - } - - return left -} - -func (p *Parser) parsePrefixElementary(tok token) *Expr { - return p.newExpr(tok2op[tok.kind], tok.pos) -} - -func (p *Parser) parseCharClass(op Operation, tok token) *Expr { - var endPos Position - p.charClass = p.charClass[:0] - for { - p.charClass = append(p.charClass, *p.parseExpr(0)) - next := p.lexer.Peek() - if next.kind == tokRbracket { - endPos = next.pos - p.lexer.NextToken() - break - } - if next.kind == tokNone { - throw(tok.pos, "unterminated '['") - } - } - - result := p.newExpr(op, combinePos(tok.pos, endPos)) - result.Args = append(result.Args, p.charClass...) - return result -} - -func (p *Parser) parseMinus(left *Expr, tok token) *Expr { - if p.isValidCharRangeOperand(left) { - if p.lexer.Peek().kind != tokRbracket { - right := p.parseExpr(2) - return p.newExpr(OpCharRange, combinePos(left.Pos, right.Pos), left, right) - } - } - p.charClass = append(p.charClass, *left) - return p.newExpr(OpChar, tok.pos) -} - -func (p *Parser) isValidCharRangeOperand(e *Expr) bool { - switch e.Op { - case OpEscapeHex, OpEscapeOctal, OpEscapeMeta, OpChar: - return true - case OpEscapeChar: - switch p.exprValue(e) { - case `\\`, `\|`, `\*`, `\+`, `\?`, `\.`, `\[`, `\^`, `\$`, `\(`, `\)`: - return true - } - } - return false -} - -func (p *Parser) parsePlus(left *Expr, tok token) *Expr { - op := OpPlus - switch left.Op { - case OpPlus, OpStar, OpQuestion, OpRepeat: - op = OpPossessive - } - return p.newExpr(op, combinePos(left.Pos, tok.pos), left) -} - -func (p *Parser) parseQuestion(left *Expr, tok token) *Expr { - op := OpQuestion - switch left.Op { - case OpPlus, OpStar, OpQuestion, OpRepeat: - op = OpNonGreedy - } - return p.newExpr(op, combinePos(left.Pos, tok.pos), left) -} - -func (p *Parser) parseAlt(left *Expr, tok token) *Expr { - var right *Expr - switch p.lexer.Peek().kind { - case tokRparen, tokNone: - // This is needed to handle `(x|)` syntax. - right = p.newEmpty(tok.pos) - default: - right = p.parseExpr(1) - } - if left.Op == OpAlt { - left.Args = append(left.Args, *right) - left.Pos.End = right.End() - return left - } - return p.newExpr(OpAlt, combinePos(left.Pos, right.Pos), left, right) -} - -func (p *Parser) parseGroupItem(tok token) *Expr { - if p.lexer.Peek().kind == tokRparen { - // This is needed to handle `() syntax.` - return p.newEmpty(tok.pos) - } - return p.parseExpr(0) -} - -func (p *Parser) parseGroup(op Operation, tok token) *Expr { - x := p.parseGroupItem(tok) - result := p.newExpr(op, tok.pos, x) - result.Pos.End = p.expect(tokRparen).End - return result -} - -func (p *Parser) parseNamedCapture(form Form, tok token) *Expr { - prefixLen := len("(?<") - if form == FormDefault { - prefixLen = len("(?P<") - } - name := p.newExpr(OpString, Position{ - Begin: tok.pos.Begin + uint16(prefixLen), - End: tok.pos.End - uint16(len(">")), - }) - x := p.parseGroupItem(tok) - result := p.newExprForm(OpNamedCapture, form, tok.pos, x, name) - result.Pos.End = p.expect(tokRparen).End - return result -} - -func (p *Parser) parseGroupWithFlags(tok token) *Expr { - var result *Expr - val := p.out.Pattern[tok.pos.Begin+1 : tok.pos.End] - switch { - case !strings.HasSuffix(val, ":"): - flags := p.newExpr(OpString, Position{ - Begin: tok.pos.Begin + uint16(len("(?")), - End: tok.pos.End, - }) - result = p.newExpr(OpFlagOnlyGroup, tok.pos, flags) - case val == "?:": - x := p.parseGroupItem(tok) - result = p.newExpr(OpGroup, tok.pos, x) - default: - flags := p.newExpr(OpString, Position{ - Begin: tok.pos.Begin + uint16(len("(?")), - End: tok.pos.End - uint16(len(":")), - }) - x := p.parseGroupItem(tok) - result = p.newExpr(OpGroupWithFlags, tok.pos, x, flags) - } - result.Pos.End = p.expect(tokRparen).End - return result -} - -func (p *Parser) parseEscape(op Operation, prefix string, tok token) *Expr { - litPos := tok.pos - litPos.Begin += uint16(len(prefix)) - lit := p.newExpr(OpString, litPos) - return p.newExpr(op, tok.pos, lit) -} - -func (p *Parser) precedenceOf(tok token) int { - switch tok.kind { - case tokPipe: - return 1 - case tokConcat, tokMinus: - return 2 - case tokPlus, tokStar, tokQuestion, tokRepeat: - return 3 - default: - return 0 - } -} - -func (p *Parser) newPCRE(source string) (*RegexpPCRE, error) { - if source == "" { - return nil, errors.New("empty pattern: can't find delimiters") - } - - delim := source[0] - endDelim := delim - switch delim { - case '(': - endDelim = ')' - case '{': - endDelim = '}' - case '[': - endDelim = ']' - case '<': - endDelim = '>' - case '\\': - return nil, errors.New("'\\' is not a valid delimiter") - default: - if isSpace(delim) { - return nil, errors.New("whitespace is not a valid delimiter") - } - if isAlphanumeric(delim) { - return nil, errors.New("'" + string(delim) + "' is not a valid delimiter") - } - } - - const delimLen = 1 - j := strings.LastIndexByte(source[delimLen:], endDelim) - if j == -1 { - return nil, errors.New("can't find '" + string(endDelim) + "' ending delimiter") - } - j += delimLen - - pcre := &RegexpPCRE{ - Pattern: source[delimLen:j], - Source: source, - Delim: [2]byte{delim, endDelim}, - Modifiers: source[j+delimLen:], - } - return pcre, nil -} - -var tok2op = [256]Operation{ - tokDollar: OpDollar, - tokCaret: OpCaret, - tokDot: OpDot, - tokChar: OpChar, - tokMinus: OpChar, - tokPosixClass: OpPosixClass, - tokComment: OpComment, -} -- cgit mrf-deployment