aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/quasilyte/regex/syntax
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2020-09-15 18:05:35 +0200
committerDmitry Vyukov <dvyukov@google.com>2020-09-15 19:34:30 +0200
commit712de1c63d9db97c81af68cd0dc4372c53d2e57a (patch)
treeae1761fec52c3ae4ddd003a4130ddbda8d0a2d69 /vendor/github.com/quasilyte/regex/syntax
parent298a69c38dd5c8a9bbd7a022e88f4ddbcf885e16 (diff)
vendor/github.com/golangci/golangci-lint: update to v1.31
Diffstat (limited to 'vendor/github.com/quasilyte/regex/syntax')
-rw-r--r--vendor/github.com/quasilyte/regex/syntax/LICENSE21
-rw-r--r--vendor/github.com/quasilyte/regex/syntax/README.md29
-rw-r--r--vendor/github.com/quasilyte/regex/syntax/ast.go64
-rw-r--r--vendor/github.com/quasilyte/regex/syntax/errors.go27
-rw-r--r--vendor/github.com/quasilyte/regex/syntax/go.mod3
-rw-r--r--vendor/github.com/quasilyte/regex/syntax/lexer.go454
-rw-r--r--vendor/github.com/quasilyte/regex/syntax/operation.go195
-rw-r--r--vendor/github.com/quasilyte/regex/syntax/operation_string.go59
-rw-r--r--vendor/github.com/quasilyte/regex/syntax/parser.go503
-rw-r--r--vendor/github.com/quasilyte/regex/syntax/pos.go10
-rw-r--r--vendor/github.com/quasilyte/regex/syntax/tokenkind_string.go59
-rw-r--r--vendor/github.com/quasilyte/regex/syntax/utils.go30
12 files changed, 1454 insertions, 0 deletions
diff --git a/vendor/github.com/quasilyte/regex/syntax/LICENSE b/vendor/github.com/quasilyte/regex/syntax/LICENSE
new file mode 100644
index 000000000..f0c81282b
--- /dev/null
+++ b/vendor/github.com/quasilyte/regex/syntax/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 Iskander (Alex) Sharipov / quasilyte
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/vendor/github.com/quasilyte/regex/syntax/README.md b/vendor/github.com/quasilyte/regex/syntax/README.md
new file mode 100644
index 000000000..b70e25ad9
--- /dev/null
+++ b/vendor/github.com/quasilyte/regex/syntax/README.md
@@ -0,0 +1,29 @@
+# Package `regex/syntax`
+
+Package `syntax` provides a regular expression parser as well as AST definitions.
+
+## Rationale
+
+The advantages of this package over stdlib [regexp/syntax](https://golang.org/pkg/regexp/syntax/):
+
+1. Does not perform transformations/optimizations during parsing.
+ The produced parse tree is lossless.
+
+2. Simpler AST representation.
+
+3. Can parse most PCRE operations in addition to [re2](https://github.com/google/re2/wiki/Syntax) syntax.
+ It can also handle PHP/Perl style patterns with delimiters.
+
+4. This package is easier to extend than something from the standard library.
+
+This package makes almost no assumptions about how the generated AST is going to be used
+so it preserves as much syntax information as possible.
+
+It's easy to write another intermediate representation on top of it. The main
+function of this package is to convert a textual regexp pattern into a more
+structured form that can be processed more easily.
+
+## Users
+
+* [go-critic](https://github.com/go-critic/go-critic) - Go static analyzer
+* [NoVerify](https://github.com/VKCOM/noverify) - PHP static analyzer
diff --git a/vendor/github.com/quasilyte/regex/syntax/ast.go b/vendor/github.com/quasilyte/regex/syntax/ast.go
new file mode 100644
index 000000000..4d21a9432
--- /dev/null
+++ b/vendor/github.com/quasilyte/regex/syntax/ast.go
@@ -0,0 +1,64 @@
+package syntax
+
+import (
+ "strings"
+)
+
+type Regexp struct {
+ Pattern string
+ Expr Expr
+}
+
+type RegexpPCRE struct {
+ Pattern string
+ Expr Expr
+
+ Source string
+ Modifiers string
+ Delim [2]byte
+}
+
+func (re *RegexpPCRE) HasModifier(mod byte) bool {
+ return strings.IndexByte(re.Modifiers, mod) >= 0
+}
+
+type Expr struct {
+ // The operation that this expression performs. See `operation.go`.
+ Op Operation
+
+ Form Form
+
+ _ [2]byte // Reserved
+
+ // Pos describes a source location inside regexp pattern.
+ Pos Position
+
+ // Args is a list of sub-expressions of this expression.
+ //
+ // See Operation constants documentation to learn how to
+ // interpret the particular expression args.
+ Args []Expr
+
+ // Value holds expression textual value.
+ //
+ // Usually, that value is identical to src[Begin():End()],
+ // but this is not true for programmatically generated objects.
+ Value string
+}
+
+// Begin returns expression leftmost offset.
+func (e Expr) Begin() uint16 { return e.Pos.Begin }
+
+// End returns expression rightmost offset.
+func (e Expr) End() uint16 { return e.Pos.End }
+
+// LastArg returns expression last argument.
+//
+// Should not be called on expressions that may have 0 arguments.
+func (e Expr) LastArg() Expr {
+ return e.Args[len(e.Args)-1]
+}
+
+type Operation byte
+
+type Form byte
diff --git a/vendor/github.com/quasilyte/regex/syntax/errors.go b/vendor/github.com/quasilyte/regex/syntax/errors.go
new file mode 100644
index 000000000..beefba5f9
--- /dev/null
+++ b/vendor/github.com/quasilyte/regex/syntax/errors.go
@@ -0,0 +1,27 @@
+package syntax
+
+type ParseError struct {
+ Pos Position
+ Message string
+}
+
+func (e ParseError) Error() string { return e.Message }
+
+func throw(pos Position, message string) {
+ panic(ParseError{Pos: pos, Message: message})
+}
+
+func throwExpectedFound(pos Position, expected, found string) {
+ throw(pos, "expected '"+expected+"', found '"+found+"'")
+}
+
+func throwUnexpectedToken(pos Position, token string) {
+ throw(pos, "unexpected token: "+token)
+}
+
+func newPos(begin, end int) Position {
+ return Position{
+ Begin: uint16(begin),
+ End: uint16(end),
+ }
+}
diff --git a/vendor/github.com/quasilyte/regex/syntax/go.mod b/vendor/github.com/quasilyte/regex/syntax/go.mod
new file mode 100644
index 000000000..2a4e1f33b
--- /dev/null
+++ b/vendor/github.com/quasilyte/regex/syntax/go.mod
@@ -0,0 +1,3 @@
+module github.com/quasilyte/regex/syntax
+
+go 1.14
diff --git a/vendor/github.com/quasilyte/regex/syntax/lexer.go b/vendor/github.com/quasilyte/regex/syntax/lexer.go
new file mode 100644
index 000000000..aae146c2e
--- /dev/null
+++ b/vendor/github.com/quasilyte/regex/syntax/lexer.go
@@ -0,0 +1,454 @@
+package syntax
+
+import (
+ "strings"
+ "unicode/utf8"
+)
+
+type token struct {
+ kind tokenKind
+ pos Position
+}
+
+func (tok token) String() string {
+ return tok.kind.String()
+}
+
+type tokenKind byte
+
+//go:generate stringer -type=tokenKind -trimprefix=tok -linecomment=true
+const (
+ tokNone tokenKind = iota
+
+ tokChar
+ tokGroupFlags
+ tokPosixClass
+ tokConcat
+ tokRepeat
+ tokEscapeChar
+ tokEscapeMeta
+ tokEscapeOctal
+ tokEscapeUni
+ tokEscapeUniFull
+ tokEscapeHex
+ tokEscapeHexFull
+ tokComment
+
+ tokQ // \Q
+ tokMinus // -
+ tokLbracket // [
+ tokLbracketCaret // [^
+ tokRbracket // ]
+ tokDollar // $
+ tokCaret // ^
+ tokQuestion // ?
+ tokDot // .
+ tokPlus // +
+ tokStar // *
+ tokPipe // |
+ tokLparen // (
+ tokLparenName // (?P<name>
+ tokLparenNameAngle // (?<name>
+ tokLparenNameQuote // (?'name'
+ tokLparenFlags // (?flags
+ tokLparenAtomic // (?>
+ tokLparenPositiveLookahead // (?=
+ tokLparenPositiveLookbehind // (?<=
+ tokLparenNegativeLookahead // (?!
+ tokLparenNegativeLookbehind // (?<!
+ tokRparen // )
+)
+
+// reMetachar is a table of meta chars outside of a char class.
+var reMetachar = [256]bool{
+ '\\': true,
+ '|': true,
+ '*': true,
+ '+': true,
+ '?': true,
+ '.': true,
+ '[': true,
+ ']': true,
+ '^': true,
+ '$': true,
+ '(': true,
+ ')': true,
+}
+
+// charClassMetachar is a table of meta chars inside char class.
+var charClassMetachar = [256]bool{
+ '-': true,
+ ']': true,
+}
+
+type lexer struct {
+ tokens []token
+ pos int
+ input string
+}
+
+func (l *lexer) HasMoreTokens() bool {
+ return l.pos < len(l.tokens)
+}
+
+func (l *lexer) NextToken() token {
+ if l.pos < len(l.tokens) {
+ tok := l.tokens[l.pos]
+ l.pos++
+ return tok
+ }
+ return token{}
+}
+
+func (l *lexer) Peek() token {
+ if l.pos < len(l.tokens) {
+ return l.tokens[l.pos]
+ }
+ return token{}
+}
+
+func (l *lexer) scan() {
+ for l.pos < len(l.input) {
+ ch := l.input[l.pos]
+ if ch >= utf8.RuneSelf {
+ _, size := utf8.DecodeRuneInString(l.input[l.pos:])
+ l.pushTok(tokChar, size)
+ l.maybeInsertConcat()
+ continue
+ }
+ switch ch {
+ case '\\':
+ l.scanEscape(false)
+ case '.':
+ l.pushTok(tokDot, 1)
+ case '+':
+ l.pushTok(tokPlus, 1)
+ case '*':
+ l.pushTok(tokStar, 1)
+ case '^':
+ l.pushTok(tokCaret, 1)
+ case '$':
+ l.pushTok(tokDollar, 1)
+ case '?':
+ l.pushTok(tokQuestion, 1)
+ case ')':
+ l.pushTok(tokRparen, 1)
+ case '|':
+ l.pushTok(tokPipe, 1)
+ case '[':
+ if l.byteAt(l.pos+1) == '^' {
+ l.pushTok(tokLbracketCaret, 2)
+ } else {
+ l.pushTok(tokLbracket, 1)
+ }
+ l.scanCharClass()
+ case '(':
+ if l.byteAt(l.pos+1) == '?' {
+ switch {
+ case l.byteAt(l.pos+2) == '>':
+ l.pushTok(tokLparenAtomic, len("(?>"))
+ case l.byteAt(l.pos+2) == '=':
+ l.pushTok(tokLparenPositiveLookahead, len("(?="))
+ case l.byteAt(l.pos+2) == '!':
+ l.pushTok(tokLparenNegativeLookahead, len("(?!"))
+ case l.byteAt(l.pos+2) == '<' && l.byteAt(l.pos+3) == '=':
+ l.pushTok(tokLparenPositiveLookbehind, len("(?<="))
+ case l.byteAt(l.pos+2) == '<' && l.byteAt(l.pos+3) == '!':
+ l.pushTok(tokLparenNegativeLookbehind, len("(?<!"))
+ default:
+ if l.tryScanComment(l.pos + 2) {
+ } else if l.tryScanGroupName(l.pos + 2) {
+ } else if l.tryScanGroupFlags(l.pos + 2) {
+ } else {
+ throw(newPos(l.pos, l.pos+1), "group token is incomplete")
+ }
+ }
+ } else {
+ l.pushTok(tokLparen, 1)
+ }
+ case '{':
+ if j := l.repeatWidth(l.pos + 1); j >= 0 {
+ l.pushTok(tokRepeat, len("{")+j)
+ } else {
+ l.pushTok(tokChar, 1)
+ }
+ default:
+ l.pushTok(tokChar, 1)
+ }
+ l.maybeInsertConcat()
+ }
+}
+
+func (l *lexer) scanCharClass() {
+ l.maybeInsertConcat()
+
+ // We need to handle the first `]` in a special way. See #3.
+ if l.byteAt(l.pos) == ']' {
+ l.pushTok(tokChar, 1)
+ }
+
+ for l.pos < len(l.input) {
+ ch := l.input[l.pos]
+ if ch >= utf8.RuneSelf {
+ _, size := utf8.DecodeRuneInString(l.input[l.pos:])
+ l.pushTok(tokChar, size)
+ continue
+ }
+ switch ch {
+ case '\\':
+ l.scanEscape(true)
+ case '[':
+ isPosixClass := false
+ if l.byteAt(l.pos+1) == ':' {
+ j := l.stringIndex(l.pos+2, ":]")
+ if j >= 0 {
+ isPosixClass = true
+ l.pushTok(tokPosixClass, j+len("[::]"))
+ }
+ }
+ if !isPosixClass {
+ l.pushTok(tokChar, 1)
+ }
+ case '-':
+ l.pushTok(tokMinus, 1)
+ case ']':
+ l.pushTok(tokRbracket, 1)
+ return // Stop scanning in the char context
+ default:
+ l.pushTok(tokChar, 1)
+ }
+ }
+}
+
+func (l *lexer) scanEscape(insideCharClass bool) {
+ s := l.input
+ if l.pos+1 >= len(s) {
+ throw(newPos(l.pos, l.pos+1), `unexpected end of pattern: trailing '\'`)
+ }
+ switch {
+ case s[l.pos+1] == 'p' || s[l.pos+1] == 'P':
+ if l.pos+2 >= len(s) {
+ throw(newPos(l.pos, l.pos+2), "unexpected end of pattern: expected uni-class-short or '{'")
+ }
+ if s[l.pos+2] == '{' {
+ j := strings.IndexByte(s[l.pos+2:], '}')
+ if j < 0 {
+ throw(newPos(l.pos, l.pos+2), "can't find closing '}'")
+ }
+ l.pushTok(tokEscapeUniFull, len(`\p{`)+j)
+ } else {
+ l.pushTok(tokEscapeUni, len(`\pL`))
+ }
+ case s[l.pos+1] == 'x':
+ if l.pos+2 >= len(s) {
+ throw(newPos(l.pos, l.pos+2), "unexpected end of pattern: expected hex-digit or '{'")
+ }
+ if s[l.pos+2] == '{' {
+ j := strings.IndexByte(s[l.pos+2:], '}')
+ if j < 0 {
+ throw(newPos(l.pos, l.pos+2), "can't find closing '}'")
+ }
+ l.pushTok(tokEscapeHexFull, len(`\x{`)+j)
+ } else {
+ if isHexDigit(l.byteAt(l.pos + 3)) {
+ l.pushTok(tokEscapeHex, len(`\xFF`))
+ } else {
+ l.pushTok(tokEscapeHex, len(`\xF`))
+ }
+ }
+ case isOctalDigit(s[l.pos+1]):
+ digits := 1
+ if isOctalDigit(l.byteAt(l.pos + 2)) {
+ if isOctalDigit(l.byteAt(l.pos + 3)) {
+ digits = 3
+ } else {
+ digits = 2
+ }
+ }
+ l.pushTok(tokEscapeOctal, len(`\`)+digits)
+ case s[l.pos+1] == 'Q':
+ size := len(s) - l.pos // Until the pattern ends
+ j := l.stringIndex(l.pos+2, `\E`)
+ if j >= 0 {
+ size = j + len(`\Q\E`)
+ }
+ l.pushTok(tokQ, size)
+
+ default:
+ ch := l.byteAt(l.pos + 1)
+ if ch >= utf8.RuneSelf {
+ _, size := utf8.DecodeRuneInString(l.input[l.pos+1:])
+ l.pushTok(tokEscapeChar, len(`\`)+size)
+ return
+ }
+ kind := tokEscapeChar
+ if insideCharClass {
+ if charClassMetachar[ch] {
+ kind = tokEscapeMeta
+ }
+ } else {
+ if reMetachar[ch] {
+ kind = tokEscapeMeta
+ }
+ }
+ l.pushTok(kind, 2)
+ }
+}
+
+func (l *lexer) maybeInsertConcat() {
+ if l.isConcatPos() {
+ last := len(l.tokens) - 1
+ tok := l.tokens[last]
+ l.tokens[last].kind = tokConcat
+ l.tokens = append(l.tokens, tok)
+ }
+}
+
+func (l *lexer) Init(s string) {
+ l.pos = 0
+ l.tokens = l.tokens[:0]
+ l.input = s
+
+ l.scan()
+
+ l.pos = 0
+}
+
+func (l *lexer) tryScanGroupName(pos int) bool {
+ tok := tokLparenName
+ endCh := byte('>')
+ offset := 1
+ switch l.byteAt(pos) {
+ case '\'':
+ endCh = '\''
+ tok = tokLparenNameQuote
+ case '<':
+ tok = tokLparenNameAngle
+ case 'P':
+ offset = 2
+ default:
+ return false
+ }
+ if pos+offset >= len(l.input) {
+ return false
+ }
+ end := strings.IndexByte(l.input[pos+offset:], endCh)
+ if end < 0 {
+ return false
+ }
+ l.pushTok(tok, len("(?")+offset+end+1)
+ return true
+}
+
+func (l *lexer) tryScanGroupFlags(pos int) bool {
+ colonPos := strings.IndexByte(l.input[pos:], ':')
+ parenPos := strings.IndexByte(l.input[pos:], ')')
+ if parenPos < 0 {
+ return false
+ }
+ end := parenPos
+ if colonPos >= 0 && colonPos < parenPos {
+ end = colonPos + len(":")
+ }
+ l.pushTok(tokLparenFlags, len("(?")+end)
+ return true
+}
+
+func (l *lexer) tryScanComment(pos int) bool {
+ if l.byteAt(pos) != '#' {
+ return false
+ }
+ parenPos := strings.IndexByte(l.input[pos:], ')')
+ if parenPos < 0 {
+ return false
+ }
+ l.pushTok(tokComment, len("(?")+parenPos+len(")"))
+ return true
+}
+
+func (l *lexer) repeatWidth(pos int) int {
+ j := pos
+ for isDigit(l.byteAt(j)) {
+ j++
+ }
+ if j == pos {
+ return -1
+ }
+ if l.byteAt(j) == '}' {
+ return (j + len("}")) - pos // {min}
+ }
+ if l.byteAt(j) != ',' {
+ return -1
+ }
+ j += len(",")
+ for isDigit(l.byteAt(j)) {
+ j++
+ }
+ if l.byteAt(j) == '}' {
+ return (j + len("}")) - pos // {min,} or {min,max}
+ }
+ return -1
+}
+
+func (l *lexer) stringIndex(offset int, s string) int {
+ if offset < len(l.input) {
+ return strings.Index(l.input[offset:], s)
+ }
+ return -1
+}
+
+func (l *lexer) byteAt(pos int) byte {
+ if pos >= 0 && pos < len(l.input) {
+ return l.input[pos]
+ }
+ return 0
+}
+
+func (l *lexer) pushTok(kind tokenKind, size int) {
+ l.tokens = append(l.tokens, token{
+ kind: kind,
+ pos: Position{Begin: uint16(l.pos), End: uint16(l.pos + size)},
+ })
+ l.pos += size
+}
+
+func (l *lexer) isConcatPos() bool {
+ if len(l.tokens) < 2 {
+ return false
+ }
+ x := l.tokens[len(l.tokens)-2].kind
+ if concatTable[x]&concatX != 0 {
+ return false
+ }
+ y := l.tokens[len(l.tokens)-1].kind
+ return concatTable[y]&concatY == 0
+}
+
+const (
+ concatX byte = 1 << iota
+ concatY
+)
+
+var concatTable = [256]byte{
+ tokPipe: concatX | concatY,
+
+ tokLparen: concatX,
+ tokLparenFlags: concatX,
+ tokLparenName: concatX,
+ tokLparenNameAngle: concatX,
+ tokLparenNameQuote: concatX,
+ tokLparenAtomic: concatX,
+ tokLbracket: concatX,
+ tokLbracketCaret: concatX,
+ tokLparenPositiveLookahead: concatX,
+ tokLparenPositiveLookbehind: concatX,
+ tokLparenNegativeLookahead: concatX,
+ tokLparenNegativeLookbehind: concatX,
+
+ tokRparen: concatY,
+ tokRbracket: concatY,
+ tokPlus: concatY,
+ tokStar: concatY,
+ tokQuestion: concatY,
+ tokRepeat: concatY,
+}
diff --git a/vendor/github.com/quasilyte/regex/syntax/operation.go b/vendor/github.com/quasilyte/regex/syntax/operation.go
new file mode 100644
index 000000000..0fc8fc521
--- /dev/null
+++ b/vendor/github.com/quasilyte/regex/syntax/operation.go
@@ -0,0 +1,195 @@
+package syntax
+
+//go:generate stringer -type=Operation -trimprefix=Op
+const (
+ OpNone Operation = iota
+
+ // OpConcat is a concatenation of ops.
+ // Examples: `xy` `abc\d` ``
+ // Args - concatenated ops
+ //
+ // As a special case, OpConcat with 0 Args is used for "empty"
+ // set of operations.
+ OpConcat
+
+ // OpDot is a '.' wildcard.
+ OpDot
+
+ // OpAlt is x|y alternation of ops.
+ // Examples: `a|bc` `x(.*?)|y(.*?)`
+ // Args - union-connected regexp branches
+ OpAlt
+
+ // OpStar is a shorthand for {0,} repetition.
+ // Examples: `x*`
+ // Args[0] - repeated expression
+ OpStar
+
+ // OpPlus is a shorthand for {1,} repetition.
+ // Examples: `x+`
+ // Args[0] - repeated expression
+ OpPlus
+
+ // OpQuestion is a shorthand for {0,1} repetition.
+ // Examples: `x?`
+ // Args[0] - repeated expression
+ OpQuestion
+
+ // OpNonGreedy makes its operand quantifier non-greedy.
+ // Examples: `x??` `x*?` `x+?`
+ // Args[0] - quantified expression
+ OpNonGreedy
+
+ // OpPossessive makes its operand quantifier possessive.
+ // Examples: `x?+` `x*+` `x++`
+ // Args[0] - quantified expression
+ OpPossessive
+
+ // OpCaret is ^ anchor.
+ OpCaret
+
+ // OpDollar is $ anchor.
+ OpDollar
+
+ // OpLiteral is a collection of consecutive chars.
+ // Examples: `ab` `10x`
+ // Args - enclosed characters (OpChar)
+ OpLiteral
+
+ // OpChar is a single literal pattern character.
+ // Examples: `a` `6` `ф`
+ OpChar
+
+ // OpString is an artificial element that is used in other expressions.
+ OpString
+
+ // OpQuote is a \Q...\E enclosed literal.
+ // Examples: `\Q.?\E` `\Q?q[]=1`
+ // FormQuoteUnclosed: `\Qabc`
+ // Args[0] - literal value (OpString)
+ OpQuote
+
+ // OpEscapeChar is a single char escape.
+ // Examples: `\d` `\a` `\n`
+ // Args[0] - escaped value (OpString)
+ OpEscapeChar
+
+ // OpEscapeMeta is an escaped meta char.
+ // Examples: `\(` `\[` `\+`
+ // Args[0] - escaped value (OpString)
+ OpEscapeMeta
+
+ // OpEscapeOctal is an octal char code escape (up to 3 digits).
+ // Examples: `\123` `\12`
+ // Args[0] - escaped value (OpString)
+ OpEscapeOctal
+
+ // OpEscapeHex is a hex char code escape.
+ // Examples: `\x7F` `\xF7`
+ // FormEscapeHexFull examples: `\x{10FFFF}` `\x{F}`.
+ // Args[0] - escaped value (OpString)
+ OpEscapeHex
+
+ // OpEscapeUni is a Unicode char class escape.
+ // Examples: `\pS` `\pL` `\PL`
+ // FormEscapeUniFull examples: `\p{Greek}` `\p{Symbol}` `\p{^L}`
+ // Args[0] - escaped value (OpString)
+ OpEscapeUni
+
+ // OpCharClass is a char class enclosed in [].
+ // Examples: `[abc]` `[a-z0-9\]]`
+ // Args - char class elements (can include OpCharRange and OpPosixClass)
+ OpCharClass
+
+ // OpNegCharClass is a negated char class enclosed in [].
+ // Examples: `[^abc]` `[^a-z0-9\]]`
+ // Args - char class elements (can include OpCharRange and OpPosixClass)
+ OpNegCharClass
+
+ // OpCharRange is an inclusive char range inside a char class.
+ // Examples: `0-9` `A-Z`
+ // Args[0] - range lower bound
+ // Args[1] - range upper bound
+ OpCharRange
+
+ // OpPosixClass is a named ASCII char set inside a char class.
+ // Examples: `[:alpha:]` `[:blank:]`
+ OpPosixClass
+
+ // OpRepeat is a {min,max} repetition quantifier.
+ // Examples: `x{5}` `x{min,max}` `x{min,}`
+ // Args[0] - repeated expression
+ // Args[1] - repeat count (OpString)
+ OpRepeat
+
+ // OpCapture is `(re)` capturing group.
+ // Examples: `(abc)` `(x|y)`
+ // Args[0] - enclosed expression
+ OpCapture
+
+ // OpNamedCapture is `(?P<name>re)` capturing group.
+ // Examples: `(?P<foo>abc)` `(?P<name>x|y)`
+ // FormNamedCaptureAngle examples: `(?<foo>abc)` `(?<name>x|y)`
+ // FormNamedCaptureQuote examples: `(?'foo'abc)` `(?'name'x|y)`
+ // Args[0] - enclosed expression (OpConcat with 0 args for empty group)
+ // Args[1] - group name (OpString)
+ OpNamedCapture
+
+ // OpGroup is `(?:re)` non-capturing group.
+ // Examples: `(?:abc)` `(?:x|y)`
+ // Args[0] - enclosed expression (OpConcat with 0 args for empty group)
+ OpGroup
+
+ // OpGroupWithFlags is `(?flags:re)` non-capturing group.
+ // Examples: `(?i:abc)` `(?i:x|y)`
+ // Args[0] - enclosed expression (OpConcat with 0 args for empty group)
+ // Args[1] - flags (OpString)
+ OpGroupWithFlags
+
+ // OpAtomicGroup is `(?>re)` non-capturing group without backtracking.
+ // Examples: `(?>foo)` `(?>)`
+ // Args[0] - enclosed expression (OpConcat with 0 args for empty group)
+ OpAtomicGroup
+
+ // OpPositiveLookahead is `(?=re)` asserts that following text matches re.
+ // Examples: `(?=foo)`
+ // Args[0] - enclosed expression (OpConcat with 0 args for empty group)
+ OpPositiveLookahead
+
+ // OpNegativeLookahead is `(?!re)` asserts that following text doesn't match re.
+ // Examples: `(?!foo)`
+ // Args[0] - enclosed expression (OpConcat with 0 args for empty group)
+ OpNegativeLookahead
+
+ // OpPositiveLookbehind is `(?<=re)` asserts that preceding text matches re.
+ // Examples: `(?<=foo)`
+ // Args[0] - enclosed expression (OpConcat with 0 args for empty group)
+ OpPositiveLookbehind
+
+ // OpNegativeLookbehind is `(?<!re)` asserts that preceding text doesn't match re.
+ // Examples: `(?<!foo)`
+ // Args[0] - enclosed expression (OpConcat with 0 args for empty group)
+ OpNegativeLookbehind
+
+ // OpFlagOnlyGroup is `(?flags)` form that affects current group flags.
+ // Examples: `(?i)` `(?i-m)` `(?-im)`
+ // Args[0] - flags (OpString)
+ OpFlagOnlyGroup
+
+ // OpComment is a group-like regexp comment expression.
+ // Examples: `(?#text)` `(?#)`
+ OpComment
+
+ // OpNone2 is a sentinel value that is never part of the AST.
+ // OpNone and OpNone2 can be used to cover all ops in a range.
+ OpNone2
+)
+
+const (
+ FormDefault Form = iota
+ FormEscapeHexFull
+ FormEscapeUniFull
+ FormNamedCaptureAngle
+ FormNamedCaptureQuote
+ FormQuoteUnclosed
+)
diff --git a/vendor/github.com/quasilyte/regex/syntax/operation_string.go b/vendor/github.com/quasilyte/regex/syntax/operation_string.go
new file mode 100644
index 000000000..b78e9ac5d
--- /dev/null
+++ b/vendor/github.com/quasilyte/regex/syntax/operation_string.go
@@ -0,0 +1,59 @@
+// Code generated by "stringer -type=Operation -trimprefix=Op"; DO NOT EDIT.
+
+package syntax
+
+import "strconv"
+
+func _() {
+ // An "invalid array index" compiler error signifies that the constant values have changed.
+ // Re-run the stringer command to generate them again.
+ var x [1]struct{}
+ _ = x[OpNone-0]
+ _ = x[OpConcat-1]
+ _ = x[OpDot-2]
+ _ = x[OpAlt-3]
+ _ = x[OpStar-4]
+ _ = x[OpPlus-5]
+ _ = x[OpQuestion-6]
+ _ = x[OpNonGreedy-7]
+ _ = x[OpPossessive-8]
+ _ = x[OpCaret-9]
+ _ = x[OpDollar-10]
+ _ = x[OpLiteral-11]
+ _ = x[OpChar-12]
+ _ = x[OpString-13]
+ _ = x[OpQuote-14]
+ _ = x[OpEscapeChar-15]
+ _ = x[OpEscapeMeta-16]
+ _ = x[OpEscapeOctal-17]
+ _ = x[OpEscapeHex-18]
+ _ = x[OpEscapeUni-19]
+ _ = x[OpCharClass-20]
+ _ = x[OpNegCharClass-21]
+ _ = x[OpCharRange-22]
+ _ = x[OpPosixClass-23]
+ _ = x[OpRepeat-24]
+ _ = x[OpCapture-25]
+ _ = x[OpNamedCapture-26]
+ _ = x[OpGroup-27]
+ _ = x[OpGroupWithFlags-28]
+ _ = x[OpAtomicGroup-29]
+ _ = x[OpPositiveLookahead-30]
+ _ = x[OpNegativeLookahead-31]
+ _ = x[OpPositiveLookbehind-32]
+ _ = x[OpNegativeLookbehind-33]
+ _ = x[OpFlagOnlyGroup-34]
+ _ = x[OpComment-35]
+ _ = x[OpNone2-36]
+}
+
+const _Operation_name = "NoneConcatDotAltStarPlusQuestionNonGreedyPossessiveCaretDollarLiteralCharStringQuoteEscapeCharEscapeMetaEscapeOctalEscapeHexEscapeUniCharClassNegCharClassCharRangePosixClassRepeatCaptureNamedCaptureGroupGroupWithFlagsAtomicGroupPositiveLookaheadNegativeLookaheadPositiveLookbehindNegativeLookbehindFlagOnlyGroupCommentNone2"
+
+var _Operation_index = [...]uint16{0, 4, 10, 13, 16, 20, 24, 32, 41, 51, 56, 62, 69, 73, 79, 84, 94, 104, 115, 124, 133, 142, 154, 163, 173, 179, 186, 198, 203, 217, 228, 245, 262, 280, 298, 311, 318, 323}
+
+func (i Operation) String() string {
+ if i >= Operation(len(_Operation_index)-1) {
+ return "Operation(" + strconv.FormatInt(int64(i), 10) + ")"
+ }
+ return _Operation_name[_Operation_index[i]:_Operation_index[i+1]]
+}
diff --git a/vendor/github.com/quasilyte/regex/syntax/parser.go b/vendor/github.com/quasilyte/regex/syntax/parser.go
new file mode 100644
index 000000000..c540ac593
--- /dev/null
+++ b/vendor/github.com/quasilyte/regex/syntax/parser.go
@@ -0,0 +1,503 @@
+package syntax
+
+import (
+ "errors"
+ "strings"
+)
+
+type ParserOptions struct {
+ // NoLiterals disables OpChar merging into OpLiteral.
+ NoLiterals bool
+}
+
+func NewParser(opts *ParserOptions) *Parser {
+ return newParser(opts)
+}
+
+type Parser struct {
+ out Regexp
+ lexer lexer
+ exprPool []Expr
+
+ prefixParselets [256]prefixParselet
+ infixParselets [256]infixParselet
+
+ charClass []Expr
+ allocated uint
+
+ opts ParserOptions
+}
+
+// ParsePCRE parses PHP-style pattern with delimiters.
+// An example of such pattern is `/foo/i`.
+func (p *Parser) ParsePCRE(pattern string) (*RegexpPCRE, error) {
+ pcre, err := p.newPCRE(pattern)
+ if err != nil {
+ return nil, err
+ }
+ if pcre.HasModifier('x') {
+ return nil, errors.New("'x' modifier is not supported")
+ }
+ re, err := p.Parse(pcre.Pattern)
+ if re != nil {
+ pcre.Expr = re.Expr
+ }
+ return pcre, err
+}
+
+func (p *Parser) Parse(pattern string) (result *Regexp, err error) {
+ defer func() {
+ r := recover()
+ if r == nil {
+ return
+ }
+ if err2, ok := r.(ParseError); ok {
+ err = err2
+ return
+ }
+ panic(r)
+ }()
+
+ p.lexer.Init(pattern)
+ p.allocated = 0
+ p.out.Pattern = pattern
+ if pattern == "" {
+ p.out.Expr = *p.newExpr(OpConcat, Position{})
+ } else {
+ p.out.Expr = *p.parseExpr(0)
+ }
+
+ if !p.opts.NoLiterals {
+ p.mergeChars(&p.out.Expr)
+ }
+ p.setValues(&p.out.Expr)
+
+ return &p.out, nil
+}
+
+type prefixParselet func(token) *Expr
+
+type infixParselet func(*Expr, token) *Expr
+
+func newParser(opts *ParserOptions) *Parser {
+ var p Parser
+
+ if opts != nil {
+ p.opts = *opts
+ }
+ p.exprPool = make([]Expr, 256)
+
+ for tok, op := range tok2op {
+ if op != 0 {
+ p.prefixParselets[tokenKind(tok)] = p.parsePrefixElementary
+ }
+ }
+
+ p.prefixParselets[tokQ] = func(tok token) *Expr {
+ litPos := tok.pos
+ litPos.Begin += uint16(len(`\Q`))
+ form := FormQuoteUnclosed
+ if strings.HasSuffix(p.tokenValue(tok), `\E`) {
+ litPos.End -= uint16(len(`\E`))
+ form = FormDefault
+ }
+ lit := p.newExpr(OpString, litPos)
+ return p.newExprForm(OpQuote, form, tok.pos, lit)
+ }
+
+ p.prefixParselets[tokEscapeHexFull] = func(tok token) *Expr {
+ litPos := tok.pos
+ litPos.Begin += uint16(len(`\x{`))
+ litPos.End -= uint16(len(`}`))
+ lit := p.newExpr(OpString, litPos)
+ return p.newExprForm(OpEscapeHex, FormEscapeHexFull, tok.pos, lit)
+ }
+ p.prefixParselets[tokEscapeUniFull] = func(tok token) *Expr {
+ litPos := tok.pos
+ litPos.Begin += uint16(len(`\p{`))
+ litPos.End -= uint16(len(`}`))
+ lit := p.newExpr(OpString, litPos)
+ return p.newExprForm(OpEscapeUni, FormEscapeUniFull, tok.pos, lit)
+ }
+
+ p.prefixParselets[tokEscapeHex] = func(tok token) *Expr { return p.parseEscape(OpEscapeHex, `\x`, tok) }
+ p.prefixParselets[tokEscapeOctal] = func(tok token) *Expr { return p.parseEscape(OpEscapeOctal, `\`, tok) }
+ p.prefixParselets[tokEscapeChar] = func(tok token) *Expr { return p.parseEscape(OpEscapeChar, `\`, tok) }
+ p.prefixParselets[tokEscapeMeta] = func(tok token) *Expr { return p.parseEscape(OpEscapeMeta, `\`, tok) }
+ p.prefixParselets[tokEscapeUni] = func(tok token) *Expr { return p.parseEscape(OpEscapeUni, `\p`, tok) }
+
+ p.prefixParselets[tokLparen] = func(tok token) *Expr { return p.parseGroup(OpCapture, tok) }
+ p.prefixParselets[tokLparenAtomic] = func(tok token) *Expr { return p.parseGroup(OpAtomicGroup, tok) }
+ p.prefixParselets[tokLparenPositiveLookahead] = func(tok token) *Expr { return p.parseGroup(OpPositiveLookahead, tok) }
+ p.prefixParselets[tokLparenNegativeLookahead] = func(tok token) *Expr { return p.parseGroup(OpNegativeLookahead, tok) }
+ p.prefixParselets[tokLparenPositiveLookbehind] = func(tok token) *Expr { return p.parseGroup(OpPositiveLookbehind, tok) }
+ p.prefixParselets[tokLparenNegativeLookbehind] = func(tok token) *Expr { return p.parseGroup(OpNegativeLookbehind, tok) }
+
+ p.prefixParselets[tokLparenName] = func(tok token) *Expr {
+ return p.parseNamedCapture(FormDefault, tok)
+ }
+ p.prefixParselets[tokLparenNameAngle] = func(tok token) *Expr {
+ return p.parseNamedCapture(FormNamedCaptureAngle, tok)
+ }
+ p.prefixParselets[tokLparenNameQuote] = func(tok token) *Expr {
+ return p.parseNamedCapture(FormNamedCaptureQuote, tok)
+ }
+
+ p.prefixParselets[tokLparenFlags] = p.parseGroupWithFlags
+
+ p.prefixParselets[tokPipe] = func(tok token) *Expr {
+ // We need a prefix pipe parselet to handle `(|x)` syntax.
+ right := p.parseExpr(1)
+ return p.newExpr(OpAlt, tok.pos, p.newEmpty(tok.pos), right)
+ }
+ p.prefixParselets[tokLbracket] = func(tok token) *Expr {
+ return p.parseCharClass(OpCharClass, tok)
+ }
+ p.prefixParselets[tokLbracketCaret] = func(tok token) *Expr {
+ return p.parseCharClass(OpNegCharClass, tok)
+ }
+
+ p.infixParselets[tokRepeat] = func(left *Expr, tok token) *Expr {
+ repeatLit := p.newExpr(OpString, tok.pos)
+ return p.newExpr(OpRepeat, combinePos(left.Pos, tok.pos), left, repeatLit)
+ }
+ p.infixParselets[tokStar] = func(left *Expr, tok token) *Expr {
+ return p.newExpr(OpStar, combinePos(left.Pos, tok.pos), left)
+ }
+ p.infixParselets[tokConcat] = func(left *Expr, tok token) *Expr {
+ right := p.parseExpr(2)
+ if left.Op == OpConcat {
+ left.Args = append(left.Args, *right)
+ left.Pos.End = right.End()
+ return left
+ }
+ return p.newExpr(OpConcat, combinePos(left.Pos, right.Pos), left, right)
+ }
+ p.infixParselets[tokPipe] = p.parseAlt
+ p.infixParselets[tokMinus] = p.parseMinus
+ p.infixParselets[tokPlus] = p.parsePlus
+ p.infixParselets[tokQuestion] = p.parseQuestion
+
+ return &p
+}
+
+func (p *Parser) setValues(e *Expr) {
+ for i := range e.Args {
+ p.setValues(&e.Args[i])
+ }
+ e.Value = p.exprValue(e)
+}
+
+func (p *Parser) tokenValue(tok token) string {
+ return p.out.Pattern[tok.pos.Begin:tok.pos.End]
+}
+
+func (p *Parser) exprValue(e *Expr) string {
+ return p.out.Pattern[e.Begin():e.End()]
+}
+
+func (p *Parser) mergeChars(e *Expr) {
+ for i := range e.Args {
+ p.mergeChars(&e.Args[i])
+ }
+ if e.Op != OpConcat || len(e.Args) < 2 {
+ return
+ }
+
+ args := e.Args[:0]
+ i := 0
+ for i < len(e.Args) {
+ first := i
+ chars := 0
+ for j := i; j < len(e.Args) && e.Args[j].Op == OpChar; j++ {
+ chars++
+ }
+ if chars > 1 {
+ c1 := e.Args[first]
+ c2 := e.Args[first+chars-1]
+ lit := p.newExpr(OpLiteral, combinePos(c1.Pos, c2.Pos))
+ for j := 0; j < chars; j++ {
+ lit.Args = append(lit.Args, e.Args[first+j])
+ }
+ args = append(args, *lit)
+ i += chars
+ } else {
+ args = append(args, e.Args[i])
+ i++
+ }
+ }
+ if len(args) == 1 {
+ *e = args[0] // Turn OpConcat into OpLiteral
+ } else {
+ e.Args = args
+ }
+}
+
+func (p *Parser) newEmpty(pos Position) *Expr {
+ return p.newExpr(OpConcat, pos)
+}
+
+func (p *Parser) newExprForm(op Operation, form Form, pos Position, args ...*Expr) *Expr {
+ e := p.newExpr(op, pos, args...)
+ e.Form = form
+ return e
+}
+
+// newExpr obtains an Expr from allocExpr, resets it to the given op and pos,
+// and stores a copy of every element of args in its Args. The Args slice the
+// obtained Expr already held is truncated and reused rather than dropped.
+func (p *Parser) newExpr(op Operation, pos Position, args ...*Expr) *Expr {
+	node := p.allocExpr()
+	reused := node.Args[:0]
+	*node = Expr{Op: op, Pos: pos, Args: reused}
+	for i := range args {
+		node.Args = append(node.Args, *args[i])
+	}
+	return node
+}
+
+// allocExpr hands out the next unused slot of p.exprPool and advances
+// p.allocated. Once every slot has been handed out it returns a freshly
+// allocated Expr instead and leaves p.allocated unchanged.
+func (p *Parser) allocExpr() *Expr {
+	next := p.allocated
+	if next >= uint(len(p.exprPool)) {
+		return &Expr{}
+	}
+	p.allocated++
+	return &p.exprPool[next]
+}
+
+// expect consumes the next token and returns its position. When the token is
+// not of the requested kind, the mismatch is reported through
+// throwExpectedFound using the string forms of both kinds.
+func (p *Parser) expect(kind tokenKind) Position {
+	got := p.lexer.NextToken()
+	if got.kind == kind {
+		return got.pos
+	}
+	throwExpectedFound(got.pos, kind.String(), got.kind.String())
+	return got.pos
+}
+
+// parseExpr parses one expression. It consumes a token, runs the prefix
+// parselet registered for its kind (reporting an unexpected token when there
+// is none), and then keeps applying infix parselets for as long as the
+// upcoming token has a precedence strictly greater than precedence.
+func (p *Parser) parseExpr(precedence int) *Expr {
+	first := p.lexer.NextToken()
+	parsePrefix := p.prefixParselets[first.kind]
+	if parsePrefix == nil {
+		throwUnexpectedToken(first.pos, first.String())
+	}
+	expr := parsePrefix(first)
+
+	for {
+		if p.precedenceOf(p.lexer.Peek()) <= precedence {
+			return expr
+		}
+		operator := p.lexer.NextToken()
+		parseInfix := p.infixParselets[operator.kind]
+		expr = parseInfix(expr, operator)
+	}
+}
+
+// parsePrefixElementary turns tok into an argument-less expression whose
+// operation is looked up in tok2op by the token kind.
+func (p *Parser) parsePrefixElementary(tok token) *Expr {
+	op := tok2op[tok.kind]
+	return p.newExpr(op, tok.pos)
+}
+
+// parseCharClass parses the items of a character class up to and including
+// the closing tokRbracket. tok is the token that opened the class: its
+// position is the start of the result and the location reported when the
+// input ends before the class is closed. The result has operation op and one
+// argument per collected item.
+//
+// Items are collected in the shared p.charClass scratch slice. parseMinus can
+// push an extra item onto that slice while parseExpr is running, so each item
+// is parsed into a local before it is appended. Writing the call inside the
+// append expression would leave the order of the slice read and the call
+// unspecified by the language.
+func (p *Parser) parseCharClass(op Operation, tok token) *Expr {
+	var endPos Position
+	p.charClass = p.charClass[:0]
+	for {
+		item := p.parseExpr(0)
+		p.charClass = append(p.charClass, *item)
+		next := p.lexer.Peek()
+		if next.kind == tokRbracket {
+			endPos = next.pos
+			p.lexer.NextToken()
+			break
+		}
+		if next.kind == tokNone {
+			throw(tok.pos, "unterminated '['")
+		}
+	}
+
+	result := p.newExpr(op, combinePos(tok.pos, endPos))
+	// Copy out of the scratch slice, which the next class will overwrite.
+	result.Args = append(result.Args, p.charClass...)
+	return result
+}
+
+// parseMinus is the infix parselet for tokMinus. When left may start a range
+// and the next token is not tokRbracket, it parses the right operand and
+// returns an OpCharRange over both. Otherwise it pushes left onto p.charClass
+// and returns the minus itself as an OpChar.
+func (p *Parser) parseMinus(left *Expr, tok token) *Expr {
+	if p.isValidCharRangeOperand(left) && p.lexer.Peek().kind != tokRbracket {
+		hi := p.parseExpr(2)
+		return p.newExpr(OpCharRange, combinePos(left.Pos, hi.Pos), left, hi)
+	}
+	p.charClass = append(p.charClass, *left)
+	return p.newExpr(OpChar, tok.pos)
+}
+
+// isValidCharRangeOperand reports whether e may be an endpoint of a char
+// range. OpChar, OpEscapeHex, OpEscapeOctal and OpEscapeMeta always qualify.
+// An OpEscapeChar qualifies only when its text is one of the escaped
+// punctuation characters listed below.
+func (p *Parser) isValidCharRangeOperand(e *Expr) bool {
+	if e.Op == OpEscapeChar {
+		switch p.exprValue(e) {
+		case `\\`, `\|`, `\*`, `\+`, `\?`, `\.`, `\[`, `\^`, `\$`, `\(`, `\)`:
+			return true
+		}
+		return false
+	}
+	return e.Op == OpEscapeHex ||
+		e.Op == OpEscapeOctal ||
+		e.Op == OpEscapeMeta ||
+		e.Op == OpChar
+}
+
+// parsePlus is the infix parselet for tokPlus. It wraps left in OpPossessive
+// when left is already an OpPlus, OpStar, OpQuestion or OpRepeat, and in
+// OpPlus otherwise. The result spans from left to the plus token.
+func (p *Parser) parsePlus(left *Expr, tok token) *Expr {
+	span := combinePos(left.Pos, tok.pos)
+	switch left.Op {
+	case OpPlus, OpStar, OpQuestion, OpRepeat:
+		return p.newExpr(OpPossessive, span, left)
+	default:
+		return p.newExpr(OpPlus, span, left)
+	}
+}
+
+// parseQuestion is the infix parselet for tokQuestion. It wraps left in
+// OpNonGreedy when left is already an OpPlus, OpStar, OpQuestion or OpRepeat,
+// and in OpQuestion otherwise. The result spans from left to the question
+// token.
+func (p *Parser) parseQuestion(left *Expr, tok token) *Expr {
+	quantified := left.Op == OpPlus ||
+		left.Op == OpStar ||
+		left.Op == OpQuestion ||
+		left.Op == OpRepeat
+
+	op := OpQuestion
+	if quantified {
+		op = OpNonGreedy
+	}
+	return p.newExpr(op, combinePos(left.Pos, tok.pos), left)
+}
+
+// parseAlt is the infix parselet for tokPipe. The right-hand side is an empty
+// expression when the pipe is directly followed by tokRparen or tokNone, and
+// a regular expression parsed at precedence 1 otherwise. If left is already
+// an OpAlt the new branch is appended to it; otherwise a new two-branch OpAlt
+// is returned.
+func (p *Parser) parseAlt(left *Expr, tok token) *Expr {
+	var rhs *Expr
+	if next := p.lexer.Peek().kind; next == tokRparen || next == tokNone {
+		// Nothing follows the pipe, as in `(x|)`.
+		rhs = p.newEmpty(tok.pos)
+	} else {
+		rhs = p.parseExpr(1)
+	}
+
+	if left.Op != OpAlt {
+		return p.newExpr(OpAlt, combinePos(left.Pos, rhs.Pos), left, rhs)
+	}
+	left.Args = append(left.Args, *rhs)
+	left.Pos.End = rhs.End()
+	return left
+}
+
+// parseGroupItem parses the body of a group opened by tok. When the next
+// token is already tokRparen the body is an empty expression positioned at
+// tok.
+func (p *Parser) parseGroupItem(tok token) *Expr {
+	if p.lexer.Peek().kind != tokRparen {
+		return p.parseExpr(0)
+	}
+	// This is needed to handle `()` syntax.
+	return p.newEmpty(tok.pos)
+}
+
+// parseGroup parses a group opened by tok: its body, then the mandatory
+// tokRparen. The result has operation op, the body as its only argument, and
+// ends where the closing parenthesis ends.
+func (p *Parser) parseGroup(op Operation, tok token) *Expr {
+	body := p.parseGroupItem(tok)
+	group := p.newExpr(op, tok.pos, body)
+	closing := p.expect(tokRparen)
+	group.Pos.End = closing.End
+	return group
+}
+
+// parseNamedCapture parses a named capture group opened by tok. The name is
+// the text of tok without its opening prefix and without its final byte. The
+// prefix is taken to be 4 bytes long ("(?P<") for FormDefault and 3 bytes
+// long for every other form. The result is an OpNamedCapture of the given
+// form whose arguments are the group body followed by the name as an
+// OpString.
+func (p *Parser) parseNamedCapture(form Form, tok token) *Expr {
+	prefix := "(?<"
+	if form == FormDefault {
+		prefix = "(?P<"
+	}
+	namePos := Position{
+		Begin: tok.pos.Begin + uint16(len(prefix)),
+		End:   tok.pos.End - uint16(len(">")),
+	}
+	name := p.newExpr(OpString, namePos)
+
+	body := p.parseGroupItem(tok)
+	capture := p.newExprForm(OpNamedCapture, form, tok.pos, body, name)
+	closing := p.expect(tokRparen)
+	capture.Pos.End = closing.End
+	return capture
+}
+
+// parseGroupWithFlags parses a group whose opening token tok starts with
+// "(?". Three shapes are told apart by the token text after the "(":
+//
+//   - text not ending in ":" gives an OpFlagOnlyGroup holding the flags;
+//   - text equal to "?:" gives a plain OpGroup holding the body;
+//   - anything else gives an OpGroupWithFlags holding the body and then the
+//     flags, where the flags exclude the trailing ":".
+//
+// In every case a closing tokRparen is then required and ends the result.
+func (p *Parser) parseGroupWithFlags(tok token) *Expr {
+	head := p.out.Pattern[tok.pos.Begin+1 : tok.pos.End]
+	flagsBegin := tok.pos.Begin + uint16(len("(?"))
+
+	var group *Expr
+	if !strings.HasSuffix(head, ":") {
+		flags := p.newExpr(OpString, Position{Begin: flagsBegin, End: tok.pos.End})
+		group = p.newExpr(OpFlagOnlyGroup, tok.pos, flags)
+	} else if head == "?:" {
+		body := p.parseGroupItem(tok)
+		group = p.newExpr(OpGroup, tok.pos, body)
+	} else {
+		flags := p.newExpr(OpString, Position{
+			Begin: flagsBegin,
+			End:   tok.pos.End - uint16(len(":")),
+		})
+		body := p.parseGroupItem(tok)
+		group = p.newExpr(OpGroupWithFlags, tok.pos, body, flags)
+	}
+
+	closing := p.expect(tokRparen)
+	group.Pos.End = closing.End
+	return group
+}
+
+// parseEscape builds an expression of operation op that spans tok and has a
+// single OpString argument covering tok minus its first len(prefix) bytes.
+func (p *Parser) parseEscape(op Operation, prefix string, tok token) *Expr {
+	payload := Position{
+		Begin: tok.pos.Begin + uint16(len(prefix)),
+		End:   tok.pos.End,
+	}
+	return p.newExpr(op, tok.pos, p.newExpr(OpString, payload))
+}
+
+func (p *Parser) precedenceOf(tok token) int {
+ switch tok.kind {
+ case tokPipe:
+ return 1
+ case tokConcat, tokMinus:
+ return 2
+ case tokPlus, tokStar, tokQuestion, tokRepeat:
+ return 3
+ default:
+ return 0
+ }
+}
+
+// newPCRE splits a delimited pattern source into its parts. The first byte of
+// source is the opening delimiter. For '(', '{', '[' and '<' the closing
+// delimiter is the matching bracket; for any other byte it is the same byte.
+// The pattern is the text between the opening delimiter and the last
+// occurrence of the closing delimiter, and whatever follows that closing
+// delimiter is stored as the modifiers.
+//
+// An error is returned when source is empty, when the opening delimiter is a
+// backslash, whitespace or alphanumeric, or when no closing delimiter is
+// found after the opening one.
+func (p *Parser) newPCRE(source string) (*RegexpPCRE, error) {
+	if len(source) == 0 {
+		return nil, errors.New("empty pattern: can't find delimiters")
+	}
+
+	opening := source[0]
+	closing := opening
+	switch opening {
+	case '(':
+		closing = ')'
+	case '{':
+		closing = '}'
+	case '[':
+		closing = ']'
+	case '<':
+		closing = '>'
+	}
+
+	// Bracket delimiters are always acceptable; anything else is validated.
+	if closing == opening {
+		switch {
+		case opening == '\\':
+			return nil, errors.New("'\\' is not a valid delimiter")
+		case isSpace(opening):
+			return nil, errors.New("whitespace is not a valid delimiter")
+		case isAlphanumeric(opening):
+			return nil, errors.New("'" + string(opening) + "' is not a valid delimiter")
+		}
+	}
+
+	const delimLen = 1
+	end := strings.LastIndexByte(source[delimLen:], closing)
+	if end < 0 {
+		return nil, errors.New("can't find '" + string(closing) + "' ending delimiter")
+	}
+	// Convert the offset within source[delimLen:] to an offset within source.
+	end += delimLen
+
+	return &RegexpPCRE{
+		Pattern:   source[delimLen:end],
+		Source:    source,
+		Delim:     [2]byte{opening, closing},
+		Modifiers: source[end+1:],
+	}, nil
+}
+
+// tok2op maps a token kind to the operation parsePrefixElementary gives the
+// expression built from such a token. Kinds that are not listed hold the zero
+// Operation. Note that tokMinus maps to OpChar, the same as tokChar.
+var tok2op = [256]Operation{
+	tokChar:       OpChar,
+	tokMinus:      OpChar,
+	tokDot:        OpDot,
+	tokCaret:      OpCaret,
+	tokDollar:     OpDollar,
+	tokPosixClass: OpPosixClass,
+	tokComment:    OpComment,
+}
diff --git a/vendor/github.com/quasilyte/regex/syntax/pos.go b/vendor/github.com/quasilyte/regex/syntax/pos.go
new file mode 100644
index 000000000..51bdbf87a
--- /dev/null
+++ b/vendor/github.com/quasilyte/regex/syntax/pos.go
@@ -0,0 +1,10 @@
+package syntax
+
+// Position is a pair of offsets that delimit a span of the pattern text:
+// Begin is the offset of the first byte and End is the offset one past the
+// last byte.
+type Position struct {
+	Begin, End uint16
+}
+
+// combinePos returns the span that starts where begin starts and ends where
+// end ends.
+func combinePos(begin, end Position) Position {
+	merged := begin
+	merged.End = end.End
+	return merged
+}
diff --git a/vendor/github.com/quasilyte/regex/syntax/tokenkind_string.go b/vendor/github.com/quasilyte/regex/syntax/tokenkind_string.go
new file mode 100644
index 000000000..8800436bc
--- /dev/null
+++ b/vendor/github.com/quasilyte/regex/syntax/tokenkind_string.go
@@ -0,0 +1,59 @@
+// Code generated by "stringer -type=tokenKind -trimprefix=tok -linecomment=true"; DO NOT EDIT.
+
+package syntax
+
+import "strconv"
+
+// _ is never called. It exists so that the compiler checks each tokenKind
+// constant against the value this generated file was built for.
+func _() {
+ // An "invalid array index" compiler error signifies that the constant values have changed.
+ // Re-run the stringer command to generate them again.
+ var x [1]struct{}
+ _ = x[tokNone-0]
+ _ = x[tokChar-1]
+ _ = x[tokGroupFlags-2]
+ _ = x[tokPosixClass-3]
+ _ = x[tokConcat-4]
+ _ = x[tokRepeat-5]
+ _ = x[tokEscapeChar-6]
+ _ = x[tokEscapeMeta-7]
+ _ = x[tokEscapeOctal-8]
+ _ = x[tokEscapeUni-9]
+ _ = x[tokEscapeUniFull-10]
+ _ = x[tokEscapeHex-11]
+ _ = x[tokEscapeHexFull-12]
+ _ = x[tokComment-13]
+ _ = x[tokQ-14]
+ _ = x[tokMinus-15]
+ _ = x[tokLbracket-16]
+ _ = x[tokLbracketCaret-17]
+ _ = x[tokRbracket-18]
+ _ = x[tokDollar-19]
+ _ = x[tokCaret-20]
+ _ = x[tokQuestion-21]
+ _ = x[tokDot-22]
+ _ = x[tokPlus-23]
+ _ = x[tokStar-24]
+ _ = x[tokPipe-25]
+ _ = x[tokLparen-26]
+ _ = x[tokLparenName-27]
+ _ = x[tokLparenNameAngle-28]
+ _ = x[tokLparenNameQuote-29]
+ _ = x[tokLparenFlags-30]
+ _ = x[tokLparenAtomic-31]
+ _ = x[tokLparenPositiveLookahead-32]
+ _ = x[tokLparenPositiveLookbehind-33]
+ _ = x[tokLparenNegativeLookahead-34]
+ _ = x[tokLparenNegativeLookbehind-35]
+ _ = x[tokRparen-36]
+}
+
+// _tokenKind_name is the concatenation of all tokenKind names, in constant
+// order. String cuts individual names out of it using _tokenKind_index.
+const _tokenKind_name = "NoneCharGroupFlagsPosixClassConcatRepeatEscapeCharEscapeMetaEscapeOctalEscapeUniEscapeUniFullEscapeHexEscapeHexFullComment\\Q-[[^]$^?.+*|((?P<name>(?<name>(?'name'(?flags(?>(?=(?<=(?!(?<!)"
+
+// _tokenKind_index holds offsets into _tokenKind_name: the name of tokenKind
+// i starts at _tokenKind_index[i] and ends just before _tokenKind_index[i+1].
+var _tokenKind_index = [...]uint8{0, 4, 8, 18, 28, 34, 40, 50, 60, 71, 80, 93, 102, 115, 122, 124, 125, 126, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 146, 154, 162, 169, 172, 175, 179, 182, 186, 187}
+
+// String returns the name of i taken from _tokenKind_name. For a value that
+// has no entry in _tokenKind_index it returns "tokenKind(N)", where N is the
+// decimal value of i.
+func (i tokenKind) String() string {
+ if i >= tokenKind(len(_tokenKind_index)-1) {
+ return "tokenKind(" + strconv.FormatInt(int64(i), 10) + ")"
+ }
+ return _tokenKind_name[_tokenKind_index[i]:_tokenKind_index[i+1]]
+}
diff --git a/vendor/github.com/quasilyte/regex/syntax/utils.go b/vendor/github.com/quasilyte/regex/syntax/utils.go
new file mode 100644
index 000000000..e5b654825
--- /dev/null
+++ b/vendor/github.com/quasilyte/regex/syntax/utils.go
@@ -0,0 +1,30 @@
+package syntax
+
+// isSpace reports whether ch is an ASCII space, tab, line feed, vertical tab,
+// form feed or carriage return.
+func isSpace(ch byte) bool {
+	// '\t', '\n', '\v', '\f' and '\r' are the consecutive bytes 9 through 13.
+	return ch == ' ' || ('\t' <= ch && ch <= '\r')
+}
+
+// isAlphanumeric reports whether ch is an ASCII letter or an ASCII decimal
+// digit.
+func isAlphanumeric(ch byte) bool {
+	lower := 'a' <= ch && ch <= 'z'
+	upper := 'A' <= ch && ch <= 'Z'
+	digit := '0' <= ch && ch <= '9'
+	return lower || upper || digit
+}
+
+// isDigit reports whether ch is an ASCII decimal digit.
+func isDigit(ch byte) bool {
+	// Bytes below '0' wrap around to large values, so a single unsigned
+	// comparison covers both bounds.
+	return ch-'0' < 10
+}
+
+// isOctalDigit reports whether ch is one of the ASCII digits '0' through '7'.
+func isOctalDigit(ch byte) bool {
+	// Bytes below '0' wrap around to large values, so a single unsigned
+	// comparison covers both bounds.
+	return ch-'0' < 8
+}
+
+// isHexDigit reports whether ch is an ASCII hexadecimal digit in either
+// letter case.
+func isHexDigit(ch byte) bool {
+	switch {
+	case '0' <= ch && ch <= '9':
+		return true
+	case 'a' <= ch && ch <= 'f':
+		return true
+	case 'A' <= ch && ch <= 'F':
+		return true
+	}
+	return false
+}