aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/golangci/dupl/suffixtree
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2020-07-04 11:12:55 +0200
committerDmitry Vyukov <dvyukov@google.com>2020-07-04 15:05:30 +0200
commitc7d7f10bdff703e4a3c0414e8a33d4e45c91eb35 (patch)
tree0dff0ee1f98dbfa3ad8776112053a450d176592b /vendor/github.com/golangci/dupl/suffixtree
parent9573094ce235bd9afe88f5da27a47dd6bcc1e13b (diff)
go.mod: vendor golangci-lint
Diffstat (limited to 'vendor/github.com/golangci/dupl/suffixtree')
-rw-r--r--vendor/github.com/golangci/dupl/suffixtree/dupl.go98
-rw-r--r--vendor/github.com/golangci/dupl/suffixtree/suffixtree.go216
2 files changed, 314 insertions, 0 deletions
diff --git a/vendor/github.com/golangci/dupl/suffixtree/dupl.go b/vendor/github.com/golangci/dupl/suffixtree/dupl.go
new file mode 100644
index 000000000..ab145b4f3
--- /dev/null
+++ b/vendor/github.com/golangci/dupl/suffixtree/dupl.go
@@ -0,0 +1,98 @@
+package suffixtree
+
+import "sort"
+
+type Match struct {
+ Ps []Pos
+ Len Pos
+}
+
+type posList struct {
+ positions []Pos
+}
+
+func newPosList() *posList {
+ return &posList{make([]Pos, 0)}
+}
+
+func (p *posList) append(p2 *posList) {
+ p.positions = append(p.positions, p2.positions...)
+}
+
+func (p *posList) add(pos Pos) {
+ p.positions = append(p.positions, pos)
+}
+
+type contextList struct {
+ lists map[int]*posList
+}
+
+func newContextList() *contextList {
+ return &contextList{make(map[int]*posList)}
+}
+
+func (c *contextList) getAll() []Pos {
+ keys := make([]int, 0, len(c.lists))
+ for k := range c.lists {
+ keys = append(keys, k)
+ }
+ sort.Ints(keys)
+ var ps []Pos
+ for _, k := range keys {
+ ps = append(ps, c.lists[k].positions...)
+ }
+ return ps
+}
+
+func (c *contextList) append(c2 *contextList) {
+ for lc, pl := range c2.lists {
+ if _, ok := c.lists[lc]; ok {
+ c.lists[lc].append(pl)
+ } else {
+ c.lists[lc] = pl
+ }
+ }
+}
+
+// FindDuplOver find pairs of maximal duplicities over a threshold
+// length.
+func (t *STree) FindDuplOver(threshold int) <-chan Match {
+ auxTran := newTran(0, 0, t.root)
+ ch := make(chan Match)
+ go func() {
+ walkTrans(auxTran, 0, threshold, ch)
+ close(ch)
+ }()
+ return ch
+}
+
+func walkTrans(parent *tran, length, threshold int, ch chan<- Match) *contextList {
+ s := parent.state
+
+ cl := newContextList()
+
+ if len(s.trans) == 0 {
+ pl := newPosList()
+ start := parent.end + 1 - Pos(length)
+ pl.add(start)
+ ch := 0
+ if start > 0 {
+ ch = s.tree.data[start-1].Val()
+ }
+ cl.lists[ch] = pl
+ return cl
+ }
+
+ for _, t := range s.trans {
+ ln := length + t.len()
+ cl2 := walkTrans(t, ln, threshold, ch)
+ if ln >= threshold {
+ cl.append(cl2)
+ }
+ }
+ if length >= threshold && len(cl.lists) > 1 {
+ m := Match{cl.getAll(), Pos(length)}
+ ch <- m
+ }
+ return cl
+}
diff --git a/vendor/github.com/golangci/dupl/suffixtree/suffixtree.go b/vendor/github.com/golangci/dupl/suffixtree/suffixtree.go
new file mode 100644
index 000000000..738015025
--- /dev/null
+++ b/vendor/github.com/golangci/dupl/suffixtree/suffixtree.go
@@ -0,0 +1,216 @@
+package suffixtree
+
+import (
+ "bytes"
+ "fmt"
+ "math"
+ "strings"
+)
+
+const infinity = math.MaxInt32
+
+// Pos denotes position in data slice.
+type Pos int32
+
+type Token interface {
+ Val() int
+}
+
+// STree is a struct representing a suffix tree.
+type STree struct {
+ data []Token
+ root *state
+ auxState *state // auxiliary state
+
+ // active point
+ s *state
+ start, end Pos
+}
+
+// New creates new suffix tree.
+func New() *STree {
+ t := new(STree)
+ t.data = make([]Token, 0, 50)
+ t.root = newState(t)
+ t.auxState = newState(t)
+ t.root.linkState = t.auxState
+ t.s = t.root
+ return t
+}
+
+// Update refreshes the suffix tree to by new data.
+func (t *STree) Update(data ...Token) {
+ t.data = append(t.data, data...)
+ for _ = range data {
+ t.update()
+ t.s, t.start = t.canonize(t.s, t.start, t.end)
+ t.end++
+ }
+}
+
+// update transforms suffix tree T(n) to T(n+1).
+func (t *STree) update() {
+ oldr := t.root
+
+ // (s, (start, end)) is the canonical reference pair for the active point
+ s := t.s
+ start, end := t.start, t.end
+ var r *state
+ for {
+ var endPoint bool
+ r, endPoint = t.testAndSplit(s, start, end-1)
+ if endPoint {
+ break
+ }
+ r.fork(end)
+ if oldr != t.root {
+ oldr.linkState = r
+ }
+ oldr = r
+ s, start = t.canonize(s.linkState, start, end-1)
+ }
+ if oldr != t.root {
+ oldr.linkState = r
+ }
+
+ // update active point
+ t.s = s
+ t.start = start
+}
+
+// testAndSplit tests whether a state with canonical ref. pair
+// (s, (start, end)) is the end point, that is, a state that have
+// a c-transition. If not, then state (exs, (start, end)) is made
+// explicit (if not already so).
+func (t *STree) testAndSplit(s *state, start, end Pos) (exs *state, endPoint bool) {
+ c := t.data[t.end]
+ if start <= end {
+ tr := s.findTran(t.data[start])
+ splitPoint := tr.start + end - start + 1
+ if t.data[splitPoint].Val() == c.Val() {
+ return s, true
+ }
+ // make the (s, (start, end)) state explicit
+ newSt := newState(s.tree)
+ newSt.addTran(splitPoint, tr.end, tr.state)
+ tr.end = splitPoint - 1
+ tr.state = newSt
+ return newSt, false
+ }
+ if s == t.auxState || s.findTran(c) != nil {
+ return s, true
+ }
+ return s, false
+}
+
+// canonize returns updated state and start position for ref. pair
+// (s, (start, end)) of state r so the new ref. pair is canonical,
+// that is, referenced from the closest explicit ancestor of r.
+func (t *STree) canonize(s *state, start, end Pos) (*state, Pos) {
+ if s == t.auxState {
+ s, start = t.root, start+1
+ }
+ if start > end {
+ return s, start
+ }
+
+ var tr *tran
+ for {
+ if start <= end {
+ tr = s.findTran(t.data[start])
+ if tr == nil {
+ panic(fmt.Sprintf("there should be some transition for '%d' at %d",
+ t.data[start].Val(), start))
+ }
+ }
+ if tr.end-tr.start > end-start {
+ break
+ }
+ start += tr.end - tr.start + 1
+ s = tr.state
+ }
+ if s == nil {
+ panic("there should always be some suffix link resolution")
+ }
+ return s, start
+}
+
+func (t *STree) At(p Pos) Token {
+ if p < 0 || p >= Pos(len(t.data)) {
+ panic("position out of bounds")
+ }
+ return t.data[p]
+}
+
+func (t *STree) String() string {
+ buf := new(bytes.Buffer)
+ printState(buf, t.root, 0)
+ return buf.String()
+}
+
+func printState(buf *bytes.Buffer, s *state, ident int) {
+ for _, tr := range s.trans {
+ fmt.Fprint(buf, strings.Repeat(" ", ident))
+ fmt.Fprintf(buf, "* (%d, %d)\n", tr.start, tr.ActEnd())
+ printState(buf, tr.state, ident+1)
+ }
+}
+
+// state is an explicit state of the suffix tree.
+type state struct {
+ tree *STree
+ trans []*tran
+ linkState *state
+}
+
+func newState(t *STree) *state {
+ return &state{
+ tree: t,
+ trans: make([]*tran, 0),
+ linkState: nil,
+ }
+}
+
+func (s *state) addTran(start, end Pos, r *state) {
+ s.trans = append(s.trans, newTran(start, end, r))
+}
+
+// fork creates a new branch from the state s.
+func (s *state) fork(i Pos) *state {
+ r := newState(s.tree)
+ s.addTran(i, infinity, r)
+ return r
+}
+
+// findTran finds c-transition.
+func (s *state) findTran(c Token) *tran {
+ for _, tran := range s.trans {
+ if s.tree.data[tran.start].Val() == c.Val() {
+ return tran
+ }
+ }
+ return nil
+}
+
+// tran represents a state's transition.
+type tran struct {
+ start, end Pos
+ state *state
+}
+
+func newTran(start, end Pos, s *state) *tran {
+ return &tran{start, end, s}
+}
+
+func (t *tran) len() int {
+ return int(t.end - t.start + 1)
+}
+
+// ActEnd returns actual end position as consistent with
+// the actual length of the data in the STree.
+func (t *tran) ActEnd() Pos {
+ if t.end == infinity {
+ return Pos(len(t.state.tree.data)) - 1
+ }
+ return t.end
+}