diff options
| author | Dmitry Vyukov <dvyukov@google.com> | 2020-07-04 11:12:55 +0200 |
|---|---|---|
| committer | Dmitry Vyukov <dvyukov@google.com> | 2020-07-04 15:05:30 +0200 |
| commit | c7d7f10bdff703e4a3c0414e8a33d4e45c91eb35 (patch) | |
| tree | 0dff0ee1f98dbfa3ad8776112053a450d176592b /vendor/github.com/golangci/dupl/suffixtree | |
| parent | 9573094ce235bd9afe88f5da27a47dd6bcc1e13b (diff) | |
go.mod: vendor golangci-lint
Diffstat (limited to 'vendor/github.com/golangci/dupl/suffixtree')
| -rw-r--r-- | vendor/github.com/golangci/dupl/suffixtree/dupl.go | 98 | ||||
| -rw-r--r-- | vendor/github.com/golangci/dupl/suffixtree/suffixtree.go | 216 |
2 files changed, 314 insertions, 0 deletions
diff --git a/vendor/github.com/golangci/dupl/suffixtree/dupl.go b/vendor/github.com/golangci/dupl/suffixtree/dupl.go new file mode 100644 index 000000000..ab145b4f3 --- /dev/null +++ b/vendor/github.com/golangci/dupl/suffixtree/dupl.go @@ -0,0 +1,98 @@ +package suffixtree + +import "sort" + +type Match struct { + Ps []Pos + Len Pos +} + +type posList struct { + positions []Pos +} + +func newPosList() *posList { + return &posList{make([]Pos, 0)} +} + +func (p *posList) append(p2 *posList) { + p.positions = append(p.positions, p2.positions...) +} + +func (p *posList) add(pos Pos) { + p.positions = append(p.positions, pos) +} + +type contextList struct { + lists map[int]*posList +} + +func newContextList() *contextList { + return &contextList{make(map[int]*posList)} +} + +func (c *contextList) getAll() []Pos { + keys := make([]int, 0, len(c.lists)) + for k := range c.lists { + keys = append(keys, k) + } + sort.Ints(keys) + var ps []Pos + for _, k := range keys { + ps = append(ps, c.lists[k].positions...) + } + return ps +} + +func (c *contextList) append(c2 *contextList) { + for lc, pl := range c2.lists { + if _, ok := c.lists[lc]; ok { + c.lists[lc].append(pl) + } else { + c.lists[lc] = pl + } + } +} + +// FindDuplOver find pairs of maximal duplicities over a threshold +// length. +func (t *STree) FindDuplOver(threshold int) <-chan Match { + auxTran := newTran(0, 0, t.root) + ch := make(chan Match) + go func() { + walkTrans(auxTran, 0, threshold, ch) + close(ch) + }() + return ch +} + +func walkTrans(parent *tran, length, threshold int, ch chan<- Match) *contextList { + s := parent.state + + cl := newContextList() + + if len(s.trans) == 0 { + pl := newPosList() + start := parent.end + 1 - Pos(length) + pl.add(start) + ch := 0 + if start > 0 { + ch = s.tree.data[start-1].Val() + } + cl.lists[ch] = pl + return cl + } + + for _, t := range s.trans { + ln := length + t.len() + cl2 := walkTrans(t, ln, threshold, ch) + if ln >= threshold { + cl.append(cl2) + } + } + if length >= threshold && len(cl.lists) > 1 { + m := Match{cl.getAll(), Pos(length)} + ch <- m + } + return cl +} diff --git a/vendor/github.com/golangci/dupl/suffixtree/suffixtree.go b/vendor/github.com/golangci/dupl/suffixtree/suffixtree.go new file mode 100644 index 000000000..738015025 --- /dev/null +++ b/vendor/github.com/golangci/dupl/suffixtree/suffixtree.go @@ -0,0 +1,216 @@ +package suffixtree + +import ( + "bytes" + "fmt" + "math" + "strings" +) + +const infinity = math.MaxInt32 + +// Pos denotes position in data slice. +type Pos int32 + +type Token interface { + Val() int +} + +// STree is a struct representing a suffix tree. +type STree struct { + data []Token + root *state + auxState *state // auxiliary state + + // active point + s *state + start, end Pos +} + +// New creates new suffix tree. +func New() *STree { + t := new(STree) + t.data = make([]Token, 0, 50) + t.root = newState(t) + t.auxState = newState(t) + t.root.linkState = t.auxState + t.s = t.root + return t +} + +// Update refreshes the suffix tree to by new data. +func (t *STree) Update(data ...Token) { + t.data = append(t.data, data...) + for _ = range data { + t.update() + t.s, t.start = t.canonize(t.s, t.start, t.end) + t.end++ + } +} + +// update transforms suffix tree T(n) to T(n+1). +func (t *STree) update() { + oldr := t.root + + // (s, (start, end)) is the canonical reference pair for the active point + s := t.s + start, end := t.start, t.end + var r *state + for { + var endPoint bool + r, endPoint = t.testAndSplit(s, start, end-1) + if endPoint { + break + } + r.fork(end) + if oldr != t.root { + oldr.linkState = r + } + oldr = r + s, start = t.canonize(s.linkState, start, end-1) + } + if oldr != t.root { + oldr.linkState = r + } + + // update active point + t.s = s + t.start = start +} + +// testAndSplit tests whether a state with canonical ref. pair +// (s, (start, end)) is the end point, that is, a state that have +// a c-transition. If not, then state (exs, (start, end)) is made +// explicit (if not already so). +func (t *STree) testAndSplit(s *state, start, end Pos) (exs *state, endPoint bool) { + c := t.data[t.end] + if start <= end { + tr := s.findTran(t.data[start]) + splitPoint := tr.start + end - start + 1 + if t.data[splitPoint].Val() == c.Val() { + return s, true + } + // make the (s, (start, end)) state explicit + newSt := newState(s.tree) + newSt.addTran(splitPoint, tr.end, tr.state) + tr.end = splitPoint - 1 + tr.state = newSt + return newSt, false + } + if s == t.auxState || s.findTran(c) != nil { + return s, true + } + return s, false +} + +// canonize returns updated state and start position for ref. pair +// (s, (start, end)) of state r so the new ref. pair is canonical, +// that is, referenced from the closest explicit ancestor of r. +func (t *STree) canonize(s *state, start, end Pos) (*state, Pos) { + if s == t.auxState { + s, start = t.root, start+1 + } + if start > end { + return s, start + } + + var tr *tran + for { + if start <= end { + tr = s.findTran(t.data[start]) + if tr == nil { + panic(fmt.Sprintf("there should be some transition for '%d' at %d", + t.data[start].Val(), start)) + } + } + if tr.end-tr.start > end-start { + break + } + start += tr.end - tr.start + 1 + s = tr.state + } + if s == nil { + panic("there should always be some suffix link resolution") + } + return s, start +} + +func (t *STree) At(p Pos) Token { + if p < 0 || p >= Pos(len(t.data)) { + panic("position out of bounds") + } + return t.data[p] +} + +func (t *STree) String() string { + buf := new(bytes.Buffer) + printState(buf, t.root, 0) + return buf.String() +} + +func printState(buf *bytes.Buffer, s *state, ident int) { + for _, tr := range s.trans { + fmt.Fprint(buf, strings.Repeat(" ", ident)) + fmt.Fprintf(buf, "* (%d, %d)\n", tr.start, tr.ActEnd()) + printState(buf, tr.state, ident+1) + } +} + +// state is an explicit state of the suffix tree. +type state struct { + tree *STree + trans []*tran + linkState *state +} + +func newState(t *STree) *state { + return &state{ + tree: t, + trans: make([]*tran, 0), + linkState: nil, + } +} + +func (s *state) addTran(start, end Pos, r *state) { + s.trans = append(s.trans, newTran(start, end, r)) +} + +// fork creates a new branch from the state s. +func (s *state) fork(i Pos) *state { + r := newState(s.tree) + s.addTran(i, infinity, r) + return r +} + +// findTran finds c-transition. +func (s *state) findTran(c Token) *tran { + for _, tran := range s.trans { + if s.tree.data[tran.start].Val() == c.Val() { + return tran + } + } + return nil +} + +// tran represents a state's transition. +type tran struct { + start, end Pos + state *state +} + +func newTran(start, end Pos, s *state) *tran { + return &tran{start, end, s} +} + +func (t *tran) len() int { + return int(t.end - t.start + 1) +} + +// ActEnd returns actual end position as consistent with +// the actual length of the data in the STree. +func (t *tran) ActEnd() Pos { + if t.end == infinity { + return Pos(len(t.state.tree.data)) - 1 + } + return t.end +} |
