From 73f4b622a34ffc998a542f5e109fb05a1d892272 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Sun, 31 Mar 2024 12:24:45 +0200 Subject: go.mod: vendor github.com/bsm/histogram --- vendor/github.com/bsm/histogram/v3/.gitignore | 1 + vendor/github.com/bsm/histogram/v3/LICENSE | 201 ++++++++++++++++++ vendor/github.com/bsm/histogram/v3/Makefile | 10 + vendor/github.com/bsm/histogram/v3/README.md | 14 ++ vendor/github.com/bsm/histogram/v3/bins.go | 18 ++ vendor/github.com/bsm/histogram/v3/histogram.go | 266 ++++++++++++++++++++++++ 6 files changed, 510 insertions(+) create mode 100644 vendor/github.com/bsm/histogram/v3/.gitignore create mode 100644 vendor/github.com/bsm/histogram/v3/LICENSE create mode 100644 vendor/github.com/bsm/histogram/v3/Makefile create mode 100644 vendor/github.com/bsm/histogram/v3/README.md create mode 100644 vendor/github.com/bsm/histogram/v3/bins.go create mode 100644 vendor/github.com/bsm/histogram/v3/histogram.go (limited to 'vendor/github.com') diff --git a/vendor/github.com/bsm/histogram/v3/.gitignore b/vendor/github.com/bsm/histogram/v3/.gitignore new file mode 100644 index 000000000..48b8bf907 --- /dev/null +++ b/vendor/github.com/bsm/histogram/v3/.gitignore @@ -0,0 +1 @@ +vendor/ diff --git a/vendor/github.com/bsm/histogram/v3/LICENSE b/vendor/github.com/bsm/histogram/v3/LICENSE new file mode 100644 index 000000000..1625b8474 --- /dev/null +++ b/vendor/github.com/bsm/histogram/v3/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2021 Black Square Media Ltd + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/bsm/histogram/v3/Makefile b/vendor/github.com/bsm/histogram/v3/Makefile new file mode 100644 index 000000000..f9073368b --- /dev/null +++ b/vendor/github.com/bsm/histogram/v3/Makefile @@ -0,0 +1,10 @@ +default: test + +test: + go test ./... + +bench: + go test ./... -run=NONE -bench=. -benchmem + +lint: + golangci-lint run diff --git a/vendor/github.com/bsm/histogram/v3/README.md b/vendor/github.com/bsm/histogram/v3/README.md new file mode 100644 index 000000000..e52c241fd --- /dev/null +++ b/vendor/github.com/bsm/histogram/v3/README.md @@ -0,0 +1,14 @@ +# Histogram + +[![Build Status](https://travis-ci.org/bsm/histogram.svg)](https://travis-ci.org/bsm/histogram) [![GoDoc](https://godoc.org/github.com/bsm/histogram?status.svg)](https://godoc.org/github.com/bsm/histogram) + +Fast Go implementation of Ben-Haim's and Yom-Tov's streaming histogram algorithm, as described in their _A Streaming Parallel Decision Tree Algorithm_ +(2010, [PDF](http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf)) paper. + +## Documentation + +Please see the [API documentation](https://godoc.org/github.com/bsm/histogram) for package and API descriptions and examples. + +## Credits + +- Aaron Windsor - https://github.com/aaw/histosketch (released into the public domain). diff --git a/vendor/github.com/bsm/histogram/v3/bins.go b/vendor/github.com/bsm/histogram/v3/bins.go new file mode 100644 index 000000000..41e9e9252 --- /dev/null +++ b/vendor/github.com/bsm/histogram/v3/bins.go @@ -0,0 +1,18 @@ +package histogram + +import "math" + +type bin struct { + w float64 // weight + v float64 // value +} + +func (b bin) Sum() float64 { return math.Abs(b.w) * b.v } + +// ---------------------------------------------------------- + +type binSlice []bin + +func (s binSlice) Len() int { return len(s) } +func (s binSlice) Less(i, j int) bool { return s[i].v < s[j].v } +func (s binSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } diff --git a/vendor/github.com/bsm/histogram/v3/histogram.go b/vendor/github.com/bsm/histogram/v3/histogram.go new file mode 100644 index 000000000..c7c68bf74 --- /dev/null +++ b/vendor/github.com/bsm/histogram/v3/histogram.go @@ -0,0 +1,266 @@ +package histogram + +import ( + "math" + "sort" +) + +// Histogram is a probabilistic, fixed-size data structure, able to +// accommodate massive data streams while predicting distributions +// and quantiles much more accurately than a sample-based approach. +// +// Please note that a historgram is not thread-safe. All operations +// must be protected by a mutex if used across multiple goroutines. +type Histogram struct { + bins []bin + size int + weight float64 + + min, max float64 +} + +// New creates a new histogram with a maximum size. +func New(sz int) *Histogram { + h := new(Histogram) + h.Reset(sz) + return h +} + +// Reset resets the struct to its initial state with +// a specific size. +func (h *Histogram) Reset(sz int) { + if sz < cap(h.bins) { + h.bins = h.bins[:0] + } else { + h.bins = make([]bin, 0, sz+1) + } + + h.size = sz + h.min = math.NaN() + h.max = math.NaN() + h.weight = 0 +} + +// Copy copies h to x and returns x. If x is passed as nil +// a new Histogram will be inited. +func (h *Histogram) Copy(x *Histogram) *Histogram { + if x == nil { + x = new(Histogram) + } + if sz := h.size; sz < cap(x.bins) { + x.bins = x.bins[:len(h.bins)] + } else { + x.bins = make([]bin, len(h.bins), sz+1) + } + copy(x.bins, h.bins) + + x.size = h.size + x.min = h.min + x.max = h.max + x.weight = h.weight + return x +} + +// Count returns the observed weight truncated to the next integer. +func (h *Histogram) Count() int { return int(h.weight) } + +// Weight returns the observed weight (usually, the number of items seen). +func (h *Histogram) Weight() float64 { return h.weight } + +// Min returns the smallest observed value. +// Returns NaN if Count is zero. +func (h *Histogram) Min() float64 { + if h.weight == 0 { + return math.NaN() + } + return h.min +} + +// Max returns the largest observed value. +// Returns NaN if Count is zero. +func (h *Histogram) Max() float64 { + if h.weight == 0 { + return math.NaN() + } + return h.max +} + +// Sum returns the (approximate) sum of all observed values. +// Returns NaN if Count is zero. +func (h *Histogram) Sum() float64 { + if h.weight == 0 { + return math.NaN() + } + + var sum float64 + for _, b := range h.bins { + sum += b.Sum() + } + return sum +} + +// Mean returns the (approximate) average observed value. +// Returns NaN if Count is zero. +func (h *Histogram) Mean() float64 { + if h.weight == 0 { + return math.NaN() + } + return h.Sum() / h.weight +} + +// Variance returns the (approximate) sample variance of the distribution. +// Returns NaN if Count is zero. +func (h *Histogram) Variance() float64 { + if h.weight <= 1 { + return math.NaN() + } + + var vv float64 + mean := h.Mean() + for _, b := range h.bins { + delta := mean - b.v + vv += delta * delta * b.w + } + return vv / (h.weight - 1) +} + +// Quantile returns the (approximate) quantile of the distribution. +// Accepted values for q are between 0.0 and 1.0. +// Returns NaN if Count is zero or bad inputs. +func (h *Histogram) Quantile(q float64) float64 { + if h.weight == 0 || q < 0.0 || q > 1.0 { + return math.NaN() + } else if q == 0.0 { + return h.min + } else if q == 1.0 { + return h.max + } + + delta := q * h.weight + pos := 0 + for w0 := 0.0; pos < len(h.bins); pos++ { + w1 := math.Abs(h.bins[pos].w) / 2.0 + if delta-w1-w0 < 0 { + break + } + delta -= (w1 + w0) + w0 = w1 + } + + switch pos { + case 0: // lower bound + return h.solve(bin{v: h.min, w: 0}, h.bins[pos], delta) + case len(h.bins): // upper bound + return h.solve(h.bins[pos-1], bin{v: h.max, w: 0}, delta) + default: + return h.solve(h.bins[pos-1], h.bins[pos], delta) + } +} + +// Add is the same as AddWeight(v, 1) +func (h *Histogram) Add(v float64) { h.AddWeight(v, 1) } + +// AddN is the same as AddWeight(v, float64(n)) +func (h *Histogram) AddN(v float64, n int) { h.AddWeight(v, float64(n)) } + +// AddWeight adds observations of v with the weight w to the distribution. +func (h *Histogram) AddWeight(v, w float64) { + if w <= 0 { + return + } + if h.weight == 0 || v < h.min { + h.min = v + } + if h.weight == 0 || v > h.max { + h.max = v + } + + h.insert(v, w) + h.weight += w + + h.prune() +} + +// Merge sets h to the union x ∪ y. +func (h *Histogram) Merge(x, y *Histogram) { + h.bins = append(h.bins[:0], x.bins...) + h.bins = append(h.bins, y.bins...) + sort.Sort(binSlice(h.bins)) + h.prune() +} + +// MergeWith sets h to the union h ∪ x. +func (h *Histogram) MergeWith(x *Histogram) { h.Merge(h, x) } + +// NumBins returns bin (bucket) count. +func (h *Histogram) NumBins() int { return len(h.bins) } + +// Bin returns bin (bucket) data. +// Requested index must be 0 <= i < NumBins() or it will panic. +func (h *Histogram) Bin(i int) (value, weight float64) { + b := h.bins[i] + return b.v, b.w +} + +func (h *Histogram) solve(b1, b2 bin, delta float64) float64 { + w1, w2 := b1.w, b2.w + + // return if both bins are exact (unmerged) + if w1 > 0 && w2 > 0 { + return b2.v + } + + // normalise + w1, w2 = math.Abs(w1), math.Abs(w2) + + // calculate multiplier + var z float64 + if w1 == w2 { + z = delta / w1 + } else { + a := 2 * (w2 - w1) + b := 2 * w1 + z = (math.Sqrt(b*b+4*a*delta) - b) / a + } + return b1.v + (b2.v-b1.v)*z +} + +func (h *Histogram) insert(v, w float64) { + pos := h.search(v) + if pos < len(h.bins) && h.bins[pos].v == v { + h.bins[pos].w += math.Copysign(w, h.bins[pos].w) + return + } + + maxi := len(h.bins) + h.bins = h.bins[:len(h.bins)+1] + if pos != maxi { + copy(h.bins[pos+1:], h.bins[pos:]) + } + h.bins[pos].w = w + h.bins[pos].v = v +} + +func (h *Histogram) prune() { + for len(h.bins) > h.size { + delta := math.MaxFloat64 + pos := 0 + for i := 0; i < len(h.bins)-1; i++ { + b1, b2 := h.bins[i], h.bins[i+1] + if x := b2.v - b1.v; x < delta { + pos, delta = i, x + } + } + + b1, b2 := h.bins[pos], h.bins[pos+1] + w := math.Abs(b1.w) + math.Abs(b2.w) + v := (b1.Sum() + b2.Sum()) / w + h.bins[pos+1].w = -w + h.bins[pos+1].v = v + h.bins = h.bins[:pos+copy(h.bins[pos:], h.bins[pos+1:])] + } +} + +func (h *Histogram) search(v float64) int { + return sort.Search(len(h.bins), func(i int) bool { return h.bins[i].v >= v }) +} -- cgit mrf-deployment