pkg/stat: rename package name to singular form

Go package names should generally be in singular form; see https://go.dev/blog/package-names, https://rakyll.org/style-packages and https://groups.google.com/g/golang-nuts/c/buBwLar1gNw
author: Dmitry Vyukov <dvyukov@google.com> 2024-07-24 12:08:49 +0200
committer: Dmitry Vyukov <dvyukov@google.com> 2024-07-24 14:39:45 +0000
commit: 49e6369fe732c0f81e5b03b36e345afbf3c79a15 (patch)
tree: 651e322e41a8084abd6f2c80e4f9b7ff50a1dfe9 /pkg/stat/sample
parent: 1f032c27c8158e44723253179928104813d45cdc (diff)
3 files changed, 159 insertions, 0 deletions
diff --git a/pkg/stat/sample/pvalue.go b/pkg/stat/sample/pvalue.go
new file mode 100644
index 000000000..acfff4bc4
--- /dev/null
+++ b/pkg/stat/sample/pvalue.go
@@ -0,0 +1,20 @@
+// Copyright 2021 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package sample
+
+// TODO: no substitute for this import was found as of Feb 2023. Let's keep it as is while it works.
+import "golang.org/x/perf/benchstat" // nolint:all
+
+// UTest runs the Mann-Whitney U test on the two samples.
+// It returns whatever p-value and error benchstat.UTest reports for them.
+func UTest(old, new *Sample) (pval float64, err error) {
+	// MannWhitneyUTest lives in x/perf/benchstat/internal/stats and cannot be
+	// called directly, so each sample's data points are wrapped in a
+	// benchstat.Metrics value and handed to the exported benchstat.UTest.
+	return benchstat.UTest(
+		&benchstat.Metrics{RValues: old.Xs},
+		&benchstat.Metrics{RValues: new.Xs},
+	)
+}
diff --git a/pkg/stat/sample/sample.go b/pkg/stat/sample/sample.go
new file mode 100644
index 000000000..740f9aefe
--- /dev/null
+++ b/pkg/stat/sample/sample.go
@@ -0,0 +1,73 @@
+// Copyright 2021 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+// Package sample provides various statistical operations and algorithms.
+package sample
+
+import (
+	"math"
+	"sort"
+)
+
+// Sample represents a single sample: a set of data points collected during an experiment.
+type Sample struct {
+	Xs     []float64 // The data points; Sort reorders this slice in place.
+	Sorted bool      // Set by Sort once Xs is in ascending order; Sort does nothing while it is true.
+}
+
+// Percentile returns the p-th quantile of the sample, where p is a fraction
+// (e.g. 0.5 for the median). The value is linearly interpolated between
+// neighbouring data points using the R8 definition and is clamped to the
+// smallest and largest data points. NaN is returned for an empty sample.
+// For a non-empty sample, s.Xs is sorted in place as a side effect.
+func (s *Sample) Percentile(p float64) float64 {
+	if len(s.Xs) == 0 {
+		// There is no data point to return; report NaN instead of panicking
+		// on the out-of-range index below.
+		return math.NaN()
+	}
+	s.Sort()
+	// The code below is taken from golang.org/x/perf/internal/stats
+	// Unfortunately, that package is internal and we cannot just import and use it.
+	N := float64(len(s.Xs))
+	n := 1/3.0 + p*(N+1/3.0) // R8
+	kf, frac := math.Modf(n)
+	k := int(kf)
+	if k <= 0 {
+		return s.Xs[0]
+	}
+	if k >= len(s.Xs) {
+		return s.Xs[len(s.Xs)-1]
+	}
+	return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1])
+}
+
+func (s *Sample) Median() float64 {
+	return s.Percentile(0.5)
+}
+
+// RemoveOutliers returns a new Sample holding only the data points that lie
+// within Tukey's fences, i.e. in [Q1 - 1.5*IQR, Q3 + 1.5*IQR] with IQR = Q3 - Q1.
+// A sample with fewer than 4 data points is returned as an unfiltered copy.
+// When filtering takes place, the receiver's Xs are sorted in place.
+func (s *Sample) RemoveOutliers() *Sample {
+	// If the data set is too small, we cannot reliably detect outliers anyway.
+	if len(s.Xs) < 4 {
+		return s.Copy()
+	}
+	s.Sort()
+	q1, q3 := s.Percentile(0.25), s.Percentile(0.75)
+	iqr := q3 - q1
+	lower, upper := q1-1.5*iqr, q3+1.5*iqr
+	// Deliberately a non-nil empty slice, so Xs is never nil in the result.
+	kept := []float64{}
+	for i := range s.Xs {
+		if x := s.Xs[i]; x >= lower && x <= upper {
+			kept = append(kept, x)
+		}
+	}
+	return &Sample{Xs: kept, Sorted: s.Sorted}
+}
+
+// Copy returns a new Sample with its own copy of the data points and the
+// same Sorted flag, so changes to the copy do not affect the receiver.
+func (s *Sample) Copy() *Sample {
+	xs := make([]float64, len(s.Xs))
+	copy(xs, s.Xs)
+	return &Sample{Xs: xs, Sorted: s.Sorted}
+}
+
+// Sort orders the data points in ascending order in place and records that
+// in the Sorted flag. It does nothing if the flag is already set.
+func (s *Sample) Sort() {
+	if s.Sorted {
+		return
+	}
+	xs := s.Xs
+	sort.Slice(xs, func(a, b int) bool { return xs[a] < xs[b] })
+	s.Sorted = true
+}
diff --git a/pkg/stat/sample/sample_test.go b/pkg/stat/sample/sample_test.go
new file mode 100644
index 000000000..ac7845ccf
--- /dev/null
+++ b/pkg/stat/sample/sample_test.go
@@ -0,0 +1,66 @@
+// Copyright 2021 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package sample
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestMedian(t *testing.T) {
+	tests := []struct {
+		input     []float64
+		minMedian float64
+		maxMedian float64
+	}{
+		{
+			input:     []float64{1, 2, 3},
+			minMedian: 1.99, // we cannot do exact floating point equality comparison
+			maxMedian: 2.01,
+		},
+		{
+			input:     []float64{0, 1, 2, 3},
+			minMedian: 1.0,
+			maxMedian: 2.0,
+		},
+	}
+	for _, test := range tests {
+		sample := Sample{Xs: test.input}
+		median := sample.Median()
+		if median < test.minMedian || median > test.maxMedian {
+			t.Errorf("sample %v, median got %v, median expected [%v;%v]",
+				test.input, median, test.minMedian, test.maxMedian)
+		}
+	}
+}
+
+func TestRemoveOutliers(t *testing.T) {
+	// Some tests just to check the overall sanity of the method.
+	tests := []struct {
+		input  []float64
+		output []float64
+	}{
+		{
+			input:  []float64{-20, 1, 2, 3, 4, 5},
+			output: []float64{1, 2, 3, 4, 5},
+		},
+		{
+			input:  []float64{1, 2, 3, 4, 25},
+			output: []float64{1, 2, 3, 4},
+		},
+		{
+			input:  []float64{-10, -5, 0, 5, 10, 15},
+			output: []float64{-10, -5, 0, 5, 10, 15},
+		},
+	}
+	for _, test := range tests {
+		sample := Sample{Xs: test.input}
+		result := sample.RemoveOutliers()
+		result.Sort()
+		if !reflect.DeepEqual(result.Xs, test.output) {
+			t.Errorf("input: %v, expected no outliers: %v, got: %v",
+				test.input, test.output, result.Xs)
+		}
+	}
+}
author	Dmitry Vyukov <dvyukov@google.com>	2024-07-24 12:08:49 +0200
committer	Dmitry Vyukov <dvyukov@google.com>	2024-07-24 14:39:45 +0000
commit	49e6369fe732c0f81e5b03b36e345afbf3c79a15 (patch)
tree	651e322e41a8084abd6f2c80e4f9b7ff50a1dfe9 /pkg/stat/sample
parent	1f032c27c8158e44723253179928104813d45cdc (diff)