From 49e6369fe732c0f81e5b03b36e345afbf3c79a15 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 24 Jul 2024 12:08:49 +0200 Subject: pkg/stat: rename package name to singular form Go package names should generally be singular form: https://go.dev/blog/package-names https://rakyll.org/style-packages https://groups.google.com/g/golang-nuts/c/buBwLar1gNw --- pkg/stat/sample/pvalue.go | 20 ++++++++++++ pkg/stat/sample/sample.go | 73 ++++++++++++++++++++++++++++++++++++++++++ pkg/stat/sample/sample_test.go | 66 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 159 insertions(+) create mode 100644 pkg/stat/sample/pvalue.go create mode 100644 pkg/stat/sample/sample.go create mode 100644 pkg/stat/sample/sample_test.go (limited to 'pkg/stat/sample') diff --git a/pkg/stat/sample/pvalue.go b/pkg/stat/sample/pvalue.go new file mode 100644 index 000000000..acfff4bc4 --- /dev/null +++ b/pkg/stat/sample/pvalue.go @@ -0,0 +1,20 @@ +// Copyright 2021 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package sample + +// TODO: I didn't find the substitution as of Feb 2023. Let's keep it as is while it works. +import "golang.org/x/perf/benchstat" // nolint:all + +// Mann-Whitney U test. +func UTest(old, new *Sample) (pval float64, err error) { + // Unfortunately we cannot just invoke MannWhitneyUTest from x/perf/benchstat/internal/stats, + // so we first wrap the data in Metrics. + mOld := benchstat.Metrics{ + RValues: old.Xs, + } + mNew := benchstat.Metrics{ + RValues: new.Xs, + } + return benchstat.UTest(&mOld, &mNew) +} diff --git a/pkg/stat/sample/sample.go b/pkg/stat/sample/sample.go new file mode 100644 index 000000000..740f9aefe --- /dev/null +++ b/pkg/stat/sample/sample.go @@ -0,0 +1,73 @@ +// Copyright 2021 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
// Package sample provides various statistical operations and algorithms.

// Sample represents a single sample - a set of data points collected during an experiment.
type Sample struct {
	// Xs holds the data points. Sort (and every method that calls it) reorders it in place.
	Xs []float64
	// Sorted records that Xs is already in ascending order, which lets Sort skip the work.
	Sorted bool
}

// Percentile returns the p-th percentile of the sample, where p is a fraction
// (0.5 is the median). The result is linearly interpolated between the two
// neighbouring data points.
//
// The receiver is sorted in place as a side effect. An empty sample or a NaN p
// yields NaN; p <= 0 yields the smallest data point and p >= 1 the largest.
func (s *Sample) Percentile(p float64) float64 {
	if len(s.Xs) == 0 || math.IsNaN(p) {
		// There is no meaningful answer, and indexing s.Xs below would panic
		// on an empty sample.
		return math.NaN()
	}
	s.Sort()
	// Handle the extremes explicitly: for huge or infinite p the float-to-int
	// conversion below would be out of range, and its result is
	// implementation-dependent.
	if p <= 0 {
		return s.Xs[0]
	}
	if p >= 1 {
		return s.Xs[len(s.Xs)-1]
	}
	// The code below is taken from golang.org/x/perf/internal/stats
	// Unfortunately, that package is internal and we cannot just import and use it.
	count := float64(len(s.Xs))
	n := 1/3.0 + p*(count+1/3.0) // R8
	kf, frac := math.Modf(n)
	k := int(kf)
	if k <= 0 {
		return s.Xs[0]
	}
	if k >= len(s.Xs) {
		return s.Xs[len(s.Xs)-1]
	}
	return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1])
}

// Median returns the 0.5 percentile of the sample (NaN for an empty sample).
// Like Percentile, it sorts the receiver in place.
func (s *Sample) Median() float64 {
	return s.Percentile(0.5)
}

// RemoveOutliers returns a new Sample without outliers, detected by the
// Tukey's fences method: only the points within
// [Q1 - 1.5*(Q3-Q1), Q3 + 1.5*(Q3-Q1)] are kept, where Q1 and Q3 are the 0.25
// and 0.75 percentiles.
//
// A sample with fewer than 4 points is returned as a plain copy and the
// receiver is left untouched; otherwise the receiver is sorted in place.
func (s *Sample) RemoveOutliers() *Sample {
	if len(s.Xs) < 4 {
		// If the data set is too small, we cannot reliably detect outliers anyway.
		return s.Copy()
	}
	s.Sort()
	q1 := s.Percentile(0.25)
	q3 := s.Percentile(0.75)
	minValue := q1 - 1.5*(q3-q1)
	maxValue := q3 + 1.5*(q3-q1)
	// At most every point is kept, so a single allocation is enough.
	xs := make([]float64, 0, len(s.Xs))
	for _, value := range s.Xs {
		if value >= minValue && value <= maxValue {
			xs = append(xs, value)
		}
	}
	return &Sample{
		Xs:     xs,
		Sorted: s.Sorted,
	}
}

// Copy returns a new Sample with its own copy of the data points and the same Sorted flag.
func (s *Sample) Copy() *Sample {
	return &Sample{
		Xs:     append([]float64{}, s.Xs...),
		Sorted: s.Sorted,
	}
}

// Sort sorts the data points in ascending order in place and marks the sample
// as sorted. It does nothing when the sample is already marked as sorted.
func (s *Sample) Sort() {
	if s.Sorted {
		return
	}
	sort.Float64s(s.Xs)
	s.Sorted = true
}

// --- pkg/stat/sample/sample_test.go ---
// Copyright 2021 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+ +package sample + +import ( + "reflect" + "testing" +) + +func TestMedian(t *testing.T) { + tests := []struct { + input []float64 + minMedian float64 + maxMedian float64 + }{ + { + input: []float64{1, 2, 3}, + minMedian: 1.99, // we cannot do exact floating point equality comparison + maxMedian: 2.01, + }, + { + input: []float64{0, 1, 2, 3}, + minMedian: 1.0, + maxMedian: 2.0, + }, + } + for _, test := range tests { + sample := Sample{Xs: test.input} + median := sample.Median() + if median < test.minMedian || median > test.maxMedian { + t.Errorf("sample %v, median got %v, median expected [%v;%v]", + test.input, median, test.minMedian, test.maxMedian) + } + } +} + +func TestRemoveOutliers(t *testing.T) { + // Some tests just to check the overall sanity of the method. + tests := []struct { + input []float64 + output []float64 + }{ + { + input: []float64{-20, 1, 2, 3, 4, 5}, + output: []float64{1, 2, 3, 4, 5}, + }, + { + input: []float64{1, 2, 3, 4, 25}, + output: []float64{1, 2, 3, 4}, + }, + { + input: []float64{-10, -5, 0, 5, 10, 15}, + output: []float64{-10, -5, 0, 5, 10, 15}, + }, + } + for _, test := range tests { + sample := Sample{Xs: test.input} + result := sample.RemoveOutliers() + result.Sort() + if !reflect.DeepEqual(result.Xs, test.output) { + t.Errorf("input: %v, expected no outliers: %v, got: %v", + test.input, test.output, result.Xs) + } + } +} -- cgit mrf-deployment