From 286c38bd1ae34870c40986e8ddebfd65d6e5049e Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Thu, 13 Jun 2024 20:30:13 +0200 Subject: pkg/stats: split out pkg/stats/sample This will reduce the number of dependencies needed for the main syzkaller tools. --- pkg/stats/pvalue.go | 20 ----------- pkg/stats/sample.go | 73 ----------------------------------------- pkg/stats/sample/pvalue.go | 20 +++++++++++ pkg/stats/sample/sample.go | 73 +++++++++++++++++++++++++++++++++++++++++ pkg/stats/sample/sample_test.go | 66 +++++++++++++++++++++++++++++++++++++ pkg/stats/sample_test.go | 66 ------------------------------------- 6 files changed, 159 insertions(+), 159 deletions(-) delete mode 100644 pkg/stats/pvalue.go delete mode 100644 pkg/stats/sample.go create mode 100644 pkg/stats/sample/pvalue.go create mode 100644 pkg/stats/sample/sample.go create mode 100644 pkg/stats/sample/sample_test.go delete mode 100644 pkg/stats/sample_test.go (limited to 'pkg') diff --git a/pkg/stats/pvalue.go b/pkg/stats/pvalue.go deleted file mode 100644 index 9f413f5aa..000000000 --- a/pkg/stats/pvalue.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2021 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -package stats - -// TODO: I didn't find the substitution as of Feb 2023. Let's keep it as is while it works. -import "golang.org/x/perf/benchstat" // nolint:all - -// Mann-Whitney U test. -func UTest(old, new *Sample) (pval float64, err error) { - // Unfortunately we cannot just invoke MannWhitneyUTest from x/perf/benchstat/internal/stats, - // so we first wrap the data in Metrics. - mOld := benchstat.Metrics{ - RValues: old.Xs, - } - mNew := benchstat.Metrics{ - RValues: new.Xs, - } - return benchstat.UTest(&mOld, &mNew) -} diff --git a/pkg/stats/sample.go b/pkg/stats/sample.go deleted file mode 100644 index 668675fc1..000000000 --- a/pkg/stats/sample.go +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2021 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -// Package stats provides various statistical operations and algorithms. -package stats - -import ( - "math" - "sort" -) - -// Sample represents a single sample - set of data points collected during an experiment. -type Sample struct { - Xs []float64 - Sorted bool -} - -func (s *Sample) Percentile(p float64) float64 { - s.Sort() - // The code below is taken from golang.org/x/perf/internal/stats - // Unfortunately, that package is internal and we cannot just import and use it. - N := float64(len(s.Xs)) - n := 1/3.0 + p*(N+1/3.0) // R8 - kf, frac := math.Modf(n) - k := int(kf) - if k <= 0 { - return s.Xs[0] - } else if k >= len(s.Xs) { - return s.Xs[len(s.Xs)-1] - } - return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1]) -} - -func (s *Sample) Median() float64 { - return s.Percentile(0.5) -} - -// Remove outliers by the Tukey's fences method. -func (s *Sample) RemoveOutliers() *Sample { - if len(s.Xs) < 4 { - // If the data set is too small, we cannot reliably detect outliers anyway. - return s.Copy() - } - s.Sort() - Q1 := s.Percentile(0.25) - Q3 := s.Percentile(0.75) - minValue := Q1 - 1.5*(Q3-Q1) - maxValue := Q3 + 1.5*(Q3-Q1) - xs := []float64{} - for _, value := range s.Xs { - if value >= minValue && value <= maxValue { - xs = append(xs, value) - } - } - return &Sample{ - Xs: xs, - Sorted: s.Sorted, - } -} - -func (s *Sample) Copy() *Sample { - return &Sample{ - Xs: append([]float64{}, s.Xs...), - Sorted: s.Sorted, - } -} - -func (s *Sample) Sort() { - if !s.Sorted { - sort.Slice(s.Xs, func(i, j int) bool { return s.Xs[i] < s.Xs[j] }) - s.Sorted = true - } -} diff --git a/pkg/stats/sample/pvalue.go b/pkg/stats/sample/pvalue.go new file mode 100644 index 000000000..acfff4bc4 --- /dev/null +++ b/pkg/stats/sample/pvalue.go @@ -0,0 +1,20 @@ +// Copyright 2021 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package sample + +// TODO: I didn't find the substitution as of Feb 2023. Let's keep it as is while it works. +import "golang.org/x/perf/benchstat" // nolint:all + +// Mann-Whitney U test. +func UTest(old, new *Sample) (pval float64, err error) { + // Unfortunately we cannot just invoke MannWhitneyUTest from x/perf/benchstat/internal/stats, + // so we first wrap the data in Metrics. + mOld := benchstat.Metrics{ + RValues: old.Xs, + } + mNew := benchstat.Metrics{ + RValues: new.Xs, + } + return benchstat.UTest(&mOld, &mNew) +} diff --git a/pkg/stats/sample/sample.go b/pkg/stats/sample/sample.go new file mode 100644 index 000000000..740f9aefe --- /dev/null +++ b/pkg/stats/sample/sample.go @@ -0,0 +1,73 @@ +// Copyright 2021 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +// Package sample provides various statistical operations and algorithms. +package sample + +import ( + "math" + "sort" +) + +// Sample represents a single sample - set of data points collected during an experiment. +type Sample struct { + Xs []float64 + Sorted bool +} + +func (s *Sample) Percentile(p float64) float64 { + s.Sort() + // The code below is taken from golang.org/x/perf/internal/stats + // Unfortunately, that package is internal and we cannot just import and use it. + N := float64(len(s.Xs)) + n := 1/3.0 + p*(N+1/3.0) // R8 + kf, frac := math.Modf(n) + k := int(kf) + if k <= 0 { + return s.Xs[0] + } else if k >= len(s.Xs) { + return s.Xs[len(s.Xs)-1] + } + return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1]) +} + +func (s *Sample) Median() float64 { + return s.Percentile(0.5) +} + +// Remove outliers by the Tukey's fences method. +func (s *Sample) RemoveOutliers() *Sample { + if len(s.Xs) < 4 { + // If the data set is too small, we cannot reliably detect outliers anyway. + return s.Copy() + } + s.Sort() + Q1 := s.Percentile(0.25) + Q3 := s.Percentile(0.75) + minValue := Q1 - 1.5*(Q3-Q1) + maxValue := Q3 + 1.5*(Q3-Q1) + xs := []float64{} + for _, value := range s.Xs { + if value >= minValue && value <= maxValue { + xs = append(xs, value) + } + } + return &Sample{ + Xs: xs, + Sorted: s.Sorted, + } +} + +func (s *Sample) Copy() *Sample { + return &Sample{ + Xs: append([]float64{}, s.Xs...), + Sorted: s.Sorted, + } +} + +func (s *Sample) Sort() { + if !s.Sorted { + sort.Slice(s.Xs, func(i, j int) bool { return s.Xs[i] < s.Xs[j] }) + s.Sorted = true + } +} diff --git a/pkg/stats/sample/sample_test.go b/pkg/stats/sample/sample_test.go new file mode 100644 index 000000000..ac7845ccf --- /dev/null +++ b/pkg/stats/sample/sample_test.go @@ -0,0 +1,66 @@ +// Copyright 2021 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package sample + +import ( + "reflect" + "testing" +) + +func TestMedian(t *testing.T) { + tests := []struct { + input []float64 + minMedian float64 + maxMedian float64 + }{ + { + input: []float64{1, 2, 3}, + minMedian: 1.99, // we cannot do exact floating point equality comparison + maxMedian: 2.01, + }, + { + input: []float64{0, 1, 2, 3}, + minMedian: 1.0, + maxMedian: 2.0, + }, + } + for _, test := range tests { + sample := Sample{Xs: test.input} + median := sample.Median() + if median < test.minMedian || median > test.maxMedian { + t.Errorf("sample %v, median got %v, median expected [%v;%v]", + test.input, median, test.minMedian, test.maxMedian) + } + } +} + +func TestRemoveOutliers(t *testing.T) { + // Some tests just to check the overall sanity of the method. + tests := []struct { + input []float64 + output []float64 + }{ + { + input: []float64{-20, 1, 2, 3, 4, 5}, + output: []float64{1, 2, 3, 4, 5}, + }, + { + input: []float64{1, 2, 3, 4, 25}, + output: []float64{1, 2, 3, 4}, + }, + { + input: []float64{-10, -5, 0, 5, 10, 15}, + output: []float64{-10, -5, 0, 5, 10, 15}, + }, + } + for _, test := range tests { + sample := Sample{Xs: test.input} + result := sample.RemoveOutliers() + result.Sort() + if !reflect.DeepEqual(result.Xs, test.output) { + t.Errorf("input: %v, expected no outliers: %v, got: %v", + test.input, test.output, result.Xs) + } + } +} diff --git a/pkg/stats/sample_test.go b/pkg/stats/sample_test.go deleted file mode 100644 index 9e88e9f93..000000000 --- a/pkg/stats/sample_test.go +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2021 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -package stats - -import ( - "reflect" - "testing" -) - -func TestMedian(t *testing.T) { - tests := []struct { - input []float64 - minMedian float64 - maxMedian float64 - }{ - { - input: []float64{1, 2, 3}, - minMedian: 1.99, // we cannot do exact floating point equality comparison - maxMedian: 2.01, - }, - { - input: []float64{0, 1, 2, 3}, - minMedian: 1.0, - maxMedian: 2.0, - }, - } - for _, test := range tests { - sample := Sample{Xs: test.input} - median := sample.Median() - if median < test.minMedian || median > test.maxMedian { - t.Errorf("sample %v, median got %v, median expected [%v;%v]", - test.input, median, test.minMedian, test.maxMedian) - } - } -} - -func TestRemoveOutliers(t *testing.T) { - // Some tests just to check the overall sanity of the method. - tests := []struct { - input []float64 - output []float64 - }{ - { - input: []float64{-20, 1, 2, 3, 4, 5}, - output: []float64{1, 2, 3, 4, 5}, - }, - { - input: []float64{1, 2, 3, 4, 25}, - output: []float64{1, 2, 3, 4}, - }, - { - input: []float64{-10, -5, 0, 5, 10, 15}, - output: []float64{-10, -5, 0, 5, 10, 15}, - }, - } - for _, test := range tests { - sample := Sample{Xs: test.input} - result := sample.RemoveOutliers() - result.Sort() - if !reflect.DeepEqual(result.Xs, test.output) { - t.Errorf("input: %v, expected no outliers: %v, got: %v", - test.input, test.output, result.Xs) - } - } -} -- cgit mrf-deployment