From 286c38bd1ae34870c40986e8ddebfd65d6e5049e Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Thu, 13 Jun 2024 20:30:13 +0200 Subject: pkg/stats: split out pkg/stats/sample This will reduce the number of dependencies needed for the main syzkaller tools. --- pkg/stats/pvalue.go | 20 ----------- pkg/stats/sample.go | 73 ----------------------------------------- pkg/stats/sample/pvalue.go | 20 +++++++++++ pkg/stats/sample/sample.go | 73 +++++++++++++++++++++++++++++++++++++++++ pkg/stats/sample/sample_test.go | 66 +++++++++++++++++++++++++++++++++++++ pkg/stats/sample_test.go | 66 ------------------------------------- tools/syz-testbed/stats.go | 26 +++++++-------- tools/syz-testbed/table.go | 8 ++--- tools/syz-testbed/table_test.go | 8 ++--- 9 files changed, 180 insertions(+), 180 deletions(-) delete mode 100644 pkg/stats/pvalue.go delete mode 100644 pkg/stats/sample.go create mode 100644 pkg/stats/sample/pvalue.go create mode 100644 pkg/stats/sample/sample.go create mode 100644 pkg/stats/sample/sample_test.go delete mode 100644 pkg/stats/sample_test.go diff --git a/pkg/stats/pvalue.go b/pkg/stats/pvalue.go deleted file mode 100644 index 9f413f5aa..000000000 --- a/pkg/stats/pvalue.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2021 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -package stats - -// TODO: I didn't find the substitution as of Feb 2023. Let's keep it as is while it works. -import "golang.org/x/perf/benchstat" // nolint:all - -// Mann-Whitney U test. -func UTest(old, new *Sample) (pval float64, err error) { - // Unfortunately we cannot just invoke MannWhitneyUTest from x/perf/benchstat/internal/stats, - // so we first wrap the data in Metrics. - mOld := benchstat.Metrics{ - RValues: old.Xs, - } - mNew := benchstat.Metrics{ - RValues: new.Xs, - } - return benchstat.UTest(&mOld, &mNew) -} diff --git a/pkg/stats/sample.go b/pkg/stats/sample.go deleted file mode 100644 index 668675fc1..000000000 --- a/pkg/stats/sample.go +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2021 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -// Package stats provides various statistical operations and algorithms. -package stats - -import ( - "math" - "sort" -) - -// Sample represents a single sample - set of data points collected during an experiment. -type Sample struct { - Xs []float64 - Sorted bool -} - -func (s *Sample) Percentile(p float64) float64 { - s.Sort() - // The code below is taken from golang.org/x/perf/internal/stats - // Unfortunately, that package is internal and we cannot just import and use it. - N := float64(len(s.Xs)) - n := 1/3.0 + p*(N+1/3.0) // R8 - kf, frac := math.Modf(n) - k := int(kf) - if k <= 0 { - return s.Xs[0] - } else if k >= len(s.Xs) { - return s.Xs[len(s.Xs)-1] - } - return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1]) -} - -func (s *Sample) Median() float64 { - return s.Percentile(0.5) -} - -// Remove outliers by the Tukey's fences method. -func (s *Sample) RemoveOutliers() *Sample { - if len(s.Xs) < 4 { - // If the data set is too small, we cannot reliably detect outliers anyway. - return s.Copy() - } - s.Sort() - Q1 := s.Percentile(0.25) - Q3 := s.Percentile(0.75) - minValue := Q1 - 1.5*(Q3-Q1) - maxValue := Q3 + 1.5*(Q3-Q1) - xs := []float64{} - for _, value := range s.Xs { - if value >= minValue && value <= maxValue { - xs = append(xs, value) - } - } - return &Sample{ - Xs: xs, - Sorted: s.Sorted, - } -} - -func (s *Sample) Copy() *Sample { - return &Sample{ - Xs: append([]float64{}, s.Xs...), - Sorted: s.Sorted, - } -} - -func (s *Sample) Sort() { - if !s.Sorted { - sort.Slice(s.Xs, func(i, j int) bool { return s.Xs[i] < s.Xs[j] }) - s.Sorted = true - } -} diff --git a/pkg/stats/sample/pvalue.go b/pkg/stats/sample/pvalue.go new file mode 100644 index 000000000..acfff4bc4 --- /dev/null +++ b/pkg/stats/sample/pvalue.go @@ -0,0 +1,20 @@ +// Copyright 2021 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package sample + +// TODO: I didn't find the substitution as of Feb 2023. Let's keep it as is while it works. +import "golang.org/x/perf/benchstat" // nolint:all + +// Mann-Whitney U test. +func UTest(old, new *Sample) (pval float64, err error) { + // Unfortunately we cannot just invoke MannWhitneyUTest from x/perf/benchstat/internal/stats, + // so we first wrap the data in Metrics. + mOld := benchstat.Metrics{ + RValues: old.Xs, + } + mNew := benchstat.Metrics{ + RValues: new.Xs, + } + return benchstat.UTest(&mOld, &mNew) +} diff --git a/pkg/stats/sample/sample.go b/pkg/stats/sample/sample.go new file mode 100644 index 000000000..740f9aefe --- /dev/null +++ b/pkg/stats/sample/sample.go @@ -0,0 +1,73 @@ +// Copyright 2021 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +// Package sample provides various statistical operations and algorithms. +package sample + +import ( + "math" + "sort" +) + +// Sample represents a single sample - set of data points collected during an experiment. +type Sample struct { + Xs []float64 + Sorted bool +} + +func (s *Sample) Percentile(p float64) float64 { + s.Sort() + // The code below is taken from golang.org/x/perf/internal/stats + // Unfortunately, that package is internal and we cannot just import and use it. + N := float64(len(s.Xs)) + n := 1/3.0 + p*(N+1/3.0) // R8 + kf, frac := math.Modf(n) + k := int(kf) + if k <= 0 { + return s.Xs[0] + } else if k >= len(s.Xs) { + return s.Xs[len(s.Xs)-1] + } + return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1]) +} + +func (s *Sample) Median() float64 { + return s.Percentile(0.5) +} + +// Remove outliers by the Tukey's fences method. +func (s *Sample) RemoveOutliers() *Sample { + if len(s.Xs) < 4 { + // If the data set is too small, we cannot reliably detect outliers anyway. + return s.Copy() + } + s.Sort() + Q1 := s.Percentile(0.25) + Q3 := s.Percentile(0.75) + minValue := Q1 - 1.5*(Q3-Q1) + maxValue := Q3 + 1.5*(Q3-Q1) + xs := []float64{} + for _, value := range s.Xs { + if value >= minValue && value <= maxValue { + xs = append(xs, value) + } + } + return &Sample{ + Xs: xs, + Sorted: s.Sorted, + } +} + +func (s *Sample) Copy() *Sample { + return &Sample{ + Xs: append([]float64{}, s.Xs...), + Sorted: s.Sorted, + } +} + +func (s *Sample) Sort() { + if !s.Sorted { + sort.Slice(s.Xs, func(i, j int) bool { return s.Xs[i] < s.Xs[j] }) + s.Sorted = true + } +} diff --git a/pkg/stats/sample/sample_test.go b/pkg/stats/sample/sample_test.go new file mode 100644 index 000000000..ac7845ccf --- /dev/null +++ b/pkg/stats/sample/sample_test.go @@ -0,0 +1,66 @@ +// Copyright 2021 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package sample + +import ( + "reflect" + "testing" +) + +func TestMedian(t *testing.T) { + tests := []struct { + input []float64 + minMedian float64 + maxMedian float64 + }{ + { + input: []float64{1, 2, 3}, + minMedian: 1.99, // we cannot do exact floating point equality comparison + maxMedian: 2.01, + }, + { + input: []float64{0, 1, 2, 3}, + minMedian: 1.0, + maxMedian: 2.0, + }, + } + for _, test := range tests { + sample := Sample{Xs: test.input} + median := sample.Median() + if median < test.minMedian || median > test.maxMedian { + t.Errorf("sample %v, median got %v, median expected [%v;%v]", + test.input, median, test.minMedian, test.maxMedian) + } + } +} + +func TestRemoveOutliers(t *testing.T) { + // Some tests just to check the overall sanity of the method. + tests := []struct { + input []float64 + output []float64 + }{ + { + input: []float64{-20, 1, 2, 3, 4, 5}, + output: []float64{1, 2, 3, 4, 5}, + }, + { + input: []float64{1, 2, 3, 4, 25}, + output: []float64{1, 2, 3, 4}, + }, + { + input: []float64{-10, -5, 0, 5, 10, 15}, + output: []float64{-10, -5, 0, 5, 10, 15}, + }, + } + for _, test := range tests { + sample := Sample{Xs: test.input} + result := sample.RemoveOutliers() + result.Sort() + if !reflect.DeepEqual(result.Xs, test.output) { + t.Errorf("input: %v, expected no outliers: %v, got: %v", + test.input, test.output, result.Xs) + } + } +} diff --git a/pkg/stats/sample_test.go b/pkg/stats/sample_test.go deleted file mode 100644 index 9e88e9f93..000000000 --- a/pkg/stats/sample_test.go +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2021 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. - -package stats - -import ( - "reflect" - "testing" -) - -func TestMedian(t *testing.T) { - tests := []struct { - input []float64 - minMedian float64 - maxMedian float64 - }{ - { - input: []float64{1, 2, 3}, - minMedian: 1.99, // we cannot do exact floating point equality comparison - maxMedian: 2.01, - }, - { - input: []float64{0, 1, 2, 3}, - minMedian: 1.0, - maxMedian: 2.0, - }, - } - for _, test := range tests { - sample := Sample{Xs: test.input} - median := sample.Median() - if median < test.minMedian || median > test.maxMedian { - t.Errorf("sample %v, median got %v, median expected [%v;%v]", - test.input, median, test.minMedian, test.maxMedian) - } - } -} - -func TestRemoveOutliers(t *testing.T) { - // Some tests just to check the overall sanity of the method. - tests := []struct { - input []float64 - output []float64 - }{ - { - input: []float64{-20, 1, 2, 3, 4, 5}, - output: []float64{1, 2, 3, 4, 5}, - }, - { - input: []float64{1, 2, 3, 4, 25}, - output: []float64{1, 2, 3, 4}, - }, - { - input: []float64{-10, -5, 0, 5, 10, 15}, - output: []float64{-10, -5, 0, 5, 10, 15}, - }, - } - for _, test := range tests { - sample := Sample{Xs: test.input} - result := sample.RemoveOutliers() - result.Sort() - if !reflect.DeepEqual(result.Xs, test.output) { - t.Errorf("input: %v, expected no outliers: %v, got: %v", - test.input, test.output, result.Xs) - } - } -} diff --git a/tools/syz-testbed/stats.go b/tools/syz-testbed/stats.go index 8c20d495e..108c25ed1 100644 --- a/tools/syz-testbed/stats.go +++ b/tools/syz-testbed/stats.go @@ -12,7 +12,7 @@ import ( "time" "github.com/google/syzkaller/pkg/osutil" - "github.com/google/syzkaller/pkg/stats" + "github.com/google/syzkaller/pkg/stats/sample" ) type BugInfo struct { @@ -103,12 +103,12 @@ func readBenches(benchFile string) ([]StatRecord, error) { // The input are stat snapshots of different instances taken at the same time. // This function groups those data points per stat types (e.g. exec total, crashes, etc.). -func groupSamples(records []StatRecord) map[string]*stats.Sample { - ret := make(map[string]*stats.Sample) +func groupSamples(records []StatRecord) map[string]*sample.Sample { + ret := make(map[string]*sample.Sample) for _, record := range records { for key, value := range record { if ret[key] == nil { - ret[key] = &stats.Sample{} + ret[key] = &sample.Sample{} } ret[key].Xs = append(ret[key].Xs, float64(value)) } @@ -243,7 +243,7 @@ func (group RunResultGroup) minResultLength() int { return ret } -func (group RunResultGroup) groupNthRecord(i int) map[string]*stats.Sample { +func (group RunResultGroup) groupNthRecord(i int) map[string]*sample.Sample { records := []StatRecord{} for _, result := range group.SyzManagerResults() { records = append(records, result.StatRecords[i]) @@ -251,7 +251,7 @@ func (group RunResultGroup) groupNthRecord(i int) map[string]*stats.Sample { return groupSamples(records) } -func (group RunResultGroup) groupLastRecord() map[string]*stats.Sample { +func (group RunResultGroup) groupLastRecord() map[string]*sample.Sample { records := []StatRecord{} for _, result := range group.SyzManagerResults() { n := len(result.StatRecords) @@ -304,7 +304,7 @@ func (view StatView) AlignedStatsTable(field string) (*Table, error) { continue } // Unwind the samples so that they are aligned on the field value. - var samples map[string]*stats.Sample + var samples map[string]*sample.Sample for i := minLen - 1; i >= 0; i-- { candidate := group.groupNthRecord(i) // TODO: consider data interpolation. @@ -390,16 +390,16 @@ func (view StatView) GenerateReproDurationTable() (*Table, error) { table.AddColumn(group.Name) } for _, group := range view.Groups { - samples := make(map[string]*stats.Sample) + samples := make(map[string]*sample.Sample) for _, result := range group.SyzReproResults() { title := result.Input.Title - var sample *stats.Sample - sample, ok := samples[title] + var sampleObj *sample.Sample + sampleObj, ok := samples[title] if !ok { - sample = &stats.Sample{} - samples[title] = sample + sampleObj = &sample.Sample{} + samples[title] = sampleObj } - sample.Xs = append(sample.Xs, result.Duration.Seconds()) + sampleObj.Xs = append(sampleObj.Xs, result.Duration.Seconds()) } for title, sample := range samples { diff --git a/tools/syz-testbed/table.go b/tools/syz-testbed/table.go index ee3ffd0e7..fae3e2279 100644 --- a/tools/syz-testbed/table.go +++ b/tools/syz-testbed/table.go @@ -10,7 +10,7 @@ import ( "os" "sort" - "github.com/google/syzkaller/pkg/stats" + "github.com/google/syzkaller/pkg/stats/sample" ) type Cell = interface{} @@ -25,7 +25,7 @@ type Table struct { type ValueCell struct { Value float64 - Sample *stats.Sample + Sample *sample.Sample PercentChange *float64 PValue *float64 } @@ -39,7 +39,7 @@ type BoolCell struct { Value bool } -func NewValueCell(sample *stats.Sample) *ValueCell { +func NewValueCell(sample *sample.Sample) *ValueCell { return &ValueCell{Value: sample.Median(), Sample: sample} } @@ -183,7 +183,7 @@ func (t *Table) SetRelativeValues(baseColumn string) error { } cellSample := valueCell.Sample.RemoveOutliers() - pval, err := stats.UTest(baseSample, cellSample) + pval, err := sample.UTest(baseSample, cellSample) if err == nil { // Sometimes it fails because there are too few samples. valueCell.PValue = new(float64) diff --git a/tools/syz-testbed/table_test.go b/tools/syz-testbed/table_test.go index fb07e7804..313802819 100644 --- a/tools/syz-testbed/table_test.go +++ b/tools/syz-testbed/table_test.go @@ -6,16 +6,16 @@ package main import ( "testing" - "github.com/google/syzkaller/pkg/stats" + "github.com/google/syzkaller/pkg/stats/sample" "github.com/stretchr/testify/assert" ) func TestRelativeValues(t *testing.T) { table := NewTable("", "A", "B") - table.Set("row1", "A", NewValueCell(&stats.Sample{Xs: []float64{2, 2}})) - table.Set("row1", "B", NewValueCell(&stats.Sample{Xs: []float64{3, 3}})) + table.Set("row1", "A", NewValueCell(&sample.Sample{Xs: []float64{2, 2}})) + table.Set("row1", "B", NewValueCell(&sample.Sample{Xs: []float64{3, 3}})) // Don't set row2/A. - table.Set("row2", "B", NewValueCell(&stats.Sample{Xs: []float64{1, 1}})) + table.Set("row2", "B", NewValueCell(&sample.Sample{Xs: []float64{1, 1}})) err := table.SetRelativeValues("A") assert.NoError(t, err) -- cgit mrf-deployment