From 286c38bd1ae34870c40986e8ddebfd65d6e5049e Mon Sep 17 00:00:00 2001
From: Aleksandr Nogikh <nogikh@google.com>
Date: Thu, 13 Jun 2024 20:30:13 +0200
Subject: pkg/stats: split out pkg/stats/sample

This will reduce the number of dependencies needed for the main
syzkaller tools.
---
 pkg/stats/pvalue.go             | 20 -----------
 pkg/stats/sample.go             | 73 -----------------------------------------
 pkg/stats/sample/pvalue.go      | 20 +++++++++++
 pkg/stats/sample/sample.go      | 73 +++++++++++++++++++++++++++++++++++++++++
 pkg/stats/sample/sample_test.go | 66 +++++++++++++++++++++++++++++++++++++
 pkg/stats/sample_test.go        | 66 -------------------------------------
 tools/syz-testbed/stats.go      | 26 +++++++--------
 tools/syz-testbed/table.go      |  8 ++---
 tools/syz-testbed/table_test.go |  8 ++---
 9 files changed, 180 insertions(+), 180 deletions(-)
 delete mode 100644 pkg/stats/pvalue.go
 delete mode 100644 pkg/stats/sample.go
 create mode 100644 pkg/stats/sample/pvalue.go
 create mode 100644 pkg/stats/sample/sample.go
 create mode 100644 pkg/stats/sample/sample_test.go
 delete mode 100644 pkg/stats/sample_test.go

diff --git a/pkg/stats/pvalue.go b/pkg/stats/pvalue.go
deleted file mode 100644
index 9f413f5aa..000000000
--- a/pkg/stats/pvalue.go
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright 2021 syzkaller project authors. All rights reserved.
-// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
-
-package stats
-
-// TODO: I didn't find the substitution as of Feb 2023. Let's keep it as is while it works.
-import "golang.org/x/perf/benchstat" // nolint:all
-
-// Mann-Whitney U test.
-func UTest(old, new *Sample) (pval float64, err error) {
-	// Unfortunately we cannot just invoke MannWhitneyUTest from x/perf/benchstat/internal/stats,
-	// so we first wrap the data in Metrics.
-	mOld := benchstat.Metrics{
-		RValues: old.Xs,
-	}
-	mNew := benchstat.Metrics{
-		RValues: new.Xs,
-	}
-	return benchstat.UTest(&mOld, &mNew)
-}
diff --git a/pkg/stats/sample.go b/pkg/stats/sample.go
deleted file mode 100644
index 668675fc1..000000000
--- a/pkg/stats/sample.go
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright 2021 syzkaller project authors. All rights reserved.
-// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
-
-// Package stats provides various statistical operations and algorithms.
-package stats
-
-import (
-	"math"
-	"sort"
-)
-
-// Sample represents a single sample - set of data points collected during an experiment.
-type Sample struct {
-	Xs     []float64
-	Sorted bool
-}
-
-func (s *Sample) Percentile(p float64) float64 {
-	s.Sort()
-	// The code below is taken from golang.org/x/perf/internal/stats
-	// Unfortunately, that package is internal and we cannot just import and use it.
-	N := float64(len(s.Xs))
-	n := 1/3.0 + p*(N+1/3.0) // R8
-	kf, frac := math.Modf(n)
-	k := int(kf)
-	if k <= 0 {
-		return s.Xs[0]
-	} else if k >= len(s.Xs) {
-		return s.Xs[len(s.Xs)-1]
-	}
-	return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1])
-}
-
-func (s *Sample) Median() float64 {
-	return s.Percentile(0.5)
-}
-
-// Remove outliers by the Tukey's fences method.
-func (s *Sample) RemoveOutliers() *Sample {
-	if len(s.Xs) < 4 {
-		// If the data set is too small, we cannot reliably detect outliers anyway.
-		return s.Copy()
-	}
-	s.Sort()
-	Q1 := s.Percentile(0.25)
-	Q3 := s.Percentile(0.75)
-	minValue := Q1 - 1.5*(Q3-Q1)
-	maxValue := Q3 + 1.5*(Q3-Q1)
-	xs := []float64{}
-	for _, value := range s.Xs {
-		if value >= minValue && value <= maxValue {
-			xs = append(xs, value)
-		}
-	}
-	return &Sample{
-		Xs:     xs,
-		Sorted: s.Sorted,
-	}
-}
-
-func (s *Sample) Copy() *Sample {
-	return &Sample{
-		Xs:     append([]float64{}, s.Xs...),
-		Sorted: s.Sorted,
-	}
-}
-
-func (s *Sample) Sort() {
-	if !s.Sorted {
-		sort.Slice(s.Xs, func(i, j int) bool { return s.Xs[i] < s.Xs[j] })
-		s.Sorted = true
-	}
-}
diff --git a/pkg/stats/sample/pvalue.go b/pkg/stats/sample/pvalue.go
new file mode 100644
index 000000000..acfff4bc4
--- /dev/null
+++ b/pkg/stats/sample/pvalue.go
@@ -0,0 +1,20 @@
+// Copyright 2021 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package sample
+
+// TODO: I couldn't find a replacement for this package as of Feb 2023. Let's keep it as is while it works.
+import "golang.org/x/perf/benchstat" // nolint:all
+
+// UTest runs the Mann-Whitney U test on the two samples.
+func UTest(old, new *Sample) (pval float64, err error) {
+	// Unfortunately we cannot just invoke MannWhitneyUTest from x/perf/benchstat/internal/stats,
+	// so we first wrap the data in Metrics.
+	mOld := benchstat.Metrics{
+		RValues: old.Xs,
+	}
+	mNew := benchstat.Metrics{
+		RValues: new.Xs,
+	}
+	return benchstat.UTest(&mOld, &mNew)
+}
diff --git a/pkg/stats/sample/sample.go b/pkg/stats/sample/sample.go
new file mode 100644
index 000000000..740f9aefe
--- /dev/null
+++ b/pkg/stats/sample/sample.go
@@ -0,0 +1,73 @@
+// Copyright 2021 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+// Package sample provides various statistical operations and algorithms.
+package sample
+
+import (
+	"math"
+	"sort"
+)
+
+// Sample represents a single sample: a set of data points collected during an experiment.
+type Sample struct {
+	Xs     []float64
+	Sorted bool
+}
+
+func (s *Sample) Percentile(p float64) float64 {
+	s.Sort()
+	// The code below is taken from golang.org/x/perf/internal/stats.
+	// Unfortunately, that package is internal, so we cannot simply import it.
+	N := float64(len(s.Xs))
+	n := 1/3.0 + p*(N+1/3.0) // R8
+	kf, frac := math.Modf(n)
+	k := int(kf)
+	if k <= 0 {
+		return s.Xs[0]
+	} else if k >= len(s.Xs) {
+		return s.Xs[len(s.Xs)-1]
+	}
+	return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1])
+}
+
+func (s *Sample) Median() float64 {
+	return s.Percentile(0.5)
+}
+
+// RemoveOutliers returns a new Sample without the outliers, as detected by Tukey's fences method.
+func (s *Sample) RemoveOutliers() *Sample {
+	if len(s.Xs) < 4 {
+		// If the data set is too small, we cannot reliably detect outliers anyway.
+		return s.Copy()
+	}
+	s.Sort()
+	Q1 := s.Percentile(0.25)
+	Q3 := s.Percentile(0.75)
+	minValue := Q1 - 1.5*(Q3-Q1)
+	maxValue := Q3 + 1.5*(Q3-Q1)
+	xs := []float64{}
+	for _, value := range s.Xs {
+		if value >= minValue && value <= maxValue {
+			xs = append(xs, value)
+		}
+	}
+	return &Sample{
+		Xs:     xs,
+		Sorted: s.Sorted,
+	}
+}
+
+func (s *Sample) Copy() *Sample {
+	return &Sample{
+		Xs:     append([]float64{}, s.Xs...),
+		Sorted: s.Sorted,
+	}
+}
+
+func (s *Sample) Sort() {
+	if !s.Sorted {
+		sort.Slice(s.Xs, func(i, j int) bool { return s.Xs[i] < s.Xs[j] })
+		s.Sorted = true
+	}
+}
diff --git a/pkg/stats/sample/sample_test.go b/pkg/stats/sample/sample_test.go
new file mode 100644
index 000000000..ac7845ccf
--- /dev/null
+++ b/pkg/stats/sample/sample_test.go
@@ -0,0 +1,66 @@
+// Copyright 2021 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package sample
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestMedian(t *testing.T) {
+	tests := []struct {
+		input     []float64
+		minMedian float64
+		maxMedian float64
+	}{
+		{
+			input:     []float64{1, 2, 3},
+			minMedian: 1.99, // we cannot do exact floating point equality comparison
+			maxMedian: 2.01,
+		},
+		{
+			input:     []float64{0, 1, 2, 3},
+			minMedian: 1.0,
+			maxMedian: 2.0,
+		},
+	}
+	for _, test := range tests {
+		sample := Sample{Xs: test.input}
+		median := sample.Median()
+		if median < test.minMedian || median > test.maxMedian {
+			t.Errorf("sample %v, median got %v, median expected [%v;%v]",
+				test.input, median, test.minMedian, test.maxMedian)
+		}
+	}
+}
+
+func TestRemoveOutliers(t *testing.T) {
+	// Some tests just to check the overall sanity of the method.
+	tests := []struct {
+		input  []float64
+		output []float64
+	}{
+		{
+			input:  []float64{-20, 1, 2, 3, 4, 5},
+			output: []float64{1, 2, 3, 4, 5},
+		},
+		{
+			input:  []float64{1, 2, 3, 4, 25},
+			output: []float64{1, 2, 3, 4},
+		},
+		{
+			input:  []float64{-10, -5, 0, 5, 10, 15},
+			output: []float64{-10, -5, 0, 5, 10, 15},
+		},
+	}
+	for _, test := range tests {
+		sample := Sample{Xs: test.input}
+		result := sample.RemoveOutliers()
+		result.Sort()
+		if !reflect.DeepEqual(result.Xs, test.output) {
+			t.Errorf("input: %v, expected no outliers: %v, got: %v",
+				test.input, test.output, result.Xs)
+		}
+	}
+}
diff --git a/pkg/stats/sample_test.go b/pkg/stats/sample_test.go
deleted file mode 100644
index 9e88e9f93..000000000
--- a/pkg/stats/sample_test.go
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright 2021 syzkaller project authors. All rights reserved.
-// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
-
-package stats
-
-import (
-	"reflect"
-	"testing"
-)
-
-func TestMedian(t *testing.T) {
-	tests := []struct {
-		input     []float64
-		minMedian float64
-		maxMedian float64
-	}{
-		{
-			input:     []float64{1, 2, 3},
-			minMedian: 1.99, // we cannot do exact floating point equality comparison
-			maxMedian: 2.01,
-		},
-		{
-			input:     []float64{0, 1, 2, 3},
-			minMedian: 1.0,
-			maxMedian: 2.0,
-		},
-	}
-	for _, test := range tests {
-		sample := Sample{Xs: test.input}
-		median := sample.Median()
-		if median < test.minMedian || median > test.maxMedian {
-			t.Errorf("sample %v, median got %v, median expected [%v;%v]",
-				test.input, median, test.minMedian, test.maxMedian)
-		}
-	}
-}
-
-func TestRemoveOutliers(t *testing.T) {
-	// Some tests just to check the overall sanity of the method.
-	tests := []struct {
-		input  []float64
-		output []float64
-	}{
-		{
-			input:  []float64{-20, 1, 2, 3, 4, 5},
-			output: []float64{1, 2, 3, 4, 5},
-		},
-		{
-			input:  []float64{1, 2, 3, 4, 25},
-			output: []float64{1, 2, 3, 4},
-		},
-		{
-			input:  []float64{-10, -5, 0, 5, 10, 15},
-			output: []float64{-10, -5, 0, 5, 10, 15},
-		},
-	}
-	for _, test := range tests {
-		sample := Sample{Xs: test.input}
-		result := sample.RemoveOutliers()
-		result.Sort()
-		if !reflect.DeepEqual(result.Xs, test.output) {
-			t.Errorf("input: %v, expected no outliers: %v, got: %v",
-				test.input, test.output, result.Xs)
-		}
-	}
-}
diff --git a/tools/syz-testbed/stats.go b/tools/syz-testbed/stats.go
index 8c20d495e..108c25ed1 100644
--- a/tools/syz-testbed/stats.go
+++ b/tools/syz-testbed/stats.go
@@ -12,7 +12,7 @@ import (
 	"time"
 
 	"github.com/google/syzkaller/pkg/osutil"
-	"github.com/google/syzkaller/pkg/stats"
+	"github.com/google/syzkaller/pkg/stats/sample"
 )
 
 type BugInfo struct {
@@ -103,12 +103,12 @@ func readBenches(benchFile string) ([]StatRecord, error) {
 
 // The input are stat snapshots of different instances taken at the same time.
 // This function groups those data points per stat types (e.g. exec total, crashes, etc.).
-func groupSamples(records []StatRecord) map[string]*stats.Sample {
-	ret := make(map[string]*stats.Sample)
+func groupSamples(records []StatRecord) map[string]*sample.Sample {
+	ret := make(map[string]*sample.Sample)
 	for _, record := range records {
 		for key, value := range record {
 			if ret[key] == nil {
-				ret[key] = &stats.Sample{}
+				ret[key] = &sample.Sample{}
 			}
 			ret[key].Xs = append(ret[key].Xs, float64(value))
 		}
@@ -243,7 +243,7 @@ func (group RunResultGroup) minResultLength() int {
 	return ret
 }
 
-func (group RunResultGroup) groupNthRecord(i int) map[string]*stats.Sample {
+func (group RunResultGroup) groupNthRecord(i int) map[string]*sample.Sample {
 	records := []StatRecord{}
 	for _, result := range group.SyzManagerResults() {
 		records = append(records, result.StatRecords[i])
@@ -251,7 +251,7 @@ func (group RunResultGroup) groupNthRecord(i int) map[string]*stats.Sample {
 	return groupSamples(records)
 }
 
-func (group RunResultGroup) groupLastRecord() map[string]*stats.Sample {
+func (group RunResultGroup) groupLastRecord() map[string]*sample.Sample {
 	records := []StatRecord{}
 	for _, result := range group.SyzManagerResults() {
 		n := len(result.StatRecords)
@@ -304,7 +304,7 @@ func (view StatView) AlignedStatsTable(field string) (*Table, error) {
 			continue
 		}
 		// Unwind the samples so that they are aligned on the field value.
-		var samples map[string]*stats.Sample
+		var samples map[string]*sample.Sample
 		for i := minLen - 1; i >= 0; i-- {
 			candidate := group.groupNthRecord(i)
 			// TODO: consider data interpolation.
@@ -390,16 +390,16 @@ func (view StatView) GenerateReproDurationTable() (*Table, error) {
 		table.AddColumn(group.Name)
 	}
 	for _, group := range view.Groups {
-		samples := make(map[string]*stats.Sample)
+		samples := make(map[string]*sample.Sample)
 		for _, result := range group.SyzReproResults() {
 			title := result.Input.Title
-			var sample *stats.Sample
-			sample, ok := samples[title]
+			var sampleObj *sample.Sample
+			sampleObj, ok := samples[title]
 			if !ok {
-				sample = &stats.Sample{}
-				samples[title] = sample
+				sampleObj = &sample.Sample{}
+				samples[title] = sampleObj
 			}
-			sample.Xs = append(sample.Xs, result.Duration.Seconds())
+			sampleObj.Xs = append(sampleObj.Xs, result.Duration.Seconds())
 		}
 
 		for title, sample := range samples {
diff --git a/tools/syz-testbed/table.go b/tools/syz-testbed/table.go
index ee3ffd0e7..fae3e2279 100644
--- a/tools/syz-testbed/table.go
+++ b/tools/syz-testbed/table.go
@@ -10,7 +10,7 @@ import (
 	"os"
 	"sort"
 
-	"github.com/google/syzkaller/pkg/stats"
+	"github.com/google/syzkaller/pkg/stats/sample"
 )
 
 type Cell = interface{}
@@ -25,7 +25,7 @@ type Table struct {
 
 type ValueCell struct {
 	Value         float64
-	Sample        *stats.Sample
+	Sample        *sample.Sample
 	PercentChange *float64
 	PValue        *float64
 }
@@ -39,7 +39,7 @@ type BoolCell struct {
 	Value bool
 }
 
-func NewValueCell(sample *stats.Sample) *ValueCell {
+func NewValueCell(sample *sample.Sample) *ValueCell {
 	return &ValueCell{Value: sample.Median(), Sample: sample}
 }
 
@@ -183,7 +183,7 @@ func (t *Table) SetRelativeValues(baseColumn string) error {
 			}
 
 			cellSample := valueCell.Sample.RemoveOutliers()
-			pval, err := stats.UTest(baseSample, cellSample)
+			pval, err := sample.UTest(baseSample, cellSample)
 			if err == nil {
 				// Sometimes it fails because there are too few samples.
 				valueCell.PValue = new(float64)
diff --git a/tools/syz-testbed/table_test.go b/tools/syz-testbed/table_test.go
index fb07e7804..313802819 100644
--- a/tools/syz-testbed/table_test.go
+++ b/tools/syz-testbed/table_test.go
@@ -6,16 +6,16 @@ package main
 import (
 	"testing"
 
-	"github.com/google/syzkaller/pkg/stats"
+	"github.com/google/syzkaller/pkg/stats/sample"
 	"github.com/stretchr/testify/assert"
 )
 
 func TestRelativeValues(t *testing.T) {
 	table := NewTable("", "A", "B")
-	table.Set("row1", "A", NewValueCell(&stats.Sample{Xs: []float64{2, 2}}))
-	table.Set("row1", "B", NewValueCell(&stats.Sample{Xs: []float64{3, 3}}))
+	table.Set("row1", "A", NewValueCell(&sample.Sample{Xs: []float64{2, 2}}))
+	table.Set("row1", "B", NewValueCell(&sample.Sample{Xs: []float64{3, 3}}))
 	// Don't set row2/A.
-	table.Set("row2", "B", NewValueCell(&stats.Sample{Xs: []float64{1, 1}}))
+	table.Set("row2", "B", NewValueCell(&sample.Sample{Xs: []float64{1, 1}}))
 
 	err := table.SetRelativeValues("A")
 	assert.NoError(t, err)
-- 
cgit mrf-deployment