aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/stat/sample/sample.go
blob: 740f9aefe77b9cddb664485aae4db3c88b4d29cb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// Copyright 2021 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

// Package sample provides various statistical operations and algorithms.
package sample

import (
	"math"
	"sort"
)

// Sample represents a single sample - set of data points collected during an experiment.
type Sample struct {
	Xs     []float64
	Sorted bool
}

func (s *Sample) Percentile(p float64) float64 {
	s.Sort()
	// The code below is taken from golang.org/x/perf/internal/stats
	// Unfortunately, that package is internal and we cannot just import and use it.
	N := float64(len(s.Xs))
	n := 1/3.0 + p*(N+1/3.0) // R8
	kf, frac := math.Modf(n)
	k := int(kf)
	if k <= 0 {
		return s.Xs[0]
	} else if k >= len(s.Xs) {
		return s.Xs[len(s.Xs)-1]
	}
	return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1])
}

func (s *Sample) Median() float64 {
	return s.Percentile(0.5)
}

// Remove outliers by the Tukey's fences method.
func (s *Sample) RemoveOutliers() *Sample {
	if len(s.Xs) < 4 {
		// If the data set is too small, we cannot reliably detect outliers anyway.
		return s.Copy()
	}
	s.Sort()
	Q1 := s.Percentile(0.25)
	Q3 := s.Percentile(0.75)
	minValue := Q1 - 1.5*(Q3-Q1)
	maxValue := Q3 + 1.5*(Q3-Q1)
	xs := []float64{}
	for _, value := range s.Xs {
		if value >= minValue && value <= maxValue {
			xs = append(xs, value)
		}
	}
	return &Sample{
		Xs:     xs,
		Sorted: s.Sorted,
	}
}

func (s *Sample) Copy() *Sample {
	return &Sample{
		Xs:     append([]float64{}, s.Xs...),
		Sorted: s.Sorted,
	}
}

func (s *Sample) Sort() {
	if !s.Sorted {
		sort.Slice(s.Xs, func(i, j int) bool { return s.Xs[i] < s.Xs[j] })
		s.Sorted = true
	}
}