aboutsummaryrefslogtreecommitdiffstats
path: root/pkg
diff options
context:
space:
mode:
authorAleksandr Nogikh <nogikh@google.com>2025-04-11 14:08:10 +0200
committerAleksandr Nogikh <nogikh@google.com>2025-04-15 08:18:26 +0000
commit81b407fe4e14e1e40fa060509157143e51ec1739 (patch)
treeccaf1229e9ce952bbb832d81c9c5d0501ebdb0de /pkg
parent0bd6db418098e2d98a2edf948b41410d3d9f9e70 (diff)
pkg/manager: wait until corpus is triaged for diff fuzzing
Track the right moment to start bug reproductions more precisely: 1) Either once 90% of the corpus is triaged (*). 2) Or once we are past 50% of the time dedicated for fuzzing. Whichever happens earlier. (*) The last few percent are usually quite slow, and they bring far fewer covered PCs than all the previous ones.
Diffstat (limited to 'pkg')
-rw-r--r--pkg/fuzzer/fuzzer.go6
-rw-r--r--pkg/manager/diff.go61
2 files changed, 61 insertions, 6 deletions
diff --git a/pkg/fuzzer/fuzzer.go b/pkg/fuzzer/fuzzer.go
index 3e5f94fc3..17c5af47b 100644
--- a/pkg/fuzzer/fuzzer.go
+++ b/pkg/fuzzer/fuzzer.go
@@ -106,8 +106,12 @@ func newExecQueues(fuzzer *Fuzzer) execQueues {
return ret
}
+func (fuzzer *Fuzzer) CandidatesToTriage() int {
+ return fuzzer.statCandidates.Val() + fuzzer.statJobsTriageCandidate.Val()
+}
+
func (fuzzer *Fuzzer) CandidateTriageFinished() bool {
- return fuzzer.statCandidates.Val()+fuzzer.statJobsTriageCandidate.Val() == 0
+ return fuzzer.CandidatesToTriage() == 0
}
func (fuzzer *Fuzzer) execute(executor queue.Executor, req *queue.Request) *queue.Result {
diff --git a/pkg/manager/diff.go b/pkg/manager/diff.go
index 93922859b..0fc9b2bad 100644
--- a/pkg/manager/diff.go
+++ b/pkg/manager/diff.go
@@ -40,6 +40,11 @@ type DiffFuzzerConfig struct {
Debug bool
PatchedOnly chan *UniqueBug
ArtifactsDir string // Where to store the artifacts that supplement the logs.
+ // The fuzzer waits no more than MaxTriageTime time until it starts taking VMs away
+ // for bug reproduction.
+ // The option may help find a balance between spending too much time triaging
+ // the corpus and not reaching a proper kernel coverage.
+ MaxTriageTime time.Duration
}
type UniqueBug struct {
@@ -76,6 +81,7 @@ func RunDiffFuzzer(ctx context.Context, baseCfg, newCfg *mgrconfig.Config, cfg D
store := &DiffFuzzerStore{BasePath: cfg.ArtifactsDir}
diffCtx := &diffContext{
+ cfg: cfg,
doneRepro: make(chan *ReproResult),
base: base,
new: new,
@@ -102,6 +108,7 @@ func RunDiffFuzzer(ctx context.Context, baseCfg, newCfg *mgrconfig.Config, cfg D
}
type diffContext struct {
+ cfg DiffFuzzerConfig
store *DiffFuzzerStore
http *HTTPServer
@@ -126,10 +133,8 @@ func (dc *diffContext) Loop(baseCtx context.Context) error {
g.Go(func() error {
// Let both base and patched instances somewhat progress in fuzzing before we take
// VMs away for bug reproduction.
- // TODO: determine the exact moment of corpus triage.
- select {
- case <-time.After(15 * time.Minute):
- case <-ctx.Done():
+ dc.waitCorpusTriage(ctx)
+ if ctx.Err() != nil {
return nil
}
log.Logf(0, "starting bug reproductions")
@@ -202,6 +207,35 @@ loop:
return g.Wait()
}
+func (dc *diffContext) waitCorpusTriage(ctx context.Context) {
+ // Wait either until we have triaged 90% of the candidates or
+ // once MaxTriageTime has passed.
+ // We don't want to wait for 100% of the candidates because there's usually a long
+ // tail of slow triage jobs + the value of the candidates diminishes over time.
+ const triagedThreshold = 0.9
+ const backOffTime = 30 * time.Second
+
+ startedAt := time.Now()
+ for {
+ select {
+ case <-time.After(backOffTime):
+ case <-ctx.Done():
+ return
+ }
+ triaged := dc.new.triageProgress()
+ if triaged >= triagedThreshold {
+ log.Logf(0, "triaged %.1f%% of the corpus", triaged*100.0)
+ return
+ }
+ if dc.cfg.MaxTriageTime != 0 &&
+ time.Since(startedAt) >= dc.cfg.MaxTriageTime {
+ log.Logf(0, "timed out waiting for %.1f%% triage (have %.1f%%)",
+ triagedThreshold*100.0, triaged*100.0)
+ return
+ }
+ }
+}
+
// TODO: instead of this limit, consider expotentially growing delays between reproduction attempts.
const maxReproAttempts = 6
@@ -268,6 +302,8 @@ type kernelContext struct {
pool *vm.Dispatcher
features flatrpc.Feature
candidates chan []fuzzer.Candidate
+ // Once candidates is assigned, candidatesCount holds their original count.
+ candidatesCount atomic.Int64
coverFilters CoverageFilters
reportGenerator *ReportGeneratorWrapper
@@ -381,7 +417,6 @@ func (kc *kernelContext) setupFuzzer(features flatrpc.Feature, syscalls map[*pro
log.Logf(level, msg, args...)
},
}, rnd, kc.cfg.Target)
- kc.fuzzer.Store(fuzzerObj)
if kc.http != nil {
kc.http.Fuzzer.Store(fuzzerObj)
@@ -396,6 +431,9 @@ func (kc *kernelContext) setupFuzzer(features flatrpc.Feature, syscalls map[*pro
// The loop will be aborted later.
break
}
+ // We assign kc.fuzzer after kc.candidatesCount to simplify the triageProgress implementation.
+ kc.candidatesCount.Store(int64(len(candidates)))
+ kc.fuzzer.Store(fuzzerObj)
filtered := FilterCandidates(candidates, syscalls, false).Candidates
log.Logf(0, "%s: adding %d seeds", kc.name, len(filtered))
@@ -485,6 +523,19 @@ func (kc *kernelContext) runInstance(ctx context.Context, inst *vm.Instance,
return rep, err
}
+func (kc *kernelContext) triageProgress() float64 {
+ fuzzer := kc.fuzzer.Load()
+ if fuzzer == nil {
+ return 0
+ }
+ total := kc.candidatesCount.Load()
+ if total == 0.0 {
+ // There were no candidates in the first place.
+ return 1
+ }
+ return 1.0 - float64(fuzzer.CandidatesToTriage())/float64(total)
+}
+
// reproRunner is used to run reproducers on the base kernel to determine whether it is affected.
type reproRunner struct {
done chan reproRunnerResult