From 67945471d459ce8031b65988d49bf03b726ae691 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 15 Jan 2026 19:45:16 +0100 Subject: pkg/aflow: add notion of flow errors Flow errors denote failure of the flow itself, rather than an infrastructure error. A flow error means an expected condition in the flow under which it cannot continue and cannot produce the expected outputs. For example, we are doing something with the kernel, but the kernel build fails. Flow errors shouldn't be flagged in infrastructure monitoring. Fixes #6610 --- pkg/aflow/action/crash/reproduce.go | 9 +++++++-- pkg/aflow/action/kernel/build.go | 2 +- pkg/aflow/execute.go | 20 ++++++++++++++++++++ syz-agent/agent.go | 2 +- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/pkg/aflow/action/crash/reproduce.go b/pkg/aflow/action/crash/reproduce.go index 33be02b27..15ada378d 100644 --- a/pkg/aflow/action/crash/reproduce.go +++ b/pkg/aflow/action/crash/reproduce.go @@ -52,6 +52,7 @@ func reproduce(ctx *aflow.Context, args reproduceArgs) (reproduceResult, error) if err != nil { return reproduceResult{}, err } + const noCrash = "reproducer did not crash" desc := fmt.Sprintf("kernel commit %v, kernel config hash %v, image hash %v,"+ " vm %v, vm config hash %v, C repro hash %v", args.KernelCommit, hash.String(args.KernelConfig), hash.String(imageData), @@ -91,7 +92,7 @@ func reproduce(ctx *aflow.Context, args reproduceArgs) (reproduceResult, error) } os.RemoveAll(cfg.Workdir) if results[0].Error == nil { - results[0].Error = errors.New("reproducer did not crash") + results[0].Error = errors.New(noCrash) } file, data := "", []byte(nil) var crashErr *instance.CrashError @@ -106,7 +107,11 @@ func reproduce(ctx *aflow.Context, args reproduceArgs) (reproduceResult, error) return reproduceResult{}, err } if data, err := os.ReadFile(filepath.Join(dir, "error")); err == nil { - return reproduceResult{}, errors.New(string(data)) + err := errors.New(string(data)) + if err.Error() == noCrash { + err = aflow.FlowError(err) + } + 
return reproduceResult{}, err } data, err := os.ReadFile(filepath.Join(dir, "report")) return reproduceResult{ diff --git a/pkg/aflow/action/kernel/build.go b/pkg/aflow/action/kernel/build.go index 5cc9fd796..594b33fb5 100644 --- a/pkg/aflow/action/kernel/build.go +++ b/pkg/aflow/action/kernel/build.go @@ -47,7 +47,7 @@ func buildKernel(ctx *aflow.Context, args buildArgs) (buildResult, error) { compileCommnads := "compile_commands.json" makeArgs = append(makeArgs, path.Base(image), compileCommnads) if _, err := osutil.RunCmd(time.Hour, args.KernelSrc, "make", makeArgs...); err != nil { - return err + return aflow.FlowError(err) } // Remove main intermediate build files, we don't need them anymore // and they take lots of space. Keep generated source files. diff --git a/pkg/aflow/execute.go b/pkg/aflow/execute.go index 4133c01c9..482a58fb4 100644 --- a/pkg/aflow/execute.go +++ b/pkg/aflow/execute.go @@ -5,6 +5,7 @@ package aflow import ( "context" + "errors" "fmt" "maps" "os" @@ -70,6 +71,25 @@ func (flow *Flow) Execute(c context.Context, model, workdir string, inputs map[s return span.Results, nil } +// FlowError creates an error that denotes failure of the flow itself, +// rather than an infrastructure error. A flow error means an expected +// condition in the flow under which it cannot continue and cannot produce +// the expected outputs. For example, we are doing something with the kernel, +// but the kernel build fails. Flow errors shouldn't be flagged in +// infrastructure monitoring. 
+func FlowError(err error) error { + return &flowError{err} +} + +func IsFlowError(err error) bool { + var flowErr *flowError + return errors.As(err, &flowErr) +} + +type flowError struct { + error +} + type ( onEvent func(*trajectory.Span) error generateContentFunc func(*genai.GenerateContentConfig, []*genai.Content) ( diff --git a/syz-agent/agent.go b/syz-agent/agent.go index c5aad2470..54d6a67c6 100644 --- a/syz-agent/agent.go +++ b/syz-agent/agent.go @@ -199,7 +199,7 @@ func (s *Server) poll(ctx context.Context) ( if err := s.dash.AIJobDone(doneReq); err != nil { return false, err } - if jobErr != nil { + if jobErr != nil && !aflow.IsFlowError(jobErr) { return false, jobErr } return true, nil -- cgit mrf-deployment