aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2026-01-15 19:45:16 +0100
committerDmitry Vyukov <dvyukov@google.com>2026-01-16 08:38:40 +0000
commit67945471d459ce8031b65988d49bf03b726ae691 (patch)
tree25ab1293163ef3572079fe99d8f9d407b44a2fa3
parent592aa8f1268e40b9d75deb6d87166e02ac558974 (diff)
pkg/aflow: add notion of flow errors
Flow errors denote failure of the flow itself, rather than an infrastructure error. A flow error means an expected condition in the flow when it cannot continue, and cannot produce expected outputs. For example, if we are doing something with the kernel, but the kernel build fails. Flow errors shouldn't be flagged in infrastructure monitoring. Fixes #6610
-rw-r--r--pkg/aflow/action/crash/reproduce.go9
-rw-r--r--pkg/aflow/action/kernel/build.go2
-rw-r--r--pkg/aflow/execute.go20
-rw-r--r--syz-agent/agent.go2
4 files changed, 29 insertions, 4 deletions
diff --git a/pkg/aflow/action/crash/reproduce.go b/pkg/aflow/action/crash/reproduce.go
index 33be02b27..15ada378d 100644
--- a/pkg/aflow/action/crash/reproduce.go
+++ b/pkg/aflow/action/crash/reproduce.go
@@ -52,6 +52,7 @@ func reproduce(ctx *aflow.Context, args reproduceArgs) (reproduceResult, error)
if err != nil {
return reproduceResult{}, err
}
+ const noCrash = "reproducer did not crash"
desc := fmt.Sprintf("kernel commit %v, kernel config hash %v, image hash %v,"+
" vm %v, vm config hash %v, C repro hash %v",
args.KernelCommit, hash.String(args.KernelConfig), hash.String(imageData),
@@ -91,7 +92,7 @@ func reproduce(ctx *aflow.Context, args reproduceArgs) (reproduceResult, error)
}
os.RemoveAll(cfg.Workdir)
if results[0].Error == nil {
- results[0].Error = errors.New("reproducer did not crash")
+ results[0].Error = errors.New(noCrash)
}
file, data := "", []byte(nil)
var crashErr *instance.CrashError
@@ -106,7 +107,11 @@ func reproduce(ctx *aflow.Context, args reproduceArgs) (reproduceResult, error)
return reproduceResult{}, err
}
if data, err := os.ReadFile(filepath.Join(dir, "error")); err == nil {
- return reproduceResult{}, errors.New(string(data))
+ err := errors.New(string(data))
+ if err.Error() == noCrash {
+ err = aflow.FlowError(err)
+ }
+ return reproduceResult{}, err
}
data, err := os.ReadFile(filepath.Join(dir, "report"))
return reproduceResult{
diff --git a/pkg/aflow/action/kernel/build.go b/pkg/aflow/action/kernel/build.go
index 5cc9fd796..594b33fb5 100644
--- a/pkg/aflow/action/kernel/build.go
+++ b/pkg/aflow/action/kernel/build.go
@@ -47,7 +47,7 @@ func buildKernel(ctx *aflow.Context, args buildArgs) (buildResult, error) {
compileCommnads := "compile_commands.json"
makeArgs = append(makeArgs, path.Base(image), compileCommnads)
if _, err := osutil.RunCmd(time.Hour, args.KernelSrc, "make", makeArgs...); err != nil {
- return err
+ return aflow.FlowError(err)
}
// Remove main intermediate build files, we don't need them anymore
// and they take lots of space. Keep generated source files.
diff --git a/pkg/aflow/execute.go b/pkg/aflow/execute.go
index 4133c01c9..482a58fb4 100644
--- a/pkg/aflow/execute.go
+++ b/pkg/aflow/execute.go
@@ -5,6 +5,7 @@ package aflow
import (
"context"
+ "errors"
"fmt"
"maps"
"os"
@@ -70,6 +71,25 @@ func (flow *Flow) Execute(c context.Context, model, workdir string, inputs map[s
return span.Results, nil
}
+// FlowError creates an error that denotes failure of the flow itself,
+// rather than an infrastructure error. A flow error means an expected
+// condition in the flow when it cannot continue, and cannot produce
+// expected outputs. For example, if we are doing something with the kernel,
+// but the kernel build fails. Flow errors shouldn't be flagged in
+// infrastructure monitoring.
+func FlowError(err error) error {
+ return &flowError{err}
+}
+
+func IsFlowError(err error) bool {
+ var flowErr *flowError
+ return errors.As(err, &flowErr)
+}
+
+type flowError struct {
+ error
+}
+
type (
onEvent func(*trajectory.Span) error
generateContentFunc func(*genai.GenerateContentConfig, []*genai.Content) (
diff --git a/syz-agent/agent.go b/syz-agent/agent.go
index c5aad2470..54d6a67c6 100644
--- a/syz-agent/agent.go
+++ b/syz-agent/agent.go
@@ -199,7 +199,7 @@ func (s *Server) poll(ctx context.Context) (
if err := s.dash.AIJobDone(doneReq); err != nil {
return false, err
}
- if jobErr != nil {
+ if jobErr != nil && !aflow.IsFlowError(jobErr) {
return false, jobErr
}
return true, nil