From 69e3f8652665d3da729f3cd3a36d86f37c2c9364 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Wed, 21 Jan 2026 19:42:47 +0100
Subject: pkg/aflow: refactor tests

Add helper function that executes test workflows,
compares results (trajectory, LLM requests) against golden files,
and if requested updates these golden files.
---
 .../testdata/TestToolMisbehavior.trajectory.json   | 292 +++++++++++++++++++++
 1 file changed, 292 insertions(+)
 create mode 100644 pkg/aflow/testdata/TestToolMisbehavior.trajectory.json

(limited to 'pkg/aflow/testdata/TestToolMisbehavior.trajectory.json')

diff --git a/pkg/aflow/testdata/TestToolMisbehavior.trajectory.json b/pkg/aflow/testdata/TestToolMisbehavior.trajectory.json
new file mode 100644
index 000000000..4bfe632d2
--- /dev/null
+++ b/pkg/aflow/testdata/TestToolMisbehavior.trajectory.json
@@ -0,0 +1,292 @@
+[
+	{
+		"Seq": 0,
+		"Nesting": 0,
+		"Type": "flow",
+		"Name": "test",
+		"Started": "0001-01-01T00:00:01Z"
+	},
+	{
+		"Seq": 1,
+		"Nesting": 1,
+		"Type": "agent",
+		"Name": "smarty",
+		"Model": "model",
+		"Started": "0001-01-01T00:00:02Z",
+		"Instruction": "Do something!\nPrefer calling several tools at the same time to save round-trips.\n\n\nUse set-results tool to provide results of the analysis.\nIt must be called exactly once before the final reply.\nIgnore results of this tool.\n",
+		"Prompt": "Prompt"
+	},
+	{
+		"Seq": 2,
+		"Nesting": 2,
+		"Type": "llm",
+		"Name": "smarty",
+		"Model": "model",
+		"Started": "0001-01-01T00:00:03Z"
+	},
+	{
+		"Seq": 2,
+		"Nesting": 2,
+		"Type": "llm",
+		"Name": "smarty",
+		"Model": "model",
+		"Started": "0001-01-01T00:00:03Z",
+		"Finished": "0001-01-01T00:00:04Z"
+	},
+	{
+		"Seq": 3,
+		"Nesting": 2,
+		"Type": "tool",
+		"Name": "tool1",
+		"Started": "0001-01-01T00:00:05Z",
+		"Args": {
+			"Tool1Arg": "string"
+		}
+	},
+	{
+		"Seq": 3,
+		"Nesting": 2,
+		"Type": "tool",
+		"Name": "tool1",
+		"Started": "0001-01-01T00:00:05Z",
+		"Finished": "0001-01-01T00:00:06Z",
+		"Args": {
+			"Tool1Arg": "string"
+		},
+		"Results": {}
+	},
+	{
+		"Seq": 4,
+		"Nesting": 2,
+		"Type": "tool",
+		"Name": "tool2",
+		"Started": "0001-01-01T00:00:07Z",
+		"Args": {
+			"Tool2Arg": "string-instead-of-int"
+		}
+	},
+	{
+		"Seq": 4,
+		"Nesting": 2,
+		"Type": "tool",
+		"Name": "tool2",
+		"Started": "0001-01-01T00:00:07Z",
+		"Finished": "0001-01-01T00:00:08Z",
+		"Error": "argument \"Tool2Arg\" has wrong type: got string, want int",
+		"Args": {
+			"Tool2Arg": "string-instead-of-int"
+		}
+	},
+	{
+		"Seq": 5,
+		"Nesting": 2,
+		"Type": "tool",
+		"Name": "tool2",
+		"Started": "0001-01-01T00:00:09Z"
+	},
+	{
+		"Seq": 5,
+		"Nesting": 2,
+		"Type": "tool",
+		"Name": "tool2",
+		"Started": "0001-01-01T00:00:09Z",
+		"Finished": "0001-01-01T00:00:10Z",
+		"Error": "missing argument \"Tool2Arg\""
+	},
+	{
+		"Seq": 6,
+		"Nesting": 2,
+		"Type": "tool",
+		"Name": "tool2",
+		"Started": "0001-01-01T00:00:11Z",
+		"Args": {
+			"Tool2Arg": 0,
+			"Tool2Arg2": 100
+		}
+	},
+	{
+		"Seq": 6,
+		"Nesting": 2,
+		"Type": "tool",
+		"Name": "tool2",
+		"Started": "0001-01-01T00:00:11Z",
+		"Finished": "0001-01-01T00:00:12Z",
+		"Args": {
+			"Tool2Arg": 0,
+			"Tool2Arg2": 100
+		},
+		"Results": {
+			"Result": 42
+		}
+	},
+	{
+		"Seq": 7,
+		"Nesting": 2,
+		"Type": "tool",
+		"Name": "tool3",
+		"Started": "0001-01-01T00:00:13Z",
+		"Args": {
+			"Arg": 0
+		}
+	},
+	{
+		"Seq": 7,
+		"Nesting": 2,
+		"Type": "tool",
+		"Name": "tool3",
+		"Started": "0001-01-01T00:00:13Z",
+		"Finished": "0001-01-01T00:00:14Z",
+		"Error": "tool \"tool3\" does not exist, please correct the name",
+		"Args": {
+			"Arg": 0
+		}
+	},
+	{
+		"Seq": 8,
+		"Nesting": 2,
+		"Type": "tool",
+		"Name": "set-results",
+		"Started": "0001-01-01T00:00:15Z",
+		"Args": {
+			"WrongArg": 0
+		}
+	},
+	{
+		"Seq": 8,
+		"Nesting": 2,
+		"Type": "tool",
+		"Name": "set-results",
+		"Started": "0001-01-01T00:00:15Z",
+		"Finished": "0001-01-01T00:00:16Z",
+		"Error": "missing argument \"AdditionalOutput\"",
+		"Args": {
+			"WrongArg": 0
+		}
+	},
+	{
+		"Seq": 9,
+		"Nesting": 2,
+		"Type": "llm",
+		"Name": "smarty",
+		"Model": "model",
+		"Started": "0001-01-01T00:00:17Z"
+	},
+	{
+		"Seq": 9,
+		"Nesting": 2,
+		"Type": "llm",
+		"Name": "smarty",
+		"Model": "model",
+		"Started": "0001-01-01T00:00:17Z",
+		"Finished": "0001-01-01T00:00:18Z"
+	},
+	{
+		"Seq": 10,
+		"Nesting": 2,
+		"Type": "llm",
+		"Name": "smarty",
+		"Model": "model",
+		"Started": "0001-01-01T00:00:19Z"
+	},
+	{
+		"Seq": 10,
+		"Nesting": 2,
+		"Type": "llm",
+		"Name": "smarty",
+		"Model": "model",
+		"Started": "0001-01-01T00:00:19Z",
+		"Finished": "0001-01-01T00:00:20Z"
+	},
+	{
+		"Seq": 11,
+		"Nesting": 2,
+		"Type": "tool",
+		"Name": "set-results",
+		"Started": "0001-01-01T00:00:21Z",
+		"Args": {
+			"AdditionalOutput": 1
+		}
+	},
+	{
+		"Seq": 11,
+		"Nesting": 2,
+		"Type": "tool",
+		"Name": "set-results",
+		"Started": "0001-01-01T00:00:21Z",
+		"Finished": "0001-01-01T00:00:22Z",
+		"Args": {
+			"AdditionalOutput": 1
+		},
+		"Results": {
+			"AdditionalOutput": 1
+		}
+	},
+	{
+		"Seq": 12,
+		"Nesting": 2,
+		"Type": "tool",
+		"Name": "set-results",
+		"Started": "0001-01-01T00:00:23Z",
+		"Args": {
+			"AdditionalOutput": 2
+		}
+	},
+	{
+		"Seq": 12,
+		"Nesting": 2,
+		"Type": "tool",
+		"Name": "set-results",
+		"Started": "0001-01-01T00:00:23Z",
+		"Finished": "0001-01-01T00:00:24Z",
+		"Args": {
+			"AdditionalOutput": 2
+		},
+		"Results": {
+			"AdditionalOutput": 2
+		}
+	},
+	{
+		"Seq": 13,
+		"Nesting": 2,
+		"Type": "llm",
+		"Name": "smarty",
+		"Model": "model",
+		"Started": "0001-01-01T00:00:25Z"
+	},
+	{
+		"Seq": 13,
+		"Nesting": 2,
+		"Type": "llm",
+		"Name": "smarty",
+		"Model": "model",
+		"Started": "0001-01-01T00:00:25Z",
+		"Finished": "0001-01-01T00:00:26Z"
+	},
+	{
+		"Seq": 1,
+		"Nesting": 1,
+		"Type": "agent",
+		"Name": "smarty",
+		"Model": "model",
+		"Started": "0001-01-01T00:00:02Z",
+		"Finished": "0001-01-01T00:00:27Z",
+		"Results": {
+			"AdditionalOutput": 2
+		},
+		"Instruction": "Do something!\nPrefer calling several tools at the same time to save round-trips.\n\n\nUse set-results tool to provide results of the analysis.\nIt must be called exactly once before the final reply.\nIgnore results of this tool.\n",
+		"Prompt": "Prompt",
+		"Reply": "Finally done"
+	},
+	{
+		"Seq": 0,
+		"Nesting": 0,
+		"Type": "flow",
+		"Name": "test",
+		"Started": "0001-01-01T00:00:01Z",
+		"Finished": "0001-01-01T00:00:28Z",
+		"Results": {
+			"AdditionalOutput": 2,
+			"Reply": "Finally done"
+		}
+	}
+]
\ No newline at end of file
-- 
cgit mrf-deployment