From 69e3f8652665d3da729f3cd3a36d86f37c2c9364 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 21 Jan 2026 19:42:47 +0100 Subject: pkg/aflow: refactor tests Add helper function that executes test workflows, compares results (trajectory, LLM requests) against golden files, and if requested updates these golden files. --- .../testdata/TestToolMisbehavior.trajectory.json | 292 +++++++++++++++++++++ 1 file changed, 292 insertions(+) create mode 100644 pkg/aflow/testdata/TestToolMisbehavior.trajectory.json (limited to 'pkg/aflow/testdata/TestToolMisbehavior.trajectory.json') diff --git a/pkg/aflow/testdata/TestToolMisbehavior.trajectory.json b/pkg/aflow/testdata/TestToolMisbehavior.trajectory.json new file mode 100644 index 000000000..4bfe632d2 --- /dev/null +++ b/pkg/aflow/testdata/TestToolMisbehavior.trajectory.json @@ -0,0 +1,292 @@ +[ + { + "Seq": 0, + "Nesting": 0, + "Type": "flow", + "Name": "test", + "Started": "0001-01-01T00:00:01Z" + }, + { + "Seq": 1, + "Nesting": 1, + "Type": "agent", + "Name": "smarty", + "Model": "model", + "Started": "0001-01-01T00:00:02Z", + "Instruction": "Do something!\nPrefer calling several tools at the same time to save round-trips.\n\n\nUse set-results tool to provide results of the analysis.\nIt must be called exactly once before the final reply.\nIgnore results of this tool.\n", + "Prompt": "Prompt" + }, + { + "Seq": 2, + "Nesting": 2, + "Type": "llm", + "Name": "smarty", + "Model": "model", + "Started": "0001-01-01T00:00:03Z" + }, + { + "Seq": 2, + "Nesting": 2, + "Type": "llm", + "Name": "smarty", + "Model": "model", + "Started": "0001-01-01T00:00:03Z", + "Finished": "0001-01-01T00:00:04Z" + }, + { + "Seq": 3, + "Nesting": 2, + "Type": "tool", + "Name": "tool1", + "Started": "0001-01-01T00:00:05Z", + "Args": { + "Tool1Arg": "string" + } + }, + { + "Seq": 3, + "Nesting": 2, + "Type": "tool", + "Name": "tool1", + "Started": "0001-01-01T00:00:05Z", + "Finished": "0001-01-01T00:00:06Z", + "Args": { + "Tool1Arg": "string" + }, + "Results": {} + }, + { + "Seq": 4, + "Nesting": 2, + "Type": "tool", + "Name": "tool2", + "Started": "0001-01-01T00:00:07Z", + "Args": { + "Tool2Arg": "string-instead-of-int" + } + }, + { + "Seq": 4, + "Nesting": 2, + "Type": "tool", + "Name": "tool2", + "Started": "0001-01-01T00:00:07Z", + "Finished": "0001-01-01T00:00:08Z", + "Error": "argument \"Tool2Arg\" has wrong type: got string, want int", + "Args": { + "Tool2Arg": "string-instead-of-int" + } + }, + { + "Seq": 5, + "Nesting": 2, + "Type": "tool", + "Name": "tool2", + "Started": "0001-01-01T00:00:09Z" + }, + { + "Seq": 5, + "Nesting": 2, + "Type": "tool", + "Name": "tool2", + "Started": "0001-01-01T00:00:09Z", + "Finished": "0001-01-01T00:00:10Z", + "Error": "missing argument \"Tool2Arg\"" + }, + { + "Seq": 6, + "Nesting": 2, + "Type": "tool", + "Name": "tool2", + "Started": "0001-01-01T00:00:11Z", + "Args": { + "Tool2Arg": 0, + "Tool2Arg2": 100 + } + }, + { + "Seq": 6, + "Nesting": 2, + "Type": "tool", + "Name": "tool2", + "Started": "0001-01-01T00:00:11Z", + "Finished": "0001-01-01T00:00:12Z", + "Args": { + "Tool2Arg": 0, + "Tool2Arg2": 100 + }, + "Results": { + "Result": 42 + } + }, + { + "Seq": 7, + "Nesting": 2, + "Type": "tool", + "Name": "tool3", + "Started": "0001-01-01T00:00:13Z", + "Args": { + "Arg": 0 + } + }, + { + "Seq": 7, + "Nesting": 2, + "Type": "tool", + "Name": "tool3", + "Started": "0001-01-01T00:00:13Z", + "Finished": "0001-01-01T00:00:14Z", + "Error": "tool \"tool3\" does not exist, please correct the name", + "Args": { + "Arg": 0 + } + }, + { + "Seq": 8, + "Nesting": 2, + "Type": "tool", + "Name": "set-results", + "Started": "0001-01-01T00:00:15Z", + "Args": { + "WrongArg": 0 + } + }, + { + "Seq": 8, + "Nesting": 2, + "Type": "tool", + "Name": "set-results", + "Started": "0001-01-01T00:00:15Z", + "Finished": "0001-01-01T00:00:16Z", + "Error": "missing argument \"AdditionalOutput\"", + "Args": { + "WrongArg": 0 + } + }, + { + "Seq": 9, + "Nesting": 2, + "Type": "llm", + "Name": "smarty", + "Model": "model", + "Started": "0001-01-01T00:00:17Z" + }, + { + "Seq": 9, + "Nesting": 2, + "Type": "llm", + "Name": "smarty", + "Model": "model", + "Started": "0001-01-01T00:00:17Z", + "Finished": "0001-01-01T00:00:18Z" + }, + { + "Seq": 10, + "Nesting": 2, + "Type": "llm", + "Name": "smarty", + "Model": "model", + "Started": "0001-01-01T00:00:19Z" + }, + { + "Seq": 10, + "Nesting": 2, + "Type": "llm", + "Name": "smarty", + "Model": "model", + "Started": "0001-01-01T00:00:19Z", + "Finished": "0001-01-01T00:00:20Z" + }, + { + "Seq": 11, + "Nesting": 2, + "Type": "tool", + "Name": "set-results", + "Started": "0001-01-01T00:00:21Z", + "Args": { + "AdditionalOutput": 1 + } + }, + { + "Seq": 11, + "Nesting": 2, + "Type": "tool", + "Name": "set-results", + "Started": "0001-01-01T00:00:21Z", + "Finished": "0001-01-01T00:00:22Z", + "Args": { + "AdditionalOutput": 1 + }, + "Results": { + "AdditionalOutput": 1 + } + }, + { + "Seq": 12, + "Nesting": 2, + "Type": "tool", + "Name": "set-results", + "Started": "0001-01-01T00:00:23Z", + "Args": { + "AdditionalOutput": 2 + } + }, + { + "Seq": 12, + "Nesting": 2, + "Type": "tool", + "Name": "set-results", + "Started": "0001-01-01T00:00:23Z", + "Finished": "0001-01-01T00:00:24Z", + "Args": { + "AdditionalOutput": 2 + }, + "Results": { + "AdditionalOutput": 2 + } + }, + { + "Seq": 13, + "Nesting": 2, + "Type": "llm", + "Name": "smarty", + "Model": "model", + "Started": "0001-01-01T00:00:25Z" + }, + { + "Seq": 13, + "Nesting": 2, + "Type": "llm", + "Name": "smarty", + "Model": "model", + "Started": "0001-01-01T00:00:25Z", + "Finished": "0001-01-01T00:00:26Z" + }, + { + "Seq": 1, + "Nesting": 1, + "Type": "agent", + "Name": "smarty", + "Model": "model", + "Started": "0001-01-01T00:00:02Z", + "Finished": "0001-01-01T00:00:27Z", + "Results": { + "AdditionalOutput": 2 + }, + "Instruction": "Do something!\nPrefer calling several tools at the same time to save round-trips.\n\n\nUse set-results tool to provide results of the analysis.\nIt must be called exactly once before the final reply.\nIgnore results of this tool.\n", + "Prompt": "Prompt", + "Reply": "Finally done" + }, + { + "Seq": 0, + "Nesting": 0, + "Type": "flow", + "Name": "test", + "Started": "0001-01-01T00:00:01Z", + "Finished": "0001-01-01T00:00:28Z", + "Results": { + "AdditionalOutput": 2, + "Reply": "Finally done" + } + } +] \ No newline at end of file -- cgit mrf-deployment