aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/aflow/testdata/TestToolMisbehavior.llm.json
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2026-01-21 19:42:47 +0100
committerDmitry Vyukov <dvyukov@google.com>2026-01-23 09:36:05 +0000
commit69e3f8652665d3da729f3cd3a36d86f37c2c9364 (patch)
tree11f93adc35c83ac8662e0e090cca24eb06334594 /pkg/aflow/testdata/TestToolMisbehavior.llm.json
parente2b1b6e6434df4fdc57ce90331688373e872825b (diff)
pkg/aflow: refactor tests
Add helper function that executes test workflows, compares results (trajectory, LLM requests) against golden files, and if requested updates these golden files.
Diffstat (limited to 'pkg/aflow/testdata/TestToolMisbehavior.llm.json')
-rw-r--r--pkg/aflow/testdata/TestToolMisbehavior.llm.json907
1 files changed, 907 insertions, 0 deletions
diff --git a/pkg/aflow/testdata/TestToolMisbehavior.llm.json b/pkg/aflow/testdata/TestToolMisbehavior.llm.json
new file mode 100644
index 000000000..e51e6ab8d
--- /dev/null
+++ b/pkg/aflow/testdata/TestToolMisbehavior.llm.json
@@ -0,0 +1,907 @@
+[
+ {
+ "Model": "model",
+ "Config": {
+ "systemInstruction": {
+ "parts": [
+ {
+ "text": "Do something!\nPrefer calling several tools at the same time to save round-trips.\n\n\nUse set-results tool to provide results of the analysis.\nIt must be called exactly once before the final reply.\nIgnore results of this tool.\n"
+ }
+ ],
+ "role": "user"
+ },
+ "temperature": 1,
+ "tools": [
+ {
+ "functionDeclarations": [
+ {
+ "description": "tool description",
+ "name": "tool1",
+ "parametersJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "Tool1Arg": {
+ "description": "arg",
+ "type": "string"
+ }
+ },
+ "required": [
+ "Tool1Arg"
+ ],
+ "type": "object"
+ },
+ "responseJsonSchema": {
+ "additionalProperties": false,
+ "type": "object"
+ }
+ }
+ ]
+ },
+ {
+ "functionDeclarations": [
+ {
+ "description": "tool description",
+ "name": "tool2",
+ "parametersJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "Tool2Arg": {
+ "description": "arg",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "Tool2Arg"
+ ],
+ "type": "object"
+ },
+ "responseJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "Result": {
+ "description": "arg",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "Result"
+ ],
+ "type": "object"
+ }
+ }
+ ]
+ },
+ {
+ "functionDeclarations": [
+ {
+ "description": "Use this tool to provide results of the analysis.",
+ "name": "set-results",
+ "parametersJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "AdditionalOutput": {
+ "description": "arg",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "AdditionalOutput"
+ ],
+ "type": "object"
+ },
+ "responseJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "AdditionalOutput": {
+ "description": "arg",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "AdditionalOutput"
+ ],
+ "type": "object"
+ }
+ }
+ ]
+ }
+ ],
+ "responseModalities": [
+ "TEXT"
+ ]
+ },
+ "Request": [
+ {
+ "parts": [
+ {
+ "text": "Prompt"
+ }
+ ],
+ "role": "user"
+ }
+ ]
+ },
+ {
+ "Model": "model",
+ "Config": {
+ "systemInstruction": {
+ "parts": [
+ {
+ "text": "Do something!\nPrefer calling several tools at the same time to save round-trips.\n\n\nUse set-results tool to provide results of the analysis.\nIt must be called exactly once before the final reply.\nIgnore results of this tool.\n"
+ }
+ ],
+ "role": "user"
+ },
+ "temperature": 1,
+ "tools": [
+ {
+ "functionDeclarations": [
+ {
+ "description": "tool description",
+ "name": "tool1",
+ "parametersJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "Tool1Arg": {
+ "description": "arg",
+ "type": "string"
+ }
+ },
+ "required": [
+ "Tool1Arg"
+ ],
+ "type": "object"
+ },
+ "responseJsonSchema": {
+ "additionalProperties": false,
+ "type": "object"
+ }
+ }
+ ]
+ },
+ {
+ "functionDeclarations": [
+ {
+ "description": "tool description",
+ "name": "tool2",
+ "parametersJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "Tool2Arg": {
+ "description": "arg",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "Tool2Arg"
+ ],
+ "type": "object"
+ },
+ "responseJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "Result": {
+ "description": "arg",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "Result"
+ ],
+ "type": "object"
+ }
+ }
+ ]
+ },
+ {
+ "functionDeclarations": [
+ {
+ "description": "Use this tool to provide results of the analysis.",
+ "name": "set-results",
+ "parametersJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "AdditionalOutput": {
+ "description": "arg",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "AdditionalOutput"
+ ],
+ "type": "object"
+ },
+ "responseJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "AdditionalOutput": {
+ "description": "arg",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "AdditionalOutput"
+ ],
+ "type": "object"
+ }
+ }
+ ]
+ }
+ ],
+ "responseModalities": [
+ "TEXT"
+ ]
+ },
+ "Request": [
+ {
+ "parts": [
+ {
+ "text": "Prompt"
+ }
+ ],
+ "role": "user"
+ },
+ {
+ "parts": [
+ {
+ "functionCall": {
+ "id": "id1",
+ "args": {
+ "Tool1Arg": "string"
+ },
+ "name": "tool1"
+ }
+ },
+ {
+ "functionCall": {
+ "id": "id2",
+ "args": {
+ "Tool2Arg": "string-instead-of-int"
+ },
+ "name": "tool2"
+ }
+ },
+ {
+ "functionCall": {
+ "id": "id3",
+ "name": "tool2"
+ }
+ },
+ {
+ "functionCall": {
+ "id": "id4",
+ "args": {
+ "Tool2Arg": 0,
+ "Tool2Arg2": 100
+ },
+ "name": "tool2"
+ }
+ },
+ {
+ "functionCall": {
+ "id": "id5",
+ "args": {
+ "Arg": 0
+ },
+ "name": "tool3"
+ }
+ },
+ {
+ "functionCall": {
+ "id": "id6",
+ "args": {
+ "WrongArg": 0
+ },
+ "name": "set-results"
+ }
+ }
+ ],
+ "role": "user"
+ },
+ {
+ "parts": [
+ {
+ "functionResponse": {
+ "id": "id1",
+ "name": "tool1"
+ }
+ },
+ {
+ "functionResponse": {
+ "id": "id2",
+ "name": "tool2",
+ "response": {
+ "error": "argument \"Tool2Arg\" has wrong type: got string, want int"
+ }
+ }
+ },
+ {
+ "functionResponse": {
+ "id": "id3",
+ "name": "tool2",
+ "response": {
+ "error": "missing argument \"Tool2Arg\""
+ }
+ }
+ },
+ {
+ "functionResponse": {
+ "id": "id4",
+ "name": "tool2",
+ "response": {
+ "Result": 42
+ }
+ }
+ },
+ {
+ "functionResponse": {
+ "id": "id5",
+ "name": "tool3",
+ "response": {
+ "error": "tool \"tool3\" does not exist, please correct the name"
+ }
+ }
+ },
+ {
+ "functionResponse": {
+ "id": "id6",
+ "name": "set-results",
+ "response": {
+ "error": "missing argument \"AdditionalOutput\""
+ }
+ }
+ }
+ ],
+ "role": "user"
+ }
+ ]
+ },
+ {
+ "Model": "model",
+ "Config": {
+ "systemInstruction": {
+ "parts": [
+ {
+ "text": "Do something!\nPrefer calling several tools at the same time to save round-trips.\n\n\nUse set-results tool to provide results of the analysis.\nIt must be called exactly once before the final reply.\nIgnore results of this tool.\n"
+ }
+ ],
+ "role": "user"
+ },
+ "temperature": 1,
+ "tools": [
+ {
+ "functionDeclarations": [
+ {
+ "description": "tool description",
+ "name": "tool1",
+ "parametersJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "Tool1Arg": {
+ "description": "arg",
+ "type": "string"
+ }
+ },
+ "required": [
+ "Tool1Arg"
+ ],
+ "type": "object"
+ },
+ "responseJsonSchema": {
+ "additionalProperties": false,
+ "type": "object"
+ }
+ }
+ ]
+ },
+ {
+ "functionDeclarations": [
+ {
+ "description": "tool description",
+ "name": "tool2",
+ "parametersJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "Tool2Arg": {
+ "description": "arg",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "Tool2Arg"
+ ],
+ "type": "object"
+ },
+ "responseJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "Result": {
+ "description": "arg",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "Result"
+ ],
+ "type": "object"
+ }
+ }
+ ]
+ },
+ {
+ "functionDeclarations": [
+ {
+ "description": "Use this tool to provide results of the analysis.",
+ "name": "set-results",
+ "parametersJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "AdditionalOutput": {
+ "description": "arg",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "AdditionalOutput"
+ ],
+ "type": "object"
+ },
+ "responseJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "AdditionalOutput": {
+ "description": "arg",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "AdditionalOutput"
+ ],
+ "type": "object"
+ }
+ }
+ ]
+ }
+ ],
+ "responseModalities": [
+ "TEXT"
+ ]
+ },
+ "Request": [
+ {
+ "parts": [
+ {
+ "text": "Prompt"
+ }
+ ],
+ "role": "user"
+ },
+ {
+ "parts": [
+ {
+ "functionCall": {
+ "id": "id1",
+ "args": {
+ "Tool1Arg": "string"
+ },
+ "name": "tool1"
+ }
+ },
+ {
+ "functionCall": {
+ "id": "id2",
+ "args": {
+ "Tool2Arg": "string-instead-of-int"
+ },
+ "name": "tool2"
+ }
+ },
+ {
+ "functionCall": {
+ "id": "id3",
+ "name": "tool2"
+ }
+ },
+ {
+ "functionCall": {
+ "id": "id4",
+ "args": {
+ "Tool2Arg": 0,
+ "Tool2Arg2": 100
+ },
+ "name": "tool2"
+ }
+ },
+ {
+ "functionCall": {
+ "id": "id5",
+ "args": {
+ "Arg": 0
+ },
+ "name": "tool3"
+ }
+ },
+ {
+ "functionCall": {
+ "id": "id6",
+ "args": {
+ "WrongArg": 0
+ },
+ "name": "set-results"
+ }
+ }
+ ],
+ "role": "user"
+ },
+ {
+ "parts": [
+ {
+ "functionResponse": {
+ "id": "id1",
+ "name": "tool1"
+ }
+ },
+ {
+ "functionResponse": {
+ "id": "id2",
+ "name": "tool2",
+ "response": {
+ "error": "argument \"Tool2Arg\" has wrong type: got string, want int"
+ }
+ }
+ },
+ {
+ "functionResponse": {
+ "id": "id3",
+ "name": "tool2",
+ "response": {
+ "error": "missing argument \"Tool2Arg\""
+ }
+ }
+ },
+ {
+ "functionResponse": {
+ "id": "id4",
+ "name": "tool2",
+ "response": {
+ "Result": 42
+ }
+ }
+ },
+ {
+ "functionResponse": {
+ "id": "id5",
+ "name": "tool3",
+ "response": {
+ "error": "tool \"tool3\" does not exist, please correct the name"
+ }
+ }
+ },
+ {
+ "functionResponse": {
+ "id": "id6",
+ "name": "set-results",
+ "response": {
+ "error": "missing argument \"AdditionalOutput\""
+ }
+ }
+ }
+ ],
+ "role": "user"
+ },
+ {
+ "parts": [
+ {
+ "text": "I am done"
+ }
+ ],
+ "role": "user"
+ },
+ {
+ "parts": [
+ {
+ "text": "You did not call set-results tool.\nPlease call set-results tool to provide results of the analysis.\n"
+ }
+ ],
+ "role": "user"
+ }
+ ]
+ },
+ {
+ "Model": "model",
+ "Config": {
+ "systemInstruction": {
+ "parts": [
+ {
+ "text": "Do something!\nPrefer calling several tools at the same time to save round-trips.\n\n\nUse set-results tool to provide results of the analysis.\nIt must be called exactly once before the final reply.\nIgnore results of this tool.\n"
+ }
+ ],
+ "role": "user"
+ },
+ "temperature": 1,
+ "tools": [
+ {
+ "functionDeclarations": [
+ {
+ "description": "tool description",
+ "name": "tool1",
+ "parametersJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "Tool1Arg": {
+ "description": "arg",
+ "type": "string"
+ }
+ },
+ "required": [
+ "Tool1Arg"
+ ],
+ "type": "object"
+ },
+ "responseJsonSchema": {
+ "additionalProperties": false,
+ "type": "object"
+ }
+ }
+ ]
+ },
+ {
+ "functionDeclarations": [
+ {
+ "description": "tool description",
+ "name": "tool2",
+ "parametersJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "Tool2Arg": {
+ "description": "arg",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "Tool2Arg"
+ ],
+ "type": "object"
+ },
+ "responseJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "Result": {
+ "description": "arg",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "Result"
+ ],
+ "type": "object"
+ }
+ }
+ ]
+ },
+ {
+ "functionDeclarations": [
+ {
+ "description": "Use this tool to provide results of the analysis.",
+ "name": "set-results",
+ "parametersJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "AdditionalOutput": {
+ "description": "arg",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "AdditionalOutput"
+ ],
+ "type": "object"
+ },
+ "responseJsonSchema": {
+ "additionalProperties": false,
+ "properties": {
+ "AdditionalOutput": {
+ "description": "arg",
+ "type": "integer"
+ }
+ },
+ "required": [
+ "AdditionalOutput"
+ ],
+ "type": "object"
+ }
+ }
+ ]
+ }
+ ],
+ "responseModalities": [
+ "TEXT"
+ ]
+ },
+ "Request": [
+ {
+ "parts": [
+ {
+ "text": "Prompt"
+ }
+ ],
+ "role": "user"
+ },
+ {
+ "parts": [
+ {
+ "functionCall": {
+ "id": "id1",
+ "args": {
+ "Tool1Arg": "string"
+ },
+ "name": "tool1"
+ }
+ },
+ {
+ "functionCall": {
+ "id": "id2",
+ "args": {
+ "Tool2Arg": "string-instead-of-int"
+ },
+ "name": "tool2"
+ }
+ },
+ {
+ "functionCall": {
+ "id": "id3",
+ "name": "tool2"
+ }
+ },
+ {
+ "functionCall": {
+ "id": "id4",
+ "args": {
+ "Tool2Arg": 0,
+ "Tool2Arg2": 100
+ },
+ "name": "tool2"
+ }
+ },
+ {
+ "functionCall": {
+ "id": "id5",
+ "args": {
+ "Arg": 0
+ },
+ "name": "tool3"
+ }
+ },
+ {
+ "functionCall": {
+ "id": "id6",
+ "args": {
+ "WrongArg": 0
+ },
+ "name": "set-results"
+ }
+ }
+ ],
+ "role": "user"
+ },
+ {
+ "parts": [
+ {
+ "functionResponse": {
+ "id": "id1",
+ "name": "tool1"
+ }
+ },
+ {
+ "functionResponse": {
+ "id": "id2",
+ "name": "tool2",
+ "response": {
+ "error": "argument \"Tool2Arg\" has wrong type: got string, want int"
+ }
+ }
+ },
+ {
+ "functionResponse": {
+ "id": "id3",
+ "name": "tool2",
+ "response": {
+ "error": "missing argument \"Tool2Arg\""
+ }
+ }
+ },
+ {
+ "functionResponse": {
+ "id": "id4",
+ "name": "tool2",
+ "response": {
+ "Result": 42
+ }
+ }
+ },
+ {
+ "functionResponse": {
+ "id": "id5",
+ "name": "tool3",
+ "response": {
+ "error": "tool \"tool3\" does not exist, please correct the name"
+ }
+ }
+ },
+ {
+ "functionResponse": {
+ "id": "id6",
+ "name": "set-results",
+ "response": {
+ "error": "missing argument \"AdditionalOutput\""
+ }
+ }
+ }
+ ],
+ "role": "user"
+ },
+ {
+ "parts": [
+ {
+ "text": "I am done"
+ }
+ ],
+ "role": "user"
+ },
+ {
+ "parts": [
+ {
+ "text": "You did not call set-results tool.\nPlease call set-results tool to provide results of the analysis.\n"
+ }
+ ],
+ "role": "user"
+ },
+ {
+ "parts": [
+ {
+ "functionCall": {
+ "id": "id1",
+ "args": {
+ "AdditionalOutput": 1
+ },
+ "name": "set-results"
+ }
+ },
+ {
+ "functionCall": {
+ "id": "id2",
+ "args": {
+ "AdditionalOutput": 2
+ },
+ "name": "set-results"
+ }
+ }
+ ],
+ "role": "user"
+ },
+ {
+ "parts": [
+ {
+ "functionResponse": {
+ "id": "id1",
+ "name": "set-results",
+ "response": {
+ "AdditionalOutput": 1
+ }
+ }
+ },
+ {
+ "functionResponse": {
+ "id": "id2",
+ "name": "set-results",
+ "response": {
+ "AdditionalOutput": 2
+ }
+ }
+ }
+ ],
+ "role": "user"
+ }
+ ]
+ }
+] \ No newline at end of file