From c9820ab0fe2dce0914ff01bcaf3829ca82150eb2 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 20 Jan 2026 15:03:18 +0100 Subject: pkg/aflow: cache LLM requests Using cached replies is faster, cheaper, and more reliable. Especially handy during development when the same workflows are retried lots of times with some changes. --- pkg/aflow/execute.go | 3 +++ 1 file changed, 3 insertions(+) (limited to 'pkg/aflow/execute.go') diff --git a/pkg/aflow/execute.go b/pkg/aflow/execute.go index 19f8d3aec..3e1a6a112 100644 --- a/pkg/aflow/execute.go +++ b/pkg/aflow/execute.go @@ -178,6 +178,9 @@ func (ctx *Context) generateContentGemini(model string, cfg *genai.GenerateConte return nil, fmt.Errorf("model %q does not exist (models: %v)", model, models) } if thinking { + // Don't alter the original object (that may affect request caching). + cfgCopy := *cfg + cfg = &cfgCopy cfg.ThinkingConfig = &genai.ThinkingConfig{ // We capture them in the trajectory for analysis. IncludeThoughts: true, -- cgit mrf-deployment