feat: use rich system prompt content blocks in token estimation

lizradway · lizradway · commit 6aadcc328dd4 · 2026-04-03T13:40:05.000-04:00
diff --git a/src/strands/models/model.py b/src/strands/models/model.py
@@ -49,16 +49,18 @@ def _count_content_block_tokens(block: ContentBlock, encoding: Any) -> int:
         total += len(encoding.encode(block["text"]))
 
     if "toolUse" in block:
+        tool_use = block["toolUse"]
+        total += len(encoding.encode(tool_use.get("name", "")))
         try:
-            total += len(encoding.encode(json.dumps(block["toolUse"])))
+            total += len(encoding.encode(json.dumps(tool_use.get("input", {}))))
         except (TypeError, ValueError):
             pass
 
     if "toolResult" in block:
-        try:
-            total += len(encoding.encode(json.dumps(block["toolResult"])))
-        except (TypeError, ValueError):
-            pass
+        tool_result = block["toolResult"]
+        for item in tool_result.get("content", []):
+            if "text" in item:
+                total += len(encoding.encode(item["text"]))
 
     if "reasoningContent" in block:
         reasoning = block["reasoningContent"]
@@ -74,9 +76,10 @@ def _count_content_block_tokens(block: ContentBlock, encoding: Any) -> int:
 
     if "citationsContent" in block:
         citations = block["citationsContent"]
-        for item in citations.get("content", []):
-            if "text" in item:
-                total += len(encoding.encode(item["text"]))
+        if "content" in citations:
+            for citation_item in citations["content"]:
+                if "text" in citation_item:
+                    total += len(encoding.encode(citation_item["text"]))
 
     return total
 
@@ -85,6 +88,7 @@ def _estimate_tokens_with_tiktoken(
     messages: Messages,
     tool_specs: list[ToolSpec] | None = None,
     system_prompt: str | None = None,
+    system_prompt_content: list[SystemContentBlock] | None = None,
 ) -> int:
     """Estimate tokens by serializing messages/tools to text and counting with tiktoken.
 
@@ -97,6 +101,11 @@ def _estimate_tokens_with_tiktoken(
     if system_prompt:
         total += len(encoding.encode(system_prompt))
 
+    if system_prompt_content:
+        for block in system_prompt_content:
+            if "text" in block:
+                total += len(encoding.encode(block["text"]))
+
     for message in messages:
         for block in message["content"]:
             total += _count_content_block_tokens(block, encoding)
@@ -224,6 +233,7 @@ def _estimate_tokens(
         messages: Messages,
         tool_specs: list[ToolSpec] | None = None,
         system_prompt: str | None = None,
+        system_prompt_content: list[SystemContentBlock] | None = None,
     ) -> int:
         """Estimate token count for the given input before sending to the model.
 
@@ -239,11 +249,12 @@ def _estimate_tokens(
             messages: List of message objects to estimate tokens for.
             tool_specs: List of tool specifications to include in the estimate.
             system_prompt: System prompt to include in the estimate.
+            system_prompt_content: System prompt content blocks to include in the estimate.
 
         Returns:
             Estimated total input tokens.
         """
-        return _estimate_tokens_with_tiktoken(messages, tool_specs, system_prompt)
+        return _estimate_tokens_with_tiktoken(messages, tool_specs, system_prompt, system_prompt_content)
 
 
 class _ModelPlugin(Plugin):
diff --git a/tests/strands/models/test_model.py b/tests/strands/models/test_model.py
@@ -313,7 +313,7 @@ def test_estimate_tokens_skips_binary_content(model):
     assert model._estimate_tokens(messages=messages) == 0
 
 
-def test_estimate_tokens_tool_result_with_bytes(model):
+def test_estimate_tokens_tool_result_with_bytes_only(model):
     messages = [
         {
             "role": "user",
@@ -332,6 +332,28 @@ def test_estimate_tokens_tool_result_with_bytes(model):
     assert result == 0
 
 
+def test_estimate_tokens_tool_result_with_text_and_bytes(model):
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "toolResult": {
+                        "toolUseId": "123",
+                        "content": [
+                            {"text": "Here is the screenshot"},
+                            {"image": {"format": "png", "source": {"bytes": b"image data"}}},
+                        ],
+                        "status": "success",
+                    }
+                }
+            ],
+        }
+    ]
+    result = model._estimate_tokens(messages=messages)
+    assert result > 0
+
+
 def test_estimate_tokens_guard_content_block(model):
     messages = [
         {
@@ -359,7 +381,8 @@ def test_estimate_tokens_tool_use_with_bytes(model):
         }
     ]
     result = model._estimate_tokens(messages=messages)
-    assert result == 0
+    # Should still count the tool name even though input has non-serializable bytes
+    assert result > 0
 
 
 def test_estimate_tokens_non_serializable_tool_spec(model, messages):
@@ -393,6 +416,25 @@ def test_estimate_tokens_citations_block(model):
     assert result > 0
 
 
+def test_estimate_tokens_system_prompt_content(model):
+    result = model._estimate_tokens(
+        messages=[],
+        system_prompt_content=[{"text": "You are a helpful assistant."}],
+    )
+    assert result > 0
+
+
+def test_estimate_tokens_system_prompt_content_with_cache_point(model):
+    result = model._estimate_tokens(
+        messages=[],
+        system_prompt_content=[
+            {"text": "You are a helpful assistant."},
+            {"cachePoint": {"type": "default"}},
+        ],
+    )
+    assert result > 0
+
+
 def test_estimate_tokens_all_inputs(model):
     messages = [
         {"role": "user", "content": [{"text": "hello world"}]},
@@ -402,6 +444,7 @@ def test_estimate_tokens_all_inputs(model):
         messages=messages,
         tool_specs=[{"name": "test", "description": "a test tool", "inputSchema": {"json": {}}}],
         system_prompt="Be helpful.",
+        system_prompt_content=[{"text": "Additional system context."}],
     )
     assert result > 0