From 9b9f3725ca15b0b5a045aa522f90e1507331f4e1 Mon Sep 17 00:00:00 2001
From: Jack Yuan <jackypc@amazon.com>
Date: Tue, 7 Apr 2026 16:17:27 -0400
Subject: [PATCH] fix:address comments

---
 .../sliding_window_conversation_manager.py    | 35 +++++++--
 src/strands/models/anthropic.py               | 10 ++-
 tests/strands/agent/test_agent.py             | 25 ++++--
 .../agent/test_conversation_manager.py        | 78 ++++++++++++++++---
 tests/strands/models/test_anthropic.py        |  4 +-
 5 files changed, 118 insertions(+), 34 deletions(-)

diff --git a/src/strands/agent/conversation_manager/sliding_window_conversation_manager.py b/src/strands/agent/conversation_manager/sliding_window_conversation_manager.py
index b97de0b06..94446380b 100644
--- a/src/strands/agent/conversation_manager/sliding_window_conversation_manager.py
+++ b/src/strands/agent/conversation_manager/sliding_window_conversation_manager.py
@@ -167,9 +167,9 @@ def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: A
             **kwargs: Additional keyword arguments for future extensibility.
 
         Raises:
-            ContextWindowOverflowException: If the context cannot be reduced further.
-                Such as when the conversation is already minimal or when tool result messages cannot be properly
-                converted.
+            ContextWindowOverflowException: If the context cannot be reduced further and a context overflow
+                error was provided (e is not None). When called during routine window management (e is None),
+                logs a warning and returns without modification.
         """
         messages = agent.messages
 
@@ -188,24 +188,43 @@ def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: A
         # If the number of messages is less than the window_size, then we default to 2, otherwise, trim to window size
         trim_index = 2 if len(messages) <= self.window_size else len(messages) - self.window_size
 
-        # Find the next valid trim_index
+        # Find the next valid trim point that:
+        # 1. Starts with a user message (required by most model providers)
+        # 2. Does not start with an orphaned toolResult
+        # 3. Does not start with a toolUse unless its toolResult immediately follows
         while trim_index < len(messages):
+            # Must start with a user message
+            if messages[trim_index]["role"] != "user":
+                trim_index += 1
+                continue
+
             if (
                 # Oldest message cannot be a toolResult because it needs a toolUse preceding it
                 any("toolResult" in content for content in messages[trim_index]["content"])
                 or (
                     # Oldest message can be a toolUse only if a toolResult immediately follows it.
+                    # Note: toolUse content normally appears only in assistant messages, but this
+                    # check is kept as a defensive safeguard for non-standard message formats.
                     any("toolUse" in content for content in messages[trim_index]["content"])
-                    and trim_index + 1 < len(messages)
-                    and not any("toolResult" in content for content in messages[trim_index + 1]["content"])
+                    and not (
+                        trim_index + 1 < len(messages)
+                        and any("toolResult" in content for content in messages[trim_index + 1]["content"])
+                    )
                 )
             ):
                 trim_index += 1
             else:
                 break
         else:
-            # If we didn't find a valid trim_index, then we throw
-            raise ContextWindowOverflowException("Unable to trim conversation context!") from e
+            # If we didn't find a valid trim_index
+            if e is not None:
+                raise ContextWindowOverflowException("Unable to trim conversation context!") from e
+            logger.warning(
+                "window_size=<%s>, message_count=<%s> | unable to trim conversation context, no valid trim point found",
+                self.window_size,
+                len(messages),
+            )
+            return
 
         # trim_index represents the number of messages being removed from the agents messages array
         self.removed_message_count += trim_index
diff --git a/src/strands/models/anthropic.py b/src/strands/models/anthropic.py
index f0be79bdd..6195f9ccd 100644
--- a/src/strands/models/anthropic.py
+++ b/src/strands/models/anthropic.py
@@ -410,10 +410,12 @@ async def stream(
                         if event.type == "message_stop":
                             # Build dict directly to avoid Pydantic serialization warnings
                             # when the message contains ParsedTextBlock objects (issue #1746)
-                            yield self.format_chunk({
-                                "type": "message_stop",
-                                "message": {"stop_reason": event.message.stop_reason},
-                            })
+                            yield self.format_chunk(
+                                {
+                                    "type": "message_stop",
+                                    "message": {"stop_reason": event.message.stop_reason},
+                                }
+                            )
                         else:
                             yield self.format_chunk(event.model_dump())
 
diff --git a/tests/strands/agent/test_agent.py b/tests/strands/agent/test_agent.py
index 5a3cce11c..0057c50a3 100644
--- a/tests/strands/agent/test_agent.py
+++ b/tests/strands/agent/test_agent.py
@@ -1615,10 +1615,15 @@ def test_agent_restored_from_session_management_with_correct_index():
 
 
 def test_agent_with_session_and_conversation_manager():
-    mock_model = MockedModelProvider([{"role": "assistant", "content": [{"text": "hello!"}]}])
+    mock_model = MockedModelProvider(
+        [
+            {"role": "assistant", "content": [{"text": "first"}]},
+            {"role": "assistant", "content": [{"text": "second"}]},
+        ]
+    )
     mock_session_repository = MockedSessionRepository()
     session_manager = RepositorySessionManager(session_id="123", session_repository=mock_session_repository)
-    conversation_manager = SlidingWindowConversationManager(window_size=1)
+    conversation_manager = SlidingWindowConversationManager(window_size=2)
     # Create an agent with a mocked model and session repository
     agent = Agent(
         session_manager=session_manager,
@@ -1633,14 +1638,20 @@ def test_agent_with_session_and_conversation_manager():
 
     agent("Hello!")
 
-    # After invoking, assert that the messages were persisted
+    # After first invocation: [user, assistant] — fits in window, no trimming
     assert len(mock_session_repository.list_messages("123", agent.agent_id)) == 2
-    # Assert conversation manager reduced the messages
-    assert len(agent.messages) == 1
+    assert len(agent.messages) == 2
+
+    agent("Second question")
+
+    # After second invocation: [user, assistant, user, assistant] exceeds window_size=2
+    # Conversation manager trims to 2 messages starting with a user message
+    assert len(agent.messages) == 2
+    assert agent.messages[0]["role"] == "user"
 
     # Initialize another agent using the same session
     session_manager_2 = RepositorySessionManager(session_id="123", session_repository=mock_session_repository)
-    conversation_manager_2 = SlidingWindowConversationManager(window_size=1)
+    conversation_manager_2 = SlidingWindowConversationManager(window_size=2)
     agent_2 = Agent(
         session_manager=session_manager_2,
         conversation_manager=conversation_manager_2,
@@ -1648,7 +1659,7 @@ def test_agent_with_session_and_conversation_manager():
     )
     # Assert that the second agent was initialized properly, and that the messages of both agents are equal
     assert agent.messages == agent_2.messages
-    # Asser the conversation manager was initialized properly
+    # Assert the conversation manager was initialized properly
     assert agent.conversation_manager.removed_message_count == agent_2.conversation_manager.removed_message_count
 
 
diff --git a/tests/strands/agent/test_conversation_manager.py b/tests/strands/agent/test_conversation_manager.py
index fd88954e8..6db9897f1 100644
--- a/tests/strands/agent/test_conversation_manager.py
+++ b/tests/strands/agent/test_conversation_manager.py
@@ -78,6 +78,7 @@ def conversation_manager(request):
             ],
         ),
         # 5 - Remove dangling assistant message with tool use and user message without tool result
+        # Must start with a user message, so we skip the assistant message
         (
             {"window_size": 3},
             [
@@ -87,7 +88,6 @@ def conversation_manager(request):
                 {"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
             ],
             [
-                {"role": "assistant", "content": [{"text": "First response"}]},
                 {"role": "user", "content": [{"text": "Use a tool"}]},
                 {"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
             ],
@@ -107,19 +107,22 @@ def conversation_manager(request):
             ],
         ),
         # 7 - Message count above max window size - Preserve tool use/tool result pairs
+        # Cannot start with assistant or orphaned toolResult, so trim advances to next plain user message
         (
             {"window_size": 2},
             [
-                {"role": "user", "content": [{"toolResult": {"toolUseId": "123", "content": [], "status": "success"}}]},
+                {"role": "user", "content": [{"text": "Hello"}]},
                 {"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
-                {"role": "user", "content": [{"toolResult": {"toolUseId": "456", "content": [], "status": "success"}}]},
+                {"role": "user", "content": [{"toolResult": {"toolUseId": "123", "content": [], "status": "success"}}]},
+                {"role": "assistant", "content": [{"text": "Done"}]},
+                {"role": "user", "content": [{"text": "Next"}]},
             ],
             [
-                {"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
-                {"role": "user", "content": [{"toolResult": {"toolUseId": "456", "content": [], "status": "success"}}]},
+                {"role": "user", "content": [{"text": "Next"}]},
             ],
         ),
         # 8 - Test sliding window behavior - preserve tool use/result pairs across cut boundary
+        # Must start with user message (not assistant, not orphaned toolResult), so trim advances to plain user msg
         (
             {"window_size": 3},
             [
@@ -127,14 +130,14 @@ def conversation_manager(request):
                 {"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
                 {"role": "user", "content": [{"toolResult": {"toolUseId": "123", "content": [], "status": "success"}}]},
                 {"role": "assistant", "content": [{"text": "Response after tool use"}]},
+                {"role": "user", "content": [{"text": "Follow up"}]},
             ],
             [
-                {"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
-                {"role": "user", "content": [{"toolResult": {"toolUseId": "123", "content": [], "status": "success"}}]},
-                {"role": "assistant", "content": [{"text": "Response after tool use"}]},
+                {"role": "user", "content": [{"text": "Follow up"}]},
             ],
         ),
         # 9 - Test sliding window with multiple tool pairs that need preservation
+        # Must start with user message; orphaned toolResult is skipped, lands on plain user text
         (
             {"window_size": 4},
             [
@@ -144,11 +147,10 @@ def conversation_manager(request):
                 {"role": "assistant", "content": [{"toolUse": {"toolUseId": "456", "name": "tool2", "input": {}}}]},
                 {"role": "user", "content": [{"toolResult": {"toolUseId": "456", "content": [], "status": "success"}}]},
                 {"role": "assistant", "content": [{"text": "Final response"}]},
+                {"role": "user", "content": [{"text": "Another question"}]},
             ],
             [
-                {"role": "assistant", "content": [{"toolUse": {"toolUseId": "456", "name": "tool2", "input": {}}}]},
-                {"role": "user", "content": [{"toolResult": {"toolUseId": "456", "content": [], "status": "success"}}]},
-                {"role": "assistant", "content": [{"text": "Final response"}]},
+                {"role": "user", "content": [{"text": "Another question"}]},
             ],
         ),
     ],
@@ -161,6 +163,43 @@ def test_apply_management(conversation_manager, messages, expected_messages):
     assert messages == expected_messages
 
 
+def test_sliding_window_forces_user_message_start():
+    """Test that trimmed conversation always starts with a user message (GitHub #2085)."""
+    manager = SlidingWindowConversationManager(window_size=3, should_truncate_results=False)
+    messages = [
+        {"role": "user", "content": [{"text": "Hello"}]},
+        {"role": "assistant", "content": [{"text": "Hi"}]},
+        {"role": "user", "content": [{"text": "How are you?"}]},
+        {"role": "assistant", "content": [{"text": "Good"}]},
+        {"role": "user", "content": [{"text": "Great"}]},
+    ]
+    test_agent = Agent(messages=messages)
+    manager.apply_management(test_agent)
+
+    assert len(messages) == 3
+    assert messages[0]["role"] == "user"
+    assert messages[0]["content"] == [{"text": "How are you?"}]
+
+
+def test_sliding_window_happy_path_preserves_window_size():
+    """In a typical user/assistant conversation, trimming preserves close to window_size messages."""
+    manager = SlidingWindowConversationManager(window_size=4, should_truncate_results=False)
+    messages = [
+        {"role": "user", "content": [{"text": "First"}]},
+        {"role": "assistant", "content": [{"text": "First response"}]},
+        {"role": "user", "content": [{"text": "Second"}]},
+        {"role": "assistant", "content": [{"text": "Second response"}]},
+        {"role": "user", "content": [{"text": "Third"}]},
+        {"role": "assistant", "content": [{"text": "Third response"}]},
+    ]
+    test_agent = Agent(messages=messages)
+    manager.apply_management(test_agent)
+
+    assert len(messages) == 4
+    assert messages[0]["role"] == "user"
+    assert messages[0]["content"] == [{"text": "Second"}]
+
+
 def test_sliding_window_conversation_manager_with_untrimmable_history_raises_context_window_overflow_exception():
     manager = SlidingWindowConversationManager(1, False)
     messages = [
@@ -171,7 +210,22 @@ def test_sliding_window_conversation_manager_with_untrimmable_history_raises_con
     test_agent = Agent(messages=messages)
 
     with pytest.raises(ContextWindowOverflowException):
-        manager.apply_management(test_agent)
+        manager.reduce_context(test_agent, e=RuntimeError("context overflow"))
+
+    assert messages == original_messages
+
+
+def test_sliding_window_no_valid_trim_point_without_error_does_not_raise():
+    """When no valid trim point exists during routine management (no error), messages are left unchanged."""
+    manager = SlidingWindowConversationManager(1, False)
+    messages = [
+        {"role": "assistant", "content": [{"toolUse": {"toolUseId": "456", "name": "tool1", "input": {}}}]},
+        {"role": "user", "content": [{"toolResult": {"toolUseId": "789", "content": [], "status": "success"}}]},
+    ]
+    original_messages = messages.copy()
+    test_agent = Agent(messages=messages)
+
+    manager.apply_management(test_agent)
 
     assert messages == original_messages
 
diff --git a/tests/strands/models/test_anthropic.py b/tests/strands/models/test_anthropic.py
index 8f4581655..d1f1df3b3 100644
--- a/tests/strands/models/test_anthropic.py
+++ b/tests/strands/models/test_anthropic.py
@@ -982,9 +982,7 @@ def model_dump_with_warning():
         events = await alist(response)
 
     # Verify no Pydantic serialization warnings were emitted
-    pydantic_warnings = [
-        w for w in caught_warnings if "PydanticSerializationUnexpectedValue" in str(w.message)
-    ]
+    pydantic_warnings = [w for w in caught_warnings if "PydanticSerializationUnexpectedValue" in str(w.message)]
     assert len(pydantic_warnings) == 0, f"Unexpected Pydantic warnings: {pydantic_warnings}"
 
     # Verify the message_stop event was still processed correctly