From 9b9f3725ca15b0b5a045aa522f90e1507331f4e1 Mon Sep 17 00:00:00 2001 From: Jack Yuan Date: Tue, 7 Apr 2026 16:17:27 -0400 Subject: [PATCH] fix:address comments --- .../sliding_window_conversation_manager.py | 35 +++++++-- src/strands/models/anthropic.py | 10 ++- tests/strands/agent/test_agent.py | 25 ++++-- .../agent/test_conversation_manager.py | 78 ++++++++++++++++--- tests/strands/models/test_anthropic.py | 4 +- 5 files changed, 118 insertions(+), 34 deletions(-) diff --git a/src/strands/agent/conversation_manager/sliding_window_conversation_manager.py b/src/strands/agent/conversation_manager/sliding_window_conversation_manager.py index b97de0b06..94446380b 100644 --- a/src/strands/agent/conversation_manager/sliding_window_conversation_manager.py +++ b/src/strands/agent/conversation_manager/sliding_window_conversation_manager.py @@ -167,9 +167,9 @@ def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: A **kwargs: Additional keyword arguments for future extensibility. Raises: - ContextWindowOverflowException: If the context cannot be reduced further. - Such as when the conversation is already minimal or when tool result messages cannot be properly - converted. + ContextWindowOverflowException: If the context cannot be reduced further and a context overflow + error was provided (e is not None). When called during routine window management (e is None), + logs a warning and returns without modification. """ messages = agent.messages @@ -188,24 +188,43 @@ def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: A # If the number of messages is less than the window_size, then we default to 2, otherwise, trim to window size trim_index = 2 if len(messages) <= self.window_size else len(messages) - self.window_size - # Find the next valid trim_index + # Find the next valid trim point that: + # 1. Starts with a user message (required by most model providers) + # 2. Does not start with an orphaned toolResult + # 3. Does not start with a toolUse unless its toolResult immediately follows while trim_index < len(messages): + # Must start with a user message + if messages[trim_index]["role"] != "user": + trim_index += 1 + continue + if ( # Oldest message cannot be a toolResult because it needs a toolUse preceding it any("toolResult" in content for content in messages[trim_index]["content"]) or ( # Oldest message can be a toolUse only if a toolResult immediately follows it. + # Note: toolUse content normally appears only in assistant messages, but this + # check is kept as a defensive safeguard for non-standard message formats. any("toolUse" in content for content in messages[trim_index]["content"]) - and trim_index + 1 < len(messages) - and not any("toolResult" in content for content in messages[trim_index + 1]["content"]) + and not ( + trim_index + 1 < len(messages) + and any("toolResult" in content for content in messages[trim_index + 1]["content"]) + ) ) ): trim_index += 1 else: break else: - # If we didn't find a valid trim_index, then we throw - raise ContextWindowOverflowException("Unable to trim conversation context!") from e + # If we didn't find a valid trim_index + if e is not None: + raise ContextWindowOverflowException("Unable to trim conversation context!") from e + logger.warning( + "window_size=<%s>, message_count=<%s> | unable to trim conversation context, no valid trim point found", + self.window_size, + len(messages), + ) + return # trim_index represents the number of messages being removed from the agents messages array self.removed_message_count += trim_index diff --git a/src/strands/models/anthropic.py b/src/strands/models/anthropic.py index f0be79bdd..6195f9ccd 100644 --- a/src/strands/models/anthropic.py +++ b/src/strands/models/anthropic.py @@ -410,10 +410,12 @@ async def stream( if event.type == "message_stop": # Build dict directly to avoid Pydantic serialization warnings # when the message contains ParsedTextBlock objects (issue #1746) - yield self.format_chunk({ - "type": "message_stop", - "message": {"stop_reason": event.message.stop_reason}, - }) + yield self.format_chunk( + { + "type": "message_stop", + "message": {"stop_reason": event.message.stop_reason}, + } + ) else: yield self.format_chunk(event.model_dump()) diff --git a/tests/strands/agent/test_agent.py b/tests/strands/agent/test_agent.py index 5a3cce11c..0057c50a3 100644 --- a/tests/strands/agent/test_agent.py +++ b/tests/strands/agent/test_agent.py @@ -1615,10 +1615,15 @@ def test_agent_restored_from_session_management_with_correct_index(): def test_agent_with_session_and_conversation_manager(): - mock_model = MockedModelProvider([{"role": "assistant", "content": [{"text": "hello!"}]}]) + mock_model = MockedModelProvider( + [ + {"role": "assistant", "content": [{"text": "first"}]}, + {"role": "assistant", "content": [{"text": "second"}]}, + ] + ) mock_session_repository = MockedSessionRepository() session_manager = RepositorySessionManager(session_id="123", session_repository=mock_session_repository) - conversation_manager = SlidingWindowConversationManager(window_size=1) + conversation_manager = SlidingWindowConversationManager(window_size=2) # Create an agent with a mocked model and session repository agent = Agent( session_manager=session_manager, @@ -1633,14 +1638,20 @@ def test_agent_with_session_and_conversation_manager(): agent("Hello!") - # After invoking, assert that the messages were persisted + # After first invocation: [user, assistant] — fits in window, no trimming assert len(mock_session_repository.list_messages("123", agent.agent_id)) == 2 - # Assert conversation manager reduced the messages - assert len(agent.messages) == 1 + assert len(agent.messages) == 2 + + agent("Second question") + + # After second invocation: [user, assistant, user, assistant] exceeds window_size=2 + # Conversation manager trims to 2 messages starting with a user message + assert len(agent.messages) == 2 + assert agent.messages[0]["role"] == "user" # Initialize another agent using the same session session_manager_2 = RepositorySessionManager(session_id="123", session_repository=mock_session_repository) - conversation_manager_2 = SlidingWindowConversationManager(window_size=1) + conversation_manager_2 = SlidingWindowConversationManager(window_size=2) agent_2 = Agent( session_manager=session_manager_2, conversation_manager=conversation_manager_2, @@ -1648,7 +1659,7 @@ def test_agent_with_session_and_conversation_manager(): ) # Assert that the second agent was initialized properly, and that the messages of both agents are equal assert agent.messages == agent_2.messages - # Asser the conversation manager was initialized properly + # Assert the conversation manager was initialized properly assert agent.conversation_manager.removed_message_count == agent_2.conversation_manager.removed_message_count diff --git a/tests/strands/agent/test_conversation_manager.py b/tests/strands/agent/test_conversation_manager.py index fd88954e8..6db9897f1 100644 --- a/tests/strands/agent/test_conversation_manager.py +++ b/tests/strands/agent/test_conversation_manager.py @@ -78,6 +78,7 @@ def conversation_manager(request): ], ), # 5 - Remove dangling assistant message with tool use and user message without tool result + # Must start with a user message, so we skip the assistant message ( {"window_size": 3}, [ @@ -87,7 +88,6 @@ def conversation_manager(request): {"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]}, ], [ - {"role": "assistant", "content": [{"text": "First response"}]}, {"role": "user", "content": [{"text": "Use a tool"}]}, {"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]}, ], @@ -107,19 +107,22 @@ def conversation_manager(request): ], ), # 7 - Message count above max window size - Preserve tool use/tool result pairs + # Cannot start with assistant or orphaned toolResult, so trim advances to next plain user message ( {"window_size": 2}, [ - {"role": "user", "content": [{"toolResult": {"toolUseId": "123", "content": [], "status": "success"}}]}, + {"role": "user", "content": [{"text": "Hello"}]}, {"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]}, - {"role": "user", "content": [{"toolResult": {"toolUseId": "456", "content": [], "status": "success"}}]}, + {"role": "user", "content": [{"toolResult": {"toolUseId": "123", "content": [], "status": "success"}}]}, + {"role": "assistant", "content": [{"text": "Done"}]}, + {"role": "user", "content": [{"text": "Next"}]}, ], [ - {"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]}, - {"role": "user", "content": [{"toolResult": {"toolUseId": "456", "content": [], "status": "success"}}]}, + {"role": "user", "content": [{"text": "Next"}]}, ], ), # 8 - Test sliding window behavior - preserve tool use/result pairs across cut boundary + # Must start with user message (not assistant, not orphaned toolResult), so trim advances to plain user msg ( {"window_size": 3}, [ @@ -127,14 +130,14 @@ def conversation_manager(request): {"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]}, {"role": "user", "content": [{"toolResult": {"toolUseId": "123", "content": [], "status": "success"}}]}, {"role": "assistant", "content": [{"text": "Response after tool use"}]}, + {"role": "user", "content": [{"text": "Follow up"}]}, ], [ - {"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]}, - {"role": "user", "content": [{"toolResult": {"toolUseId": "123", "content": [], "status": "success"}}]}, - {"role": "assistant", "content": [{"text": "Response after tool use"}]}, + {"role": "user", "content": [{"text": "Follow up"}]}, ], ), # 9 - Test sliding window with multiple tool pairs that need preservation + # Must start with user message; orphaned toolResult is skipped, lands on plain user text ( {"window_size": 4}, [ @@ -144,11 +147,10 @@ def conversation_manager(request): {"role": "assistant", "content": [{"toolUse": {"toolUseId": "456", "name": "tool2", "input": {}}}]}, {"role": "user", "content": [{"toolResult": {"toolUseId": "456", "content": [], "status": "success"}}]}, {"role": "assistant", "content": [{"text": "Final response"}]}, + {"role": "user", "content": [{"text": "Another question"}]}, ], [ - {"role": "assistant", "content": [{"toolUse": {"toolUseId": "456", "name": "tool2", "input": {}}}]}, - {"role": "user", "content": [{"toolResult": {"toolUseId": "456", "content": [], "status": "success"}}]}, - {"role": "assistant", "content": [{"text": "Final response"}]}, + {"role": "user", "content": [{"text": "Another question"}]}, ], ), ], @@ -161,6 +163,43 @@ def test_apply_management(conversation_manager, messages, expected_messages): assert messages == expected_messages +def test_sliding_window_forces_user_message_start(): + """Test that trimmed conversation always starts with a user message (GitHub #2085).""" + manager = SlidingWindowConversationManager(window_size=3, should_truncate_results=False) + messages = [ + {"role": "user", "content": [{"text": "Hello"}]}, + {"role": "assistant", "content": [{"text": "Hi"}]}, + {"role": "user", "content": [{"text": "How are you?"}]}, + {"role": "assistant", "content": [{"text": "Good"}]}, + {"role": "user", "content": [{"text": "Great"}]}, + ] + test_agent = Agent(messages=messages) + manager.apply_management(test_agent) + + assert len(messages) == 3 + assert messages[0]["role"] == "user" + assert messages[0]["content"] == [{"text": "How are you?"}] + + +def test_sliding_window_happy_path_preserves_window_size(): + """In a typical user/assistant conversation, trimming preserves close to window_size messages.""" + manager = SlidingWindowConversationManager(window_size=4, should_truncate_results=False) + messages = [ + {"role": "user", "content": [{"text": "First"}]}, + {"role": "assistant", "content": [{"text": "First response"}]}, + {"role": "user", "content": [{"text": "Second"}]}, + {"role": "assistant", "content": [{"text": "Second response"}]}, + {"role": "user", "content": [{"text": "Third"}]}, + {"role": "assistant", "content": [{"text": "Third response"}]}, + ] + test_agent = Agent(messages=messages) + manager.apply_management(test_agent) + + assert len(messages) == 4 + assert messages[0]["role"] == "user" + assert messages[0]["content"] == [{"text": "Second"}] + + def test_sliding_window_conversation_manager_with_untrimmable_history_raises_context_window_overflow_exception(): manager = SlidingWindowConversationManager(1, False) messages = [ @@ -171,7 +210,22 @@ def test_sliding_window_conversation_manager_with_untrimmable_history_raises_con test_agent = Agent(messages=messages) with pytest.raises(ContextWindowOverflowException): - manager.apply_management(test_agent) + manager.reduce_context(test_agent, e=RuntimeError("context overflow")) + + assert messages == original_messages + + +def test_sliding_window_no_valid_trim_point_without_error_does_not_raise(): + """When no valid trim point exists during routine management (no error), messages are left unchanged.""" + manager = SlidingWindowConversationManager(1, False) + messages = [ + {"role": "assistant", "content": [{"toolUse": {"toolUseId": "456", "name": "tool1", "input": {}}}]}, + {"role": "user", "content": [{"toolResult": {"toolUseId": "789", "content": [], "status": "success"}}]}, + ] + original_messages = messages.copy() + test_agent = Agent(messages=messages) + + manager.apply_management(test_agent) assert messages == original_messages diff --git a/tests/strands/models/test_anthropic.py b/tests/strands/models/test_anthropic.py index 8f4581655..d1f1df3b3 100644 --- a/tests/strands/models/test_anthropic.py +++ b/tests/strands/models/test_anthropic.py @@ -982,9 +982,7 @@ def model_dump_with_warning(): events = await alist(response) # Verify no Pydantic serialization warnings were emitted - pydantic_warnings = [ - w for w in caught_warnings if "PydanticSerializationUnexpectedValue" in str(w.message) - ] + pydantic_warnings = [w for w in caught_warnings if "PydanticSerializationUnexpectedValue" in str(w.message)] assert len(pydantic_warnings) == 0, f"Unexpected Pydantic warnings: {pydantic_warnings}" # Verify the message_stop event was still processed correctly