Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,9 @@ def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: A
**kwargs: Additional keyword arguments for future extensibility.

Raises:
ContextWindowOverflowException: If the context cannot be reduced further.
Such as when the conversation is already minimal or when tool result messages cannot be properly
converted.
ContextWindowOverflowException: If the context cannot be reduced further and a context overflow
error was provided (e is not None). When called during routine window management (e is None),
logs a warning and returns without modification.
"""
messages = agent.messages

Expand All @@ -188,24 +188,43 @@ def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: A
# If the number of messages is less than the window_size, then we default to 2, otherwise, trim to window size
trim_index = 2 if len(messages) <= self.window_size else len(messages) - self.window_size

# Find the next valid trim_index
# Find the next valid trim point that:
# 1. Starts with a user message (required by most model providers)
# 2. Does not start with an orphaned toolResult
# 3. Does not start with a toolUse unless its toolResult immediately follows
while trim_index < len(messages):
# Must start with a user message
if messages[trim_index]["role"] != "user":
trim_index += 1
continue

if (
# Oldest message cannot be a toolResult because it needs a toolUse preceding it
any("toolResult" in content for content in messages[trim_index]["content"])
or (
# Oldest message can be a toolUse only if a toolResult immediately follows it.
# Note: toolUse content normally appears only in assistant messages, but this
# check is kept as a defensive safeguard for non-standard message formats.
any("toolUse" in content for content in messages[trim_index]["content"])
and trim_index + 1 < len(messages)
and not any("toolResult" in content for content in messages[trim_index + 1]["content"])
and not (
trim_index + 1 < len(messages)
and any("toolResult" in content for content in messages[trim_index + 1]["content"])
)
)
):
trim_index += 1
else:
break
else:
# If we didn't find a valid trim_index, then we throw
raise ContextWindowOverflowException("Unable to trim conversation context!") from e
# If we didn't find a valid trim_index
if e is not None:
raise ContextWindowOverflowException("Unable to trim conversation context!") from e
logger.warning(
"window_size=<%s>, message_count=<%s> | unable to trim conversation context, no valid trim point found",
self.window_size,
len(messages),
)
return

# trim_index represents the number of messages being removed from the agents messages array
self.removed_message_count += trim_index
Expand Down
10 changes: 6 additions & 4 deletions src/strands/models/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,10 +410,12 @@ async def stream(
if event.type == "message_stop":
# Build dict directly to avoid Pydantic serialization warnings
# when the message contains ParsedTextBlock objects (issue #1746)
yield self.format_chunk({
"type": "message_stop",
"message": {"stop_reason": event.message.stop_reason},
})
yield self.format_chunk(
{
"type": "message_stop",
"message": {"stop_reason": event.message.stop_reason},
}
)
else:
yield self.format_chunk(event.model_dump())

Expand Down
25 changes: 18 additions & 7 deletions tests/strands/agent/test_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -1615,10 +1615,15 @@ def test_agent_restored_from_session_management_with_correct_index():


def test_agent_with_session_and_conversation_manager():
mock_model = MockedModelProvider([{"role": "assistant", "content": [{"text": "hello!"}]}])
mock_model = MockedModelProvider(
[
{"role": "assistant", "content": [{"text": "first"}]},
{"role": "assistant", "content": [{"text": "second"}]},
]
)
mock_session_repository = MockedSessionRepository()
session_manager = RepositorySessionManager(session_id="123", session_repository=mock_session_repository)
conversation_manager = SlidingWindowConversationManager(window_size=1)
conversation_manager = SlidingWindowConversationManager(window_size=2)
# Create an agent with a mocked model and session repository
agent = Agent(
session_manager=session_manager,
Expand All @@ -1633,22 +1638,28 @@ def test_agent_with_session_and_conversation_manager():

agent("Hello!")

# After invoking, assert that the messages were persisted
# After first invocation: [user, assistant] — fits in window, no trimming
assert len(mock_session_repository.list_messages("123", agent.agent_id)) == 2
# Assert conversation manager reduced the messages
assert len(agent.messages) == 1
assert len(agent.messages) == 2

agent("Second question")

# After second invocation: [user, assistant, user, assistant] exceeds window_size=2
# Conversation manager trims to 2 messages starting with a user message
assert len(agent.messages) == 2
assert agent.messages[0]["role"] == "user"

# Initialize another agent using the same session
session_manager_2 = RepositorySessionManager(session_id="123", session_repository=mock_session_repository)
conversation_manager_2 = SlidingWindowConversationManager(window_size=1)
conversation_manager_2 = SlidingWindowConversationManager(window_size=2)
agent_2 = Agent(
session_manager=session_manager_2,
conversation_manager=conversation_manager_2,
model=mock_model,
)
# Assert that the second agent was initialized properly, and that the messages of both agents are equal
assert agent.messages == agent_2.messages
# Asser the conversation manager was initialized properly
# Assert the conversation manager was initialized properly
assert agent.conversation_manager.removed_message_count == agent_2.conversation_manager.removed_message_count


Expand Down
78 changes: 66 additions & 12 deletions tests/strands/agent/test_conversation_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def conversation_manager(request):
],
),
# 5 - Remove dangling assistant message with tool use and user message without tool result
# Must start with a user message, so we skip the assistant message
(
{"window_size": 3},
[
Expand All @@ -87,7 +88,6 @@ def conversation_manager(request):
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
],
[
{"role": "assistant", "content": [{"text": "First response"}]},
{"role": "user", "content": [{"text": "Use a tool"}]},
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
],
Expand All @@ -107,34 +107,37 @@ def conversation_manager(request):
],
),
# 7 - Message count above max window size - Preserve tool use/tool result pairs
# Cannot start with assistant or orphaned toolResult, so trim advances to next plain user message
(
{"window_size": 2},
[
{"role": "user", "content": [{"toolResult": {"toolUseId": "123", "content": [], "status": "success"}}]},
{"role": "user", "content": [{"text": "Hello"}]},
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
{"role": "user", "content": [{"toolResult": {"toolUseId": "456", "content": [], "status": "success"}}]},
{"role": "user", "content": [{"toolResult": {"toolUseId": "123", "content": [], "status": "success"}}]},
{"role": "assistant", "content": [{"text": "Done"}]},
{"role": "user", "content": [{"text": "Next"}]},
],
[
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
{"role": "user", "content": [{"toolResult": {"toolUseId": "456", "content": [], "status": "success"}}]},
{"role": "user", "content": [{"text": "Next"}]},
],
),
# 8 - Test sliding window behavior - preserve tool use/result pairs across cut boundary
# Must start with user message (not assistant, not orphaned toolResult), so trim advances to plain user msg
(
{"window_size": 3},
[
{"role": "user", "content": [{"text": "First message"}]},
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
{"role": "user", "content": [{"toolResult": {"toolUseId": "123", "content": [], "status": "success"}}]},
{"role": "assistant", "content": [{"text": "Response after tool use"}]},
{"role": "user", "content": [{"text": "Follow up"}]},
],
[
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "tool1", "input": {}}}]},
{"role": "user", "content": [{"toolResult": {"toolUseId": "123", "content": [], "status": "success"}}]},
{"role": "assistant", "content": [{"text": "Response after tool use"}]},
{"role": "user", "content": [{"text": "Follow up"}]},
],
),
# 9 - Test sliding window with multiple tool pairs that need preservation
# Must start with user message; orphaned toolResult is skipped, lands on plain user text
(
{"window_size": 4},
[
Expand All @@ -144,11 +147,10 @@ def conversation_manager(request):
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "456", "name": "tool2", "input": {}}}]},
{"role": "user", "content": [{"toolResult": {"toolUseId": "456", "content": [], "status": "success"}}]},
{"role": "assistant", "content": [{"text": "Final response"}]},
{"role": "user", "content": [{"text": "Another question"}]},
],
[
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "456", "name": "tool2", "input": {}}}]},
{"role": "user", "content": [{"toolResult": {"toolUseId": "456", "content": [], "status": "success"}}]},
{"role": "assistant", "content": [{"text": "Final response"}]},
{"role": "user", "content": [{"text": "Another question"}]},
],
),
],
Expand All @@ -161,6 +163,43 @@ def test_apply_management(conversation_manager, messages, expected_messages):
assert messages == expected_messages


def test_sliding_window_forces_user_message_start():
"""Test that trimmed conversation always starts with a user message (GitHub #2085)."""
manager = SlidingWindowConversationManager(window_size=3, should_truncate_results=False)
messages = [
{"role": "user", "content": [{"text": "Hello"}]},
{"role": "assistant", "content": [{"text": "Hi"}]},
{"role": "user", "content": [{"text": "How are you?"}]},
{"role": "assistant", "content": [{"text": "Good"}]},
{"role": "user", "content": [{"text": "Great"}]},
]
test_agent = Agent(messages=messages)
manager.apply_management(test_agent)

assert len(messages) == 3
assert messages[0]["role"] == "user"
assert messages[0]["content"] == [{"text": "How are you?"}]


def test_sliding_window_happy_path_preserves_window_size():
"""In a typical user/assistant conversation, trimming preserves close to window_size messages."""
manager = SlidingWindowConversationManager(window_size=4, should_truncate_results=False)
messages = [
{"role": "user", "content": [{"text": "First"}]},
{"role": "assistant", "content": [{"text": "First response"}]},
{"role": "user", "content": [{"text": "Second"}]},
{"role": "assistant", "content": [{"text": "Second response"}]},
{"role": "user", "content": [{"text": "Third"}]},
{"role": "assistant", "content": [{"text": "Third response"}]},
]
test_agent = Agent(messages=messages)
manager.apply_management(test_agent)

assert len(messages) == 4
assert messages[0]["role"] == "user"
assert messages[0]["content"] == [{"text": "Second"}]


def test_sliding_window_conversation_manager_with_untrimmable_history_raises_context_window_overflow_exception():
manager = SlidingWindowConversationManager(1, False)
messages = [
Expand All @@ -171,7 +210,22 @@ def test_sliding_window_conversation_manager_with_untrimmable_history_raises_con
test_agent = Agent(messages=messages)

with pytest.raises(ContextWindowOverflowException):
manager.apply_management(test_agent)
manager.reduce_context(test_agent, e=RuntimeError("context overflow"))

assert messages == original_messages


def test_sliding_window_no_valid_trim_point_without_error_does_not_raise():
"""When no valid trim point exists during routine management (no error), messages are left unchanged."""
manager = SlidingWindowConversationManager(1, False)
messages = [
{"role": "assistant", "content": [{"toolUse": {"toolUseId": "456", "name": "tool1", "input": {}}}]},
{"role": "user", "content": [{"toolResult": {"toolUseId": "789", "content": [], "status": "success"}}]},
]
original_messages = messages.copy()
test_agent = Agent(messages=messages)

manager.apply_management(test_agent)

assert messages == original_messages

Expand Down
4 changes: 1 addition & 3 deletions tests/strands/models/test_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,9 +982,7 @@ def model_dump_with_warning():
events = await alist(response)

# Verify no Pydantic serialization warnings were emitted
pydantic_warnings = [
w for w in caught_warnings if "PydanticSerializationUnexpectedValue" in str(w.message)
]
pydantic_warnings = [w for w in caught_warnings if "PydanticSerializationUnexpectedValue" in str(w.message)]
assert len(pydantic_warnings) == 0, f"Unexpected Pydantic warnings: {pydantic_warnings}"

# Verify the message_stop event was still processed correctly
Expand Down
Loading