Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 14 additions & 10 deletions sentry_sdk/ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ def _truncate_single_message_content_if_present(
return message


def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) -> int:
def _find_truncation_index(messages: "List[Any]", max_bytes: int) -> int:
"""
Find the index of the first message that would exceed the max bytes limit.
Compute the individual message sizes, and return the index of the first message from the back
Expand Down Expand Up @@ -668,10 +668,10 @@ def redact_blob_message_parts(


def truncate_messages_by_size(
messages: "List[Dict[str, Any]]",
messages: "List[Any]",
max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS,
) -> "Tuple[List[Dict[str, Any]], int]":
) -> "Tuple[List[Any], int]":
"""
Returns a truncated messages list, consisting of
- the last message, with its content truncated to `max_single_message_chars` characters,
Expand All @@ -682,18 +682,22 @@ def truncate_messages_by_size(
In the single message case, the serialized message size may exceed `max_bytes`, because
truncation is based only on character count in that case.
"""
serialized_json = json.dumps(messages, separators=(",", ":"))
normalized_messages = _normalize_data(messages, unpack=False)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Normalization converts None values to the string "None"

High Severity

Calling `_normalize_data(messages, unpack=False)` before truncation corrupts `None` values in message dicts. The `_normalize_data` fallback at line 488 converts any non-primitive (including `None`) to `str(data)`, turning `None` into the string `"None"`. This is very common in practice — OpenAI assistant messages with tool calls have `"content": null`. The existing parameterized test with `content=None` will also fail, since it asserts `result[0]["content"] is content`.

Additional Locations (1)
Fix in Cursor Fix in Web

if not isinstance(normalized_messages, list):
normalized_messages = [normalized_messages]

serialized_json = json.dumps(normalized_messages, separators=(",", ":"))
current_size = len(serialized_json.encode("utf-8"))

if current_size <= max_bytes:
return messages, 0
return normalized_messages, 0

truncation_index = _find_truncation_index(messages, max_bytes)
if truncation_index < len(messages):
truncated_messages = messages[truncation_index:]
truncation_index = _find_truncation_index(normalized_messages, max_bytes)
if truncation_index < len(normalized_messages):
truncated_messages = normalized_messages[truncation_index:]
else:
truncation_index = len(messages) - 1
truncated_messages = messages[-1:]
truncation_index = len(normalized_messages) - 1
truncated_messages = normalized_messages[-1:]

if len(truncated_messages) == 1:
truncated_messages[0] = _truncate_single_message_content_if_present(
Expand Down
21 changes: 21 additions & 0 deletions tests/test_ai_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,27 @@ def test_single_message_truncation_list_content_multiple_text_parts(self):
# Second part gets truncated to 0 chars + ellipsis
assert parts[1]["text"] == "..."

def test_truncate_handles_pydantic_like_objects(self):
    """Objects exposing ``model_dump`` should be normalized before JSON sizing."""

    class PydanticLike:
        # Minimal stand-in for an SDK model object: it only stores a payload
        # and hands it back from ``model_dump``.
        def __init__(self, payload):
            self.payload = payload

        def model_dump(self):
            return self.payload

    function_call = PydanticLike({"type": "function_call", "name": "notify"})
    messages = [{"role": "user", "content": "hello"}, function_call]

    # The byte budget is far larger than the input, so nothing is dropped.
    result, truncation_index = truncate_messages_by_size(
        messages, max_bytes=10_000
    )

    assert truncation_index == 0
    assert len(result) == 2
    # The object is expected to come back as the plain dict it dumps to.
    assert result[1] == {"type": "function_call", "name": "notify"}

@pytest.mark.parametrize("content", [None, 42, 3.14, True])
def test_single_message_truncation_non_str_non_list_content(self, content):
messages = [{"role": "user", "content": content}]
Expand Down
Loading