Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion sentry_sdk/scrubber.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@
"remote_addr",
]

# Fields that must be removed entirely rather than replaced with [Filtered],
# because their value must conform to a specific format (e.g. a valid IP address)
# and [Filtered] would be a protocol violation.
DEFAULT_REMOVE_LIST = [
"ip_address",
]


class EventScrubber:
def __init__(
Expand All @@ -65,6 +72,7 @@ def __init__(
recursive: bool = False,
send_default_pii: bool = False,
pii_denylist: "Optional[List[str]]" = None,
remove_list: "Optional[List[str]]" = None,
) -> None:
"""
A scrubber that goes through the event payload and removes sensitive data configured through denylists.
Expand All @@ -73,6 +81,10 @@ def __init__(
:param recursive: Whether to scrub the event payload recursively, default False.
:param send_default_pii: Whether pii is sending is on, pii fields are not scrubbed.
:param pii_denylist: The denylist to use for scrubbing when pii is not sent, defaults to DEFAULT_PII_DENYLIST.
:param remove_list: Keys that should be removed entirely instead of being replaced
with ``[Filtered]``. This is necessary for fields like ``ip_address`` whose
values must conform to a specific format; replacing them with ``[Filtered]``
would be a protocol violation. Defaults to DEFAULT_REMOVE_LIST.
"""
self.denylist = DEFAULT_DENYLIST.copy() if denylist is None else denylist

Expand All @@ -85,6 +97,11 @@ def __init__(
self.denylist = [x.lower() for x in self.denylist]
self.recursive = recursive

self.remove_list = (
DEFAULT_REMOVE_LIST.copy() if remove_list is None else remove_list
)
self.remove_list = [x.lower() for x in self.remove_list]

def scrub_list(self, lst: object) -> None:
"""
If a list is passed to this method, the method recursively searches the list and any
Expand All @@ -109,15 +126,22 @@ def scrub_dict(self, d: object) -> None:
if not isinstance(d, dict):
return

keys_to_remove = []
for k, v in d.items():
# The cast is needed because mypy is not smart enough to figure out that k must be a
# string after the isinstance check.
if isinstance(k, str) and k.lower() in self.denylist:
d[k] = AnnotatedValue.substituted_because_contains_sensitive_data()
if k.lower() in self.remove_list:
keys_to_remove.append(k)
else:
d[k] = AnnotatedValue.substituted_because_contains_sensitive_data()
elif self.recursive:
self.scrub_dict(v) # no-op unless v is a dict
self.scrub_list(v) # no-op unless v is a list

for k in keys_to_remove:
del d[k]

def scrub_request(self, event: "Event") -> None:
with capture_internal_exceptions():
if "request" in event:
Expand Down
82 changes: 79 additions & 3 deletions tests/test_scrubber.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from sentry_sdk import capture_exception, capture_event, start_transaction, start_span
from sentry_sdk.utils import event_from_exception
from sentry_sdk.scrubber import EventScrubber
from sentry_sdk.scrubber import EventScrubber, DEFAULT_REMOVE_LIST
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unused import of DEFAULT_REMOVE_LIST in tests

Low Severity

DEFAULT_REMOVE_LIST is imported but never referenced in any test function. It was likely intended for use in a test (e.g., to build a custom list via DEFAULT_REMOVE_LIST + [...]) but ended up unused. This is dead code introduced by this PR.

Fix in Cursor Fix in Web

from tests.conftest import ApproxDict


Expand Down Expand Up @@ -46,17 +46,19 @@ def test_request_scrubbing(sentry_init, capture_events):
"COOKIE": "[Filtered]",
"authorization": "[Filtered]",
"ORIGIN": "google.com",
"ip_address": "[Filtered]",
},
"cookies": {"sessionid": "[Filtered]", "foo": "bar"},
"data": {"token": "[Filtered]", "foo": "bar"},
}

# ip_address is removed entirely (not replaced with [Filtered]) to avoid
# a protocol violation, so it should not appear in the headers or _meta.
assert "ip_address" not in event["request"]["headers"]

assert event["_meta"]["request"] == {
"headers": {
"COOKIE": {"": {"rem": [["!config", "s"]]}},
"authorization": {"": {"rem": [["!config", "s"]]}},
"ip_address": {"": {"rem": [["!config", "s"]]}},
},
"cookies": {"sessionid": {"": {"rem": [["!config", "s"]]}}},
"data": {"token": {"": {"rem": [["!config", "s"]]}}},
Expand Down Expand Up @@ -248,3 +250,77 @@ def test_recursive_scrubber_does_not_override_original(sentry_init, capture_even
(frame,) = frames
assert data["csrf"] == "secret"
assert frame["vars"]["data"]["csrf"] == "[Filtered]"


def test_user_ip_address_removed(sentry_init, capture_events):
"""ip_address in user dict must be removed entirely, not replaced with
[Filtered], because [Filtered] is not a valid IP and causes a protocol
violation on the server side. See GH-5701."""
sentry_init()
events = capture_events()

capture_event(
{
"message": "hi",
"user": {"id": "1", "ip_address": "127.0.0.1"},
}
)

(event,) = events
assert "ip_address" not in event["user"]
assert event["user"]["id"] == "1"


def test_user_ip_address_not_removed_when_pii_enabled(sentry_init, capture_events):
"""When send_default_pii=True, ip_address should not be scrubbed at all."""
sentry_init(send_default_pii=True)
events = capture_events()

capture_event(
{
"message": "hi",
"user": {"id": "1", "ip_address": "127.0.0.1"},
}
)

(event,) = events
assert event["user"]["ip_address"] == "127.0.0.1"


def test_custom_remove_list(sentry_init, capture_events):
"""Users can configure which keys are removed rather than replaced."""
sentry_init(
event_scrubber=EventScrubber(remove_list=["token"]),
)
events = capture_events()

capture_event(
{
"message": "hi",
"extra": {"token": "secret", "safe": "keep"},
}
)

(event,) = events
# "token" is in the denylist AND the remove_list, so it gets deleted
assert "token" not in event["extra"]
assert event["extra"]["safe"] == "keep"


def test_empty_remove_list_replaces_ip_address(sentry_init, capture_events):
"""Setting remove_list=[] restores the old replace-with-[Filtered] behavior."""
sentry_init(
event_scrubber=EventScrubber(remove_list=[]),
)
events = capture_events()

capture_event(
{
"message": "hi",
"user": {"id": "1", "ip_address": "127.0.0.1"},
}
)

(event,) = events
# With an empty remove_list, ip_address is replaced, not removed
assert event["user"]["ip_address"] == "[Filtered]"
Loading