Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ __pycache__/

# vscode
.vscode/

# Claude Code
.claude/
dbt_internal_packages/

/package-lock.yml
9 changes: 7 additions & 2 deletions integration_tests/tests/adapter_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,14 @@ def _serialize_value(val: Any) -> Any:
* Everything else is returned unchanged.
"""
if isinstance(val, Decimal):
# Match the Jinja macro: normalize, then int or float
# Match the Jinja macro: normalize, then int or float.
# Note: for special values (Infinity, NaN), as_tuple().exponent is a
# string ('F' or 'n'), not an int — convert those directly to float.
normalized = val.normalize()
if normalized.as_tuple().exponent >= 0:
exponent = normalized.as_tuple().exponent
if isinstance(exponent, str):
return float(normalized)
if exponent >= 0:
return int(normalized)
return float(normalized)
if isinstance(val, (datetime, date, time)):
Expand Down
17 changes: 17 additions & 0 deletions macros/edr/materializations/test/test.sql
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,25 @@
{% set disable_test_samples = flattened_test["meta"]["disable_test_samples"] %}
{% endif %}

{#
Sampling control precedence (highest to lowest):
1. disable_test_samples meta flag — explicit per-test kill switch, always wins.
2. show_sample_rows tag (model/test/column) — opt-in when
enable_samples_on_show_sample_rows_tags is true. If the tag is present,
skip all further checks and keep the sample_limit.
3. enable_samples_on_show_sample_rows_tags — hide-by-default mode: if the
feature is on but no show_sample_rows tag was found, disable samples.
4. PII tag detection (model/test/column) — hide when disable_samples_on_pii_tags
is true and a PII tag is detected at any level.
#}
{% if disable_test_samples %} {% set sample_limit = 0 %}
{% elif elementary.should_show_sample_rows(flattened_test) %}
{# Tag explicitly opts in — keep sample_limit as-is #}
{% elif elementary.get_config_var("enable_samples_on_show_sample_rows_tags") %}
{# Feature is on but no show_sample_rows tag found — hide by default #}
{% set sample_limit = 0 %}
{% elif elementary.is_pii_table(flattened_test) %} {% set sample_limit = 0 %}
{% elif elementary.is_pii_test(flattened_test) %} {% set sample_limit = 0 %}
{% elif elementary.should_disable_sampling_for_pii(flattened_test) %}
{% set sample_limit = 0 %}
{% endif %}
Expand Down
2 changes: 2 additions & 0 deletions macros/edr/system/system_utils/get_config_var.sql
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@
"anomaly_exclude_metrics": none,
"disable_samples_on_pii_tags": false,
"pii_tags": ["pii"],
"enable_samples_on_show_sample_rows_tags": false,
"show_sample_rows_tags": ["show_sample_rows"],
"bigquery_disable_partitioning": false,
"bigquery_disable_clustering": false,
"upload_only_current_project_artifacts": false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,36 @@
{% set column_nodes = parent_model.get("columns") %}
{% if not column_nodes %} {% do return(pii_columns) %} {% endif %}

{#
A column tagged show_sample_rows (without pii) should still appear in samples
even when disable_samples_on_pii_tags is active — it is intentionally opted in.
We only skip it from the PII columns list if it does NOT also carry a PII tag,
since PII always takes precedence over show_sample_rows.
#}
{% set enable_show_tags = elementary.get_config_var(
"enable_samples_on_show_sample_rows_tags"
) %}
{% set raw_show_tags = elementary.get_config_var("show_sample_rows_tags") %}
{% if raw_show_tags is string %} {% set show_tags = [raw_show_tags | lower] %}
{% else %} {% set show_tags = (raw_show_tags or []) | map("lower") | list %}
{% endif %}

{% for column_node in column_nodes.values() %}
{% set all_column_tags_lower = elementary.get_column_tags(column_node) %}

{# Skip column from PII list only if show_sample_rows is set and pii is not #}
{% set has_show_tag = enable_show_tags and (
elementary.lists_intersection(all_column_tags_lower, show_tags)
| length
> 0
) %}
{% set has_pii_tag = (
elementary.lists_intersection(all_column_tags_lower, pii_tags)
| length
> 0
) %}
{% if has_show_tag and not has_pii_tag %} {% continue %} {% endif %}

{% for pii_tag in pii_tags %}
{% if pii_tag in all_column_tags_lower %}
{% do pii_columns.append(column_node.get("name")) %} {% break %}
Expand Down
16 changes: 6 additions & 10 deletions macros/edr/system/system_utils/is_pii_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,16 @@
{% if not disable_samples_on_pii_tags %} {% do return(false) %} {% endif %}

{% set raw_pii_tags = elementary.get_config_var("pii_tags") %}
{% set pii_tags = (
(raw_pii_tags if raw_pii_tags is iterable else [raw_pii_tags])
| map("lower")
| list
) %}
{% if raw_pii_tags is string %} {% set pii_tags = [raw_pii_tags | lower] %}
{% else %} {% set pii_tags = (raw_pii_tags or []) | map("lower") | list %}
{% endif %}

{% set raw_model_tags = elementary.insensitive_get_dict_value(
flattened_test, "model_tags", []
) %}
{% set model_tags = (
(raw_model_tags if raw_model_tags is iterable else [raw_model_tags])
| map("lower")
| list
) %}
{% if raw_model_tags is string %} {% set model_tags = [raw_model_tags | lower] %}
{% else %} {% set model_tags = (raw_model_tags or []) | map("lower") | list %}
{% endif %}

{% set intersection = elementary.lists_intersection(model_tags, pii_tags) %}
{% set is_pii = intersection | length > 0 %}
Expand Down
24 changes: 24 additions & 0 deletions macros/edr/system/system_utils/is_pii_test.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{#
    Test-level PII detection.

    Sits alongside is_pii_table (model-level) and should_disable_sampling_for_pii
    (column-level): when the test itself carries one of the configured PII tags,
    its samples are disabled, the same way the other two levels behave.

    Returns false immediately when disable_samples_on_pii_tags is off.
    Tag comparison is case-insensitive (both sides are lowercased).
#}
{% macro is_pii_test(flattened_test) %}
    {% if not elementary.get_config_var("disable_samples_on_pii_tags") %}
        {% do return(false) %}
    {% endif %}

    {#
        A bare string must be wrapped in a list before lowercasing: strings are
        iterable in Jinja, so mapping over one would yield single characters.
    #}
    {% set configured_tags = elementary.get_config_var("pii_tags") %}
    {% set pii_tags = (
        [configured_tags | lower]
        if configured_tags is string
        else (configured_tags or []) | map("lower") | list
    ) %}

    {% set declared_tags = elementary.insensitive_get_dict_value(
        flattened_test, "tags", []
    ) %}
    {% set test_tags = (
        [declared_tags | lower]
        if declared_tags is string
        else (declared_tags or []) | map("lower") | list
    ) %}

    {% set shared_tags = elementary.lists_intersection(test_tags, pii_tags) %}
    {% do return(shared_tags | length > 0) %}
{% endmacro %}
112 changes: 112 additions & 0 deletions macros/edr/system/system_utils/should_show_sample_rows.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
{#
    When enable_samples_on_show_sample_rows_tags is true, samples are hidden by
    default and only shown when a show_sample_rows tag is present.

    Checks three levels in order: model → test → column (test's target column only).
    Returns true if any level has a matching show_sample_rows tag.

    PII precedence (only when disable_samples_on_pii_tags is also enabled):
      * A PII tag on the model or on the test itself blocks show_sample_rows at
        every level (model, test, and column) — this returns false.
      * A PII tag on the test's target column blocks a column-level
        show_sample_rows tag on that same column.

    All tag matching is case-insensitive (tags are normalized to lowercase).
#}
{% macro should_show_sample_rows(flattened_test) %}
    {% if not elementary.get_config_var("enable_samples_on_show_sample_rows_tags") %}
        {% do return(false) %}
    {% endif %}

    {#
        Tag lists are normalized with `is string` (not `is iterable`) because
        strings are iterable in Jinja — iterating a string gives individual characters.
    #}
    {% set raw_show_tags = elementary.get_config_var("show_sample_rows_tags") %}
    {% if raw_show_tags is string %} {% set show_tags = [raw_show_tags | lower] %}
    {% else %} {% set show_tags = (raw_show_tags or []) | map("lower") | list %}
    {% endif %}

    {# Resolve PII tags once upfront; empty list when PII protection is off #}
    {% set check_pii = elementary.get_config_var("disable_samples_on_pii_tags") %}
    {% if check_pii %}
        {% set raw_pii_tags = elementary.get_config_var("pii_tags") %}
        {% if raw_pii_tags is string %} {% set pii_tags = [raw_pii_tags | lower] %}
        {% else %} {% set pii_tags = (raw_pii_tags or []) | map("lower") | list %}
        {% endif %}
    {% else %} {% set pii_tags = [] %}
    {% endif %}

    {% set raw_model_tags = elementary.insensitive_get_dict_value(
        flattened_test, "model_tags", []
    ) %}
    {% if raw_model_tags is string %} {% set model_tags = [raw_model_tags | lower] %}
    {% else %} {% set model_tags = (raw_model_tags or []) | map("lower") | list %}
    {% endif %}

    {% set raw_test_tags = elementary.insensitive_get_dict_value(
        flattened_test, "tags", []
    ) %}
    {% if raw_test_tags is string %} {% set test_tags = [raw_test_tags | lower] %}
    {% else %} {% set test_tags = (raw_test_tags or []) | map("lower") | list %}
    {% endif %}

    {#
        PII guard: a PII tag on the model or on the test wins over any
        show_sample_rows tag, whichever level that tag was found on. Returning
        false here lets the caller fall through to its PII checks instead of
        short-circuiting past them.
    #}
    {% if check_pii and (
        elementary.lists_intersection(model_tags, pii_tags) | length > 0
        or elementary.lists_intersection(test_tags, pii_tags) | length > 0
    ) %}
        {% do return(false) %}
    {% endif %}

    {# Model-level: show_sample_rows on the model applies to all its tests #}
    {% if elementary.lists_intersection(model_tags, show_tags) | length > 0 %}
        {% do return(true) %}
    {% endif %}

    {# Test-level: show_sample_rows on the test definition itself #}
    {% if elementary.lists_intersection(test_tags, show_tags) | length > 0 %}
        {% do return(true) %}
    {% endif %}

    {#
        Column-level: only checks the specific column the test targets (test_column_name),
        not all columns on the model. This avoids showing samples for unrelated columns.
    #}
    {% set test_column_name = elementary.insensitive_get_dict_value(
        flattened_test, "test_column_name"
    ) %}
    {% if test_column_name %}
        {% set parent_model_unique_id = elementary.insensitive_get_dict_value(
            flattened_test, "parent_model_unique_id"
        ) %}
        {% set parent_model = elementary.get_node(parent_model_unique_id) %}
        {% if parent_model %}
            {# `or {}` also covers a "columns" key that is present but none #}
            {% set column_nodes = parent_model.get("columns") or {} %}
            {% for col_name, col_node in column_nodes.items() %}
                {% if col_name | lower == test_column_name | lower %}
                    {% set col_tags = elementary.get_column_tags(col_node) %}
                    {% if elementary.lists_intersection(
                        col_tags, show_tags
                    ) | length > 0 %}
                        {# PII on the same column takes precedence over show_sample_rows #}
                        {% if check_pii and elementary.lists_intersection(
                            col_tags, pii_tags
                        ) | length > 0 %}
                            {% do return(false) %}
                        {% endif %}
                        {% do return(true) %}
                    {% endif %}
                {% endif %}
            {% endfor %}
        {% endif %}
    {% endif %}

    {% do return(false) %}
{% endmacro %}
Loading