diff --git a/.gitignore b/.gitignore index e11a772a8..141381915 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,9 @@ __pycache__/ # vscode .vscode/ + +# Claude Code +.claude/ dbt_internal_packages/ /package-lock.yml diff --git a/integration_tests/tests/adapter_query_runner.py b/integration_tests/tests/adapter_query_runner.py index 6ac9d96ff..c45cbb83a 100644 --- a/integration_tests/tests/adapter_query_runner.py +++ b/integration_tests/tests/adapter_query_runner.py @@ -52,9 +52,14 @@ def _serialize_value(val: Any) -> Any: * Everything else is returned unchanged. """ if isinstance(val, Decimal): - # Match the Jinja macro: normalize, then int or float + # Match the Jinja macro: normalize, then int or float. + # Note: for special values (Infinity, NaN), as_tuple().exponent is a + # string ('F' or 'n'), not an int — convert those directly to float. normalized = val.normalize() - if normalized.as_tuple().exponent >= 0: + exponent = normalized.as_tuple().exponent + if isinstance(exponent, str): + return float(normalized) + if exponent >= 0: return int(normalized) return float(normalized) if isinstance(val, (datetime, date, time)): diff --git a/macros/edr/materializations/test/test.sql b/macros/edr/materializations/test/test.sql index b9a167a72..71fe21d5f 100644 --- a/macros/edr/materializations/test/test.sql +++ b/macros/edr/materializations/test/test.sql @@ -75,8 +75,25 @@ {% set disable_test_samples = flattened_test["meta"]["disable_test_samples"] %} {% endif %} + {# + Sampling control precedence (highest to lowest): + 1. disable_test_samples meta flag — explicit per-test kill switch, always wins. + 2. show_sample_rows tag (model/test/column) — opt-in when + enable_samples_on_show_sample_rows_tags is true. If the tag is present, + skip all further checks and keep the sample_limit. + 3. enable_samples_on_show_sample_rows_tags — hide-by-default mode: if the + feature is on but no show_sample_rows tag was found, disable samples. + 4. PII tag detection (model/test/column) — hide when disable_samples_on_pii_tags + is true and a PII tag is detected at any level. + #} {% if disable_test_samples %} {% set sample_limit = 0 %} + {% elif elementary.should_show_sample_rows(flattened_test) %} + {# Tag explicitly opts in — keep sample_limit as-is #} + {% elif elementary.get_config_var("enable_samples_on_show_sample_rows_tags") %} + {# Feature is on but no show_sample_rows tag found — hide by default #} + {% set sample_limit = 0 %} {% elif elementary.is_pii_table(flattened_test) %} {% set sample_limit = 0 %} + {% elif elementary.is_pii_test(flattened_test) %} {% set sample_limit = 0 %} {% elif elementary.should_disable_sampling_for_pii(flattened_test) %} {% set sample_limit = 0 %} {% endif %} diff --git a/macros/edr/system/system_utils/get_config_var.sql b/macros/edr/system/system_utils/get_config_var.sql index fc3e1efb1..391ce25d4 100644 --- a/macros/edr/system/system_utils/get_config_var.sql +++ b/macros/edr/system/system_utils/get_config_var.sql @@ -143,6 +143,8 @@ "anomaly_exclude_metrics": none, "disable_samples_on_pii_tags": false, "pii_tags": ["pii"], + "enable_samples_on_show_sample_rows_tags": false, + "show_sample_rows_tags": ["show_sample_rows"], "bigquery_disable_partitioning": false, "bigquery_disable_clustering": false, "upload_only_current_project_artifacts": false, diff --git a/macros/edr/system/system_utils/get_pii_columns_from_parent_model.sql b/macros/edr/system/system_utils/get_pii_columns_from_parent_model.sql index bae1aea2a..0de2648bc 100644 --- a/macros/edr/system/system_utils/get_pii_columns_from_parent_model.sql +++ b/macros/edr/system/system_utils/get_pii_columns_from_parent_model.sql @@ -38,9 +38,36 @@ {% set column_nodes = parent_model.get("columns") %} {% if not column_nodes %} {% do return(pii_columns) %} {% endif %} + {# + A column tagged show_sample_rows (without pii) should still appear in samples + even when disable_samples_on_pii_tags is active — it is intentionally opted in. + We only skip it from the PII columns list if it does NOT also carry a PII tag, + since PII always takes precedence over show_sample_rows. + #} + {% set enable_show_tags = elementary.get_config_var( + "enable_samples_on_show_sample_rows_tags" + ) %} + {% set raw_show_tags = elementary.get_config_var("show_sample_rows_tags") %} + {% if raw_show_tags is string %} {% set show_tags = [raw_show_tags | lower] %} + {% else %} {% set show_tags = (raw_show_tags or []) | map("lower") | list %} + {% endif %} + {% for column_node in column_nodes.values() %} {% set all_column_tags_lower = elementary.get_column_tags(column_node) %} + {# Skip column from PII list only if show_sample_rows is set and pii is not #} + {% set has_show_tag = enable_show_tags and ( + elementary.lists_intersection(all_column_tags_lower, show_tags) + | length + > 0 + ) %} + {% set has_pii_tag = ( + elementary.lists_intersection(all_column_tags_lower, pii_tags) + | length + > 0 + ) %} + {% if has_show_tag and not has_pii_tag %} {% continue %} {% endif %} + {% for pii_tag in pii_tags %} {% if pii_tag in all_column_tags_lower %} {% do pii_columns.append(column_node.get("name")) %} {% break %} diff --git a/macros/edr/system/system_utils/is_pii_table.sql b/macros/edr/system/system_utils/is_pii_table.sql index 97c8819c3..850038e49 100644 --- a/macros/edr/system/system_utils/is_pii_table.sql +++ b/macros/edr/system/system_utils/is_pii_table.sql @@ -5,20 +5,16 @@ {% if not disable_samples_on_pii_tags %} {% do return(false) %} {% endif %} {% set raw_pii_tags = elementary.get_config_var("pii_tags") %} - {% set pii_tags = ( - (raw_pii_tags if raw_pii_tags is iterable else [raw_pii_tags]) - | map("lower") - | list - ) %} + {% if raw_pii_tags is string %} {% set pii_tags = [raw_pii_tags | lower] %} + {% else %} {% set pii_tags = (raw_pii_tags or []) | map("lower") | list %} + {% endif %} {% set raw_model_tags = elementary.insensitive_get_dict_value( flattened_test, "model_tags", [] ) %} - {% set model_tags = ( - (raw_model_tags if raw_model_tags is iterable else [raw_model_tags]) - | map("lower") - | list - ) %} + {% if raw_model_tags is string %} {% set model_tags = [raw_model_tags | lower] %} + {% else %} {% set model_tags = (raw_model_tags or []) | map("lower") | list %} + {% endif %} {% set intersection = elementary.lists_intersection(model_tags, pii_tags) %} {% set is_pii = intersection | length > 0 %} diff --git a/macros/edr/system/system_utils/is_pii_test.sql b/macros/edr/system/system_utils/is_pii_test.sql new file mode 100644 index 000000000..a050541eb --- /dev/null +++ b/macros/edr/system/system_utils/is_pii_test.sql @@ -0,0 +1,24 @@ +{# + Complements is_pii_table (model-level) and should_disable_sampling_for_pii + (column-level) by adding test-level PII tag support. A test tagged with a PII + tag will have its samples disabled, consistent with the other two levels. +#} +{% macro is_pii_test(flattened_test) %} + {% if not elementary.get_config_var("disable_samples_on_pii_tags") %} + {% do return(false) %} + {% endif %} + + {% set raw_pii_tags = elementary.get_config_var("pii_tags") %} + {% if raw_pii_tags is string %} {% set pii_tags = [raw_pii_tags | lower] %} + {% else %} {% set pii_tags = (raw_pii_tags or []) | map("lower") | list %} + {% endif %} + + {% set raw_test_tags = elementary.insensitive_get_dict_value( + flattened_test, "tags", [] + ) %} + {% if raw_test_tags is string %} {% set test_tags = [raw_test_tags | lower] %} + {% else %} {% set test_tags = (raw_test_tags or []) | map("lower") | list %} + {% endif %} + + {% do return(elementary.lists_intersection(test_tags, pii_tags) | length > 0) %} +{% endmacro %} diff --git a/macros/edr/system/system_utils/should_show_sample_rows.sql b/macros/edr/system/system_utils/should_show_sample_rows.sql new file mode 100644 index 000000000..5b84e38af --- /dev/null +++ b/macros/edr/system/system_utils/should_show_sample_rows.sql @@ -0,0 +1,112 @@ +{# + Inverse of PII protection: when enable_samples_on_show_sample_rows_tags is true, + samples are hidden by default and only shown when the show_sample_rows tag is present. + + Checks three levels in order: model → test → column (test's target column only). + Returns true if any level has a matching show_sample_rows tag. + + PII precedence: if disable_samples_on_pii_tags is also enabled and the model + or column has a PII tag, PII wins and this returns false. A model-level PII + tag blocks show_sample_rows at every level (model, test, and column). + + All tag matching is case-insensitive (tags are normalized to lowercase). +#} +{% macro should_show_sample_rows(flattened_test) %} + {% if not elementary.get_config_var("enable_samples_on_show_sample_rows_tags") %} + {% do return(false) %} + {% endif %} + + {% set raw_show_tags = elementary.get_config_var("show_sample_rows_tags") %} + {% if raw_show_tags is string %} {% set show_tags = [raw_show_tags | lower] %} + {% else %} {% set show_tags = (raw_show_tags or []) | map("lower") | list %} + {% endif %} + + {# + Resolve PII tags once upfront. We use `is string` (not `is iterable`) because + strings are iterable in Jinja — iterating a string gives individual characters. + #} + {% set check_pii = elementary.get_config_var("disable_samples_on_pii_tags") %} + {% if check_pii %} + {% set raw_pii_tags = elementary.get_config_var("pii_tags") %} + {% if raw_pii_tags is string %} {% set pii_tags = [raw_pii_tags | lower] %} + {% else %} {% set pii_tags = (raw_pii_tags or []) | map("lower") | list %} + {% endif %} + {% else %} {% set pii_tags = [] %} + {% endif %} + + {# Model-level: show_sample_rows on the model applies to all its tests #} + {% set raw_model_tags = elementary.insensitive_get_dict_value( + flattened_test, "model_tags", [] + ) %} + {% if raw_model_tags is string %} {% set model_tags = [raw_model_tags | lower] %} + {% else %} {% set model_tags = (raw_model_tags or []) | map("lower") | list %} + {% endif %} + {% if elementary.lists_intersection(model_tags, show_tags) | length > 0 %} + {# PII on the model takes precedence over show_sample_rows on the same model #} + {% if check_pii and elementary.lists_intersection( + model_tags, pii_tags + ) | length > 0 %} + {% do return(false) %} + {% endif %} + {% do return(true) %} + {% endif %} + + {# Test-level: show_sample_rows on the test definition itself #} + {% set raw_test_tags = elementary.insensitive_get_dict_value( + flattened_test, "tags", [] + ) %} + {% if raw_test_tags is string %} {% set test_tags = [raw_test_tags | lower] %} + {% else %} {% set test_tags = (raw_test_tags or []) | map("lower") | list %} + {% endif %} + {% if elementary.lists_intersection(test_tags, show_tags) | length > 0 %} + {# If the model itself is PII-tagged, respect that even for test-level overrides #} + {% if check_pii and elementary.lists_intersection( + model_tags, pii_tags + ) | length > 0 %} + {% do return(false) %} + {% endif %} + {% do return(true) %} + {% endif %} + + {# + Column-level: only checks the specific column the test targets (test_column_name), + not all columns on the model. This avoids showing samples for unrelated columns. + #} + {% set test_column_name = elementary.insensitive_get_dict_value( + flattened_test, "test_column_name" + ) %} + {% if test_column_name %} + {% set parent_model_unique_id = elementary.insensitive_get_dict_value( + flattened_test, "parent_model_unique_id" + ) %} + {% set parent_model = elementary.get_node(parent_model_unique_id) %} + {% if parent_model %} + {% set column_nodes = parent_model.get("columns", {}) %} + {% for col_name, col_node in column_nodes.items() %} + {% if col_name | lower == test_column_name | lower %} + {% set col_tags = elementary.get_column_tags(col_node) %} + {% if elementary.lists_intersection( + col_tags, show_tags + ) | length > 0 %} + {# PII on the column or model takes precedence over show_sample_rows #} + {% if check_pii and ( + elementary.lists_intersection(col_tags, pii_tags) + | length + > 0 + or elementary.lists_intersection( + model_tags, pii_tags + ) + | length + > 0 + ) %} + {% do return(false) %} + {% endif %} + {% do return(true) %} + {% endif %} + {% endif %} + {% endfor %} + {% endif %} + {% endif %} + + {% do return(false) %} +{% endmacro %}