From 07bce6e75559f750a8c7cf3caba86d440d4db4d4 Mon Sep 17 00:00:00 2001 From: Can Bekleyici Date: Fri, 27 Feb 2026 16:52:53 +0100 Subject: [PATCH 1/2] feat: support key-only databricks_tags Signed-off-by: Can Bekleyici --- .../relation_configs/column_tags.py | 4 +-- .../databricks/relation_configs/tags.py | 4 +-- .../databricks/macros/relations/tags.sql | 4 +-- .../adapter/column_tags/fixtures.py | 4 +-- .../adapter/column_tags/test_column_tags.py | 4 +-- tests/functional/adapter/tags/fixtures.py | 5 +-- .../adapter/tags/test_databricks_tags.py | 10 +++--- .../test_column_tags_config.py | 12 +++---- tests/unit/relation_configs/test_tags.py | 31 ++++++++++++++----- 9 files changed, 47 insertions(+), 31 deletions(-) diff --git a/dbt/adapters/databricks/relation_configs/column_tags.py b/dbt/adapters/databricks/relation_configs/column_tags.py index 93d5ea890..ec52840dc 100644 --- a/dbt/adapters/databricks/relation_configs/column_tags.py +++ b/dbt/adapters/databricks/relation_configs/column_tags.py @@ -55,7 +55,7 @@ def from_relation_results(cls, results: RelationResults) -> ColumnTagsConfig: # row contains [column_name, tag_name, tag_value] column_name = str(row[0]) tag_name = str(row[1]) - tag_value = str(row[2]) + tag_value = str(row[2] or "") if column_name not in set_column_tags: set_column_tags[column_name] = {} @@ -79,7 +79,7 @@ def from_relation_config(cls, relation_config: RelationConfig) -> ColumnTagsConf if databricks_tags: if isinstance(databricks_tags, dict): set_column_tags[col["name"]] = { - str(k): str(v) for k, v in databricks_tags.items() + str(k): str(v or "") for k, v in databricks_tags.items() } else: raise DbtRuntimeError("databricks_tags must be a dictionary") diff --git a/dbt/adapters/databricks/relation_configs/tags.py b/dbt/adapters/databricks/relation_configs/tags.py index 9286bc9b3..aadb5d29d 100644 --- a/dbt/adapters/databricks/relation_configs/tags.py +++ b/dbt/adapters/databricks/relation_configs/tags.py @@ -33,7 +33,7 @@ def 
from_relation_results(cls, results: RelationResults) -> TagsConfig: if table: for row in table.rows: - tags[str(row[0])] = str(row[1]) + tags[str(row[0])] = str(row[1] or "") return TagsConfig(set_tags=tags) @@ -43,7 +43,7 @@ def from_relation_config(cls, relation_config: RelationConfig) -> TagsConfig: if not tags: return TagsConfig(set_tags=dict()) if isinstance(tags, dict): - tags = {str(k): str(v) for k, v in tags.items()} + tags = {str(k): str(v or "") for k, v in tags.items()} return TagsConfig(set_tags=tags) else: raise DbtRuntimeError("databricks_tags must be a dictionary") diff --git a/dbt/include/databricks/macros/relations/tags.sql b/dbt/include/databricks/macros/relations/tags.sql index 6ba6e29a9..17a347ec8 100644 --- a/dbt/include/databricks/macros/relations/tags.sql +++ b/dbt/include/databricks/macros/relations/tags.sql @@ -30,8 +30,8 @@ {% macro alter_set_tags(relation, tags) -%} ALTER {{ relation.type.render() }} {{ relation.render() }} SET TAGS ( - {% for tag in tags -%} - '{{ tag }}' = '{{ tags[tag] }}' {%- if not loop.last %}, {% endif -%} + {% for key, value in tags.items() -%} + '{{ key }}' = '{{ value }}' {%- if not loop.last %}, {% endif -%} {%- endfor %} ) {%- endmacro -%} diff --git a/tests/functional/adapter/column_tags/fixtures.py b/tests/functional/adapter/column_tags/fixtures.py index 5c26f835b..256849900 100644 --- a/tests/functional/adapter/column_tags/fixtures.py +++ b/tests/functional/adapter/column_tags/fixtures.py @@ -13,7 +13,7 @@ - name: account_number databricks_tags: pii: "true" - sensitive: "true" + sensitive: "" """ updated_column_tag_model = """ @@ -29,7 +29,7 @@ - name: account_number databricks_tags: pii: "true" - sensitive: "true" + sensitive: "" """ column_tags_seed = """ diff --git a/tests/functional/adapter/column_tags/test_column_tags.py b/tests/functional/adapter/column_tags/test_column_tags.py index 6f4fdb222..17e057038 100644 --- a/tests/functional/adapter/column_tags/test_column_tags.py +++ 
b/tests/functional/adapter/column_tags/test_column_tags.py @@ -31,7 +31,7 @@ def test_column_tags(self, project): tags = project.run_sql(column_tags_query, fetch="all") expected_tags = { ("account_number", "pii", "true"), - ("account_number", "sensitive", "true"), + ("account_number", "sensitive", ""), } actual_tags = {(row[0], row[1], row[2]) for row in tags} assert actual_tags == expected_tags @@ -51,7 +51,7 @@ def test_column_tags(self, project): expected_tags = { ("id", "pii", "false"), ("account_number", "pii", "true"), - ("account_number", "sensitive", "true"), + ("account_number", "sensitive", ""), } actual_tags = {(row[0], row[1], row[2]) for row in tags} assert actual_tags == expected_tags diff --git a/tests/functional/adapter/tags/fixtures.py b/tests/functional/adapter/tags/fixtures.py index 67422fd93..3dad09a82 100644 --- a/tests/functional/adapter/tags/fixtures.py +++ b/tests/functional/adapter/tags/fixtures.py @@ -1,7 +1,7 @@ tags_sql = """ {{ config( materialized = 'table', - databricks_tags = {'a': 'b', 'c': 'd'}, + databricks_tags = {'a': 'b', 'c': 'd', 'k': ''}, ) }} select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color @@ -19,7 +19,7 @@ streaming_table_tags_sql = """ {{ config( materialized='streaming_table', - databricks_tags = {'a': 'b', 'c': 'd'}, + databricks_tags = {'a': 'b', 'c': 'd', 'k': ''}, ) }} select * from stream {{ ref('my_seed') }} @@ -54,4 +54,5 @@ def model(dbt, spark): databricks_tags: a: b c: d + k: "" """ diff --git a/tests/functional/adapter/tags/test_databricks_tags.py b/tests/functional/adapter/tags/test_databricks_tags.py index a4eeb7741..e28d11510 100644 --- a/tests/functional/adapter/tags/test_databricks_tags.py +++ b/tests/functional/adapter/tags/test_databricks_tags.py @@ -23,8 +23,8 @@ def test_tags(self, project): " where schema_name = '{schema}' and table_name='tags'", fetch="all", ) - assert len(results) == 2 - expected_tags = {("a", "b"), ("c", "d")} + assert len(results) == 3 + expected_tags = {("a", 
"b"), ("c", "d"), ("k", "")} actual_tags = set((row[0], row[1]) for row in results) assert actual_tags == expected_tags @@ -56,8 +56,8 @@ def test_updated_tags(self, project): " where schema_name = '{schema}' and table_name='tags'", fetch="all", ) - assert len(results) == 3 - expected_tags = {("a", "b"), ("c", "d"), ("e", "f")} + assert len(results) == 4 + expected_tags = {("a", "b"), ("c", "d"), ("k", ""), ("e", "f")} actual_tags = set((row[0], row[1]) for row in results) assert actual_tags == expected_tags @@ -151,7 +151,7 @@ def test_updated_tags(self, project): " where schema_name = '{schema}' and table_name='tags'", fetch="all", ) - assert len(results) == 3 + assert len(results) == 4 @pytest.mark.python diff --git a/tests/unit/relation_configs/test_column_tags_config.py b/tests/unit/relation_configs/test_column_tags_config.py index 099f9da30..a4269d2a1 100644 --- a/tests/unit/relation_configs/test_column_tags_config.py +++ b/tests/unit/relation_configs/test_column_tags_config.py @@ -26,7 +26,7 @@ def test_from_relation_results__some(self): "information_schema.column_tags": Table( rows=[ ["col1", "tag_a", "value_a"], - ["col1", "tag_b", "value_b"], + ["col1", "tag_b", ""], # key-only tag ["col2", "tag_c", "value_c"], ], column_names=["column_name", "tag_name", "tag_value"], @@ -35,7 +35,7 @@ def test_from_relation_results__some(self): spec = ColumnTagsProcessor.from_relation_results(results) assert spec == ColumnTagsConfig( set_column_tags={ - "col1": {"tag_a": "value_a", "tag_b": "value_b"}, + "col1": {"tag_a": "value_a", "tag_b": ""}, "col2": {"tag_c": "value_c"}, } ) @@ -54,14 +54,14 @@ def test_from_relation_config__without_column_tags(self): def test_from_relation_config__with_dict(self): model = Mock() model.columns = { - "email": {"_extra": {"databricks_tags": {"pii": "true", "env": "prod"}}}, + "email": {"_extra": {"databricks_tags": {"pii": "", "env": "prod"}}}, "id": {"_extra": {}}, "created_at": {}, } spec = 
ColumnTagsProcessor.from_relation_config(model) assert spec == ColumnTagsConfig( set_column_tags={ - "email": {"pii": "true", "env": "prod"}, + "email": {"pii": "", "env": "prod"}, } ) @@ -71,14 +71,14 @@ def test_from_relation_config__with_column_info(self): "id": ColumnInfo(name="id", _extra={}), "email": ColumnInfo( name="email", - _extra={"databricks_tags": {"pii": "true", "env": "prod"}}, + _extra={"databricks_tags": {"pii": "", "env": "prod"}}, ), "created_at": ColumnInfo(name="created_at"), } spec = ColumnTagsProcessor.from_relation_config(model) assert spec == ColumnTagsConfig( set_column_tags={ - "email": {"pii": "true", "env": "prod"}, + "email": {"pii": "", "env": "prod"}, } ) diff --git a/tests/unit/relation_configs/test_tags.py b/tests/unit/relation_configs/test_tags.py index e465739b8..0b76637f3 100644 --- a/tests/unit/relation_configs/test_tags.py +++ b/tests/unit/relation_configs/test_tags.py @@ -24,6 +24,15 @@ def test_from_relation_results__some(self): spec = TagsProcessor.from_relation_results(results) assert spec == TagsConfig(set_tags={"a": "valA", "b": "valB"}) + def test_from_relation_results__key_only(self): + results = { + "information_schema.tags": Table( + rows=[["a", ""]], column_names=["tag_name", "tag_value"] + ) + } + spec = TagsProcessor.from_relation_results(results) + assert spec == TagsConfig(set_tags={"a": ""}) + def test_from_relation_config__without_tags(self): model = Mock() model.config.extra = {} @@ -36,6 +45,12 @@ def test_from_relation_config__with_tags(self): spec = TagsProcessor.from_relation_config(model) assert spec == TagsConfig(set_tags={"a": "valA", "b": "1"}) + def test_from_relation_config__with_key_only_tags(self): + model = Mock() + model.config.extra = {"databricks_tags": {"a": "", "b": None}} + spec = TagsProcessor.from_relation_config(model) + assert spec == TagsConfig(set_tags={"a": "", "b": ""}) + def test_from_relation_config__with_incorrect_tags(self): model = Mock() model.config.extra = 
{"databricks_tags": ["a", "b"]} @@ -52,25 +67,25 @@ def test_get_diff__empty_and_some_exist(self): # Tags are "set only" - when config has no tags and relation has tags, # we don't unset the existing tags config = TagsConfig(set_tags={}) - other = TagsConfig(set_tags={"tag": "value"}) - diff = config.get_diff(other) + config_old = TagsConfig(set_tags={"tag": "value"}) + diff = config.get_diff(config_old) assert diff is None # No changes needed since we don't unset tags def test_get_diff__some_new_and_empty_existing(self): config = TagsConfig(set_tags={"tag": "value"}) - other = TagsConfig(set_tags={}) - diff = config.get_diff(other) + config_old = TagsConfig(set_tags={}) + diff = config.get_diff(config_old) assert diff == TagsConfig(set_tags={"tag": "value"}) def test_get_diff__mixed_case(self): # Tags are "set only" - only the new/updated tags are included config = TagsConfig(set_tags={"a": "value", "b": "value"}) - other = TagsConfig(set_tags={"b": "other_value", "c": "value"}) - diff = config.get_diff(other) + config_old = TagsConfig(set_tags={"b": "other_value", "c": "value"}) + diff = config.get_diff(config_old) assert diff == TagsConfig(set_tags={"a": "value", "b": "value"}) def test_get_diff__no_changes(self): config = TagsConfig(set_tags={"tag": "value"}) - other = TagsConfig(set_tags={"tag": "value"}) - diff = config.get_diff(other) + config_old = TagsConfig(set_tags={"tag": "value"}) + diff = config.get_diff(config_old) assert diff is None From f936946470f8c7adf50f75968a7f4ef8e7caa545 Mon Sep 17 00:00:00 2001 From: Can Bekleyici Date: Fri, 27 Feb 2026 17:04:42 +0100 Subject: [PATCH 2/2] docs: add docs for key-only tags feature Signed-off-by: Can Bekleyici --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c5c6c1dd..abcb04eb5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ gated behind the `use_concurrent_microbatch` behavior flag (default: `false`). 
Opt in via `flags: {use_concurrent_microbatch: true}` in `dbt_project.yml` ([#914](https://github.com/databricks/dbt-databricks/issues/914)) +- Add support for key-only `databricks_tags` for table and column tagging. This can now be configured by setting + tag values as empty strings `""`. ([#1270](https://github.com/databricks/dbt-databricks/issues/1270)) ### Fixes