Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
gated behind the `use_concurrent_microbatch` behavior flag (default: `false`).
Opt in via `flags: {use_concurrent_microbatch: true}` in `dbt_project.yml`
([#914](https://github.com/databricks/dbt-databricks/issues/914))
- Add support for key-only `databricks_tags` for table and column tagging. This can now be configured by setting
tag values to empty strings `""`. ([#1270](https://github.com/databricks/dbt-databricks/issues/1270))

### Fixes

Expand Down
4 changes: 2 additions & 2 deletions dbt/adapters/databricks/relation_configs/column_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def from_relation_results(cls, results: RelationResults) -> ColumnTagsConfig:
# row contains [column_name, tag_name, tag_value]
column_name = str(row[0])
tag_name = str(row[1])
tag_value = str(row[2])
tag_value = str(row[2] or "")

if column_name not in set_column_tags:
set_column_tags[column_name] = {}
Expand All @@ -79,7 +79,7 @@ def from_relation_config(cls, relation_config: RelationConfig) -> ColumnTagsConf
if databricks_tags:
if isinstance(databricks_tags, dict):
set_column_tags[col["name"]] = {
str(k): str(v) for k, v in databricks_tags.items()
str(k): str(v or "") for k, v in databricks_tags.items()
}
else:
raise DbtRuntimeError("databricks_tags must be a dictionary")
Expand Down
4 changes: 2 additions & 2 deletions dbt/adapters/databricks/relation_configs/tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def from_relation_results(cls, results: RelationResults) -> TagsConfig:

if table:
for row in table.rows:
tags[str(row[0])] = str(row[1])
tags[str(row[0])] = str(row[1] or "")

return TagsConfig(set_tags=tags)

Expand All @@ -43,7 +43,7 @@ def from_relation_config(cls, relation_config: RelationConfig) -> TagsConfig:
if not tags:
return TagsConfig(set_tags=dict())
if isinstance(tags, dict):
tags = {str(k): str(v) for k, v in tags.items()}
tags = {str(k): str(v or "") for k, v in tags.items()}
return TagsConfig(set_tags=tags)
else:
raise DbtRuntimeError("databricks_tags must be a dictionary")
4 changes: 2 additions & 2 deletions dbt/include/databricks/macros/relations/tags.sql
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@

{% macro alter_set_tags(relation, tags) -%}
ALTER {{ relation.type.render() }} {{ relation.render() }} SET TAGS (
{% for tag in tags -%}
'{{ tag }}' = '{{ tags[tag] }}' {%- if not loop.last %}, {% endif -%}
{% for key, value in tags.items() -%}
'{{ key }}' = '{{ value }}' {%- if not loop.last %}, {% endif -%}
{%- endfor %}
)
{%- endmacro -%}
4 changes: 2 additions & 2 deletions tests/functional/adapter/column_tags/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
- name: account_number
databricks_tags:
pii: "true"
sensitive: "true"
sensitive: ""
"""

updated_column_tag_model = """
Expand All @@ -29,7 +29,7 @@
- name: account_number
databricks_tags:
pii: "true"
sensitive: "true"
sensitive: ""
"""

column_tags_seed = """
Expand Down
4 changes: 2 additions & 2 deletions tests/functional/adapter/column_tags/test_column_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_column_tags(self, project):
tags = project.run_sql(column_tags_query, fetch="all")
expected_tags = {
("account_number", "pii", "true"),
("account_number", "sensitive", "true"),
("account_number", "sensitive", ""),
}
actual_tags = {(row[0], row[1], row[2]) for row in tags}
assert actual_tags == expected_tags
Expand All @@ -51,7 +51,7 @@ def test_column_tags(self, project):
expected_tags = {
("id", "pii", "false"),
("account_number", "pii", "true"),
("account_number", "sensitive", "true"),
("account_number", "sensitive", ""),
}
actual_tags = {(row[0], row[1], row[2]) for row in tags}
assert actual_tags == expected_tags
Expand Down
5 changes: 3 additions & 2 deletions tests/functional/adapter/tags/fixtures.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
tags_sql = """
{{ config(
materialized = 'table',
databricks_tags = {'a': 'b', 'c': 'd'},
databricks_tags = {'a': 'b', 'c': 'd', 'k': ''},
) }}

select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color
Expand All @@ -19,7 +19,7 @@
streaming_table_tags_sql = """
{{ config(
materialized='streaming_table',
databricks_tags = {'a': 'b', 'c': 'd'},
databricks_tags = {'a': 'b', 'c': 'd', 'k': ''},
) }}

select * from stream {{ ref('my_seed') }}
Expand Down Expand Up @@ -54,4 +54,5 @@ def model(dbt, spark):
databricks_tags:
a: b
c: d
k: ""
"""
10 changes: 5 additions & 5 deletions tests/functional/adapter/tags/test_databricks_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ def test_tags(self, project):
" where schema_name = '{schema}' and table_name='tags'",
fetch="all",
)
assert len(results) == 2
expected_tags = {("a", "b"), ("c", "d")}
assert len(results) == 3
expected_tags = {("a", "b"), ("c", "d"), ("k", "")}
actual_tags = set((row[0], row[1]) for row in results)
assert actual_tags == expected_tags

Expand Down Expand Up @@ -56,8 +56,8 @@ def test_updated_tags(self, project):
" where schema_name = '{schema}' and table_name='tags'",
fetch="all",
)
assert len(results) == 3
expected_tags = {("a", "b"), ("c", "d"), ("e", "f")}
assert len(results) == 4
expected_tags = {("a", "b"), ("c", "d"), ("k", ""), ("e", "f")}
actual_tags = set((row[0], row[1]) for row in results)
assert actual_tags == expected_tags

Expand Down Expand Up @@ -151,7 +151,7 @@ def test_updated_tags(self, project):
" where schema_name = '{schema}' and table_name='tags'",
fetch="all",
)
assert len(results) == 3
assert len(results) == 4


@pytest.mark.python
Expand Down
12 changes: 6 additions & 6 deletions tests/unit/relation_configs/test_column_tags_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_from_relation_results__some(self):
"information_schema.column_tags": Table(
rows=[
["col1", "tag_a", "value_a"],
["col1", "tag_b", "value_b"],
["col1", "tag_b", ""], # key-only tag
["col2", "tag_c", "value_c"],
],
column_names=["column_name", "tag_name", "tag_value"],
Expand All @@ -35,7 +35,7 @@ def test_from_relation_results__some(self):
spec = ColumnTagsProcessor.from_relation_results(results)
assert spec == ColumnTagsConfig(
set_column_tags={
"col1": {"tag_a": "value_a", "tag_b": "value_b"},
"col1": {"tag_a": "value_a", "tag_b": ""},
"col2": {"tag_c": "value_c"},
}
)
Expand All @@ -54,14 +54,14 @@ def test_from_relation_config__without_column_tags(self):
def test_from_relation_config__with_dict(self):
model = Mock()
model.columns = {
"email": {"_extra": {"databricks_tags": {"pii": "true", "env": "prod"}}},
"email": {"_extra": {"databricks_tags": {"pii": "", "env": "prod"}}},
"id": {"_extra": {}},
"created_at": {},
}
spec = ColumnTagsProcessor.from_relation_config(model)
assert spec == ColumnTagsConfig(
set_column_tags={
"email": {"pii": "true", "env": "prod"},
"email": {"pii": "", "env": "prod"},
}
)

Expand All @@ -71,14 +71,14 @@ def test_from_relation_config__with_column_info(self):
"id": ColumnInfo(name="id", _extra={}),
"email": ColumnInfo(
name="email",
_extra={"databricks_tags": {"pii": "true", "env": "prod"}},
_extra={"databricks_tags": {"pii": "", "env": "prod"}},
),
"created_at": ColumnInfo(name="created_at"),
}
spec = ColumnTagsProcessor.from_relation_config(model)
assert spec == ColumnTagsConfig(
set_column_tags={
"email": {"pii": "true", "env": "prod"},
"email": {"pii": "", "env": "prod"},
}
)

Expand Down
31 changes: 23 additions & 8 deletions tests/unit/relation_configs/test_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ def test_from_relation_results__some(self):
spec = TagsProcessor.from_relation_results(results)
assert spec == TagsConfig(set_tags={"a": "valA", "b": "valB"})

def test_from_relation_results__key_only(self):
    """A tag row with an empty-string value is parsed as a key-only tag."""
    # Single tag "a" whose value column is the empty string.
    tag_table = Table(rows=[["a", ""]], column_names=["tag_name", "tag_value"])
    parsed = TagsProcessor.from_relation_results({"information_schema.tags": tag_table})
    expected = TagsConfig(set_tags={"a": ""})
    assert parsed == expected

def test_from_relation_config__without_tags(self):
model = Mock()
model.config.extra = {}
Expand All @@ -36,6 +45,12 @@ def test_from_relation_config__with_tags(self):
spec = TagsProcessor.from_relation_config(model)
assert spec == TagsConfig(set_tags={"a": "valA", "b": "1"})

def test_from_relation_config__with_key_only_tags(self):
    """Empty-string and None tag values are both expected to become ""."""
    node = Mock()
    node.config.extra = {"databricks_tags": {"a": "", "b": None}}
    expected = TagsConfig(set_tags={"a": "", "b": ""})
    assert TagsProcessor.from_relation_config(node) == expected

def test_from_relation_config__with_incorrect_tags(self):
model = Mock()
model.config.extra = {"databricks_tags": ["a", "b"]}
Expand All @@ -52,25 +67,25 @@ def test_get_diff__empty_and_some_exist(self):
# Tags are "set only" - when config has no tags and relation has tags,
# we don't unset the existing tags
config = TagsConfig(set_tags={})
other = TagsConfig(set_tags={"tag": "value"})
diff = config.get_diff(other)
config_old = TagsConfig(set_tags={"tag": "value"})
diff = config.get_diff(config_old)
assert diff is None # No changes needed since we don't unset tags

def test_get_diff__some_new_and_empty_existing(self):
config = TagsConfig(set_tags={"tag": "value"})
other = TagsConfig(set_tags={})
diff = config.get_diff(other)
config_old = TagsConfig(set_tags={})
diff = config.get_diff(config_old)
assert diff == TagsConfig(set_tags={"tag": "value"})

def test_get_diff__mixed_case(self):
# Tags are "set only" - only the new/updated tags are included
config = TagsConfig(set_tags={"a": "value", "b": "value"})
other = TagsConfig(set_tags={"b": "other_value", "c": "value"})
diff = config.get_diff(other)
config_old = TagsConfig(set_tags={"b": "other_value", "c": "value"})
diff = config.get_diff(config_old)
assert diff == TagsConfig(set_tags={"a": "value", "b": "value"})

def test_get_diff__no_changes(self):
config = TagsConfig(set_tags={"tag": "value"})
other = TagsConfig(set_tags={"tag": "value"})
diff = config.get_diff(other)
config_old = TagsConfig(set_tags={"tag": "value"})
diff = config.get_diff(config_old)
assert diff is None