From 5c859d5d0e7e734c6dea2b8ea0e3b71821710aa3 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Mon, 9 Feb 2026 14:29:17 -0600 Subject: [PATCH 01/34] Add Unity Catalog ABAC policy governance skill New `uc-abac-governance` skill with 4 reference files covering governed tags, masking UDFs, column mask/row filter policies, SQL generation patterns, Python SDK (`w.policies.*`) CRUD examples, 12 MCP tools reference, and human-in-the-loop governance workflow. Adds SDK example `6-abac-policies.py`. Updates `install_skills.sh` and `databricks-python-sdk/SKILL.md`. Content derived from companion UCABAC repo (drift detection excluded). --- .claude/skills/uc-abac-governance/SKILL.md | 294 +++++++++++++ .../uc-abac-governance/mcp-tools-reference.md | 397 ++++++++++++++++++ .../uc-abac-governance/python-sdk-patterns.md | 351 ++++++++++++++++ .../uc-abac-governance/sql-generation.md | 356 ++++++++++++++++ DEV_CHANGELOG.md | 126 ++++++ PLAN_UC_ABAC_SKILLS.md | 146 +++++++ .../databricks-python-sdk/SKILL.md | 52 +++ .../examples/6-abac-policies.py | 203 +++++++++ databricks-skills/install_skills.sh | 6 +- databricks-skills/uc-abac-governance/SKILL.md | 294 +++++++++++++ .../uc-abac-governance/mcp-tools-reference.md | 397 ++++++++++++++++++ .../uc-abac-governance/python-sdk-patterns.md | 351 ++++++++++++++++ .../uc-abac-governance/sql-generation.md | 356 ++++++++++++++++ 13 files changed, 3327 insertions(+), 2 deletions(-) create mode 100644 .claude/skills/uc-abac-governance/SKILL.md create mode 100644 .claude/skills/uc-abac-governance/mcp-tools-reference.md create mode 100644 .claude/skills/uc-abac-governance/python-sdk-patterns.md create mode 100644 .claude/skills/uc-abac-governance/sql-generation.md create mode 100644 DEV_CHANGELOG.md create mode 100644 PLAN_UC_ABAC_SKILLS.md create mode 100644 databricks-skills/databricks-python-sdk/examples/6-abac-policies.py create mode 100644 databricks-skills/uc-abac-governance/SKILL.md create mode 100644 
databricks-skills/uc-abac-governance/mcp-tools-reference.md create mode 100644 databricks-skills/uc-abac-governance/python-sdk-patterns.md create mode 100644 databricks-skills/uc-abac-governance/sql-generation.md diff --git a/.claude/skills/uc-abac-governance/SKILL.md b/.claude/skills/uc-abac-governance/SKILL.md new file mode 100644 index 00000000..6efabdcc --- /dev/null +++ b/.claude/skills/uc-abac-governance/SKILL.md @@ -0,0 +1,294 @@ +--- +name: uc-abac-governance +description: "Unity Catalog ABAC policy governance - governed tags, masking UDFs, column masks, row filters, and human-in-the-loop policy management." +--- + +# Unity Catalog ABAC Policy Governance + +Guidance for Attribute-Based Access Control (ABAC) policies in Databricks Unity Catalog. Covers governed tags, tag assignments, masking UDFs, CREATE/DROP POLICY syntax, and the human-in-the-loop governance workflow. + +**Databricks Docs:** +- ABAC overview: https://docs.databricks.com/data-governance/unity-catalog/abac/ +- ABAC policies: https://docs.databricks.com/data-governance/unity-catalog/abac/policies +- ABAC tutorial: https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial + +## When to Use This Skill + +Use this skill when: +- Creating or managing **ABAC policies** (column masks, row filters) +- Working with **governed tags** (creating via UI, applying via SQL) +- Building **masking UDFs** for PII protection (SSN, email, credit card, etc.) 
+- Implementing **human-in-the-loop governance** workflows +- Querying tag assignments via `information_schema` +- Managing policy lifecycle (create, update, delete, preview) + +## Reference Files + +| Topic | File | Description | +|-------|------|-------------| +| SQL Generation | [sql-generation.md](sql-generation.md) | SET/UNSET TAG, CREATE FUNCTION, CREATE/DROP POLICY, discovery queries | +| Python SDK | [python-sdk-patterns.md](python-sdk-patterns.md) | `w.policies.*` SDK methods for ABAC policy CRUD | +| MCP Tools | [mcp-tools-reference.md](mcp-tools-reference.md) | 12 MCP tools for policy management | + +--- + +## ABAC Workflow Overview + +ABAC policies in Databricks follow a 4-step setup: + +1. **Governed Tags** - Define classification taxonomy (UI only) +2. **Tag Assignments** - Apply tags to columns/tables via SQL +3. **Masking UDFs** - Create deterministic functions for data masking +4. **ABAC Policies** - Bind tags to UDFs with principal scoping + +``` +┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ +│ Governed Tags│───>│ Tag │───>│ Masking │───>│ ABAC │ +│ (UI only) │ │ Assignments │ │ UDFs │ │ Policies │ +└──────────────┘ └──────────────┘ └──────────────┘ └──────────────┘ +``` + +--- + +## IMPORTANT: SQL That Does NOT Exist + +These SQL commands do **not** exist in Databricks. Do not generate them. + +| Invalid SQL | What to use instead | +|---|---| +| `SHOW POLICIES` | REST API: `w.policies.list_policies()` | +| `DESCRIBE POLICY` | REST API: `w.policies.get_policy()` | +| `ALTER POLICY` | Drop and recreate the policy | +| `ALTER USER SET ATTRIBUTES` | SCIM API for user attributes | +| `SHOW USER ATTRIBUTES` | SCIM API for user attributes | + +--- + +## Step 1: Governed Tags + +Governed tags **cannot** be created via SQL. They must be created in the Databricks UI. + +### Creating a Governed Tag (UI Steps) + +1. Navigate to **Catalog** in the workspace +2. Select **Governed Tags** from the left panel +3. 
Click **Create governed tag** +4. Configure: + - **Tag Key**: e.g., `pii_type` + - **Allowed Values**: e.g., `ssn`, `email`, `phone`, `credit_card`, `address` + - **Description**: e.g., "PII classification for ABAC policies" + +> **Note:** Tag data is stored as plain text and may be replicated globally. Avoid sensitive information in tag names or values. + +**Docs:** https://docs.databricks.com/admin/governed-tags/ + +--- + +## Step 2: Applying Tags to Columns + +### Legacy Syntax (all versions) + +```sql +-- Set tag on column +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name SET TAGS ('pii_type' = 'ssn'); + +-- Set tag on table +ALTER TABLE catalog.schema.table +SET TAGS ('data_classification' = 'confidential'); + +-- Remove tag +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name UNSET TAGS ('pii_type'); +``` + +### Modern Syntax (DBR 16.1+) + +```sql +SET TAG ON COLUMN catalog.schema.table.column_name 'pii_type' = 'ssn'; +SET TAG ON TABLE catalog.schema.table 'data_classification' = 'confidential'; +SET TAG ON SCHEMA catalog.schema 'environment' = 'production'; +SET TAG ON CATALOG catalog 'department' = 'finance'; + +UNSET TAG ON COLUMN catalog.schema.table.column_name 'pii_type'; +``` + +### Querying Existing Tags + +```sql +-- Column tags +SELECT tag_name, tag_value, column_name +FROM system.information_schema.column_tags +WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; + +-- Table tags +SELECT tag_name, tag_value +FROM system.information_schema.table_tags +WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; +``` + +--- + +## Step 3: Masking UDFs + +Masking UDFs must be `DETERMINISTIC` and use simple `CASE` statements. No external calls or nested UDFs. 
+ +```sql +-- Full mask: replaces all characters with * +CREATE OR REPLACE FUNCTION catalog.schema.mask_full(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Full masking - replaces all characters with *' +RETURN CASE + WHEN value IS NULL THEN NULL + ELSE REPEAT('*', LENGTH(value)) +END; + +-- Partial mask: show last 4 characters +CREATE OR REPLACE FUNCTION catalog.schema.mask_partial(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Partial masking - shows last 4 characters' +RETURN CASE + WHEN value IS NULL THEN NULL + WHEN LENGTH(value) <= 4 THEN REPEAT('*', LENGTH(value)) + ELSE CONCAT(REPEAT('*', LENGTH(value) - 4), RIGHT(value, 4)) +END; + +-- SSN mask: ***-**-XXXX format +CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks SSN showing only last 4 digits' +RETURN CASE + WHEN ssn IS NULL THEN NULL + WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 + THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) + ELSE '***-**-****' +END; + +-- Email mask: j***@example.com +CREATE OR REPLACE FUNCTION catalog.schema.mask_email(email STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks email showing first char and domain' +RETURN CASE + WHEN email IS NULL THEN NULL + WHEN INSTR(email, '@') > 1 + THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1)) + ELSE '***@***.***' +END; +``` + +**Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices + +--- + +## Step 4: ABAC Policies + +### Column Mask Policy + +```sql +CREATE OR REPLACE POLICY mask_pii_columns +ON SCHEMA catalog.schema +COMMENT 'Mask PII columns for analysts' +COLUMN MASK catalog.schema.mask_partial +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; +``` + +### Row Filter Policy + +```sql +CREATE OR REPLACE POLICY filter_eu_rows +ON CATALOG my_catalog +COMMENT 'Filter EU rows 
for US team' +ROW FILTER catalog.schema.is_not_eu_region +TO `us_team` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col +USING COLUMNS (filter_col); +``` + +### Drop Policy + +```sql +DROP POLICY mask_pii_columns ON SCHEMA catalog.schema; +``` + +### CRITICAL: Always Exclude `gov_admin` + +Every ABAC policy **MUST** include `EXCEPT \`gov_admin\`` to protect administrator access. Without this, admins could be locked out of data. + +### Policy Quotas + +| Scope | Max Policies | +|-------|-------------| +| Per Catalog | 10 | +| Per Schema | 10 | +| Per Table | 5 | + +--- + +## Human-in-the-Loop Governance Workflow + +ABAC policy changes should follow a governed workflow: + +``` +ANALYZE → RECOMMEND → PREVIEW → APPROVE → EXECUTE → VERIFY + │ │ │ │ │ │ + ▼ ▼ ▼ ▼ ▼ ▼ + Discover Generate Show SQL Human Run SQL Confirm + current policy & impact confirms or SDK changes + state proposals preview changes call applied +``` + +1. **ANALYZE**: Discover current tags, policies, and UDFs +2. **RECOMMEND**: Generate policy proposals based on requirements +3. **PREVIEW**: Use `preview_policy_changes` to show exact SQL and impact +4. **APPROVE**: Human reviews and explicitly approves +5. **EXECUTE**: Create/update/delete policies via SDK or SQL +6. **VERIFY**: Confirm policies are applied correctly + +**Never auto-execute policy changes.** Always preview and wait for human approval. 
+ +--- + +## Common Errors + +| Error | Cause | Solution | +|-------|-------|----------| +| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Consolidate policies or use broader scope | +| `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | Check governed tag configuration in UI | +| `UDF_NOT_FOUND` | Masking UDF doesn't exist | Create UDF first, use fully qualified name | +| `POLICY_ALREADY_EXISTS` | Policy name conflict | Use `CREATE OR REPLACE POLICY` | +| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | Grant `MANAGE` permission to policy creator | +| `SHOW POLICIES is not supported` | Used invalid SQL | Use REST API `w.policies.list_policies()` instead | + +## Best Practices + +1. **Use governed tags** (not ad-hoc tags) for ABAC policy matching +2. **Always include ``EXCEPT `gov_admin` ``** in every policy +3. **Use deterministic UDFs** with simple CASE statements +4. **Preview before executing** any policy change +5. **Start at schema scope** and narrow to table only when needed +6. **Name policies descriptively**: `mask_{what}_{scope}` or `filter_{what}_{scope}` +7. **Test UDFs independently** before binding to policies +8. 
**Monitor policy quotas** — consolidate when approaching limits + +## Resources + +- [ABAC Overview](https://docs.databricks.com/data-governance/unity-catalog/abac/) +- [ABAC Policies](https://docs.databricks.com/data-governance/unity-catalog/abac/policies) +- [ABAC Tutorial](https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial) +- [UDF Best Practices](https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices) +- [Governed Tags](https://docs.databricks.com/admin/governed-tags/) +- [Column Masks & Row Filters](https://docs.databricks.com/data-governance/unity-catalog/filters-and-masks/) diff --git a/.claude/skills/uc-abac-governance/mcp-tools-reference.md b/.claude/skills/uc-abac-governance/mcp-tools-reference.md new file mode 100644 index 00000000..51fb77b2 --- /dev/null +++ b/.claude/skills/uc-abac-governance/mcp-tools-reference.md @@ -0,0 +1,397 @@ +# MCP Tools Reference for ABAC Policy Management + +Reference for the 12 MCP tools that manage ABAC policies via the Databricks Python SDK. These tools are registered in the UCABAC MCP server. + +--- + +## Discovery Tools + +### `list_abac_policies` + +List ABAC policies on a catalog, schema, or table. 
+ +```python +list_abac_policies( + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, # e.g., "my_catalog.my_schema" + include_inherited: bool = True, + policy_type: str = None, # "COLUMN_MASK" or "ROW_FILTER" (optional filter) +) +``` + +**Returns:** +```json +{ + "success": true, + "securable_type": "SCHEMA", + "securable_fullname": "my_catalog.my_schema", + "policy_count": 3, + "policies": [ + { + "name": "mask_pii_ssn", + "policy_type": "COLUMN_MASK", + "to_principals": ["analysts"], + "except_principals": ["gov_admin"], + "on_securable_fullname": "my_catalog.my_schema", + "column_mask": {"function_name": "my_catalog.my_schema.mask_ssn"}, + "match_columns": [{"tag_name": "pii_type", "tag_value": "ssn"}] + } + ] +} +``` + +### `get_abac_policy` + +Get details for a specific policy by name. + +```python +get_abac_policy( + policy_name: str, # Policy name + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, # Fully qualified securable name +) +``` + +**Returns:** +```json +{ + "success": true, + "policy": { + "name": "mask_pii_ssn", + "policy_type": "COLUMN_MASK", + "comment": "Mask SSN columns for analysts", + "to_principals": ["analysts", "data_scientists"], + "except_principals": ["gov_admin"], + "on_securable_type": "SCHEMA", + "on_securable_fullname": "my_catalog.my_schema", + "for_securable_type": "TABLE", + "column_mask": {"function_name": "my_catalog.my_schema.mask_ssn"}, + "match_columns": [{"tag_name": "pii_type", "tag_value": "ssn"}], + "created_at": "2025-01-15T10:30:00Z", + "created_by": "admin@company.com", + "updated_at": "2025-01-20T14:00:00Z", + "updated_by": "admin@company.com" + } +} +``` + +### `get_table_policies` + +Get column masks and row filters for a specific table via Unity Catalog API. 
+ +```python +get_table_policies( + catalog: str, + schema: str, + table: str, +) +``` + +**Returns:** +```json +{ + "success": true, + "table": "my_catalog.my_schema.my_table", + "column_masks": [ + { + "column_name": "ssn", + "mask_function": "my_catalog.my_schema.mask_ssn", + "using_column_names": [] + } + ], + "row_filters": [ + { + "filter_function": "my_catalog.my_schema.is_not_eu_region", + "using_column_names": ["region"] + } + ] +} +``` + +### `get_masking_functions` + +List masking UDFs in a schema. + +```python +get_masking_functions( + catalog: str, + schema: str, +) +``` + +**Returns:** +```json +{ + "success": true, + "catalog": "my_catalog", + "schema": "my_schema", + "functions": [ + { + "name": "mask_ssn", + "full_name": "my_catalog.my_schema.mask_ssn", + "return_type": "STRING", + "comment": "Masks SSN showing only last 4 digits", + "is_deterministic": true + } + ] +} +``` + +### `get_schema_info` + +Get schema metadata via Unity Catalog API. + +```python +get_schema_info( + catalog: str, + schema: str, +) +``` + +### `get_catalog_info` + +Get catalog metadata via Unity Catalog API. + +```python +get_catalog_info( + catalog: str, +) +``` + +### `get_column_tags_api` + +Get column-level tags via the Tags API. + +```python +get_column_tags_api( + catalog: str, + schema: str, + table: str, +) +``` + +### `list_table_policies_in_schema` + +List all tables in a schema with their column masks and row filters. + +```python +list_table_policies_in_schema( + catalog: str, + schema: str, +) +``` + +--- + +## Preview Tool (Human-in-the-Loop Gate) + +### `preview_policy_changes` + +Preview policy changes without executing. This is the critical human-in-the-loop gate. 
+ +```python +preview_policy_changes( + action: str, # "CREATE", "UPDATE", or "DELETE" + policy_name: str, + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, + policy_type: str = None, # "COLUMN_MASK" or "ROW_FILTER" (for CREATE) + to_principals: list = None, + except_principals: list = None, + function_name: str = None, + tag_name: str = None, + tag_value: str = None, + comment: str = None, +) +``` + +**Returns:** +```json +{ + "success": true, + "action": "CREATE", + "preview": { + "policy_name": "mask_pii_ssn", + "policy_type": "COLUMN_MASK", + "securable": "SCHEMA my_catalog.my_schema", + "to_principals": ["analysts"], + "except_principals": ["gov_admin"], + "function": "my_catalog.my_schema.mask_ssn", + "tag_match": "hasTagValue('pii_type', 'ssn')", + "equivalent_sql": "CREATE OR REPLACE POLICY mask_pii_ssn\nON SCHEMA my_catalog.my_schema\n..." + }, + "warnings": [], + "requires_approval": true, + "message": "Review the preview above. Reply 'approve' to execute." +} +``` + +**Usage in workflow:** + +1. Call `preview_policy_changes` with proposed changes +2. Present preview to user +3. Wait for explicit approval +4. Only then call `create_abac_policy`, `update_abac_policy`, or `delete_abac_policy` + +--- + +## Management Tools + +### `create_abac_policy` + +Create a new ABAC policy (COLUMN_MASK or ROW_FILTER). 
+ +```python +create_abac_policy( + policy_name: str, + policy_type: str, # "COLUMN_MASK" or "ROW_FILTER" + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, + function_name: str, # Fully qualified UDF name + to_principals: list, # Users/groups the policy applies to + tag_name: str, # Tag key to match + tag_value: str = None, # Tag value (optional, uses hasTag vs hasTagValue) + except_principals: list = None, # Excluded principals (gov_admin auto-added) + comment: str = "", +) +``` + +**Returns:** +```json +{ + "success": true, + "policy_name": "mask_pii_ssn", + "action": "created", + "details": { + "policy_type": "COLUMN_MASK", + "on_securable": "SCHEMA my_catalog.my_schema", + "function": "my_catalog.my_schema.mask_ssn", + "to_principals": ["analysts"], + "except_principals": ["gov_admin"] + } +} +``` + +> **Note:** `gov_admin` is automatically added to `except_principals` if not already present. + +### `update_abac_policy` + +Update an existing policy's principals or comment. + +```python +update_abac_policy( + policy_name: str, + securable_type: str, + securable_fullname: str, + to_principals: list = None, + except_principals: list = None, + comment: str = None, +) +``` + +**Returns:** +```json +{ + "success": true, + "policy_name": "mask_pii_ssn", + "action": "updated", + "changes": { + "to_principals": ["analysts", "data_scientists", "new_team"], + "comment": "Updated: added new_team" + } +} +``` + +> **Note:** To change the UDF, tag matching, or scope, drop and recreate the policy. + +### `delete_abac_policy` + +Delete an ABAC policy. 
+ +```python +delete_abac_policy( + policy_name: str, + securable_type: str, + securable_fullname: str, +) +``` + +**Returns:** +```json +{ + "success": true, + "policy_name": "mask_pii_ssn", + "action": "deleted" +} +``` + +--- + +## Human-in-the-Loop Workflow Example + +Complete workflow using MCP tools: + +``` +Step 1: ANALYZE +───────────────────────────────── +→ list_abac_policies(securable_type="SCHEMA", securable_fullname="prod.finance") +→ get_column_tags_api(catalog="prod", schema="finance", table="customers") +→ get_masking_functions(catalog="prod", schema="finance") + +Step 2: RECOMMEND +───────────────────────────────── +→ Agent generates policy recommendations based on discovered tags and UDFs + +Step 3: PREVIEW +───────────────────────────────── +→ preview_policy_changes( + action="CREATE", + policy_name="mask_ssn_finance", + securable_type="SCHEMA", + securable_fullname="prod.finance", + policy_type="COLUMN_MASK", + function_name="prod.finance.mask_ssn", + to_principals=["analysts"], + tag_name="pii_type", + tag_value="ssn" + ) + +Step 4: APPROVE +───────────────────────────────── +→ Human reviews preview and replies "approve" + +Step 5: EXECUTE +───────────────────────────────── +→ create_abac_policy( + policy_name="mask_ssn_finance", + policy_type="COLUMN_MASK", + securable_type="SCHEMA", + securable_fullname="prod.finance", + function_name="prod.finance.mask_ssn", + to_principals=["analysts"], + tag_name="pii_type", + tag_value="ssn" + ) + +Step 6: VERIFY +───────────────────────────────── +→ get_abac_policy( + policy_name="mask_ssn_finance", + securable_type="SCHEMA", + securable_fullname="prod.finance" + ) +``` + +--- + +## Error Handling + +| Error | Cause | Solution | +|-------|-------|----------| +| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Consolidate policies or use broader scope | +| `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | Check governed tag config in UI | +| `UDF_NOT_FOUND` | Masking UDF doesn't 
exist | Create UDF first, use fully qualified name | +| `POLICY_ALREADY_EXISTS` | Duplicate policy name | Use different name or delete existing first | +| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | Grant `MANAGE` permission | +| `INVALID_SECURABLE_TYPE` | Wrong securable type string | Use `"CATALOG"`, `"SCHEMA"`, or `"TABLE"` | +| `gov_admin not in except_principals` | Safety check failed | Always include `gov_admin` in except list | diff --git a/.claude/skills/uc-abac-governance/python-sdk-patterns.md b/.claude/skills/uc-abac-governance/python-sdk-patterns.md new file mode 100644 index 00000000..da8bd938 --- /dev/null +++ b/.claude/skills/uc-abac-governance/python-sdk-patterns.md @@ -0,0 +1,351 @@ +# Python SDK Patterns for ABAC Policies + +Databricks Python SDK patterns for managing ABAC policies via `WorkspaceClient.policies`. + +**SDK Docs:** https://databricks-sdk-py.readthedocs.io/en/latest/ +**ABAC Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/policies + +--- + +## Setup + +```python +from databricks.sdk import WorkspaceClient + +w = WorkspaceClient() # Auto-detects credentials +``` + +--- + +## List Policies + +List ABAC policies on a securable (catalog, schema, or table). 
+ +```python +# List all policies on a catalog +policies = w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname="my_catalog", + include_inherited=True, +) + +for p in policies: + print(f"{p.name}: {p.policy_type} on {p.on_securable_fullname}") + +# List policies on a schema +policies = w.policies.list_policies( + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + include_inherited=True, +) + +# List policies on a specific table +policies = w.policies.list_policies( + on_securable_type="TABLE", + on_securable_fullname="my_catalog.my_schema.my_table", + include_inherited=True, +) +``` + +### Filtering by Policy Type + +```python +policies = w.policies.list_policies( + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + include_inherited=True, +) + +column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] +row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] +``` + +### Extracting Policy Details + +```python +for p in policies: + p_dict = p.as_dict() if hasattr(p, "as_dict") else {} + print({ + "name": p_dict.get("name"), + "policy_type": p_dict.get("policy_type"), + "to_principals": p_dict.get("to_principals", []), + "except_principals": p_dict.get("except_principals", []), + "on_securable_type": p_dict.get("on_securable_type"), + "on_securable_fullname": p_dict.get("on_securable_fullname"), + "column_mask": p_dict.get("column_mask"), + "row_filter": p_dict.get("row_filter"), + "match_columns": p_dict.get("match_columns", []), + }) +``` + +--- + +## Get Policy + +Retrieve a specific policy by name and securable. 
+ +```python +policy = w.policies.get_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", +) + +print(f"Policy: {policy.name}") +print(f"Type: {policy.policy_type}") +print(f"Principals: {policy.to_principals}") +print(f"Except: {policy.except_principals}") +``` + +--- + +## Create Policy + +### Column Mask Policy + +```python +from databricks.sdk.service.catalog import ( + CreatePolicy, + ColumnMask, + MatchColumns, +) + +policy = w.policies.create_policy( + name="mask_pii_ssn", + policy_type="COLUMN_MASK", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + for_securable_type="TABLE", + to_principals=["analysts", "data_scientists"], + except_principals=["gov_admin"], + comment="Mask SSN columns for analyst groups", + column_mask=ColumnMask( + function_name="my_catalog.my_schema.mask_ssn", + ), + match_columns=[ + MatchColumns( + tag_name="pii_type", + tag_value="ssn", + ) + ], +) +print(f"Created policy: {policy.name}") +``` + +### Row Filter Policy + +```python +from databricks.sdk.service.catalog import ( + CreatePolicy, + RowFilter, + MatchColumns, +) + +policy = w.policies.create_policy( + name="filter_eu_data", + policy_type="ROW_FILTER", + on_securable_type="CATALOG", + on_securable_fullname="my_catalog", + for_securable_type="TABLE", + to_principals=["us_team"], + except_principals=["gov_admin"], + comment="Filter EU rows for US team", + row_filter=RowFilter( + function_name="my_catalog.my_schema.is_not_eu_region", + ), + match_columns=[ + MatchColumns( + tag_name="region", + tag_value="eu", + ) + ], +) +print(f"Created policy: {policy.name}") +``` + +### Important: Always Include `gov_admin` + +Every policy **MUST** include `"gov_admin"` in `except_principals`: + +```python +# CORRECT +except_principals=["gov_admin"] + +# CORRECT - additional admin groups +except_principals=["gov_admin", "platform_admins"] + +# WRONG - missing gov_admin 
+except_principals=["platform_admins"] # gov_admin must be included! +``` + +--- + +## Update Policy + +Update principals or comment on an existing policy. + +```python +updated = w.policies.update_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + to_principals=["analysts", "data_scientists", "new_team"], + except_principals=["gov_admin", "senior_admins"], + comment="Updated: added new_team to masked principals", +) +print(f"Updated policy: {updated.name}") +``` + +> **Note:** To change the UDF, tag matching, or scope, you must drop and recreate the policy. `update_policy` only modifies principals and comment. + +--- + +## Delete Policy + +```python +w.policies.delete_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", +) +print("Policy deleted") +``` + +--- + +## Error Handling + +```python +from databricks.sdk.errors import NotFound, PermissionDenied, BadRequest + +try: + policy = w.policies.get_policy( + name="nonexistent_policy", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + ) +except NotFound: + print("Policy not found") +except PermissionDenied: + print("Insufficient permissions - need MANAGE on securable") +except BadRequest as e: + print(f"Invalid request: {e}") +``` + +--- + +## Common Patterns + +### List All Policies in a Catalog with Counts + +```python +def get_policy_summary(w, catalog: str): + """Get a summary of all ABAC policies in a catalog.""" + policies = list(w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname=catalog, + include_inherited=True, + )) + + column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] + row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] + + return { + "total": len(policies), + "column_masks": len(column_masks), + "row_filters": len(row_filters), + "policies": [p.as_dict() for p in policies], + } +``` + +### 
Check Policy Quotas Before Creating + +```python +def check_quota(w, securable_type: str, securable_fullname: str): + """Check if policy quota allows creating a new policy.""" + quotas = {"CATALOG": 10, "SCHEMA": 10, "TABLE": 5} + max_policies = quotas.get(securable_type, 10) + + existing = list(w.policies.list_policies( + on_securable_type=securable_type, + on_securable_fullname=securable_fullname, + )) + + # Count only direct policies (not inherited) + direct = [p for p in existing + if p.on_securable_fullname == securable_fullname] + + return { + "current": len(direct), + "max": max_policies, + "can_create": len(direct) < max_policies, + } +``` + +### Fetch Policies Without Cache (Direct API) + +```python +def fetch_policies_direct( + w, + catalog: str, + schema: str = None, + table: str = None, +): + """Fetch policies directly from REST API.""" + if table and schema: + securable_type = "TABLE" + securable_name = f"{catalog}.{schema}.{table}" + elif schema: + securable_type = "SCHEMA" + securable_name = f"{catalog}.{schema}" + else: + securable_type = "CATALOG" + securable_name = catalog + + policies = w.policies.list_policies( + on_securable_type=securable_type, + on_securable_fullname=securable_name, + include_inherited=True, + ) + + results = [] + for p in policies: + p_dict = p.as_dict() if hasattr(p, "as_dict") else {} + results.append({ + "name": p_dict.get("name"), + "policy_type": p_dict.get("policy_type"), + "to_principals": p_dict.get("to_principals", []), + "except_principals": p_dict.get("except_principals", []), + "on_securable_type": p_dict.get("on_securable_type"), + "on_securable_fullname": p_dict.get("on_securable_fullname"), + "column_mask": p_dict.get("column_mask"), + "row_filter": p_dict.get("row_filter"), + "match_columns": p_dict.get("match_columns", []), + }) + return results +``` + +--- + +## Async Usage (FastAPI, etc.) + +The Databricks SDK is synchronous. 
In async applications, wrap calls with `asyncio.to_thread()`: + +```python +import asyncio + +async def list_policies_async(w, catalog: str): + return await asyncio.to_thread( + lambda: list(w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname=catalog, + include_inherited=True, + )) + ) + +async def create_policy_async(w, **kwargs): + return await asyncio.to_thread( + w.policies.create_policy, + **kwargs, + ) +``` diff --git a/.claude/skills/uc-abac-governance/sql-generation.md b/.claude/skills/uc-abac-governance/sql-generation.md new file mode 100644 index 00000000..c0cb46f1 --- /dev/null +++ b/.claude/skills/uc-abac-governance/sql-generation.md @@ -0,0 +1,356 @@ +# SQL Generation Reference + +Pure SQL patterns for Unity Catalog ABAC governance operations. All SQL follows Databricks syntax. + +--- + +## Tag Operations + +### SET TAG on Column + +```sql +-- Legacy syntax (all versions) +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name SET TAGS ('pii_type' = 'ssn'); + +-- Modern syntax (DBR 16.1+) +SET TAG ON COLUMN catalog.schema.table.column_name 'pii_type' = 'ssn'; +``` + +### SET TAG on Table + +```sql +-- Legacy syntax +ALTER TABLE catalog.schema.table +SET TAGS ('data_classification' = 'confidential'); + +-- Modern syntax +SET TAG ON TABLE catalog.schema.table 'data_classification' = 'confidential'; +``` + +### SET TAG on Schema / Catalog + +```sql +SET TAG ON SCHEMA catalog.schema 'environment' = 'production'; +SET TAG ON CATALOG my_catalog 'department' = 'finance'; +``` + +### UNSET TAG + +```sql +-- Column (legacy) +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name UNSET TAGS ('pii_type'); + +-- Column (modern) +UNSET TAG ON COLUMN catalog.schema.table.column_name 'pii_type'; + +-- Table (legacy) +ALTER TABLE catalog.schema.table +UNSET TAGS ('data_classification'); + +-- Table (modern) +UNSET TAG ON TABLE catalog.schema.table 'data_classification'; +``` + +**Docs:** +- SET TAG: 
https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-set-tag.html +- UNSET TAG: https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-unset-tag.html + +--- + +## Tag Discovery Queries + +### Query Column Tags + +```sql +SELECT tag_name, tag_value, column_name +FROM system.information_schema.column_tags +WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; +``` + +### Query Table Tags + +```sql +SELECT tag_name, tag_value +FROM system.information_schema.table_tags +WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; +``` + +### All Tag Assignments in a Catalog + +```sql +-- Table-level tags +SELECT 'TABLE' as securable_type, + CONCAT(catalog_name, '.', schema_name, '.', table_name) as securable_name, + tag_name as tag_key, + tag_value +FROM system.information_schema.table_tags +WHERE catalog_name = 'my_catalog'; + +-- Column-level tags +SELECT 'COLUMN' as securable_type, + CONCAT(catalog_name, '.', schema_name, '.', table_name, '.', column_name) as securable_name, + tag_name as tag_key, + tag_value +FROM system.information_schema.column_tags +WHERE catalog_name = 'my_catalog'; +``` + +**Docs:** +- information_schema.column_tags: https://docs.databricks.com/sql/language-manual/information-schema/column_tags.html +- information_schema.table_tags: https://docs.databricks.com/sql/language-manual/information-schema/table_tags.html + +--- + +## Masking UDF Creation + +All masking UDFs must be `DETERMINISTIC` with simple `CASE` statements. No external calls or nested UDFs. 
+ +### Generic Masking Strategies + +```sql +-- Full mask: replaces all characters with * +CREATE OR REPLACE FUNCTION catalog.schema.mask_full(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Full masking - replaces all characters with *' +RETURN CASE + WHEN value IS NULL THEN NULL + ELSE REPEAT('*', LENGTH(value)) +END; + +-- Partial mask: show last 4 characters +CREATE OR REPLACE FUNCTION catalog.schema.mask_partial(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Partial masking - shows last 4 characters' +RETURN CASE + WHEN value IS NULL THEN NULL + WHEN LENGTH(value) <= 4 THEN REPEAT('*', LENGTH(value)) + ELSE CONCAT(REPEAT('*', LENGTH(value) - 4), RIGHT(value, 4)) +END; + +-- Hash: SHA256 with version prefix +CREATE OR REPLACE FUNCTION catalog.schema.mask_hash(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Hash masking - SHA256 with version prefix' +RETURN CASE + WHEN value IS NULL THEN NULL + ELSE CONCAT('HASH_v1_', SUBSTRING(SHA2(CONCAT(value, ':v1'), 256), 1, 16)) +END; + +-- Redact: replace with [REDACTED] +CREATE OR REPLACE FUNCTION catalog.schema.mask_redact(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Redaction - replaces value with [REDACTED]' +RETURN CASE + WHEN value IS NULL THEN NULL + ELSE '[REDACTED]' +END; + +-- Nullify: always returns NULL +CREATE OR REPLACE FUNCTION catalog.schema.mask_nullify(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Nullify - always returns NULL' +RETURN NULL; +``` + +### Specialized Masking UDFs + +```sql +-- SSN: ***-**-XXXX +CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks SSN showing only last 4 digits in XXX-XX-XXXX format' +RETURN CASE + WHEN ssn IS NULL THEN NULL + WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 + THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) + ELSE '***-**-****' +END; + +-- Email: j***@example.com +CREATE OR REPLACE FUNCTION catalog.schema.mask_email(email STRING) +RETURNS 
STRING +DETERMINISTIC +COMMENT 'Masks email showing first char and domain' +RETURN CASE + WHEN email IS NULL THEN NULL + WHEN INSTR(email, '@') > 1 + THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1)) + ELSE '***@***.***' +END; + +-- Credit card: ****-****-****-1234 +CREATE OR REPLACE FUNCTION catalog.schema.mask_credit_card(card_number STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks credit card showing only last 4 digits' +RETURN CASE + WHEN card_number IS NULL THEN NULL + WHEN LENGTH(REGEXP_REPLACE(card_number, '[^0-9]', '')) >= 4 + THEN CONCAT('****-****-****-', RIGHT(REGEXP_REPLACE(card_number, '[^0-9]', ''), 4)) + ELSE '****-****-****-****' +END; +``` + +### Row Filter UDFs + +Row filter UDFs return `BOOLEAN`: `TRUE` to include, `FALSE` to exclude. + +```sql +-- Region-based filter: hide EU rows +CREATE OR REPLACE FUNCTION catalog.schema.is_not_eu_region(region_value STRING) +RETURNS BOOLEAN +DETERMINISTIC +COMMENT 'Row filter - returns FALSE for EU regions' +RETURN CASE + WHEN region_value IS NULL THEN TRUE + WHEN LOWER(region_value) LIKE '%eu%' THEN FALSE + WHEN LOWER(region_value) LIKE '%europe%' THEN FALSE + ELSE TRUE +END; + +-- Array membership filter +CREATE OR REPLACE FUNCTION catalog.schema.is_in_allowed_values( + row_value STRING, + allowed_values ARRAY +) +RETURNS BOOLEAN +DETERMINISTIC +COMMENT 'Row filter based on array membership' +RETURN CASE + WHEN allowed_values IS NULL THEN FALSE + WHEN ARRAY_CONTAINS(TRANSFORM(allowed_values, x -> LOWER(x)), LOWER(row_value)) THEN TRUE + ELSE FALSE +END; +``` + +**Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices + +--- + +## Policy Creation + +### Column Mask Policy + +```sql +CREATE OR REPLACE POLICY mask_pii_ssn +ON SCHEMA catalog.schema +COMMENT 'Mask SSN columns for analysts' +COLUMN MASK catalog.schema.mask_ssn +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS 
masked_col +ON COLUMN masked_col; +``` + +### Row Filter Policy + +```sql +CREATE OR REPLACE POLICY filter_eu_data +ON CATALOG my_catalog +COMMENT 'Filter EU rows for US team' +ROW FILTER catalog.schema.is_not_eu_region +TO `us_team` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col +USING COLUMNS (filter_col); +``` + +### Policy with Tag Key Only (any value) + +```sql +-- Match any column with tag 'pii_type' regardless of value +CREATE OR REPLACE POLICY mask_all_pii +ON SCHEMA catalog.schema +COLUMN MASK catalog.schema.mask_full +TO `external_users` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTag('pii_type') AS masked_col +ON COLUMN masked_col; +``` + +### Drop Policy + +```sql +DROP POLICY mask_pii_ssn ON SCHEMA catalog.schema; +DROP POLICY filter_eu_data ON CATALOG my_catalog; +``` + +> **Note:** There is no `ALTER POLICY`. To modify a policy, drop and recreate it. + +--- + +## Discovery Queries + +```sql +-- List catalogs +SHOW CATALOGS; + +-- List schemas in a catalog +SHOW SCHEMAS IN my_catalog; + +-- List tables in a schema +SHOW TABLES IN my_catalog.my_schema; + +-- Describe table with extended metadata +DESCRIBE TABLE EXTENDED my_catalog.my_schema.my_table; + +-- List UDFs in a schema +SHOW USER FUNCTIONS IN my_catalog.my_schema; + +-- Describe a UDF +DESCRIBE FUNCTION EXTENDED my_catalog.my_schema.mask_ssn; + +-- Sample column values +SELECT DISTINCT column_name +FROM my_catalog.my_schema.my_table +LIMIT 20; +``` + +--- + +## Enums Reference + +### PII Types (governed tag values) + +`ssn`, `email`, `phone`, `credit_card`, `date_of_birth`, `address`, `name`, `ip_address`, `national_id`, `medical_record`, `generic` + +### Masking Strategies + +| Strategy | Description | +|----------|-------------| +| `full_mask` | Replace all characters with `*` | +| `partial_mask` | Show last 4 characters | +| `hash` | SHA256 with version prefix | +| `redact` | Replace with `[REDACTED]` | +| `nullify` | Always return NULL | +| 
`custom` | User-supplied SQL (requires manual UDF) | + +### Policy Scopes + +| Scope | Description | +|-------|-------------| +| `CATALOG` | Policy applies to all tables in catalog | +| `SCHEMA` | Policy applies to all tables in schema | +| `TABLE` | Policy applies to a single table | + +### Tag Syntax Variants + +| Variant | Availability | Example | +|---------|-------------|---------| +| `LEGACY` | All versions | `ALTER TABLE t ALTER COLUMN c SET TAGS ('k'='v')` | +| `MODERN` | DBR 16.1+ | `SET TAG ON COLUMN t.c 'k' = 'v'` | diff --git a/DEV_CHANGELOG.md b/DEV_CHANGELOG.md new file mode 100644 index 00000000..dd9f172e --- /dev/null +++ b/DEV_CHANGELOG.md @@ -0,0 +1,126 @@ +# Dev Changelog — Unity Catalog ABAC Policy Governance + +**Branch**: `feature/uc_abac_skills` +**Date**: 2026-02-09 +**Author**: sreeramreddy.thoom +**Reference**: UCABAC repo (`/Users/sreeramreddy.thoom/Documents/ClaudeCodeRepo/UCABAC`) + +--- + +## Overview + +Adds a new **`uc-abac-governance`** Claude Code skill to the Databricks AI Dev Kit, providing comprehensive guidance for managing Attribute-Based Access Control (ABAC) policies in Unity Catalog. Also adds Python SDK examples for ABAC policy operations. + +The skill content is derived from the UCABAC project — a production ABAC governance agent with multi-agent architecture, MCP server, and React frontend. + +**Excluded:** Policy drift detection is intentionally omitted from this skill. 
+ +--- + +## New Files + +### Skill: `uc-abac-governance` + +| File | Description | +|------|-------------| +| `databricks-skills/uc-abac-governance/SKILL.md` | Main skill: ABAC overview, governed tags, tag assignments, masking UDFs, CREATE/DROP POLICY syntax, human-in-the-loop workflow, policy quotas, invalid SQL warnings, common errors | +| `databricks-skills/uc-abac-governance/sql-generation.md` | SQL patterns: SET/UNSET TAG (legacy + modern), CREATE FUNCTION for masking UDFs (full, partial, hash, redact, nullify, SSN, email, credit card), row filter UDFs, CREATE/DROP POLICY, tag discovery queries, enums reference | +| `databricks-skills/uc-abac-governance/python-sdk-patterns.md` | Python SDK: `w.policies.list_policies()`, `create_policy()`, `get_policy()`, `update_policy()`, `delete_policy()`, error handling, quota checking, async patterns | +| `databricks-skills/uc-abac-governance/mcp-tools-reference.md` | MCP tool reference: 12 tools — `list_abac_policies`, `get_abac_policy`, `create_abac_policy`, `update_abac_policy`, `delete_abac_policy`, `preview_policy_changes`, `get_table_policies`, `get_column_tags_api`, `get_masking_functions`, `get_schema_info`, `get_catalog_info`, `list_table_policies_in_schema` | + +### Installed Skills (mirrors of above) + +| File | Description | +|------|-------------| +| `.claude/skills/uc-abac-governance/SKILL.md` | Installed copy | +| `.claude/skills/uc-abac-governance/sql-generation.md` | Installed copy | +| `.claude/skills/uc-abac-governance/python-sdk-patterns.md` | Installed copy | +| `.claude/skills/uc-abac-governance/mcp-tools-reference.md` | Installed copy | + +### SDK Example + +| File | Description | +|------|-------------| +| `databricks-skills/databricks-python-sdk/examples/6-abac-policies.py` | Python SDK example: list, create, get, update, delete ABAC policies with error handling | + +--- + +## Modified Files + +| File | Change | +|------|--------| +| `databricks-skills/install_skills.sh` | Added 
`uc-abac-governance` to `DATABRICKS_SKILLS`, `get_skill_description()`, and `get_skill_extra_files()`. Updated `databricks-python-sdk` extra files to include `examples/6-abac-policies.py`. | +| `databricks-skills/databricks-python-sdk/SKILL.md` | Added ABAC Policies section with SDK examples for list, get, create, update, delete operations | + +--- + +## Key Design Decisions + +### 1. Separate Skill vs. Extending `databricks-unity-catalog` + +Created a **new dedicated skill** (`uc-abac-governance`) rather than extending the existing `databricks-unity-catalog` skill because: +- ABAC governance is a distinct, complex domain with its own workflow +- The existing UC skill focuses on system tables and volumes — different audience +- Separate skill allows targeted installation (`install_skills.sh uc-abac-governance`) +- Content volume warrants its own skill (4 reference files) + +### 2. SQL Generation + SDK Dual Approach + +The skill documents both approaches: +- **SQL generation**: `CREATE POLICY` / `DROP POLICY` syntax for SQL-based workflows +- **Python SDK**: `w.policies.*` methods for programmatic policy management +- MCP tool wrappers that combine both approaches + +### 3. Human-in-the-Loop Workflow + +The skill emphasizes a 6-step governance workflow matching the UCABAC agent pattern: +1. **Analyze** — scan table structure, existing tags, current policies +2. **Recommend** — generate policy recommendations based on tags +3. **Preview** — show proposed changes (SQL equivalent + impact) +4. **Approve** — human reviews and approves/rejects +5. **Execute** — create ABAC policies via SDK +6. **Verify** — confirm policies are active and masking works + +### 4. `gov_admin` Safety Net + +All examples enforce the `gov_admin` exception pattern — every ABAC policy must exclude the administrator group from masking/filtering. 
+ +--- + +## Source Mapping (UCABAC → ai-dev-kit) + +| UCABAC Source | Skill Target | +|--------------|-------------| +| `ucabac/skills/governance-policy/SKILL.md` | `SKILL.md` | +| `ucabac/sql_gen/policy_skills.py` | `sql-generation.md` | +| `ucabac/sql_gen/tag_skills.py` | `sql-generation.md` | +| `ucabac/sql_gen/udf_skills.py` | `sql-generation.md` | +| `ucabac/sql_gen/_base.py` | `sql-generation.md` (enums) | +| `ucabac/mcp/policy_api_tools.py` | `mcp-tools-reference.md`, `python-sdk-patterns.md` | +| `ucabac/services/unity_catalog_client.py` | `python-sdk-patterns.md` | +| `ucabac/services/abac_policy_sync.py` | `python-sdk-patterns.md` | +| `ucabac/core/policy_manager.py` | `SKILL.md` (workflow) | +| `ucabac/skills/governance-policy/references/SQL_GEN.md` | `sql-generation.md` | +| `ucabac/skills/governance-policy/references/MCP_TOOLS.md` | `mcp-tools-reference.md` | + +--- + +## Dependencies + +- Databricks Runtime 16.1+ (for modern SET TAG syntax) or any version (for legacy syntax) +- Unity Catalog enabled workspace +- `databricks-sdk` (for `w.policies.*` API) +- MANAGE permission on target securables +- Governed tags created via Databricks UI (cannot be created via SQL) + +--- + +## Testing Checklist + +- [ ] `install_skills.sh --list` shows `uc-abac-governance` with correct description +- [ ] `install_skills.sh uc-abac-governance --local` installs all 4 files +- [ ] SKILL.md frontmatter has valid `name` and `description` +- [ ] SQL examples match Databricks ABAC documentation syntax +- [ ] Python SDK example parses without syntax errors +- [ ] No references to invalid SQL (SHOW POLICIES, DESCRIBE POLICY, etc.) 
+- [ ] All policies include `gov_admin` in EXCEPT clause diff --git a/PLAN_UC_ABAC_SKILLS.md b/PLAN_UC_ABAC_SKILLS.md new file mode 100644 index 00000000..5fa6e6b2 --- /dev/null +++ b/PLAN_UC_ABAC_SKILLS.md @@ -0,0 +1,146 @@ +# Plan: Add Unity Catalog ABAC Policy Governance Skill + +## Context + +The `abac_ai_dev_kit` (forked from `databricks-solutions/ai-dev-kit`) provides Claude Code skills for Databricks. The existing `databricks-unity-catalog` skill covers system tables and volumes but has **no ABAC policy governance content**. + +The UCABAC companion repo (`/Users/sreeramreddy.thoom/Documents/ClaudeCodeRepo/UCABAC`) implements a full ABAC governance agent with: +- Python SDK code for ABAC policy CRUD (`w.policies.list_policies/create_policy/update_policy/delete_policy`) +- SQL generation for `CREATE POLICY`, `DROP POLICY`, `SET TAG`, masking UDFs +- MCP tools for policy management (12 tools) +- Human-in-the-loop governance workflow (Analyze > Recommend > Preview > Approve > Execute > Verify) +- Policy conflict detection, drift scanning, compliance reporting +- Multi-agent architecture (Supervisor + 4 specialist agents) + +**Goal**: Extract the ABAC governance knowledge from UCABAC into a new skill (`uc-abac-governance`) in the ai-dev-kit, add Python SDK examples, and produce a `DEV_CHANGELOG.md`. 
+ +--- + +## Architecture Overview (UCABAC) + +``` +UCABAC/ +├── ucabac/ # Main package (v0.2.0) +│ ├── core/policy_manager.py # GovernancePolicyManager facade +│ ├── sql_gen/ # Pure SQL generation (no state) +│ │ ├── _base.py # Enums: PIIType, MaskingStrategy, PolicyScope +│ │ ├── tag_skills.py # SET/UNSET TAG SQL +│ │ ├── udf_skills.py # CREATE FUNCTION (masking UDFs) +│ │ ├── policy_skills.py # CREATE/DROP POLICY SQL +│ │ ├── discovery_skills.py # SHOW/DESCRIBE SQL +│ │ └── compliance_skills.py # Compliance query SQL +│ ├── mcp/ +│ │ ├── policy_api_tools.py # 12 MCP tools for ABAC CRUD via SDK +│ │ ├── server.py # MCP server (40+ tools) +│ │ └── sql_executor.py # SQL Warehouse execution +│ ├── services/ +│ │ ├── unity_catalog_client.py # UC client with REST API wrappers +│ │ ├── abac_policy_sync.py # Sync policies to Postgres cache +│ │ ├── drift_detector.py # Policy drift detection +│ │ └── policy_conflict_checker.py # Conflict validation +│ ├── agents/ # Multi-agent system (Claude-powered) +│ │ ├── supervisor_agent.py # Task decomposition + delegation +│ │ ├── governance_agent.py # Governance policy specialist +│ │ ├── pii_agent.py # PII detection specialist +│ │ ├── compliance_agent.py # Compliance reporting specialist +│ │ └── query_agent.py # Query assistant specialist +│ └── database/ # Lakebase Postgres persistence +├── app/api/ # FastAPI REST + SSE backend +├── frontend/ # React + TypeScript SPA +├── skills/governance-policy/ # Existing skill docs (reference) +└── tests/ # 283 unit tests +``` + +--- + +## Changes to Make + +### 1. New Skill: `databricks-skills/uc-abac-governance/` + +Create a new skill directory following the TEMPLATE pattern: + +| File | Content Source | +|------|--------------| +| `SKILL.md` | ABAC overview, governed tags, tag assignments, masking UDFs, CREATE POLICY syntax, human-in-the-loop workflow, policy quotas, invalid SQL warnings, common errors. 
Derived from `ucabac/skills/governance-policy/SKILL.md` | +| `sql-generation.md` | Pure SQL patterns: SET/UNSET TAG, CREATE FUNCTION, CREATE/DROP POLICY, discovery queries. Derived from `ucabac/sql_gen/` modules | +| `python-sdk-patterns.md` | Databricks Python SDK examples: `w.policies.list_policies()`, `create_policy()`, `update_policy()`, `delete_policy()`, `get_policy()`. Derived from `ucabac/mcp/policy_api_tools.py` and `ucabac/services/` | +| `mcp-tools-reference.md` | MCP tool reference for 12 policy API tools: list, get, create, update, delete, preview. Derived from `ucabac/mcp/policy_api_tools.py` | + +### 2. Install into `.claude/skills/uc-abac-governance/` + +Copy the 4 skill files to `.claude/skills/uc-abac-governance/` (matching how other skills are installed). + +### 3. Update `databricks-skills/install_skills.sh` + +- Add `uc-abac-governance` to `DATABRICKS_SKILLS` list (line 45) +- Add `"uc-abac-governance") echo "ABAC policy governance - tags, UDFs, column masks, row filters"` in `get_skill_description()` +- Add `"uc-abac-governance") echo "sql-generation.md python-sdk-patterns.md mcp-tools-reference.md"` in `get_skill_extra_files()` + +### 4. New SDK Example: `databricks-skills/databricks-python-sdk/examples/6-abac-policies.py` + +Following the pattern of existing examples (1-authentication.py through 5-serving-and-vector-search.py), add a new example demonstrating ABAC policy operations: +- List policies on catalog/schema/table +- Create column mask policy with tag matching +- Create row filter policy +- Update policy principals +- Delete policy +- Preview policy changes before execution + +### 5. Update `databricks-python-sdk` Skill + +- Add ABAC policies section to `databricks-skills/databricks-python-sdk/SKILL.md` +- Update `get_skill_extra_files()` to include `examples/6-abac-policies.py` + +### 6. 
Create `DEV_CHANGELOG.md` in Project Root + +--- + +## Key Patterns to Preserve + +### SQL That Does NOT Exist in Databricks +- `SHOW POLICIES` / `DESCRIBE POLICY` -- use REST API instead +- `ALTER POLICY` -- drop and recreate +- `information_schema.column_masks` / `.row_filters` -- for old-style masking, NOT ABAC +- `ALTER USER SET ATTRIBUTES` / `SHOW USER ATTRIBUTES` -- use SCIM API + +### Automatic `gov_admin` Exception +Every ABAC policy MUST include `EXCEPT \`gov_admin\`` to protect administrator access. Enforced at 3 levels: system prompt, tool-level injection, SQL interception. + +### Policy Quotas +- Max 10 policies per catalog +- Max 10 policies per schema +- Max 5 policies per table + +### Human-in-the-Loop Workflow +``` +ANALYZE → RECOMMEND → PREVIEW → APPROVE → EXECUTE → VERIFY +``` + +--- + +## Files to Create/Modify + +| Action | File | +|--------|------| +| CREATE | `databricks-skills/uc-abac-governance/SKILL.md` | +| CREATE | `databricks-skills/uc-abac-governance/sql-generation.md` | +| CREATE | `databricks-skills/uc-abac-governance/python-sdk-patterns.md` | +| CREATE | `databricks-skills/uc-abac-governance/mcp-tools-reference.md` | +| CREATE | `.claude/skills/uc-abac-governance/SKILL.md` | +| CREATE | `.claude/skills/uc-abac-governance/sql-generation.md` | +| CREATE | `.claude/skills/uc-abac-governance/python-sdk-patterns.md` | +| CREATE | `.claude/skills/uc-abac-governance/mcp-tools-reference.md` | +| MODIFY | `databricks-skills/install_skills.sh` | +| CREATE | `databricks-skills/databricks-python-sdk/examples/6-abac-policies.py` | +| MODIFY | `databricks-skills/databricks-python-sdk/SKILL.md` | +| CREATE | `DEV_CHANGELOG.md` | + +--- + +## Verification + +1. `./databricks-skills/install_skills.sh --list` -- confirm `uc-abac-governance` appears with description +2. `./databricks-skills/install_skills.sh uc-abac-governance --local` -- confirm all 4 files install to `.claude/skills/` +3. 
`python -c "import ast; ast.parse(open('databricks-skills/databricks-python-sdk/examples/6-abac-policies.py').read())"` -- syntax check +4. Verify SKILL.md frontmatter matches naming conventions +5. Cross-reference SQL patterns against Databricks ABAC docs diff --git a/databricks-skills/databricks-python-sdk/SKILL.md b/databricks-skills/databricks-python-sdk/SKILL.md index c5937eec..b0ffaf0b 100644 --- a/databricks-skills/databricks-python-sdk/SKILL.md +++ b/databricks-skills/databricks-python-sdk/SKILL.md @@ -414,6 +414,58 @@ for doc in results.result.data_array: print(doc) ``` +### ABAC Policies +**Doc:** https://docs.databricks.com/data-governance/unity-catalog/abac/policies + +```python +# List policies on a schema +for policy in w.policies.list_policies( + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + include_inherited=True, +): + print(f"{policy.name}: {policy.policy_type}") + +# Get policy details +policy = w.policies.get_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", +) + +# Create column mask policy (ALWAYS include gov_admin in except_principals) +from databricks.sdk.service.catalog import ColumnMask, MatchColumns +created = w.policies.create_policy( + name="mask_pii_ssn", + policy_type="COLUMN_MASK", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + for_securable_type="TABLE", + to_principals=["analysts"], + except_principals=["gov_admin"], + column_mask=ColumnMask(function_name="my_catalog.my_schema.mask_ssn"), + match_columns=[MatchColumns(tag_name="pii_type", tag_value="ssn")], +) + +# Update policy principals +w.policies.update_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + to_principals=["analysts", "new_team"], + except_principals=["gov_admin"], +) + +# Delete policy +w.policies.delete_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + 
on_securable_fullname="my_catalog.my_schema", +) +``` + +**Note:** There is no `SHOW POLICIES` SQL. Use `w.policies.list_policies()` instead. There is no `ALTER POLICY` — drop and recreate. + ### Pipelines (Delta Live Tables) **Doc:** https://databricks-sdk-py.readthedocs.io/en/latest/workspace/pipelines/pipelines.html diff --git a/databricks-skills/databricks-python-sdk/examples/6-abac-policies.py b/databricks-skills/databricks-python-sdk/examples/6-abac-policies.py new file mode 100644 index 00000000..a25e0004 --- /dev/null +++ b/databricks-skills/databricks-python-sdk/examples/6-abac-policies.py @@ -0,0 +1,203 @@ +""" +Databricks SDK - ABAC Policy Management Examples + +ABAC Policies: https://docs.databricks.com/data-governance/unity-catalog/abac/policies +Python SDK: https://databricks-sdk-py.readthedocs.io/en/latest/ +""" + +from databricks.sdk import WorkspaceClient +from databricks.sdk.errors import NotFound, PermissionDenied, BadRequest + +w = WorkspaceClient() + +# ============================================================================= +# LIST POLICIES +# ============================================================================= + +# List all ABAC policies on a catalog (includes inherited policies) +for policy in w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname="my_catalog", + include_inherited=True, +): + print(f"{policy.name}: {policy.policy_type}") + +# List policies on a schema +for policy in w.policies.list_policies( + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + include_inherited=True, +): + p_dict = policy.as_dict() if hasattr(policy, "as_dict") else {} + print(f" {p_dict.get('name')}: type={p_dict.get('policy_type')}, " + f"principals={p_dict.get('to_principals')}") + +# List policies on a specific table +for policy in w.policies.list_policies( + on_securable_type="TABLE", + on_securable_fullname="my_catalog.my_schema.my_table", + include_inherited=True, +): + 
print(f"{policy.name}: {policy.policy_type}") + +# Filter by policy type +all_policies = list(w.policies.list_policies( + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + include_inherited=True, +)) +column_masks = [p for p in all_policies if p.policy_type == "COLUMN_MASK"] +row_filters = [p for p in all_policies if p.policy_type == "ROW_FILTER"] +print(f"Column masks: {len(column_masks)}, Row filters: {len(row_filters)}") + + +# ============================================================================= +# GET POLICY DETAILS +# ============================================================================= + +# Get a specific policy by name +policy = w.policies.get_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", +) +print(f"Policy: {policy.name}") +print(f"Type: {policy.policy_type}") +print(f"Principals: {policy.to_principals}") +print(f"Except: {policy.except_principals}") + + +# ============================================================================= +# CREATE COLUMN MASK POLICY +# ============================================================================= + +# Create a column mask policy that masks SSN columns for analysts +# The policy matches columns tagged with pii_type='ssn' and applies mask_ssn UDF +from databricks.sdk.service.catalog import ColumnMask, MatchColumns + +created = w.policies.create_policy( + name="mask_pii_ssn", + policy_type="COLUMN_MASK", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + for_securable_type="TABLE", + to_principals=["analysts", "data_scientists"], + except_principals=["gov_admin"], # ALWAYS include gov_admin + comment="Mask SSN columns for analyst groups", + column_mask=ColumnMask( + function_name="my_catalog.my_schema.mask_ssn", + ), + match_columns=[ + MatchColumns( + tag_name="pii_type", + tag_value="ssn", + ) + ], +) +print(f"Created policy: {created.name}") + + +# 
============================================================================= +# CREATE ROW FILTER POLICY +# ============================================================================= + +# Create a row filter policy that hides EU rows from the US team +from databricks.sdk.service.catalog import RowFilter + +created = w.policies.create_policy( + name="filter_eu_data", + policy_type="ROW_FILTER", + on_securable_type="CATALOG", + on_securable_fullname="my_catalog", + for_securable_type="TABLE", + to_principals=["us_team"], + except_principals=["gov_admin"], # ALWAYS include gov_admin + comment="Filter EU rows for US team", + row_filter=RowFilter( + function_name="my_catalog.my_schema.is_not_eu_region", + ), + match_columns=[ + MatchColumns( + tag_name="region", + tag_value="eu", + ) + ], +) +print(f"Created policy: {created.name}") + + +# ============================================================================= +# UPDATE POLICY +# ============================================================================= + +# Update policy principals (cannot change UDF, tags, or scope - drop and recreate) +updated = w.policies.update_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + to_principals=["analysts", "data_scientists", "new_team"], + except_principals=["gov_admin", "senior_admins"], + comment="Updated: added new_team to masked principals", +) +print(f"Updated policy: {updated.name}") + + +# ============================================================================= +# DELETE POLICY +# ============================================================================= + +# Delete a policy +w.policies.delete_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", +) +print("Policy deleted") + + +# ============================================================================= +# ERROR HANDLING +# 
============================================================================= + +# Handle common errors +try: + policy = w.policies.get_policy( + name="nonexistent_policy", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + ) +except NotFound: + print("Policy not found") +except PermissionDenied: + print("Insufficient permissions - need MANAGE on securable") +except BadRequest as e: + print(f"Invalid request: {e}") + + +# ============================================================================= +# UTILITY: POLICY SUMMARY +# ============================================================================= + +# Get a summary of all ABAC policies in a catalog +def get_policy_summary(w, catalog: str): + """Get a summary of all ABAC policies in a catalog.""" + policies = list(w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname=catalog, + include_inherited=True, + )) + + column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] + row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] + + return { + "total": len(policies), + "column_masks": len(column_masks), + "row_filters": len(row_filters), + } + + +summary = get_policy_summary(w, "my_catalog") +print(f"Total: {summary['total']}, " + f"Column masks: {summary['column_masks']}, " + f"Row filters: {summary['row_filters']}") diff --git a/databricks-skills/install_skills.sh b/databricks-skills/install_skills.sh index e495509e..ad04cfcb 100755 --- a/databricks-skills/install_skills.sh +++ b/databricks-skills/install_skills.sh @@ -42,7 +42,7 @@ MLFLOW_REPO_RAW_URL="https://raw.githubusercontent.com/mlflow/skills" MLFLOW_REPO_REF="main" # Databricks skills (hosted in this repo) -DATABRICKS_SKILLS="agent-bricks aibi-dashboards asset-bundles databricks-app-apx databricks-app-python databricks-config databricks-dbsql databricks-docs databricks-genie databricks-jobs databricks-python-sdk databricks-unity-catalog lakebase-autoscale 
lakebase-provisioned metric-views mlflow-evaluation model-serving spark-declarative-pipelines spark-structured-streaming synthetic-data-generation unstructured-pdf-generation vector-search" +DATABRICKS_SKILLS="agent-bricks aibi-dashboards asset-bundles databricks-app-apx databricks-app-python databricks-config databricks-dbsql databricks-docs databricks-genie databricks-jobs databricks-python-sdk databricks-unity-catalog lakebase-autoscale lakebase-provisioned metric-views mlflow-evaluation model-serving spark-declarative-pipelines spark-structured-streaming synthetic-data-generation uc-abac-governance unstructured-pdf-generation vector-search" # MLflow skills (fetched from mlflow/skills repo) MLFLOW_SKILLS="agent-evaluation analyze-mlflow-chat-session analyze-mlflow-trace instrumenting-with-mlflow-tracing mlflow-onboarding querying-mlflow-metrics retrieving-mlflow-traces searching-mlflow-docs" @@ -69,6 +69,7 @@ get_skill_description() { "lakebase-autoscale") echo "Lakebase Autoscale - managed PostgreSQL with autoscaling" ;; "lakebase-provisioned") echo "Lakebase Provisioned - data connections and reverse ETL" ;; "metric-views") echo "Unity Catalog Metric Views - governed business metrics in YAML" ;; + "uc-abac-governance") echo "ABAC policy governance - tags, UDFs, column masks, row filters" ;; "model-serving") echo "Model Serving - deploy MLflow models and AI agents" ;; "spark-declarative-pipelines") echo "Spark Declarative Pipelines (SDP/LDP/DLT)" ;; "spark-structured-streaming") echo "Spark Structured Streaming patterns and best practices" ;; @@ -98,9 +99,10 @@ get_skill_extra_files() { "databricks-app-apx") echo "backend-patterns.md best-practices.md frontend-patterns.md" ;; "databricks-app-python") echo "dash.md streamlit.md README.md" ;; "databricks-jobs") echo "task-types.md triggers-schedules.md notifications-monitoring.md examples.md" ;; - "databricks-python-sdk") echo "doc-index.md examples/1-authentication.py examples/2-clusters-and-jobs.py 
examples/3-sql-and-warehouses.py examples/4-unity-catalog.py examples/5-serving-and-vector-search.py" ;; + "databricks-python-sdk") echo "doc-index.md examples/1-authentication.py examples/2-clusters-and-jobs.py examples/3-sql-and-warehouses.py examples/4-unity-catalog.py examples/5-serving-and-vector-search.py examples/6-abac-policies.py" ;; "databricks-unity-catalog") echo "5-system-tables.md" ;; "lakebase-autoscale") echo "projects.md branches.md computes.md connection-patterns.md reverse-etl.md" ;; + "uc-abac-governance") echo "sql-generation.md python-sdk-patterns.md mcp-tools-reference.md" ;; "lakebase-provisioned") echo "connection-patterns.md reverse-etl.md" ;; "metric-views") echo "yaml-reference.md patterns.md" ;; "mlflow-evaluation") echo "references/CRITICAL-interfaces.md references/GOTCHAS.md references/patterns-context-optimization.md references/patterns-datasets.md references/patterns-evaluation.md references/patterns-scorers.md references/patterns-trace-analysis.md references/user-journeys.md" ;; diff --git a/databricks-skills/uc-abac-governance/SKILL.md b/databricks-skills/uc-abac-governance/SKILL.md new file mode 100644 index 00000000..6efabdcc --- /dev/null +++ b/databricks-skills/uc-abac-governance/SKILL.md @@ -0,0 +1,294 @@ +--- +name: uc-abac-governance +description: "Unity Catalog ABAC policy governance - governed tags, masking UDFs, column masks, row filters, and human-in-the-loop policy management." +--- + +# Unity Catalog ABAC Policy Governance + +Guidance for Attribute-Based Access Control (ABAC) policies in Databricks Unity Catalog. Covers governed tags, tag assignments, masking UDFs, CREATE/DROP POLICY syntax, and the human-in-the-loop governance workflow. 
+ +**Databricks Docs:** +- ABAC overview: https://docs.databricks.com/data-governance/unity-catalog/abac/ +- ABAC policies: https://docs.databricks.com/data-governance/unity-catalog/abac/policies +- ABAC tutorial: https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial + +## When to Use This Skill + +Use this skill when: +- Creating or managing **ABAC policies** (column masks, row filters) +- Working with **governed tags** (creating via UI, applying via SQL) +- Building **masking UDFs** for PII protection (SSN, email, credit card, etc.) +- Implementing **human-in-the-loop governance** workflows +- Querying tag assignments via `information_schema` +- Managing policy lifecycle (create, update, delete, preview) + +## Reference Files + +| Topic | File | Description | +|-------|------|-------------| +| SQL Generation | [sql-generation.md](sql-generation.md) | SET/UNSET TAG, CREATE FUNCTION, CREATE/DROP POLICY, discovery queries | +| Python SDK | [python-sdk-patterns.md](python-sdk-patterns.md) | `w.policies.*` SDK methods for ABAC policy CRUD | +| MCP Tools | [mcp-tools-reference.md](mcp-tools-reference.md) | 12 MCP tools for policy management | + +--- + +## ABAC Workflow Overview + +ABAC policies in Databricks follow a 4-step setup: + +1. **Governed Tags** - Define classification taxonomy (UI only) +2. **Tag Assignments** - Apply tags to columns/tables via SQL +3. **Masking UDFs** - Create deterministic functions for data masking +4. **ABAC Policies** - Bind tags to UDFs with principal scoping + +``` +┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ +│ Governed Tags│───>│ Tag │───>│ Masking │───>│ ABAC │ +│ (UI only) │ │ Assignments │ │ UDFs │ │ Policies │ +└──────────────┘ └──────────────┘ └──────────────┘ └──────────────┘ +``` + +--- + +## IMPORTANT: SQL That Does NOT Exist + +These SQL commands do **not** exist in Databricks. Do not generate them. 
+ +| Invalid SQL | What to use instead | +|---|---| +| `SHOW POLICIES` | REST API: `w.policies.list_policies()` | +| `DESCRIBE POLICY` | REST API: `w.policies.get_policy()` | +| `ALTER POLICY` | Drop and recreate the policy | +| `ALTER USER SET ATTRIBUTES` | SCIM API for user attributes | +| `SHOW USER ATTRIBUTES` | SCIM API for user attributes | + +--- + +## Step 1: Governed Tags + +Governed tags **cannot** be created via SQL. They must be created in the Databricks UI. + +### Creating a Governed Tag (UI Steps) + +1. Navigate to **Catalog** in the workspace +2. Select **Governed Tags** from the left panel +3. Click **Create governed tag** +4. Configure: + - **Tag Key**: e.g., `pii_type` + - **Allowed Values**: e.g., `ssn`, `email`, `phone`, `credit_card`, `address` + - **Description**: e.g., "PII classification for ABAC policies" + +> **Note:** Tag data is stored as plain text and may be replicated globally. Avoid sensitive information in tag names or values. + +**Docs:** https://docs.databricks.com/admin/governed-tags/ + +--- + +## Step 2: Applying Tags to Columns + +### Legacy Syntax (all versions) + +```sql +-- Set tag on column +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name SET TAGS ('pii_type' = 'ssn'); + +-- Set tag on table +ALTER TABLE catalog.schema.table +SET TAGS ('data_classification' = 'confidential'); + +-- Remove tag +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name UNSET TAGS ('pii_type'); +``` + +### Modern Syntax (DBR 16.1+) + +```sql +SET TAG ON COLUMN catalog.schema.table.column_name 'pii_type' = 'ssn'; +SET TAG ON TABLE catalog.schema.table 'data_classification' = 'confidential'; +SET TAG ON SCHEMA catalog.schema 'environment' = 'production'; +SET TAG ON CATALOG catalog 'department' = 'finance'; + +UNSET TAG ON COLUMN catalog.schema.table.column_name 'pii_type'; +``` + +### Querying Existing Tags + +```sql +-- Column tags +SELECT tag_name, tag_value, column_name +FROM system.information_schema.column_tags +WHERE 
catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; + +-- Table tags +SELECT tag_name, tag_value +FROM system.information_schema.table_tags +WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; +``` + +--- + +## Step 3: Masking UDFs + +Masking UDFs must be `DETERMINISTIC` and use simple `CASE` statements. No external calls or nested UDFs. + +```sql +-- Full mask: replaces all characters with * +CREATE OR REPLACE FUNCTION catalog.schema.mask_full(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Full masking - replaces all characters with *' +RETURN CASE + WHEN value IS NULL THEN NULL + ELSE REPEAT('*', LENGTH(value)) +END; + +-- Partial mask: show last 4 characters +CREATE OR REPLACE FUNCTION catalog.schema.mask_partial(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Partial masking - shows last 4 characters' +RETURN CASE + WHEN value IS NULL THEN NULL + WHEN LENGTH(value) <= 4 THEN REPEAT('*', LENGTH(value)) + ELSE CONCAT(REPEAT('*', LENGTH(value) - 4), RIGHT(value, 4)) +END; + +-- SSN mask: ***-**-XXXX format +CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks SSN showing only last 4 digits' +RETURN CASE + WHEN ssn IS NULL THEN NULL + WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 + THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) + ELSE '***-**-****' +END; + +-- Email mask: j***@example.com +CREATE OR REPLACE FUNCTION catalog.schema.mask_email(email STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks email showing first char and domain' +RETURN CASE + WHEN email IS NULL THEN NULL + WHEN INSTR(email, '@') > 1 + THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1)) + ELSE '***@***.***' +END; +``` + +**Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices + +--- + +## Step 4: ABAC Policies + +### Column Mask Policy + +```sql 
+CREATE OR REPLACE POLICY mask_pii_columns +ON SCHEMA catalog.schema +COMMENT 'Mask PII columns for analysts' +COLUMN MASK catalog.schema.mask_partial +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; +``` + +### Row Filter Policy + +```sql +CREATE OR REPLACE POLICY filter_eu_rows +ON CATALOG my_catalog +COMMENT 'Filter EU rows for US team' +ROW FILTER catalog.schema.is_not_eu_region +TO `us_team` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col +USING COLUMNS (filter_col); +``` + +### Drop Policy + +```sql +DROP POLICY mask_pii_columns ON SCHEMA catalog.schema; +``` + +### CRITICAL: Always Exclude `gov_admin` + +Every ABAC policy **MUST** include `EXCEPT \`gov_admin\`` to protect administrator access. Without this, admins could be locked out of data. + +### Policy Quotas + +| Scope | Max Policies | +|-------|-------------| +| Per Catalog | 10 | +| Per Schema | 10 | +| Per Table | 5 | + +--- + +## Human-in-the-Loop Governance Workflow + +ABAC policy changes should follow a governed workflow: + +``` +ANALYZE → RECOMMEND → PREVIEW → APPROVE → EXECUTE → VERIFY + │ │ │ │ │ │ + ▼ ▼ ▼ ▼ ▼ ▼ + Discover Generate Show SQL Human Run SQL Confirm + current policy & impact confirms or SDK changes + state proposals preview changes call applied +``` + +1. **ANALYZE**: Discover current tags, policies, and UDFs +2. **RECOMMEND**: Generate policy proposals based on requirements +3. **PREVIEW**: Use `preview_policy_changes` to show exact SQL and impact +4. **APPROVE**: Human reviews and explicitly approves +5. **EXECUTE**: Create/update/delete policies via SDK or SQL +6. **VERIFY**: Confirm policies are applied correctly + +**Never auto-execute policy changes.** Always preview and wait for human approval. 
+
+---
+
+## Common Errors
+
+| Error | Cause | Solution |
+|-------|-------|----------|
+| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Consolidate policies or use broader scope |
+| `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | Check governed tag configuration in UI |
+| `UDF_NOT_FOUND` | Masking UDF doesn't exist | Create UDF first, use fully qualified name |
+| `POLICY_ALREADY_EXISTS` | Policy name conflict | Use `CREATE OR REPLACE POLICY` |
+| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | Grant `MANAGE` permission to policy creator |
+| `SHOW POLICIES is not supported` | Used invalid SQL | Use REST API `w.policies.list_policies()` instead |
+
+## Best Practices
+
+1. **Use governed tags** (not ad-hoc tags) for ABAC policy matching
+2. **Always include ``EXCEPT `gov_admin` ``** in every policy
+3. **Use deterministic UDFs** with simple CASE statements
+4. **Preview before executing** any policy change
+5. **Start at schema scope** and narrow to table only when needed
+6. **Name policies descriptively**: `mask_{what}_{scope}` or `filter_{what}_{scope}`
+7. **Test UDFs independently** before binding to policies
+8. 
**Monitor policy quotas** — consolidate when approaching limits + +## Resources + +- [ABAC Overview](https://docs.databricks.com/data-governance/unity-catalog/abac/) +- [ABAC Policies](https://docs.databricks.com/data-governance/unity-catalog/abac/policies) +- [ABAC Tutorial](https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial) +- [UDF Best Practices](https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices) +- [Governed Tags](https://docs.databricks.com/admin/governed-tags/) +- [Column Masks & Row Filters](https://docs.databricks.com/data-governance/unity-catalog/filters-and-masks/) diff --git a/databricks-skills/uc-abac-governance/mcp-tools-reference.md b/databricks-skills/uc-abac-governance/mcp-tools-reference.md new file mode 100644 index 00000000..51fb77b2 --- /dev/null +++ b/databricks-skills/uc-abac-governance/mcp-tools-reference.md @@ -0,0 +1,397 @@ +# MCP Tools Reference for ABAC Policy Management + +Reference for the 12 MCP tools that manage ABAC policies via the Databricks Python SDK. These tools are registered in the UCABAC MCP server. + +--- + +## Discovery Tools + +### `list_abac_policies` + +List ABAC policies on a catalog, schema, or table. 
+ +```python +list_abac_policies( + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, # e.g., "my_catalog.my_schema" + include_inherited: bool = True, + policy_type: str = None, # "COLUMN_MASK" or "ROW_FILTER" (optional filter) +) +``` + +**Returns:** +```json +{ + "success": true, + "securable_type": "SCHEMA", + "securable_fullname": "my_catalog.my_schema", + "policy_count": 3, + "policies": [ + { + "name": "mask_pii_ssn", + "policy_type": "COLUMN_MASK", + "to_principals": ["analysts"], + "except_principals": ["gov_admin"], + "on_securable_fullname": "my_catalog.my_schema", + "column_mask": {"function_name": "my_catalog.my_schema.mask_ssn"}, + "match_columns": [{"tag_name": "pii_type", "tag_value": "ssn"}] + } + ] +} +``` + +### `get_abac_policy` + +Get details for a specific policy by name. + +```python +get_abac_policy( + policy_name: str, # Policy name + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, # Fully qualified securable name +) +``` + +**Returns:** +```json +{ + "success": true, + "policy": { + "name": "mask_pii_ssn", + "policy_type": "COLUMN_MASK", + "comment": "Mask SSN columns for analysts", + "to_principals": ["analysts", "data_scientists"], + "except_principals": ["gov_admin"], + "on_securable_type": "SCHEMA", + "on_securable_fullname": "my_catalog.my_schema", + "for_securable_type": "TABLE", + "column_mask": {"function_name": "my_catalog.my_schema.mask_ssn"}, + "match_columns": [{"tag_name": "pii_type", "tag_value": "ssn"}], + "created_at": "2025-01-15T10:30:00Z", + "created_by": "admin@company.com", + "updated_at": "2025-01-20T14:00:00Z", + "updated_by": "admin@company.com" + } +} +``` + +### `get_table_policies` + +Get column masks and row filters for a specific table via Unity Catalog API. 
+ +```python +get_table_policies( + catalog: str, + schema: str, + table: str, +) +``` + +**Returns:** +```json +{ + "success": true, + "table": "my_catalog.my_schema.my_table", + "column_masks": [ + { + "column_name": "ssn", + "mask_function": "my_catalog.my_schema.mask_ssn", + "using_column_names": [] + } + ], + "row_filters": [ + { + "filter_function": "my_catalog.my_schema.is_not_eu_region", + "using_column_names": ["region"] + } + ] +} +``` + +### `get_masking_functions` + +List masking UDFs in a schema. + +```python +get_masking_functions( + catalog: str, + schema: str, +) +``` + +**Returns:** +```json +{ + "success": true, + "catalog": "my_catalog", + "schema": "my_schema", + "functions": [ + { + "name": "mask_ssn", + "full_name": "my_catalog.my_schema.mask_ssn", + "return_type": "STRING", + "comment": "Masks SSN showing only last 4 digits", + "is_deterministic": true + } + ] +} +``` + +### `get_schema_info` + +Get schema metadata via Unity Catalog API. + +```python +get_schema_info( + catalog: str, + schema: str, +) +``` + +### `get_catalog_info` + +Get catalog metadata via Unity Catalog API. + +```python +get_catalog_info( + catalog: str, +) +``` + +### `get_column_tags_api` + +Get column-level tags via the Tags API. + +```python +get_column_tags_api( + catalog: str, + schema: str, + table: str, +) +``` + +### `list_table_policies_in_schema` + +List all tables in a schema with their column masks and row filters. + +```python +list_table_policies_in_schema( + catalog: str, + schema: str, +) +``` + +--- + +## Preview Tool (Human-in-the-Loop Gate) + +### `preview_policy_changes` + +Preview policy changes without executing. This is the critical human-in-the-loop gate. 
+ +```python +preview_policy_changes( + action: str, # "CREATE", "UPDATE", or "DELETE" + policy_name: str, + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, + policy_type: str = None, # "COLUMN_MASK" or "ROW_FILTER" (for CREATE) + to_principals: list = None, + except_principals: list = None, + function_name: str = None, + tag_name: str = None, + tag_value: str = None, + comment: str = None, +) +``` + +**Returns:** +```json +{ + "success": true, + "action": "CREATE", + "preview": { + "policy_name": "mask_pii_ssn", + "policy_type": "COLUMN_MASK", + "securable": "SCHEMA my_catalog.my_schema", + "to_principals": ["analysts"], + "except_principals": ["gov_admin"], + "function": "my_catalog.my_schema.mask_ssn", + "tag_match": "hasTagValue('pii_type', 'ssn')", + "equivalent_sql": "CREATE OR REPLACE POLICY mask_pii_ssn\nON SCHEMA my_catalog.my_schema\n..." + }, + "warnings": [], + "requires_approval": true, + "message": "Review the preview above. Reply 'approve' to execute." +} +``` + +**Usage in workflow:** + +1. Call `preview_policy_changes` with proposed changes +2. Present preview to user +3. Wait for explicit approval +4. Only then call `create_abac_policy`, `update_abac_policy`, or `delete_abac_policy` + +--- + +## Management Tools + +### `create_abac_policy` + +Create a new ABAC policy (COLUMN_MASK or ROW_FILTER). 
+ +```python +create_abac_policy( + policy_name: str, + policy_type: str, # "COLUMN_MASK" or "ROW_FILTER" + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, + function_name: str, # Fully qualified UDF name + to_principals: list, # Users/groups the policy applies to + tag_name: str, # Tag key to match + tag_value: str = None, # Tag value (optional, uses hasTag vs hasTagValue) + except_principals: list = None, # Excluded principals (gov_admin auto-added) + comment: str = "", +) +``` + +**Returns:** +```json +{ + "success": true, + "policy_name": "mask_pii_ssn", + "action": "created", + "details": { + "policy_type": "COLUMN_MASK", + "on_securable": "SCHEMA my_catalog.my_schema", + "function": "my_catalog.my_schema.mask_ssn", + "to_principals": ["analysts"], + "except_principals": ["gov_admin"] + } +} +``` + +> **Note:** `gov_admin` is automatically added to `except_principals` if not already present. + +### `update_abac_policy` + +Update an existing policy's principals or comment. + +```python +update_abac_policy( + policy_name: str, + securable_type: str, + securable_fullname: str, + to_principals: list = None, + except_principals: list = None, + comment: str = None, +) +``` + +**Returns:** +```json +{ + "success": true, + "policy_name": "mask_pii_ssn", + "action": "updated", + "changes": { + "to_principals": ["analysts", "data_scientists", "new_team"], + "comment": "Updated: added new_team" + } +} +``` + +> **Note:** To change the UDF, tag matching, or scope, drop and recreate the policy. + +### `delete_abac_policy` + +Delete an ABAC policy. 
+ +```python +delete_abac_policy( + policy_name: str, + securable_type: str, + securable_fullname: str, +) +``` + +**Returns:** +```json +{ + "success": true, + "policy_name": "mask_pii_ssn", + "action": "deleted" +} +``` + +--- + +## Human-in-the-Loop Workflow Example + +Complete workflow using MCP tools: + +``` +Step 1: ANALYZE +───────────────────────────────── +→ list_abac_policies(securable_type="SCHEMA", securable_fullname="prod.finance") +→ get_column_tags_api(catalog="prod", schema="finance", table="customers") +→ get_masking_functions(catalog="prod", schema="finance") + +Step 2: RECOMMEND +───────────────────────────────── +→ Agent generates policy recommendations based on discovered tags and UDFs + +Step 3: PREVIEW +───────────────────────────────── +→ preview_policy_changes( + action="CREATE", + policy_name="mask_ssn_finance", + securable_type="SCHEMA", + securable_fullname="prod.finance", + policy_type="COLUMN_MASK", + function_name="prod.finance.mask_ssn", + to_principals=["analysts"], + tag_name="pii_type", + tag_value="ssn" + ) + +Step 4: APPROVE +───────────────────────────────── +→ Human reviews preview and replies "approve" + +Step 5: EXECUTE +───────────────────────────────── +→ create_abac_policy( + policy_name="mask_ssn_finance", + policy_type="COLUMN_MASK", + securable_type="SCHEMA", + securable_fullname="prod.finance", + function_name="prod.finance.mask_ssn", + to_principals=["analysts"], + tag_name="pii_type", + tag_value="ssn" + ) + +Step 6: VERIFY +───────────────────────────────── +→ get_abac_policy( + policy_name="mask_ssn_finance", + securable_type="SCHEMA", + securable_fullname="prod.finance" + ) +``` + +--- + +## Error Handling + +| Error | Cause | Solution | +|-------|-------|----------| +| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Consolidate policies or use broader scope | +| `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | Check governed tag config in UI | +| `UDF_NOT_FOUND` | Masking UDF doesn't 
exist | Create UDF first, use fully qualified name | +| `POLICY_ALREADY_EXISTS` | Duplicate policy name | Use different name or delete existing first | +| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | Grant `MANAGE` permission | +| `INVALID_SECURABLE_TYPE` | Wrong securable type string | Use `"CATALOG"`, `"SCHEMA"`, or `"TABLE"` | +| `gov_admin not in except_principals` | Safety check failed | Always include `gov_admin` in except list | diff --git a/databricks-skills/uc-abac-governance/python-sdk-patterns.md b/databricks-skills/uc-abac-governance/python-sdk-patterns.md new file mode 100644 index 00000000..da8bd938 --- /dev/null +++ b/databricks-skills/uc-abac-governance/python-sdk-patterns.md @@ -0,0 +1,351 @@ +# Python SDK Patterns for ABAC Policies + +Databricks Python SDK patterns for managing ABAC policies via `WorkspaceClient.policies`. + +**SDK Docs:** https://databricks-sdk-py.readthedocs.io/en/latest/ +**ABAC Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/policies + +--- + +## Setup + +```python +from databricks.sdk import WorkspaceClient + +w = WorkspaceClient() # Auto-detects credentials +``` + +--- + +## List Policies + +List ABAC policies on a securable (catalog, schema, or table). 
+ +```python +# List all policies on a catalog +policies = w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname="my_catalog", + include_inherited=True, +) + +for p in policies: + print(f"{p.name}: {p.policy_type} on {p.on_securable_fullname}") + +# List policies on a schema +policies = w.policies.list_policies( + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + include_inherited=True, +) + +# List policies on a specific table +policies = w.policies.list_policies( + on_securable_type="TABLE", + on_securable_fullname="my_catalog.my_schema.my_table", + include_inherited=True, +) +``` + +### Filtering by Policy Type + +```python +policies = w.policies.list_policies( + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + include_inherited=True, +) + +column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] +row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] +``` + +### Extracting Policy Details + +```python +for p in policies: + p_dict = p.as_dict() if hasattr(p, "as_dict") else {} + print({ + "name": p_dict.get("name"), + "policy_type": p_dict.get("policy_type"), + "to_principals": p_dict.get("to_principals", []), + "except_principals": p_dict.get("except_principals", []), + "on_securable_type": p_dict.get("on_securable_type"), + "on_securable_fullname": p_dict.get("on_securable_fullname"), + "column_mask": p_dict.get("column_mask"), + "row_filter": p_dict.get("row_filter"), + "match_columns": p_dict.get("match_columns", []), + }) +``` + +--- + +## Get Policy + +Retrieve a specific policy by name and securable. 
+ +```python +policy = w.policies.get_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", +) + +print(f"Policy: {policy.name}") +print(f"Type: {policy.policy_type}") +print(f"Principals: {policy.to_principals}") +print(f"Except: {policy.except_principals}") +``` + +--- + +## Create Policy + +### Column Mask Policy + +```python +from databricks.sdk.service.catalog import ( + CreatePolicy, + ColumnMask, + MatchColumns, +) + +policy = w.policies.create_policy( + name="mask_pii_ssn", + policy_type="COLUMN_MASK", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + for_securable_type="TABLE", + to_principals=["analysts", "data_scientists"], + except_principals=["gov_admin"], + comment="Mask SSN columns for analyst groups", + column_mask=ColumnMask( + function_name="my_catalog.my_schema.mask_ssn", + ), + match_columns=[ + MatchColumns( + tag_name="pii_type", + tag_value="ssn", + ) + ], +) +print(f"Created policy: {policy.name}") +``` + +### Row Filter Policy + +```python +from databricks.sdk.service.catalog import ( + CreatePolicy, + RowFilter, + MatchColumns, +) + +policy = w.policies.create_policy( + name="filter_eu_data", + policy_type="ROW_FILTER", + on_securable_type="CATALOG", + on_securable_fullname="my_catalog", + for_securable_type="TABLE", + to_principals=["us_team"], + except_principals=["gov_admin"], + comment="Filter EU rows for US team", + row_filter=RowFilter( + function_name="my_catalog.my_schema.is_not_eu_region", + ), + match_columns=[ + MatchColumns( + tag_name="region", + tag_value="eu", + ) + ], +) +print(f"Created policy: {policy.name}") +``` + +### Important: Always Include `gov_admin` + +Every policy **MUST** include `"gov_admin"` in `except_principals`: + +```python +# CORRECT +except_principals=["gov_admin"] + +# CORRECT - additional admin groups +except_principals=["gov_admin", "platform_admins"] + +# WRONG - missing gov_admin 
+except_principals=["platform_admins"] # gov_admin must be included! +``` + +--- + +## Update Policy + +Update principals or comment on an existing policy. + +```python +updated = w.policies.update_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + to_principals=["analysts", "data_scientists", "new_team"], + except_principals=["gov_admin", "senior_admins"], + comment="Updated: added new_team to masked principals", +) +print(f"Updated policy: {updated.name}") +``` + +> **Note:** To change the UDF, tag matching, or scope, you must drop and recreate the policy. `update_policy` only modifies principals and comment. + +--- + +## Delete Policy + +```python +w.policies.delete_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", +) +print("Policy deleted") +``` + +--- + +## Error Handling + +```python +from databricks.sdk.errors import NotFound, PermissionDenied, BadRequest + +try: + policy = w.policies.get_policy( + name="nonexistent_policy", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + ) +except NotFound: + print("Policy not found") +except PermissionDenied: + print("Insufficient permissions - need MANAGE on securable") +except BadRequest as e: + print(f"Invalid request: {e}") +``` + +--- + +## Common Patterns + +### List All Policies in a Catalog with Counts + +```python +def get_policy_summary(w, catalog: str): + """Get a summary of all ABAC policies in a catalog.""" + policies = list(w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname=catalog, + include_inherited=True, + )) + + column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] + row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] + + return { + "total": len(policies), + "column_masks": len(column_masks), + "row_filters": len(row_filters), + "policies": [p.as_dict() for p in policies], + } +``` + +### 
Check Policy Quotas Before Creating + +```python +def check_quota(w, securable_type: str, securable_fullname: str): + """Check if policy quota allows creating a new policy.""" + quotas = {"CATALOG": 10, "SCHEMA": 10, "TABLE": 5} + max_policies = quotas.get(securable_type, 10) + + existing = list(w.policies.list_policies( + on_securable_type=securable_type, + on_securable_fullname=securable_fullname, + )) + + # Count only direct policies (not inherited) + direct = [p for p in existing + if p.on_securable_fullname == securable_fullname] + + return { + "current": len(direct), + "max": max_policies, + "can_create": len(direct) < max_policies, + } +``` + +### Fetch Policies Without Cache (Direct API) + +```python +def fetch_policies_direct( + w, + catalog: str, + schema: str = None, + table: str = None, +): + """Fetch policies directly from REST API.""" + if table and schema: + securable_type = "TABLE" + securable_name = f"{catalog}.{schema}.{table}" + elif schema: + securable_type = "SCHEMA" + securable_name = f"{catalog}.{schema}" + else: + securable_type = "CATALOG" + securable_name = catalog + + policies = w.policies.list_policies( + on_securable_type=securable_type, + on_securable_fullname=securable_name, + include_inherited=True, + ) + + results = [] + for p in policies: + p_dict = p.as_dict() if hasattr(p, "as_dict") else {} + results.append({ + "name": p_dict.get("name"), + "policy_type": p_dict.get("policy_type"), + "to_principals": p_dict.get("to_principals", []), + "except_principals": p_dict.get("except_principals", []), + "on_securable_type": p_dict.get("on_securable_type"), + "on_securable_fullname": p_dict.get("on_securable_fullname"), + "column_mask": p_dict.get("column_mask"), + "row_filter": p_dict.get("row_filter"), + "match_columns": p_dict.get("match_columns", []), + }) + return results +``` + +--- + +## Async Usage (FastAPI, etc.) + +The Databricks SDK is synchronous. 
In async applications, wrap calls with `asyncio.to_thread()`: + +```python +import asyncio + +async def list_policies_async(w, catalog: str): + return await asyncio.to_thread( + lambda: list(w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname=catalog, + include_inherited=True, + )) + ) + +async def create_policy_async(w, **kwargs): + return await asyncio.to_thread( + w.policies.create_policy, + **kwargs, + ) +``` diff --git a/databricks-skills/uc-abac-governance/sql-generation.md b/databricks-skills/uc-abac-governance/sql-generation.md new file mode 100644 index 00000000..c0cb46f1 --- /dev/null +++ b/databricks-skills/uc-abac-governance/sql-generation.md @@ -0,0 +1,356 @@ +# SQL Generation Reference + +Pure SQL patterns for Unity Catalog ABAC governance operations. All SQL follows Databricks syntax. + +--- + +## Tag Operations + +### SET TAG on Column + +```sql +-- Legacy syntax (all versions) +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name SET TAGS ('pii_type' = 'ssn'); + +-- Modern syntax (DBR 16.1+) +SET TAG ON COLUMN catalog.schema.table.column_name 'pii_type' = 'ssn'; +``` + +### SET TAG on Table + +```sql +-- Legacy syntax +ALTER TABLE catalog.schema.table +SET TAGS ('data_classification' = 'confidential'); + +-- Modern syntax +SET TAG ON TABLE catalog.schema.table 'data_classification' = 'confidential'; +``` + +### SET TAG on Schema / Catalog + +```sql +SET TAG ON SCHEMA catalog.schema 'environment' = 'production'; +SET TAG ON CATALOG my_catalog 'department' = 'finance'; +``` + +### UNSET TAG + +```sql +-- Column (legacy) +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name UNSET TAGS ('pii_type'); + +-- Column (modern) +UNSET TAG ON COLUMN catalog.schema.table.column_name 'pii_type'; + +-- Table (legacy) +ALTER TABLE catalog.schema.table +UNSET TAGS ('data_classification'); + +-- Table (modern) +UNSET TAG ON TABLE catalog.schema.table 'data_classification'; +``` + +**Docs:** +- SET TAG: 
https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-set-tag.html +- UNSET TAG: https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-unset-tag.html + +--- + +## Tag Discovery Queries + +### Query Column Tags + +```sql +SELECT tag_name, tag_value, column_name +FROM system.information_schema.column_tags +WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; +``` + +### Query Table Tags + +```sql +SELECT tag_name, tag_value +FROM system.information_schema.table_tags +WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; +``` + +### All Tag Assignments in a Catalog + +```sql +-- Table-level tags +SELECT 'TABLE' as securable_type, + CONCAT(catalog_name, '.', schema_name, '.', table_name) as securable_name, + tag_name as tag_key, + tag_value +FROM system.information_schema.table_tags +WHERE catalog_name = 'my_catalog'; + +-- Column-level tags +SELECT 'COLUMN' as securable_type, + CONCAT(catalog_name, '.', schema_name, '.', table_name, '.', column_name) as securable_name, + tag_name as tag_key, + tag_value +FROM system.information_schema.column_tags +WHERE catalog_name = 'my_catalog'; +``` + +**Docs:** +- information_schema.column_tags: https://docs.databricks.com/sql/language-manual/information-schema/column_tags.html +- information_schema.table_tags: https://docs.databricks.com/sql/language-manual/information-schema/table_tags.html + +--- + +## Masking UDF Creation + +All masking UDFs must be `DETERMINISTIC` with simple `CASE` statements. No external calls or nested UDFs. 
+ +### Generic Masking Strategies + +```sql +-- Full mask: replaces all characters with * +CREATE OR REPLACE FUNCTION catalog.schema.mask_full(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Full masking - replaces all characters with *' +RETURN CASE + WHEN value IS NULL THEN NULL + ELSE REPEAT('*', LENGTH(value)) +END; + +-- Partial mask: show last 4 characters +CREATE OR REPLACE FUNCTION catalog.schema.mask_partial(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Partial masking - shows last 4 characters' +RETURN CASE + WHEN value IS NULL THEN NULL + WHEN LENGTH(value) <= 4 THEN REPEAT('*', LENGTH(value)) + ELSE CONCAT(REPEAT('*', LENGTH(value) - 4), RIGHT(value, 4)) +END; + +-- Hash: SHA256 with version prefix +CREATE OR REPLACE FUNCTION catalog.schema.mask_hash(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Hash masking - SHA256 with version prefix' +RETURN CASE + WHEN value IS NULL THEN NULL + ELSE CONCAT('HASH_v1_', SUBSTRING(SHA2(CONCAT(value, ':v1'), 256), 1, 16)) +END; + +-- Redact: replace with [REDACTED] +CREATE OR REPLACE FUNCTION catalog.schema.mask_redact(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Redaction - replaces value with [REDACTED]' +RETURN CASE + WHEN value IS NULL THEN NULL + ELSE '[REDACTED]' +END; + +-- Nullify: always returns NULL +CREATE OR REPLACE FUNCTION catalog.schema.mask_nullify(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Nullify - always returns NULL' +RETURN NULL; +``` + +### Specialized Masking UDFs + +```sql +-- SSN: ***-**-XXXX +CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks SSN showing only last 4 digits in XXX-XX-XXXX format' +RETURN CASE + WHEN ssn IS NULL THEN NULL + WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 + THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) + ELSE '***-**-****' +END; + +-- Email: j***@example.com +CREATE OR REPLACE FUNCTION catalog.schema.mask_email(email STRING) +RETURNS 
STRING +DETERMINISTIC +COMMENT 'Masks email showing first char and domain' +RETURN CASE + WHEN email IS NULL THEN NULL + WHEN INSTR(email, '@') > 1 + THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1)) + ELSE '***@***.***' +END; + +-- Credit card: ****-****-****-1234 +CREATE OR REPLACE FUNCTION catalog.schema.mask_credit_card(card_number STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks credit card showing only last 4 digits' +RETURN CASE + WHEN card_number IS NULL THEN NULL + WHEN LENGTH(REGEXP_REPLACE(card_number, '[^0-9]', '')) >= 4 + THEN CONCAT('****-****-****-', RIGHT(REGEXP_REPLACE(card_number, '[^0-9]', ''), 4)) + ELSE '****-****-****-****' +END; +``` + +### Row Filter UDFs + +Row filter UDFs return `BOOLEAN`: `TRUE` to include, `FALSE` to exclude. + +```sql +-- Region-based filter: hide EU rows +CREATE OR REPLACE FUNCTION catalog.schema.is_not_eu_region(region_value STRING) +RETURNS BOOLEAN +DETERMINISTIC +COMMENT 'Row filter - returns FALSE for EU regions' +RETURN CASE + WHEN region_value IS NULL THEN TRUE + WHEN LOWER(region_value) LIKE '%eu%' THEN FALSE + WHEN LOWER(region_value) LIKE '%europe%' THEN FALSE + ELSE TRUE +END; + +-- Array membership filter +CREATE OR REPLACE FUNCTION catalog.schema.is_in_allowed_values( + row_value STRING, + allowed_values ARRAY +) +RETURNS BOOLEAN +DETERMINISTIC +COMMENT 'Row filter based on array membership' +RETURN CASE + WHEN allowed_values IS NULL THEN FALSE + WHEN ARRAY_CONTAINS(TRANSFORM(allowed_values, x -> LOWER(x)), LOWER(row_value)) THEN TRUE + ELSE FALSE +END; +``` + +**Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices + +--- + +## Policy Creation + +### Column Mask Policy + +```sql +CREATE OR REPLACE POLICY mask_pii_ssn +ON SCHEMA catalog.schema +COMMENT 'Mask SSN columns for analysts' +COLUMN MASK catalog.schema.mask_ssn +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS 
masked_col +ON COLUMN masked_col; +``` + +### Row Filter Policy + +```sql +CREATE OR REPLACE POLICY filter_eu_data +ON CATALOG my_catalog +COMMENT 'Filter EU rows for US team' +ROW FILTER catalog.schema.is_not_eu_region +TO `us_team` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col +USING COLUMNS (filter_col); +``` + +### Policy with Tag Key Only (any value) + +```sql +-- Match any column with tag 'pii_type' regardless of value +CREATE OR REPLACE POLICY mask_all_pii +ON SCHEMA catalog.schema +COLUMN MASK catalog.schema.mask_full +TO `external_users` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTag('pii_type') AS masked_col +ON COLUMN masked_col; +``` + +### Drop Policy + +```sql +DROP POLICY mask_pii_ssn ON SCHEMA catalog.schema; +DROP POLICY filter_eu_data ON CATALOG my_catalog; +``` + +> **Note:** There is no `ALTER POLICY`. To modify a policy, drop and recreate it. + +--- + +## Discovery Queries + +```sql +-- List catalogs +SHOW CATALOGS; + +-- List schemas in a catalog +SHOW SCHEMAS IN my_catalog; + +-- List tables in a schema +SHOW TABLES IN my_catalog.my_schema; + +-- Describe table with extended metadata +DESCRIBE TABLE EXTENDED my_catalog.my_schema.my_table; + +-- List UDFs in a schema +SHOW USER FUNCTIONS IN my_catalog.my_schema; + +-- Describe a UDF +DESCRIBE FUNCTION EXTENDED my_catalog.my_schema.mask_ssn; + +-- Sample column values +SELECT DISTINCT column_name +FROM my_catalog.my_schema.my_table +LIMIT 20; +``` + +--- + +## Enums Reference + +### PII Types (governed tag values) + +`ssn`, `email`, `phone`, `credit_card`, `date_of_birth`, `address`, `name`, `ip_address`, `national_id`, `medical_record`, `generic` + +### Masking Strategies + +| Strategy | Description | +|----------|-------------| +| `full_mask` | Replace all characters with `*` | +| `partial_mask` | Show last 4 characters | +| `hash` | SHA256 with version prefix | +| `redact` | Replace with `[REDACTED]` | +| `nullify` | Always return NULL | +| 
`custom` | User-supplied SQL (requires manual UDF) | + +### Policy Scopes + +| Scope | Description | +|-------|-------------| +| `CATALOG` | Policy applies to all tables in catalog | +| `SCHEMA` | Policy applies to all tables in schema | +| `TABLE` | Policy applies to a single table | + +### Tag Syntax Variants + +| Variant | Availability | Example | +|---------|-------------|---------| +| `LEGACY` | All versions | `ALTER TABLE t ALTER COLUMN c SET TAGS ('k'='v')` | +| `MODERN` | DBR 16.1+ | `SET TAG ON COLUMN t.c 'k' = 'v'` | From f6aba8cc59b7af555c2f249616259e1456c01a2d Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Mon, 9 Feb 2026 17:45:44 -0600 Subject: [PATCH 02/34] Add ABAC policy implementation, integration tests, and cleanup fixtures Implement 9 ABAC policy functions (list, get, create, update, delete, get_table_policies, get_masking_functions, check_policy_quota, preview_policy_changes) using Databricks Python SDK v0.85.0 policies API. Add 24 integration tests with programmatic governed tag creation/cleanup via Tag Policies API. Update UC __init__.py exports, conftest fixtures (cleanup_policies, cleanup_governed_tags), and MCP tools reference docs. 
--- .../uc-abac-governance/mcp-tools-reference.md | 264 +++++-- .../unity_catalog/__init__.py | 23 + .../unity_catalog/abac_policies.py | 705 ++++++++++++++++++ .../integration/unity_catalog/conftest.py | 64 ++ .../unity_catalog/test_abac_policies.py | 686 +++++++++++++++++ databricks-tools-core/uv.lock | 8 +- 6 files changed, 1673 insertions(+), 77 deletions(-) create mode 100644 databricks-tools-core/databricks_tools_core/unity_catalog/abac_policies.py create mode 100644 databricks-tools-core/tests/integration/unity_catalog/test_abac_policies.py diff --git a/.claude/skills/uc-abac-governance/mcp-tools-reference.md b/.claude/skills/uc-abac-governance/mcp-tools-reference.md index 51fb77b2..c1ae9640 100644 --- a/.claude/skills/uc-abac-governance/mcp-tools-reference.md +++ b/.claude/skills/uc-abac-governance/mcp-tools-reference.md @@ -1,6 +1,10 @@ # MCP Tools Reference for ABAC Policy Management -Reference for the 12 MCP tools that manage ABAC policies via the Databricks Python SDK. These tools are registered in the UCABAC MCP server. +Reference for the MCP tools that manage ABAC policies. Core policy operations are implemented in +`databricks_tools_core.unity_catalog.abac_policies`. Discovery helpers delegate to existing +`unity_catalog` modules where possible. + +**Implementation:** `databricks-tools-core/databricks_tools_core/unity_catalog/abac_policies.py` --- @@ -10,7 +14,11 @@ Reference for the 12 MCP tools that manage ABAC policies via the Databricks Pyth List ABAC policies on a catalog, schema, or table. +**Implementation:** `unity_catalog.abac_policies.list_abac_policies` + ```python +from databricks_tools_core.unity_catalog import list_abac_policies + list_abac_policies( securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" securable_fullname: str, # e.g., "my_catalog.my_schema" @@ -44,7 +52,11 @@ list_abac_policies( Get details for a specific policy by name. 
+**Implementation:** `unity_catalog.abac_policies.get_abac_policy` + ```python +from databricks_tools_core.unity_catalog import get_abac_policy + get_abac_policy( policy_name: str, # Policy name securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" @@ -79,7 +91,11 @@ get_abac_policy( Get column masks and row filters for a specific table via Unity Catalog API. +**Implementation:** `unity_catalog.abac_policies.get_table_policies` + ```python +from databricks_tools_core.unity_catalog import get_table_policies + get_table_policies( catalog: str, schema: str, @@ -112,7 +128,11 @@ get_table_policies( List masking UDFs in a schema. +**Implementation:** `unity_catalog.abac_policies.get_masking_functions` + ```python +from databricks_tools_core.unity_catalog import get_masking_functions + get_masking_functions( catalog: str, schema: str, @@ -125,6 +145,7 @@ get_masking_functions( "success": true, "catalog": "my_catalog", "schema": "my_schema", + "function_count": 3, "functions": [ { "name": "mask_ssn", @@ -141,10 +162,14 @@ get_masking_functions( Get schema metadata via Unity Catalog API. +**Implementation:** Delegates to existing `unity_catalog.schemas.get_schema` + ```python -get_schema_info( - catalog: str, - schema: str, +from databricks_tools_core.unity_catalog import get_schema + +get_schema( + catalog_name: str, + schema_name: str, ) ``` @@ -152,9 +177,13 @@ get_schema_info( Get catalog metadata via Unity Catalog API. +**Implementation:** Delegates to existing `unity_catalog.catalogs.get_catalog` + ```python -get_catalog_info( - catalog: str, +from databricks_tools_core.unity_catalog import get_catalog + +get_catalog( + catalog_name: str, ) ``` @@ -162,11 +191,17 @@ get_catalog_info( Get column-level tags via the Tags API. 
+**Implementation:** Delegates to existing `unity_catalog.tags.query_column_tags` + ```python -get_column_tags_api( - catalog: str, - schema: str, - table: str, +from databricks_tools_core.unity_catalog import query_column_tags + +query_column_tags( + catalog_filter: str, # Filter by catalog name + table_name: str = None, # Filter by table name + tag_name: str = None, # Filter by tag name + tag_value: str = None, # Filter by tag value + limit: int = 100, ) ``` @@ -174,13 +209,50 @@ get_column_tags_api( List all tables in a schema with their column masks and row filters. +**Implementation:** Compose `unity_catalog.tables.list_tables` + `unity_catalog.abac_policies.get_table_policies` + ```python -list_table_policies_in_schema( - catalog: str, - schema: str, +from databricks_tools_core.unity_catalog import list_tables, get_table_policies + +# List all tables, then get policies for each +tables = list_tables(catalog_name=catalog, schema_name=schema) +for t in tables["tables"]: + policies = get_table_policies(catalog=catalog, schema=schema, table=t["name"]) +``` + +--- + +## Quota Check + +### `check_policy_quota` + +Check if the policy quota allows creating a new policy on a securable. + +**Implementation:** `unity_catalog.abac_policies.check_policy_quota` + +```python +from databricks_tools_core.unity_catalog import check_policy_quota + +check_policy_quota( + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, # Fully qualified securable name ) ``` +**Returns:** +```json +{ + "success": true, + "securable_type": "SCHEMA", + "securable_fullname": "my_catalog.my_schema", + "current": 3, + "max": 10, + "can_create": true +} +``` + +Policy quotas: CATALOG=10, SCHEMA=10, TABLE=5. + --- ## Preview Tool (Human-in-the-Loop Gate) @@ -189,7 +261,11 @@ list_table_policies_in_schema( Preview policy changes without executing. This is the critical human-in-the-loop gate. 
+**Implementation:** `unity_catalog.abac_policies.preview_policy_changes` + ```python +from databricks_tools_core.unity_catalog import preview_policy_changes + preview_policy_changes( action: str, # "CREATE", "UPDATE", or "DELETE" policy_name: str, @@ -241,7 +317,11 @@ preview_policy_changes( Create a new ABAC policy (COLUMN_MASK or ROW_FILTER). +**Implementation:** `unity_catalog.abac_policies.create_abac_policy` + ```python +from databricks_tools_core.unity_catalog import create_abac_policy + create_abac_policy( policy_name: str, policy_type: str, # "COLUMN_MASK" or "ROW_FILTER" @@ -251,7 +331,7 @@ create_abac_policy( to_principals: list, # Users/groups the policy applies to tag_name: str, # Tag key to match tag_value: str = None, # Tag value (optional, uses hasTag vs hasTagValue) - except_principals: list = None, # Excluded principals (gov_admin auto-added) + except_principals: list = None, # Excluded principals comment: str = "", ) ``` @@ -267,18 +347,24 @@ create_abac_policy( "on_securable": "SCHEMA my_catalog.my_schema", "function": "my_catalog.my_schema.mask_ssn", "to_principals": ["analysts"], - "except_principals": ["gov_admin"] - } + "except_principals": ["gov_admin"], + "tag_match": "pii_type=ssn" + }, + "policy": { ... } } ``` -> **Note:** `gov_admin` is automatically added to `except_principals` if not already present. +> **Note:** Callers should include appropriate admin groups in `except_principals` to protect administrator access. ### `update_abac_policy` Update an existing policy's principals or comment. +**Implementation:** `unity_catalog.abac_policies.update_abac_policy` + ```python +from databricks_tools_core.unity_catalog import update_abac_policy + update_abac_policy( policy_name: str, securable_type: str, @@ -298,7 +384,8 @@ update_abac_policy( "changes": { "to_principals": ["analysts", "data_scientists", "new_team"], "comment": "Updated: added new_team" - } + }, + "policy": { ... 
} } ``` @@ -308,7 +395,11 @@ update_abac_policy( Delete an ABAC policy. +**Implementation:** `unity_catalog.abac_policies.delete_abac_policy` + ```python +from databricks_tools_core.unity_catalog import delete_abac_policy + delete_abac_policy( policy_name: str, securable_type: str, @@ -329,57 +420,64 @@ delete_abac_policy( ## Human-in-the-Loop Workflow Example -Complete workflow using MCP tools: +Complete workflow using the implemented functions: -``` -Step 1: ANALYZE -───────────────────────────────── -→ list_abac_policies(securable_type="SCHEMA", securable_fullname="prod.finance") -→ get_column_tags_api(catalog="prod", schema="finance", table="customers") -→ get_masking_functions(catalog="prod", schema="finance") - -Step 2: RECOMMEND -───────────────────────────────── -→ Agent generates policy recommendations based on discovered tags and UDFs - -Step 3: PREVIEW -───────────────────────────────── -→ preview_policy_changes( - action="CREATE", - policy_name="mask_ssn_finance", - securable_type="SCHEMA", - securable_fullname="prod.finance", - policy_type="COLUMN_MASK", - function_name="prod.finance.mask_ssn", - to_principals=["analysts"], - tag_name="pii_type", - tag_value="ssn" - ) - -Step 4: APPROVE -───────────────────────────────── -→ Human reviews preview and replies "approve" - -Step 5: EXECUTE -───────────────────────────────── -→ create_abac_policy( - policy_name="mask_ssn_finance", - policy_type="COLUMN_MASK", - securable_type="SCHEMA", - securable_fullname="prod.finance", - function_name="prod.finance.mask_ssn", - to_principals=["analysts"], - tag_name="pii_type", - tag_value="ssn" - ) - -Step 6: VERIFY -───────────────────────────────── -→ get_abac_policy( - policy_name="mask_ssn_finance", - securable_type="SCHEMA", - securable_fullname="prod.finance" - ) +```python +from databricks_tools_core.unity_catalog import ( + list_abac_policies, + query_column_tags, + get_masking_functions, + check_policy_quota, + preview_policy_changes, + create_abac_policy, + 
get_abac_policy, +) + +# Step 1: ANALYZE — discover current state +policies = list_abac_policies(securable_type="SCHEMA", securable_fullname="prod.finance") +tags = query_column_tags(catalog_filter="prod", table_name="customers") +udfs = get_masking_functions(catalog="prod", schema="finance") + +# Step 2: RECOMMEND — agent generates policy recommendations based on tags and UDFs + +# Step 3: CHECK QUOTA — ensure we can create a new policy +quota = check_policy_quota(securable_type="SCHEMA", securable_fullname="prod.finance") +assert quota["can_create"], f"Quota exceeded: {quota['current']}/{quota['max']}" + +# Step 4: PREVIEW — generate SQL for human review (no changes made) +preview = preview_policy_changes( + action="CREATE", + policy_name="mask_ssn_finance", + securable_type="SCHEMA", + securable_fullname="prod.finance", + policy_type="COLUMN_MASK", + function_name="prod.finance.mask_ssn", + to_principals=["analysts"], + tag_name="pii_type", + tag_value="ssn", +) +# → Present preview["preview"]["equivalent_sql"] to user + +# Step 5: APPROVE — human reviews preview and replies "approve" + +# Step 6: EXECUTE — create the policy +result = create_abac_policy( + policy_name="mask_ssn_finance", + policy_type="COLUMN_MASK", + securable_type="SCHEMA", + securable_fullname="prod.finance", + function_name="prod.finance.mask_ssn", + to_principals=["analysts"], + tag_name="pii_type", + tag_value="ssn", +) + +# Step 7: VERIFY — confirm policy was created +policy = get_abac_policy( + policy_name="mask_ssn_finance", + securable_type="SCHEMA", + securable_fullname="prod.finance", +) ``` --- @@ -388,10 +486,30 @@ Step 6: VERIFY | Error | Cause | Solution | |-------|-------|----------| -| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Consolidate policies or use broader scope | +| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Use `check_policy_quota` first; consolidate or use broader scope | | `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | 
Check governed tag config in UI | -| `UDF_NOT_FOUND` | Masking UDF doesn't exist | Create UDF first, use fully qualified name | -| `POLICY_ALREADY_EXISTS` | Duplicate policy name | Use different name or delete existing first | -| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | Grant `MANAGE` permission | +| `UDF_NOT_FOUND` | Masking UDF doesn't exist | Create UDF first via `create_security_function`, use fully qualified name | +| `POLICY_ALREADY_EXISTS` | Duplicate policy name | Use different name or `delete_abac_policy` first | +| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | `grant_privileges` with MANAGE | | `INVALID_SECURABLE_TYPE` | Wrong securable type string | Use `"CATALOG"`, `"SCHEMA"`, or `"TABLE"` | -| `gov_admin not in except_principals` | Safety check failed | Always include `gov_admin` in except list | +| `PRINCIPAL_NOT_FOUND` | Principal group doesn't exist | Verify group exists on the workspace | + +--- + +## Implementation Map + +| MCP Tool | Implementation | Module | +|----------|---------------|--------| +| `list_abac_policies` | `list_abac_policies()` | `abac_policies` | +| `get_abac_policy` | `get_abac_policy()` | `abac_policies` | +| `get_table_policies` | `get_table_policies()` | `abac_policies` | +| `get_masking_functions` | `get_masking_functions()` | `abac_policies` | +| `check_policy_quota` | `check_policy_quota()` | `abac_policies` | +| `get_schema_info` | `get_schema()` | `schemas` | +| `get_catalog_info` | `get_catalog()` | `catalogs` | +| `get_column_tags_api` | `query_column_tags()` | `tags` | +| `list_table_policies_in_schema` | `list_tables()` + `get_table_policies()` | `tables` + `abac_policies` | +| `preview_policy_changes` | `preview_policy_changes()` | `abac_policies` | +| `create_abac_policy` | `create_abac_policy()` | `abac_policies` | +| `update_abac_policy` | `update_abac_policy()` | `abac_policies` | +| `delete_abac_policy` | `delete_abac_policy()` | `abac_policies` | diff --git 
a/databricks-tools-core/databricks_tools_core/unity_catalog/__init__.py b/databricks-tools-core/databricks_tools_core/unity_catalog/__init__.py index 21e37808..e315d482 100644 --- a/databricks-tools-core/databricks_tools_core/unity_catalog/__init__.py +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/__init__.py @@ -112,6 +112,19 @@ drop_column_mask, ) +# ABAC policies +from .abac_policies import ( + list_abac_policies, + get_abac_policy, + get_table_policies, + get_masking_functions, + check_policy_quota, + preview_policy_changes, + create_abac_policy, + update_abac_policy, + delete_abac_policy, +) + # Quality monitors from .monitors import ( create_monitor, @@ -226,6 +239,16 @@ "drop_row_filter", "set_column_mask", "drop_column_mask", + # ABAC policies + "list_abac_policies", + "get_abac_policy", + "get_table_policies", + "get_masking_functions", + "check_policy_quota", + "preview_policy_changes", + "create_abac_policy", + "update_abac_policy", + "delete_abac_policy", # Quality monitors "create_monitor", "get_monitor", diff --git a/databricks-tools-core/databricks_tools_core/unity_catalog/abac_policies.py b/databricks-tools-core/databricks_tools_core/unity_catalog/abac_policies.py new file mode 100644 index 00000000..c75ff1a6 --- /dev/null +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/abac_policies.py @@ -0,0 +1,705 @@ +""" +Unity Catalog - ABAC Policy Operations + +Functions for managing Attribute-Based Access Control (ABAC) policies +via the Databricks Python SDK (WorkspaceClient.policies). + +ABAC policies bind governed tags to masking UDFs or row filters, scoped to +catalogs, schemas, or tables, and targeted at specific principals. 
+ +Policy quotas: + - Catalog: 10 policies max + - Schema: 10 policies max + - Table: 5 policies max +""" + +import logging +import re +from typing import Any, Dict, List, Optional + +from ..auth import get_workspace_client + +logger = logging.getLogger(__name__) + +_IDENTIFIER_PATTERN = re.compile(r"^[a-zA-Z0-9_][a-zA-Z0-9_.\-]*$") + +_VALID_SECURABLE_TYPES = {"CATALOG", "SCHEMA", "TABLE"} +_VALID_POLICY_TYPES = {"COLUMN_MASK", "ROW_FILTER"} +_POLICY_QUOTAS = {"CATALOG": 10, "SCHEMA": 10, "TABLE": 5} +def _validate_identifier(name: str) -> str: + """Validate a SQL identifier to prevent injection.""" + if not _IDENTIFIER_PATTERN.match(name): + raise ValueError(f"Invalid SQL identifier: '{name}'") + return name + + +def _validate_securable_type(securable_type: str) -> str: + """Validate and normalize securable type.""" + normalized = securable_type.upper() + if normalized not in _VALID_SECURABLE_TYPES: + raise ValueError( + f"Invalid securable_type: '{securable_type}'. " + f"Must be one of: {sorted(_VALID_SECURABLE_TYPES)}" + ) + return normalized + + +def _validate_policy_type(policy_type: str) -> str: + """Validate and normalize policy type.""" + normalized = policy_type.upper().replace("POLICY_TYPE_", "") + if normalized not in _VALID_POLICY_TYPES: + raise ValueError( + f"Invalid policy_type: '{policy_type}'. 
" + f"Must be one of: {sorted(_VALID_POLICY_TYPES)}" + ) + return normalized + + +def _to_policy_type_enum(policy_type: str): + """Convert a policy type string to the SDK PolicyType enum.""" + from databricks.sdk.service.catalog import PolicyType + + normalized = policy_type.upper().replace("POLICY_TYPE_", "") + if normalized == "COLUMN_MASK": + return PolicyType.POLICY_TYPE_COLUMN_MASK + elif normalized == "ROW_FILTER": + return PolicyType.POLICY_TYPE_ROW_FILTER + raise ValueError(f"Invalid policy_type: '{policy_type}'") + + +def _to_securable_type_enum(securable_type: str): + """Convert a securable type string to the SDK SecurableType enum.""" + from databricks.sdk.service.catalog import SecurableType + + return SecurableType(securable_type.upper()) + + +def _policy_to_dict(policy: Any) -> Dict[str, Any]: + """Convert a policy SDK object to a serializable dict.""" + if hasattr(policy, "as_dict"): + return policy.as_dict() + return { + "name": getattr(policy, "name", None), + "policy_type": getattr(policy, "policy_type", None), + "to_principals": getattr(policy, "to_principals", []), + "except_principals": getattr(policy, "except_principals", []), + "on_securable_type": getattr(policy, "on_securable_type", None), + "on_securable_fullname": getattr(policy, "on_securable_fullname", None), + "for_securable_type": getattr(policy, "for_securable_type", None), + "column_mask": getattr(policy, "column_mask", None), + "row_filter": getattr(policy, "row_filter", None), + "match_columns": getattr(policy, "match_columns", []), + "comment": getattr(policy, "comment", None), + } + + +# --------------------------------------------------------------------------- +# Discovery +# --------------------------------------------------------------------------- + + +def list_abac_policies( + securable_type: str, + securable_fullname: str, + include_inherited: bool = True, + policy_type: Optional[str] = None, +) -> Dict[str, Any]: + """ + List ABAC policies on a catalog, schema, or table. 
+ + Args: + securable_type: "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: Fully qualified name (e.g., "my_catalog.my_schema") + include_inherited: Include policies inherited from parent securables + policy_type: Optional filter — "COLUMN_MASK" or "ROW_FILTER" + + Returns: + Dict with policy_count and policies list + """ + stype = _validate_securable_type(securable_type) + _validate_identifier(securable_fullname) + + w = get_workspace_client() + policies = list( + w.policies.list_policies( + on_securable_type=stype, + on_securable_fullname=securable_fullname, + include_inherited=include_inherited, + ) + ) + + if policy_type: + ptype = _validate_policy_type(policy_type) + # SDK returns POLICY_TYPE_COLUMN_MASK / POLICY_TYPE_ROW_FILTER + sdk_ptype = f"POLICY_TYPE_{ptype}" + policies = [ + p for p in policies + if str(getattr(p, "policy_type", "")) in (ptype, sdk_ptype) + or (p.as_dict() if hasattr(p, "as_dict") else {}).get("policy_type") in (ptype, sdk_ptype) + ] + + policy_dicts = [_policy_to_dict(p) for p in policies] + return { + "success": True, + "securable_type": stype, + "securable_fullname": securable_fullname, + "policy_count": len(policy_dicts), + "policies": policy_dicts, + } + + +def get_abac_policy( + policy_name: str, + securable_type: str, + securable_fullname: str, +) -> Dict[str, Any]: + """ + Get details for a specific ABAC policy by name. 
+ + Args: + policy_name: Policy name + securable_type: "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: Fully qualified securable name + + Returns: + Dict with policy details + """ + stype = _validate_securable_type(securable_type) + _validate_identifier(securable_fullname) + + w = get_workspace_client() + policy = w.policies.get_policy( + on_securable_type=stype, + on_securable_fullname=securable_fullname, + name=policy_name, + ) + + return { + "success": True, + "policy": _policy_to_dict(policy), + } + + +def get_table_policies( + catalog: str, + schema: str, + table: str, +) -> Dict[str, Any]: + """ + Get column masks and row filters applied to a specific table. + + Uses the Unity Catalog REST API directly to retrieve effective + column masks and row filters, including those derived from ABAC policies. + + Args: + catalog: Catalog name + schema: Schema name + table: Table name + + Returns: + Dict with column_masks and row_filters lists + """ + _validate_identifier(catalog) + _validate_identifier(schema) + _validate_identifier(table) + full_name = f"{catalog}.{schema}.{table}" + + w = get_workspace_client() + result = w.api_client.do("GET", f"/api/2.1/unity-catalog/tables/{full_name}") + + column_masks = [] + for col in result.get("columns", []): + masks = col.get("column_masks", {}) + effective_masks = col.get("effective_masks", []) + + if masks.get("column_masks") or effective_masks: + mask_functions = [] + for m in masks.get("column_masks", []): + mask_functions.append(m.get("function_name")) + for m in effective_masks: + fn = m.get("function_name") + if fn and fn not in mask_functions: + mask_functions.append(fn) + + column_masks.append({ + "column_name": col.get("name"), + "column_type": col.get("type_name"), + "mask_functions": mask_functions, + }) + + row_filters = [] + row_filters_data = result.get("row_filters", {}) + if row_filters_data: + for rf in row_filters_data.get("row_filters", []): + row_filters.append({ + "function_name": 
rf.get("function_name"), + "input_column_names": rf.get("input_column_names", []), + }) + + return { + "success": True, + "table": full_name, + "column_masks": column_masks, + "row_filters": row_filters, + } + + +def get_masking_functions( + catalog: str, + schema: str, +) -> Dict[str, Any]: + """ + List masking UDFs in a schema. + + Retrieves all user-defined functions in the specified schema and returns + their metadata for use in ABAC policy creation. + + Args: + catalog: Catalog name + schema: Schema name + + Returns: + Dict with list of functions and their metadata + """ + _validate_identifier(catalog) + _validate_identifier(schema) + + w = get_workspace_client() + functions = list(w.functions.list(catalog_name=catalog, schema_name=schema)) + + func_list = [] + for f in functions: + func_list.append({ + "name": f.name, + "full_name": f.full_name, + "return_type": str(f.data_type) if f.data_type else None, + "comment": getattr(f, "comment", None), + "is_deterministic": getattr(f, "is_deterministic", None), + }) + + return { + "success": True, + "catalog": catalog, + "schema": schema, + "function_count": len(func_list), + "functions": func_list, + } + + +# --------------------------------------------------------------------------- +# Quota checking +# --------------------------------------------------------------------------- + + +def check_policy_quota( + securable_type: str, + securable_fullname: str, +) -> Dict[str, Any]: + """ + Check if the policy quota allows creating a new policy. + + Policy quotas: CATALOG=10, SCHEMA=10, TABLE=5. 
+ + Args: + securable_type: "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: Fully qualified securable name + + Returns: + Dict with current count, max allowed, and whether creation is allowed + """ + stype = _validate_securable_type(securable_type) + _validate_identifier(securable_fullname) + + w = get_workspace_client() + existing = list( + w.policies.list_policies( + on_securable_type=stype, + on_securable_fullname=securable_fullname, + ) + ) + + # Count only direct policies (not inherited) + direct = [ + p for p in existing + if getattr(p, "on_securable_fullname", None) == securable_fullname + ] + + max_policies = _POLICY_QUOTAS.get(stype, 10) + return { + "success": True, + "securable_type": stype, + "securable_fullname": securable_fullname, + "current": len(direct), + "max": max_policies, + "can_create": len(direct) < max_policies, + } + + +# --------------------------------------------------------------------------- +# Preview (human-in-the-loop gate) +# --------------------------------------------------------------------------- + + +def preview_policy_changes( + action: str, + policy_name: str, + securable_type: str, + securable_fullname: str, + policy_type: Optional[str] = None, + to_principals: Optional[List[str]] = None, + except_principals: Optional[List[str]] = None, + function_name: Optional[str] = None, + tag_name: Optional[str] = None, + tag_value: Optional[str] = None, + comment: Optional[str] = None, +) -> Dict[str, Any]: + """ + Preview policy changes without executing. Human-in-the-loop gate. + + Generates the equivalent SQL and returns it for review. No changes + are made until a subsequent create/update/delete call. 
def preview_policy_changes(
    action: str,
    policy_name: str,
    securable_type: str,
    securable_fullname: str,
    policy_type: Optional[str] = None,
    to_principals: Optional[List[str]] = None,
    except_principals: Optional[List[str]] = None,
    function_name: Optional[str] = None,
    tag_name: Optional[str] = None,
    tag_value: Optional[str] = None,
    comment: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Preview policy changes without executing. Human-in-the-loop gate.

    Builds the equivalent SQL for the requested change and returns it for
    review. Nothing is executed here; a subsequent create/update/delete call
    is required to apply the change.

    Args:
        action: "CREATE", "UPDATE", or "DELETE"
        policy_name: Policy name
        securable_type: "CATALOG", "SCHEMA", or "TABLE"
        securable_fullname: Fully qualified securable name
        policy_type: "COLUMN_MASK" or "ROW_FILTER" (required for CREATE)
        to_principals: Principals the policy applies to
        except_principals: Excluded principals
        function_name: Fully qualified UDF name (required for CREATE)
        tag_name: Tag key to match (required for CREATE)
        tag_value: Tag value to match (optional; omit for hasTag vs hasTagValue)
        comment: Policy description

    Returns:
        Dict with preview details, equivalent SQL, warnings, and approval flag
    """
    normalized_action = action.upper()
    if normalized_action not in ("CREATE", "UPDATE", "DELETE"):
        raise ValueError(
            f"Invalid action: '{normalized_action}'. Must be CREATE, UPDATE, or DELETE"
        )

    stype = _validate_securable_type(securable_type)
    _validate_identifier(securable_fullname)

    warnings: List[str] = []
    safe_except = list(except_principals) if except_principals else []

    if normalized_action == "CREATE":
        # All of these inputs are mandatory before a CREATE can be previewed.
        if not policy_type:
            raise ValueError("policy_type is required for CREATE action")
        ptype = _validate_policy_type(policy_type)
        if not function_name:
            raise ValueError("function_name is required for CREATE action")
        if not tag_name:
            raise ValueError("tag_name is required for CREATE action")
        if not to_principals:
            raise ValueError("to_principals is required for CREATE action")

        # hasTagValue pins a specific tag value; hasTag matches any value.
        if tag_value:
            tag_match = f"hasTagValue('{tag_name}', '{tag_value}')"
        else:
            tag_match = f"hasTag('{tag_name}')"

        principals_sql = ", ".join(f"`{p}`" for p in to_principals)
        except_sql = ", ".join(f"`{p}`" for p in safe_except) if safe_except else ""

        # Column masks and row filters share most of the CREATE POLICY shape;
        # only the policy clause, alias, and trailing column clause differ.
        is_mask = ptype == "COLUMN_MASK"
        alias = "masked_col" if is_mask else "filter_col"

        sql_lines = [
            f"CREATE OR REPLACE POLICY {policy_name}",
            f"ON {stype} {securable_fullname}",
        ]
        if comment:
            sql_lines.append(f"COMMENT '{comment}'")
        if is_mask:
            sql_lines.append(f"COLUMN MASK {function_name}")
        else:
            sql_lines.append(f"ROW FILTER {function_name}")
        sql_lines.append(f"TO {principals_sql}")
        if except_sql:
            sql_lines.append(f"EXCEPT {except_sql}")
        sql_lines.append("FOR TABLES")
        sql_lines.append(f"MATCH COLUMNS {tag_match} AS {alias}")
        if is_mask:
            sql_lines.append(f"ON COLUMN {alias};")
        else:
            sql_lines.append(f"USING COLUMNS ({alias});")

        preview = {
            "policy_name": policy_name,
            "policy_type": ptype,
            "securable": f"{stype} {securable_fullname}",
            "to_principals": to_principals,
            "except_principals": safe_except,
            "function": function_name,
            "tag_match": tag_match,
            "equivalent_sql": "\n".join(sql_lines),
        }

    elif normalized_action == "UPDATE":
        # Collect only the fields the caller actually wants to change.
        changes: Dict[str, Any] = {}
        if to_principals is not None:
            changes["to_principals"] = to_principals
        if except_principals is not None:
            changes["except_principals"] = safe_except
        if comment is not None:
            changes["comment"] = comment

        if not changes:
            warnings.append("No changes specified for UPDATE")

        preview = {
            "policy_name": policy_name,
            "securable": f"{stype} {securable_fullname}",
            "changes": changes,
            "equivalent_sql": (
                f"-- UPDATE via SDK: w.policies.update_policy(name='{policy_name}', ...)"
            ),
            "note": (
                "update_policy only modifies principals and comment. "
                "To change UDF, tags, or scope, drop and recreate."
            ),
        }

    else:  # DELETE
        preview = {
            "policy_name": policy_name,
            "securable": f"{stype} {securable_fullname}",
            "equivalent_sql": f"DROP POLICY {policy_name} ON {stype} {securable_fullname};",
        }
        warnings.append(
            "This action is irreversible. The policy will be permanently removed."
        )

    return {
        "success": True,
        "action": normalized_action,
        "preview": preview,
        "warnings": warnings,
        "requires_approval": True,
        "message": "Review the preview above. Reply 'approve' to execute.",
    }
def create_abac_policy(
    policy_name: str,
    policy_type: str,
    securable_type: str,
    securable_fullname: str,
    function_name: str,
    to_principals: List[str],
    tag_name: str,
    tag_value: Optional[str] = None,
    except_principals: Optional[List[str]] = None,
    comment: str = "",
) -> Dict[str, Any]:
    """
    Create a new ABAC policy (COLUMN_MASK or ROW_FILTER).

    Args:
        policy_name: Policy name (must be unique within the securable scope)
        policy_type: "COLUMN_MASK" or "ROW_FILTER"
        securable_type: "CATALOG", "SCHEMA", or "TABLE"
        securable_fullname: Fully qualified securable name
        function_name: Fully qualified UDF name (e.g., "catalog.schema.mask_ssn")
        to_principals: Users/groups the policy applies to
        tag_name: Tag key to match columns on
        tag_value: Tag value to match (optional; omit for hasTag vs hasTagValue)
        except_principals: Excluded principals
        comment: Policy description

    Returns:
        Dict with creation status and policy details
    """
    ptype = _validate_policy_type(policy_type)
    stype = _validate_securable_type(securable_type)
    _validate_identifier(securable_fullname)
    _validate_identifier(function_name)

    from databricks.sdk.service.catalog import (
        ColumnMaskOptions,
        MatchColumn,
        PolicyInfo,
        RowFilterOptions,
    )

    # Tag matching: hasTagValue pins a value, hasTag matches any value.
    if tag_value:
        tag_condition = f"hasTagValue('{tag_name}', '{tag_value}')"
    else:
        tag_condition = f"hasTag('{tag_name}')"

    is_mask = ptype == "COLUMN_MASK"
    alias = "masked_col" if is_mask else "filter_col"

    # Policies always apply to tables, regardless of where they are attached.
    policy_info = PolicyInfo(
        name=policy_name,
        policy_type=_to_policy_type_enum(ptype),
        on_securable_type=_to_securable_type_enum(stype),
        on_securable_fullname=securable_fullname,
        for_securable_type=_to_securable_type_enum("TABLE"),
        to_principals=to_principals,
        except_principals=list(except_principals) if except_principals else None,
        comment=comment,
        match_columns=[MatchColumn(alias=alias, condition=tag_condition)],
    )

    if is_mask:
        policy_info.column_mask = ColumnMaskOptions(
            function_name=function_name,
            on_column=alias,
        )
    else:
        policy_info.row_filter = RowFilterOptions(
            function_name=function_name,
        )

    client = get_workspace_client()
    created = client.policies.create_policy(policy_info=policy_info)

    return {
        "success": True,
        "policy_name": policy_name,
        "action": "created",
        "details": {
            "policy_type": ptype,
            "on_securable": f"{stype} {securable_fullname}",
            "function": function_name,
            "to_principals": to_principals,
            "except_principals": list(except_principals) if except_principals else [],
            "tag_match": f"{tag_name}={tag_value}" if tag_value else tag_name,
        },
        "policy": _policy_to_dict(created),
    }
def update_abac_policy(
    policy_name: str,
    securable_type: str,
    securable_fullname: str,
    to_principals: Optional[List[str]] = None,
    except_principals: Optional[List[str]] = None,
    comment: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Update an existing ABAC policy's principals or comment.

    Only principals and comment can be modified. To change the UDF, tag
    matching, or scope, drop and recreate the policy.

    Args:
        policy_name: Policy name
        securable_type: "CATALOG", "SCHEMA", or "TABLE"
        securable_fullname: Fully qualified securable name
        to_principals: Updated list of principals the policy applies to
        except_principals: Updated excluded principals
        comment: Updated policy description

    Returns:
        Dict with update status and applied changes
    """
    stype = _validate_securable_type(securable_type)
    _validate_identifier(securable_fullname)

    from databricks.sdk.service.catalog import PolicyInfo

    client = get_workspace_client()

    # Fetch the current policy so its required fields can be carried over
    # into the patch object.
    existing = client.policies.get_policy(
        on_securable_type=stype,
        on_securable_fullname=securable_fullname,
        name=policy_name,
    )

    patch = PolicyInfo(
        to_principals=existing.to_principals,
        for_securable_type=existing.for_securable_type,
        policy_type=existing.policy_type,
    )

    changes: Dict[str, Any] = {}
    update_fields: List[str] = []

    if to_principals is not None:
        patch.to_principals = to_principals
        changes["to_principals"] = to_principals
        update_fields.append("to_principals")

    if except_principals is not None:
        patch.except_principals = list(except_principals)
        changes["except_principals"] = list(except_principals)
        update_fields.append("except_principals")

    if comment is not None:
        patch.comment = comment
        changes["comment"] = comment
        update_fields.append("comment")

    # update_mask tells the API exactly which fields to overwrite.
    updated = client.policies.update_policy(
        on_securable_type=stype,
        on_securable_fullname=securable_fullname,
        name=policy_name,
        policy_info=patch,
        update_mask=",".join(update_fields) if update_fields else None,
    )

    return {
        "success": True,
        "policy_name": policy_name,
        "action": "updated",
        "changes": changes,
        "policy": _policy_to_dict(updated),
    }
def delete_abac_policy(
    policy_name: str,
    securable_type: str,
    securable_fullname: str,
) -> Dict[str, Any]:
    """
    Delete an ABAC policy.

    This is irreversible. The policy will be permanently removed.

    Args:
        policy_name: Policy name
        securable_type: "CATALOG", "SCHEMA", or "TABLE"
        securable_fullname: Fully qualified securable name

    Returns:
        Dict with deletion status
    """
    stype = _validate_securable_type(securable_type)
    _validate_identifier(securable_fullname)

    client = get_workspace_client()
    client.policies.delete_policy(
        on_securable_type=stype,
        on_securable_fullname=securable_fullname,
        name=policy_name,
    )

    return {
        "success": True,
        "policy_name": policy_name,
        "action": "deleted",
    }
@pytest.fixture(scope="function")
def cleanup_policies():
    """
    Track and cleanup ABAC policies created during tests.

    Usage:
        def test_create_policy(cleanup_policies):
            create_abac_policy(...)
            cleanup_policies((policy_name, securable_type, securable_fullname))
    """
    from databricks_tools_core.unity_catalog import delete_abac_policy

    registered = []

    def register(policy_tuple: tuple):
        """Register a policy for cleanup. Tuple: (name, securable_type, securable_fullname)."""
        if policy_tuple not in registered:
            registered.append(policy_tuple)
            logger.info(f"Registered policy for cleanup: {policy_tuple[0]}")

    yield register

    # Teardown: best-effort delete of every registered policy; failures are
    # logged but never fail the test run.
    for name, stype, sfullname in registered:
        try:
            logger.info(f"Cleaning up policy: {name}")
            delete_abac_policy(
                policy_name=name,
                securable_type=stype,
                securable_fullname=sfullname,
            )
        except Exception as e:
            logger.warning(f"Failed to cleanup policy {name}: {e}")


@pytest.fixture(scope="function")
def cleanup_governed_tags():
    """
    Track and cleanup governed tags (tag policies) created during tests.

    Uses the Tag Policies API (w.tag_policies) to delete governed tags.

    Usage:
        def test_create_tag(cleanup_governed_tags):
            w.tag_policies.create_tag_policy(...)
            cleanup_governed_tags("my_tag_key")
    """
    pending = []

    def register(tag_key: str):
        if tag_key not in pending:
            pending.append(tag_key)
            logger.info(f"Registered governed tag for cleanup: {tag_key}")

    yield register

    # Teardown: best-effort delete of each governed tag.
    client = get_workspace_client()
    for tag_key in pending:
        try:
            logger.info(f"Cleaning up governed tag: {tag_key}")
            client.tag_policies.delete_tag_policy(tag_key=tag_key)
        except Exception as e:
            logger.warning(f"Failed to cleanup governed tag {tag_key}: {e}")
"""
Integration tests for Unity Catalog ABAC Policy operations.

Tests the abac_policies module functions:
- list_abac_policies
- get_abac_policy
- get_table_policies
- get_masking_functions
- check_policy_quota
- preview_policy_changes
- create_abac_policy / update_abac_policy / delete_abac_policy

Governed Tags
-------------
ABAC policies require **governed tags** (not regular metadata tags).
The CRUD tests automatically create and clean up governed tags via the
Tag Policies API (``w.tag_policies``). No manual UI setup is needed.
"""

import logging
import time

import pytest

from databricks_tools_core.auth import get_workspace_client
from databricks_tools_core.sql import execute_sql
from databricks_tools_core.unity_catalog import (
    create_security_function,
    set_tags,
)
from databricks_tools_core.unity_catalog.abac_policies import (
    list_abac_policies,
    get_abac_policy,
    get_table_policies,
    get_masking_functions,
    check_policy_quota,
    preview_policy_changes,
    create_abac_policy,
    update_abac_policy,
    delete_abac_policy,
)

logger = logging.getLogger(__name__)

# Shared prefix so test-created objects are easy to identify and sweep.
UC_TEST_PREFIX = "uc_test"


# ---------------------------------------------------------------------------
# Discovery tests
# ---------------------------------------------------------------------------


@pytest.mark.integration
class TestListAbacPolicies:
    """Tests for listing ABAC policies."""

    def test_list_policies_on_catalog(self, test_catalog: str):
        """Should list policies on a catalog (may be empty)."""
        listing = list_abac_policies(
            securable_type="CATALOG",
            securable_fullname=test_catalog,
        )

        assert listing["success"] is True
        assert listing["securable_type"] == "CATALOG"
        assert listing["securable_fullname"] == test_catalog
        assert isinstance(listing["policies"], list)
        assert isinstance(listing["policy_count"], int)
        logger.info(f"Found {listing['policy_count']} policies on catalog {test_catalog}")

    def test_list_policies_on_schema(self, test_catalog: str, uc_test_schema: str):
        """Should list policies on a schema."""
        full_name = f"{test_catalog}.{uc_test_schema}"
        listing = list_abac_policies(
            securable_type="SCHEMA",
            securable_fullname=full_name,
        )

        assert listing["success"] is True
        assert listing["securable_type"] == "SCHEMA"
        assert isinstance(listing["policies"], list)
        logger.info(f"Found {listing['policy_count']} policies on schema {full_name}")

    def test_list_policies_with_type_filter(self, test_catalog: str):
        """Should filter policies by type."""
        listing = list_abac_policies(
            securable_type="CATALOG",
            securable_fullname=test_catalog,
            policy_type="COLUMN_MASK",
        )

        assert listing["success"] is True
        # Every returned policy must match the requested type filter.
        for policy in listing["policies"]:
            assert policy.get("policy_type") == "COLUMN_MASK"
        logger.info(f"Found {listing['policy_count']} COLUMN_MASK policies")

    def test_list_policies_without_inherited(self, test_catalog: str):
        """Should list only direct policies when include_inherited=False."""
        listing = list_abac_policies(
            securable_type="CATALOG",
            securable_fullname=test_catalog,
            include_inherited=False,
        )

        assert listing["success"] is True
        assert isinstance(listing["policies"], list)
        logger.info(f"Found {listing['policy_count']} direct policies")


@pytest.mark.integration
class TestGetTablePolicies:
    """Tests for getting column masks and row filters on a table."""

    def test_get_table_policies(self, test_catalog: str, uc_test_schema: str, uc_test_table: str):
        """Should return column masks and row filters for a table."""
        # uc_test_table is "catalog.schema.table"
        segments = uc_test_table.split(".")
        outcome = get_table_policies(
            catalog=segments[0],
            schema=segments[1],
            table=segments[2],
        )

        assert outcome["success"] is True
        assert outcome["table"] == uc_test_table
        assert isinstance(outcome["column_masks"], list)
        assert isinstance(outcome["row_filters"], list)
        logger.info(
            f"Table {uc_test_table}: {len(outcome['column_masks'])} masks, "
            f"{len(outcome['row_filters'])} filters"
        )
@pytest.mark.integration
class TestGetMaskingFunctions:
    """Tests for listing masking UDFs in a schema."""

    def test_get_masking_functions(
        self,
        test_catalog: str,
        uc_test_schema: str,
        unique_name: str,
        warehouse_id: str,
        cleanup_functions,
    ):
        """Should list UDFs in the schema."""
        # Create a throwaway masking UDF first so the listing is non-empty.
        fn_name = f"{test_catalog}.{uc_test_schema}.{UC_TEST_PREFIX}_mask_{unique_name}"
        cleanup_functions(fn_name)

        create_security_function(
            function_name=fn_name,
            parameter_name="val",
            parameter_type="STRING",
            return_type="STRING",
            function_body="RETURN CASE WHEN val IS NULL THEN NULL ELSE '***' END",
            warehouse_id=warehouse_id,
        )

        listing = get_masking_functions(
            catalog=test_catalog,
            schema=uc_test_schema,
        )

        assert listing["success"] is True
        assert listing["catalog"] == test_catalog
        assert listing["schema"] == uc_test_schema
        assert isinstance(listing["functions"], list)
        assert listing["function_count"] > 0

        # The UDF created above must show up in the listing.
        expected_name = f"{UC_TEST_PREFIX}_mask_{unique_name}"
        func_names = [fn["name"] for fn in listing["functions"]]
        assert expected_name in func_names, f"Expected {expected_name} in {func_names}"
        logger.info(f"Found {listing['function_count']} functions in schema")
@pytest.mark.integration
class TestCheckPolicyQuota:
    """Tests for policy quota checking."""

    def test_check_quota_on_catalog(self, test_catalog: str):
        """Should return quota info for a catalog."""
        quota = check_policy_quota(
            securable_type="CATALOG",
            securable_fullname=test_catalog,
        )

        assert quota["success"] is True
        assert quota["securable_type"] == "CATALOG"
        # Catalogs allow at most 10 direct policies.
        assert quota["max"] == 10
        assert isinstance(quota["current"], int)
        assert isinstance(quota["can_create"], bool)
        logger.info(f"Catalog quota: {quota['current']}/{quota['max']}")

    def test_check_quota_on_schema(self, test_catalog: str, uc_test_schema: str):
        """Should return quota info for a schema."""
        full_name = f"{test_catalog}.{uc_test_schema}"
        quota = check_policy_quota(
            securable_type="SCHEMA",
            securable_fullname=full_name,
        )

        assert quota["success"] is True
        # Schemas share the same 10-policy limit as catalogs.
        assert quota["max"] == 10
        logger.info(f"Schema quota: {quota['current']}/{quota['max']}")

    def test_check_quota_on_table(self, uc_test_table: str):
        """Should return quota info for a table."""
        quota = check_policy_quota(
            securable_type="TABLE",
            securable_fullname=uc_test_table,
        )

        assert quota["success"] is True
        # Tables have a tighter limit of 5 direct policies.
        assert quota["max"] == 5
        logger.info(f"Table quota: {quota['current']}/{quota['max']}")
@pytest.mark.integration
class TestPreviewPolicyChanges:
    """Tests for preview_policy_changes (human-in-the-loop gate)."""

    def test_preview_create_column_mask(self):
        """Should generate CREATE preview with SQL for a column mask."""
        outcome = preview_policy_changes(
            action="CREATE",
            policy_name="test_mask_ssn",
            securable_type="SCHEMA",
            securable_fullname="my_catalog.my_schema",
            policy_type="COLUMN_MASK",
            to_principals=["analysts"],
            function_name="my_catalog.my_schema.mask_ssn",
            tag_name="pii_type",
            tag_value="ssn",
            comment="Test mask SSN",
        )

        assert outcome["success"] is True
        assert outcome["action"] == "CREATE"
        # Previews never auto-apply; a human must confirm.
        assert outcome["requires_approval"] is True

        details = outcome["preview"]
        assert details["policy_name"] == "test_mask_ssn"
        assert details["policy_type"] == "COLUMN_MASK"
        assert "analysts" in details["to_principals"]
        assert "hasTagValue('pii_type', 'ssn')" in details["tag_match"]
        assert "COLUMN MASK" in details["equivalent_sql"]
        assert "MATCH COLUMNS" in details["equivalent_sql"]
        logger.info(f"Preview SQL:\n{details['equivalent_sql']}")

    def test_preview_create_row_filter(self):
        """Should generate CREATE preview with SQL for a row filter."""
        outcome = preview_policy_changes(
            action="CREATE",
            policy_name="test_filter_eu",
            securable_type="CATALOG",
            securable_fullname="my_catalog",
            policy_type="ROW_FILTER",
            to_principals=["us_team"],
            function_name="my_catalog.my_schema.is_not_eu",
            tag_name="region",
            tag_value="eu",
        )

        assert outcome["success"] is True
        details = outcome["preview"]
        assert details["policy_type"] == "ROW_FILTER"
        # Row-filter SQL uses ROW FILTER ... USING COLUMNS, not COLUMN MASK.
        assert "ROW FILTER" in details["equivalent_sql"]
        assert "USING COLUMNS" in details["equivalent_sql"]
        logger.info(f"Preview SQL:\n{details['equivalent_sql']}")

    def test_preview_create_with_has_tag(self):
        """Should use hasTag when tag_value is omitted."""
        outcome = preview_policy_changes(
            action="CREATE",
            policy_name="test_mask_all_pii",
            securable_type="SCHEMA",
            securable_fullname="my_catalog.my_schema",
            policy_type="COLUMN_MASK",
            to_principals=["external_users"],
            function_name="my_catalog.my_schema.mask_full",
            tag_name="pii_type",
        )

        assert outcome["success"] is True
        # Without a tag_value, matching falls back to hasTag (any value).
        assert "hasTag('pii_type')" in outcome["preview"]["tag_match"]
        logger.info("Preview uses hasTag (no tag_value)")

    def test_preview_delete(self):
        """Should generate DELETE preview with DROP SQL."""
        outcome = preview_policy_changes(
            action="DELETE",
            policy_name="test_mask_ssn",
            securable_type="SCHEMA",
            securable_fullname="my_catalog.my_schema",
        )

        assert outcome["success"] is True
        assert outcome["action"] == "DELETE"
        assert "DROP POLICY" in outcome["preview"]["equivalent_sql"]
        # Should warn about irreversibility
        assert len(outcome["warnings"]) > 0
        logger.info(f"Delete preview: {outcome['preview']['equivalent_sql']}")

    def test_preview_update(self):
        """Should generate UPDATE preview."""
        outcome = preview_policy_changes(
            action="UPDATE",
            policy_name="test_mask_ssn",
            securable_type="SCHEMA",
            securable_fullname="my_catalog.my_schema",
            to_principals=["analysts", "new_team"],
            comment="Updated principals",
        )

        assert outcome["success"] is True
        assert outcome["action"] == "UPDATE"
        assert "to_principals" in outcome["preview"]["changes"]
        assert "comment" in outcome["preview"]["changes"]
        logger.info(f"Update preview changes: {outcome['preview']['changes']}")
@pytest.mark.integration
class TestAbacPolicyValidation:
    """Tests for input validation in ABAC policy functions."""

    @staticmethod
    def _expect_value_error(substring: str, fn, **kwargs):
        """Call fn(**kwargs), expect ValueError whose message contains substring."""
        with pytest.raises(ValueError) as excinfo:
            fn(**kwargs)
        assert substring in str(excinfo.value).lower()

    def test_invalid_securable_type_raises(self):
        """Should raise ValueError for invalid securable type."""
        self._expect_value_error(
            "invalid securable_type",
            list_abac_policies,
            securable_type="INVALID",
            securable_fullname="test",
        )

    def test_invalid_policy_type_raises(self):
        """Should raise ValueError for invalid policy type."""
        self._expect_value_error(
            "invalid policy_type",
            preview_policy_changes,
            action="CREATE",
            policy_name="test",
            securable_type="SCHEMA",
            securable_fullname="cat.sch",
            policy_type="INVALID",
            to_principals=["x"],
            function_name="fn",
            tag_name="t",
        )

    def test_invalid_action_raises(self):
        """Should raise ValueError for invalid action."""
        self._expect_value_error(
            "invalid action",
            preview_policy_changes,
            action="INVALID",
            policy_name="test",
            securable_type="SCHEMA",
            securable_fullname="cat.sch",
        )

    def test_create_preview_missing_policy_type_raises(self):
        """Should raise ValueError when policy_type missing for CREATE."""
        self._expect_value_error(
            "policy_type",
            preview_policy_changes,
            action="CREATE",
            policy_name="test",
            securable_type="SCHEMA",
            securable_fullname="cat.sch",
            to_principals=["x"],
            function_name="fn",
            tag_name="t",
        )

    def test_create_preview_missing_function_name_raises(self):
        """Should raise ValueError when function_name missing for CREATE."""
        self._expect_value_error(
            "function_name",
            preview_policy_changes,
            action="CREATE",
            policy_name="test",
            securable_type="SCHEMA",
            securable_fullname="cat.sch",
            policy_type="COLUMN_MASK",
            to_principals=["x"],
            tag_name="t",
        )

    def test_create_preview_missing_tag_name_raises(self):
        """Should raise ValueError when tag_name missing for CREATE."""
        self._expect_value_error(
            "tag_name",
            preview_policy_changes,
            action="CREATE",
            policy_name="test",
            securable_type="SCHEMA",
            securable_fullname="cat.sch",
            policy_type="COLUMN_MASK",
            to_principals=["x"],
            function_name="fn",
        )

    def test_create_preview_missing_principals_raises(self):
        """Should raise ValueError when to_principals missing for CREATE."""
        self._expect_value_error(
            "to_principals",
            preview_policy_changes,
            action="CREATE",
            policy_name="test",
            securable_type="SCHEMA",
            securable_fullname="cat.sch",
            policy_type="COLUMN_MASK",
            function_name="fn",
            tag_name="t",
        )

    def test_invalid_identifier_raises(self):
        """Should raise ValueError for SQL injection attempts."""
        self._expect_value_error(
            "invalid sql identifier",
            list_abac_policies,
            securable_type="CATALOG",
            securable_fullname="DROP TABLE; --",
        )
# ---------------------------------------------------------------------------
# CRUD lifecycle tests
# ---------------------------------------------------------------------------


@pytest.mark.integration
class TestAbacPolicyCRUD:
    """Tests for create, get, update, and delete policy operations.

    Each test creates its own governed tag via the Tag Policies API,
    then cleans it up afterwards. No manual UI setup is required.
    """

    @staticmethod
    def _create_governed_tag(tag_key: str, allowed_values: list[str]) -> None:
        """Create a governed tag via the Tag Policies API."""
        # Imported lazily so module import does not require a recent SDK.
        from databricks.sdk.service.tags import TagPolicy, Value

        w = get_workspace_client()
        w.tag_policies.create_tag_policy(
            tag_policy=TagPolicy(
                tag_key=tag_key,
                description=f"Integration test tag ({tag_key})",
                values=[Value(name=v) for v in allowed_values],
            )
        )
        logger.info(f"Created governed tag: {tag_key} (values={allowed_values})")

        # Wait for governed tag to propagate to the ABAC policy system
        # NOTE(review): fixed 30s sleep; assumes propagation completes within
        # that window — confirm against the target workspace if flaky.
        logger.info("Waiting 30s for governed tag propagation...")
        time.sleep(30)

    @staticmethod
    def _delete_governed_tag(tag_key: str) -> None:
        """Delete a governed tag via the Tag Policies API."""
        # Best-effort: failures are logged, never raised, so teardown in a
        # finally-block cannot mask the original test failure.
        try:
            w = get_workspace_client()
            w.tag_policies.delete_tag_policy(tag_key=tag_key)
            logger.info(f"Deleted governed tag: {tag_key}")
        except Exception as e:
            logger.warning(f"Failed to delete governed tag {tag_key}: {e}")

    def test_create_get_update_delete_column_mask_policy(
        self,
        test_catalog: str,
        uc_test_schema: str,
        uc_test_table: str,
        unique_name: str,
        warehouse_id: str,
        cleanup_functions,
        cleanup_policies,
    ):
        """Should create, get, update, and delete a column mask policy."""
        full_schema = f"{test_catalog}.{uc_test_schema}"
        policy_name = f"{UC_TEST_PREFIX}_mask_{unique_name}"

        # Unique governed tag for this test run
        tag_key = f"uc_test_pii_{unique_name}"
        tag_value = "email"

        # Register for cleanup (before creation, so a mid-test failure still
        # triggers policy teardown via the fixture).
        cleanup_policies((policy_name, "SCHEMA", full_schema))

        # --- Setup: governed tag, masking UDF, column tag ---
        self._create_governed_tag(tag_key, [tag_value])

        try:
            fn_name = f"{test_catalog}.{uc_test_schema}.{UC_TEST_PREFIX}_mask_fn_{unique_name}"
            cleanup_functions(fn_name)

            create_security_function(
                function_name=fn_name,
                parameter_name="val",
                parameter_type="STRING",
                return_type="STRING",
                function_body="RETURN CASE WHEN val IS NULL THEN NULL ELSE '***' END",
                warehouse_id=warehouse_id,
            )
            logger.info(f"Created masking UDF: {fn_name}")

            # Apply governed tag to column
            # NOTE(review): assumes uc_test_table has an "email" column —
            # confirm against the table fixture.
            set_tags(
                object_type="column",
                full_name=uc_test_table,
                column_name="email",
                tags={tag_key: tag_value},
                warehouse_id=warehouse_id,
            )
            logger.info(f"Tagged column email with {tag_key}={tag_value}")

            # --- CREATE ---
            logger.info(f"Creating ABAC policy: {policy_name}")
            create_result = create_abac_policy(
                policy_name=policy_name,
                policy_type="COLUMN_MASK",
                securable_type="SCHEMA",
                securable_fullname=full_schema,
                function_name=fn_name,
                to_principals=["account users"],
                tag_name=tag_key,
                tag_value=tag_value,
                comment=f"Test policy {unique_name}",
            )

            assert create_result["success"] is True
            assert create_result["policy_name"] == policy_name
            assert create_result["action"] == "created"
            logger.info(f"Policy created: {create_result['details']}")

            # --- GET ---
            logger.info(f"Getting policy: {policy_name}")
            get_result = get_abac_policy(
                policy_name=policy_name,
                securable_type="SCHEMA",
                securable_fullname=full_schema,
            )

            assert get_result["success"] is True
            assert get_result["policy"]["name"] == policy_name
            logger.info(f"Policy details: {get_result['policy']}")

            # --- UPDATE ---
            logger.info(f"Updating policy: {policy_name}")
            update_result = update_abac_policy(
                policy_name=policy_name,
                securable_type="SCHEMA",
                securable_fullname=full_schema,
                comment=f"Updated test policy {unique_name}",
            )

            assert update_result["success"] is True
            assert update_result["action"] == "updated"
            assert "comment" in update_result["changes"]
            logger.info(f"Policy updated: {update_result['changes']}")

            # --- Verify in list ---
            list_result = list_abac_policies(
                securable_type="SCHEMA",
                securable_fullname=full_schema,
            )
            policy_names = [p.get("name") for p in list_result["policies"]]
            assert policy_name in policy_names, f"Expected {policy_name} in {policy_names}"
            logger.info(f"Policy found in list ({list_result['policy_count']} total)")

            # --- DELETE ---
            logger.info(f"Deleting policy: {policy_name}")
            delete_result = delete_abac_policy(
                policy_name=policy_name,
                securable_type="SCHEMA",
                securable_fullname=full_schema,
            )

            assert delete_result["success"] is True
            assert delete_result["action"] == "deleted"
            logger.info("Policy deleted")

        finally:
            # Always remove the governed tag, even if the policy steps failed.
            self._delete_governed_tag(tag_key)

    def test_create_row_filter_policy(
        self,
        test_catalog: str,
        uc_test_schema: str,
        uc_test_table: str,
        unique_name: str,
        warehouse_id: str,
        cleanup_functions,
        cleanup_policies,
    ):
        """Should create and delete a row filter policy."""
        full_schema = f"{test_catalog}.{uc_test_schema}"
        policy_name = f"{UC_TEST_PREFIX}_filter_{unique_name}"

        # Unique governed tag for this test run
        tag_key = f"uc_test_dept_{unique_name}"
        tag_value = "filter"

        cleanup_policies((policy_name, "SCHEMA", full_schema))

        # --- Setup: governed tag, zero-arg UDF, column tag ---
        self._create_governed_tag(tag_key, [tag_value])

        try:
            fn_name = f"{test_catalog}.{uc_test_schema}.{UC_TEST_PREFIX}_rf_fn_{unique_name}"
            cleanup_functions(fn_name)

            # Zero-argument boolean UDF: trivially passes every row, which is
            # enough to exercise policy creation without filtering real data.
            execute_sql(
                sql_query=f"""
                CREATE OR REPLACE FUNCTION {fn_name}()
                RETURNS BOOLEAN
                RETURN TRUE
                """,
                warehouse_id=warehouse_id,
            )
            logger.info(f"Created row filter UDF (0-arg): {fn_name}")

            # Apply governed tag to column
            # NOTE(review): assumes uc_test_table has a "department" column —
            # confirm against the table fixture.
            set_tags(
                object_type="column",
                full_name=uc_test_table,
                column_name="department",
                tags={tag_key: tag_value},
                warehouse_id=warehouse_id,
            )
            logger.info(f"Tagged column department with {tag_key}={tag_value}")

            # Create row filter policy
            logger.info(f"Creating row filter policy: {policy_name}")
            result = create_abac_policy(
                policy_name=policy_name,
                policy_type="ROW_FILTER",
                securable_type="SCHEMA",
                securable_fullname=full_schema,
                function_name=fn_name,
                to_principals=["account users"],
                tag_name=tag_key,
                tag_value=tag_value,
                comment=f"Test row filter {unique_name}",
            )

            assert result["success"] is True
            assert result["action"] == "created"
            assert result["details"]["policy_type"] == "ROW_FILTER"
            logger.info(f"Row filter policy created: {result['details']}")

            # Delete policy
            delete_abac_policy(
                policy_name=policy_name,
                securable_type="SCHEMA",
                securable_fullname=full_schema,
            )
            logger.info("Row filter policy deleted")

        finally:
            # Always remove the governed tag, even on failure.
            self._delete_governed_tag(tag_key)
"https://files.pythonhosted.org/packages/7d/40/3941b6919c3854bd107e04be1686b3e0f1ce3ca4fbeea0c7fd81909bd90c/databricks_sdk-0.85.0.tar.gz", hash = "sha256:0b5f415fba69ea0c5bfc4d0b21cb3366c6b66f678e78e4b3c94cbcf2e9e0972f", size = 846275, upload-time = "2026-02-05T08:22:40.488Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8e/96/ee7742b94f996560c57d6fb8d2e10eab3c489e8a72187369ed0917baf8aa/databricks_sdk-0.76.0-py3-none-any.whl", hash = "sha256:6696dda22bc52c8f50a50d24e6ccd1c855f92c0f68f5afe4eb2e77d5b1b1a65f", size = 774688, upload-time = "2025-12-17T17:11:29.925Z" }, + { url = "https://files.pythonhosted.org/packages/e9/e8/1a3292820762a9b48c4774d2f9297b2e2c43319dc4b5d31a585fb76e3a05/databricks_sdk-0.85.0-py3-none-any.whl", hash = "sha256:2a2da176a55d55fb84696e0255520e99e838dd942b97b971dff724041fe00c64", size = 796888, upload-time = "2026-02-05T08:22:39.018Z" }, ] [[package]] @@ -804,7 +804,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "black", marker = "extra == 'dev'", specifier = ">=23.0.0" }, - { name = "databricks-sdk", specifier = ">=0.20.0" }, + { name = "databricks-sdk", specifier = ">=0.81.0" }, { name = "litellm", specifier = ">=1.0.0" }, { name = "pydantic", specifier = ">=2.0.0" }, { name = "pymupdf", specifier = ">=1.24.0" }, From 6e23184c75a81c13b554bffcd2d13d8a0c60572b Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Tue, 10 Feb 2026 17:44:49 -0600 Subject: [PATCH 03/34] Reorganize Unity Catalog skill into 4 categories: System Tables, Volumes, ACLs, FGAC Add new UC ACL reference file (10-uc-acls.md) covering GRANT/REVOKE, ownership, privilege hierarchy, SDK patterns, and common role-based access patterns. Restructure SKILL.md with clear category separations and per-category quick starts, reference tables, and best practices. 
--- .../databricks-unity-catalog/10-uc-acls.md | 219 ++++++++++++++++ .../skills/databricks-unity-catalog/SKILL.md | 242 ++++++++++++++++++ .../SKILL.md | 0 .../mcp-tools-reference.md | 0 .../python-sdk-patterns.md | 0 .../sql-generation.md | 0 ...C_ABAC_SKILLS.md => PLAN_UC_FGAC_SKILLS.md | 0 ...{6-abac-policies.py => 6-fgac-policies.py} | 0 .../databricks-unity-catalog/10-uc-acls.md | 219 ++++++++++++++++ .../databricks-unity-catalog/SKILL.md | 202 ++++++++++++--- .../SKILL.md | 0 .../mcp-tools-reference.md | 0 .../python-sdk-patterns.md | 0 .../sql-generation.md | 0 .../{abac_policies.py => fgac_policies.py} | 0 ...abac_policies.py => test_fgac_policies.py} | 0 16 files changed, 847 insertions(+), 35 deletions(-) create mode 100644 .claude/skills/databricks-unity-catalog/10-uc-acls.md create mode 100644 .claude/skills/databricks-unity-catalog/SKILL.md rename .claude/skills/{uc-abac-governance => uc-fgac-governance}/SKILL.md (100%) rename .claude/skills/{uc-abac-governance => uc-fgac-governance}/mcp-tools-reference.md (100%) rename .claude/skills/{uc-abac-governance => uc-fgac-governance}/python-sdk-patterns.md (100%) rename .claude/skills/{uc-abac-governance => uc-fgac-governance}/sql-generation.md (100%) rename PLAN_UC_ABAC_SKILLS.md => PLAN_UC_FGAC_SKILLS.md (100%) rename databricks-skills/databricks-python-sdk/examples/{6-abac-policies.py => 6-fgac-policies.py} (100%) create mode 100644 databricks-skills/databricks-unity-catalog/10-uc-acls.md rename databricks-skills/{uc-abac-governance => uc-fgac-governance}/SKILL.md (100%) rename databricks-skills/{uc-abac-governance => uc-fgac-governance}/mcp-tools-reference.md (100%) rename databricks-skills/{uc-abac-governance => uc-fgac-governance}/python-sdk-patterns.md (100%) rename databricks-skills/{uc-abac-governance => uc-fgac-governance}/sql-generation.md (100%) rename databricks-tools-core/databricks_tools_core/unity_catalog/{abac_policies.py => fgac_policies.py} (100%) rename 
databricks-tools-core/tests/integration/unity_catalog/{test_abac_policies.py => test_fgac_policies.py} (100%) diff --git a/.claude/skills/databricks-unity-catalog/10-uc-acls.md b/.claude/skills/databricks-unity-catalog/10-uc-acls.md new file mode 100644 index 00000000..038ceb85 --- /dev/null +++ b/.claude/skills/databricks-unity-catalog/10-uc-acls.md @@ -0,0 +1,219 @@ +# Unity Catalog Access Controls (ACLs) + +Comprehensive reference for Unity Catalog privilege management: GRANT/REVOKE, ownership, and permission patterns across securables. + +## Securable Hierarchy + +``` +METASTORE + └── CATALOG + └── SCHEMA + ├── TABLE / VIEW / MATERIALIZED VIEW + ├── VOLUME + ├── FUNCTION + └── MODEL +``` + +Privileges **inherit** down the hierarchy. Granting `USE CATALOG` on a catalog grants access to all schemas within it (but not data access — that requires `SELECT`, `MODIFY`, etc.). + +## Privilege Reference + +### Catalog-Level + +| Privilege | Description | +|-----------|-------------| +| `USE CATALOG` | Required to access any object within the catalog | +| `CREATE SCHEMA` | Create schemas within the catalog | +| `ALL PRIVILEGES` | All catalog-level privileges | + +### Schema-Level + +| Privilege | Description | +|-----------|-------------| +| `USE SCHEMA` | Required to access any object within the schema | +| `CREATE TABLE` | Create tables and views | +| `CREATE VOLUME` | Create volumes | +| `CREATE FUNCTION` | Create functions | +| `CREATE MODEL` | Create registered models | +| `ALL PRIVILEGES` | All schema-level privileges | + +### Table/View-Level + +| Privilege | Description | +|-----------|-------------| +| `SELECT` | Read data from the table or view | +| `MODIFY` | Insert, update, delete data | +| `ALL PRIVILEGES` | All table-level privileges | + +### Volume-Level + +| Privilege | Description | +|-----------|-------------| +| `READ VOLUME` | Read files from the volume | +| `WRITE VOLUME` | Write files to the volume | +| `ALL PRIVILEGES` | All volume-level privileges 
| + +### Function-Level + +| Privilege | Description | +|-----------|-------------| +| `EXECUTE` | Execute the function | +| `ALL PRIVILEGES` | All function-level privileges | + +## SQL Syntax + +### GRANT + +```sql +-- Catalog access +GRANT USE CATALOG ON CATALOG my_catalog TO `group_name`; +GRANT CREATE SCHEMA ON CATALOG my_catalog TO `group_name`; + +-- Schema access +GRANT USE SCHEMA ON SCHEMA my_catalog.my_schema TO `group_name`; +GRANT CREATE TABLE ON SCHEMA my_catalog.my_schema TO `group_name`; +GRANT CREATE VOLUME ON SCHEMA my_catalog.my_schema TO `group_name`; +GRANT CREATE FUNCTION ON SCHEMA my_catalog.my_schema TO `group_name`; + +-- Table/View access +GRANT SELECT ON TABLE my_catalog.my_schema.my_table TO `group_name`; +GRANT MODIFY ON TABLE my_catalog.my_schema.my_table TO `group_name`; + +-- Volume access +GRANT READ VOLUME ON VOLUME my_catalog.my_schema.my_volume TO `group_name`; +GRANT WRITE VOLUME ON VOLUME my_catalog.my_schema.my_volume TO `group_name`; + +-- Function access +GRANT EXECUTE ON FUNCTION my_catalog.my_schema.my_function TO `group_name`; + +-- All privileges shorthand +GRANT ALL PRIVILEGES ON SCHEMA my_catalog.my_schema TO `admin_group`; +``` + +### REVOKE + +```sql +REVOKE SELECT ON TABLE my_catalog.my_schema.my_table FROM `group_name`; +REVOKE MODIFY ON TABLE my_catalog.my_schema.my_table FROM `group_name`; +REVOKE ALL PRIVILEGES ON SCHEMA my_catalog.my_schema FROM `group_name`; +``` + +### Show Grants + +```sql +-- Show all grants on a securable +SHOW GRANTS ON CATALOG my_catalog; +SHOW GRANTS ON SCHEMA my_catalog.my_schema; +SHOW GRANTS ON TABLE my_catalog.my_schema.my_table; +SHOW GRANTS ON VOLUME my_catalog.my_schema.my_volume; + +-- Show grants for a specific principal +SHOW GRANTS `group_name` ON CATALOG my_catalog; +SHOW GRANTS `user@example.com` ON SCHEMA my_catalog.my_schema; +``` + +## Ownership + +Every securable has exactly one **owner**. 
The owner has all privileges on the object and can grant/revoke privileges to others. + +```sql +-- Transfer ownership +ALTER CATALOG my_catalog OWNER TO `new_owner`; +ALTER SCHEMA my_catalog.my_schema OWNER TO `new_owner`; +ALTER TABLE my_catalog.my_schema.my_table OWNER TO `new_owner`; +ALTER VOLUME my_catalog.my_schema.my_volume OWNER TO `new_owner`; +``` + +## Python SDK Patterns + +```python +from databricks.sdk import WorkspaceClient +from databricks.sdk.service.catalog import PermissionsChange, Privilege + +w = WorkspaceClient() + +# Grant privileges (changes must be PermissionsChange objects, not dicts) +w.grants.update( + securable_type="TABLE", + full_name="my_catalog.my_schema.my_table", + changes=[PermissionsChange( + principal="data_readers", + add=[Privilege.SELECT], + )], +) + +# Revoke privileges +w.grants.update( + securable_type="TABLE", + full_name="my_catalog.my_schema.my_table", + changes=[PermissionsChange( + principal="data_readers", + remove=[Privilege.SELECT], + )], +) + +# Get current grants +grants = w.grants.get( + securable_type="TABLE", + full_name="my_catalog.my_schema.my_table" +) +for assignment in grants.privilege_assignments: + print(f"{assignment.principal}: {assignment.privileges}") + +# Get effective grants (includes inherited) +effective = w.grants.get_effective( + securable_type="TABLE", + full_name="my_catalog.my_schema.my_table", + principal="data_readers" +) +``` + +## Common Patterns + +### Read-Only Data Consumer + +```sql +-- Minimal access for data readers +GRANT USE CATALOG ON CATALOG analytics TO `data_readers`; +GRANT USE SCHEMA ON SCHEMA analytics.gold TO `data_readers`; +GRANT SELECT ON SCHEMA analytics.gold TO `data_readers`; +``` + +### Data Engineer (Read + Write) + +```sql +GRANT USE CATALOG ON CATALOG analytics TO `data_engineers`; +GRANT USE SCHEMA ON SCHEMA analytics.silver TO `data_engineers`; +GRANT SELECT ON SCHEMA analytics.silver TO `data_engineers`; +GRANT MODIFY ON SCHEMA analytics.silver TO `data_engineers`; +GRANT CREATE TABLE ON SCHEMA analytics.silver TO `data_engineers`; +``` + +### Schema Admin + +```sql +GRANT USE CATALOG ON CATALOG analytics TO 
`schema_admins`; +GRANT ALL PRIVILEGES ON SCHEMA analytics.gold TO `schema_admins`; +``` + +### ML Engineer (Models + Functions) + +```sql +GRANT USE CATALOG ON CATALOG ml TO `ml_engineers`; +GRANT USE SCHEMA ON SCHEMA ml.models TO `ml_engineers`; +GRANT CREATE MODEL ON SCHEMA ml.models TO `ml_engineers`; +GRANT CREATE FUNCTION ON SCHEMA ml.models TO `ml_engineers`; +GRANT SELECT ON SCHEMA ml.features TO `ml_engineers`; +``` + +## MCP Tool + +Use `mcp__databricks__manage_uc_grants` for grant operations, or `mcp__databricks__execute_sql` for SQL-based grant management. + +## Best Practices + +1. **Grant to groups, not users** — Easier to manage and audit +2. **Use least privilege** — Grant only the minimum permissions needed +3. **Leverage inheritance** — Grant at schema level when all tables need the same access +4. **Audit regularly** — Query `system.access.audit` for grant/revoke events +5. **Prefer `USE CATALOG` + `USE SCHEMA` + `SELECT`** over `ALL PRIVILEGES` +6. **Document ownership** — Keep track of who owns each catalog/schema diff --git a/.claude/skills/databricks-unity-catalog/SKILL.md b/.claude/skills/databricks-unity-catalog/SKILL.md new file mode 100644 index 00000000..ede7db9f --- /dev/null +++ b/.claude/skills/databricks-unity-catalog/SKILL.md @@ -0,0 +1,242 @@ +--- +name: databricks-unity-catalog +description: "Unity Catalog: system tables, volumes, access controls (ACLs), and FGAC governance. Use when querying system tables (audit, lineage, billing), working with volume file operations, managing UC permissions (GRANT/REVOKE), or managing FGAC policies (column masks, row filters, governed tags, masking UDFs)." +--- + +# Unity Catalog + +Guidance for Unity Catalog across four areas: system tables, volumes, access controls, and FGAC policy governance. 
+ +## When to Use This Skill + +Use this skill when working with any of these four categories: + +### System Tables +- Querying **lineage** (table dependencies, column-level lineage) +- Analyzing **audit logs** (who accessed what, permission changes) +- Monitoring **billing and usage** (DBU consumption, cost analysis) +- Tracking **compute resources** (cluster usage, warehouse metrics) +- Reviewing **job execution** (run history, success rates, failures) +- Analyzing **query performance** (slow queries, warehouse utilization) + +### Volumes +- Working with **volumes** (upload, download, list files in `/Volumes/`) +- Managing volume **directories** and file operations +- Configuring volume **permissions** (READ VOLUME, WRITE VOLUME) + +### UC Access Controls (ACLs) +- **Granting or revoking** privileges on catalogs, schemas, tables, volumes, functions +- Managing **ownership** transfers +- Setting up **role-based access** patterns (data readers, engineers, admins) +- Auditing **current permissions** (SHOW GRANTS) + +### FGAC (Fine-Grained Access Control) +- Creating or managing **FGAC policies** (column masks, row filters) +- Working with **governed tags** (creating via UI, applying via SQL) +- Building **masking UDFs** for PII protection (SSN, email, credit card, etc.) 
+- Implementing **human-in-the-loop governance** workflows +- Managing **policy lifecycle** (create, update, delete, preview) +- Querying **tag assignments** via `information_schema` + +--- + +## Reference Files + +### System Tables + +| File | Description | +|------|-------------| +| [5-system-tables.md](5-system-tables.md) | Lineage, audit, billing, compute, jobs, query history | + +### Volumes + +| File | Description | +|------|-------------| +| [6-volumes.md](6-volumes.md) | Volume file operations, permissions, best practices | + +### UC Access Controls (ACLs) + +| File | Description | +|------|-------------| +| [10-uc-acls.md](10-uc-acls.md) | GRANT/REVOKE, ownership, privilege reference, SDK patterns, common role patterns | + +### FGAC (Fine-Grained Access Control) + +| File | Description | +|------|-------------| +| [7-fgac-overview.md](7-fgac-overview.md) | FGAC workflow, governed tags, masking UDFs, policy syntax, errors, best practices | +| [8-fgac-sql-generation.md](8-fgac-sql-generation.md) | SET/UNSET TAG, CREATE FUNCTION, CREATE/DROP POLICY, discovery queries | +| [9-fgac-sdk-and-tools.md](9-fgac-sdk-and-tools.md) | Python SDK patterns and 12 MCP tools for policy management | + +--- + +## Quick Start: System Tables + +### Enable Access + +```sql +-- Grant access to system tables +GRANT USE CATALOG ON CATALOG system TO `data_engineers`; +GRANT USE SCHEMA ON SCHEMA system.access TO `data_engineers`; +GRANT SELECT ON SCHEMA system.access TO `data_engineers`; +``` + +### Common Queries + +```sql +-- Table lineage: What tables feed into this table? 
+SELECT source_table_full_name, source_column_name +FROM system.access.table_lineage +WHERE target_table_full_name = 'catalog.schema.table' + AND event_date >= current_date() - 7; + +-- Audit: Recent permission changes +SELECT event_time, user_identity.email, action_name, request_params +FROM system.access.audit +WHERE action_name LIKE '%GRANT%' OR action_name LIKE '%REVOKE%' +ORDER BY event_time DESC +LIMIT 100; + +-- Billing: DBU usage by workspace +SELECT workspace_id, sku_name, SUM(usage_quantity) AS total_dbus +FROM system.billing.usage +WHERE usage_date >= current_date() - 30 +GROUP BY workspace_id, sku_name; +``` + +### MCP Tool Integration + +```python +mcp__databricks__execute_sql( + sql_query=""" + SELECT source_table_full_name, target_table_full_name + FROM system.access.table_lineage + WHERE event_date >= current_date() - 7 + """, + catalog="system" +) +``` + +--- + +## Quick Start: Volumes + +```python +# List files in a volume +list_volume_files(volume_path="/Volumes/catalog/schema/volume/folder/") + +# Upload file to volume +upload_to_volume( + local_path="/tmp/data.csv", + volume_path="/Volumes/catalog/schema/volume/data.csv" +) + +# Download file from volume +download_from_volume( + volume_path="/Volumes/catalog/schema/volume/data.csv", + local_path="/tmp/downloaded.csv" +) + +# Create directory +create_volume_directory(volume_path="/Volumes/catalog/schema/volume/new_folder") +``` + +See [6-volumes.md](6-volumes.md) for full volume operations, permissions, and troubleshooting. 
+ +--- + +## Quick Start: UC Access Controls (ACLs) + +```sql +-- Read-only access pattern +GRANT USE CATALOG ON CATALOG analytics TO `data_readers`; +GRANT USE SCHEMA ON SCHEMA analytics.gold TO `data_readers`; +GRANT SELECT ON SCHEMA analytics.gold TO `data_readers`; + +-- Data engineer access pattern +GRANT USE CATALOG ON CATALOG analytics TO `data_engineers`; +GRANT USE SCHEMA ON SCHEMA analytics.silver TO `data_engineers`; +GRANT SELECT ON SCHEMA analytics.silver TO `data_engineers`; +GRANT MODIFY ON SCHEMA analytics.silver TO `data_engineers`; +GRANT CREATE TABLE ON SCHEMA analytics.silver TO `data_engineers`; + +-- Show current grants +SHOW GRANTS ON SCHEMA analytics.gold; + +-- Transfer ownership +ALTER SCHEMA analytics.gold OWNER TO `new_owner`; +``` + +See [10-uc-acls.md](10-uc-acls.md) for full privilege reference, SDK patterns, and common role patterns. + +--- + +## Quick Start: FGAC + +```sql +-- 1. Apply governed tag to a column (tag must exist in UI first) +SET TAG ON COLUMN catalog.schema.table.ssn_column 'pii_type' = 'ssn'; + +-- 2. Create a masking UDF +CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) +RETURNS STRING +DETERMINISTIC +RETURN CASE + WHEN ssn IS NULL THEN NULL + WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 + THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) + ELSE '***-**-****' +END; + +-- 3. Create an FGAC column mask policy +CREATE OR REPLACE POLICY mask_pii_ssn +ON SCHEMA catalog.schema +COMMENT 'Mask SSN columns for analysts' +COLUMN MASK catalog.schema.mask_ssn +TO `analysts` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; +``` + +See [7-fgac-overview.md](7-fgac-overview.md) for the full FGAC workflow, policy syntax, and best practices. + +--- + +## Best Practices + +### System Tables +1. **Filter by date** — System tables can be large; always use date filters +2. 
**Use appropriate retention** — Check your workspace's retention settings +3. **Schedule reports** — Create scheduled queries for regular monitoring + +### Volumes +4. **Organize by purpose** — Use directory structure within volumes +5. **Grant minimal access** — Use `READ VOLUME` vs `WRITE VOLUME` appropriately + +### UC Access Controls (ACLs) +6. **Grant to groups, not users** — Easier to manage and audit +7. **Use least privilege** — Grant only the minimum permissions needed +8. **Leverage inheritance** — Grant at schema level when all tables need the same access +9. **Audit regularly** — Query `system.access.audit` for grant/revoke events + +### FGAC +10. **Always include `EXCEPT \`gov_admin\``** in every FGAC policy +11. **Preview before executing** any FGAC policy change +12. **Use governed tags** (not ad-hoc tags) for FGAC policy matching + +## Resources + +### System Tables & Volumes +- [Unity Catalog System Tables](https://docs.databricks.com/administration-guide/system-tables/) +- [Audit Log Reference](https://docs.databricks.com/administration-guide/account-settings/audit-logs.html) + +### UC Access Controls +- [UC Privileges](https://docs.databricks.com/data-governance/unity-catalog/manage-privileges/) + +### FGAC +- [FGAC Overview](https://docs.databricks.com/data-governance/unity-catalog/abac/) +- [FGAC Policies](https://docs.databricks.com/data-governance/unity-catalog/abac/policies) +- [FGAC Tutorial](https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial) +- [Governed Tags](https://docs.databricks.com/admin/governed-tags/) diff --git a/.claude/skills/uc-abac-governance/SKILL.md b/.claude/skills/uc-fgac-governance/SKILL.md similarity index 100% rename from .claude/skills/uc-abac-governance/SKILL.md rename to .claude/skills/uc-fgac-governance/SKILL.md diff --git a/.claude/skills/uc-abac-governance/mcp-tools-reference.md b/.claude/skills/uc-fgac-governance/mcp-tools-reference.md similarity index 100% rename from 
.claude/skills/uc-abac-governance/mcp-tools-reference.md rename to .claude/skills/uc-fgac-governance/mcp-tools-reference.md diff --git a/.claude/skills/uc-abac-governance/python-sdk-patterns.md b/.claude/skills/uc-fgac-governance/python-sdk-patterns.md similarity index 100% rename from .claude/skills/uc-abac-governance/python-sdk-patterns.md rename to .claude/skills/uc-fgac-governance/python-sdk-patterns.md diff --git a/.claude/skills/uc-abac-governance/sql-generation.md b/.claude/skills/uc-fgac-governance/sql-generation.md similarity index 100% rename from .claude/skills/uc-abac-governance/sql-generation.md rename to .claude/skills/uc-fgac-governance/sql-generation.md diff --git a/PLAN_UC_ABAC_SKILLS.md b/PLAN_UC_FGAC_SKILLS.md similarity index 100% rename from PLAN_UC_ABAC_SKILLS.md rename to PLAN_UC_FGAC_SKILLS.md diff --git a/databricks-skills/databricks-python-sdk/examples/6-abac-policies.py b/databricks-skills/databricks-python-sdk/examples/6-fgac-policies.py similarity index 100% rename from databricks-skills/databricks-python-sdk/examples/6-abac-policies.py rename to databricks-skills/databricks-python-sdk/examples/6-fgac-policies.py diff --git a/databricks-skills/databricks-unity-catalog/10-uc-acls.md b/databricks-skills/databricks-unity-catalog/10-uc-acls.md new file mode 100644 index 00000000..038ceb85 --- /dev/null +++ b/databricks-skills/databricks-unity-catalog/10-uc-acls.md @@ -0,0 +1,219 @@ +# Unity Catalog Access Controls (ACLs) + +Comprehensive reference for Unity Catalog privilege management: GRANT/REVOKE, ownership, and permission patterns across securables. + +## Securable Hierarchy + +``` +METASTORE + └── CATALOG + └── SCHEMA + ├── TABLE / VIEW / MATERIALIZED VIEW + ├── VOLUME + ├── FUNCTION + └── MODEL +``` + +Privileges **inherit** down the hierarchy. Granting `USE CATALOG` on a catalog grants access to all schemas within it (but not data access — that requires `SELECT`, `MODIFY`, etc.). 
+ +## Privilege Reference + +### Catalog-Level + +| Privilege | Description | +|-----------|-------------| +| `USE CATALOG` | Required to access any object within the catalog | +| `CREATE SCHEMA` | Create schemas within the catalog | +| `ALL PRIVILEGES` | All catalog-level privileges | + +### Schema-Level + +| Privilege | Description | +|-----------|-------------| +| `USE SCHEMA` | Required to access any object within the schema | +| `CREATE TABLE` | Create tables and views | +| `CREATE VOLUME` | Create volumes | +| `CREATE FUNCTION` | Create functions | +| `CREATE MODEL` | Create registered models | +| `ALL PRIVILEGES` | All schema-level privileges | + +### Table/View-Level + +| Privilege | Description | +|-----------|-------------| +| `SELECT` | Read data from the table or view | +| `MODIFY` | Insert, update, delete data | +| `ALL PRIVILEGES` | All table-level privileges | + +### Volume-Level + +| Privilege | Description | +|-----------|-------------| +| `READ VOLUME` | Read files from the volume | +| `WRITE VOLUME` | Write files to the volume | +| `ALL PRIVILEGES` | All volume-level privileges | + +### Function-Level + +| Privilege | Description | +|-----------|-------------| +| `EXECUTE` | Execute the function | +| `ALL PRIVILEGES` | All function-level privileges | + +## SQL Syntax + +### GRANT + +```sql +-- Catalog access +GRANT USE CATALOG ON CATALOG my_catalog TO `group_name`; +GRANT CREATE SCHEMA ON CATALOG my_catalog TO `group_name`; + +-- Schema access +GRANT USE SCHEMA ON SCHEMA my_catalog.my_schema TO `group_name`; +GRANT CREATE TABLE ON SCHEMA my_catalog.my_schema TO `group_name`; +GRANT CREATE VOLUME ON SCHEMA my_catalog.my_schema TO `group_name`; +GRANT CREATE FUNCTION ON SCHEMA my_catalog.my_schema TO `group_name`; + +-- Table/View access +GRANT SELECT ON TABLE my_catalog.my_schema.my_table TO `group_name`; +GRANT MODIFY ON TABLE my_catalog.my_schema.my_table TO `group_name`; + +-- Volume access +GRANT READ VOLUME ON VOLUME 
my_catalog.my_schema.my_volume TO `group_name`; +GRANT WRITE VOLUME ON VOLUME my_catalog.my_schema.my_volume TO `group_name`; + +-- Function access +GRANT EXECUTE ON FUNCTION my_catalog.my_schema.my_function TO `group_name`; + +-- All privileges shorthand +GRANT ALL PRIVILEGES ON SCHEMA my_catalog.my_schema TO `admin_group`; +``` + +### REVOKE + +```sql +REVOKE SELECT ON TABLE my_catalog.my_schema.my_table FROM `group_name`; +REVOKE MODIFY ON TABLE my_catalog.my_schema.my_table FROM `group_name`; +REVOKE ALL PRIVILEGES ON SCHEMA my_catalog.my_schema FROM `group_name`; +``` + +### Show Grants + +```sql +-- Show all grants on a securable +SHOW GRANTS ON CATALOG my_catalog; +SHOW GRANTS ON SCHEMA my_catalog.my_schema; +SHOW GRANTS ON TABLE my_catalog.my_schema.my_table; +SHOW GRANTS ON VOLUME my_catalog.my_schema.my_volume; + +-- Show grants for a specific principal +SHOW GRANTS `group_name` ON CATALOG my_catalog; +SHOW GRANTS `user@example.com` ON SCHEMA my_catalog.my_schema; +``` + +## Ownership + +Every securable has exactly one **owner**. The owner has all privileges on the object and can grant/revoke privileges to others. 
+ +```sql +-- Transfer ownership +ALTER CATALOG my_catalog OWNER TO `new_owner`; +ALTER SCHEMA my_catalog.my_schema OWNER TO `new_owner`; +ALTER TABLE my_catalog.my_schema.my_table OWNER TO `new_owner`; +ALTER VOLUME my_catalog.my_schema.my_volume OWNER TO `new_owner`; +``` + +## Python SDK Patterns + +```python +from databricks.sdk import WorkspaceClient +from databricks.sdk.service.catalog import PermissionsChange, Privilege + +w = WorkspaceClient() + +# Grant privileges (changes must be PermissionsChange objects, not dicts) +w.grants.update( + securable_type="TABLE", + full_name="my_catalog.my_schema.my_table", + changes=[PermissionsChange( + principal="data_readers", + add=[Privilege.SELECT], + )], +) + +# Revoke privileges +w.grants.update( + securable_type="TABLE", + full_name="my_catalog.my_schema.my_table", + changes=[PermissionsChange( + principal="data_readers", + remove=[Privilege.SELECT], + )], +) + +# Get current grants +grants = w.grants.get( + securable_type="TABLE", + full_name="my_catalog.my_schema.my_table" +) +for assignment in grants.privilege_assignments: + print(f"{assignment.principal}: {assignment.privileges}") + +# Get effective grants (includes inherited) +effective = w.grants.get_effective( + securable_type="TABLE", + full_name="my_catalog.my_schema.my_table", + principal="data_readers" +) +``` + +## Common Patterns + +### Read-Only Data Consumer + +```sql +-- Minimal access for data readers +GRANT USE CATALOG ON CATALOG analytics TO `data_readers`; +GRANT USE SCHEMA ON SCHEMA analytics.gold TO `data_readers`; +GRANT SELECT ON SCHEMA analytics.gold TO `data_readers`; +``` + +### Data Engineer (Read + Write) + +```sql +GRANT USE CATALOG ON CATALOG analytics TO `data_engineers`; +GRANT USE SCHEMA ON SCHEMA analytics.silver TO `data_engineers`; +GRANT SELECT ON SCHEMA analytics.silver TO `data_engineers`; +GRANT MODIFY ON SCHEMA analytics.silver TO `data_engineers`; +GRANT CREATE TABLE ON SCHEMA analytics.silver TO `data_engineers`; +``` + +### Schema Admin + +```sql +GRANT USE CATALOG ON CATALOG analytics TO `schema_admins`; +GRANT ALL PRIVILEGES ON SCHEMA analytics.gold TO `schema_admins`; +``` + +### ML 
Engineer (Models + Functions) + +```sql +GRANT USE CATALOG ON CATALOG ml TO `ml_engineers`; +GRANT USE SCHEMA ON SCHEMA ml.models TO `ml_engineers`; +GRANT CREATE MODEL ON SCHEMA ml.models TO `ml_engineers`; +GRANT CREATE FUNCTION ON SCHEMA ml.models TO `ml_engineers`; +GRANT SELECT ON SCHEMA ml.features TO `ml_engineers`; +``` + +## MCP Tool + +Use `mcp__databricks__manage_uc_grants` for grant operations, or `mcp__databricks__execute_sql` for SQL-based grant management. + +## Best Practices + +1. **Grant to groups, not users** — Easier to manage and audit +2. **Use least privilege** — Grant only the minimum permissions needed +3. **Leverage inheritance** — Grant at schema level when all tables need the same access +4. **Audit regularly** — Query `system.access.audit` for grant/revoke events +5. **Prefer `USE CATALOG` + `USE SCHEMA` + `SELECT`** over `ALL PRIVILEGES` +6. **Document ownership** — Keep track of who owns each catalog/schema diff --git a/databricks-skills/databricks-unity-catalog/SKILL.md b/databricks-skills/databricks-unity-catalog/SKILL.md index b8dbbc20..ede7db9f 100644 --- a/databricks-skills/databricks-unity-catalog/SKILL.md +++ b/databricks-skills/databricks-unity-catalog/SKILL.md @@ -1,16 +1,17 @@ --- name: databricks-unity-catalog -description: "Unity Catalog system tables and volumes. Use when querying system tables (audit, lineage, billing) or working with volume file operations (upload, download, list files in /Volumes/)." +description: "Unity Catalog: system tables, volumes, access controls (ACLs), and FGAC governance. Use when querying system tables (audit, lineage, billing), working with volume file operations, managing UC permissions (GRANT/REVOKE), or managing FGAC policies (column masks, row filters, governed tags, masking UDFs)." --- # Unity Catalog -Guidance for Unity Catalog system tables, volumes, and governance. +Guidance for Unity Catalog across four areas: system tables, volumes, access controls, and FGAC policy governance. 
## When to Use This Skill -Use this skill when: -- Working with **volumes** (upload, download, list files in `/Volumes/`) +Use this skill when working with any of these four categories: + +### System Tables - Querying **lineage** (table dependencies, column-level lineage) - Analyzing **audit logs** (who accessed what, permission changes) - Monitoring **billing and usage** (DBU consumption, cost analysis) @@ -18,38 +19,60 @@ Use this skill when: - Reviewing **job execution** (run history, success rates, failures) - Analyzing **query performance** (slow queries, warehouse utilization) +### Volumes +- Working with **volumes** (upload, download, list files in `/Volumes/`) +- Managing volume **directories** and file operations +- Configuring volume **permissions** (READ VOLUME, WRITE VOLUME) + +### UC Access Controls (ACLs) +- **Granting or revoking** privileges on catalogs, schemas, tables, volumes, functions +- Managing **ownership** transfers +- Setting up **role-based access** patterns (data readers, engineers, admins) +- Auditing **current permissions** (SHOW GRANTS) + +### FGAC (Fine-Grained Access Control) +- Creating or managing **FGAC policies** (column masks, row filters) +- Working with **governed tags** (creating via UI, applying via SQL) +- Building **masking UDFs** for PII protection (SSN, email, credit card, etc.) 
+- Implementing **human-in-the-loop governance** workflows +- Managing **policy lifecycle** (create, update, delete, preview) +- Querying **tag assignments** via `information_schema` + +--- + ## Reference Files -| Topic | File | Description | -|-------|------|-------------| -| System Tables | [5-system-tables.md](5-system-tables.md) | Lineage, audit, billing, compute, jobs, query history | -| Volumes | [6-volumes.md](6-volumes.md) | Volume file operations, permissions, best practices | +### System Tables -## Quick Start +| File | Description | +|------|-------------| +| [5-system-tables.md](5-system-tables.md) | Lineage, audit, billing, compute, jobs, query history | -### Volume File Operations (MCP Tools) +### Volumes -```python -# List files in a volume -list_volume_files(volume_path="/Volumes/catalog/schema/volume/folder/") +| File | Description | +|------|-------------| +| [6-volumes.md](6-volumes.md) | Volume file operations, permissions, best practices | -# Upload file to volume -upload_to_volume( - local_path="/tmp/data.csv", - volume_path="/Volumes/catalog/schema/volume/data.csv" -) +### UC Access Controls (ACLs) -# Download file from volume -download_from_volume( - volume_path="/Volumes/catalog/schema/volume/data.csv", - local_path="/tmp/downloaded.csv" -) +| File | Description | +|------|-------------| +| [10-uc-acls.md](10-uc-acls.md) | GRANT/REVOKE, ownership, privilege reference, SDK patterns, common role patterns | -# Create directory -create_volume_directory(volume_path="/Volumes/catalog/schema/volume/new_folder") -``` +### FGAC (Fine-Grained Access Control) -### Enable System Tables Access +| File | Description | +|------|-------------| +| [7-fgac-overview.md](7-fgac-overview.md) | FGAC workflow, governed tags, masking UDFs, policy syntax, errors, best practices | +| [8-fgac-sql-generation.md](8-fgac-sql-generation.md) | SET/UNSET TAG, CREATE FUNCTION, CREATE/DROP POLICY, discovery queries | +| [9-fgac-sdk-and-tools.md](9-fgac-sdk-and-tools.md) | 
Python SDK patterns and 12 MCP tools for policy management | + +--- + +## Quick Start: System Tables + +### Enable Access ```sql -- Grant access to system tables @@ -81,12 +104,9 @@ WHERE usage_date >= current_date() - 30 GROUP BY workspace_id, sku_name; ``` -## MCP Tool Integration - -Use `mcp__databricks__execute_sql` for system table queries: +### MCP Tool Integration ```python -# Query lineage mcp__databricks__execute_sql( sql_query=""" SELECT source_table_full_name, target_table_full_name @@ -97,14 +117,126 @@ mcp__databricks__execute_sql( ) ``` +--- + +## Quick Start: Volumes + +```python +# List files in a volume +list_volume_files(volume_path="/Volumes/catalog/schema/volume/folder/") + +# Upload file to volume +upload_to_volume( + local_path="/tmp/data.csv", + volume_path="/Volumes/catalog/schema/volume/data.csv" +) + +# Download file from volume +download_from_volume( + volume_path="/Volumes/catalog/schema/volume/data.csv", + local_path="/tmp/downloaded.csv" +) + +# Create directory +create_volume_directory(volume_path="/Volumes/catalog/schema/volume/new_folder") +``` + +See [6-volumes.md](6-volumes.md) for full volume operations, permissions, and troubleshooting. 
+ +--- + +## Quick Start: UC Access Controls (ACLs) + +```sql +-- Read-only access pattern +GRANT USE CATALOG ON CATALOG analytics TO `data_readers`; +GRANT USE SCHEMA ON SCHEMA analytics.gold TO `data_readers`; +GRANT SELECT ON SCHEMA analytics.gold TO `data_readers`; + +-- Data engineer access pattern +GRANT USE CATALOG ON CATALOG analytics TO `data_engineers`; +GRANT USE SCHEMA ON SCHEMA analytics.silver TO `data_engineers`; +GRANT SELECT ON SCHEMA analytics.silver TO `data_engineers`; +GRANT MODIFY ON SCHEMA analytics.silver TO `data_engineers`; +GRANT CREATE TABLE ON SCHEMA analytics.silver TO `data_engineers`; + +-- Show current grants +SHOW GRANTS ON SCHEMA analytics.gold; + +-- Transfer ownership +ALTER SCHEMA analytics.gold OWNER TO `new_owner`; +``` + +See [10-uc-acls.md](10-uc-acls.md) for full privilege reference, SDK patterns, and common role patterns. + +--- + +## Quick Start: FGAC + +```sql +-- 1. Apply governed tag to a column (tag must exist in UI first) +SET TAG ON COLUMN catalog.schema.table.ssn_column 'pii_type' = 'ssn'; + +-- 2. Create a masking UDF +CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) +RETURNS STRING +DETERMINISTIC +RETURN CASE + WHEN ssn IS NULL THEN NULL + WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 + THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) + ELSE '***-**-****' +END; + +-- 3. Create an FGAC column mask policy +CREATE OR REPLACE POLICY mask_pii_ssn +ON SCHEMA catalog.schema +COMMENT 'Mask SSN columns for analysts' +COLUMN MASK catalog.schema.mask_ssn +TO `analysts` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; +``` + +See [7-fgac-overview.md](7-fgac-overview.md) for the full FGAC workflow, policy syntax, and best practices. + +--- + ## Best Practices -1. **Filter by date** - System tables can be large; always use date filters -2. **Use appropriate retention** - Check your workspace's retention settings -3. 
**Grant minimal access** - System tables contain sensitive metadata -4. **Schedule reports** - Create scheduled queries for regular monitoring +### System Tables +1. **Filter by date** — System tables can be large; always use date filters +2. **Use appropriate retention** — Check your workspace's retention settings +3. **Schedule reports** — Create scheduled queries for regular monitoring + +### Volumes +4. **Organize by purpose** — Use directory structure within volumes +5. **Grant minimal access** — Use `READ VOLUME` vs `WRITE VOLUME` appropriately + +### UC Access Controls (ACLs) +6. **Grant to groups, not users** — Easier to manage and audit +7. **Use least privilege** — Grant only the minimum permissions needed +8. **Leverage inheritance** — Grant at schema level when all tables need the same access +9. **Audit regularly** — Query `system.access.audit` for grant/revoke events + +### FGAC +10. **Always include ``EXCEPT `gov_admin` ``** in every FGAC policy +11. **Preview before executing** any FGAC policy change +12. 
**Use governed tags** (not ad-hoc tags) for FGAC policy matching ## Resources +### System Tables & Volumes - [Unity Catalog System Tables](https://docs.databricks.com/administration-guide/system-tables/) - [Audit Log Reference](https://docs.databricks.com/administration-guide/account-settings/audit-logs.html) + +### UC Access Controls +- [UC Privileges](https://docs.databricks.com/data-governance/unity-catalog/manage-privileges/) + +### FGAC +- [FGAC Overview](https://docs.databricks.com/data-governance/unity-catalog/abac/) +- [FGAC Policies](https://docs.databricks.com/data-governance/unity-catalog/abac/policies) +- [FGAC Tutorial](https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial) +- [Governed Tags](https://docs.databricks.com/admin/governed-tags/) diff --git a/databricks-skills/uc-abac-governance/SKILL.md b/databricks-skills/uc-fgac-governance/SKILL.md similarity index 100% rename from databricks-skills/uc-abac-governance/SKILL.md rename to databricks-skills/uc-fgac-governance/SKILL.md diff --git a/databricks-skills/uc-abac-governance/mcp-tools-reference.md b/databricks-skills/uc-fgac-governance/mcp-tools-reference.md similarity index 100% rename from databricks-skills/uc-abac-governance/mcp-tools-reference.md rename to databricks-skills/uc-fgac-governance/mcp-tools-reference.md diff --git a/databricks-skills/uc-abac-governance/python-sdk-patterns.md b/databricks-skills/uc-fgac-governance/python-sdk-patterns.md similarity index 100% rename from databricks-skills/uc-abac-governance/python-sdk-patterns.md rename to databricks-skills/uc-fgac-governance/python-sdk-patterns.md diff --git a/databricks-skills/uc-abac-governance/sql-generation.md b/databricks-skills/uc-fgac-governance/sql-generation.md similarity index 100% rename from databricks-skills/uc-abac-governance/sql-generation.md rename to databricks-skills/uc-fgac-governance/sql-generation.md diff --git a/databricks-tools-core/databricks_tools_core/unity_catalog/abac_policies.py 
b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py similarity index 100% rename from databricks-tools-core/databricks_tools_core/unity_catalog/abac_policies.py rename to databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py diff --git a/databricks-tools-core/tests/integration/unity_catalog/test_abac_policies.py b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py similarity index 100% rename from databricks-tools-core/tests/integration/unity_catalog/test_abac_policies.py rename to databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py From 636552f013983d094946ac31e24c65be1a2eaeb8 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Tue, 10 Feb 2026 18:12:38 -0600 Subject: [PATCH 04/34] Add FGAC human-in-the-loop guardrails with approval token and admin group check - Add HMAC-SHA256 approval token: preview_policy_changes() returns a cryptographic token binding params + timestamp; create/update/delete reject calls without a valid, unexpired token - Add admin group check: mutating operations verify the caller belongs to a configurable admin group (default: admins) via current_user API - Update MCP tool wrapper to pass approval_token through to mutations - Add TestApprovalTokenEnforcement and TestAdminGroupCheck test classes - Update existing CRUD tests to use preview-then-execute token flow - Update conftest cleanup fixture to use SDK directly (bypass guardrails) - Update conftest warehouse fixture to auto-start stopped serverless warehouses - Update skill docs (both mirrors) with guardrails section and new signatures - Add FGAC_GUARDRAILS.md with architecture diagrams and workflow documentation --- .../9-fgac-sdk-and-tools.md | 668 ++++++++++++++++++ FGAC_GUARDRAILS.md | 271 +++++++ .../tools/fgac_policies.py | 171 +++++ .../9-fgac-sdk-and-tools.md | 668 ++++++++++++++++++ .../unity_catalog/fgac_policies.py | 196 ++++- databricks-tools-core/tests/conftest.py | 14 + 
.../integration/unity_catalog/conftest.py | 16 +- .../unity_catalog/test_fgac_policies.py | 334 ++++++++- 8 files changed, 2283 insertions(+), 55 deletions(-) create mode 100644 .claude/skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md create mode 100644 FGAC_GUARDRAILS.md create mode 100644 databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py create mode 100644 databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md diff --git a/.claude/skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md b/.claude/skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md new file mode 100644 index 00000000..17c0d5f1 --- /dev/null +++ b/.claude/skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md @@ -0,0 +1,668 @@ +# FGAC Policy SDK & MCP Tools + +Python SDK patterns and MCP tool reference for managing FGAC policies in Unity Catalog. + +**SDK Docs:** https://databricks-sdk-py.readthedocs.io/en/latest/ +**FGAC Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/policies + +--- + +## Policy Scopes + +`on_securable_type` sets the **scope** of the policy. `for_securable_type` is always `TABLE`. + +| Scope | `on_securable_type` | `on_securable_fullname` | Effect | +|---|---|---|---| +| Catalog | `CATALOG` | `"my_catalog"` | Applies to all tables in the catalog | +| Schema | `SCHEMA` | `"my_catalog.my_schema"` | Applies to all tables in the schema | +| Table | `TABLE` | `"my_catalog.my_schema.my_table"` | Applies to a single table | + +### Important: Always Include `gov_admin` + +Every policy **MUST** include `"gov_admin"` in `except_principals`: + +```python +# CORRECT +except_principals=["gov_admin"] + +# CORRECT - additional admin groups +except_principals=["gov_admin", "platform_admins"] + +# WRONG - missing gov_admin +except_principals=["platform_admins"] # gov_admin must be included! 
+``` + +--- + +## Guardrails + +FGAC mutating operations (`create`, `update`, `delete`) enforce two programmatic guardrails: + +### Approval Token + +Every mutating call **requires** a valid `approval_token` obtained from `preview_policy_changes()`. The token is an HMAC-SHA256 signature binding the previewed parameters to a timestamp. + +- Token TTL: **10 minutes** (configurable via `_TOKEN_TTL_SECONDS`) +- Parameters must match exactly between preview and mutation +- Action mapping: preview `CREATE` → mutation `create`, `UPDATE` → `update`, `DELETE` → `delete` + +### Admin Group Check + +The caller must be a member of the configured admin group. Membership is verified via `w.current_user.me().groups`. + +### Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `FGAC_APPROVAL_SECRET` | `fgac-default-dev-secret` | HMAC secret for token signing | +| `FGAC_ADMIN_GROUP` | `admins` | Required group membership for mutations | + +> **Important:** In production, always set `FGAC_APPROVAL_SECRET` to a strong random value. + +--- + +## MCP Tools + +### Discovery Tools + +#### `list_fgac_policies` + +List FGAC policies on a catalog, schema, or table. 
+ +```python +list_fgac_policies( + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, # e.g., "my_catalog.my_schema" + include_inherited: bool = True, + policy_type: str = None, # "COLUMN_MASK" or "ROW_FILTER" (optional filter) +) +``` + +**Returns:** +```json +{ + "success": true, + "securable_type": "SCHEMA", + "securable_fullname": "my_catalog.my_schema", + "policy_count": 3, + "policies": [ + { + "name": "mask_pii_ssn", + "policy_type": "COLUMN_MASK", + "to_principals": ["analysts"], + "except_principals": ["gov_admin"], + "on_securable_fullname": "my_catalog.my_schema", + "column_mask": {"function_name": "my_catalog.my_schema.mask_ssn", "on_column": "masked_col"}, + "match_columns": [{"alias": "masked_col", "condition": "hasTagValue('pii_type', 'ssn')"}] + } + ] +} +``` + +#### `get_fgac_policy` + +Get details for a specific policy by name. + +```python +get_fgac_policy( + policy_name: str, # Policy name + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, # Fully qualified securable name +) +``` + +**Returns:** +```json +{ + "success": true, + "policy": { + "name": "mask_pii_ssn", + "policy_type": "COLUMN_MASK", + "comment": "Mask SSN columns for analysts", + "to_principals": ["analysts", "data_scientists"], + "except_principals": ["gov_admin"], + "on_securable_type": "SCHEMA", + "on_securable_fullname": "my_catalog.my_schema", + "for_securable_type": "TABLE", + "column_mask": {"function_name": "my_catalog.my_schema.mask_ssn", "on_column": "masked_col"}, + "match_columns": [{"alias": "masked_col", "condition": "hasTagValue('pii_type', 'ssn')"}] + } +} +``` + +#### `get_table_policies` + +Get column masks and row filters for a specific table via Unity Catalog API. 
+ +```python +get_table_policies( + catalog: str, + schema: str, + table: str, +) +``` + +**Returns:** +```json +{ + "success": true, + "table": "my_catalog.my_schema.my_table", + "column_masks": [ + { + "column_name": "ssn", + "column_type": "STRING", + "mask_functions": ["my_catalog.my_schema.mask_ssn"] + } + ], + "row_filters": [ + { + "function_name": "my_catalog.my_schema.is_not_eu_region", + "input_column_names": ["region"] + } + ] +} +``` + +#### `get_masking_functions` + +List masking UDFs in a schema. + +```python +get_masking_functions( + catalog: str, + schema: str, +) +``` + +**Returns:** +```json +{ + "success": true, + "catalog": "my_catalog", + "schema": "my_schema", + "functions": [ + { + "name": "mask_ssn", + "full_name": "my_catalog.my_schema.mask_ssn", + "return_type": "STRING", + "comment": "Masks SSN showing only last 4 digits", + "is_deterministic": true + } + ] +} +``` + +#### `get_column_tags_api` + +Get column-level tags via the Tags API. + +```python +get_column_tags_api( + catalog: str, + schema: str, + table: str, +) +``` + +#### `get_schema_info` / `get_catalog_info` + +Get schema or catalog metadata via Unity Catalog API. + +```python +get_schema_info(catalog: str, schema: str) +get_catalog_info(catalog: str) +``` + +#### `list_table_policies_in_schema` + +List all tables in a schema with their column masks and row filters. + +```python +list_table_policies_in_schema( + catalog: str, + schema: str, +) +``` + +### Preview Tool (Human-in-the-Loop Gate) + +#### `preview_policy_changes` + +Preview policy changes without executing. This is the critical human-in-the-loop gate. 
+ +```python +preview_policy_changes( + action: str, # "CREATE", "UPDATE", or "DELETE" + policy_name: str, + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, + policy_type: str = None, # "COLUMN_MASK" or "ROW_FILTER" (for CREATE) + to_principals: list = None, + except_principals: list = None, + function_name: str = None, + tag_name: str = None, + tag_value: str = None, + comment: str = None, +) +``` + +**Returns:** +```json +{ + "success": true, + "action": "CREATE", + "preview": { + "policy_name": "mask_pii_ssn", + "policy_type": "COLUMN_MASK", + "securable": "SCHEMA my_catalog.my_schema", + "to_principals": ["analysts"], + "except_principals": ["gov_admin"], + "function": "my_catalog.my_schema.mask_ssn", + "tag_match": "hasTagValue('pii_type', 'ssn')", + "equivalent_sql": "CREATE OR REPLACE POLICY mask_pii_ssn\nON SCHEMA my_catalog.my_schema\n..." + }, + "warnings": [], + "requires_approval": true, + "approval_token": "a1b2c3...:eyJhY3Rpb24i...", + "message": "Review the preview above. Reply 'approve' to execute, passing the approval_token." +} +``` + +**Usage in workflow:** + +1. Call `preview_policy_changes` with proposed changes +2. Present preview to user (includes `approval_token`) +3. Wait for explicit approval +4. Pass `approval_token` to `create_fgac_policy`, `update_fgac_policy`, or `delete_fgac_policy` + +### Management Tools + +#### `create_fgac_policy` + +Create a new FGAC policy (COLUMN_MASK or ROW_FILTER). 
+ +```python +create_fgac_policy( + policy_name: str, + policy_type: str, # "COLUMN_MASK" or "ROW_FILTER" + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, + function_name: str, # Fully qualified UDF name + to_principals: list, # Users/groups the policy applies to + tag_name: str, # Tag key to match + approval_token: str, # Token from preview_policy_changes() + tag_value: str = None, # Tag value (optional, uses hasTag vs hasTagValue) + except_principals: list = None, # Excluded principals (gov_admin auto-added) + comment: str = "", +) +``` + +**Returns:** +```json +{ + "success": true, + "policy_name": "mask_pii_ssn", + "action": "created", + "details": { + "policy_type": "COLUMN_MASK", + "on_securable": "SCHEMA my_catalog.my_schema", + "function": "my_catalog.my_schema.mask_ssn", + "to_principals": ["analysts"], + "except_principals": ["gov_admin"] + } +} +``` + +#### `update_fgac_policy` + +Update an existing policy's principals or comment. + +```python +update_fgac_policy( + policy_name: str, + securable_type: str, + securable_fullname: str, + approval_token: str, # Token from preview_policy_changes() + to_principals: list = None, + except_principals: list = None, + comment: str = None, +) +``` + +**Returns:** +```json +{ + "success": true, + "policy_name": "mask_pii_ssn", + "action": "updated", + "changes": { + "to_principals": ["analysts", "data_scientists", "new_team"], + "comment": "Updated: added new_team" + } +} +``` + +> **Note:** To change the UDF, tag matching, or scope, drop and recreate the policy. + +#### `delete_fgac_policy` + +Delete an FGAC policy. 
+ +```python +delete_fgac_policy( + policy_name: str, + securable_type: str, + securable_fullname: str, + approval_token: str, # Token from preview_policy_changes() +) +``` + +**Returns:** +```json +{ + "success": true, + "policy_name": "mask_pii_ssn", + "action": "deleted" +} +``` + +--- + +## Human-in-the-Loop Workflow Example + +Complete workflow using MCP tools: + +``` +Step 1: ANALYZE +───────────────────────────────── +→ list_fgac_policies(securable_type="SCHEMA", securable_fullname="prod.finance") +→ get_column_tags_api(catalog="prod", schema="finance", table="customers") +→ get_masking_functions(catalog="prod", schema="finance") + +Step 2: RECOMMEND +───────────────────────────────── +→ Agent generates policy recommendations based on discovered tags and UDFs + +Step 3: PREVIEW (returns approval_token) +───────────────────────────────── +→ result = preview_policy_changes( + action="CREATE", + policy_name="mask_ssn_finance", + securable_type="SCHEMA", + securable_fullname="prod.finance", + policy_type="COLUMN_MASK", + function_name="prod.finance.mask_ssn", + to_principals=["analysts"], + tag_name="pii_type", + tag_value="ssn" + ) +→ token = result["approval_token"] + +Step 4: APPROVE +───────────────────────────────── +→ Human reviews preview and replies "approve" + +Step 5: EXECUTE (pass approval_token) +───────────────────────────────── +→ create_fgac_policy( + policy_name="mask_ssn_finance", + policy_type="COLUMN_MASK", + securable_type="SCHEMA", + securable_fullname="prod.finance", + function_name="prod.finance.mask_ssn", + to_principals=["analysts"], + tag_name="pii_type", + tag_value="ssn", + approval_token=token + ) + +Step 6: VERIFY +───────────────────────────────── +→ get_fgac_policy( + policy_name="mask_ssn_finance", + securable_type="SCHEMA", + securable_fullname="prod.finance" + ) +``` + +--- + +## Python SDK Direct Usage + +For writing custom code outside MCP tools, use the Databricks Python SDK directly. 
+ +### Setup + +```python +from databricks.sdk import WorkspaceClient + +w = WorkspaceClient() # Auto-detects credentials +``` + +### SDK Types + +```python +from databricks.sdk.service.catalog import ( + ColumnMaskOptions, + MatchColumn, + PolicyInfo, + PolicyType, + RowFilterOptions, + SecurableType, +) +``` + +### List Policies + +```python +policies = w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname="my_catalog", + include_inherited=True, +) + +for p in policies: + print(f"{p.name}: {p.policy_type} on {p.on_securable_fullname}") + +# Filter by type +column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] +row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] +``` + +### Get Policy + +```python +policy = w.policies.get_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", +) + +print(f"Policy: {policy.name}") +print(f"Type: {policy.policy_type}") +print(f"Principals: {policy.to_principals}") +print(f"Except: {policy.except_principals}") +``` + +### Create Column Mask Policy + +```python +policy_info = PolicyInfo( + name="mask_pii_ssn_schema", + policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, + on_securable_type=SecurableType.SCHEMA, + on_securable_fullname="my_catalog.my_schema", + for_securable_type=SecurableType.TABLE, + to_principals=["analysts", "data_scientists"], + except_principals=["gov_admin"], + comment="Mask SSN columns in schema", + column_mask=ColumnMaskOptions( + function_name="my_catalog.my_schema.mask_ssn", + on_column="masked_col", + ), + match_columns=[ + MatchColumn( + alias="masked_col", + condition="hasTagValue('pii_type', 'ssn')", + ) + ], +) +policy = w.policies.create_policy(policy_info=policy_info) +``` + +Change `on_securable_type` and `on_securable_fullname` to target catalog or table scope. 
+ +### Create Row Filter Policy + +```python +policy_info = PolicyInfo( + name="filter_eu_data_schema", + policy_type=PolicyType.POLICY_TYPE_ROW_FILTER, + on_securable_type=SecurableType.SCHEMA, + on_securable_fullname="my_catalog.my_schema", + for_securable_type=SecurableType.TABLE, + to_principals=["us_team"], + except_principals=["gov_admin"], + comment="Filter EU rows in schema", + row_filter=RowFilterOptions( + function_name="my_catalog.my_schema.is_not_eu_region", + ), + match_columns=[ + MatchColumn( + alias="filter_col", + condition="hasTagValue('region', 'eu')", + ) + ], +) +policy = w.policies.create_policy(policy_info=policy_info) +``` + +### Update Policy + +Update principals or comment on an existing policy. + +```python +update_info = PolicyInfo( + to_principals=["analysts", "data_scientists", "new_team"], + except_principals=["gov_admin", "senior_admins"], + comment="Updated: added new_team to masked principals", + for_securable_type=SecurableType.TABLE, + policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, +) +updated = w.policies.update_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + policy_info=update_info, + update_mask="to_principals,except_principals,comment", +) +``` + +> **Note:** To change the UDF, tag matching, or scope, you must drop and recreate the policy. `update_policy` only modifies principals and comment via `update_mask`. 
+ +### Delete Policy + +```python +w.policies.delete_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", +) +``` + +--- + +## Error Handling + +```python +from databricks.sdk.errors import NotFound, PermissionDenied, BadRequest + +try: + policy = w.policies.get_policy( + name="nonexistent_policy", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + ) +except NotFound: + print("Policy not found") +except PermissionDenied: + print("Insufficient permissions - need MANAGE on securable") +except BadRequest as e: + print(f"Invalid request: {e}") +``` + +| Error | Cause | Solution | +|-------|-------|----------| +| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Consolidate policies or use broader scope | +| `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | Check governed tag config in UI | +| `UDF_NOT_FOUND` | Masking UDF doesn't exist | Create UDF first, use fully qualified name | +| `POLICY_ALREADY_EXISTS` | Duplicate policy name | Use different name or delete existing first | +| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | Grant `MANAGE` permission | +| `INVALID_SECURABLE_TYPE` | Wrong securable type string | Use `"CATALOG"`, `"SCHEMA"`, or `"TABLE"` | + +--- + +## Common Patterns + +### Policy Summary with Counts + +```python +def get_policy_summary(w, catalog: str): + """Get a summary of all FGAC policies in a catalog.""" + policies = list(w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname=catalog, + include_inherited=True, + )) + + column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] + row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] + + return { + "total": len(policies), + "column_masks": len(column_masks), + "row_filters": len(row_filters), + "policies": [p.as_dict() for p in policies], + } +``` + +### Check Policy Quotas Before Creating + +```python +def check_quota(w, 
securable_type: str, securable_fullname: str): + """Check if policy quota allows creating a new policy.""" + quotas = {"CATALOG": 10, "SCHEMA": 10, "TABLE": 5} + max_policies = quotas.get(securable_type, 10) + + existing = list(w.policies.list_policies( + on_securable_type=securable_type, + on_securable_fullname=securable_fullname, + )) + + # Count only direct policies (not inherited) + direct = [p for p in existing + if p.on_securable_fullname == securable_fullname] + + return { + "current": len(direct), + "max": max_policies, + "can_create": len(direct) < max_policies, + } +``` + +### Async Usage (FastAPI, etc.) + +The Databricks SDK is synchronous. In async applications, wrap calls with `asyncio.to_thread()`: + +```python +import asyncio + +async def list_policies_async(w, catalog: str): + return await asyncio.to_thread( + lambda: list(w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname=catalog, + include_inherited=True, + )) + ) +``` diff --git a/FGAC_GUARDRAILS.md b/FGAC_GUARDRAILS.md new file mode 100644 index 00000000..99e37549 --- /dev/null +++ b/FGAC_GUARDRAILS.md @@ -0,0 +1,271 @@ +# FGAC Human-in-the-Loop Guardrails + +Fine-Grained Access Control (FGAC) policy mutations (create, update, delete) are protected by two programmatic guardrails that ensure every change is previewed, approved, and executed by an authorized user. + +--- + +## Architecture Overview + +``` + +------------------+ + | Human / Agent | + +--------+---------+ + | + 1. Request change + | + v + +----------------------------+ + | preview_policy_changes() | + | | + | - Validates parameters | + | - Generates SQL preview | + | - Signs params + timestamp | + | with HMAC-SHA256 | + | - Returns approval_token | + +-------------+--------------+ + | + 2. 
Preview + approval_token + | + v + +----------------------------+ + | Human Reviews Preview | + | | + | - Equivalent SQL shown | + | - Warnings displayed | + | - Approves or rejects | + +-------------+--------------+ + | + 3. "Approve" + token + | + v + +----------------------------+ + | create/update/delete_*() | + | | + | +-- Admin Group Check --+ | + | | w.current_user.me() | | + | | Is user in group? | | + | +---------+-------------+ | + | | Yes | + | v | + | +-- Token Validation ---+ | + | | Verify HMAC sig | | + | | Check TTL (10 min) | | + | | Match params | | + | +---------+-------------+ | + | | Valid | + | v | + | Execute SDK call | + +----------------------------+ +``` + +--- + +## Guardrail 1: Approval Token + +Every mutating call **requires** a cryptographic token obtained from `preview_policy_changes()`. This prevents any create/update/delete from executing without a prior preview step. + +### Token Lifecycle + +``` + preview_policy_changes(action="CREATE", policy_name="mask_ssn", ...) + | + | 1. Collect parameters + | 2. Add timestamp = now() + | 3. JSON serialize (sorted keys) + | 4. HMAC-SHA256(secret, payload) -> signature + | 5. Return "signature:base64(payload)" + | + v + approval_token = "a3f8c1...:eyJhY3Rpb24iOiJDUkVBVEUi..." + | + | Token is valid for 10 minutes + | Token is bound to exact parameters + | + v + create_fgac_policy(..., approval_token=token) + | + | 1. Split token -> signature + payload + | 2. Recompute HMAC, compare (constant-time) + | 3. Decode payload, check timestamp within TTL + | 4. Verify params match (action, policy_name, etc.) + | 5. 
Reject on any mismatch + | + v + Execute or Reject +``` + +### What the Token Binds + +The token cryptographically binds these fields: + +| Field | Purpose | +|-------|---------| +| `action` | CREATE, UPDATE, or DELETE | +| `policy_name` | Prevents using token A's preview for policy B | +| `securable_type` | CATALOG, SCHEMA, or TABLE | +| `securable_fullname` | The target securable | +| `policy_type` | COLUMN_MASK or ROW_FILTER (CREATE only) | +| `to_principals` | Who the policy applies to | +| `function_name` | The masking UDF (CREATE only) | +| `tag_name` / `tag_value` | Tag match condition (CREATE only) | +| `timestamp` | Ensures token expires after TTL | + +### Rejection Scenarios + +``` + Token from preview with policy_name="A" + Used in create with policy_name="B" + --> ValueError: "Invalid or expired approval token" + + Token generated 15 minutes ago (TTL = 10 min) + --> ValueError: "Invalid or expired approval token" + + Token string tampered with or fabricated + --> ValueError: "Invalid or expired approval token" + + No token provided at all + --> TypeError (missing required argument) +``` + +--- + +## Guardrail 2: Admin Group Check + +Before validating the token, the system verifies the caller belongs to a configurable admin group. + +``` + Mutating call received + | + v + +-----------------------------+ + | w.current_user.me() | + | Extract group memberships | + +-------------+---------------+ + | + +--------+--------+ + | | + "admins" in "admins" not + user.groups in user.groups + | | + v v + Continue to PermissionError: + token check "User 'x' is not a member + of admin group 'admins'" +``` + +--- + +## Configuration + +| Environment Variable | Default | Description | +|---------------------|---------|-------------| +| `FGAC_APPROVAL_SECRET` | `fgac-default-dev-secret` | HMAC signing secret | +| `FGAC_ADMIN_GROUP` | `admins` | Required group for mutations | + +> **Production**: Always set `FGAC_APPROVAL_SECRET` to a strong random value. 
The default is only suitable for development. + +Token TTL is set to **600 seconds (10 minutes)** via `_TOKEN_TTL_SECONDS` in the source. + +--- + +## End-to-End Workflow + +### Happy Path + +``` + Agent System Databricks + | | | + | 1. preview(CREATE, ...) | | + |----------------------------->| | + | | Generate token | + | <-- preview + token --------| | + | | | + | 2. Show preview to human | | + | 3. Human says "approve" | | + | | | + | 4. create(..., token) | | + |----------------------------->| | + | | Check admin group | + | | Validate token | + | | create_policy() ------------->| + | | | + | <-- success + policy -------| <-- policy created -----------| + | | | +``` + +### Rejection Path (Mismatched Params) + +``` + Agent System + | | + | 1. preview(CREATE, name=A) | + |----------------------------->| + | <-- token_A ----------------| + | | + | 2. create(name=B, token_A) | + |----------------------------->| + | | Check admin group -> OK + | | Validate token: + | | name=B != name=A in token + | <-- ValueError -------------| + | | +``` + +### Rejection Path (Not an Admin) + +``` + Agent System Databricks + | | | + | 1. preview(CREATE, ...) | | + |----------------------------->| | + | <-- token ------------------| | + | | | + | 2. 
create(..., token) | | + |----------------------------->| | + | | me() ------------------->| + | | <-- user (no admin grp) -| + | <-- PermissionError --------| | + | | | +``` + +--- + +## Code Locations + +| Component | File | +|-----------|------| +| Core guardrail functions | `databricks-tools-core/.../unity_catalog/fgac_policies.py` | +| MCP tool wrapper | `databricks-mcp-server/.../tools/fgac_policies.py` | +| Integration tests | `databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py` | +| Skill docs | `databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md` | + +### Key Functions + +| Function | Purpose | +|----------|---------| +| `_generate_approval_token(params)` | Signs preview params into a token | +| `_validate_approval_token(token, params)` | Verifies signature, TTL, and param match | +| `_check_admin_group()` | Verifies caller is in the admin group | +| `preview_policy_changes()` | Returns preview + `approval_token` | +| `create_fgac_policy(approval_token=...)` | Guarded policy creation | +| `update_fgac_policy(approval_token=...)` | Guarded policy update | +| `delete_fgac_policy(approval_token=...)` | Guarded policy deletion | + +--- + +## FAQ + +**Q: Can I skip the preview step and call create directly?** +No. `approval_token` is a required positional argument. Calling without it raises `TypeError`. + +**Q: Can I reuse a token for multiple operations?** +No. Each token is bound to exact parameters. A token for policy A cannot create policy B. + +**Q: What happens if my token expires?** +Call `preview_policy_changes()` again to get a fresh token. Tokens expire after 10 minutes. + +**Q: Does the admin check apply to read operations?** +No. Only `create`, `update`, and `delete` require admin membership. Discovery functions (`list`, `get`, `preview`) are unrestricted. + +**Q: How do I change the admin group?** +Set the `FGAC_ADMIN_GROUP` environment variable before starting the application. 
"""
Unity Catalog FGAC Policy MCP Tool

Consolidated MCP tool for managing Fine-Grained Access Control (FGAC) policies.
Dispatches to core functions in databricks-tools-core based on the action parameter.
"""

from typing import Any, Dict, List, Optional

from databricks_tools_core.unity_catalog import (
    list_fgac_policies as _list_fgac_policies,
    get_fgac_policy as _get_fgac_policy,
    get_table_policies as _get_table_policies,
    get_masking_functions as _get_masking_functions,
    check_policy_quota as _check_policy_quota,
    preview_policy_changes as _preview_policy_changes,
    create_fgac_policy as _create_fgac_policy,
    update_fgac_policy as _update_fgac_policy,
    delete_fgac_policy as _delete_fgac_policy,
)

from ..server import mcp


@mcp.tool
def manage_uc_fgac_policies(
    action: str,
    # All optional parameters are explicit Optional[...] (PEP 484): a default
    # of None with a bare `str`/`List[str]` annotation is an implicit-Optional
    # violation and can produce a wrong introspected schema for the MCP tool.
    securable_type: Optional[str] = None,
    securable_fullname: Optional[str] = None,
    policy_name: Optional[str] = None,
    policy_type: Optional[str] = None,
    to_principals: Optional[List[str]] = None,
    except_principals: Optional[List[str]] = None,
    function_name: Optional[str] = None,
    tag_name: Optional[str] = None,
    tag_value: Optional[str] = None,
    comment: Optional[str] = None,
    include_inherited: bool = True,
    catalog: Optional[str] = None,
    schema: Optional[str] = None,
    table: Optional[str] = None,
    preview_action: Optional[str] = None,
    approval_token: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Manage FGAC (Fine-Grained Access Control) policies on Unity Catalog securables.

    FGAC policies bind governed tags to masking UDFs or row filters, scoped to
    catalogs, schemas, or tables, and targeted at specific principals.

    Actions:
    - list: List policies on a securable. Params: securable_type, securable_fullname, include_inherited, policy_type
    - get: Get a specific policy. Params: policy_name, securable_type, securable_fullname
    - get_table_policies: Get column masks and row filters on a table. Params: catalog, schema, table
    - get_masking_functions: List masking UDFs in a schema. Params: catalog, schema
    - check_quota: Check policy quota on a securable. Params: securable_type, securable_fullname
    - preview: Preview policy changes without executing. Params: preview_action ("CREATE"/"UPDATE"/"DELETE"),
      policy_name, securable_type, securable_fullname, plus policy_type/function_name/tag_name/to_principals for CREATE
    - create: Create an FGAC policy. Params: policy_name, policy_type ("COLUMN_MASK"/"ROW_FILTER"),
      securable_type, securable_fullname, function_name, to_principals, tag_name, tag_value, except_principals, comment,
      approval_token (required, from preview)
    - update: Update policy principals or comment. Params: policy_name, securable_type, securable_fullname,
      to_principals, except_principals, comment, approval_token (required, from preview)
    - delete: Delete an FGAC policy. Params: policy_name, securable_type, securable_fullname,
      approval_token (required, from preview)

    Args:
        action: Operation to perform (see actions above)
        securable_type: "CATALOG", "SCHEMA", or "TABLE"
        securable_fullname: Fully qualified securable name (e.g., "my_catalog.my_schema")
        policy_name: Policy name
        policy_type: "COLUMN_MASK" or "ROW_FILTER" (for create/list/preview)
        to_principals: Users/groups the policy applies to
        except_principals: Excluded principals
        function_name: Fully qualified UDF name (e.g., "catalog.schema.mask_ssn")
        tag_name: Tag key to match columns on
        tag_value: Tag value to match (optional; omit for hasTag vs hasTagValue)
        comment: Policy description
        include_inherited: Include inherited policies in list (default: True)
        catalog: Catalog name (for get_table_policies, get_masking_functions)
        schema: Schema name (for get_table_policies, get_masking_functions)
        table: Table name (for get_table_policies)
        preview_action: Sub-action for preview: "CREATE", "UPDATE", or "DELETE"
        approval_token: Approval token from preview action (required for create/update/delete)

    Returns:
        Dict with operation result

    Raises:
        ValueError: If `action` is not one of the supported actions, or if
            the preview action is requested without `preview_action`.
    """
    act = action.lower()

    if act == "list":
        return _list_fgac_policies(
            securable_type=securable_type,
            securable_fullname=securable_fullname,
            include_inherited=include_inherited,
            policy_type=policy_type,
        )
    elif act == "get":
        return _get_fgac_policy(
            policy_name=policy_name,
            securable_type=securable_type,
            securable_fullname=securable_fullname,
        )
    elif act == "get_table_policies":
        return _get_table_policies(
            catalog=catalog,
            schema=schema,
            table=table,
        )
    elif act == "get_masking_functions":
        return _get_masking_functions(
            catalog=catalog,
            schema=schema,
        )
    elif act == "check_quota":
        return _check_policy_quota(
            securable_type=securable_type,
            securable_fullname=securable_fullname,
        )
    elif act == "preview":
        if not preview_action:
            raise ValueError("preview_action is required for preview action. Must be 'CREATE', 'UPDATE', or 'DELETE'.")
        return _preview_policy_changes(
            action=preview_action,
            policy_name=policy_name,
            securable_type=securable_type,
            securable_fullname=securable_fullname,
            policy_type=policy_type,
            to_principals=to_principals,
            except_principals=except_principals,
            function_name=function_name,
            tag_name=tag_name,
            tag_value=tag_value,
            comment=comment,
        )
    elif act == "create":
        return _create_fgac_policy(
            policy_name=policy_name,
            policy_type=policy_type,
            securable_type=securable_type,
            securable_fullname=securable_fullname,
            function_name=function_name,
            to_principals=to_principals,
            tag_name=tag_name,
            approval_token=approval_token,
            tag_value=tag_value,
            except_principals=except_principals,
            comment=comment or "",
        )
    elif act == "update":
        return _update_fgac_policy(
            policy_name=policy_name,
            securable_type=securable_type,
            securable_fullname=securable_fullname,
            approval_token=approval_token,
            to_principals=to_principals,
            except_principals=except_principals,
            comment=comment,
        )
    elif act == "delete":
        return _delete_fgac_policy(
            policy_name=policy_name,
            securable_type=securable_type,
            securable_fullname=securable_fullname,
            approval_token=approval_token,
        )

    raise ValueError(
        f"Invalid action: '{action}'. Valid actions: list, get, get_table_policies, "
        f"get_masking_functions, check_quota, preview, create, update, delete"
    )
+ +**SDK Docs:** https://databricks-sdk-py.readthedocs.io/en/latest/ +**FGAC Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/policies + +--- + +## Policy Scopes + +`on_securable_type` sets the **scope** of the policy. `for_securable_type` is always `TABLE`. + +| Scope | `on_securable_type` | `on_securable_fullname` | Effect | +|---|---|---|---| +| Catalog | `CATALOG` | `"my_catalog"` | Applies to all tables in the catalog | +| Schema | `SCHEMA` | `"my_catalog.my_schema"` | Applies to all tables in the schema | +| Table | `TABLE` | `"my_catalog.my_schema.my_table"` | Applies to a single table | + +### Important: Always Include `gov_admin` + +Every policy **MUST** include `"gov_admin"` in `except_principals`: + +```python +# CORRECT +except_principals=["gov_admin"] + +# CORRECT - additional admin groups +except_principals=["gov_admin", "platform_admins"] + +# WRONG - missing gov_admin +except_principals=["platform_admins"] # gov_admin must be included! +``` + +--- + +## Guardrails + +FGAC mutating operations (`create`, `update`, `delete`) enforce two programmatic guardrails: + +### Approval Token + +Every mutating call **requires** a valid `approval_token` obtained from `preview_policy_changes()`. The token is an HMAC-SHA256 signature binding the previewed parameters to a timestamp. + +- Token TTL: **10 minutes** (configurable via `_TOKEN_TTL_SECONDS`) +- Parameters must match exactly between preview and mutation +- Action mapping: preview `CREATE` → mutation `create`, `UPDATE` → `update`, `DELETE` → `delete` + +### Admin Group Check + +The caller must be a member of the configured admin group. Membership is verified via `w.current_user.me().groups`. 
+ +### Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `FGAC_APPROVAL_SECRET` | `fgac-default-dev-secret` | HMAC secret for token signing | +| `FGAC_ADMIN_GROUP` | `admins` | Required group membership for mutations | + +> **Important:** In production, always set `FGAC_APPROVAL_SECRET` to a strong random value. + +--- + +## MCP Tools + +### Discovery Tools + +#### `list_fgac_policies` + +List FGAC policies on a catalog, schema, or table. + +```python +list_fgac_policies( + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, # e.g., "my_catalog.my_schema" + include_inherited: bool = True, + policy_type: str = None, # "COLUMN_MASK" or "ROW_FILTER" (optional filter) +) +``` + +**Returns:** +```json +{ + "success": true, + "securable_type": "SCHEMA", + "securable_fullname": "my_catalog.my_schema", + "policy_count": 3, + "policies": [ + { + "name": "mask_pii_ssn", + "policy_type": "COLUMN_MASK", + "to_principals": ["analysts"], + "except_principals": ["gov_admin"], + "on_securable_fullname": "my_catalog.my_schema", + "column_mask": {"function_name": "my_catalog.my_schema.mask_ssn", "on_column": "masked_col"}, + "match_columns": [{"alias": "masked_col", "condition": "hasTagValue('pii_type', 'ssn')"}] + } + ] +} +``` + +#### `get_fgac_policy` + +Get details for a specific policy by name. 
+ +```python +get_fgac_policy( + policy_name: str, # Policy name + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, # Fully qualified securable name +) +``` + +**Returns:** +```json +{ + "success": true, + "policy": { + "name": "mask_pii_ssn", + "policy_type": "COLUMN_MASK", + "comment": "Mask SSN columns for analysts", + "to_principals": ["analysts", "data_scientists"], + "except_principals": ["gov_admin"], + "on_securable_type": "SCHEMA", + "on_securable_fullname": "my_catalog.my_schema", + "for_securable_type": "TABLE", + "column_mask": {"function_name": "my_catalog.my_schema.mask_ssn", "on_column": "masked_col"}, + "match_columns": [{"alias": "masked_col", "condition": "hasTagValue('pii_type', 'ssn')"}] + } +} +``` + +#### `get_table_policies` + +Get column masks and row filters for a specific table via Unity Catalog API. + +```python +get_table_policies( + catalog: str, + schema: str, + table: str, +) +``` + +**Returns:** +```json +{ + "success": true, + "table": "my_catalog.my_schema.my_table", + "column_masks": [ + { + "column_name": "ssn", + "column_type": "STRING", + "mask_functions": ["my_catalog.my_schema.mask_ssn"] + } + ], + "row_filters": [ + { + "function_name": "my_catalog.my_schema.is_not_eu_region", + "input_column_names": ["region"] + } + ] +} +``` + +#### `get_masking_functions` + +List masking UDFs in a schema. + +```python +get_masking_functions( + catalog: str, + schema: str, +) +``` + +**Returns:** +```json +{ + "success": true, + "catalog": "my_catalog", + "schema": "my_schema", + "functions": [ + { + "name": "mask_ssn", + "full_name": "my_catalog.my_schema.mask_ssn", + "return_type": "STRING", + "comment": "Masks SSN showing only last 4 digits", + "is_deterministic": true + } + ] +} +``` + +#### `get_column_tags_api` + +Get column-level tags via the Tags API. 
+ +```python +get_column_tags_api( + catalog: str, + schema: str, + table: str, +) +``` + +#### `get_schema_info` / `get_catalog_info` + +Get schema or catalog metadata via Unity Catalog API. + +```python +get_schema_info(catalog: str, schema: str) +get_catalog_info(catalog: str) +``` + +#### `list_table_policies_in_schema` + +List all tables in a schema with their column masks and row filters. + +```python +list_table_policies_in_schema( + catalog: str, + schema: str, +) +``` + +### Preview Tool (Human-in-the-Loop Gate) + +#### `preview_policy_changes` + +Preview policy changes without executing. This is the critical human-in-the-loop gate. + +```python +preview_policy_changes( + action: str, # "CREATE", "UPDATE", or "DELETE" + policy_name: str, + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, + policy_type: str = None, # "COLUMN_MASK" or "ROW_FILTER" (for CREATE) + to_principals: list = None, + except_principals: list = None, + function_name: str = None, + tag_name: str = None, + tag_value: str = None, + comment: str = None, +) +``` + +**Returns:** +```json +{ + "success": true, + "action": "CREATE", + "preview": { + "policy_name": "mask_pii_ssn", + "policy_type": "COLUMN_MASK", + "securable": "SCHEMA my_catalog.my_schema", + "to_principals": ["analysts"], + "except_principals": ["gov_admin"], + "function": "my_catalog.my_schema.mask_ssn", + "tag_match": "hasTagValue('pii_type', 'ssn')", + "equivalent_sql": "CREATE OR REPLACE POLICY mask_pii_ssn\nON SCHEMA my_catalog.my_schema\n..." + }, + "warnings": [], + "requires_approval": true, + "approval_token": "a1b2c3...:eyJhY3Rpb24i...", + "message": "Review the preview above. Reply 'approve' to execute, passing the approval_token." +} +``` + +**Usage in workflow:** + +1. Call `preview_policy_changes` with proposed changes +2. Present preview to user (includes `approval_token`) +3. Wait for explicit approval +4. 
Pass `approval_token` to `create_fgac_policy`, `update_fgac_policy`, or `delete_fgac_policy` + +### Management Tools + +#### `create_fgac_policy` + +Create a new FGAC policy (COLUMN_MASK or ROW_FILTER). + +```python +create_fgac_policy( + policy_name: str, + policy_type: str, # "COLUMN_MASK" or "ROW_FILTER" + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, + function_name: str, # Fully qualified UDF name + to_principals: list, # Users/groups the policy applies to + tag_name: str, # Tag key to match + approval_token: str, # Token from preview_policy_changes() + tag_value: str = None, # Tag value (optional, uses hasTag vs hasTagValue) + except_principals: list = None, # Excluded principals (gov_admin auto-added) + comment: str = "", +) +``` + +**Returns:** +```json +{ + "success": true, + "policy_name": "mask_pii_ssn", + "action": "created", + "details": { + "policy_type": "COLUMN_MASK", + "on_securable": "SCHEMA my_catalog.my_schema", + "function": "my_catalog.my_schema.mask_ssn", + "to_principals": ["analysts"], + "except_principals": ["gov_admin"] + } +} +``` + +#### `update_fgac_policy` + +Update an existing policy's principals or comment. + +```python +update_fgac_policy( + policy_name: str, + securable_type: str, + securable_fullname: str, + approval_token: str, # Token from preview_policy_changes() + to_principals: list = None, + except_principals: list = None, + comment: str = None, +) +``` + +**Returns:** +```json +{ + "success": true, + "policy_name": "mask_pii_ssn", + "action": "updated", + "changes": { + "to_principals": ["analysts", "data_scientists", "new_team"], + "comment": "Updated: added new_team" + } +} +``` + +> **Note:** To change the UDF, tag matching, or scope, drop and recreate the policy. + +#### `delete_fgac_policy` + +Delete an FGAC policy. 
+ +```python +delete_fgac_policy( + policy_name: str, + securable_type: str, + securable_fullname: str, + approval_token: str, # Token from preview_policy_changes() +) +``` + +**Returns:** +```json +{ + "success": true, + "policy_name": "mask_pii_ssn", + "action": "deleted" +} +``` + +--- + +## Human-in-the-Loop Workflow Example + +Complete workflow using MCP tools: + +``` +Step 1: ANALYZE +───────────────────────────────── +→ list_fgac_policies(securable_type="SCHEMA", securable_fullname="prod.finance") +→ get_column_tags_api(catalog="prod", schema="finance", table="customers") +→ get_masking_functions(catalog="prod", schema="finance") + +Step 2: RECOMMEND +───────────────────────────────── +→ Agent generates policy recommendations based on discovered tags and UDFs + +Step 3: PREVIEW (returns approval_token) +───────────────────────────────── +→ result = preview_policy_changes( + action="CREATE", + policy_name="mask_ssn_finance", + securable_type="SCHEMA", + securable_fullname="prod.finance", + policy_type="COLUMN_MASK", + function_name="prod.finance.mask_ssn", + to_principals=["analysts"], + tag_name="pii_type", + tag_value="ssn" + ) +→ token = result["approval_token"] + +Step 4: APPROVE +───────────────────────────────── +→ Human reviews preview and replies "approve" + +Step 5: EXECUTE (pass approval_token) +───────────────────────────────── +→ create_fgac_policy( + policy_name="mask_ssn_finance", + policy_type="COLUMN_MASK", + securable_type="SCHEMA", + securable_fullname="prod.finance", + function_name="prod.finance.mask_ssn", + to_principals=["analysts"], + tag_name="pii_type", + tag_value="ssn", + approval_token=token + ) + +Step 6: VERIFY +───────────────────────────────── +→ get_fgac_policy( + policy_name="mask_ssn_finance", + securable_type="SCHEMA", + securable_fullname="prod.finance" + ) +``` + +--- + +## Python SDK Direct Usage + +For writing custom code outside MCP tools, use the Databricks Python SDK directly. 
+ +### Setup + +```python +from databricks.sdk import WorkspaceClient + +w = WorkspaceClient() # Auto-detects credentials +``` + +### SDK Types + +```python +from databricks.sdk.service.catalog import ( + ColumnMaskOptions, + MatchColumn, + PolicyInfo, + PolicyType, + RowFilterOptions, + SecurableType, +) +``` + +### List Policies + +```python +policies = w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname="my_catalog", + include_inherited=True, +) + +for p in policies: + print(f"{p.name}: {p.policy_type} on {p.on_securable_fullname}") + +# Filter by type +column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] +row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] +``` + +### Get Policy + +```python +policy = w.policies.get_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", +) + +print(f"Policy: {policy.name}") +print(f"Type: {policy.policy_type}") +print(f"Principals: {policy.to_principals}") +print(f"Except: {policy.except_principals}") +``` + +### Create Column Mask Policy + +```python +policy_info = PolicyInfo( + name="mask_pii_ssn_schema", + policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, + on_securable_type=SecurableType.SCHEMA, + on_securable_fullname="my_catalog.my_schema", + for_securable_type=SecurableType.TABLE, + to_principals=["analysts", "data_scientists"], + except_principals=["gov_admin"], + comment="Mask SSN columns in schema", + column_mask=ColumnMaskOptions( + function_name="my_catalog.my_schema.mask_ssn", + on_column="masked_col", + ), + match_columns=[ + MatchColumn( + alias="masked_col", + condition="hasTagValue('pii_type', 'ssn')", + ) + ], +) +policy = w.policies.create_policy(policy_info=policy_info) +``` + +Change `on_securable_type` and `on_securable_fullname` to target catalog or table scope. 
+ +### Create Row Filter Policy + +```python +policy_info = PolicyInfo( + name="filter_eu_data_schema", + policy_type=PolicyType.POLICY_TYPE_ROW_FILTER, + on_securable_type=SecurableType.SCHEMA, + on_securable_fullname="my_catalog.my_schema", + for_securable_type=SecurableType.TABLE, + to_principals=["us_team"], + except_principals=["gov_admin"], + comment="Filter EU rows in schema", + row_filter=RowFilterOptions( + function_name="my_catalog.my_schema.is_not_eu_region", + ), + match_columns=[ + MatchColumn( + alias="filter_col", + condition="hasTagValue('region', 'eu')", + ) + ], +) +policy = w.policies.create_policy(policy_info=policy_info) +``` + +### Update Policy + +Update principals or comment on an existing policy. + +```python +update_info = PolicyInfo( + to_principals=["analysts", "data_scientists", "new_team"], + except_principals=["gov_admin", "senior_admins"], + comment="Updated: added new_team to masked principals", + for_securable_type=SecurableType.TABLE, + policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, +) +updated = w.policies.update_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + policy_info=update_info, + update_mask="to_principals,except_principals,comment", +) +``` + +> **Note:** To change the UDF, tag matching, or scope, you must drop and recreate the policy. `update_policy` only modifies principals and comment via `update_mask`. 
+ +### Delete Policy + +```python +w.policies.delete_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", +) +``` + +--- + +## Error Handling + +```python +from databricks.sdk.errors import NotFound, PermissionDenied, BadRequest + +try: + policy = w.policies.get_policy( + name="nonexistent_policy", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + ) +except NotFound: + print("Policy not found") +except PermissionDenied: + print("Insufficient permissions - need MANAGE on securable") +except BadRequest as e: + print(f"Invalid request: {e}") +``` + +| Error | Cause | Solution | +|-------|-------|----------| +| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Consolidate policies or use broader scope | +| `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | Check governed tag config in UI | +| `UDF_NOT_FOUND` | Masking UDF doesn't exist | Create UDF first, use fully qualified name | +| `POLICY_ALREADY_EXISTS` | Duplicate policy name | Use different name or delete existing first | +| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | Grant `MANAGE` permission | +| `INVALID_SECURABLE_TYPE` | Wrong securable type string | Use `"CATALOG"`, `"SCHEMA"`, or `"TABLE"` | + +--- + +## Common Patterns + +### Policy Summary with Counts + +```python +def get_policy_summary(w, catalog: str): + """Get a summary of all FGAC policies in a catalog.""" + policies = list(w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname=catalog, + include_inherited=True, + )) + + column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] + row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] + + return { + "total": len(policies), + "column_masks": len(column_masks), + "row_filters": len(row_filters), + "policies": [p.as_dict() for p in policies], + } +``` + +### Check Policy Quotas Before Creating + +```python +def check_quota(w, 
securable_type: str, securable_fullname: str): + """Check if policy quota allows creating a new policy.""" + quotas = {"CATALOG": 10, "SCHEMA": 10, "TABLE": 5} + max_policies = quotas.get(securable_type, 10) + + existing = list(w.policies.list_policies( + on_securable_type=securable_type, + on_securable_fullname=securable_fullname, + )) + + # Count only direct policies (not inherited) + direct = [p for p in existing + if p.on_securable_fullname == securable_fullname] + + return { + "current": len(direct), + "max": max_policies, + "can_create": len(direct) < max_policies, + } +``` + +### Async Usage (FastAPI, etc.) + +The Databricks SDK is synchronous. In async applications, wrap calls with `asyncio.to_thread()`: + +```python +import asyncio + +async def list_policies_async(w, catalog: str): + return await asyncio.to_thread( + lambda: list(w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname=catalog, + include_inherited=True, + )) + ) +``` diff --git a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py index c75ff1a6..b1a01234 100644 --- a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py @@ -1,10 +1,10 @@ """ -Unity Catalog - ABAC Policy Operations +Unity Catalog - FGAC Policy Operations -Functions for managing Attribute-Based Access Control (ABAC) policies +Functions for managing Fine-Grained Access Control (FGAC) policies via the Databricks Python SDK (WorkspaceClient.policies). -ABAC policies bind governed tags to masking UDFs or row filters, scoped to +FGAC policies bind governed tags to masking UDFs or row filters, scoped to catalogs, schemas, or tables, and targeted at specific principals. 
import base64
import hashlib
import hmac
import json
import os
import time

# Guardrail configuration.
# NOTE(review): the default secret is only suitable for development — in
# production FGAC_APPROVAL_SECRET must be set to a strong random value.
_APPROVAL_SECRET = os.environ.get("FGAC_APPROVAL_SECRET", "fgac-default-dev-secret")
_ADMIN_GROUP = os.environ.get("FGAC_ADMIN_GROUP", "admins")
_TOKEN_TTL_SECONDS = 600  # 10 minutes


def _generate_approval_token(params: dict) -> str:
    """Generate an HMAC-based approval token binding preview params to a timestamp.

    The token has the form "<hex signature>:<base64 JSON payload>". The payload
    is `params` with None values stripped plus an integer "timestamp", JSON
    serialized with sorted keys so the signature is deterministic for a given
    parameter set.
    """
    clean_params = {k: v for k, v in params.items() if v is not None}
    clean_params["timestamp"] = int(time.time())
    payload = json.dumps(clean_params, sort_keys=True)
    signature = hmac.new(
        _APPROVAL_SECRET.encode(), payload.encode(), hashlib.sha256
    ).hexdigest()
    b64_payload = base64.b64encode(payload.encode()).decode()
    return f"{signature}:{b64_payload}"


def _validate_approval_token(approval_token: str, current_params: dict) -> None:
    """Validate an approval token against current parameters.

    Raises ValueError if the token is malformed, carries a bad signature, is
    older than _TOKEN_TTL_SECONDS, or its bound parameters do not match
    `current_params`. All failure modes raise the same generic message so a
    caller cannot probe which check failed; `from None` suppresses the
    internal exception context for the same reason.

    Unlike the original implementation, `current_params` is never mutated
    (the old code popped "action" out of the caller's dict), and comparison
    goes through the same JSON round-trip used at generation time so an
    equivalent value (e.g. a tuple where the token stored a list) is not
    falsely rejected.
    """
    try:
        signature, b64_payload = approval_token.split(":", 1)
    except (ValueError, AttributeError):
        raise ValueError("Invalid or expired approval token") from None

    try:
        payload = base64.b64decode(b64_payload).decode()
    except Exception:
        raise ValueError("Invalid or expired approval token") from None

    expected_sig = hmac.new(
        _APPROVAL_SECRET.encode(), payload.encode(), hashlib.sha256
    ).hexdigest()
    # Constant-time comparison so signature bytes cannot leak via timing.
    if not hmac.compare_digest(signature, expected_sig):
        raise ValueError("Invalid or expired approval token")

    try:
        token_data = json.loads(payload)
    except json.JSONDecodeError:
        raise ValueError("Invalid or expired approval token") from None

    # A missing or non-numeric timestamp is treated as expired rather than
    # letting the subtraction raise TypeError.
    ts = token_data.pop("timestamp", 0)
    if not isinstance(ts, (int, float)) or abs(time.time() - ts) > _TOKEN_TTL_SECONDS:
        raise ValueError("Invalid or expired approval token")

    # Work on a None-stripped copy; never mutate the caller's argument.
    current = {k: v for k, v in current_params.items() if v is not None}

    # Map preview action (CREATE/UPDATE/DELETE) to the mutation action name.
    action_map = {"CREATE": "create", "UPDATE": "update", "DELETE": "delete"}
    token_action = token_data.pop("action", None)
    current_action = current.pop("action", None)
    if token_action and current_action:
        if action_map.get(token_action) != current_action:
            raise ValueError("Invalid or expired approval token")

    # Normalize through the same JSON round-trip used when the token was
    # generated (tuples become lists, keys become strings, etc.) before
    # comparing. Values that cannot be serialized could never have been
    # bound into a token, so treat them as a mismatch.
    try:
        current = json.loads(json.dumps(current, sort_keys=True))
    except (TypeError, ValueError):
        raise ValueError("Invalid or expired approval token") from None
    if token_data != current:
        raise ValueError("Invalid or expired approval token")


def _check_admin_group() -> dict:
    """Verify the current user belongs to the configured admin group.

    Relies on get_workspace_client() from this package's auth module. Returns
    a small status dict on success; raises PermissionError if the caller is
    not a member of _ADMIN_GROUP.
    """
    w = get_workspace_client()
    me = w.current_user.me()
    group_names = [g.display for g in (me.groups or []) if g.display]
    if _ADMIN_GROUP not in group_names:
        raise PermissionError(
            f"User '{me.user_name}' is not a member of admin group '{_ADMIN_GROUP}'. "
            f"FGAC mutating operations require membership in the '{_ADMIN_GROUP}' group."
        )
    return {"is_admin": True, "user": me.user_name, "admin_group": _ADMIN_GROUP}
The policy will be permanently removed.") + # Generate approval token binding these params + token_params = { + "action": action, + "policy_name": policy_name, + "securable_type": stype, + "securable_fullname": securable_fullname, + } + if policy_type: + token_params["policy_type"] = _validate_policy_type(policy_type) + if to_principals is not None: + token_params["to_principals"] = to_principals + if except_principals is not None: + token_params["except_principals"] = safe_except + if function_name is not None: + token_params["function_name"] = function_name + if tag_name is not None: + token_params["tag_name"] = tag_name + if tag_value is not None: + token_params["tag_value"] = tag_value + if comment is not None: + token_params["comment"] = comment + + approval_token = _generate_approval_token(token_params) + return { "success": True, "action": action, "preview": preview, "warnings": warnings, "requires_approval": True, - "message": "Review the preview above. Reply 'approve' to execute.", + "approval_token": approval_token, + "message": "Review the preview above. Reply 'approve' to execute, passing the approval_token.", } @@ -497,7 +604,7 @@ def preview_policy_changes( # --------------------------------------------------------------------------- -def create_abac_policy( +def create_fgac_policy( policy_name: str, policy_type: str, securable_type: str, @@ -505,12 +612,16 @@ def create_abac_policy( function_name: str, to_principals: List[str], tag_name: str, + approval_token: str, tag_value: Optional[str] = None, except_principals: Optional[List[str]] = None, comment: str = "", ) -> Dict[str, Any]: """ - Create a new ABAC policy (COLUMN_MASK or ROW_FILTER). + Create a new FGAC policy (COLUMN_MASK or ROW_FILTER). + + Requires a valid approval_token from preview_policy_changes() and + the caller must be a member of the configured admin group. 
Args: policy_name: Policy name (must be unique within the securable scope) @@ -520,6 +631,7 @@ def create_abac_policy( function_name: Fully qualified UDF name (e.g., "catalog.schema.mask_ssn") to_principals: Users/groups the policy applies to tag_name: Tag key to match columns on + approval_token: Token from preview_policy_changes() tag_value: Tag value to match (optional; omit for hasTag vs hasTagValue) except_principals: Excluded principals comment: Policy description @@ -527,8 +639,27 @@ def create_abac_policy( Returns: Dict with creation status and policy details """ + _check_admin_group() ptype = _validate_policy_type(policy_type) stype = _validate_securable_type(securable_type) + current_params = { + "action": "create", + "policy_name": policy_name, + "policy_type": ptype, + "securable_type": stype, + "securable_fullname": securable_fullname, + "function_name": function_name, + "to_principals": to_principals, + "tag_name": tag_name, + } + if tag_value is not None: + current_params["tag_value"] = tag_value + if except_principals is not None: + current_params["except_principals"] = list(except_principals) + if comment: + current_params["comment"] = comment + _validate_approval_token(approval_token, current_params) + _validate_identifier(securable_fullname) _validate_identifier(function_name) @@ -589,16 +720,20 @@ def create_abac_policy( } -def update_abac_policy( +def update_fgac_policy( policy_name: str, securable_type: str, securable_fullname: str, + approval_token: str, to_principals: Optional[List[str]] = None, except_principals: Optional[List[str]] = None, comment: Optional[str] = None, ) -> Dict[str, Any]: """ - Update an existing ABAC policy's principals or comment. + Update an existing FGAC policy's principals or comment. + + Requires a valid approval_token from preview_policy_changes() and + the caller must be a member of the configured admin group. Only principals and comment can be modified. 
To change the UDF, tag matching, or scope, drop and recreate the policy. @@ -607,6 +742,7 @@ def update_abac_policy( policy_name: Policy name securable_type: "CATALOG", "SCHEMA", or "TABLE" securable_fullname: Fully qualified securable name + approval_token: Token from preview_policy_changes() to_principals: Updated list of principals the policy applies to except_principals: Updated excluded principals comment: Updated policy description @@ -614,7 +750,22 @@ def update_abac_policy( Returns: Dict with update status and applied changes """ + _check_admin_group() stype = _validate_securable_type(securable_type) + current_params = { + "action": "update", + "policy_name": policy_name, + "securable_type": stype, + "securable_fullname": securable_fullname, + } + if to_principals is not None: + current_params["to_principals"] = to_principals + if except_principals is not None: + current_params["except_principals"] = list(except_principals) + if comment is not None: + current_params["comment"] = comment + _validate_approval_token(approval_token, current_params) + _validate_identifier(securable_fullname) from databricks.sdk.service.catalog import PolicyInfo @@ -670,13 +821,17 @@ def update_abac_policy( } -def delete_abac_policy( +def delete_fgac_policy( policy_name: str, securable_type: str, securable_fullname: str, + approval_token: str, ) -> Dict[str, Any]: """ - Delete an ABAC policy. + Delete an FGAC policy. + + Requires a valid approval_token from preview_policy_changes() and + the caller must be a member of the configured admin group. This is irreversible. The policy will be permanently removed. 
@@ -684,12 +839,21 @@ def delete_abac_policy( policy_name: Policy name securable_type: "CATALOG", "SCHEMA", or "TABLE" securable_fullname: Fully qualified securable name + approval_token: Token from preview_policy_changes() Returns: Dict with deletion status """ + _check_admin_group() stype = _validate_securable_type(securable_type) _validate_identifier(securable_fullname) + current_params = { + "action": "delete", + "policy_name": policy_name, + "securable_type": stype, + "securable_fullname": securable_fullname, + } + _validate_approval_token(approval_token, current_params) w = get_workspace_client() w.policies.delete_policy( diff --git a/databricks-tools-core/tests/conftest.py b/databricks-tools-core/tests/conftest.py index ab854a2b..6a8d4fbc 100644 --- a/databricks-tools-core/tests/conftest.py +++ b/databricks-tools-core/tests/conftest.py @@ -117,7 +117,9 @@ def warehouse_id(workspace_client: WorkspaceClient) -> str: Get a running SQL warehouse for tests. Prefers shared endpoints, falls back to any running warehouse. + Starts a stopped serverless warehouse if none are running. 
""" + import time from databricks.sdk.service.sql import State warehouses = list(workspace_client.warehouses.list()) @@ -134,6 +136,18 @@ def warehouse_id(workspace_client: WorkspaceClient) -> str: logger.info(f"Using warehouse: {w.name} ({w.id})") return w.id + # Start a stopped serverless warehouse + for w in warehouses: + if w.state == State.STOPPED and "serverless" in (w.name or "").lower(): + logger.info(f"Starting stopped serverless warehouse: {w.name} ({w.id})") + workspace_client.warehouses.start(w.id) + for _ in range(30): + wh = workspace_client.warehouses.get(w.id) + if wh.state == State.RUNNING: + logger.info(f"Warehouse started: {w.name} ({w.id})") + return w.id + time.sleep(10) + # No running warehouse found pytest.skip("No running SQL warehouse available for tests") diff --git a/databricks-tools-core/tests/integration/unity_catalog/conftest.py b/databricks-tools-core/tests/integration/unity_catalog/conftest.py index 7cd244b6..dbff949b 100644 --- a/databricks-tools-core/tests/integration/unity_catalog/conftest.py +++ b/databricks-tools-core/tests/integration/unity_catalog/conftest.py @@ -235,14 +235,14 @@ def register(full_function_name: str): @pytest.fixture(scope="function") def cleanup_policies(): """ - Track and cleanup ABAC policies created during tests. + Track and cleanup FGAC policies created during tests. Usage: def test_create_policy(cleanup_policies): - create_abac_policy(...) + create_fgac_policy(...) 
cleanup_policies((policy_name, securable_type, securable_fullname)) """ - from databricks_tools_core.unity_catalog import delete_abac_policy + from databricks_tools_core.auth import get_workspace_client policies_to_cleanup = [] @@ -254,13 +254,15 @@ def register(policy_tuple: tuple): yield register + # Use SDK directly to bypass approval token guardrails during cleanup + w = get_workspace_client() for name, stype, sfullname in policies_to_cleanup: try: logger.info(f"Cleaning up policy: {name}") - delete_abac_policy( - policy_name=name, - securable_type=stype, - securable_fullname=sfullname, + w.policies.delete_policy( + on_securable_type=stype, + on_securable_fullname=sfullname, + name=name, ) except Exception as e: logger.warning(f"Failed to cleanup policy {name}: {e}") diff --git a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py index cea877bc..eae1f3e4 100644 --- a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py +++ b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py @@ -1,23 +1,24 @@ """ -Integration tests for Unity Catalog ABAC Policy operations. +Integration tests for Unity Catalog FGAC Policy operations. -Tests the abac_policies module functions: -- list_abac_policies -- get_abac_policy +Tests the fgac_policies module functions: +- list_fgac_policies +- get_fgac_policy - get_table_policies - get_masking_functions - check_policy_quota - preview_policy_changes -- create_abac_policy / update_abac_policy / delete_abac_policy +- create_fgac_policy / update_fgac_policy / delete_fgac_policy Governed Tags ------------- -ABAC policies require **governed tags** (not regular metadata tags). +FGAC policies require **governed tags** (not regular metadata tags). The CRUD tests automatically create and clean up governed tags via the Tag Policies API (``w.tag_policies``). No manual UI setup is needed. 
""" import logging +import os import time import pytest @@ -28,16 +29,17 @@ create_security_function, set_tags, ) -from databricks_tools_core.unity_catalog.abac_policies import ( - list_abac_policies, - get_abac_policy, +from databricks_tools_core.unity_catalog.fgac_policies import ( + list_fgac_policies, + get_fgac_policy, get_table_policies, get_masking_functions, check_policy_quota, preview_policy_changes, - create_abac_policy, - update_abac_policy, - delete_abac_policy, + create_fgac_policy, + update_fgac_policy, + delete_fgac_policy, + _check_admin_group, ) logger = logging.getLogger(__name__) @@ -51,12 +53,12 @@ @pytest.mark.integration -class TestListAbacPolicies: - """Tests for listing ABAC policies.""" +class TestListFgacPolicies: + """Tests for listing FGAC policies.""" def test_list_policies_on_catalog(self, test_catalog: str): """Should list policies on a catalog (may be empty).""" - result = list_abac_policies( + result = list_fgac_policies( securable_type="CATALOG", securable_fullname=test_catalog, ) @@ -71,7 +73,7 @@ def test_list_policies_on_catalog(self, test_catalog: str): def test_list_policies_on_schema(self, test_catalog: str, uc_test_schema: str): """Should list policies on a schema.""" full_name = f"{test_catalog}.{uc_test_schema}" - result = list_abac_policies( + result = list_fgac_policies( securable_type="SCHEMA", securable_fullname=full_name, ) @@ -83,7 +85,7 @@ def test_list_policies_on_schema(self, test_catalog: str, uc_test_schema: str): def test_list_policies_with_type_filter(self, test_catalog: str): """Should filter policies by type.""" - result = list_abac_policies( + result = list_fgac_policies( securable_type="CATALOG", securable_fullname=test_catalog, policy_type="COLUMN_MASK", @@ -96,7 +98,7 @@ def test_list_policies_with_type_filter(self, test_catalog: str): def test_list_policies_without_inherited(self, test_catalog: str): """Should list only direct policies when include_inherited=False.""" - result = list_abac_policies( + 
result = list_fgac_policies( securable_type="CATALOG", securable_fullname=test_catalog, include_inherited=False, @@ -336,13 +338,13 @@ def test_preview_update(self): @pytest.mark.integration -class TestAbacPolicyValidation: - """Tests for input validation in ABAC policy functions.""" +class TestFgacPolicyValidation: + """Tests for input validation in FGAC policy functions.""" def test_invalid_securable_type_raises(self): """Should raise ValueError for invalid securable type.""" with pytest.raises(ValueError) as exc_info: - list_abac_policies( + list_fgac_policies( securable_type="INVALID", securable_fullname="test", ) @@ -440,7 +442,7 @@ def test_create_preview_missing_principals_raises(self): def test_invalid_identifier_raises(self): """Should raise ValueError for SQL injection attempts.""" with pytest.raises(ValueError) as exc_info: - list_abac_policies( + list_fgac_policies( securable_type="CATALOG", securable_fullname="DROP TABLE; --", ) @@ -448,13 +450,213 @@ def test_invalid_identifier_raises(self): assert "invalid sql identifier" in str(exc_info.value).lower() +# --------------------------------------------------------------------------- +# Approval token enforcement tests +# --------------------------------------------------------------------------- + + +@pytest.mark.integration +class TestApprovalTokenEnforcement: + """Tests for approval token guardrails on mutating operations.""" + + def test_create_without_token_raises(self): + """create_fgac_policy without approval_token should raise TypeError.""" + with pytest.raises(TypeError): + create_fgac_policy( + policy_name="test_no_token", + policy_type="COLUMN_MASK", + securable_type="SCHEMA", + securable_fullname="cat.sch", + function_name="cat.sch.fn", + to_principals=["analysts"], + tag_name="pii", + ) + + def test_create_with_invalid_token_raises(self): + """create_fgac_policy with an invalid token should raise ValueError.""" + with pytest.raises((ValueError, PermissionError)): + create_fgac_policy( + 
policy_name="test_bad_token", + policy_type="COLUMN_MASK", + securable_type="SCHEMA", + securable_fullname="cat.sch", + function_name="cat.sch.fn", + to_principals=["analysts"], + tag_name="pii", + approval_token="garbage", + ) + + def test_preview_returns_approval_token(self): + """preview_policy_changes should return an approval_token.""" + result = preview_policy_changes( + action="CREATE", + policy_name="test_token_preview", + securable_type="SCHEMA", + securable_fullname="my_catalog.my_schema", + policy_type="COLUMN_MASK", + to_principals=["analysts"], + function_name="my_catalog.my_schema.mask_ssn", + tag_name="pii_type", + tag_value="ssn", + ) + + assert result["success"] is True + assert "approval_token" in result + assert isinstance(result["approval_token"], str) + assert ":" in result["approval_token"] + logger.info("Preview returned approval token") + + def test_full_preview_then_create_workflow( + self, + test_catalog: str, + uc_test_schema: str, + uc_test_table: str, + unique_name: str, + warehouse_id: str, + cleanup_functions, + cleanup_policies, + ): + """Should preview, extract token, then create with token (happy path).""" + full_schema = f"{test_catalog}.{uc_test_schema}" + policy_name = f"{UC_TEST_PREFIX}_tok_{unique_name}" + tag_key = f"uc_test_tok_{unique_name}" + tag_value = "email" + + cleanup_policies((policy_name, "SCHEMA", full_schema)) + + TestFgacPolicyCRUD._create_governed_tag(tag_key, [tag_value]) + + try: + fn_name = f"{test_catalog}.{uc_test_schema}.{UC_TEST_PREFIX}_tok_fn_{unique_name}" + cleanup_functions(fn_name) + + create_security_function( + function_name=fn_name, + parameter_name="val", + parameter_type="STRING", + return_type="STRING", + function_body="RETURN CASE WHEN val IS NULL THEN NULL ELSE '***' END", + warehouse_id=warehouse_id, + ) + + set_tags( + object_type="column", + full_name=uc_test_table, + column_name="email", + tags={tag_key: tag_value}, + warehouse_id=warehouse_id, + ) + + # Preview to get token + preview = 
preview_policy_changes( + action="CREATE", + policy_name=policy_name, + securable_type="SCHEMA", + securable_fullname=full_schema, + policy_type="COLUMN_MASK", + to_principals=["account users"], + function_name=fn_name, + tag_name=tag_key, + tag_value=tag_value, + comment=f"Token test {unique_name}", + ) + token = preview["approval_token"] + + # Create with token + result = create_fgac_policy( + policy_name=policy_name, + policy_type="COLUMN_MASK", + securable_type="SCHEMA", + securable_fullname=full_schema, + function_name=fn_name, + to_principals=["account users"], + tag_name=tag_key, + approval_token=token, + tag_value=tag_value, + comment=f"Token test {unique_name}", + ) + + assert result["success"] is True + assert result["action"] == "created" + logger.info("Full preview-then-create workflow passed") + + # Clean up via SDK directly (bypass guardrails) + w = get_workspace_client() + w.policies.delete_policy( + on_securable_type="SCHEMA", + on_securable_fullname=full_schema, + name=policy_name, + ) + + finally: + TestFgacPolicyCRUD._delete_governed_tag(tag_key) + + def test_token_with_mismatched_params_raises(self): + """Token from preview with name A should not work for create with name B.""" + preview = preview_policy_changes( + action="CREATE", + policy_name="policy_a", + securable_type="SCHEMA", + securable_fullname="cat.sch", + policy_type="COLUMN_MASK", + to_principals=["analysts"], + function_name="cat.sch.mask", + tag_name="pii", + ) + token = preview["approval_token"] + + with pytest.raises((ValueError, PermissionError)): + create_fgac_policy( + policy_name="policy_b", # Different name! 
+ policy_type="COLUMN_MASK", + securable_type="SCHEMA", + securable_fullname="cat.sch", + function_name="cat.sch.mask", + to_principals=["analysts"], + tag_name="pii", + approval_token=token, + ) + + +# --------------------------------------------------------------------------- +# Admin group check tests +# --------------------------------------------------------------------------- + + +@pytest.mark.integration +class TestAdminGroupCheck: + """Tests for admin group membership verification.""" + + def test_admin_check_passes(self): + """Should pass for workspace admin user (test profile user).""" + result = _check_admin_group() + assert result["is_admin"] is True + assert result["user"] is not None + assert result["admin_group"] == "admins" + logger.info(f"Admin check passed for user: {result['user']}") + + def test_admin_check_custom_group_fails(self): + """Should raise PermissionError for a non-existent group.""" + import databricks_tools_core.unity_catalog.fgac_policies as fgac_mod + + original = fgac_mod._ADMIN_GROUP + try: + fgac_mod._ADMIN_GROUP = "nonexistent_group_xyz_12345" + with pytest.raises(PermissionError) as exc_info: + _check_admin_group() + assert "nonexistent_group_xyz_12345" in str(exc_info.value) + logger.info("Admin check correctly denied for non-existent group") + finally: + fgac_mod._ADMIN_GROUP = original + + # --------------------------------------------------------------------------- # CRUD lifecycle tests # --------------------------------------------------------------------------- @pytest.mark.integration -class TestAbacPolicyCRUD: +class TestFgacPolicyCRUD: """Tests for create, get, update, and delete policy operations. 
Each test creates its own governed tag via the Tag Policies API, @@ -476,7 +678,7 @@ def _create_governed_tag(tag_key: str, allowed_values: list[str]) -> None: ) logger.info(f"Created governed tag: {tag_key} (values={allowed_values})") - # Wait for governed tag to propagate to the ABAC policy system + # Wait for governed tag to propagate to the FGAC policy system logger.info("Waiting 30s for governed tag propagation...") time.sleep(30) @@ -538,9 +740,26 @@ def test_create_get_update_delete_column_mask_policy( ) logger.info(f"Tagged column email with {tag_key}={tag_value}") + # --- PREVIEW CREATE --- + logger.info(f"Previewing FGAC policy creation: {policy_name}") + create_preview = preview_policy_changes( + action="CREATE", + policy_name=policy_name, + securable_type="SCHEMA", + securable_fullname=full_schema, + policy_type="COLUMN_MASK", + to_principals=["account users"], + function_name=fn_name, + tag_name=tag_key, + tag_value=tag_value, + comment=f"Test policy {unique_name}", + ) + assert "approval_token" in create_preview + create_token = create_preview["approval_token"] + # --- CREATE --- - logger.info(f"Creating ABAC policy: {policy_name}") - create_result = create_abac_policy( + logger.info(f"Creating FGAC policy: {policy_name}") + create_result = create_fgac_policy( policy_name=policy_name, policy_type="COLUMN_MASK", securable_type="SCHEMA", @@ -548,6 +767,7 @@ def test_create_get_update_delete_column_mask_policy( function_name=fn_name, to_principals=["account users"], tag_name=tag_key, + approval_token=create_token, tag_value=tag_value, comment=f"Test policy {unique_name}", ) @@ -559,7 +779,7 @@ def test_create_get_update_delete_column_mask_policy( # --- GET --- logger.info(f"Getting policy: {policy_name}") - get_result = get_abac_policy( + get_result = get_fgac_policy( policy_name=policy_name, securable_type="SCHEMA", securable_fullname=full_schema, @@ -569,12 +789,24 @@ def test_create_get_update_delete_column_mask_policy( assert 
get_result["policy"]["name"] == policy_name logger.info(f"Policy details: {get_result['policy']}") + # --- PREVIEW UPDATE --- + logger.info(f"Previewing update for: {policy_name}") + update_preview = preview_policy_changes( + action="UPDATE", + policy_name=policy_name, + securable_type="SCHEMA", + securable_fullname=full_schema, + comment=f"Updated test policy {unique_name}", + ) + update_token = update_preview["approval_token"] + # --- UPDATE --- logger.info(f"Updating policy: {policy_name}") - update_result = update_abac_policy( + update_result = update_fgac_policy( policy_name=policy_name, securable_type="SCHEMA", securable_fullname=full_schema, + approval_token=update_token, comment=f"Updated test policy {unique_name}", ) @@ -584,7 +816,7 @@ def test_create_get_update_delete_column_mask_policy( logger.info(f"Policy updated: {update_result['changes']}") # --- Verify in list --- - list_result = list_abac_policies( + list_result = list_fgac_policies( securable_type="SCHEMA", securable_fullname=full_schema, ) @@ -592,12 +824,23 @@ def test_create_get_update_delete_column_mask_policy( assert policy_name in policy_names, f"Expected {policy_name} in {policy_names}" logger.info(f"Policy found in list ({list_result['policy_count']} total)") + # --- PREVIEW DELETE --- + logger.info(f"Previewing delete for: {policy_name}") + delete_preview = preview_policy_changes( + action="DELETE", + policy_name=policy_name, + securable_type="SCHEMA", + securable_fullname=full_schema, + ) + delete_token = delete_preview["approval_token"] + # --- DELETE --- logger.info(f"Deleting policy: {policy_name}") - delete_result = delete_abac_policy( + delete_result = delete_fgac_policy( policy_name=policy_name, securable_type="SCHEMA", securable_fullname=full_schema, + approval_token=delete_token, ) assert delete_result["success"] is True @@ -654,9 +897,25 @@ def test_create_row_filter_policy( ) logger.info(f"Tagged column department with {tag_key}={tag_value}") + # Preview create + 
logger.info(f"Previewing row filter policy creation: {policy_name}") + create_preview = preview_policy_changes( + action="CREATE", + policy_name=policy_name, + securable_type="SCHEMA", + securable_fullname=full_schema, + policy_type="ROW_FILTER", + to_principals=["account users"], + function_name=fn_name, + tag_name=tag_key, + tag_value=tag_value, + comment=f"Test row filter {unique_name}", + ) + create_token = create_preview["approval_token"] + # Create row filter policy logger.info(f"Creating row filter policy: {policy_name}") - result = create_abac_policy( + result = create_fgac_policy( policy_name=policy_name, policy_type="ROW_FILTER", securable_type="SCHEMA", @@ -664,6 +923,7 @@ def test_create_row_filter_policy( function_name=fn_name, to_principals=["account users"], tag_name=tag_key, + approval_token=create_token, tag_value=tag_value, comment=f"Test row filter {unique_name}", ) @@ -673,11 +933,21 @@ def test_create_row_filter_policy( assert result["details"]["policy_type"] == "ROW_FILTER" logger.info(f"Row filter policy created: {result['details']}") + # Preview delete + delete_preview = preview_policy_changes( + action="DELETE", + policy_name=policy_name, + securable_type="SCHEMA", + securable_fullname=full_schema, + ) + delete_token = delete_preview["approval_token"] + # Delete policy - delete_abac_policy( + delete_fgac_policy( policy_name=policy_name, securable_type="SCHEMA", securable_fullname=full_schema, + approval_token=delete_token, ) logger.info("Row filter policy deleted") From c2c7b362cf2caa3443d2b88f68927c0d26805b6f Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Wed, 11 Feb 2026 11:25:38 -0600 Subject: [PATCH 05/34] Add cross-catalog UDF support for FGAC policies Add udf_catalog/udf_schema params to MCP tool for discovering masking UDFs in a different catalog/schema than the policy scope. Update core function docstrings and skill docs with cross-catalog examples. 
--- .../7-fgac-overview.md | 342 ++++++++++++++ .../8-fgac-sql-generation.md | 420 ++++++++++++++++++ .../9-fgac-sdk-and-tools.md | 41 +- .../tools/fgac_policies.py | 10 +- .../7-fgac-overview.md | 342 ++++++++++++++ .../8-fgac-sql-generation.md | 420 ++++++++++++++++++ .../9-fgac-sdk-and-tools.md | 41 +- .../unity_catalog/fgac_policies.py | 7 +- 8 files changed, 1614 insertions(+), 9 deletions(-) create mode 100644 .claude/skills/databricks-unity-catalog/7-fgac-overview.md create mode 100644 .claude/skills/databricks-unity-catalog/8-fgac-sql-generation.md create mode 100644 databricks-skills/databricks-unity-catalog/7-fgac-overview.md create mode 100644 databricks-skills/databricks-unity-catalog/8-fgac-sql-generation.md diff --git a/.claude/skills/databricks-unity-catalog/7-fgac-overview.md b/.claude/skills/databricks-unity-catalog/7-fgac-overview.md new file mode 100644 index 00000000..b692eea0 --- /dev/null +++ b/.claude/skills/databricks-unity-catalog/7-fgac-overview.md @@ -0,0 +1,342 @@ +# FGAC Policy Governance Overview + +Guidance for Fine-Grained Access Control (FGAC) policies in Databricks Unity Catalog. Covers governed tags, tag assignments, masking UDFs, CREATE/DROP POLICY syntax, and the human-in-the-loop governance workflow. + +**Databricks Docs:** +- FGAC overview: https://docs.databricks.com/data-governance/unity-catalog/abac/ +- FGAC policies: https://docs.databricks.com/data-governance/unity-catalog/abac/policies +- FGAC tutorial: https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial + +## When to Use This Skill + +Use this skill when: +- Creating or managing **FGAC policies** (column masks, row filters) +- Working with **governed tags** (creating via UI, applying via SQL) +- Building **masking UDFs** for PII protection (SSN, email, credit card, etc.) 
+- Implementing **human-in-the-loop governance** workflows +- Querying tag assignments via `information_schema` +- Managing policy lifecycle (create, update, delete, preview) + +## Reference Files + +| Topic | File | Description | +|-------|------|-------------| +| SQL Generation | [8-fgac-sql-generation.md](8-fgac-sql-generation.md) | SET/UNSET TAG, CREATE FUNCTION, CREATE/DROP POLICY, discovery queries | +| SDK & MCP Tools | [9-fgac-sdk-and-tools.md](9-fgac-sdk-and-tools.md) | Python SDK patterns and 12 MCP tools for policy management | + +--- + +## FGAC Workflow Overview + +FGAC policies in Databricks follow a 4-step setup: + +1. **Governed Tags** - Define classification taxonomy (UI only) +2. **Tag Assignments** - Apply tags to columns/tables via SQL +3. **Masking UDFs** - Create deterministic functions for data masking +4. **FGAC Policies** - Bind tags to UDFs with principal scoping + +``` +┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ +│ Governed Tags│───>│ Tag │───>│ Masking │───>│ FGAC │ +│ (UI only) │ │ Assignments │ │ UDFs │ │ Policies │ +└──────────────┘ └──────────────┘ └──────────────┘ └──────────────┘ +``` + +--- + +## IMPORTANT: SQL That Does NOT Exist + +These SQL commands do **not** exist in Databricks. Do not generate them. + +| Invalid SQL | What to use instead | +|---|---| +| `SHOW POLICIES` | REST API: `w.policies.list_policies()` | +| `DESCRIBE POLICY` | REST API: `w.policies.get_policy()` | +| `ALTER POLICY` | Drop and recreate the policy | +| `ALTER USER SET ATTRIBUTES` | SCIM API for user attributes | +| `SHOW USER ATTRIBUTES` | SCIM API for user attributes | + +--- + +## Step 1: Governed Tags + +Governed tags **cannot** be created via SQL. They must be created in the Databricks UI. + +### Creating a Governed Tag (UI Steps) + +1. Navigate to **Catalog** in the workspace +2. Select **Governed Tags** from the left panel +3. Click **Create governed tag** +4. 
Configure: + - **Tag Key**: e.g., `pii_type` + - **Allowed Values**: e.g., `ssn`, `email`, `phone`, `credit_card`, `address` + - **Description**: e.g., "PII classification for FGAC policies" + +> **Note:** Tag data is stored as plain text and may be replicated globally. Avoid sensitive information in tag names or values. + +**Docs:** https://docs.databricks.com/admin/governed-tags/ + +--- + +## Step 2: Applying Tags to Columns + +### Legacy Syntax (all versions) + +```sql +-- Set tag on column +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name SET TAGS ('pii_type' = 'ssn'); + +-- Set tag on table +ALTER TABLE catalog.schema.table +SET TAGS ('data_classification' = 'confidential'); + +-- Remove tag +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name UNSET TAGS ('pii_type'); +``` + +### Modern Syntax (DBR 16.1+) + +```sql +SET TAG ON COLUMN catalog.schema.table.column_name 'pii_type' = 'ssn'; +SET TAG ON TABLE catalog.schema.table 'data_classification' = 'confidential'; +SET TAG ON SCHEMA catalog.schema 'environment' = 'production'; +SET TAG ON CATALOG catalog 'department' = 'finance'; + +UNSET TAG ON COLUMN catalog.schema.table.column_name 'pii_type'; +``` + +### Querying Existing Tags + +```sql +-- Column tags +SELECT tag_name, tag_value, column_name +FROM system.information_schema.column_tags +WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; + +-- Table tags +SELECT tag_name, tag_value +FROM system.information_schema.table_tags +WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; +``` + +--- + +## Step 3: Masking UDFs + +Masking UDFs must be `DETERMINISTIC` and use simple `CASE` statements. No external calls or nested UDFs. 
+ +```sql +-- Full mask: replaces all characters with * +CREATE OR REPLACE FUNCTION catalog.schema.mask_full(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Full masking - replaces all characters with *' +RETURN CASE + WHEN value IS NULL THEN NULL + ELSE REPEAT('*', LENGTH(value)) +END; + +-- Partial mask: show last 4 characters +CREATE OR REPLACE FUNCTION catalog.schema.mask_partial(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Partial masking - shows last 4 characters' +RETURN CASE + WHEN value IS NULL THEN NULL + WHEN LENGTH(value) <= 4 THEN REPEAT('*', LENGTH(value)) + ELSE CONCAT(REPEAT('*', LENGTH(value) - 4), RIGHT(value, 4)) +END; + +-- SSN mask: ***-**-XXXX format +CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks SSN showing only last 4 digits' +RETURN CASE + WHEN ssn IS NULL THEN NULL + WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 + THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) + ELSE '***-**-****' +END; + +-- Email mask: j***@example.com +CREATE OR REPLACE FUNCTION catalog.schema.mask_email(email STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks email showing first char and domain' +RETURN CASE + WHEN email IS NULL THEN NULL + WHEN INSTR(email, '@') > 1 + THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1)) + ELSE '***@***.***' +END; +``` + +**Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices + +> **Cross-catalog UDFs:** Masking UDFs do not need to be in the same catalog/schema as the policy scope. A common pattern is a shared governance schema (e.g., `governance.masking_udfs`) containing all masking functions, referenced by policies across multiple catalogs. The UDF name in a policy is always fully qualified (e.g., `governance.masking_udfs.mask_ssn`). + +--- + +## Step 4: FGAC Policies + +Policies are scoped to a **catalog**, **schema**, or **table**. 
`FOR TABLES` is always present. + +### Column Mask Policy + +```sql +-- Catalog level — masks matching columns in ALL tables in the catalog +CREATE OR REPLACE POLICY mask_pii_catalog +ON CATALOG my_catalog +COMMENT 'Mask PII columns catalog-wide' +COLUMN MASK my_catalog.my_schema.mask_partial +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; + +-- Schema level — masks matching columns in all tables in the schema +CREATE OR REPLACE POLICY mask_pii_schema +ON SCHEMA my_catalog.my_schema +COMMENT 'Mask PII columns in schema' +COLUMN MASK my_catalog.my_schema.mask_partial +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; + +-- Table level — masks matching columns on a single table +CREATE OR REPLACE POLICY mask_pii_table +ON TABLE my_catalog.my_schema.my_table +COMMENT 'Mask PII columns on specific table' +COLUMN MASK my_catalog.my_schema.mask_partial +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; +``` + +### Row Filter Policy + +```sql +-- Catalog level — filters rows in ALL tables in the catalog +CREATE OR REPLACE POLICY filter_eu_catalog +ON CATALOG my_catalog +COMMENT 'Filter EU rows catalog-wide' +ROW FILTER my_catalog.my_schema.is_not_eu_region +TO `us_team` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col +USING COLUMNS (filter_col); + +-- Schema level — filters rows in all tables in the schema +CREATE OR REPLACE POLICY filter_eu_schema +ON SCHEMA my_catalog.my_schema +COMMENT 'Filter EU rows in schema' +ROW FILTER my_catalog.my_schema.is_not_eu_region +TO `us_team` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col +USING COLUMNS (filter_col); + +-- Table level — filters rows on a single 
table
+CREATE OR REPLACE POLICY filter_eu_table
+ON TABLE my_catalog.my_schema.my_table
+COMMENT 'Filter EU rows on specific table'
+ROW FILTER my_catalog.my_schema.is_not_eu_region
+TO `us_team`
+EXCEPT `gov_admin`
+FOR TABLES
+MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col
+USING COLUMNS (filter_col);
+```
+
+### Drop Policy
+
+```sql
+-- Drop at each scope level
+DROP POLICY mask_pii_catalog ON CATALOG my_catalog;
+DROP POLICY mask_pii_schema ON SCHEMA my_catalog.my_schema;
+DROP POLICY mask_pii_table ON TABLE my_catalog.my_schema.my_table;
+```
+
+### CRITICAL: Always Exclude `gov_admin`
+
+Every FGAC policy **MUST** include ``EXCEPT `gov_admin` `` to protect administrator access. Without this, admins could be locked out of data.
+
+### Policy Quotas
+
+| Scope | Max Policies |
+|-------|-------------|
+| Per Catalog | 10 |
+| Per Schema | 10 |
+| Per Table | 5 |
+
+**Docs:** https://docs.databricks.com/gcp/en/data-governance/unity-catalog/abac/policies#policy-quotas
+
+---
+
+## Human-in-the-Loop Governance Workflow
+
+FGAC policy changes should follow a governed workflow:
+
+```
+ANALYZE → RECOMMEND → PREVIEW → APPROVE → EXECUTE → VERIFY
+ │ │ │ │ │ │
+ ▼ ▼ ▼ ▼ ▼ ▼
+ Discover Generate Show SQL Human Run SQL Confirm
+ current policy & impact confirms or SDK changes
+ state proposals preview changes call applied
+```
+
+1. **ANALYZE**: Discover current tags, policies, and UDFs
+2. **RECOMMEND**: Generate policy proposals based on requirements
+3. **PREVIEW**: Use `preview_policy_changes` to show exact SQL and impact
+4. **APPROVE**: Human reviews and explicitly approves
+5. **EXECUTE**: Create/update/delete policies via SDK or SQL
+6. **VERIFY**: Confirm policies are applied correctly
+
+**Never auto-execute policy changes.** Always preview and wait for human approval. 
+
+---
+
+## Common Errors
+
+| Error | Cause | Solution |
+|-------|-------|----------|
+| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Consolidate policies or use broader scope |
+| `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | Check governed tag configuration in UI |
+| `UDF_NOT_FOUND` | Masking UDF doesn't exist | Create UDF first, use fully qualified name |
+| `POLICY_ALREADY_EXISTS` | Policy name conflict | Use `CREATE OR REPLACE POLICY` |
+| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | Grant `MANAGE` permission to policy creator |
+| `SHOW POLICIES is not supported` | Used invalid SQL | Use REST API `w.policies.list_policies()` instead |
+
+## Best Practices
+
+1. **Use governed tags** (not ad-hoc tags) for FGAC policy matching
+2. **Always include** ``EXCEPT `gov_admin` `` in every policy
+3. **Use deterministic UDFs** with simple CASE statements
+4. **Preview before executing** any policy change
+5. **Start at schema scope** and narrow to table only when needed
+6. **Name policies descriptively**: `mask_{what}_{scope}` or `filter_{what}_{scope}`
+7. **Test UDFs independently** before binding to policies
+8. 
**Monitor policy quotas** — consolidate when approaching limits + +## Resources + +- [FGAC Overview](https://docs.databricks.com/data-governance/unity-catalog/abac/) +- [FGAC Policies](https://docs.databricks.com/data-governance/unity-catalog/abac/policies) +- [FGAC Tutorial](https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial) +- [UDF Best Practices](https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices) +- [Governed Tags](https://docs.databricks.com/admin/governed-tags/) +- [Column Masks & Row Filters](https://docs.databricks.com/data-governance/unity-catalog/filters-and-masks/) diff --git a/.claude/skills/databricks-unity-catalog/8-fgac-sql-generation.md b/.claude/skills/databricks-unity-catalog/8-fgac-sql-generation.md new file mode 100644 index 00000000..b1cf729e --- /dev/null +++ b/.claude/skills/databricks-unity-catalog/8-fgac-sql-generation.md @@ -0,0 +1,420 @@ +# SQL Generation Reference + +Pure SQL patterns for Unity Catalog FGAC governance operations. All SQL follows Databricks syntax. 
+ +--- + +## Tag Operations + +### SET TAG on Column + +```sql +-- Legacy syntax (all versions) +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name SET TAGS ('pii_type' = 'ssn'); + +-- Modern syntax (DBR 16.1+) +SET TAG ON COLUMN catalog.schema.table.column_name 'pii_type' = 'ssn'; +``` + +### SET TAG on Table + +```sql +-- Legacy syntax +ALTER TABLE catalog.schema.table +SET TAGS ('data_classification' = 'confidential'); + +-- Modern syntax +SET TAG ON TABLE catalog.schema.table 'data_classification' = 'confidential'; +``` + +### SET TAG on Schema / Catalog + +```sql +SET TAG ON SCHEMA catalog.schema 'environment' = 'production'; +SET TAG ON CATALOG my_catalog 'department' = 'finance'; +``` + +### UNSET TAG + +```sql +-- Column (legacy) +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name UNSET TAGS ('pii_type'); + +-- Column (modern) +UNSET TAG ON COLUMN catalog.schema.table.column_name 'pii_type'; + +-- Table (legacy) +ALTER TABLE catalog.schema.table +UNSET TAGS ('data_classification'); + +-- Table (modern) +UNSET TAG ON TABLE catalog.schema.table 'data_classification'; +``` + +**Docs:** +- SET TAG: https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-set-tag.html +- UNSET TAG: https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-unset-tag.html + +--- + +## Tag Discovery Queries + +### Query Column Tags + +```sql +SELECT tag_name, tag_value, column_name +FROM system.information_schema.column_tags +WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; +``` + +### Query Table Tags + +```sql +SELECT tag_name, tag_value +FROM system.information_schema.table_tags +WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; +``` + +### All Tag Assignments in a Catalog + +```sql +-- Table-level tags +SELECT 'TABLE' as securable_type, + CONCAT(catalog_name, '.', schema_name, '.', table_name) as securable_name, + tag_name as tag_key, + tag_value 
+FROM system.information_schema.table_tags +WHERE catalog_name = 'my_catalog'; + +-- Column-level tags +SELECT 'COLUMN' as securable_type, + CONCAT(catalog_name, '.', schema_name, '.', table_name, '.', column_name) as securable_name, + tag_name as tag_key, + tag_value +FROM system.information_schema.column_tags +WHERE catalog_name = 'my_catalog'; +``` + +**Docs:** +- information_schema.column_tags: https://docs.databricks.com/sql/language-manual/information-schema/column_tags.html +- information_schema.table_tags: https://docs.databricks.com/sql/language-manual/information-schema/table_tags.html + +--- + +## Masking UDF Creation + +All masking UDFs must be `DETERMINISTIC` with simple `CASE` statements. No external calls or nested UDFs. + +### Generic Masking Strategies + +```sql +-- Full mask: replaces all characters with * +CREATE OR REPLACE FUNCTION catalog.schema.mask_full(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Full masking - replaces all characters with *' +RETURN CASE + WHEN value IS NULL THEN NULL + ELSE REPEAT('*', LENGTH(value)) +END; + +-- Partial mask: show last 4 characters +CREATE OR REPLACE FUNCTION catalog.schema.mask_partial(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Partial masking - shows last 4 characters' +RETURN CASE + WHEN value IS NULL THEN NULL + WHEN LENGTH(value) <= 4 THEN REPEAT('*', LENGTH(value)) + ELSE CONCAT(REPEAT('*', LENGTH(value) - 4), RIGHT(value, 4)) +END; + +-- Hash: SHA256 with version prefix +CREATE OR REPLACE FUNCTION catalog.schema.mask_hash(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Hash masking - SHA256 with version prefix' +RETURN CASE + WHEN value IS NULL THEN NULL + ELSE CONCAT('HASH_v1_', SUBSTRING(SHA2(CONCAT(value, ':v1'), 256), 1, 16)) +END; + +-- Redact: replace with [REDACTED] +CREATE OR REPLACE FUNCTION catalog.schema.mask_redact(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Redaction - replaces value with [REDACTED]' +RETURN CASE + WHEN value IS NULL THEN NULL + 
ELSE '[REDACTED]' +END; + +-- Nullify: always returns NULL +CREATE OR REPLACE FUNCTION catalog.schema.mask_nullify(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Nullify - always returns NULL' +RETURN NULL; +``` + +### Specialized Masking UDFs + +```sql +-- SSN: ***-**-XXXX +CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks SSN showing only last 4 digits in XXX-XX-XXXX format' +RETURN CASE + WHEN ssn IS NULL THEN NULL + WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 + THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) + ELSE '***-**-****' +END; + +-- Email: j***@example.com +CREATE OR REPLACE FUNCTION catalog.schema.mask_email(email STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks email showing first char and domain' +RETURN CASE + WHEN email IS NULL THEN NULL + WHEN INSTR(email, '@') > 1 + THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1)) + ELSE '***@***.***' +END; + +-- Credit card: ****-****-****-1234 +CREATE OR REPLACE FUNCTION catalog.schema.mask_credit_card(card_number STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks credit card showing only last 4 digits' +RETURN CASE + WHEN card_number IS NULL THEN NULL + WHEN LENGTH(REGEXP_REPLACE(card_number, '[^0-9]', '')) >= 4 + THEN CONCAT('****-****-****-', RIGHT(REGEXP_REPLACE(card_number, '[^0-9]', ''), 4)) + ELSE '****-****-****-****' +END; +``` + +### Row Filter UDFs + +Row filter UDFs return `BOOLEAN`: `TRUE` to include, `FALSE` to exclude. 
+
+```sql
+-- Region-based filter: hide EU rows
+CREATE OR REPLACE FUNCTION catalog.schema.is_not_eu_region(region_value STRING)
+RETURNS BOOLEAN
+DETERMINISTIC
+COMMENT 'Row filter - returns FALSE for EU regions'
+RETURN CASE
+ WHEN region_value IS NULL THEN TRUE
+ WHEN LOWER(region_value) LIKE '%eu%' THEN FALSE
+ WHEN LOWER(region_value) LIKE '%europe%' THEN FALSE
+ ELSE TRUE
+END;
+
+-- Array membership filter
+CREATE OR REPLACE FUNCTION catalog.schema.is_in_allowed_values(
+ row_value STRING,
+ allowed_values ARRAY<STRING>
+)
+RETURNS BOOLEAN
+DETERMINISTIC
+COMMENT 'Row filter based on array membership'
+RETURN CASE
+ WHEN allowed_values IS NULL THEN FALSE
+ WHEN ARRAY_CONTAINS(TRANSFORM(allowed_values, x -> LOWER(x)), LOWER(row_value)) THEN TRUE
+ ELSE FALSE
+END;
+```
+
+**Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices
+
+---
+
+## Policy Creation
+
+Policies are scoped to a **catalog**, **schema**, or **table**. `FOR TABLES` is always present.
+
+> **Cross-catalog UDFs:** The UDF referenced in a policy is always fully qualified (`catalog.schema.function`) and can reside in any catalog/schema — it does not need to be in the same catalog or schema as the policy scope. A common pattern is a shared governance schema (e.g., `governance.masking_udfs`) containing all masking functions. 
+ +### Column Mask Policy + +```sql +-- Catalog level — masks matching columns in ALL tables in the catalog +CREATE OR REPLACE POLICY mask_pii_ssn_catalog +ON CATALOG my_catalog +COMMENT 'Mask SSN columns catalog-wide' +COLUMN MASK my_catalog.my_schema.mask_ssn +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; + +-- Schema level — masks matching columns in all tables in the schema +CREATE OR REPLACE POLICY mask_pii_ssn_schema +ON SCHEMA my_catalog.my_schema +COMMENT 'Mask SSN columns in schema' +COLUMN MASK my_catalog.my_schema.mask_ssn +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; + +-- Table level — masks matching columns on a single table +CREATE OR REPLACE POLICY mask_pii_ssn_table +ON TABLE my_catalog.my_schema.my_table +COMMENT 'Mask SSN columns on specific table' +COLUMN MASK my_catalog.my_schema.mask_ssn +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; + +-- Cross-catalog UDF — UDF in governance catalog, policy on prod +CREATE OR REPLACE POLICY mask_ssn_finance +ON SCHEMA prod.finance +COMMENT 'Mask SSN using shared governance UDF' +COLUMN MASK governance.masking_udfs.mask_ssn +TO `analysts` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; +``` + +### Row Filter Policy + +```sql +-- Catalog level — filters rows in ALL tables in the catalog +CREATE OR REPLACE POLICY filter_eu_data_catalog +ON CATALOG my_catalog +COMMENT 'Filter EU rows catalog-wide' +ROW FILTER my_catalog.my_schema.is_not_eu_region +TO `us_team` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col +USING COLUMNS (filter_col); + +-- Schema level — filters rows in all tables in the schema +CREATE 
OR REPLACE POLICY filter_eu_data_schema +ON SCHEMA my_catalog.my_schema +COMMENT 'Filter EU rows in schema' +ROW FILTER my_catalog.my_schema.is_not_eu_region +TO `us_team` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col +USING COLUMNS (filter_col); + +-- Table level — filters rows on a single table +CREATE OR REPLACE POLICY filter_eu_data_table +ON TABLE my_catalog.my_schema.my_table +COMMENT 'Filter EU rows on specific table' +ROW FILTER my_catalog.my_schema.is_not_eu_region +TO `us_team` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col +USING COLUMNS (filter_col); +``` + +### Policy with Tag Key Only (any value) + +```sql +-- Match any column with tag 'pii_type' regardless of value +-- Works at any scope: ON CATALOG, ON SCHEMA, or ON TABLE +CREATE OR REPLACE POLICY mask_all_pii +ON SCHEMA my_catalog.my_schema +COLUMN MASK my_catalog.my_schema.mask_full +TO `external_users` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTag('pii_type') AS masked_col +ON COLUMN masked_col; +``` + +### Drop Policy + +```sql +-- Drop at each scope level +DROP POLICY mask_pii_ssn_catalog ON CATALOG my_catalog; +DROP POLICY mask_pii_ssn_schema ON SCHEMA my_catalog.my_schema; +DROP POLICY mask_pii_ssn_table ON TABLE my_catalog.my_schema.my_table; +``` + +> **Note:** There is no `ALTER POLICY`. To modify a policy, drop and recreate it. 
+ +--- + +## Discovery Queries + +```sql +-- List catalogs +SHOW CATALOGS; + +-- List schemas in a catalog +SHOW SCHEMAS IN my_catalog; + +-- List tables in a schema +SHOW TABLES IN my_catalog.my_schema; + +-- Describe table with extended metadata +DESCRIBE TABLE EXTENDED my_catalog.my_schema.my_table; + +-- List UDFs in a schema +SHOW USER FUNCTIONS IN my_catalog.my_schema; + +-- Describe a UDF +DESCRIBE FUNCTION EXTENDED my_catalog.my_schema.mask_ssn; + +-- Sample column values +SELECT DISTINCT column_name +FROM my_catalog.my_schema.my_table +LIMIT 20; +``` + +--- + +## Enums Reference + +### PII Types (governed tag values) + +`ssn`, `email`, `phone`, `credit_card`, `date_of_birth`, `address`, `name`, `ip_address`, `national_id`, `medical_record`, `generic` + +### Masking Strategies + +| Strategy | Description | +|----------|-------------| +| `full_mask` | Replace all characters with `*` | +| `partial_mask` | Show last 4 characters | +| `hash` | SHA256 with version prefix | +| `redact` | Replace with `[REDACTED]` | +| `nullify` | Always return NULL | +| `custom` | User-supplied SQL (requires manual UDF) | + +### Policy Scopes + +| Scope | Description | +|-------|-------------| +| `CATALOG` | Policy applies to all tables in catalog | +| `SCHEMA` | Policy applies to all tables in schema | +| `TABLE` | Policy applies to a single table | + +### Tag Syntax Variants + +| Variant | Availability | Example | +|---------|-------------|---------| +| `LEGACY` | All versions | `ALTER TABLE t ALTER COLUMN c SET TAGS ('k'='v')` | +| `MODERN` | DBR 16.1+ | `SET TAG ON COLUMN t.c 'k' = 'v'` | diff --git a/.claude/skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md b/.claude/skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md index 17c0d5f1..dddda9e9 100644 --- a/.claude/skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md +++ b/.claude/skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md @@ -167,10 +167,15 @@ get_table_policies( List masking UDFs in a schema. 
+> **Cross-catalog UDFs:** Masking UDFs can reside in any catalog/schema, not just the policy scope. Use `udf_catalog` and `udf_schema` to discover UDFs stored in a shared governance schema (e.g., `governance.masking_udfs`). These default to `catalog`/`schema` when not specified. + ```python get_masking_functions( catalog: str, schema: str, + # To discover UDFs in a different catalog/schema: + udf_catalog: str = None, # defaults to catalog + udf_schema: str = None, # defaults to schema ) ``` @@ -378,6 +383,9 @@ Step 1: ANALYZE → list_fgac_policies(securable_type="SCHEMA", securable_fullname="prod.finance") → get_column_tags_api(catalog="prod", schema="finance", table="customers") → get_masking_functions(catalog="prod", schema="finance") + # If UDFs are in a shared governance schema: +→ get_masking_functions(catalog="prod", schema="finance", + udf_catalog="governance", udf_schema="masking_udfs") Step 2: RECOMMEND ───────────────────────────────── @@ -391,7 +399,7 @@ Step 3: PREVIEW (returns approval_token) securable_type="SCHEMA", securable_fullname="prod.finance", policy_type="COLUMN_MASK", - function_name="prod.finance.mask_ssn", + function_name="governance.masking_udfs.mask_ssn", to_principals=["analysts"], tag_name="pii_type", tag_value="ssn" @@ -409,7 +417,7 @@ Step 5: EXECUTE (pass approval_token) policy_type="COLUMN_MASK", securable_type="SCHEMA", securable_fullname="prod.finance", - function_name="prod.finance.mask_ssn", + function_name="governance.masking_udfs.mask_ssn", to_principals=["analysts"], tag_name="pii_type", tag_value="ssn", @@ -512,6 +520,35 @@ policy = w.policies.create_policy(policy_info=policy_info) Change `on_securable_type` and `on_securable_fullname` to target catalog or table scope. 
+### Create Column Mask Policy (Cross-Catalog UDF) + +The UDF can live in a separate governance catalog/schema from the policy scope: + +```python +# UDF in governance.masking_udfs, policy on prod.finance +policy_info = PolicyInfo( + name="mask_ssn_finance", + policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, + on_securable_type=SecurableType.SCHEMA, + on_securable_fullname="prod.finance", + for_securable_type=SecurableType.TABLE, + to_principals=["analysts"], + except_principals=["gov_admin"], + comment="Mask SSN columns in prod.finance using shared governance UDF", + column_mask=ColumnMaskOptions( + function_name="governance.masking_udfs.mask_ssn", + on_column="masked_col", + ), + match_columns=[ + MatchColumn( + alias="masked_col", + condition="hasTagValue('pii_type', 'ssn')", + ) + ], +) +policy = w.policies.create_policy(policy_info=policy_info) +``` + ### Create Row Filter Policy ```python diff --git a/databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py b/databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py index 1ef5dd5f..9738e7fc 100644 --- a/databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py +++ b/databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py @@ -39,6 +39,8 @@ def manage_uc_fgac_policies( catalog: str = None, schema: str = None, table: str = None, + udf_catalog: str = None, + udf_schema: str = None, preview_action: str = None, approval_token: str = None, ) -> Dict[str, Any]: @@ -52,7 +54,7 @@ def manage_uc_fgac_policies( - list: List policies on a securable. Params: securable_type, securable_fullname, include_inherited, policy_type - get: Get a specific policy. Params: policy_name, securable_type, securable_fullname - get_table_policies: Get column masks and row filters on a table. Params: catalog, schema, table - - get_masking_functions: List masking UDFs in a schema. Params: catalog, schema + - get_masking_functions: List masking UDFs in a schema. 
Params: catalog, schema (or udf_catalog, udf_schema to discover UDFs in a different catalog/schema) - check_quota: Check policy quota on a securable. Params: securable_type, securable_fullname - preview: Preview policy changes without executing. Params: preview_action ("CREATE"/"UPDATE"/"DELETE"), policy_name, securable_type, securable_fullname, plus policy_type/function_name/tag_name/to_principals for CREATE @@ -80,6 +82,8 @@ def manage_uc_fgac_policies( catalog: Catalog name (for get_table_policies, get_masking_functions) schema: Schema name (for get_table_policies, get_masking_functions) table: Table name (for get_table_policies) + udf_catalog: Catalog where masking UDFs reside (for get_masking_functions; defaults to catalog) + udf_schema: Schema where masking UDFs reside (for get_masking_functions; defaults to schema) preview_action: Sub-action for preview: "CREATE", "UPDATE", or "DELETE" approval_token: Approval token from preview action (required for create/update/delete) @@ -109,8 +113,8 @@ def manage_uc_fgac_policies( ) elif act == "get_masking_functions": return _get_masking_functions( - catalog=catalog, - schema=schema, + catalog=udf_catalog or catalog, + schema=udf_schema or schema, ) elif act == "check_quota": return _check_policy_quota( diff --git a/databricks-skills/databricks-unity-catalog/7-fgac-overview.md b/databricks-skills/databricks-unity-catalog/7-fgac-overview.md new file mode 100644 index 00000000..b692eea0 --- /dev/null +++ b/databricks-skills/databricks-unity-catalog/7-fgac-overview.md @@ -0,0 +1,342 @@ +# FGAC Policy Governance Overview + +Guidance for Fine-Grained Access Control (FGAC) policies in Databricks Unity Catalog. Covers governed tags, tag assignments, masking UDFs, CREATE/DROP POLICY syntax, and the human-in-the-loop governance workflow. 
+ +**Databricks Docs:** +- FGAC overview: https://docs.databricks.com/data-governance/unity-catalog/abac/ +- FGAC policies: https://docs.databricks.com/data-governance/unity-catalog/abac/policies +- FGAC tutorial: https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial + +## When to Use This Skill + +Use this skill when: +- Creating or managing **FGAC policies** (column masks, row filters) +- Working with **governed tags** (creating via UI, applying via SQL) +- Building **masking UDFs** for PII protection (SSN, email, credit card, etc.) +- Implementing **human-in-the-loop governance** workflows +- Querying tag assignments via `information_schema` +- Managing policy lifecycle (create, update, delete, preview) + +## Reference Files + +| Topic | File | Description | +|-------|------|-------------| +| SQL Generation | [8-fgac-sql-generation.md](8-fgac-sql-generation.md) | SET/UNSET TAG, CREATE FUNCTION, CREATE/DROP POLICY, discovery queries | +| SDK & MCP Tools | [9-fgac-sdk-and-tools.md](9-fgac-sdk-and-tools.md) | Python SDK patterns and 12 MCP tools for policy management | + +--- + +## FGAC Workflow Overview + +FGAC policies in Databricks follow a 4-step setup: + +1. **Governed Tags** - Define classification taxonomy (UI only) +2. **Tag Assignments** - Apply tags to columns/tables via SQL +3. **Masking UDFs** - Create deterministic functions for data masking +4. **FGAC Policies** - Bind tags to UDFs with principal scoping + +``` +┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ +│ Governed Tags│───>│ Tag │───>│ Masking │───>│ FGAC │ +│ (UI only) │ │ Assignments │ │ UDFs │ │ Policies │ +└──────────────┘ └──────────────┘ └──────────────┘ └──────────────┘ +``` + +--- + +## IMPORTANT: SQL That Does NOT Exist + +These SQL commands do **not** exist in Databricks. Do not generate them. 
+ +| Invalid SQL | What to use instead | +|---|---| +| `SHOW POLICIES` | REST API: `w.policies.list_policies()` | +| `DESCRIBE POLICY` | REST API: `w.policies.get_policy()` | +| `ALTER POLICY` | Drop and recreate the policy | +| `ALTER USER SET ATTRIBUTES` | SCIM API for user attributes | +| `SHOW USER ATTRIBUTES` | SCIM API for user attributes | + +--- + +## Step 1: Governed Tags + +Governed tags **cannot** be created via SQL. They must be created in the Databricks UI. + +### Creating a Governed Tag (UI Steps) + +1. Navigate to **Catalog** in the workspace +2. Select **Governed Tags** from the left panel +3. Click **Create governed tag** +4. Configure: + - **Tag Key**: e.g., `pii_type` + - **Allowed Values**: e.g., `ssn`, `email`, `phone`, `credit_card`, `address` + - **Description**: e.g., "PII classification for FGAC policies" + +> **Note:** Tag data is stored as plain text and may be replicated globally. Avoid sensitive information in tag names or values. + +**Docs:** https://docs.databricks.com/admin/governed-tags/ + +--- + +## Step 2: Applying Tags to Columns + +### Legacy Syntax (all versions) + +```sql +-- Set tag on column +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name SET TAGS ('pii_type' = 'ssn'); + +-- Set tag on table +ALTER TABLE catalog.schema.table +SET TAGS ('data_classification' = 'confidential'); + +-- Remove tag +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name UNSET TAGS ('pii_type'); +``` + +### Modern Syntax (DBR 16.1+) + +```sql +SET TAG ON COLUMN catalog.schema.table.column_name 'pii_type' = 'ssn'; +SET TAG ON TABLE catalog.schema.table 'data_classification' = 'confidential'; +SET TAG ON SCHEMA catalog.schema 'environment' = 'production'; +SET TAG ON CATALOG catalog 'department' = 'finance'; + +UNSET TAG ON COLUMN catalog.schema.table.column_name 'pii_type'; +``` + +### Querying Existing Tags + +```sql +-- Column tags +SELECT tag_name, tag_value, column_name +FROM system.information_schema.column_tags +WHERE 
catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; + +-- Table tags +SELECT tag_name, tag_value +FROM system.information_schema.table_tags +WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; +``` + +--- + +## Step 3: Masking UDFs + +Masking UDFs must be `DETERMINISTIC` and use simple `CASE` statements. No external calls or nested UDFs. + +```sql +-- Full mask: replaces all characters with * +CREATE OR REPLACE FUNCTION catalog.schema.mask_full(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Full masking - replaces all characters with *' +RETURN CASE + WHEN value IS NULL THEN NULL + ELSE REPEAT('*', LENGTH(value)) +END; + +-- Partial mask: show last 4 characters +CREATE OR REPLACE FUNCTION catalog.schema.mask_partial(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Partial masking - shows last 4 characters' +RETURN CASE + WHEN value IS NULL THEN NULL + WHEN LENGTH(value) <= 4 THEN REPEAT('*', LENGTH(value)) + ELSE CONCAT(REPEAT('*', LENGTH(value) - 4), RIGHT(value, 4)) +END; + +-- SSN mask: ***-**-XXXX format +CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks SSN showing only last 4 digits' +RETURN CASE + WHEN ssn IS NULL THEN NULL + WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 + THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) + ELSE '***-**-****' +END; + +-- Email mask: j***@example.com +CREATE OR REPLACE FUNCTION catalog.schema.mask_email(email STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks email showing first char and domain' +RETURN CASE + WHEN email IS NULL THEN NULL + WHEN INSTR(email, '@') > 1 + THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1)) + ELSE '***@***.***' +END; +``` + +**Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices + +> **Cross-catalog UDFs:** Masking UDFs do not need to be in the same 
catalog/schema as the policy scope. A common pattern is a shared governance schema (e.g., `governance.masking_udfs`) containing all masking functions, referenced by policies across multiple catalogs. The UDF name in a policy is always fully qualified (e.g., `governance.masking_udfs.mask_ssn`). + +--- + +## Step 4: FGAC Policies + +Policies are scoped to a **catalog**, **schema**, or **table**. `FOR TABLES` is always present. + +### Column Mask Policy + +```sql +-- Catalog level — masks matching columns in ALL tables in the catalog +CREATE OR REPLACE POLICY mask_pii_catalog +ON CATALOG my_catalog +COMMENT 'Mask PII columns catalog-wide' +COLUMN MASK my_catalog.my_schema.mask_partial +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; + +-- Schema level — masks matching columns in all tables in the schema +CREATE OR REPLACE POLICY mask_pii_schema +ON SCHEMA my_catalog.my_schema +COMMENT 'Mask PII columns in schema' +COLUMN MASK my_catalog.my_schema.mask_partial +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; + +-- Table level — masks matching columns on a single table +CREATE OR REPLACE POLICY mask_pii_table +ON TABLE my_catalog.my_schema.my_table +COMMENT 'Mask PII columns on specific table' +COLUMN MASK my_catalog.my_schema.mask_partial +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; +``` + +### Row Filter Policy + +```sql +-- Catalog level — filters rows in ALL tables in the catalog +CREATE OR REPLACE POLICY filter_eu_catalog +ON CATALOG my_catalog +COMMENT 'Filter EU rows catalog-wide' +ROW FILTER my_catalog.my_schema.is_not_eu_region +TO `us_team` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col +USING COLUMNS (filter_col); + +-- 
Schema level — filters rows in all tables in the schema
+CREATE OR REPLACE POLICY filter_eu_schema
+ON SCHEMA my_catalog.my_schema
+COMMENT 'Filter EU rows in schema'
+ROW FILTER my_catalog.my_schema.is_not_eu_region
+TO `us_team`
+EXCEPT `gov_admin`
+FOR TABLES
+MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col
+USING COLUMNS (filter_col);
+
+-- Table level — filters rows on a single table
+CREATE OR REPLACE POLICY filter_eu_table
+ON TABLE my_catalog.my_schema.my_table
+COMMENT 'Filter EU rows on specific table'
+ROW FILTER my_catalog.my_schema.is_not_eu_region
+TO `us_team`
+EXCEPT `gov_admin`
+FOR TABLES
+MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col
+USING COLUMNS (filter_col);
+```
+
+### Drop Policy
+
+```sql
+-- Drop at each scope level
+DROP POLICY mask_pii_catalog ON CATALOG my_catalog;
+DROP POLICY mask_pii_schema ON SCHEMA my_catalog.my_schema;
+DROP POLICY mask_pii_table ON TABLE my_catalog.my_schema.my_table;
+```
+
+### CRITICAL: Always Exclude `gov_admin`
+
+Every FGAC policy **MUST** include ``EXCEPT `gov_admin` `` to protect administrator access. Without this, admins could be locked out of data.
+
+### Policy Quotas
+
+| Scope | Max Policies |
+|-------|-------------|
+| Per Catalog | 10 |
+| Per Schema | 10 |
+| Per Table | 5 |
+
+**Docs:** https://docs.databricks.com/gcp/en/data-governance/unity-catalog/abac/policies#policy-quotas
+
+---
+
+## Human-in-the-Loop Governance Workflow
+
+FGAC policy changes should follow a governed workflow:
+
+```
+ANALYZE → RECOMMEND → PREVIEW → APPROVE → EXECUTE → VERIFY
+ │ │ │ │ │ │
+ ▼ ▼ ▼ ▼ ▼ ▼
+ Discover Generate Show SQL Human Run SQL Confirm
+ current policy & impact confirms or SDK changes
+ state proposals preview changes call applied
+```
+
+1. **ANALYZE**: Discover current tags, policies, and UDFs
+2. **RECOMMEND**: Generate policy proposals based on requirements
+3. **PREVIEW**: Use `preview_policy_changes` to show exact SQL and impact
+4. **APPROVE**: Human reviews and explicitly approves
+5. 
**EXECUTE**: Create/update/delete policies via SDK or SQL +6. **VERIFY**: Confirm policies are applied correctly + +**Never auto-execute policy changes.** Always preview and wait for human approval. + +--- + +## Common Errors + +| Error | Cause | Solution | +|-------|-------|----------| +| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Consolidate policies or use broader scope | +| `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | Check governed tag configuration in UI | +| `UDF_NOT_FOUND` | Masking UDF doesn't exist | Create UDF first, use fully qualified name | +| `POLICY_ALREADY_EXISTS` | Policy name conflict | Use `CREATE OR REPLACE POLICY` | +| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | Grant `MANAGE` permission to policy creator | +| `SHOW POLICIES is not supported` | Used invalid SQL | Use REST API `w.policies.list_policies()` instead | + +## Best Practices + +1. **Use governed tags** (not ad-hoc tags) for FGAC policy matching +2. **Always include `EXCEPT \`gov_admin\``** in every policy +3. **Use deterministic UDFs** with simple CASE statements +4. **Preview before executing** any policy change +5. **Start at schema scope** and narrow to table only when needed +6. **Name policies descriptively**: `mask_{what}_{scope}` or `filter_{what}_{scope}` +7. **Test UDFs independently** before binding to policies +8. 
**Monitor policy quotas** — consolidate when approaching limits + +## Resources + +- [FGAC Overview](https://docs.databricks.com/data-governance/unity-catalog/abac/) +- [FGAC Policies](https://docs.databricks.com/data-governance/unity-catalog/abac/policies) +- [FGAC Tutorial](https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial) +- [UDF Best Practices](https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices) +- [Governed Tags](https://docs.databricks.com/admin/governed-tags/) +- [Column Masks & Row Filters](https://docs.databricks.com/data-governance/unity-catalog/filters-and-masks/) diff --git a/databricks-skills/databricks-unity-catalog/8-fgac-sql-generation.md b/databricks-skills/databricks-unity-catalog/8-fgac-sql-generation.md new file mode 100644 index 00000000..b1cf729e --- /dev/null +++ b/databricks-skills/databricks-unity-catalog/8-fgac-sql-generation.md @@ -0,0 +1,420 @@ +# SQL Generation Reference + +Pure SQL patterns for Unity Catalog FGAC governance operations. All SQL follows Databricks syntax. 
+ +--- + +## Tag Operations + +### SET TAG on Column + +```sql +-- Legacy syntax (all versions) +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name SET TAGS ('pii_type' = 'ssn'); + +-- Modern syntax (DBR 16.1+) +SET TAG ON COLUMN catalog.schema.table.column_name 'pii_type' = 'ssn'; +``` + +### SET TAG on Table + +```sql +-- Legacy syntax +ALTER TABLE catalog.schema.table +SET TAGS ('data_classification' = 'confidential'); + +-- Modern syntax +SET TAG ON TABLE catalog.schema.table 'data_classification' = 'confidential'; +``` + +### SET TAG on Schema / Catalog + +```sql +SET TAG ON SCHEMA catalog.schema 'environment' = 'production'; +SET TAG ON CATALOG my_catalog 'department' = 'finance'; +``` + +### UNSET TAG + +```sql +-- Column (legacy) +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name UNSET TAGS ('pii_type'); + +-- Column (modern) +UNSET TAG ON COLUMN catalog.schema.table.column_name 'pii_type'; + +-- Table (legacy) +ALTER TABLE catalog.schema.table +UNSET TAGS ('data_classification'); + +-- Table (modern) +UNSET TAG ON TABLE catalog.schema.table 'data_classification'; +``` + +**Docs:** +- SET TAG: https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-set-tag.html +- UNSET TAG: https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-unset-tag.html + +--- + +## Tag Discovery Queries + +### Query Column Tags + +```sql +SELECT tag_name, tag_value, column_name +FROM system.information_schema.column_tags +WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; +``` + +### Query Table Tags + +```sql +SELECT tag_name, tag_value +FROM system.information_schema.table_tags +WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; +``` + +### All Tag Assignments in a Catalog + +```sql +-- Table-level tags +SELECT 'TABLE' as securable_type, + CONCAT(catalog_name, '.', schema_name, '.', table_name) as securable_name, + tag_name as tag_key, + tag_value 
+FROM system.information_schema.table_tags +WHERE catalog_name = 'my_catalog'; + +-- Column-level tags +SELECT 'COLUMN' as securable_type, + CONCAT(catalog_name, '.', schema_name, '.', table_name, '.', column_name) as securable_name, + tag_name as tag_key, + tag_value +FROM system.information_schema.column_tags +WHERE catalog_name = 'my_catalog'; +``` + +**Docs:** +- information_schema.column_tags: https://docs.databricks.com/sql/language-manual/information-schema/column_tags.html +- information_schema.table_tags: https://docs.databricks.com/sql/language-manual/information-schema/table_tags.html + +--- + +## Masking UDF Creation + +All masking UDFs must be `DETERMINISTIC` with simple `CASE` statements. No external calls or nested UDFs. + +### Generic Masking Strategies + +```sql +-- Full mask: replaces all characters with * +CREATE OR REPLACE FUNCTION catalog.schema.mask_full(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Full masking - replaces all characters with *' +RETURN CASE + WHEN value IS NULL THEN NULL + ELSE REPEAT('*', LENGTH(value)) +END; + +-- Partial mask: show last 4 characters +CREATE OR REPLACE FUNCTION catalog.schema.mask_partial(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Partial masking - shows last 4 characters' +RETURN CASE + WHEN value IS NULL THEN NULL + WHEN LENGTH(value) <= 4 THEN REPEAT('*', LENGTH(value)) + ELSE CONCAT(REPEAT('*', LENGTH(value) - 4), RIGHT(value, 4)) +END; + +-- Hash: SHA256 with version prefix +CREATE OR REPLACE FUNCTION catalog.schema.mask_hash(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Hash masking - SHA256 with version prefix' +RETURN CASE + WHEN value IS NULL THEN NULL + ELSE CONCAT('HASH_v1_', SUBSTRING(SHA2(CONCAT(value, ':v1'), 256), 1, 16)) +END; + +-- Redact: replace with [REDACTED] +CREATE OR REPLACE FUNCTION catalog.schema.mask_redact(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Redaction - replaces value with [REDACTED]' +RETURN CASE + WHEN value IS NULL THEN NULL + 
ELSE '[REDACTED]' +END; + +-- Nullify: always returns NULL +CREATE OR REPLACE FUNCTION catalog.schema.mask_nullify(value STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Nullify - always returns NULL' +RETURN NULL; +``` + +### Specialized Masking UDFs + +```sql +-- SSN: ***-**-XXXX +CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks SSN showing only last 4 digits in XXX-XX-XXXX format' +RETURN CASE + WHEN ssn IS NULL THEN NULL + WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 + THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) + ELSE '***-**-****' +END; + +-- Email: j***@example.com +CREATE OR REPLACE FUNCTION catalog.schema.mask_email(email STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks email showing first char and domain' +RETURN CASE + WHEN email IS NULL THEN NULL + WHEN INSTR(email, '@') > 1 + THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1)) + ELSE '***@***.***' +END; + +-- Credit card: ****-****-****-1234 +CREATE OR REPLACE FUNCTION catalog.schema.mask_credit_card(card_number STRING) +RETURNS STRING +DETERMINISTIC +COMMENT 'Masks credit card showing only last 4 digits' +RETURN CASE + WHEN card_number IS NULL THEN NULL + WHEN LENGTH(REGEXP_REPLACE(card_number, '[^0-9]', '')) >= 4 + THEN CONCAT('****-****-****-', RIGHT(REGEXP_REPLACE(card_number, '[^0-9]', ''), 4)) + ELSE '****-****-****-****' +END; +``` + +### Row Filter UDFs + +Row filter UDFs return `BOOLEAN`: `TRUE` to include, `FALSE` to exclude. 
+
+```sql
+-- Region-based filter: hide EU rows
+CREATE OR REPLACE FUNCTION catalog.schema.is_not_eu_region(region_value STRING)
+RETURNS BOOLEAN
+DETERMINISTIC
+COMMENT 'Row filter - returns FALSE for EU regions'
+RETURN CASE
+  WHEN region_value IS NULL THEN TRUE
+  WHEN LOWER(region_value) LIKE '%eu%' THEN FALSE
+  WHEN LOWER(region_value) LIKE '%europe%' THEN FALSE
+  ELSE TRUE
+END;
+
+-- Array membership filter
+CREATE OR REPLACE FUNCTION catalog.schema.is_in_allowed_values(
+  row_value STRING,
+  allowed_values ARRAY<STRING>
+)
+RETURNS BOOLEAN
+DETERMINISTIC
+COMMENT 'Row filter based on array membership'
+RETURN CASE
+  WHEN allowed_values IS NULL THEN FALSE
+  WHEN ARRAY_CONTAINS(TRANSFORM(allowed_values, x -> LOWER(x)), LOWER(row_value)) THEN TRUE
+  ELSE FALSE
+END;
+```
+
+**Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices
+
+---
+
+## Policy Creation
+
+Policies are scoped to a **catalog**, **schema**, or **table**. `FOR TABLES` is always present.
+
+> **Cross-catalog UDFs:** The UDF referenced in a policy is always fully qualified (`catalog.schema.function`) and can reside in any catalog/schema — it does not need to be in the same catalog or schema as the policy scope. A common pattern is a shared governance schema (e.g., `governance.masking_udfs`) containing all masking functions.
+ +### Column Mask Policy + +```sql +-- Catalog level — masks matching columns in ALL tables in the catalog +CREATE OR REPLACE POLICY mask_pii_ssn_catalog +ON CATALOG my_catalog +COMMENT 'Mask SSN columns catalog-wide' +COLUMN MASK my_catalog.my_schema.mask_ssn +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; + +-- Schema level — masks matching columns in all tables in the schema +CREATE OR REPLACE POLICY mask_pii_ssn_schema +ON SCHEMA my_catalog.my_schema +COMMENT 'Mask SSN columns in schema' +COLUMN MASK my_catalog.my_schema.mask_ssn +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; + +-- Table level — masks matching columns on a single table +CREATE OR REPLACE POLICY mask_pii_ssn_table +ON TABLE my_catalog.my_schema.my_table +COMMENT 'Mask SSN columns on specific table' +COLUMN MASK my_catalog.my_schema.mask_ssn +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; + +-- Cross-catalog UDF — UDF in governance catalog, policy on prod +CREATE OR REPLACE POLICY mask_ssn_finance +ON SCHEMA prod.finance +COMMENT 'Mask SSN using shared governance UDF' +COLUMN MASK governance.masking_udfs.mask_ssn +TO `analysts` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; +``` + +### Row Filter Policy + +```sql +-- Catalog level — filters rows in ALL tables in the catalog +CREATE OR REPLACE POLICY filter_eu_data_catalog +ON CATALOG my_catalog +COMMENT 'Filter EU rows catalog-wide' +ROW FILTER my_catalog.my_schema.is_not_eu_region +TO `us_team` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col +USING COLUMNS (filter_col); + +-- Schema level — filters rows in all tables in the schema +CREATE 
OR REPLACE POLICY filter_eu_data_schema +ON SCHEMA my_catalog.my_schema +COMMENT 'Filter EU rows in schema' +ROW FILTER my_catalog.my_schema.is_not_eu_region +TO `us_team` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col +USING COLUMNS (filter_col); + +-- Table level — filters rows on a single table +CREATE OR REPLACE POLICY filter_eu_data_table +ON TABLE my_catalog.my_schema.my_table +COMMENT 'Filter EU rows on specific table' +ROW FILTER my_catalog.my_schema.is_not_eu_region +TO `us_team` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col +USING COLUMNS (filter_col); +``` + +### Policy with Tag Key Only (any value) + +```sql +-- Match any column with tag 'pii_type' regardless of value +-- Works at any scope: ON CATALOG, ON SCHEMA, or ON TABLE +CREATE OR REPLACE POLICY mask_all_pii +ON SCHEMA my_catalog.my_schema +COLUMN MASK my_catalog.my_schema.mask_full +TO `external_users` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTag('pii_type') AS masked_col +ON COLUMN masked_col; +``` + +### Drop Policy + +```sql +-- Drop at each scope level +DROP POLICY mask_pii_ssn_catalog ON CATALOG my_catalog; +DROP POLICY mask_pii_ssn_schema ON SCHEMA my_catalog.my_schema; +DROP POLICY mask_pii_ssn_table ON TABLE my_catalog.my_schema.my_table; +``` + +> **Note:** There is no `ALTER POLICY`. To modify a policy, drop and recreate it. 
+ +--- + +## Discovery Queries + +```sql +-- List catalogs +SHOW CATALOGS; + +-- List schemas in a catalog +SHOW SCHEMAS IN my_catalog; + +-- List tables in a schema +SHOW TABLES IN my_catalog.my_schema; + +-- Describe table with extended metadata +DESCRIBE TABLE EXTENDED my_catalog.my_schema.my_table; + +-- List UDFs in a schema +SHOW USER FUNCTIONS IN my_catalog.my_schema; + +-- Describe a UDF +DESCRIBE FUNCTION EXTENDED my_catalog.my_schema.mask_ssn; + +-- Sample column values +SELECT DISTINCT column_name +FROM my_catalog.my_schema.my_table +LIMIT 20; +``` + +--- + +## Enums Reference + +### PII Types (governed tag values) + +`ssn`, `email`, `phone`, `credit_card`, `date_of_birth`, `address`, `name`, `ip_address`, `national_id`, `medical_record`, `generic` + +### Masking Strategies + +| Strategy | Description | +|----------|-------------| +| `full_mask` | Replace all characters with `*` | +| `partial_mask` | Show last 4 characters | +| `hash` | SHA256 with version prefix | +| `redact` | Replace with `[REDACTED]` | +| `nullify` | Always return NULL | +| `custom` | User-supplied SQL (requires manual UDF) | + +### Policy Scopes + +| Scope | Description | +|-------|-------------| +| `CATALOG` | Policy applies to all tables in catalog | +| `SCHEMA` | Policy applies to all tables in schema | +| `TABLE` | Policy applies to a single table | + +### Tag Syntax Variants + +| Variant | Availability | Example | +|---------|-------------|---------| +| `LEGACY` | All versions | `ALTER TABLE t ALTER COLUMN c SET TAGS ('k'='v')` | +| `MODERN` | DBR 16.1+ | `SET TAG ON COLUMN t.c 'k' = 'v'` | diff --git a/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md b/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md index 17c0d5f1..dddda9e9 100644 --- a/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md +++ b/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md @@ -167,10 +167,15 @@ get_table_policies( List masking UDFs in a 
schema. +> **Cross-catalog UDFs:** Masking UDFs can reside in any catalog/schema, not just the policy scope. Use `udf_catalog` and `udf_schema` to discover UDFs stored in a shared governance schema (e.g., `governance.masking_udfs`). These default to `catalog`/`schema` when not specified. + ```python get_masking_functions( catalog: str, schema: str, + # To discover UDFs in a different catalog/schema: + udf_catalog: str = None, # defaults to catalog + udf_schema: str = None, # defaults to schema ) ``` @@ -378,6 +383,9 @@ Step 1: ANALYZE → list_fgac_policies(securable_type="SCHEMA", securable_fullname="prod.finance") → get_column_tags_api(catalog="prod", schema="finance", table="customers") → get_masking_functions(catalog="prod", schema="finance") + # If UDFs are in a shared governance schema: +→ get_masking_functions(catalog="prod", schema="finance", + udf_catalog="governance", udf_schema="masking_udfs") Step 2: RECOMMEND ───────────────────────────────── @@ -391,7 +399,7 @@ Step 3: PREVIEW (returns approval_token) securable_type="SCHEMA", securable_fullname="prod.finance", policy_type="COLUMN_MASK", - function_name="prod.finance.mask_ssn", + function_name="governance.masking_udfs.mask_ssn", to_principals=["analysts"], tag_name="pii_type", tag_value="ssn" @@ -409,7 +417,7 @@ Step 5: EXECUTE (pass approval_token) policy_type="COLUMN_MASK", securable_type="SCHEMA", securable_fullname="prod.finance", - function_name="prod.finance.mask_ssn", + function_name="governance.masking_udfs.mask_ssn", to_principals=["analysts"], tag_name="pii_type", tag_value="ssn", @@ -512,6 +520,35 @@ policy = w.policies.create_policy(policy_info=policy_info) Change `on_securable_type` and `on_securable_fullname` to target catalog or table scope. 
+### Create Column Mask Policy (Cross-Catalog UDF) + +The UDF can live in a separate governance catalog/schema from the policy scope: + +```python +# UDF in governance.masking_udfs, policy on prod.finance +policy_info = PolicyInfo( + name="mask_ssn_finance", + policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, + on_securable_type=SecurableType.SCHEMA, + on_securable_fullname="prod.finance", + for_securable_type=SecurableType.TABLE, + to_principals=["analysts"], + except_principals=["gov_admin"], + comment="Mask SSN columns in prod.finance using shared governance UDF", + column_mask=ColumnMaskOptions( + function_name="governance.masking_udfs.mask_ssn", + on_column="masked_col", + ), + match_columns=[ + MatchColumn( + alias="masked_col", + condition="hasTagValue('pii_type', 'ssn')", + ) + ], +) +policy = w.policies.create_policy(policy_info=policy_info) +``` + ### Create Row Filter Policy ```python diff --git a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py index b1a01234..1496a3b7 100644 --- a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py @@ -449,7 +449,8 @@ def preview_policy_changes( policy_type: "COLUMN_MASK" or "ROW_FILTER" (required for CREATE) to_principals: Principals the policy applies to except_principals: Excluded principals - function_name: Fully qualified UDF name (required for CREATE) + function_name: Fully qualified UDF name (required for CREATE). + Can reference any catalog/schema, not just the policy scope. 
tag_name: Tag key to match (required for CREATE) tag_value: Tag value to match (optional; omit for hasTag vs hasTagValue) comment: Policy description @@ -628,7 +629,9 @@ def create_fgac_policy( policy_type: "COLUMN_MASK" or "ROW_FILTER" securable_type: "CATALOG", "SCHEMA", or "TABLE" securable_fullname: Fully qualified securable name - function_name: Fully qualified UDF name (e.g., "catalog.schema.mask_ssn") + function_name: Fully qualified UDF name (e.g., "catalog.schema.mask_ssn"). + The UDF can reside in any catalog/schema, not just the policy scope. + For example, a policy on "prod.finance" can use "governance.masking_udfs.mask_ssn". to_principals: Users/groups the policy applies to tag_name: Tag key to match columns on approval_token: Token from preview_policy_changes() From b9436117ce84e0d0e8375ddd52c4f0b5dc44ab92 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Wed, 11 Feb 2026 12:41:24 -0600 Subject: [PATCH 06/34] Add FGAC skill evaluation test suite with 10 ground-truth cases Covers SQL generation (column mask, row filter, masking UDF, cross-catalog), Python SDK (create policy, list policies, row filter), MCP workflows (human-in-the-loop, cross-catalog discovery), and error handling (invalid SQL like SHOW POLICIES). 
--- .../databricks-unity-catalog/candidates.yaml | 12 + .../ground_truth.yaml | 822 ++++++++++++++++++ .../databricks-unity-catalog/manifest.yaml | 40 + 3 files changed, 874 insertions(+) create mode 100644 .test/skills/databricks-unity-catalog/candidates.yaml create mode 100644 .test/skills/databricks-unity-catalog/ground_truth.yaml create mode 100644 .test/skills/databricks-unity-catalog/manifest.yaml diff --git a/.test/skills/databricks-unity-catalog/candidates.yaml b/.test/skills/databricks-unity-catalog/candidates.yaml new file mode 100644 index 00000000..dfda4204 --- /dev/null +++ b/.test/skills/databricks-unity-catalog/candidates.yaml @@ -0,0 +1,12 @@ +--- +# Candidate test cases pending review for databricks-unity-catalog skill +# Add new test cases here for review before promoting to ground_truth.yaml +# +# Workflow: +# 1. Generate response for a new prompt +# 2. Add to candidates with review_status: "pending" +# 3. Execute on Databricks to verify +# 4. Run MLflow evaluation +# 5. 
If passed, promote to ground_truth.yaml + +candidates: [] diff --git a/.test/skills/databricks-unity-catalog/ground_truth.yaml b/.test/skills/databricks-unity-catalog/ground_truth.yaml new file mode 100644 index 00000000..dcb9bacf --- /dev/null +++ b/.test/skills/databricks-unity-catalog/ground_truth.yaml @@ -0,0 +1,822 @@ +test_cases: + # ============================================================================ + # SQL Generation — Column Mask + # ============================================================================ + - id: "fgac_column_mask_sql_001" + inputs: + prompt: "Create a column mask policy for SSN columns at schema level" + outputs: + response: | + Here's a column mask policy that masks SSN columns across all tables in a schema: + + ```sql + -- Step 1: Ensure the masking UDF exists + CREATE OR REPLACE FUNCTION my_catalog.my_schema.mask_ssn(ssn STRING) + RETURNS STRING + DETERMINISTIC + COMMENT 'Masks SSN showing only last 4 digits in XXX-XX-XXXX format' + RETURN CASE + WHEN ssn IS NULL THEN NULL + WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 + THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) + ELSE '***-**-****' + END; + + -- Step 2: Create the FGAC column mask policy + CREATE OR REPLACE POLICY mask_ssn_schema + ON SCHEMA my_catalog.my_schema + COMMENT 'Mask SSN columns schema-wide' + COLUMN MASK my_catalog.my_schema.mask_ssn + TO `analysts`, `data_scientists` + EXCEPT `gov_admin` + FOR TABLES + MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col + ON COLUMN masked_col; + ``` + + This policy: + - Creates a deterministic masking UDF that shows only the last 4 digits + - Uses `CREATE OR REPLACE POLICY` at schema scope + - Matches columns tagged with `pii_type = ssn` using `hasTagValue()` + - Applies to all tables in the schema via `FOR TABLES` + - Excludes `gov_admin` to preserve admin access + execution_success: null + expectations: + expected_facts: + - "CREATE OR REPLACE POLICY" + - "COLUMN MASK" + - 
"hasTagValue" + - "EXCEPT `gov_admin`" + - "FOR TABLES" + expected_patterns: + - pattern: "CREATE OR REPLACE POLICY" + min_count: 1 + description: "policy creation syntax" + - pattern: "COLUMN MASK" + min_count: 1 + description: "column mask type" + - pattern: "hasTagValue\\s*\\(\\s*'pii_type'\\s*,\\s*'ssn'\\s*\\)" + min_count: 1 + description: "tag-based column matching" + - pattern: "EXCEPT\\s+`gov_admin`" + min_count: 1 + description: "gov_admin exclusion" + - pattern: "FOR TABLES" + min_count: 1 + description: "for_securable_type = TABLE" + guidelines: + - "Must use CREATE OR REPLACE POLICY syntax" + - "Must include COLUMN MASK with fully qualified UDF name" + - "Must match columns via hasTagValue('pii_type', 'ssn')" + - "Must include EXCEPT `gov_admin`" + - "Must include FOR TABLES" + metadata: + category: "happy_path" + difficulty: "easy" + source: "manual" + tags: ["sql", "column-mask", "schema-scope", "ssn"] + + # ============================================================================ + # SQL Generation — Row Filter + # ============================================================================ + - id: "fgac_row_filter_sql_001" + inputs: + prompt: "Create a row filter policy to hide EU data from the US team" + outputs: + response: | + Here's a row filter policy that hides EU data from the US team: + + ```sql + -- Step 1: Create the row filter UDF + CREATE OR REPLACE FUNCTION my_catalog.my_schema.is_not_eu_region(region_value STRING) + RETURNS BOOLEAN + DETERMINISTIC + COMMENT 'Row filter - returns FALSE for EU regions' + RETURN CASE + WHEN region_value IS NULL THEN TRUE + WHEN LOWER(region_value) LIKE '%eu%' THEN FALSE + WHEN LOWER(region_value) LIKE '%europe%' THEN FALSE + ELSE TRUE + END; + + -- Step 2: Create the FGAC row filter policy + CREATE OR REPLACE POLICY filter_eu_data_schema + ON SCHEMA my_catalog.my_schema + COMMENT 'Filter EU rows for US team' + ROW FILTER my_catalog.my_schema.is_not_eu_region + TO `us_team` + EXCEPT `gov_admin` + FOR 
TABLES + MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col + USING COLUMNS (filter_col); + ``` + + This policy: + - Creates a deterministic UDF that returns FALSE for EU regions (filtering them out) + - Uses `ROW FILTER` with `USING COLUMNS` syntax (not `ON COLUMN`) + - Matches columns tagged with `region = eu` via `hasTagValue()` + - Applies only to the `us_team` principal group + - Excludes `gov_admin` to preserve admin access + execution_success: null + expectations: + expected_facts: + - "ROW FILTER" + - "hasTagValue" + - "USING COLUMNS" + - "EXCEPT `gov_admin`" + expected_patterns: + - pattern: "CREATE OR REPLACE POLICY" + min_count: 1 + description: "policy creation syntax" + - pattern: "ROW FILTER" + min_count: 1 + description: "row filter type" + - pattern: "hasTagValue\\s*\\(\\s*'region'\\s*,\\s*'eu'\\s*\\)" + min_count: 1 + description: "tag-based column matching" + - pattern: "EXCEPT\\s+`gov_admin`" + min_count: 1 + description: "gov_admin exclusion" + - pattern: "USING COLUMNS" + min_count: 1 + description: "row filter USING COLUMNS syntax" + guidelines: + - "Must use ROW FILTER syntax, not COLUMN MASK" + - "Must use USING COLUMNS (not ON COLUMN) for row filters" + - "Must include EXCEPT `gov_admin`" + - "Row filter UDF must return BOOLEAN" + metadata: + category: "happy_path" + difficulty: "easy" + source: "manual" + tags: ["sql", "row-filter", "region-based", "schema-scope"] + + # ============================================================================ + # SQL Generation — Masking UDF + # ============================================================================ + - id: "fgac_masking_udf_sql_001" + inputs: + prompt: "Write a masking UDF that partially masks email addresses" + outputs: + response: | + Here's a masking UDF that partially masks email addresses, showing only the first character and the domain: + + ```sql + CREATE OR REPLACE FUNCTION my_catalog.my_schema.mask_email(email STRING) + RETURNS STRING + DETERMINISTIC + COMMENT 'Masks 
email showing first char and domain' + RETURN CASE + WHEN email IS NULL THEN NULL + WHEN INSTR(email, '@') > 1 + THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1)) + ELSE '***@***.***' + END; + ``` + + Example outputs: + - `john.doe@example.com` → `j***@example.com` + - `a@b.com` → `a***@b.com` + - `NULL` → `NULL` + - `invalid-email` → `***@***.***` + + Key requirements for masking UDFs: + - Must be `DETERMINISTIC` — same input always produces same output + - Uses simple `CASE` statements — no external calls or nested UDFs + - Handles NULL values explicitly + - Handles edge cases (missing @ sign) + - Uses fully qualified name: `catalog.schema.function_name` + execution_success: null + expectations: + expected_facts: + - "DETERMINISTIC" + - "CASE" + - "CREATE OR REPLACE FUNCTION" + - "RETURNS STRING" + expected_patterns: + - pattern: "CREATE OR REPLACE FUNCTION" + min_count: 1 + description: "UDF creation syntax" + - pattern: "RETURNS STRING" + min_count: 1 + description: "return type" + - pattern: "DETERMINISTIC" + min_count: 1 + description: "deterministic requirement" + - pattern: "CASE\\s+WHEN" + min_count: 1 + description: "CASE statement pattern" + - pattern: "INSTR\\s*\\(.*@.*\\)" + min_count: 1 + description: "email @ detection" + guidelines: + - "Masking UDF must be DETERMINISTIC" + - "Must use simple CASE statements, no external calls" + - "Must handle NULL values" + - "Must use fully qualified function name (catalog.schema.function)" + metadata: + category: "happy_path" + difficulty: "medium" + source: "manual" + tags: ["sql", "masking-udf", "email", "partial-mask"] + + # ============================================================================ + # SQL Generation — Cross-Catalog UDF + # ============================================================================ + - id: "fgac_cross_catalog_sql_001" + inputs: + prompt: "Create a column mask policy on prod.finance using a UDF from governance.masking_udfs" + outputs: + response: 
| + Here's a cross-catalog column mask policy using a shared governance UDF: + + ```sql + -- The masking UDF already exists in the shared governance schema: + -- governance.masking_udfs.mask_ssn + + -- Create policy on prod.finance, referencing UDF from governance catalog + CREATE OR REPLACE POLICY mask_ssn_finance + ON SCHEMA prod.finance + COMMENT 'Mask SSN using shared governance UDF' + COLUMN MASK governance.masking_udfs.mask_ssn + TO `analysts` + EXCEPT `gov_admin` + FOR TABLES + MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col + ON COLUMN masked_col; + ``` + + Key points about cross-catalog UDFs: + - The UDF (`governance.masking_udfs.mask_ssn`) can live in a different catalog/schema than the policy scope (`prod.finance`) + - The UDF name must always be fully qualified: `catalog.schema.function` + - A common pattern is a shared governance schema containing all masking functions + - The policy executor needs `EXECUTE` on the UDF and `MANAGE` on the target securable + execution_success: null + expectations: + expected_facts: + - "governance.masking_udfs.mask_ssn" + - "prod.finance" + - "cross-catalog" + - "fully qualified" + expected_patterns: + - pattern: "CREATE OR REPLACE POLICY" + min_count: 1 + description: "policy creation" + - pattern: "COLUMN MASK governance\\.masking_udfs\\.mask_ssn" + min_count: 1 + description: "cross-catalog UDF reference" + - pattern: "ON SCHEMA prod\\.finance" + min_count: 1 + description: "policy scope on prod.finance" + - pattern: "EXCEPT\\s+`gov_admin`" + min_count: 1 + description: "gov_admin exclusion" + guidelines: + - "Must use fully qualified UDF name from governance catalog" + - "Policy scope must be on prod.finance" + - "Must explain cross-catalog UDF pattern" + - "Must include EXCEPT `gov_admin`" + metadata: + category: "happy_path" + difficulty: "medium" + source: "manual" + tags: ["sql", "cross-catalog", "column-mask", "governance"] + + # 
============================================================================ + # SDK/Python — Create Column Mask Policy + # ============================================================================ + - id: "fgac_sdk_create_policy_001" + inputs: + prompt: "Create a column mask FGAC policy using the Python SDK" + outputs: + response: | + Here's how to create a column mask FGAC policy using the Databricks Python SDK: + + ```python + from databricks.sdk import WorkspaceClient + from databricks.sdk.service.catalog import ( + ColumnMaskOptions, + MatchColumn, + PolicyInfo, + PolicyType, + SecurableType, + ) + + w = WorkspaceClient() + + policy_info = PolicyInfo( + name="mask_pii_ssn_schema", + policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, + on_securable_type=SecurableType.SCHEMA, + on_securable_fullname="my_catalog.my_schema", + for_securable_type=SecurableType.TABLE, + to_principals=["analysts", "data_scientists"], + except_principals=["gov_admin"], + comment="Mask SSN columns in schema", + column_mask=ColumnMaskOptions( + function_name="my_catalog.my_schema.mask_ssn", + on_column="masked_col", + ), + match_columns=[ + MatchColumn( + alias="masked_col", + condition="hasTagValue('pii_type', 'ssn')", + ) + ], + ) + policy = w.policies.create_policy(policy_info=policy_info) + print(f"Created policy: {policy.name}") + ``` + + Key points: + - `policy_type` must be `PolicyType.POLICY_TYPE_COLUMN_MASK` + - `for_securable_type` is always `SecurableType.TABLE` + - `except_principals` must include `"gov_admin"` to protect admin access + - `function_name` must be fully qualified (catalog.schema.function) + - `MatchColumn.alias` must match `ColumnMaskOptions.on_column` + execution_success: null + expectations: + expected_facts: + - "PolicyInfo" + - "PolicyType.POLICY_TYPE_COLUMN_MASK" + - "ColumnMaskOptions" + - "MatchColumn" + - "for_securable_type" + - "except_principals" + expected_patterns: + - pattern: "PolicyInfo\\(" + min_count: 1 + description: "PolicyInfo 
construction" + - pattern: "PolicyType\\.POLICY_TYPE_COLUMN_MASK" + min_count: 1 + description: "column mask policy type" + - pattern: "ColumnMaskOptions\\(" + min_count: 1 + description: "column mask options" + - pattern: "MatchColumn\\(" + min_count: 1 + description: "match column configuration" + - pattern: "for_securable_type.*=.*SecurableType\\.TABLE" + min_count: 1 + description: "for_securable_type always TABLE" + - pattern: "except_principals.*gov_admin" + min_count: 1 + description: "gov_admin in except_principals" + guidelines: + - "Must use PolicyInfo with all required fields" + - "Must set for_securable_type to SecurableType.TABLE" + - "Must include gov_admin in except_principals" + - "Must use ColumnMaskOptions and MatchColumn" + metadata: + category: "happy_path" + difficulty: "medium" + source: "manual" + tags: ["python", "sdk", "column-mask", "create-policy"] + + # ============================================================================ + # SDK/Python — List Policies + # ============================================================================ + - id: "fgac_sdk_list_policies_001" + inputs: + prompt: "List all FGAC policies on my catalog and filter by type" + outputs: + response: | + Here's how to list and filter FGAC policies using the Python SDK: + + ```python + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + # List all policies on a catalog (including inherited from schemas/tables) + policies = list(w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname="my_catalog", + include_inherited=True, + )) + + print(f"Total policies: {len(policies)}") + + # Filter by policy type + column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] + row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] + + print(f"Column masks: {len(column_masks)}") + print(f"Row filters: {len(row_filters)}") + + # Print details for each policy + for p in policies: + print(f" {p.name}: 
{p.policy_type} on {p.on_securable_fullname}") + ``` + + Key points: + - Use `w.policies.list_policies()` — there is no SQL equivalent (`SHOW POLICIES` does not exist) + - `include_inherited=True` shows policies from parent scopes + - Filter by `policy_type` string: `"COLUMN_MASK"` or `"ROW_FILTER"` + - `on_securable_type` can be `"CATALOG"`, `"SCHEMA"`, or `"TABLE"` + execution_success: null + expectations: + expected_facts: + - "w.policies.list_policies" + - "on_securable_type" + - "include_inherited" + - "COLUMN_MASK" + - "ROW_FILTER" + expected_patterns: + - pattern: "w\\.policies\\.list_policies\\(" + min_count: 1 + description: "list_policies API call" + - pattern: "on_securable_type" + min_count: 1 + description: "securable type parameter" + - pattern: "include_inherited" + min_count: 1 + description: "inherited policies flag" + - pattern: "policy_type.*COLUMN_MASK" + min_count: 1 + description: "filter by column mask type" + guidelines: + - "Must use w.policies.list_policies() SDK method" + - "Must show filtering by policy_type" + - "Must mention include_inherited parameter" + - "Should note SHOW POLICIES does not exist in SQL" + metadata: + category: "happy_path" + difficulty: "easy" + source: "manual" + tags: ["python", "sdk", "list-policies", "discovery"] + + # ============================================================================ + # SDK/Python — Create Row Filter Policy + # ============================================================================ + - id: "fgac_sdk_row_filter_001" + inputs: + prompt: "Create a row filter policy using the SDK with a 0-argument UDF" + outputs: + response: | + Here's how to create a row filter policy using the Python SDK: + + ```python + from databricks.sdk import WorkspaceClient + from databricks.sdk.service.catalog import ( + MatchColumn, + PolicyInfo, + PolicyType, + RowFilterOptions, + SecurableType, + ) + + w = WorkspaceClient() + + policy_info = PolicyInfo( + name="filter_eu_data_schema", + 
policy_type=PolicyType.POLICY_TYPE_ROW_FILTER, + on_securable_type=SecurableType.SCHEMA, + on_securable_fullname="my_catalog.my_schema", + for_securable_type=SecurableType.TABLE, + to_principals=["us_team"], + except_principals=["gov_admin"], + comment="Filter EU rows in schema", + row_filter=RowFilterOptions( + function_name="my_catalog.my_schema.is_not_eu_region", + ), + match_columns=[ + MatchColumn( + alias="filter_col", + condition="hasTagValue('region', 'eu')", + ) + ], + ) + policy = w.policies.create_policy(policy_info=policy_info) + print(f"Created policy: {policy.name}") + ``` + + Key differences from column mask: + - `policy_type` is `PolicyType.POLICY_TYPE_ROW_FILTER` + - Uses `RowFilterOptions` instead of `ColumnMaskOptions` + - Row filter UDFs take the matched column as input and return `BOOLEAN` + - `TRUE` includes the row, `FALSE` excludes it + - `for_securable_type` is still always `SecurableType.TABLE` + execution_success: null + expectations: + expected_facts: + - "PolicyType.POLICY_TYPE_ROW_FILTER" + - "RowFilterOptions" + - "MatchColumn" + - "for_securable_type" + - "BOOLEAN" + expected_patterns: + - pattern: "PolicyType\\.POLICY_TYPE_ROW_FILTER" + min_count: 1 + description: "row filter policy type" + - pattern: "RowFilterOptions\\(" + min_count: 1 + description: "row filter options" + - pattern: "MatchColumn\\(" + min_count: 1 + description: "match column" + - pattern: "for_securable_type.*=.*SecurableType\\.TABLE" + min_count: 1 + description: "for_securable_type always TABLE" + - pattern: "except_principals.*gov_admin" + min_count: 1 + description: "gov_admin in except_principals" + guidelines: + - "Must use POLICY_TYPE_ROW_FILTER, not COLUMN_MASK" + - "Must use RowFilterOptions, not ColumnMaskOptions" + - "Must include gov_admin in except_principals" + - "Should explain that row filter UDFs return BOOLEAN" + metadata: + category: "happy_path" + difficulty: "medium" + source: "manual" + tags: ["python", "sdk", "row-filter", "create-policy"] 
+ + # ============================================================================ + # MCP Tool Workflow — Human-in-the-Loop + # ============================================================================ + - id: "fgac_mcp_workflow_001" + inputs: + prompt: "Walk me through the human-in-the-loop workflow to create a column mask policy using MCP tools" + outputs: + response: | + Here's the complete human-in-the-loop workflow for creating an FGAC column mask policy: + + **Step 1: ANALYZE** — Discover current state + + ``` + list_fgac_policies( + securable_type="SCHEMA", + securable_fullname="prod.finance" + ) + + get_masking_functions( + catalog="prod", + schema="finance" + ) + + get_column_tags_api( + catalog="prod", + schema="finance", + table="customers" + ) + ``` + + **Step 2: RECOMMEND** — Generate policy proposal based on discovered tags and UDFs. + + **Step 3: PREVIEW** — Get approval token + + ``` + result = preview_policy_changes( + action="CREATE", + policy_name="mask_ssn_finance", + securable_type="SCHEMA", + securable_fullname="prod.finance", + policy_type="COLUMN_MASK", + function_name="prod.finance.mask_ssn", + to_principals=["analysts"], + tag_name="pii_type", + tag_value="ssn" + ) + # Returns: approval_token, equivalent SQL, warnings + ``` + + **Step 4: APPROVE** — Human reviews the preview and replies "approve" + + **Step 5: EXECUTE** — Pass approval token to create + + ``` + create_fgac_policy( + policy_name="mask_ssn_finance", + policy_type="COLUMN_MASK", + securable_type="SCHEMA", + securable_fullname="prod.finance", + function_name="prod.finance.mask_ssn", + to_principals=["analysts"], + tag_name="pii_type", + tag_value="ssn", + approval_token=result["approval_token"] + ) + ``` + + **Step 6: VERIFY** — Confirm the policy was applied + + ``` + get_fgac_policy( + policy_name="mask_ssn_finance", + securable_type="SCHEMA", + securable_fullname="prod.finance" + ) + ``` + + The workflow is: ANALYZE -> RECOMMEND -> PREVIEW -> APPROVE -> EXECUTE -> 
VERIFY. + Never auto-execute policy changes — always preview and wait for human approval. + execution_success: null + expectations: + expected_facts: + - "preview_policy_changes" + - "approval_token" + - "create_fgac_policy" + - "ANALYZE" + - "PREVIEW" + - "APPROVE" + - "EXECUTE" + expected_patterns: + - pattern: "preview_policy_changes\\(" + min_count: 1 + description: "preview step" + - pattern: "approval_token" + min_count: 2 + description: "approval token in preview and execute" + - pattern: "create_fgac_policy\\(" + min_count: 1 + description: "create execution step" + - pattern: "list_fgac_policies\\(" + min_count: 1 + description: "discovery step" + guidelines: + - "Must show complete ANALYZE -> PREVIEW -> APPROVE -> EXECUTE workflow" + - "Must include preview_policy_changes with approval_token" + - "Must pass approval_token to create_fgac_policy" + - "Must include discovery step (list_fgac_policies or get_masking_functions)" + - "Must emphasize never auto-executing policy changes" + metadata: + category: "happy_path" + difficulty: "medium" + source: "manual" + tags: ["mcp", "workflow", "human-in-the-loop", "approval-token"] + + # ============================================================================ + # MCP Tool Workflow — Cross-Catalog Discovery + # ============================================================================ + - id: "fgac_mcp_cross_catalog_001" + inputs: + prompt: "Discover masking UDFs in a shared governance schema and create a policy using MCP tools" + outputs: + response: | + Here's how to discover UDFs from a shared governance schema and create a cross-catalog policy: + + **Step 1: Discover masking UDFs in the governance catalog** + + ``` + get_masking_functions( + catalog="prod", + schema="finance", + udf_catalog="governance", + udf_schema="masking_udfs" + ) + # Returns: governance.masking_udfs.mask_ssn, governance.masking_udfs.mask_email, etc. 
+ ``` + + **Step 2: Check existing policies** + + ``` + list_fgac_policies( + securable_type="SCHEMA", + securable_fullname="prod.finance" + ) + ``` + + **Step 3: Preview the policy with cross-catalog UDF** + + ``` + result = preview_policy_changes( + action="CREATE", + policy_name="mask_ssn_finance", + securable_type="SCHEMA", + securable_fullname="prod.finance", + policy_type="COLUMN_MASK", + function_name="governance.masking_udfs.mask_ssn", + to_principals=["analysts"], + tag_name="pii_type", + tag_value="ssn" + ) + ``` + + **Step 4: After human approval, execute** + + ``` + create_fgac_policy( + policy_name="mask_ssn_finance", + policy_type="COLUMN_MASK", + securable_type="SCHEMA", + securable_fullname="prod.finance", + function_name="governance.masking_udfs.mask_ssn", + to_principals=["analysts"], + tag_name="pii_type", + tag_value="ssn", + approval_token=result["approval_token"] + ) + ``` + + Key points: + - Use `udf_catalog` and `udf_schema` parameters to discover UDFs in a different catalog + - The `function_name` in the policy must be fully qualified: `governance.masking_udfs.mask_ssn` + - The UDF does not need to be in the same catalog/schema as the policy scope + execution_success: null + expectations: + expected_facts: + - "udf_catalog" + - "udf_schema" + - "governance.masking_udfs" + - "cross-catalog" + - "get_masking_functions" + expected_patterns: + - pattern: "get_masking_functions\\(" + min_count: 1 + description: "UDF discovery tool" + - pattern: "udf_catalog.*=.*governance" + min_count: 1 + description: "cross-catalog UDF discovery" + - pattern: "udf_schema.*=.*masking_udfs" + min_count: 1 + description: "cross-schema UDF discovery" + - pattern: "function_name.*governance\\.masking_udfs\\." 
+ min_count: 1 + description: "fully qualified cross-catalog function reference" + guidelines: + - "Must use get_masking_functions with udf_catalog and udf_schema" + - "Must show fully qualified function_name from governance catalog" + - "Must follow human-in-the-loop workflow with preview and approval" + - "Should explain the cross-catalog UDF pattern" + metadata: + category: "happy_path" + difficulty: "medium" + source: "manual" + tags: ["mcp", "cross-catalog", "discovery", "governance"] + + # ============================================================================ + # Error/Edge Cases — Invalid SQL + # ============================================================================ + - id: "fgac_error_invalid_sql_001" + inputs: + prompt: "How do I list all FGAC policies using SQL?" + outputs: + response: | + **Important: `SHOW POLICIES` does not exist in Databricks SQL.** + + There is no SQL command to list FGAC policies. The following SQL statements are **not valid**: + + | Invalid SQL | Status | + |---|---| + | `SHOW POLICIES` | Does not exist | + | `DESCRIBE POLICY` | Does not exist | + | `ALTER POLICY` | Does not exist | + + Instead, use the **Python SDK** or **REST API**: + + ```python + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + # List all policies on a catalog + policies = list(w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname="my_catalog", + include_inherited=True, + )) + + for p in policies: + print(f"{p.name}: {p.policy_type} on {p.on_securable_fullname}") + ``` + + Or use the **MCP tool**: + + ``` + list_fgac_policies( + securable_type="CATALOG", + securable_fullname="my_catalog", + include_inherited=True + ) + ``` + + Policy management is API-only. Use `w.policies.list_policies()` for listing, + `w.policies.get_policy()` for details, and `CREATE OR REPLACE POLICY` SQL only for creation/deletion. 
+ execution_success: null + expectations: + expected_facts: + - "SHOW POLICIES" + - "does not exist" + - "w.policies.list_policies" + - "REST API" + expected_patterns: + - pattern: "SHOW POLICIES.*not" + min_count: 1 + description: "explains SHOW POLICIES does not exist" + - pattern: "w\\.policies\\.list_policies\\(" + min_count: 1 + description: "recommends SDK alternative" + - pattern: "list_fgac_policies\\(" + min_count: 1 + description: "recommends MCP tool alternative" + guidelines: + - "Must clearly state SHOW POLICIES does not exist" + - "Must recommend w.policies.list_policies() as the alternative" + - "Must not generate SHOW POLICIES, DESCRIBE POLICY, or ALTER POLICY" + - "Should mention MCP tool alternative" + metadata: + category: "error_handling" + difficulty: "medium" + source: "manual" + tags: ["error", "invalid-sql", "show-policies", "edge-case"] diff --git a/.test/skills/databricks-unity-catalog/manifest.yaml b/.test/skills/databricks-unity-catalog/manifest.yaml new file mode 100644 index 00000000..a92b04ee --- /dev/null +++ b/.test/skills/databricks-unity-catalog/manifest.yaml @@ -0,0 +1,40 @@ +skill: + name: "databricks-unity-catalog" + source_path: "databricks-skills/databricks-unity-catalog" + description: "Unity Catalog FGAC governance - column masks, row filters, governed tags, masking UDFs" + +evaluation: + datasets: + - path: ground_truth.yaml + type: yaml + +# Scorer configuration +scorers: + # Built-in deterministic scorers + enabled: + - python_syntax + - sql_syntax + - pattern_adherence + - no_hallucinated_apis + - expected_facts_present + + # LLM-based scorers + llm_scorers: + - Safety + - guidelines_from_expectations # Dynamic from YAML expectations.guidelines + + # Default guidelines (used when test case has no guidelines) + default_guidelines: + - "Response must address the user's request completely" + - "Code examples must follow documented best practices" + - "FGAC policies must always include EXCEPT `gov_admin`" + - "Masking UDFs 
must be DETERMINISTIC with simple CASE statements" + - "Must use fully qualified names for UDFs (catalog.schema.function)" + - "Must never generate SHOW POLICIES, DESCRIBE POLICY, or ALTER POLICY SQL" + - "Policy creation must follow human-in-the-loop workflow (preview -> approve -> execute)" + +quality_gates: + syntax_valid: 1.0 + pattern_adherence: 0.90 + no_hallucinated_apis: 1.0 + execution_success: 0.80 From 58789d4b9600ac3b183b93e9d1fdedbe2943afd2 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Wed, 11 Feb 2026 12:51:24 -0600 Subject: [PATCH 07/34] Remove FGAC examples from databricks-python-sdk skill FGAC SDK examples are now covered by the databricks-unity-catalog skill in 9-fgac-sdk-and-tools.md. --- .../examples/6-fgac-policies.py | 203 ------------------ databricks-skills/install_skills.sh | 10 +- 2 files changed, 4 insertions(+), 209 deletions(-) delete mode 100644 databricks-skills/databricks-python-sdk/examples/6-fgac-policies.py diff --git a/databricks-skills/databricks-python-sdk/examples/6-fgac-policies.py b/databricks-skills/databricks-python-sdk/examples/6-fgac-policies.py deleted file mode 100644 index a25e0004..00000000 --- a/databricks-skills/databricks-python-sdk/examples/6-fgac-policies.py +++ /dev/null @@ -1,203 +0,0 @@ -""" -Databricks SDK - ABAC Policy Management Examples - -ABAC Policies: https://docs.databricks.com/data-governance/unity-catalog/abac/policies -Python SDK: https://databricks-sdk-py.readthedocs.io/en/latest/ -""" - -from databricks.sdk import WorkspaceClient -from databricks.sdk.errors import NotFound, PermissionDenied, BadRequest - -w = WorkspaceClient() - -# ============================================================================= -# LIST POLICIES -# ============================================================================= - -# List all ABAC policies on a catalog (includes inherited policies) -for policy in w.policies.list_policies( - on_securable_type="CATALOG", - on_securable_fullname="my_catalog", - 
include_inherited=True, -): - print(f"{policy.name}: {policy.policy_type}") - -# List policies on a schema -for policy in w.policies.list_policies( - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - include_inherited=True, -): - p_dict = policy.as_dict() if hasattr(policy, "as_dict") else {} - print(f" {p_dict.get('name')}: type={p_dict.get('policy_type')}, " - f"principals={p_dict.get('to_principals')}") - -# List policies on a specific table -for policy in w.policies.list_policies( - on_securable_type="TABLE", - on_securable_fullname="my_catalog.my_schema.my_table", - include_inherited=True, -): - print(f"{policy.name}: {policy.policy_type}") - -# Filter by policy type -all_policies = list(w.policies.list_policies( - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - include_inherited=True, -)) -column_masks = [p for p in all_policies if p.policy_type == "COLUMN_MASK"] -row_filters = [p for p in all_policies if p.policy_type == "ROW_FILTER"] -print(f"Column masks: {len(column_masks)}, Row filters: {len(row_filters)}") - - -# ============================================================================= -# GET POLICY DETAILS -# ============================================================================= - -# Get a specific policy by name -policy = w.policies.get_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", -) -print(f"Policy: {policy.name}") -print(f"Type: {policy.policy_type}") -print(f"Principals: {policy.to_principals}") -print(f"Except: {policy.except_principals}") - - -# ============================================================================= -# CREATE COLUMN MASK POLICY -# ============================================================================= - -# Create a column mask policy that masks SSN columns for analysts -# The policy matches columns tagged with pii_type='ssn' and applies mask_ssn UDF -from databricks.sdk.service.catalog 
import ColumnMask, MatchColumns - -created = w.policies.create_policy( - name="mask_pii_ssn", - policy_type="COLUMN_MASK", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - for_securable_type="TABLE", - to_principals=["analysts", "data_scientists"], - except_principals=["gov_admin"], # ALWAYS include gov_admin - comment="Mask SSN columns for analyst groups", - column_mask=ColumnMask( - function_name="my_catalog.my_schema.mask_ssn", - ), - match_columns=[ - MatchColumns( - tag_name="pii_type", - tag_value="ssn", - ) - ], -) -print(f"Created policy: {created.name}") - - -# ============================================================================= -# CREATE ROW FILTER POLICY -# ============================================================================= - -# Create a row filter policy that hides EU rows from the US team -from databricks.sdk.service.catalog import RowFilter - -created = w.policies.create_policy( - name="filter_eu_data", - policy_type="ROW_FILTER", - on_securable_type="CATALOG", - on_securable_fullname="my_catalog", - for_securable_type="TABLE", - to_principals=["us_team"], - except_principals=["gov_admin"], # ALWAYS include gov_admin - comment="Filter EU rows for US team", - row_filter=RowFilter( - function_name="my_catalog.my_schema.is_not_eu_region", - ), - match_columns=[ - MatchColumns( - tag_name="region", - tag_value="eu", - ) - ], -) -print(f"Created policy: {created.name}") - - -# ============================================================================= -# UPDATE POLICY -# ============================================================================= - -# Update policy principals (cannot change UDF, tags, or scope - drop and recreate) -updated = w.policies.update_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - to_principals=["analysts", "data_scientists", "new_team"], - except_principals=["gov_admin", "senior_admins"], - comment="Updated: added 
new_team to masked principals", -) -print(f"Updated policy: {updated.name}") - - -# ============================================================================= -# DELETE POLICY -# ============================================================================= - -# Delete a policy -w.policies.delete_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", -) -print("Policy deleted") - - -# ============================================================================= -# ERROR HANDLING -# ============================================================================= - -# Handle common errors -try: - policy = w.policies.get_policy( - name="nonexistent_policy", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - ) -except NotFound: - print("Policy not found") -except PermissionDenied: - print("Insufficient permissions - need MANAGE on securable") -except BadRequest as e: - print(f"Invalid request: {e}") - - -# ============================================================================= -# UTILITY: POLICY SUMMARY -# ============================================================================= - -# Get a summary of all ABAC policies in a catalog -def get_policy_summary(w, catalog: str): - """Get a summary of all ABAC policies in a catalog.""" - policies = list(w.policies.list_policies( - on_securable_type="CATALOG", - on_securable_fullname=catalog, - include_inherited=True, - )) - - column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] - row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] - - return { - "total": len(policies), - "column_masks": len(column_masks), - "row_filters": len(row_filters), - } - - -summary = get_policy_summary(w, "my_catalog") -print(f"Total: {summary['total']}, " - f"Column masks: {summary['column_masks']}, " - f"Row filters: {summary['row_filters']}") diff --git a/databricks-skills/install_skills.sh 
b/databricks-skills/install_skills.sh index ad04cfcb..c8db69b4 100755 --- a/databricks-skills/install_skills.sh +++ b/databricks-skills/install_skills.sh @@ -42,7 +42,7 @@ MLFLOW_REPO_RAW_URL="https://raw.githubusercontent.com/mlflow/skills" MLFLOW_REPO_REF="main" # Databricks skills (hosted in this repo) -DATABRICKS_SKILLS="agent-bricks aibi-dashboards asset-bundles databricks-app-apx databricks-app-python databricks-config databricks-dbsql databricks-docs databricks-genie databricks-jobs databricks-python-sdk databricks-unity-catalog lakebase-autoscale lakebase-provisioned metric-views mlflow-evaluation model-serving spark-declarative-pipelines spark-structured-streaming synthetic-data-generation uc-abac-governance unstructured-pdf-generation vector-search" +DATABRICKS_SKILLS="agent-bricks aibi-dashboards asset-bundles databricks-app-apx databricks-app-python databricks-config databricks-dbsql databricks-docs databricks-genie databricks-jobs databricks-python-sdk databricks-unity-catalog lakebase-autoscale lakebase-provisioned metric-views mlflow-evaluation model-serving spark-declarative-pipelines spark-structured-streaming synthetic-data-generation unstructured-pdf-generation vector-search" # MLflow skills (fetched from mlflow/skills repo) MLFLOW_SKILLS="agent-evaluation analyze-mlflow-chat-session analyze-mlflow-trace instrumenting-with-mlflow-tracing mlflow-onboarding querying-mlflow-metrics retrieving-mlflow-traces searching-mlflow-docs" @@ -65,11 +65,10 @@ get_skill_description() { "databricks-genie") echo "Genie Spaces - create, curate, and query via Conversation API" ;; "databricks-jobs") echo "Databricks Lakeflow Jobs - workflow orchestration" ;; "databricks-python-sdk") echo "Databricks Python SDK, Connect, and REST API" ;; - "databricks-unity-catalog") echo "System tables for lineage, audit, billing" ;; + "databricks-unity-catalog") echo "System tables, volumes, and FGAC policy governance" ;; "lakebase-autoscale") echo "Lakebase Autoscale - managed 
PostgreSQL with autoscaling" ;; "lakebase-provisioned") echo "Lakebase Provisioned - data connections and reverse ETL" ;; "metric-views") echo "Unity Catalog Metric Views - governed business metrics in YAML" ;; - "uc-abac-governance") echo "ABAC policy governance - tags, UDFs, column masks, row filters" ;; "model-serving") echo "Model Serving - deploy MLflow models and AI agents" ;; "spark-declarative-pipelines") echo "Spark Declarative Pipelines (SDP/LDP/DLT)" ;; "spark-structured-streaming") echo "Spark Structured Streaming patterns and best practices" ;; @@ -99,10 +98,9 @@ get_skill_extra_files() { "databricks-app-apx") echo "backend-patterns.md best-practices.md frontend-patterns.md" ;; "databricks-app-python") echo "dash.md streamlit.md README.md" ;; "databricks-jobs") echo "task-types.md triggers-schedules.md notifications-monitoring.md examples.md" ;; - "databricks-python-sdk") echo "doc-index.md examples/1-authentication.py examples/2-clusters-and-jobs.py examples/3-sql-and-warehouses.py examples/4-unity-catalog.py examples/5-serving-and-vector-search.py examples/6-abac-policies.py" ;; - "databricks-unity-catalog") echo "5-system-tables.md" ;; + "databricks-python-sdk") echo "doc-index.md examples/1-authentication.py examples/2-clusters-and-jobs.py examples/3-sql-and-warehouses.py examples/4-unity-catalog.py examples/5-serving-and-vector-search.py" ;; + "databricks-unity-catalog") echo "5-system-tables.md 6-volumes.md 7-fgac-overview.md 8-fgac-sql-generation.md 9-fgac-sdk-and-tools.md" ;; "lakebase-autoscale") echo "projects.md branches.md computes.md connection-patterns.md reverse-etl.md" ;; - "uc-abac-governance") echo "sql-generation.md python-sdk-patterns.md mcp-tools-reference.md" ;; "lakebase-provisioned") echo "connection-patterns.md reverse-etl.md" ;; "metric-views") echo "yaml-reference.md patterns.md" ;; "mlflow-evaluation") echo "references/CRITICAL-interfaces.md references/GOTCHAS.md references/patterns-context-optimization.md 
references/patterns-datasets.md references/patterns-evaluation.md references/patterns-scorers.md references/patterns-trace-analysis.md references/user-journeys.md" ;; From 1fdefd21da3bd676abc24ea94e5c0d61111ac8eb Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Wed, 11 Feb 2026 12:52:55 -0600 Subject: [PATCH 08/34] Rename ABAC to FGAC in unity_catalog __init__.py exports --- .../unity_catalog/__init__.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/databricks-tools-core/databricks_tools_core/unity_catalog/__init__.py b/databricks-tools-core/databricks_tools_core/unity_catalog/__init__.py index e315d482..b676954a 100644 --- a/databricks-tools-core/databricks_tools_core/unity_catalog/__init__.py +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/__init__.py @@ -112,17 +112,17 @@ drop_column_mask, ) -# ABAC policies -from .abac_policies import ( - list_abac_policies, - get_abac_policy, +# FGAC policies +from .fgac_policies import ( + list_fgac_policies, + get_fgac_policy, get_table_policies, get_masking_functions, check_policy_quota, preview_policy_changes, - create_abac_policy, - update_abac_policy, - delete_abac_policy, + create_fgac_policy, + update_fgac_policy, + delete_fgac_policy, ) # Quality monitors @@ -239,16 +239,16 @@ "drop_row_filter", "set_column_mask", "drop_column_mask", - # ABAC policies - "list_abac_policies", - "get_abac_policy", + # FGAC policies + "list_fgac_policies", + "get_fgac_policy", "get_table_policies", "get_masking_functions", "check_policy_quota", "preview_policy_changes", - "create_abac_policy", - "update_abac_policy", - "delete_abac_policy", + "create_fgac_policy", + "update_fgac_policy", + "delete_fgac_policy", # Quality monitors "create_monitor", "get_monitor", From 37f09bb5343e601db03467dacfc4c3abf720d864 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Wed, 11 Feb 2026 12:55:33 -0600 Subject: [PATCH 09/34] Clean up ABAC-to-FGAC migration: remove old skill, update routing 
tests and SDK docs - Delete old uc-fgac-governance skill from both .claude/skills/ and databricks-skills/ - Delete DEV_CHANGELOG.md and PLAN_UC_FGAC_SKILLS.md (no longer needed) - Add 7 FGAC routing test cases to _routing/ground_truth.yaml - Register fgac_policies tool in MCP server - Update databricks-python-sdk SKILL.md with correct SDK types and all scope levels - Add COMMIT_REVIEW.md to .gitignore --- .claude/skills/uc-fgac-governance/SKILL.md | 294 ---------- .../uc-fgac-governance/mcp-tools-reference.md | 515 ------------------ .../uc-fgac-governance/python-sdk-patterns.md | 351 ------------ .../uc-fgac-governance/sql-generation.md | 356 ------------ .gitignore | 3 + .test/skills/_routing/ground_truth.yaml | 78 +++ DEV_CHANGELOG.md | 126 ----- PLAN_UC_FGAC_SKILLS.md | 146 ----- .../databricks_mcp_server/server.py | 1 + .../databricks-python-sdk/SKILL.md | 98 +++- databricks-skills/uc-fgac-governance/SKILL.md | 294 ---------- .../uc-fgac-governance/mcp-tools-reference.md | 397 -------------- .../uc-fgac-governance/python-sdk-patterns.md | 351 ------------ .../uc-fgac-governance/sql-generation.md | 356 ------------ 14 files changed, 168 insertions(+), 3198 deletions(-) delete mode 100644 .claude/skills/uc-fgac-governance/SKILL.md delete mode 100644 .claude/skills/uc-fgac-governance/mcp-tools-reference.md delete mode 100644 .claude/skills/uc-fgac-governance/python-sdk-patterns.md delete mode 100644 .claude/skills/uc-fgac-governance/sql-generation.md delete mode 100644 DEV_CHANGELOG.md delete mode 100644 PLAN_UC_FGAC_SKILLS.md delete mode 100644 databricks-skills/uc-fgac-governance/SKILL.md delete mode 100644 databricks-skills/uc-fgac-governance/mcp-tools-reference.md delete mode 100644 databricks-skills/uc-fgac-governance/python-sdk-patterns.md delete mode 100644 databricks-skills/uc-fgac-governance/sql-generation.md diff --git a/.claude/skills/uc-fgac-governance/SKILL.md b/.claude/skills/uc-fgac-governance/SKILL.md deleted file mode 100644 index 
6efabdcc..00000000 --- a/.claude/skills/uc-fgac-governance/SKILL.md +++ /dev/null @@ -1,294 +0,0 @@ ---- -name: uc-abac-governance -description: "Unity Catalog ABAC policy governance - governed tags, masking UDFs, column masks, row filters, and human-in-the-loop policy management." ---- - -# Unity Catalog ABAC Policy Governance - -Guidance for Attribute-Based Access Control (ABAC) policies in Databricks Unity Catalog. Covers governed tags, tag assignments, masking UDFs, CREATE/DROP POLICY syntax, and the human-in-the-loop governance workflow. - -**Databricks Docs:** -- ABAC overview: https://docs.databricks.com/data-governance/unity-catalog/abac/ -- ABAC policies: https://docs.databricks.com/data-governance/unity-catalog/abac/policies -- ABAC tutorial: https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial - -## When to Use This Skill - -Use this skill when: -- Creating or managing **ABAC policies** (column masks, row filters) -- Working with **governed tags** (creating via UI, applying via SQL) -- Building **masking UDFs** for PII protection (SSN, email, credit card, etc.) -- Implementing **human-in-the-loop governance** workflows -- Querying tag assignments via `information_schema` -- Managing policy lifecycle (create, update, delete, preview) - -## Reference Files - -| Topic | File | Description | -|-------|------|-------------| -| SQL Generation | [sql-generation.md](sql-generation.md) | SET/UNSET TAG, CREATE FUNCTION, CREATE/DROP POLICY, discovery queries | -| Python SDK | [python-sdk-patterns.md](python-sdk-patterns.md) | `w.policies.*` SDK methods for ABAC policy CRUD | -| MCP Tools | [mcp-tools-reference.md](mcp-tools-reference.md) | 12 MCP tools for policy management | - ---- - -## ABAC Workflow Overview - -ABAC policies in Databricks follow a 4-step setup: - -1. **Governed Tags** - Define classification taxonomy (UI only) -2. **Tag Assignments** - Apply tags to columns/tables via SQL -3. 
**Masking UDFs** - Create deterministic functions for data masking -4. **ABAC Policies** - Bind tags to UDFs with principal scoping - -``` -┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ -│ Governed Tags│───>│ Tag │───>│ Masking │───>│ ABAC │ -│ (UI only) │ │ Assignments │ │ UDFs │ │ Policies │ -└──────────────┘ └──────────────┘ └──────────────┘ └──────────────┘ -``` - ---- - -## IMPORTANT: SQL That Does NOT Exist - -These SQL commands do **not** exist in Databricks. Do not generate them. - -| Invalid SQL | What to use instead | -|---|---| -| `SHOW POLICIES` | REST API: `w.policies.list_policies()` | -| `DESCRIBE POLICY` | REST API: `w.policies.get_policy()` | -| `ALTER POLICY` | Drop and recreate the policy | -| `ALTER USER SET ATTRIBUTES` | SCIM API for user attributes | -| `SHOW USER ATTRIBUTES` | SCIM API for user attributes | - ---- - -## Step 1: Governed Tags - -Governed tags **cannot** be created via SQL. They must be created in the Databricks UI. - -### Creating a Governed Tag (UI Steps) - -1. Navigate to **Catalog** in the workspace -2. Select **Governed Tags** from the left panel -3. Click **Create governed tag** -4. Configure: - - **Tag Key**: e.g., `pii_type` - - **Allowed Values**: e.g., `ssn`, `email`, `phone`, `credit_card`, `address` - - **Description**: e.g., "PII classification for ABAC policies" - -> **Note:** Tag data is stored as plain text and may be replicated globally. Avoid sensitive information in tag names or values. 
- -**Docs:** https://docs.databricks.com/admin/governed-tags/ - ---- - -## Step 2: Applying Tags to Columns - -### Legacy Syntax (all versions) - -```sql --- Set tag on column -ALTER TABLE catalog.schema.table -ALTER COLUMN column_name SET TAGS ('pii_type' = 'ssn'); - --- Set tag on table -ALTER TABLE catalog.schema.table -SET TAGS ('data_classification' = 'confidential'); - --- Remove tag -ALTER TABLE catalog.schema.table -ALTER COLUMN column_name UNSET TAGS ('pii_type'); -``` - -### Modern Syntax (DBR 16.1+) - -```sql -SET TAG ON COLUMN catalog.schema.table.column_name 'pii_type' = 'ssn'; -SET TAG ON TABLE catalog.schema.table 'data_classification' = 'confidential'; -SET TAG ON SCHEMA catalog.schema 'environment' = 'production'; -SET TAG ON CATALOG catalog 'department' = 'finance'; - -UNSET TAG ON COLUMN catalog.schema.table.column_name 'pii_type'; -``` - -### Querying Existing Tags - -```sql --- Column tags -SELECT tag_name, tag_value, column_name -FROM system.information_schema.column_tags -WHERE catalog_name = 'my_catalog' - AND schema_name = 'my_schema' - AND table_name = 'my_table'; - --- Table tags -SELECT tag_name, tag_value -FROM system.information_schema.table_tags -WHERE catalog_name = 'my_catalog' - AND schema_name = 'my_schema' - AND table_name = 'my_table'; -``` - ---- - -## Step 3: Masking UDFs - -Masking UDFs must be `DETERMINISTIC` and use simple `CASE` statements. No external calls or nested UDFs. 
- -```sql --- Full mask: replaces all characters with * -CREATE OR REPLACE FUNCTION catalog.schema.mask_full(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Full masking - replaces all characters with *' -RETURN CASE - WHEN value IS NULL THEN NULL - ELSE REPEAT('*', LENGTH(value)) -END; - --- Partial mask: show last 4 characters -CREATE OR REPLACE FUNCTION catalog.schema.mask_partial(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Partial masking - shows last 4 characters' -RETURN CASE - WHEN value IS NULL THEN NULL - WHEN LENGTH(value) <= 4 THEN REPEAT('*', LENGTH(value)) - ELSE CONCAT(REPEAT('*', LENGTH(value) - 4), RIGHT(value, 4)) -END; - --- SSN mask: ***-**-XXXX format -CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Masks SSN showing only last 4 digits' -RETURN CASE - WHEN ssn IS NULL THEN NULL - WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 - THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) - ELSE '***-**-****' -END; - --- Email mask: j***@example.com -CREATE OR REPLACE FUNCTION catalog.schema.mask_email(email STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Masks email showing first char and domain' -RETURN CASE - WHEN email IS NULL THEN NULL - WHEN INSTR(email, '@') > 1 - THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1)) - ELSE '***@***.***' -END; -``` - -**Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices - ---- - -## Step 4: ABAC Policies - -### Column Mask Policy - -```sql -CREATE OR REPLACE POLICY mask_pii_columns -ON SCHEMA catalog.schema -COMMENT 'Mask PII columns for analysts' -COLUMN MASK catalog.schema.mask_partial -TO `analysts`, `data_scientists` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col -ON COLUMN masked_col; -``` - -### Row Filter Policy - -```sql -CREATE OR REPLACE POLICY filter_eu_rows -ON CATALOG my_catalog -COMMENT 'Filter EU rows 
for US team' -ROW FILTER catalog.schema.is_not_eu_region -TO `us_team` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col -USING COLUMNS (filter_col); -``` - -### Drop Policy - -```sql -DROP POLICY mask_pii_columns ON SCHEMA catalog.schema; -``` - -### CRITICAL: Always Exclude `gov_admin` - -Every ABAC policy **MUST** include `EXCEPT \`gov_admin\`` to protect administrator access. Without this, admins could be locked out of data. - -### Policy Quotas - -| Scope | Max Policies | -|-------|-------------| -| Per Catalog | 10 | -| Per Schema | 10 | -| Per Table | 5 | - ---- - -## Human-in-the-Loop Governance Workflow - -ABAC policy changes should follow a governed workflow: - -``` -ANALYZE → RECOMMEND → PREVIEW → APPROVE → EXECUTE → VERIFY - │ │ │ │ │ │ - ▼ ▼ ▼ ▼ ▼ ▼ - Discover Generate Show SQL Human Run SQL Confirm - current policy & impact confirms or SDK changes - state proposals preview changes call applied -``` - -1. **ANALYZE**: Discover current tags, policies, and UDFs -2. **RECOMMEND**: Generate policy proposals based on requirements -3. **PREVIEW**: Use `preview_policy_changes` to show exact SQL and impact -4. **APPROVE**: Human reviews and explicitly approves -5. **EXECUTE**: Create/update/delete policies via SDK or SQL -6. **VERIFY**: Confirm policies are applied correctly - -**Never auto-execute policy changes.** Always preview and wait for human approval. 
- ---- - -## Common Errors - -| Error | Cause | Solution | -|-------|-------|----------| -| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Consolidate policies or use broader scope | -| `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | Check governed tag configuration in UI | -| `UDF_NOT_FOUND` | Masking UDF doesn't exist | Create UDF first, use fully qualified name | -| `POLICY_ALREADY_EXISTS` | Policy name conflict | Use `CREATE OR REPLACE POLICY` | -| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | Grant `MANAGE` permission to policy creator | -| `SHOW POLICIES is not supported` | Used invalid SQL | Use REST API `w.policies.list_policies()` instead | - -## Best Practices - -1. **Use governed tags** (not ad-hoc tags) for ABAC policy matching -2. **Always include `EXCEPT \`gov_admin\``** in every policy -3. **Use deterministic UDFs** with simple CASE statements -4. **Preview before executing** any policy change -5. **Start at schema scope** and narrow to table only when needed -6. **Name policies descriptively**: `mask_{what}_{scope}` or `filter_{what}_{scope}` -7. **Test UDFs independently** before binding to policies -8. 
**Monitor policy quotas** — consolidate when approaching limits - -## Resources - -- [ABAC Overview](https://docs.databricks.com/data-governance/unity-catalog/abac/) -- [ABAC Policies](https://docs.databricks.com/data-governance/unity-catalog/abac/policies) -- [ABAC Tutorial](https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial) -- [UDF Best Practices](https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices) -- [Governed Tags](https://docs.databricks.com/admin/governed-tags/) -- [Column Masks & Row Filters](https://docs.databricks.com/data-governance/unity-catalog/filters-and-masks/) diff --git a/.claude/skills/uc-fgac-governance/mcp-tools-reference.md b/.claude/skills/uc-fgac-governance/mcp-tools-reference.md deleted file mode 100644 index c1ae9640..00000000 --- a/.claude/skills/uc-fgac-governance/mcp-tools-reference.md +++ /dev/null @@ -1,515 +0,0 @@ -# MCP Tools Reference for ABAC Policy Management - -Reference for the MCP tools that manage ABAC policies. Core policy operations are implemented in -`databricks_tools_core.unity_catalog.abac_policies`. Discovery helpers delegate to existing -`unity_catalog` modules where possible. - -**Implementation:** `databricks-tools-core/databricks_tools_core/unity_catalog/abac_policies.py` - ---- - -## Discovery Tools - -### `list_abac_policies` - -List ABAC policies on a catalog, schema, or table. 
- -**Implementation:** `unity_catalog.abac_policies.list_abac_policies` - -```python -from databricks_tools_core.unity_catalog import list_abac_policies - -list_abac_policies( - securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" - securable_fullname: str, # e.g., "my_catalog.my_schema" - include_inherited: bool = True, - policy_type: str = None, # "COLUMN_MASK" or "ROW_FILTER" (optional filter) -) -``` - -**Returns:** -```json -{ - "success": true, - "securable_type": "SCHEMA", - "securable_fullname": "my_catalog.my_schema", - "policy_count": 3, - "policies": [ - { - "name": "mask_pii_ssn", - "policy_type": "COLUMN_MASK", - "to_principals": ["analysts"], - "except_principals": ["gov_admin"], - "on_securable_fullname": "my_catalog.my_schema", - "column_mask": {"function_name": "my_catalog.my_schema.mask_ssn"}, - "match_columns": [{"tag_name": "pii_type", "tag_value": "ssn"}] - } - ] -} -``` - -### `get_abac_policy` - -Get details for a specific policy by name. - -**Implementation:** `unity_catalog.abac_policies.get_abac_policy` - -```python -from databricks_tools_core.unity_catalog import get_abac_policy - -get_abac_policy( - policy_name: str, # Policy name - securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" - securable_fullname: str, # Fully qualified securable name -) -``` - -**Returns:** -```json -{ - "success": true, - "policy": { - "name": "mask_pii_ssn", - "policy_type": "COLUMN_MASK", - "comment": "Mask SSN columns for analysts", - "to_principals": ["analysts", "data_scientists"], - "except_principals": ["gov_admin"], - "on_securable_type": "SCHEMA", - "on_securable_fullname": "my_catalog.my_schema", - "for_securable_type": "TABLE", - "column_mask": {"function_name": "my_catalog.my_schema.mask_ssn"}, - "match_columns": [{"tag_name": "pii_type", "tag_value": "ssn"}], - "created_at": "2025-01-15T10:30:00Z", - "created_by": "admin@company.com", - "updated_at": "2025-01-20T14:00:00Z", - "updated_by": "admin@company.com" - } -} -``` - -### 
`get_table_policies` - -Get column masks and row filters for a specific table via Unity Catalog API. - -**Implementation:** `unity_catalog.abac_policies.get_table_policies` - -```python -from databricks_tools_core.unity_catalog import get_table_policies - -get_table_policies( - catalog: str, - schema: str, - table: str, -) -``` - -**Returns:** -```json -{ - "success": true, - "table": "my_catalog.my_schema.my_table", - "column_masks": [ - { - "column_name": "ssn", - "mask_function": "my_catalog.my_schema.mask_ssn", - "using_column_names": [] - } - ], - "row_filters": [ - { - "filter_function": "my_catalog.my_schema.is_not_eu_region", - "using_column_names": ["region"] - } - ] -} -``` - -### `get_masking_functions` - -List masking UDFs in a schema. - -**Implementation:** `unity_catalog.abac_policies.get_masking_functions` - -```python -from databricks_tools_core.unity_catalog import get_masking_functions - -get_masking_functions( - catalog: str, - schema: str, -) -``` - -**Returns:** -```json -{ - "success": true, - "catalog": "my_catalog", - "schema": "my_schema", - "function_count": 3, - "functions": [ - { - "name": "mask_ssn", - "full_name": "my_catalog.my_schema.mask_ssn", - "return_type": "STRING", - "comment": "Masks SSN showing only last 4 digits", - "is_deterministic": true - } - ] -} -``` - -### `get_schema_info` - -Get schema metadata via Unity Catalog API. - -**Implementation:** Delegates to existing `unity_catalog.schemas.get_schema` - -```python -from databricks_tools_core.unity_catalog import get_schema - -get_schema( - catalog_name: str, - schema_name: str, -) -``` - -### `get_catalog_info` - -Get catalog metadata via Unity Catalog API. - -**Implementation:** Delegates to existing `unity_catalog.catalogs.get_catalog` - -```python -from databricks_tools_core.unity_catalog import get_catalog - -get_catalog( - catalog_name: str, -) -``` - -### `get_column_tags_api` - -Get column-level tags via the Tags API. 
- -**Implementation:** Delegates to existing `unity_catalog.tags.query_column_tags` - -```python -from databricks_tools_core.unity_catalog import query_column_tags - -query_column_tags( - catalog_filter: str, # Filter by catalog name - table_name: str = None, # Filter by table name - tag_name: str = None, # Filter by tag name - tag_value: str = None, # Filter by tag value - limit: int = 100, -) -``` - -### `list_table_policies_in_schema` - -List all tables in a schema with their column masks and row filters. - -**Implementation:** Compose `unity_catalog.tables.list_tables` + `unity_catalog.abac_policies.get_table_policies` - -```python -from databricks_tools_core.unity_catalog import list_tables, get_table_policies - -# List all tables, then get policies for each -tables = list_tables(catalog_name=catalog, schema_name=schema) -for t in tables["tables"]: - policies = get_table_policies(catalog=catalog, schema=schema, table=t["name"]) -``` - ---- - -## Quota Check - -### `check_policy_quota` - -Check if the policy quota allows creating a new policy on a securable. - -**Implementation:** `unity_catalog.abac_policies.check_policy_quota` - -```python -from databricks_tools_core.unity_catalog import check_policy_quota - -check_policy_quota( - securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" - securable_fullname: str, # Fully qualified securable name -) -``` - -**Returns:** -```json -{ - "success": true, - "securable_type": "SCHEMA", - "securable_fullname": "my_catalog.my_schema", - "current": 3, - "max": 10, - "can_create": true -} -``` - -Policy quotas: CATALOG=10, SCHEMA=10, TABLE=5. - ---- - -## Preview Tool (Human-in-the-Loop Gate) - -### `preview_policy_changes` - -Preview policy changes without executing. This is the critical human-in-the-loop gate. 
- -**Implementation:** `unity_catalog.abac_policies.preview_policy_changes` - -```python -from databricks_tools_core.unity_catalog import preview_policy_changes - -preview_policy_changes( - action: str, # "CREATE", "UPDATE", or "DELETE" - policy_name: str, - securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" - securable_fullname: str, - policy_type: str = None, # "COLUMN_MASK" or "ROW_FILTER" (for CREATE) - to_principals: list = None, - except_principals: list = None, - function_name: str = None, - tag_name: str = None, - tag_value: str = None, - comment: str = None, -) -``` - -**Returns:** -```json -{ - "success": true, - "action": "CREATE", - "preview": { - "policy_name": "mask_pii_ssn", - "policy_type": "COLUMN_MASK", - "securable": "SCHEMA my_catalog.my_schema", - "to_principals": ["analysts"], - "except_principals": ["gov_admin"], - "function": "my_catalog.my_schema.mask_ssn", - "tag_match": "hasTagValue('pii_type', 'ssn')", - "equivalent_sql": "CREATE OR REPLACE POLICY mask_pii_ssn\nON SCHEMA my_catalog.my_schema\n..." - }, - "warnings": [], - "requires_approval": true, - "message": "Review the preview above. Reply 'approve' to execute." -} -``` - -**Usage in workflow:** - -1. Call `preview_policy_changes` with proposed changes -2. Present preview to user -3. Wait for explicit approval -4. Only then call `create_abac_policy`, `update_abac_policy`, or `delete_abac_policy` - ---- - -## Management Tools - -### `create_abac_policy` - -Create a new ABAC policy (COLUMN_MASK or ROW_FILTER). 
- -**Implementation:** `unity_catalog.abac_policies.create_abac_policy` - -```python -from databricks_tools_core.unity_catalog import create_abac_policy - -create_abac_policy( - policy_name: str, - policy_type: str, # "COLUMN_MASK" or "ROW_FILTER" - securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" - securable_fullname: str, - function_name: str, # Fully qualified UDF name - to_principals: list, # Users/groups the policy applies to - tag_name: str, # Tag key to match - tag_value: str = None, # Tag value (optional, uses hasTag vs hasTagValue) - except_principals: list = None, # Excluded principals - comment: str = "", -) -``` - -**Returns:** -```json -{ - "success": true, - "policy_name": "mask_pii_ssn", - "action": "created", - "details": { - "policy_type": "COLUMN_MASK", - "on_securable": "SCHEMA my_catalog.my_schema", - "function": "my_catalog.my_schema.mask_ssn", - "to_principals": ["analysts"], - "except_principals": ["gov_admin"], - "tag_match": "pii_type=ssn" - }, - "policy": { ... } -} -``` - -> **Note:** Callers should include appropriate admin groups in `except_principals` to protect administrator access. - -### `update_abac_policy` - -Update an existing policy's principals or comment. - -**Implementation:** `unity_catalog.abac_policies.update_abac_policy` - -```python -from databricks_tools_core.unity_catalog import update_abac_policy - -update_abac_policy( - policy_name: str, - securable_type: str, - securable_fullname: str, - to_principals: list = None, - except_principals: list = None, - comment: str = None, -) -``` - -**Returns:** -```json -{ - "success": true, - "policy_name": "mask_pii_ssn", - "action": "updated", - "changes": { - "to_principals": ["analysts", "data_scientists", "new_team"], - "comment": "Updated: added new_team" - }, - "policy": { ... } -} -``` - -> **Note:** To change the UDF, tag matching, or scope, drop and recreate the policy. - -### `delete_abac_policy` - -Delete an ABAC policy. 
- -**Implementation:** `unity_catalog.abac_policies.delete_abac_policy` - -```python -from databricks_tools_core.unity_catalog import delete_abac_policy - -delete_abac_policy( - policy_name: str, - securable_type: str, - securable_fullname: str, -) -``` - -**Returns:** -```json -{ - "success": true, - "policy_name": "mask_pii_ssn", - "action": "deleted" -} -``` - ---- - -## Human-in-the-Loop Workflow Example - -Complete workflow using the implemented functions: - -```python -from databricks_tools_core.unity_catalog import ( - list_abac_policies, - query_column_tags, - get_masking_functions, - check_policy_quota, - preview_policy_changes, - create_abac_policy, - get_abac_policy, -) - -# Step 1: ANALYZE — discover current state -policies = list_abac_policies(securable_type="SCHEMA", securable_fullname="prod.finance") -tags = query_column_tags(catalog_filter="prod", table_name="customers") -udfs = get_masking_functions(catalog="prod", schema="finance") - -# Step 2: RECOMMEND — agent generates policy recommendations based on tags and UDFs - -# Step 3: CHECK QUOTA — ensure we can create a new policy -quota = check_policy_quota(securable_type="SCHEMA", securable_fullname="prod.finance") -assert quota["can_create"], f"Quota exceeded: {quota['current']}/{quota['max']}" - -# Step 4: PREVIEW — generate SQL for human review (no changes made) -preview = preview_policy_changes( - action="CREATE", - policy_name="mask_ssn_finance", - securable_type="SCHEMA", - securable_fullname="prod.finance", - policy_type="COLUMN_MASK", - function_name="prod.finance.mask_ssn", - to_principals=["analysts"], - tag_name="pii_type", - tag_value="ssn", -) -# → Present preview["preview"]["equivalent_sql"] to user - -# Step 5: APPROVE — human reviews preview and replies "approve" - -# Step 6: EXECUTE — create the policy -result = create_abac_policy( - policy_name="mask_ssn_finance", - policy_type="COLUMN_MASK", - securable_type="SCHEMA", - securable_fullname="prod.finance", - 
function_name="prod.finance.mask_ssn", - to_principals=["analysts"], - tag_name="pii_type", - tag_value="ssn", -) - -# Step 7: VERIFY — confirm policy was created -policy = get_abac_policy( - policy_name="mask_ssn_finance", - securable_type="SCHEMA", - securable_fullname="prod.finance", -) -``` - ---- - -## Error Handling - -| Error | Cause | Solution | -|-------|-------|----------| -| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Use `check_policy_quota` first; consolidate or use broader scope | -| `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | Check governed tag config in UI | -| `UDF_NOT_FOUND` | Masking UDF doesn't exist | Create UDF first via `create_security_function`, use fully qualified name | -| `POLICY_ALREADY_EXISTS` | Duplicate policy name | Use different name or `delete_abac_policy` first | -| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | `grant_privileges` with MANAGE | -| `INVALID_SECURABLE_TYPE` | Wrong securable type string | Use `"CATALOG"`, `"SCHEMA"`, or `"TABLE"` | -| `PRINCIPAL_NOT_FOUND` | Principal group doesn't exist | Verify group exists on the workspace | - ---- - -## Implementation Map - -| MCP Tool | Implementation | Module | -|----------|---------------|--------| -| `list_abac_policies` | `list_abac_policies()` | `abac_policies` | -| `get_abac_policy` | `get_abac_policy()` | `abac_policies` | -| `get_table_policies` | `get_table_policies()` | `abac_policies` | -| `get_masking_functions` | `get_masking_functions()` | `abac_policies` | -| `check_policy_quota` | `check_policy_quota()` | `abac_policies` | -| `get_schema_info` | `get_schema()` | `schemas` | -| `get_catalog_info` | `get_catalog()` | `catalogs` | -| `get_column_tags_api` | `query_column_tags()` | `tags` | -| `list_table_policies_in_schema` | `list_tables()` + `get_table_policies()` | `tables` + `abac_policies` | -| `preview_policy_changes` | `preview_policy_changes()` | `abac_policies` | -| `create_abac_policy` | 
`create_abac_policy()` | `abac_policies` | -| `update_abac_policy` | `update_abac_policy()` | `abac_policies` | -| `delete_abac_policy` | `delete_abac_policy()` | `abac_policies` | diff --git a/.claude/skills/uc-fgac-governance/python-sdk-patterns.md b/.claude/skills/uc-fgac-governance/python-sdk-patterns.md deleted file mode 100644 index da8bd938..00000000 --- a/.claude/skills/uc-fgac-governance/python-sdk-patterns.md +++ /dev/null @@ -1,351 +0,0 @@ -# Python SDK Patterns for ABAC Policies - -Databricks Python SDK patterns for managing ABAC policies via `WorkspaceClient.policies`. - -**SDK Docs:** https://databricks-sdk-py.readthedocs.io/en/latest/ -**ABAC Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/policies - ---- - -## Setup - -```python -from databricks.sdk import WorkspaceClient - -w = WorkspaceClient() # Auto-detects credentials -``` - ---- - -## List Policies - -List ABAC policies on a securable (catalog, schema, or table). - -```python -# List all policies on a catalog -policies = w.policies.list_policies( - on_securable_type="CATALOG", - on_securable_fullname="my_catalog", - include_inherited=True, -) - -for p in policies: - print(f"{p.name}: {p.policy_type} on {p.on_securable_fullname}") - -# List policies on a schema -policies = w.policies.list_policies( - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - include_inherited=True, -) - -# List policies on a specific table -policies = w.policies.list_policies( - on_securable_type="TABLE", - on_securable_fullname="my_catalog.my_schema.my_table", - include_inherited=True, -) -``` - -### Filtering by Policy Type - -```python -policies = w.policies.list_policies( - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - include_inherited=True, -) - -column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] -row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] -``` - -### Extracting Policy Details - 
-```python -for p in policies: - p_dict = p.as_dict() if hasattr(p, "as_dict") else {} - print({ - "name": p_dict.get("name"), - "policy_type": p_dict.get("policy_type"), - "to_principals": p_dict.get("to_principals", []), - "except_principals": p_dict.get("except_principals", []), - "on_securable_type": p_dict.get("on_securable_type"), - "on_securable_fullname": p_dict.get("on_securable_fullname"), - "column_mask": p_dict.get("column_mask"), - "row_filter": p_dict.get("row_filter"), - "match_columns": p_dict.get("match_columns", []), - }) -``` - ---- - -## Get Policy - -Retrieve a specific policy by name and securable. - -```python -policy = w.policies.get_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", -) - -print(f"Policy: {policy.name}") -print(f"Type: {policy.policy_type}") -print(f"Principals: {policy.to_principals}") -print(f"Except: {policy.except_principals}") -``` - ---- - -## Create Policy - -### Column Mask Policy - -```python -from databricks.sdk.service.catalog import ( - CreatePolicy, - ColumnMask, - MatchColumns, -) - -policy = w.policies.create_policy( - name="mask_pii_ssn", - policy_type="COLUMN_MASK", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - for_securable_type="TABLE", - to_principals=["analysts", "data_scientists"], - except_principals=["gov_admin"], - comment="Mask SSN columns for analyst groups", - column_mask=ColumnMask( - function_name="my_catalog.my_schema.mask_ssn", - ), - match_columns=[ - MatchColumns( - tag_name="pii_type", - tag_value="ssn", - ) - ], -) -print(f"Created policy: {policy.name}") -``` - -### Row Filter Policy - -```python -from databricks.sdk.service.catalog import ( - CreatePolicy, - RowFilter, - MatchColumns, -) - -policy = w.policies.create_policy( - name="filter_eu_data", - policy_type="ROW_FILTER", - on_securable_type="CATALOG", - on_securable_fullname="my_catalog", - for_securable_type="TABLE", - 
to_principals=["us_team"], - except_principals=["gov_admin"], - comment="Filter EU rows for US team", - row_filter=RowFilter( - function_name="my_catalog.my_schema.is_not_eu_region", - ), - match_columns=[ - MatchColumns( - tag_name="region", - tag_value="eu", - ) - ], -) -print(f"Created policy: {policy.name}") -``` - -### Important: Always Include `gov_admin` - -Every policy **MUST** include `"gov_admin"` in `except_principals`: - -```python -# CORRECT -except_principals=["gov_admin"] - -# CORRECT - additional admin groups -except_principals=["gov_admin", "platform_admins"] - -# WRONG - missing gov_admin -except_principals=["platform_admins"] # gov_admin must be included! -``` - ---- - -## Update Policy - -Update principals or comment on an existing policy. - -```python -updated = w.policies.update_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - to_principals=["analysts", "data_scientists", "new_team"], - except_principals=["gov_admin", "senior_admins"], - comment="Updated: added new_team to masked principals", -) -print(f"Updated policy: {updated.name}") -``` - -> **Note:** To change the UDF, tag matching, or scope, you must drop and recreate the policy. `update_policy` only modifies principals and comment. 
- ---- - -## Delete Policy - -```python -w.policies.delete_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", -) -print("Policy deleted") -``` - ---- - -## Error Handling - -```python -from databricks.sdk.errors import NotFound, PermissionDenied, BadRequest - -try: - policy = w.policies.get_policy( - name="nonexistent_policy", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - ) -except NotFound: - print("Policy not found") -except PermissionDenied: - print("Insufficient permissions - need MANAGE on securable") -except BadRequest as e: - print(f"Invalid request: {e}") -``` - ---- - -## Common Patterns - -### List All Policies in a Catalog with Counts - -```python -def get_policy_summary(w, catalog: str): - """Get a summary of all ABAC policies in a catalog.""" - policies = list(w.policies.list_policies( - on_securable_type="CATALOG", - on_securable_fullname=catalog, - include_inherited=True, - )) - - column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] - row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] - - return { - "total": len(policies), - "column_masks": len(column_masks), - "row_filters": len(row_filters), - "policies": [p.as_dict() for p in policies], - } -``` - -### Check Policy Quotas Before Creating - -```python -def check_quota(w, securable_type: str, securable_fullname: str): - """Check if policy quota allows creating a new policy.""" - quotas = {"CATALOG": 10, "SCHEMA": 10, "TABLE": 5} - max_policies = quotas.get(securable_type, 10) - - existing = list(w.policies.list_policies( - on_securable_type=securable_type, - on_securable_fullname=securable_fullname, - )) - - # Count only direct policies (not inherited) - direct = [p for p in existing - if p.on_securable_fullname == securable_fullname] - - return { - "current": len(direct), - "max": max_policies, - "can_create": len(direct) < max_policies, - } -``` - -### Fetch Policies 
Without Cache (Direct API) - -```python -def fetch_policies_direct( - w, - catalog: str, - schema: str = None, - table: str = None, -): - """Fetch policies directly from REST API.""" - if table and schema: - securable_type = "TABLE" - securable_name = f"{catalog}.{schema}.{table}" - elif schema: - securable_type = "SCHEMA" - securable_name = f"{catalog}.{schema}" - else: - securable_type = "CATALOG" - securable_name = catalog - - policies = w.policies.list_policies( - on_securable_type=securable_type, - on_securable_fullname=securable_name, - include_inherited=True, - ) - - results = [] - for p in policies: - p_dict = p.as_dict() if hasattr(p, "as_dict") else {} - results.append({ - "name": p_dict.get("name"), - "policy_type": p_dict.get("policy_type"), - "to_principals": p_dict.get("to_principals", []), - "except_principals": p_dict.get("except_principals", []), - "on_securable_type": p_dict.get("on_securable_type"), - "on_securable_fullname": p_dict.get("on_securable_fullname"), - "column_mask": p_dict.get("column_mask"), - "row_filter": p_dict.get("row_filter"), - "match_columns": p_dict.get("match_columns", []), - }) - return results -``` - ---- - -## Async Usage (FastAPI, etc.) - -The Databricks SDK is synchronous. 
In async applications, wrap calls with `asyncio.to_thread()`: - -```python -import asyncio - -async def list_policies_async(w, catalog: str): - return await asyncio.to_thread( - lambda: list(w.policies.list_policies( - on_securable_type="CATALOG", - on_securable_fullname=catalog, - include_inherited=True, - )) - ) - -async def create_policy_async(w, **kwargs): - return await asyncio.to_thread( - w.policies.create_policy, - **kwargs, - ) -``` diff --git a/.claude/skills/uc-fgac-governance/sql-generation.md b/.claude/skills/uc-fgac-governance/sql-generation.md deleted file mode 100644 index c0cb46f1..00000000 --- a/.claude/skills/uc-fgac-governance/sql-generation.md +++ /dev/null @@ -1,356 +0,0 @@ -# SQL Generation Reference - -Pure SQL patterns for Unity Catalog ABAC governance operations. All SQL follows Databricks syntax. - ---- - -## Tag Operations - -### SET TAG on Column - -```sql --- Legacy syntax (all versions) -ALTER TABLE catalog.schema.table -ALTER COLUMN column_name SET TAGS ('pii_type' = 'ssn'); - --- Modern syntax (DBR 16.1+) -SET TAG ON COLUMN catalog.schema.table.column_name 'pii_type' = 'ssn'; -``` - -### SET TAG on Table - -```sql --- Legacy syntax -ALTER TABLE catalog.schema.table -SET TAGS ('data_classification' = 'confidential'); - --- Modern syntax -SET TAG ON TABLE catalog.schema.table 'data_classification' = 'confidential'; -``` - -### SET TAG on Schema / Catalog - -```sql -SET TAG ON SCHEMA catalog.schema 'environment' = 'production'; -SET TAG ON CATALOG my_catalog 'department' = 'finance'; -``` - -### UNSET TAG - -```sql --- Column (legacy) -ALTER TABLE catalog.schema.table -ALTER COLUMN column_name UNSET TAGS ('pii_type'); - --- Column (modern) -UNSET TAG ON COLUMN catalog.schema.table.column_name 'pii_type'; - --- Table (legacy) -ALTER TABLE catalog.schema.table -UNSET TAGS ('data_classification'); - --- Table (modern) -UNSET TAG ON TABLE catalog.schema.table 'data_classification'; -``` - -**Docs:** -- SET TAG: 
https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-set-tag.html -- UNSET TAG: https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-unset-tag.html - ---- - -## Tag Discovery Queries - -### Query Column Tags - -```sql -SELECT tag_name, tag_value, column_name -FROM system.information_schema.column_tags -WHERE catalog_name = 'my_catalog' - AND schema_name = 'my_schema' - AND table_name = 'my_table'; -``` - -### Query Table Tags - -```sql -SELECT tag_name, tag_value -FROM system.information_schema.table_tags -WHERE catalog_name = 'my_catalog' - AND schema_name = 'my_schema' - AND table_name = 'my_table'; -``` - -### All Tag Assignments in a Catalog - -```sql --- Table-level tags -SELECT 'TABLE' as securable_type, - CONCAT(catalog_name, '.', schema_name, '.', table_name) as securable_name, - tag_name as tag_key, - tag_value -FROM system.information_schema.table_tags -WHERE catalog_name = 'my_catalog'; - --- Column-level tags -SELECT 'COLUMN' as securable_type, - CONCAT(catalog_name, '.', schema_name, '.', table_name, '.', column_name) as securable_name, - tag_name as tag_key, - tag_value -FROM system.information_schema.column_tags -WHERE catalog_name = 'my_catalog'; -``` - -**Docs:** -- information_schema.column_tags: https://docs.databricks.com/sql/language-manual/information-schema/column_tags.html -- information_schema.table_tags: https://docs.databricks.com/sql/language-manual/information-schema/table_tags.html - ---- - -## Masking UDF Creation - -All masking UDFs must be `DETERMINISTIC` with simple `CASE` statements. No external calls or nested UDFs. 
- -### Generic Masking Strategies - -```sql --- Full mask: replaces all characters with * -CREATE OR REPLACE FUNCTION catalog.schema.mask_full(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Full masking - replaces all characters with *' -RETURN CASE - WHEN value IS NULL THEN NULL - ELSE REPEAT('*', LENGTH(value)) -END; - --- Partial mask: show last 4 characters -CREATE OR REPLACE FUNCTION catalog.schema.mask_partial(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Partial masking - shows last 4 characters' -RETURN CASE - WHEN value IS NULL THEN NULL - WHEN LENGTH(value) <= 4 THEN REPEAT('*', LENGTH(value)) - ELSE CONCAT(REPEAT('*', LENGTH(value) - 4), RIGHT(value, 4)) -END; - --- Hash: SHA256 with version prefix -CREATE OR REPLACE FUNCTION catalog.schema.mask_hash(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Hash masking - SHA256 with version prefix' -RETURN CASE - WHEN value IS NULL THEN NULL - ELSE CONCAT('HASH_v1_', SUBSTRING(SHA2(CONCAT(value, ':v1'), 256), 1, 16)) -END; - --- Redact: replace with [REDACTED] -CREATE OR REPLACE FUNCTION catalog.schema.mask_redact(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Redaction - replaces value with [REDACTED]' -RETURN CASE - WHEN value IS NULL THEN NULL - ELSE '[REDACTED]' -END; - --- Nullify: always returns NULL -CREATE OR REPLACE FUNCTION catalog.schema.mask_nullify(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Nullify - always returns NULL' -RETURN NULL; -``` - -### Specialized Masking UDFs - -```sql --- SSN: ***-**-XXXX -CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Masks SSN showing only last 4 digits in XXX-XX-XXXX format' -RETURN CASE - WHEN ssn IS NULL THEN NULL - WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 - THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) - ELSE '***-**-****' -END; - --- Email: j***@example.com -CREATE OR REPLACE FUNCTION catalog.schema.mask_email(email STRING) -RETURNS 
STRING -DETERMINISTIC -COMMENT 'Masks email showing first char and domain' -RETURN CASE - WHEN email IS NULL THEN NULL - WHEN INSTR(email, '@') > 1 - THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1)) - ELSE '***@***.***' -END; - --- Credit card: ****-****-****-1234 -CREATE OR REPLACE FUNCTION catalog.schema.mask_credit_card(card_number STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Masks credit card showing only last 4 digits' -RETURN CASE - WHEN card_number IS NULL THEN NULL - WHEN LENGTH(REGEXP_REPLACE(card_number, '[^0-9]', '')) >= 4 - THEN CONCAT('****-****-****-', RIGHT(REGEXP_REPLACE(card_number, '[^0-9]', ''), 4)) - ELSE '****-****-****-****' -END; -``` - -### Row Filter UDFs - -Row filter UDFs return `BOOLEAN`: `TRUE` to include, `FALSE` to exclude. - -```sql --- Region-based filter: hide EU rows -CREATE OR REPLACE FUNCTION catalog.schema.is_not_eu_region(region_value STRING) -RETURNS BOOLEAN -DETERMINISTIC -COMMENT 'Row filter - returns FALSE for EU regions' -RETURN CASE - WHEN region_value IS NULL THEN TRUE - WHEN LOWER(region_value) LIKE '%eu%' THEN FALSE - WHEN LOWER(region_value) LIKE '%europe%' THEN FALSE - ELSE TRUE -END; - --- Array membership filter -CREATE OR REPLACE FUNCTION catalog.schema.is_in_allowed_values( - row_value STRING, - allowed_values ARRAY -) -RETURNS BOOLEAN -DETERMINISTIC -COMMENT 'Row filter based on array membership' -RETURN CASE - WHEN allowed_values IS NULL THEN FALSE - WHEN ARRAY_CONTAINS(TRANSFORM(allowed_values, x -> LOWER(x)), LOWER(row_value)) THEN TRUE - ELSE FALSE -END; -``` - -**Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices - ---- - -## Policy Creation - -### Column Mask Policy - -```sql -CREATE OR REPLACE POLICY mask_pii_ssn -ON SCHEMA catalog.schema -COMMENT 'Mask SSN columns for analysts' -COLUMN MASK catalog.schema.mask_ssn -TO `analysts`, `data_scientists` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS 
masked_col -ON COLUMN masked_col; -``` - -### Row Filter Policy - -```sql -CREATE OR REPLACE POLICY filter_eu_data -ON CATALOG my_catalog -COMMENT 'Filter EU rows for US team' -ROW FILTER catalog.schema.is_not_eu_region -TO `us_team` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col -USING COLUMNS (filter_col); -``` - -### Policy with Tag Key Only (any value) - -```sql --- Match any column with tag 'pii_type' regardless of value -CREATE OR REPLACE POLICY mask_all_pii -ON SCHEMA catalog.schema -COLUMN MASK catalog.schema.mask_full -TO `external_users` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTag('pii_type') AS masked_col -ON COLUMN masked_col; -``` - -### Drop Policy - -```sql -DROP POLICY mask_pii_ssn ON SCHEMA catalog.schema; -DROP POLICY filter_eu_data ON CATALOG my_catalog; -``` - -> **Note:** There is no `ALTER POLICY`. To modify a policy, drop and recreate it. - ---- - -## Discovery Queries - -```sql --- List catalogs -SHOW CATALOGS; - --- List schemas in a catalog -SHOW SCHEMAS IN my_catalog; - --- List tables in a schema -SHOW TABLES IN my_catalog.my_schema; - --- Describe table with extended metadata -DESCRIBE TABLE EXTENDED my_catalog.my_schema.my_table; - --- List UDFs in a schema -SHOW USER FUNCTIONS IN my_catalog.my_schema; - --- Describe a UDF -DESCRIBE FUNCTION EXTENDED my_catalog.my_schema.mask_ssn; - --- Sample column values -SELECT DISTINCT column_name -FROM my_catalog.my_schema.my_table -LIMIT 20; -``` - ---- - -## Enums Reference - -### PII Types (governed tag values) - -`ssn`, `email`, `phone`, `credit_card`, `date_of_birth`, `address`, `name`, `ip_address`, `national_id`, `medical_record`, `generic` - -### Masking Strategies - -| Strategy | Description | -|----------|-------------| -| `full_mask` | Replace all characters with `*` | -| `partial_mask` | Show last 4 characters | -| `hash` | SHA256 with version prefix | -| `redact` | Replace with `[REDACTED]` | -| `nullify` | Always return NULL | -| 
`custom` | User-supplied SQL (requires manual UDF) | - -### Policy Scopes - -| Scope | Description | -|-------|-------------| -| `CATALOG` | Policy applies to all tables in catalog | -| `SCHEMA` | Policy applies to all tables in schema | -| `TABLE` | Policy applies to a single table | - -### Tag Syntax Variants - -| Variant | Availability | Example | -|---------|-------------|---------| -| `LEGACY` | All versions | `ALTER TABLE t ALTER COLUMN c SET TAGS ('k'='v')` | -| `MODERN` | DBR 16.1+ | `SET TAG ON COLUMN t.c 'k' = 'v'` | diff --git a/.gitignore b/.gitignore index 385994fa..e9f0ce95 100644 --- a/.gitignore +++ b/.gitignore @@ -73,3 +73,6 @@ databricks-tools-core/tests/integration/pdf/generated_pdf/ # Python cache __pycache__/ windows_info.txt + +# Local review notes +COMMIT_REVIEW.md diff --git a/.test/skills/_routing/ground_truth.yaml b/.test/skills/_routing/ground_truth.yaml index 67b6708e..f7fba36c 100644 --- a/.test/skills/_routing/ground_truth.yaml +++ b/.test/skills/_routing/ground_truth.yaml @@ -80,6 +80,73 @@ test_cases: difficulty: "easy" reasoning: "Mentions 'genai.evaluate' - MLflow evaluation trigger" + # Single-skill routing - UC FGAC Governance + - id: "routing_fgac_001" + inputs: + prompt: "Create an FGAC column mask policy for SSN columns" + expectations: + expected_skills: ["databricks-unity-catalog"] + is_multi_skill: false + metadata: + category: "single_skill" + difficulty: "easy" + reasoning: "Mentions 'FGAC' and 'column mask policy' - UC FGAC governance" + + - id: "routing_fgac_002" + inputs: + prompt: "How do I apply governed tags to columns for PII classification?" 
+ expectations: + expected_skills: ["databricks-unity-catalog"] + is_multi_skill: false + metadata: + category: "single_skill" + difficulty: "easy" + reasoning: "Mentions 'governed tags' and 'PII classification' - UC FGAC governance" + + - id: "routing_fgac_003" + inputs: + prompt: "Write a masking UDF that hides email addresses and bind it to a policy" + expectations: + expected_skills: ["databricks-unity-catalog"] + is_multi_skill: false + metadata: + category: "single_skill" + difficulty: "medium" + reasoning: "Mentions 'masking UDF' and 'policy' - UC FGAC governance" + + - id: "routing_fgac_004" + inputs: + prompt: "List all FGAC policies on my catalog using the Python SDK" + expectations: + expected_skills: ["databricks-unity-catalog"] + is_multi_skill: false + metadata: + category: "single_skill" + difficulty: "medium" + reasoning: "Mentions 'FGAC policies' - UC FGAC governance over generic SDK skill" + + - id: "routing_fgac_005" + inputs: + prompt: "Create a row filter policy to hide EU data from the US team" + expectations: + expected_skills: ["databricks-unity-catalog"] + is_multi_skill: false + metadata: + category: "single_skill" + difficulty: "easy" + reasoning: "Mentions 'row filter policy' - UC FGAC governance" + + - id: "routing_fgac_006" + inputs: + prompt: "How do I use CREATE POLICY with hasTagValue to mask credit card columns?" 
+ expectations: + expected_skills: ["databricks-unity-catalog"] + is_multi_skill: false + metadata: + category: "single_skill" + difficulty: "medium" + reasoning: "Mentions 'CREATE POLICY' and 'hasTagValue' - UC FGAC governance" + # Multi-skill routing - id: "routing_multi_001" inputs: @@ -120,6 +187,17 @@ test_cases: difficulty: "hard" reasoning: "Combines 'medallion' (SDP) with 'Genie space' (agent-bricks)" + - id: "routing_multi_004" + inputs: + prompt: "Set up FGAC policies for PII masking and query the audit logs to verify who accessed the masked columns" + expectations: + expected_skills: ["databricks-unity-catalog"] + is_multi_skill: false + metadata: + category: "single_skill" + difficulty: "hard" + reasoning: "Both FGAC policies and audit logs are in databricks-unity-catalog" + # No skill match - id: "routing_no_match_001" inputs: diff --git a/DEV_CHANGELOG.md b/DEV_CHANGELOG.md deleted file mode 100644 index dd9f172e..00000000 --- a/DEV_CHANGELOG.md +++ /dev/null @@ -1,126 +0,0 @@ -# Dev Changelog — Unity Catalog ABAC Policy Governance - -**Branch**: `feature/uc_abac_skills` -**Date**: 2026-02-09 -**Author**: sreeramreddy.thoom -**Reference**: UCABAC repo (`/Users/sreeramreddy.thoom/Documents/ClaudeCodeRepo/UCABAC`) - ---- - -## Overview - -Adds a new **`uc-abac-governance`** Claude Code skill to the Databricks AI Dev Kit, providing comprehensive guidance for managing Attribute-Based Access Control (ABAC) policies in Unity Catalog. Also adds Python SDK examples for ABAC policy operations. - -The skill content is derived from the UCABAC project — a production ABAC governance agent with multi-agent architecture, MCP server, and React frontend. - -**Excluded:** Policy drift detection is intentionally omitted from this skill. 
- ---- - -## New Files - -### Skill: `uc-abac-governance` - -| File | Description | -|------|-------------| -| `databricks-skills/uc-abac-governance/SKILL.md` | Main skill: ABAC overview, governed tags, tag assignments, masking UDFs, CREATE/DROP POLICY syntax, human-in-the-loop workflow, policy quotas, invalid SQL warnings, common errors | -| `databricks-skills/uc-abac-governance/sql-generation.md` | SQL patterns: SET/UNSET TAG (legacy + modern), CREATE FUNCTION for masking UDFs (full, partial, hash, redact, nullify, SSN, email, credit card), row filter UDFs, CREATE/DROP POLICY, tag discovery queries, enums reference | -| `databricks-skills/uc-abac-governance/python-sdk-patterns.md` | Python SDK: `w.policies.list_policies()`, `create_policy()`, `get_policy()`, `update_policy()`, `delete_policy()`, error handling, quota checking, async patterns | -| `databricks-skills/uc-abac-governance/mcp-tools-reference.md` | MCP tool reference: 12 tools — `list_abac_policies`, `get_abac_policy`, `create_abac_policy`, `update_abac_policy`, `delete_abac_policy`, `preview_policy_changes`, `get_table_policies`, `get_column_tags_api`, `get_masking_functions`, `get_schema_info`, `get_catalog_info`, `list_table_policies_in_schema` | - -### Installed Skills (mirrors of above) - -| File | Description | -|------|-------------| -| `.claude/skills/uc-abac-governance/SKILL.md` | Installed copy | -| `.claude/skills/uc-abac-governance/sql-generation.md` | Installed copy | -| `.claude/skills/uc-abac-governance/python-sdk-patterns.md` | Installed copy | -| `.claude/skills/uc-abac-governance/mcp-tools-reference.md` | Installed copy | - -### SDK Example - -| File | Description | -|------|-------------| -| `databricks-skills/databricks-python-sdk/examples/6-abac-policies.py` | Python SDK example: list, create, get, update, delete ABAC policies with error handling | - ---- - -## Modified Files - -| File | Change | -|------|--------| -| `databricks-skills/install_skills.sh` | Added 
`uc-abac-governance` to `DATABRICKS_SKILLS`, `get_skill_description()`, and `get_skill_extra_files()`. Updated `databricks-python-sdk` extra files to include `examples/6-abac-policies.py`. | -| `databricks-skills/databricks-python-sdk/SKILL.md` | Added ABAC Policies section with SDK examples for list, get, create, update, delete operations | - ---- - -## Key Design Decisions - -### 1. Separate Skill vs. Extending `databricks-unity-catalog` - -Created a **new dedicated skill** (`uc-abac-governance`) rather than extending the existing `databricks-unity-catalog` skill because: -- ABAC governance is a distinct, complex domain with its own workflow -- The existing UC skill focuses on system tables and volumes — different audience -- Separate skill allows targeted installation (`install_skills.sh uc-abac-governance`) -- Content volume warrants its own skill (4 reference files) - -### 2. SQL Generation + SDK Dual Approach - -The skill documents both approaches: -- **SQL generation**: `CREATE POLICY` / `DROP POLICY` syntax for SQL-based workflows -- **Python SDK**: `w.policies.*` methods for programmatic policy management -- MCP tool wrappers that combine both approaches - -### 3. Human-in-the-Loop Workflow - -The skill emphasizes a 6-step governance workflow matching the UCABAC agent pattern: -1. **Analyze** — scan table structure, existing tags, current policies -2. **Recommend** — generate policy recommendations based on tags -3. **Preview** — show proposed changes (SQL equivalent + impact) -4. **Approve** — human reviews and approves/rejects -5. **Execute** — create ABAC policies via SDK -6. **Verify** — confirm policies are active and masking works - -### 4. `gov_admin` Safety Net - -All examples enforce the `gov_admin` exception pattern — every ABAC policy must exclude the administrator group from masking/filtering. 
- ---- - -## Source Mapping (UCABAC → ai-dev-kit) - -| UCABAC Source | Skill Target | -|--------------|-------------| -| `ucabac/skills/governance-policy/SKILL.md` | `SKILL.md` | -| `ucabac/sql_gen/policy_skills.py` | `sql-generation.md` | -| `ucabac/sql_gen/tag_skills.py` | `sql-generation.md` | -| `ucabac/sql_gen/udf_skills.py` | `sql-generation.md` | -| `ucabac/sql_gen/_base.py` | `sql-generation.md` (enums) | -| `ucabac/mcp/policy_api_tools.py` | `mcp-tools-reference.md`, `python-sdk-patterns.md` | -| `ucabac/services/unity_catalog_client.py` | `python-sdk-patterns.md` | -| `ucabac/services/abac_policy_sync.py` | `python-sdk-patterns.md` | -| `ucabac/core/policy_manager.py` | `SKILL.md` (workflow) | -| `ucabac/skills/governance-policy/references/SQL_GEN.md` | `sql-generation.md` | -| `ucabac/skills/governance-policy/references/MCP_TOOLS.md` | `mcp-tools-reference.md` | - ---- - -## Dependencies - -- Databricks Runtime 16.1+ (for modern SET TAG syntax) or any version (for legacy syntax) -- Unity Catalog enabled workspace -- `databricks-sdk` (for `w.policies.*` API) -- MANAGE permission on target securables -- Governed tags created via Databricks UI (cannot be created via SQL) - ---- - -## Testing Checklist - -- [ ] `install_skills.sh --list` shows `uc-abac-governance` with correct description -- [ ] `install_skills.sh uc-abac-governance --local` installs all 4 files -- [ ] SKILL.md frontmatter has valid `name` and `description` -- [ ] SQL examples match Databricks ABAC documentation syntax -- [ ] Python SDK example parses without syntax errors -- [ ] No references to invalid SQL (SHOW POLICIES, DESCRIBE POLICY, etc.) 
-- [ ] All policies include `gov_admin` in EXCEPT clause diff --git a/PLAN_UC_FGAC_SKILLS.md b/PLAN_UC_FGAC_SKILLS.md deleted file mode 100644 index 5fa6e6b2..00000000 --- a/PLAN_UC_FGAC_SKILLS.md +++ /dev/null @@ -1,146 +0,0 @@ -# Plan: Add Unity Catalog ABAC Policy Governance Skill - -## Context - -The `abac_ai_dev_kit` (forked from `databricks-solutions/ai-dev-kit`) provides Claude Code skills for Databricks. The existing `databricks-unity-catalog` skill covers system tables and volumes but has **no ABAC policy governance content**. - -The UCABAC companion repo (`/Users/sreeramreddy.thoom/Documents/ClaudeCodeRepo/UCABAC`) implements a full ABAC governance agent with: -- Python SDK code for ABAC policy CRUD (`w.policies.list_policies/create_policy/update_policy/delete_policy`) -- SQL generation for `CREATE POLICY`, `DROP POLICY`, `SET TAG`, masking UDFs -- MCP tools for policy management (12 tools) -- Human-in-the-loop governance workflow (Analyze > Recommend > Preview > Approve > Execute > Verify) -- Policy conflict detection, drift scanning, compliance reporting -- Multi-agent architecture (Supervisor + 4 specialist agents) - -**Goal**: Extract the ABAC governance knowledge from UCABAC into a new skill (`uc-abac-governance`) in the ai-dev-kit, add Python SDK examples, and produce a `DEV_CHANGELOG.md`. 
- ---- - -## Architecture Overview (UCABAC) - -``` -UCABAC/ -├── ucabac/ # Main package (v0.2.0) -│ ├── core/policy_manager.py # GovernancePolicyManager facade -│ ├── sql_gen/ # Pure SQL generation (no state) -│ │ ├── _base.py # Enums: PIIType, MaskingStrategy, PolicyScope -│ │ ├── tag_skills.py # SET/UNSET TAG SQL -│ │ ├── udf_skills.py # CREATE FUNCTION (masking UDFs) -│ │ ├── policy_skills.py # CREATE/DROP POLICY SQL -│ │ ├── discovery_skills.py # SHOW/DESCRIBE SQL -│ │ └── compliance_skills.py # Compliance query SQL -│ ├── mcp/ -│ │ ├── policy_api_tools.py # 12 MCP tools for ABAC CRUD via SDK -│ │ ├── server.py # MCP server (40+ tools) -│ │ └── sql_executor.py # SQL Warehouse execution -│ ├── services/ -│ │ ├── unity_catalog_client.py # UC client with REST API wrappers -│ │ ├── abac_policy_sync.py # Sync policies to Postgres cache -│ │ ├── drift_detector.py # Policy drift detection -│ │ └── policy_conflict_checker.py # Conflict validation -│ ├── agents/ # Multi-agent system (Claude-powered) -│ │ ├── supervisor_agent.py # Task decomposition + delegation -│ │ ├── governance_agent.py # Governance policy specialist -│ │ ├── pii_agent.py # PII detection specialist -│ │ ├── compliance_agent.py # Compliance reporting specialist -│ │ └── query_agent.py # Query assistant specialist -│ └── database/ # Lakebase Postgres persistence -├── app/api/ # FastAPI REST + SSE backend -├── frontend/ # React + TypeScript SPA -├── skills/governance-policy/ # Existing skill docs (reference) -└── tests/ # 283 unit tests -``` - ---- - -## Changes to Make - -### 1. New Skill: `databricks-skills/uc-abac-governance/` - -Create a new skill directory following the TEMPLATE pattern: - -| File | Content Source | -|------|--------------| -| `SKILL.md` | ABAC overview, governed tags, tag assignments, masking UDFs, CREATE POLICY syntax, human-in-the-loop workflow, policy quotas, invalid SQL warnings, common errors. 
Derived from `ucabac/skills/governance-policy/SKILL.md` | -| `sql-generation.md` | Pure SQL patterns: SET/UNSET TAG, CREATE FUNCTION, CREATE/DROP POLICY, discovery queries. Derived from `ucabac/sql_gen/` modules | -| `python-sdk-patterns.md` | Databricks Python SDK examples: `w.policies.list_policies()`, `create_policy()`, `update_policy()`, `delete_policy()`, `get_policy()`. Derived from `ucabac/mcp/policy_api_tools.py` and `ucabac/services/` | -| `mcp-tools-reference.md` | MCP tool reference for 12 policy API tools: list, get, create, update, delete, preview. Derived from `ucabac/mcp/policy_api_tools.py` | - -### 2. Install into `.claude/skills/uc-abac-governance/` - -Copy the 4 skill files to `.claude/skills/uc-abac-governance/` (matching how other skills are installed). - -### 3. Update `databricks-skills/install_skills.sh` - -- Add `uc-abac-governance` to `DATABRICKS_SKILLS` list (line 45) -- Add `"uc-abac-governance") echo "ABAC policy governance - tags, UDFs, column masks, row filters"` in `get_skill_description()` -- Add `"uc-abac-governance") echo "sql-generation.md python-sdk-patterns.md mcp-tools-reference.md"` in `get_skill_extra_files()` - -### 4. New SDK Example: `databricks-skills/databricks-python-sdk/examples/6-abac-policies.py` - -Following the pattern of existing examples (1-authentication.py through 5-serving-and-vector-search.py), add a new example demonstrating ABAC policy operations: -- List policies on catalog/schema/table -- Create column mask policy with tag matching -- Create row filter policy -- Update policy principals -- Delete policy -- Preview policy changes before execution - -### 5. Update `databricks-python-sdk` Skill - -- Add ABAC policies section to `databricks-skills/databricks-python-sdk/SKILL.md` -- Update `get_skill_extra_files()` to include `examples/6-abac-policies.py` - -### 6. 
Create `DEV_CHANGELOG.md` in Project Root - ---- - -## Key Patterns to Preserve - -### SQL That Does NOT Exist in Databricks -- `SHOW POLICIES` / `DESCRIBE POLICY` -- use REST API instead -- `ALTER POLICY` -- drop and recreate -- `information_schema.column_masks` / `.row_filters` -- for old-style masking, NOT ABAC -- `ALTER USER SET ATTRIBUTES` / `SHOW USER ATTRIBUTES` -- use SCIM API - -### Automatic `gov_admin` Exception -Every ABAC policy MUST include `EXCEPT \`gov_admin\`` to protect administrator access. Enforced at 3 levels: system prompt, tool-level injection, SQL interception. - -### Policy Quotas -- Max 10 policies per catalog -- Max 10 policies per schema -- Max 5 policies per table - -### Human-in-the-Loop Workflow -``` -ANALYZE → RECOMMEND → PREVIEW → APPROVE → EXECUTE → VERIFY -``` - ---- - -## Files to Create/Modify - -| Action | File | -|--------|------| -| CREATE | `databricks-skills/uc-abac-governance/SKILL.md` | -| CREATE | `databricks-skills/uc-abac-governance/sql-generation.md` | -| CREATE | `databricks-skills/uc-abac-governance/python-sdk-patterns.md` | -| CREATE | `databricks-skills/uc-abac-governance/mcp-tools-reference.md` | -| CREATE | `.claude/skills/uc-abac-governance/SKILL.md` | -| CREATE | `.claude/skills/uc-abac-governance/sql-generation.md` | -| CREATE | `.claude/skills/uc-abac-governance/python-sdk-patterns.md` | -| CREATE | `.claude/skills/uc-abac-governance/mcp-tools-reference.md` | -| MODIFY | `databricks-skills/install_skills.sh` | -| CREATE | `databricks-skills/databricks-python-sdk/examples/6-abac-policies.py` | -| MODIFY | `databricks-skills/databricks-python-sdk/SKILL.md` | -| CREATE | `DEV_CHANGELOG.md` | - ---- - -## Verification - -1. `./databricks-skills/install_skills.sh --list` -- confirm `uc-abac-governance` appears with description -2. `./databricks-skills/install_skills.sh uc-abac-governance --local` -- confirm all 4 files install to `.claude/skills/` -3. 
`python -c "import ast; ast.parse(open('databricks-skills/databricks-python-sdk/examples/6-abac-policies.py').read())"` -- syntax check -4. Verify SKILL.md frontmatter matches naming conventions -5. Cross-reference SQL patterns against Databricks ABAC docs diff --git a/databricks-mcp-server/databricks_mcp_server/server.py b/databricks-mcp-server/databricks_mcp_server/server.py index 1c2a591a..90369033 100644 --- a/databricks-mcp-server/databricks_mcp_server/server.py +++ b/databricks-mcp-server/databricks_mcp_server/server.py @@ -33,4 +33,5 @@ lakebase, lakebase_autoscale, user, + fgac_policies, ) diff --git a/databricks-skills/databricks-python-sdk/SKILL.md b/databricks-skills/databricks-python-sdk/SKILL.md index b0ffaf0b..4b01fb17 100644 --- a/databricks-skills/databricks-python-sdk/SKILL.md +++ b/databricks-skills/databricks-python-sdk/SKILL.md @@ -414,7 +414,7 @@ for doc in results.result.data_array: print(doc) ``` -### ABAC Policies +### FGAC Policies **Doc:** https://docs.databricks.com/data-governance/unity-catalog/abac/policies ```python @@ -434,26 +434,100 @@ policy = w.policies.get_policy( ) # Create column mask policy (ALWAYS include gov_admin in except_principals) -from databricks.sdk.service.catalog import ColumnMask, MatchColumns -created = w.policies.create_policy( - name="mask_pii_ssn", - policy_type="COLUMN_MASK", - on_securable_type="SCHEMA", +# on_securable_type sets scope: CATALOG (all tables), SCHEMA (schema tables), TABLE (single table) +# for_securable_type is always TABLE +from databricks.sdk.service.catalog import ( + ColumnMaskOptions, MatchColumn, PolicyInfo, PolicyType, RowFilterOptions, SecurableType, +) + +# Catalog-level column mask — applies to all tables in catalog +policy_info = PolicyInfo( + name="mask_pii_catalog", + policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, + on_securable_type=SecurableType.CATALOG, + on_securable_fullname="my_catalog", + for_securable_type=SecurableType.TABLE, + to_principals=["analysts"], + 
except_principals=["gov_admin"], + column_mask=ColumnMaskOptions( + function_name="my_catalog.my_schema.mask_ssn", + on_column="masked_col", + ), + match_columns=[ + MatchColumn(alias="masked_col", condition="hasTagValue('pii_type', 'ssn')"), + ], +) +created = w.policies.create_policy(policy_info=policy_info) + +# Schema-level column mask — applies to all tables in schema +policy_info = PolicyInfo( + name="mask_pii_schema", + policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, + on_securable_type=SecurableType.SCHEMA, on_securable_fullname="my_catalog.my_schema", - for_securable_type="TABLE", + for_securable_type=SecurableType.TABLE, + to_principals=["analysts"], + except_principals=["gov_admin"], + column_mask=ColumnMaskOptions( + function_name="my_catalog.my_schema.mask_ssn", + on_column="masked_col", + ), + match_columns=[ + MatchColumn(alias="masked_col", condition="hasTagValue('pii_type', 'ssn')"), + ], +) +created = w.policies.create_policy(policy_info=policy_info) + +# Table-level column mask — applies to a single table +policy_info = PolicyInfo( + name="mask_pii_table", + policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, + on_securable_type=SecurableType.TABLE, + on_securable_fullname="my_catalog.my_schema.my_table", + for_securable_type=SecurableType.TABLE, to_principals=["analysts"], except_principals=["gov_admin"], - column_mask=ColumnMask(function_name="my_catalog.my_schema.mask_ssn"), - match_columns=[MatchColumns(tag_name="pii_type", tag_value="ssn")], + column_mask=ColumnMaskOptions( + function_name="my_catalog.my_schema.mask_ssn", + on_column="masked_col", + ), + match_columns=[ + MatchColumn(alias="masked_col", condition="hasTagValue('pii_type', 'ssn')"), + ], +) +created = w.policies.create_policy(policy_info=policy_info) + +# Row filter — same three levels apply (CATALOG, SCHEMA, TABLE) +policy_info = PolicyInfo( + name="filter_eu_data", + policy_type=PolicyType.POLICY_TYPE_ROW_FILTER, + on_securable_type=SecurableType.SCHEMA, + 
on_securable_fullname="my_catalog.my_schema", + for_securable_type=SecurableType.TABLE, + to_principals=["us_team"], + except_principals=["gov_admin"], + row_filter=RowFilterOptions( + function_name="my_catalog.my_schema.is_not_eu_region", + ), + match_columns=[ + MatchColumn(alias="filter_col", condition="hasTagValue('region', 'eu')"), + ], ) +created = w.policies.create_policy(policy_info=policy_info) -# Update policy principals +# Update policy principals (only principals and comment can be updated) +update_info = PolicyInfo( + to_principals=["analysts", "new_team"], + except_principals=["gov_admin"], + for_securable_type=SecurableType.TABLE, + policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, +) w.policies.update_policy( name="mask_pii_ssn", on_securable_type="SCHEMA", on_securable_fullname="my_catalog.my_schema", - to_principals=["analysts", "new_team"], - except_principals=["gov_admin"], + policy_info=update_info, + update_mask="to_principals,except_principals", ) # Delete policy diff --git a/databricks-skills/uc-fgac-governance/SKILL.md b/databricks-skills/uc-fgac-governance/SKILL.md deleted file mode 100644 index 6efabdcc..00000000 --- a/databricks-skills/uc-fgac-governance/SKILL.md +++ /dev/null @@ -1,294 +0,0 @@ ---- -name: uc-abac-governance -description: "Unity Catalog ABAC policy governance - governed tags, masking UDFs, column masks, row filters, and human-in-the-loop policy management." ---- - -# Unity Catalog ABAC Policy Governance - -Guidance for Attribute-Based Access Control (ABAC) policies in Databricks Unity Catalog. Covers governed tags, tag assignments, masking UDFs, CREATE/DROP POLICY syntax, and the human-in-the-loop governance workflow. 
- -**Databricks Docs:** -- ABAC overview: https://docs.databricks.com/data-governance/unity-catalog/abac/ -- ABAC policies: https://docs.databricks.com/data-governance/unity-catalog/abac/policies -- ABAC tutorial: https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial - -## When to Use This Skill - -Use this skill when: -- Creating or managing **ABAC policies** (column masks, row filters) -- Working with **governed tags** (creating via UI, applying via SQL) -- Building **masking UDFs** for PII protection (SSN, email, credit card, etc.) -- Implementing **human-in-the-loop governance** workflows -- Querying tag assignments via `information_schema` -- Managing policy lifecycle (create, update, delete, preview) - -## Reference Files - -| Topic | File | Description | -|-------|------|-------------| -| SQL Generation | [sql-generation.md](sql-generation.md) | SET/UNSET TAG, CREATE FUNCTION, CREATE/DROP POLICY, discovery queries | -| Python SDK | [python-sdk-patterns.md](python-sdk-patterns.md) | `w.policies.*` SDK methods for ABAC policy CRUD | -| MCP Tools | [mcp-tools-reference.md](mcp-tools-reference.md) | 12 MCP tools for policy management | - ---- - -## ABAC Workflow Overview - -ABAC policies in Databricks follow a 4-step setup: - -1. **Governed Tags** - Define classification taxonomy (UI only) -2. **Tag Assignments** - Apply tags to columns/tables via SQL -3. **Masking UDFs** - Create deterministic functions for data masking -4. **ABAC Policies** - Bind tags to UDFs with principal scoping - -``` -┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ -│ Governed Tags│───>│ Tag │───>│ Masking │───>│ ABAC │ -│ (UI only) │ │ Assignments │ │ UDFs │ │ Policies │ -└──────────────┘ └──────────────┘ └──────────────┘ └──────────────┘ -``` - ---- - -## IMPORTANT: SQL That Does NOT Exist - -These SQL commands do **not** exist in Databricks. Do not generate them. 
- -| Invalid SQL | What to use instead | -|---|---| -| `SHOW POLICIES` | REST API: `w.policies.list_policies()` | -| `DESCRIBE POLICY` | REST API: `w.policies.get_policy()` | -| `ALTER POLICY` | Drop and recreate the policy | -| `ALTER USER SET ATTRIBUTES` | SCIM API for user attributes | -| `SHOW USER ATTRIBUTES` | SCIM API for user attributes | - ---- - -## Step 1: Governed Tags - -Governed tags **cannot** be created via SQL. They must be created in the Databricks UI. - -### Creating a Governed Tag (UI Steps) - -1. Navigate to **Catalog** in the workspace -2. Select **Governed Tags** from the left panel -3. Click **Create governed tag** -4. Configure: - - **Tag Key**: e.g., `pii_type` - - **Allowed Values**: e.g., `ssn`, `email`, `phone`, `credit_card`, `address` - - **Description**: e.g., "PII classification for ABAC policies" - -> **Note:** Tag data is stored as plain text and may be replicated globally. Avoid sensitive information in tag names or values. - -**Docs:** https://docs.databricks.com/admin/governed-tags/ - ---- - -## Step 2: Applying Tags to Columns - -### Legacy Syntax (all versions) - -```sql --- Set tag on column -ALTER TABLE catalog.schema.table -ALTER COLUMN column_name SET TAGS ('pii_type' = 'ssn'); - --- Set tag on table -ALTER TABLE catalog.schema.table -SET TAGS ('data_classification' = 'confidential'); - --- Remove tag -ALTER TABLE catalog.schema.table -ALTER COLUMN column_name UNSET TAGS ('pii_type'); -``` - -### Modern Syntax (DBR 16.1+) - -```sql -SET TAG ON COLUMN catalog.schema.table.column_name 'pii_type' = 'ssn'; -SET TAG ON TABLE catalog.schema.table 'data_classification' = 'confidential'; -SET TAG ON SCHEMA catalog.schema 'environment' = 'production'; -SET TAG ON CATALOG catalog 'department' = 'finance'; - -UNSET TAG ON COLUMN catalog.schema.table.column_name 'pii_type'; -``` - -### Querying Existing Tags - -```sql --- Column tags -SELECT tag_name, tag_value, column_name -FROM system.information_schema.column_tags -WHERE 
catalog_name = 'my_catalog' - AND schema_name = 'my_schema' - AND table_name = 'my_table'; - --- Table tags -SELECT tag_name, tag_value -FROM system.information_schema.table_tags -WHERE catalog_name = 'my_catalog' - AND schema_name = 'my_schema' - AND table_name = 'my_table'; -``` - ---- - -## Step 3: Masking UDFs - -Masking UDFs must be `DETERMINISTIC` and use simple `CASE` statements. No external calls or nested UDFs. - -```sql --- Full mask: replaces all characters with * -CREATE OR REPLACE FUNCTION catalog.schema.mask_full(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Full masking - replaces all characters with *' -RETURN CASE - WHEN value IS NULL THEN NULL - ELSE REPEAT('*', LENGTH(value)) -END; - --- Partial mask: show last 4 characters -CREATE OR REPLACE FUNCTION catalog.schema.mask_partial(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Partial masking - shows last 4 characters' -RETURN CASE - WHEN value IS NULL THEN NULL - WHEN LENGTH(value) <= 4 THEN REPEAT('*', LENGTH(value)) - ELSE CONCAT(REPEAT('*', LENGTH(value) - 4), RIGHT(value, 4)) -END; - --- SSN mask: ***-**-XXXX format -CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Masks SSN showing only last 4 digits' -RETURN CASE - WHEN ssn IS NULL THEN NULL - WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 - THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) - ELSE '***-**-****' -END; - --- Email mask: j***@example.com -CREATE OR REPLACE FUNCTION catalog.schema.mask_email(email STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Masks email showing first char and domain' -RETURN CASE - WHEN email IS NULL THEN NULL - WHEN INSTR(email, '@') > 1 - THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1)) - ELSE '***@***.***' -END; -``` - -**Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices - ---- - -## Step 4: ABAC Policies - -### Column Mask Policy - -```sql 
-CREATE OR REPLACE POLICY mask_pii_columns -ON SCHEMA catalog.schema -COMMENT 'Mask PII columns for analysts' -COLUMN MASK catalog.schema.mask_partial -TO `analysts`, `data_scientists` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col -ON COLUMN masked_col; -``` - -### Row Filter Policy - -```sql -CREATE OR REPLACE POLICY filter_eu_rows -ON CATALOG my_catalog -COMMENT 'Filter EU rows for US team' -ROW FILTER catalog.schema.is_not_eu_region -TO `us_team` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col -USING COLUMNS (filter_col); -``` - -### Drop Policy - -```sql -DROP POLICY mask_pii_columns ON SCHEMA catalog.schema; -``` - -### CRITICAL: Always Exclude `gov_admin` - -Every ABAC policy **MUST** include `EXCEPT \`gov_admin\`` to protect administrator access. Without this, admins could be locked out of data. - -### Policy Quotas - -| Scope | Max Policies | -|-------|-------------| -| Per Catalog | 10 | -| Per Schema | 10 | -| Per Table | 5 | - ---- - -## Human-in-the-Loop Governance Workflow - -ABAC policy changes should follow a governed workflow: - -``` -ANALYZE → RECOMMEND → PREVIEW → APPROVE → EXECUTE → VERIFY - │ │ │ │ │ │ - ▼ ▼ ▼ ▼ ▼ ▼ - Discover Generate Show SQL Human Run SQL Confirm - current policy & impact confirms or SDK changes - state proposals preview changes call applied -``` - -1. **ANALYZE**: Discover current tags, policies, and UDFs -2. **RECOMMEND**: Generate policy proposals based on requirements -3. **PREVIEW**: Use `preview_policy_changes` to show exact SQL and impact -4. **APPROVE**: Human reviews and explicitly approves -5. **EXECUTE**: Create/update/delete policies via SDK or SQL -6. **VERIFY**: Confirm policies are applied correctly - -**Never auto-execute policy changes.** Always preview and wait for human approval. 
- ---- - -## Common Errors - -| Error | Cause | Solution | -|-------|-------|----------| -| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Consolidate policies or use broader scope | -| `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | Check governed tag configuration in UI | -| `UDF_NOT_FOUND` | Masking UDF doesn't exist | Create UDF first, use fully qualified name | -| `POLICY_ALREADY_EXISTS` | Policy name conflict | Use `CREATE OR REPLACE POLICY` | -| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | Grant `MANAGE` permission to policy creator | -| `SHOW POLICIES is not supported` | Used invalid SQL | Use REST API `w.policies.list_policies()` instead | - -## Best Practices - -1. **Use governed tags** (not ad-hoc tags) for ABAC policy matching -2. **Always include `EXCEPT \`gov_admin\``** in every policy -3. **Use deterministic UDFs** with simple CASE statements -4. **Preview before executing** any policy change -5. **Start at schema scope** and narrow to table only when needed -6. **Name policies descriptively**: `mask_{what}_{scope}` or `filter_{what}_{scope}` -7. **Test UDFs independently** before binding to policies -8. 
**Monitor policy quotas** — consolidate when approaching limits - -## Resources - -- [ABAC Overview](https://docs.databricks.com/data-governance/unity-catalog/abac/) -- [ABAC Policies](https://docs.databricks.com/data-governance/unity-catalog/abac/policies) -- [ABAC Tutorial](https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial) -- [UDF Best Practices](https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices) -- [Governed Tags](https://docs.databricks.com/admin/governed-tags/) -- [Column Masks & Row Filters](https://docs.databricks.com/data-governance/unity-catalog/filters-and-masks/) diff --git a/databricks-skills/uc-fgac-governance/mcp-tools-reference.md b/databricks-skills/uc-fgac-governance/mcp-tools-reference.md deleted file mode 100644 index 51fb77b2..00000000 --- a/databricks-skills/uc-fgac-governance/mcp-tools-reference.md +++ /dev/null @@ -1,397 +0,0 @@ -# MCP Tools Reference for ABAC Policy Management - -Reference for the 12 MCP tools that manage ABAC policies via the Databricks Python SDK. These tools are registered in the UCABAC MCP server. - ---- - -## Discovery Tools - -### `list_abac_policies` - -List ABAC policies on a catalog, schema, or table. 
- -```python -list_abac_policies( - securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" - securable_fullname: str, # e.g., "my_catalog.my_schema" - include_inherited: bool = True, - policy_type: str = None, # "COLUMN_MASK" or "ROW_FILTER" (optional filter) -) -``` - -**Returns:** -```json -{ - "success": true, - "securable_type": "SCHEMA", - "securable_fullname": "my_catalog.my_schema", - "policy_count": 3, - "policies": [ - { - "name": "mask_pii_ssn", - "policy_type": "COLUMN_MASK", - "to_principals": ["analysts"], - "except_principals": ["gov_admin"], - "on_securable_fullname": "my_catalog.my_schema", - "column_mask": {"function_name": "my_catalog.my_schema.mask_ssn"}, - "match_columns": [{"tag_name": "pii_type", "tag_value": "ssn"}] - } - ] -} -``` - -### `get_abac_policy` - -Get details for a specific policy by name. - -```python -get_abac_policy( - policy_name: str, # Policy name - securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" - securable_fullname: str, # Fully qualified securable name -) -``` - -**Returns:** -```json -{ - "success": true, - "policy": { - "name": "mask_pii_ssn", - "policy_type": "COLUMN_MASK", - "comment": "Mask SSN columns for analysts", - "to_principals": ["analysts", "data_scientists"], - "except_principals": ["gov_admin"], - "on_securable_type": "SCHEMA", - "on_securable_fullname": "my_catalog.my_schema", - "for_securable_type": "TABLE", - "column_mask": {"function_name": "my_catalog.my_schema.mask_ssn"}, - "match_columns": [{"tag_name": "pii_type", "tag_value": "ssn"}], - "created_at": "2025-01-15T10:30:00Z", - "created_by": "admin@company.com", - "updated_at": "2025-01-20T14:00:00Z", - "updated_by": "admin@company.com" - } -} -``` - -### `get_table_policies` - -Get column masks and row filters for a specific table via Unity Catalog API. 
- -```python -get_table_policies( - catalog: str, - schema: str, - table: str, -) -``` - -**Returns:** -```json -{ - "success": true, - "table": "my_catalog.my_schema.my_table", - "column_masks": [ - { - "column_name": "ssn", - "mask_function": "my_catalog.my_schema.mask_ssn", - "using_column_names": [] - } - ], - "row_filters": [ - { - "filter_function": "my_catalog.my_schema.is_not_eu_region", - "using_column_names": ["region"] - } - ] -} -``` - -### `get_masking_functions` - -List masking UDFs in a schema. - -```python -get_masking_functions( - catalog: str, - schema: str, -) -``` - -**Returns:** -```json -{ - "success": true, - "catalog": "my_catalog", - "schema": "my_schema", - "functions": [ - { - "name": "mask_ssn", - "full_name": "my_catalog.my_schema.mask_ssn", - "return_type": "STRING", - "comment": "Masks SSN showing only last 4 digits", - "is_deterministic": true - } - ] -} -``` - -### `get_schema_info` - -Get schema metadata via Unity Catalog API. - -```python -get_schema_info( - catalog: str, - schema: str, -) -``` - -### `get_catalog_info` - -Get catalog metadata via Unity Catalog API. - -```python -get_catalog_info( - catalog: str, -) -``` - -### `get_column_tags_api` - -Get column-level tags via the Tags API. - -```python -get_column_tags_api( - catalog: str, - schema: str, - table: str, -) -``` - -### `list_table_policies_in_schema` - -List all tables in a schema with their column masks and row filters. - -```python -list_table_policies_in_schema( - catalog: str, - schema: str, -) -``` - ---- - -## Preview Tool (Human-in-the-Loop Gate) - -### `preview_policy_changes` - -Preview policy changes without executing. This is the critical human-in-the-loop gate. 
- -```python -preview_policy_changes( - action: str, # "CREATE", "UPDATE", or "DELETE" - policy_name: str, - securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" - securable_fullname: str, - policy_type: str = None, # "COLUMN_MASK" or "ROW_FILTER" (for CREATE) - to_principals: list = None, - except_principals: list = None, - function_name: str = None, - tag_name: str = None, - tag_value: str = None, - comment: str = None, -) -``` - -**Returns:** -```json -{ - "success": true, - "action": "CREATE", - "preview": { - "policy_name": "mask_pii_ssn", - "policy_type": "COLUMN_MASK", - "securable": "SCHEMA my_catalog.my_schema", - "to_principals": ["analysts"], - "except_principals": ["gov_admin"], - "function": "my_catalog.my_schema.mask_ssn", - "tag_match": "hasTagValue('pii_type', 'ssn')", - "equivalent_sql": "CREATE OR REPLACE POLICY mask_pii_ssn\nON SCHEMA my_catalog.my_schema\n..." - }, - "warnings": [], - "requires_approval": true, - "message": "Review the preview above. Reply 'approve' to execute." -} -``` - -**Usage in workflow:** - -1. Call `preview_policy_changes` with proposed changes -2. Present preview to user -3. Wait for explicit approval -4. Only then call `create_abac_policy`, `update_abac_policy`, or `delete_abac_policy` - ---- - -## Management Tools - -### `create_abac_policy` - -Create a new ABAC policy (COLUMN_MASK or ROW_FILTER). 
- -```python -create_abac_policy( - policy_name: str, - policy_type: str, # "COLUMN_MASK" or "ROW_FILTER" - securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" - securable_fullname: str, - function_name: str, # Fully qualified UDF name - to_principals: list, # Users/groups the policy applies to - tag_name: str, # Tag key to match - tag_value: str = None, # Tag value (optional, uses hasTag vs hasTagValue) - except_principals: list = None, # Excluded principals (gov_admin auto-added) - comment: str = "", -) -``` - -**Returns:** -```json -{ - "success": true, - "policy_name": "mask_pii_ssn", - "action": "created", - "details": { - "policy_type": "COLUMN_MASK", - "on_securable": "SCHEMA my_catalog.my_schema", - "function": "my_catalog.my_schema.mask_ssn", - "to_principals": ["analysts"], - "except_principals": ["gov_admin"] - } -} -``` - -> **Note:** `gov_admin` is automatically added to `except_principals` if not already present. - -### `update_abac_policy` - -Update an existing policy's principals or comment. - -```python -update_abac_policy( - policy_name: str, - securable_type: str, - securable_fullname: str, - to_principals: list = None, - except_principals: list = None, - comment: str = None, -) -``` - -**Returns:** -```json -{ - "success": true, - "policy_name": "mask_pii_ssn", - "action": "updated", - "changes": { - "to_principals": ["analysts", "data_scientists", "new_team"], - "comment": "Updated: added new_team" - } -} -``` - -> **Note:** To change the UDF, tag matching, or scope, drop and recreate the policy. - -### `delete_abac_policy` - -Delete an ABAC policy. 
- -```python -delete_abac_policy( - policy_name: str, - securable_type: str, - securable_fullname: str, -) -``` - -**Returns:** -```json -{ - "success": true, - "policy_name": "mask_pii_ssn", - "action": "deleted" -} -``` - ---- - -## Human-in-the-Loop Workflow Example - -Complete workflow using MCP tools: - -``` -Step 1: ANALYZE -───────────────────────────────── -→ list_abac_policies(securable_type="SCHEMA", securable_fullname="prod.finance") -→ get_column_tags_api(catalog="prod", schema="finance", table="customers") -→ get_masking_functions(catalog="prod", schema="finance") - -Step 2: RECOMMEND -───────────────────────────────── -→ Agent generates policy recommendations based on discovered tags and UDFs - -Step 3: PREVIEW -───────────────────────────────── -→ preview_policy_changes( - action="CREATE", - policy_name="mask_ssn_finance", - securable_type="SCHEMA", - securable_fullname="prod.finance", - policy_type="COLUMN_MASK", - function_name="prod.finance.mask_ssn", - to_principals=["analysts"], - tag_name="pii_type", - tag_value="ssn" - ) - -Step 4: APPROVE -───────────────────────────────── -→ Human reviews preview and replies "approve" - -Step 5: EXECUTE -───────────────────────────────── -→ create_abac_policy( - policy_name="mask_ssn_finance", - policy_type="COLUMN_MASK", - securable_type="SCHEMA", - securable_fullname="prod.finance", - function_name="prod.finance.mask_ssn", - to_principals=["analysts"], - tag_name="pii_type", - tag_value="ssn" - ) - -Step 6: VERIFY -───────────────────────────────── -→ get_abac_policy( - policy_name="mask_ssn_finance", - securable_type="SCHEMA", - securable_fullname="prod.finance" - ) -``` - ---- - -## Error Handling - -| Error | Cause | Solution | -|-------|-------|----------| -| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Consolidate policies or use broader scope | -| `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | Check governed tag config in UI | -| `UDF_NOT_FOUND` | Masking UDF doesn't 
exist | Create UDF first, use fully qualified name | -| `POLICY_ALREADY_EXISTS` | Duplicate policy name | Use different name or delete existing first | -| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | Grant `MANAGE` permission | -| `INVALID_SECURABLE_TYPE` | Wrong securable type string | Use `"CATALOG"`, `"SCHEMA"`, or `"TABLE"` | -| `gov_admin not in except_principals` | Safety check failed | Always include `gov_admin` in except list | diff --git a/databricks-skills/uc-fgac-governance/python-sdk-patterns.md b/databricks-skills/uc-fgac-governance/python-sdk-patterns.md deleted file mode 100644 index da8bd938..00000000 --- a/databricks-skills/uc-fgac-governance/python-sdk-patterns.md +++ /dev/null @@ -1,351 +0,0 @@ -# Python SDK Patterns for ABAC Policies - -Databricks Python SDK patterns for managing ABAC policies via `WorkspaceClient.policies`. - -**SDK Docs:** https://databricks-sdk-py.readthedocs.io/en/latest/ -**ABAC Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/policies - ---- - -## Setup - -```python -from databricks.sdk import WorkspaceClient - -w = WorkspaceClient() # Auto-detects credentials -``` - ---- - -## List Policies - -List ABAC policies on a securable (catalog, schema, or table). 
- -```python -# List all policies on a catalog -policies = w.policies.list_policies( - on_securable_type="CATALOG", - on_securable_fullname="my_catalog", - include_inherited=True, -) - -for p in policies: - print(f"{p.name}: {p.policy_type} on {p.on_securable_fullname}") - -# List policies on a schema -policies = w.policies.list_policies( - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - include_inherited=True, -) - -# List policies on a specific table -policies = w.policies.list_policies( - on_securable_type="TABLE", - on_securable_fullname="my_catalog.my_schema.my_table", - include_inherited=True, -) -``` - -### Filtering by Policy Type - -```python -policies = w.policies.list_policies( - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - include_inherited=True, -) - -column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] -row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] -``` - -### Extracting Policy Details - -```python -for p in policies: - p_dict = p.as_dict() if hasattr(p, "as_dict") else {} - print({ - "name": p_dict.get("name"), - "policy_type": p_dict.get("policy_type"), - "to_principals": p_dict.get("to_principals", []), - "except_principals": p_dict.get("except_principals", []), - "on_securable_type": p_dict.get("on_securable_type"), - "on_securable_fullname": p_dict.get("on_securable_fullname"), - "column_mask": p_dict.get("column_mask"), - "row_filter": p_dict.get("row_filter"), - "match_columns": p_dict.get("match_columns", []), - }) -``` - ---- - -## Get Policy - -Retrieve a specific policy by name and securable. 
- -```python -policy = w.policies.get_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", -) - -print(f"Policy: {policy.name}") -print(f"Type: {policy.policy_type}") -print(f"Principals: {policy.to_principals}") -print(f"Except: {policy.except_principals}") -``` - ---- - -## Create Policy - -### Column Mask Policy - -```python -from databricks.sdk.service.catalog import ( - CreatePolicy, - ColumnMask, - MatchColumns, -) - -policy = w.policies.create_policy( - name="mask_pii_ssn", - policy_type="COLUMN_MASK", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - for_securable_type="TABLE", - to_principals=["analysts", "data_scientists"], - except_principals=["gov_admin"], - comment="Mask SSN columns for analyst groups", - column_mask=ColumnMask( - function_name="my_catalog.my_schema.mask_ssn", - ), - match_columns=[ - MatchColumns( - tag_name="pii_type", - tag_value="ssn", - ) - ], -) -print(f"Created policy: {policy.name}") -``` - -### Row Filter Policy - -```python -from databricks.sdk.service.catalog import ( - CreatePolicy, - RowFilter, - MatchColumns, -) - -policy = w.policies.create_policy( - name="filter_eu_data", - policy_type="ROW_FILTER", - on_securable_type="CATALOG", - on_securable_fullname="my_catalog", - for_securable_type="TABLE", - to_principals=["us_team"], - except_principals=["gov_admin"], - comment="Filter EU rows for US team", - row_filter=RowFilter( - function_name="my_catalog.my_schema.is_not_eu_region", - ), - match_columns=[ - MatchColumns( - tag_name="region", - tag_value="eu", - ) - ], -) -print(f"Created policy: {policy.name}") -``` - -### Important: Always Include `gov_admin` - -Every policy **MUST** include `"gov_admin"` in `except_principals`: - -```python -# CORRECT -except_principals=["gov_admin"] - -# CORRECT - additional admin groups -except_principals=["gov_admin", "platform_admins"] - -# WRONG - missing gov_admin 
-except_principals=["platform_admins"] # gov_admin must be included! -``` - ---- - -## Update Policy - -Update principals or comment on an existing policy. - -```python -updated = w.policies.update_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - to_principals=["analysts", "data_scientists", "new_team"], - except_principals=["gov_admin", "senior_admins"], - comment="Updated: added new_team to masked principals", -) -print(f"Updated policy: {updated.name}") -``` - -> **Note:** To change the UDF, tag matching, or scope, you must drop and recreate the policy. `update_policy` only modifies principals and comment. - ---- - -## Delete Policy - -```python -w.policies.delete_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", -) -print("Policy deleted") -``` - ---- - -## Error Handling - -```python -from databricks.sdk.errors import NotFound, PermissionDenied, BadRequest - -try: - policy = w.policies.get_policy( - name="nonexistent_policy", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - ) -except NotFound: - print("Policy not found") -except PermissionDenied: - print("Insufficient permissions - need MANAGE on securable") -except BadRequest as e: - print(f"Invalid request: {e}") -``` - ---- - -## Common Patterns - -### List All Policies in a Catalog with Counts - -```python -def get_policy_summary(w, catalog: str): - """Get a summary of all ABAC policies in a catalog.""" - policies = list(w.policies.list_policies( - on_securable_type="CATALOG", - on_securable_fullname=catalog, - include_inherited=True, - )) - - column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] - row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] - - return { - "total": len(policies), - "column_masks": len(column_masks), - "row_filters": len(row_filters), - "policies": [p.as_dict() for p in policies], - } -``` - -### 
Check Policy Quotas Before Creating - -```python -def check_quota(w, securable_type: str, securable_fullname: str): - """Check if policy quota allows creating a new policy.""" - quotas = {"CATALOG": 10, "SCHEMA": 10, "TABLE": 5} - max_policies = quotas.get(securable_type, 10) - - existing = list(w.policies.list_policies( - on_securable_type=securable_type, - on_securable_fullname=securable_fullname, - )) - - # Count only direct policies (not inherited) - direct = [p for p in existing - if p.on_securable_fullname == securable_fullname] - - return { - "current": len(direct), - "max": max_policies, - "can_create": len(direct) < max_policies, - } -``` - -### Fetch Policies Without Cache (Direct API) - -```python -def fetch_policies_direct( - w, - catalog: str, - schema: str = None, - table: str = None, -): - """Fetch policies directly from REST API.""" - if table and schema: - securable_type = "TABLE" - securable_name = f"{catalog}.{schema}.{table}" - elif schema: - securable_type = "SCHEMA" - securable_name = f"{catalog}.{schema}" - else: - securable_type = "CATALOG" - securable_name = catalog - - policies = w.policies.list_policies( - on_securable_type=securable_type, - on_securable_fullname=securable_name, - include_inherited=True, - ) - - results = [] - for p in policies: - p_dict = p.as_dict() if hasattr(p, "as_dict") else {} - results.append({ - "name": p_dict.get("name"), - "policy_type": p_dict.get("policy_type"), - "to_principals": p_dict.get("to_principals", []), - "except_principals": p_dict.get("except_principals", []), - "on_securable_type": p_dict.get("on_securable_type"), - "on_securable_fullname": p_dict.get("on_securable_fullname"), - "column_mask": p_dict.get("column_mask"), - "row_filter": p_dict.get("row_filter"), - "match_columns": p_dict.get("match_columns", []), - }) - return results -``` - ---- - -## Async Usage (FastAPI, etc.) - -The Databricks SDK is synchronous. 
In async applications, wrap calls with `asyncio.to_thread()`: - -```python -import asyncio - -async def list_policies_async(w, catalog: str): - return await asyncio.to_thread( - lambda: list(w.policies.list_policies( - on_securable_type="CATALOG", - on_securable_fullname=catalog, - include_inherited=True, - )) - ) - -async def create_policy_async(w, **kwargs): - return await asyncio.to_thread( - w.policies.create_policy, - **kwargs, - ) -``` diff --git a/databricks-skills/uc-fgac-governance/sql-generation.md b/databricks-skills/uc-fgac-governance/sql-generation.md deleted file mode 100644 index c0cb46f1..00000000 --- a/databricks-skills/uc-fgac-governance/sql-generation.md +++ /dev/null @@ -1,356 +0,0 @@ -# SQL Generation Reference - -Pure SQL patterns for Unity Catalog ABAC governance operations. All SQL follows Databricks syntax. - ---- - -## Tag Operations - -### SET TAG on Column - -```sql --- Legacy syntax (all versions) -ALTER TABLE catalog.schema.table -ALTER COLUMN column_name SET TAGS ('pii_type' = 'ssn'); - --- Modern syntax (DBR 16.1+) -SET TAG ON COLUMN catalog.schema.table.column_name 'pii_type' = 'ssn'; -``` - -### SET TAG on Table - -```sql --- Legacy syntax -ALTER TABLE catalog.schema.table -SET TAGS ('data_classification' = 'confidential'); - --- Modern syntax -SET TAG ON TABLE catalog.schema.table 'data_classification' = 'confidential'; -``` - -### SET TAG on Schema / Catalog - -```sql -SET TAG ON SCHEMA catalog.schema 'environment' = 'production'; -SET TAG ON CATALOG my_catalog 'department' = 'finance'; -``` - -### UNSET TAG - -```sql --- Column (legacy) -ALTER TABLE catalog.schema.table -ALTER COLUMN column_name UNSET TAGS ('pii_type'); - --- Column (modern) -UNSET TAG ON COLUMN catalog.schema.table.column_name 'pii_type'; - --- Table (legacy) -ALTER TABLE catalog.schema.table -UNSET TAGS ('data_classification'); - --- Table (modern) -UNSET TAG ON TABLE catalog.schema.table 'data_classification'; -``` - -**Docs:** -- SET TAG: 
https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-set-tag.html -- UNSET TAG: https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-unset-tag.html - ---- - -## Tag Discovery Queries - -### Query Column Tags - -```sql -SELECT tag_name, tag_value, column_name -FROM system.information_schema.column_tags -WHERE catalog_name = 'my_catalog' - AND schema_name = 'my_schema' - AND table_name = 'my_table'; -``` - -### Query Table Tags - -```sql -SELECT tag_name, tag_value -FROM system.information_schema.table_tags -WHERE catalog_name = 'my_catalog' - AND schema_name = 'my_schema' - AND table_name = 'my_table'; -``` - -### All Tag Assignments in a Catalog - -```sql --- Table-level tags -SELECT 'TABLE' as securable_type, - CONCAT(catalog_name, '.', schema_name, '.', table_name) as securable_name, - tag_name as tag_key, - tag_value -FROM system.information_schema.table_tags -WHERE catalog_name = 'my_catalog'; - --- Column-level tags -SELECT 'COLUMN' as securable_type, - CONCAT(catalog_name, '.', schema_name, '.', table_name, '.', column_name) as securable_name, - tag_name as tag_key, - tag_value -FROM system.information_schema.column_tags -WHERE catalog_name = 'my_catalog'; -``` - -**Docs:** -- information_schema.column_tags: https://docs.databricks.com/sql/language-manual/information-schema/column_tags.html -- information_schema.table_tags: https://docs.databricks.com/sql/language-manual/information-schema/table_tags.html - ---- - -## Masking UDF Creation - -All masking UDFs must be `DETERMINISTIC` with simple `CASE` statements. No external calls or nested UDFs. 
- -### Generic Masking Strategies - -```sql --- Full mask: replaces all characters with * -CREATE OR REPLACE FUNCTION catalog.schema.mask_full(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Full masking - replaces all characters with *' -RETURN CASE - WHEN value IS NULL THEN NULL - ELSE REPEAT('*', LENGTH(value)) -END; - --- Partial mask: show last 4 characters -CREATE OR REPLACE FUNCTION catalog.schema.mask_partial(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Partial masking - shows last 4 characters' -RETURN CASE - WHEN value IS NULL THEN NULL - WHEN LENGTH(value) <= 4 THEN REPEAT('*', LENGTH(value)) - ELSE CONCAT(REPEAT('*', LENGTH(value) - 4), RIGHT(value, 4)) -END; - --- Hash: SHA256 with version prefix -CREATE OR REPLACE FUNCTION catalog.schema.mask_hash(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Hash masking - SHA256 with version prefix' -RETURN CASE - WHEN value IS NULL THEN NULL - ELSE CONCAT('HASH_v1_', SUBSTRING(SHA2(CONCAT(value, ':v1'), 256), 1, 16)) -END; - --- Redact: replace with [REDACTED] -CREATE OR REPLACE FUNCTION catalog.schema.mask_redact(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Redaction - replaces value with [REDACTED]' -RETURN CASE - WHEN value IS NULL THEN NULL - ELSE '[REDACTED]' -END; - --- Nullify: always returns NULL -CREATE OR REPLACE FUNCTION catalog.schema.mask_nullify(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Nullify - always returns NULL' -RETURN NULL; -``` - -### Specialized Masking UDFs - -```sql --- SSN: ***-**-XXXX -CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Masks SSN showing only last 4 digits in XXX-XX-XXXX format' -RETURN CASE - WHEN ssn IS NULL THEN NULL - WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 - THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) - ELSE '***-**-****' -END; - --- Email: j***@example.com -CREATE OR REPLACE FUNCTION catalog.schema.mask_email(email STRING) -RETURNS 
STRING -DETERMINISTIC -COMMENT 'Masks email showing first char and domain' -RETURN CASE - WHEN email IS NULL THEN NULL - WHEN INSTR(email, '@') > 1 - THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1)) - ELSE '***@***.***' -END; - --- Credit card: ****-****-****-1234 -CREATE OR REPLACE FUNCTION catalog.schema.mask_credit_card(card_number STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Masks credit card showing only last 4 digits' -RETURN CASE - WHEN card_number IS NULL THEN NULL - WHEN LENGTH(REGEXP_REPLACE(card_number, '[^0-9]', '')) >= 4 - THEN CONCAT('****-****-****-', RIGHT(REGEXP_REPLACE(card_number, '[^0-9]', ''), 4)) - ELSE '****-****-****-****' -END; -``` - -### Row Filter UDFs - -Row filter UDFs return `BOOLEAN`: `TRUE` to include, `FALSE` to exclude. - -```sql --- Region-based filter: hide EU rows -CREATE OR REPLACE FUNCTION catalog.schema.is_not_eu_region(region_value STRING) -RETURNS BOOLEAN -DETERMINISTIC -COMMENT 'Row filter - returns FALSE for EU regions' -RETURN CASE - WHEN region_value IS NULL THEN TRUE - WHEN LOWER(region_value) LIKE '%eu%' THEN FALSE - WHEN LOWER(region_value) LIKE '%europe%' THEN FALSE - ELSE TRUE -END; - --- Array membership filter -CREATE OR REPLACE FUNCTION catalog.schema.is_in_allowed_values( - row_value STRING, - allowed_values ARRAY -) -RETURNS BOOLEAN -DETERMINISTIC -COMMENT 'Row filter based on array membership' -RETURN CASE - WHEN allowed_values IS NULL THEN FALSE - WHEN ARRAY_CONTAINS(TRANSFORM(allowed_values, x -> LOWER(x)), LOWER(row_value)) THEN TRUE - ELSE FALSE -END; -``` - -**Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices - ---- - -## Policy Creation - -### Column Mask Policy - -```sql -CREATE OR REPLACE POLICY mask_pii_ssn -ON SCHEMA catalog.schema -COMMENT 'Mask SSN columns for analysts' -COLUMN MASK catalog.schema.mask_ssn -TO `analysts`, `data_scientists` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS 
masked_col -ON COLUMN masked_col; -``` - -### Row Filter Policy - -```sql -CREATE OR REPLACE POLICY filter_eu_data -ON CATALOG my_catalog -COMMENT 'Filter EU rows for US team' -ROW FILTER catalog.schema.is_not_eu_region -TO `us_team` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col -USING COLUMNS (filter_col); -``` - -### Policy with Tag Key Only (any value) - -```sql --- Match any column with tag 'pii_type' regardless of value -CREATE OR REPLACE POLICY mask_all_pii -ON SCHEMA catalog.schema -COLUMN MASK catalog.schema.mask_full -TO `external_users` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTag('pii_type') AS masked_col -ON COLUMN masked_col; -``` - -### Drop Policy - -```sql -DROP POLICY mask_pii_ssn ON SCHEMA catalog.schema; -DROP POLICY filter_eu_data ON CATALOG my_catalog; -``` - -> **Note:** There is no `ALTER POLICY`. To modify a policy, drop and recreate it. - ---- - -## Discovery Queries - -```sql --- List catalogs -SHOW CATALOGS; - --- List schemas in a catalog -SHOW SCHEMAS IN my_catalog; - --- List tables in a schema -SHOW TABLES IN my_catalog.my_schema; - --- Describe table with extended metadata -DESCRIBE TABLE EXTENDED my_catalog.my_schema.my_table; - --- List UDFs in a schema -SHOW USER FUNCTIONS IN my_catalog.my_schema; - --- Describe a UDF -DESCRIBE FUNCTION EXTENDED my_catalog.my_schema.mask_ssn; - --- Sample column values -SELECT DISTINCT column_name -FROM my_catalog.my_schema.my_table -LIMIT 20; -``` - ---- - -## Enums Reference - -### PII Types (governed tag values) - -`ssn`, `email`, `phone`, `credit_card`, `date_of_birth`, `address`, `name`, `ip_address`, `national_id`, `medical_record`, `generic` - -### Masking Strategies - -| Strategy | Description | -|----------|-------------| -| `full_mask` | Replace all characters with `*` | -| `partial_mask` | Show last 4 characters | -| `hash` | SHA256 with version prefix | -| `redact` | Replace with `[REDACTED]` | -| `nullify` | Always return NULL | -| 
`custom` | User-supplied SQL (requires manual UDF) | - -### Policy Scopes - -| Scope | Description | -|-------|-------------| -| `CATALOG` | Policy applies to all tables in catalog | -| `SCHEMA` | Policy applies to all tables in schema | -| `TABLE` | Policy applies to a single table | - -### Tag Syntax Variants - -| Variant | Availability | Example | -|---------|-------------|---------| -| `LEGACY` | All versions | `ALTER TABLE t ALTER COLUMN c SET TAGS ('k'='v')` | -| `MODERN` | DBR 16.1+ | `SET TAG ON COLUMN t.c 'k' = 'v'` | From ee340e1da73ac7d279e10c4b453b0e732890fae4 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Wed, 11 Feb 2026 13:30:44 -0600 Subject: [PATCH 10/34] Clean up: remove FGAC duplication from SDK skill, delete guardrails doc, gitignore .claude/ - Remove FGAC section from databricks-python-sdk SKILL.md (covered by databricks-unity-catalog skill) - Delete FGAC_GUARDRAILS.md (guardrails documented in 9-fgac-sdk-and-tools.md) - Add .claude/ to .gitignore (local skill installs) --- .gitignore | 6 +- FGAC_GUARDRAILS.md | 271 ------------------ .../databricks-python-sdk/SKILL.md | 126 -------- 3 files changed, 3 insertions(+), 400 deletions(-) delete mode 100644 FGAC_GUARDRAILS.md diff --git a/.gitignore b/.gitignore index e9f0ce95..dfcda712 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,9 @@ .ai-dev-kit/ .claude/ +# Claude Code local skills (installed via install_skills.sh) +.claude/ + # Python __pycache__/ @@ -73,6 +76,3 @@ databricks-tools-core/tests/integration/pdf/generated_pdf/ # Python cache __pycache__/ windows_info.txt - -# Local review notes -COMMIT_REVIEW.md diff --git a/FGAC_GUARDRAILS.md b/FGAC_GUARDRAILS.md deleted file mode 100644 index 99e37549..00000000 --- a/FGAC_GUARDRAILS.md +++ /dev/null @@ -1,271 +0,0 @@ -# FGAC Human-in-the-Loop Guardrails - -Fine-Grained Access Control (FGAC) policy mutations (create, update, delete) are protected by two programmatic guardrails that ensure every change is previewed, approved, and executed 
by an authorized user. - ---- - -## Architecture Overview - -``` - +------------------+ - | Human / Agent | - +--------+---------+ - | - 1. Request change - | - v - +----------------------------+ - | preview_policy_changes() | - | | - | - Validates parameters | - | - Generates SQL preview | - | - Signs params + timestamp | - | with HMAC-SHA256 | - | - Returns approval_token | - +-------------+--------------+ - | - 2. Preview + approval_token - | - v - +----------------------------+ - | Human Reviews Preview | - | | - | - Equivalent SQL shown | - | - Warnings displayed | - | - Approves or rejects | - +-------------+--------------+ - | - 3. "Approve" + token - | - v - +----------------------------+ - | create/update/delete_*() | - | | - | +-- Admin Group Check --+ | - | | w.current_user.me() | | - | | Is user in group? | | - | +---------+-------------+ | - | | Yes | - | v | - | +-- Token Validation ---+ | - | | Verify HMAC sig | | - | | Check TTL (10 min) | | - | | Match params | | - | +---------+-------------+ | - | | Valid | - | v | - | Execute SDK call | - +----------------------------+ -``` - ---- - -## Guardrail 1: Approval Token - -Every mutating call **requires** a cryptographic token obtained from `preview_policy_changes()`. This prevents any create/update/delete from executing without a prior preview step. - -### Token Lifecycle - -``` - preview_policy_changes(action="CREATE", policy_name="mask_ssn", ...) - | - | 1. Collect parameters - | 2. Add timestamp = now() - | 3. JSON serialize (sorted keys) - | 4. HMAC-SHA256(secret, payload) -> signature - | 5. Return "signature:base64(payload)" - | - v - approval_token = "a3f8c1...:eyJhY3Rpb24iOiJDUkVBVEUi..." - | - | Token is valid for 10 minutes - | Token is bound to exact parameters - | - v - create_fgac_policy(..., approval_token=token) - | - | 1. Split token -> signature + payload - | 2. Recompute HMAC, compare (constant-time) - | 3. Decode payload, check timestamp within TTL - | 4. 
Verify params match (action, policy_name, etc.) - | 5. Reject on any mismatch - | - v - Execute or Reject -``` - -### What the Token Binds - -The token cryptographically binds these fields: - -| Field | Purpose | -|-------|---------| -| `action` | CREATE, UPDATE, or DELETE | -| `policy_name` | Prevents using token A's preview for policy B | -| `securable_type` | CATALOG, SCHEMA, or TABLE | -| `securable_fullname` | The target securable | -| `policy_type` | COLUMN_MASK or ROW_FILTER (CREATE only) | -| `to_principals` | Who the policy applies to | -| `function_name` | The masking UDF (CREATE only) | -| `tag_name` / `tag_value` | Tag match condition (CREATE only) | -| `timestamp` | Ensures token expires after TTL | - -### Rejection Scenarios - -``` - Token from preview with policy_name="A" - Used in create with policy_name="B" - --> ValueError: "Invalid or expired approval token" - - Token generated 15 minutes ago (TTL = 10 min) - --> ValueError: "Invalid or expired approval token" - - Token string tampered with or fabricated - --> ValueError: "Invalid or expired approval token" - - No token provided at all - --> TypeError (missing required argument) -``` - ---- - -## Guardrail 2: Admin Group Check - -Before validating the token, the system verifies the caller belongs to a configurable admin group. 
- -``` - Mutating call received - | - v - +-----------------------------+ - | w.current_user.me() | - | Extract group memberships | - +-------------+---------------+ - | - +--------+--------+ - | | - "admins" in "admins" not - user.groups in user.groups - | | - v v - Continue to PermissionError: - token check "User 'x' is not a member - of admin group 'admins'" -``` - ---- - -## Configuration - -| Environment Variable | Default | Description | -|---------------------|---------|-------------| -| `FGAC_APPROVAL_SECRET` | `fgac-default-dev-secret` | HMAC signing secret | -| `FGAC_ADMIN_GROUP` | `admins` | Required group for mutations | - -> **Production**: Always set `FGAC_APPROVAL_SECRET` to a strong random value. The default is only suitable for development. - -Token TTL is set to **600 seconds (10 minutes)** via `_TOKEN_TTL_SECONDS` in the source. - ---- - -## End-to-End Workflow - -### Happy Path - -``` - Agent System Databricks - | | | - | 1. preview(CREATE, ...) | | - |----------------------------->| | - | | Generate token | - | <-- preview + token --------| | - | | | - | 2. Show preview to human | | - | 3. Human says "approve" | | - | | | - | 4. create(..., token) | | - |----------------------------->| | - | | Check admin group | - | | Validate token | - | | create_policy() ------------->| - | | | - | <-- success + policy -------| <-- policy created -----------| - | | | -``` - -### Rejection Path (Mismatched Params) - -``` - Agent System - | | - | 1. preview(CREATE, name=A) | - |----------------------------->| - | <-- token_A ----------------| - | | - | 2. create(name=B, token_A) | - |----------------------------->| - | | Check admin group -> OK - | | Validate token: - | | name=B != name=A in token - | <-- ValueError -------------| - | | -``` - -### Rejection Path (Not an Admin) - -``` - Agent System Databricks - | | | - | 1. preview(CREATE, ...) | | - |----------------------------->| | - | <-- token ------------------| | - | | | - | 2. 
create(..., token) | | - |----------------------------->| | - | | me() ------------------->| - | | <-- user (no admin grp) -| - | <-- PermissionError --------| | - | | | -``` - ---- - -## Code Locations - -| Component | File | -|-----------|------| -| Core guardrail functions | `databricks-tools-core/.../unity_catalog/fgac_policies.py` | -| MCP tool wrapper | `databricks-mcp-server/.../tools/fgac_policies.py` | -| Integration tests | `databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py` | -| Skill docs | `databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md` | - -### Key Functions - -| Function | Purpose | -|----------|---------| -| `_generate_approval_token(params)` | Signs preview params into a token | -| `_validate_approval_token(token, params)` | Verifies signature, TTL, and param match | -| `_check_admin_group()` | Verifies caller is in the admin group | -| `preview_policy_changes()` | Returns preview + `approval_token` | -| `create_fgac_policy(approval_token=...)` | Guarded policy creation | -| `update_fgac_policy(approval_token=...)` | Guarded policy update | -| `delete_fgac_policy(approval_token=...)` | Guarded policy deletion | - ---- - -## FAQ - -**Q: Can I skip the preview step and call create directly?** -No. `approval_token` is a required positional argument. Calling without it raises `TypeError`. - -**Q: Can I reuse a token for multiple operations?** -No. Each token is bound to exact parameters. A token for policy A cannot create policy B. - -**Q: What happens if my token expires?** -Call `preview_policy_changes()` again to get a fresh token. Tokens expire after 10 minutes. - -**Q: Does the admin check apply to read operations?** -No. Only `create`, `update`, and `delete` require admin membership. Discovery functions (`list`, `get`, `preview`) are unrestricted. - -**Q: How do I change the admin group?** -Set the `FGAC_ADMIN_GROUP` environment variable before starting the application. 
diff --git a/databricks-skills/databricks-python-sdk/SKILL.md b/databricks-skills/databricks-python-sdk/SKILL.md index 4b01fb17..c5937eec 100644 --- a/databricks-skills/databricks-python-sdk/SKILL.md +++ b/databricks-skills/databricks-python-sdk/SKILL.md @@ -414,132 +414,6 @@ for doc in results.result.data_array: print(doc) ``` -### FGAC Policies -**Doc:** https://docs.databricks.com/data-governance/unity-catalog/abac/policies - -```python -# List policies on a schema -for policy in w.policies.list_policies( - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - include_inherited=True, -): - print(f"{policy.name}: {policy.policy_type}") - -# Get policy details -policy = w.policies.get_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", -) - -# Create column mask policy (ALWAYS include gov_admin in except_principals) -# on_securable_type sets scope: CATALOG (all tables), SCHEMA (schema tables), TABLE (single table) -# for_securable_type is always TABLE -from databricks.sdk.service.catalog import ( - ColumnMaskOptions, MatchColumn, PolicyInfo, PolicyType, RowFilterOptions, SecurableType, -) - -# Catalog-level column mask — applies to all tables in catalog -policy_info = PolicyInfo( - name="mask_pii_catalog", - policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, - on_securable_type=SecurableType.CATALOG, - on_securable_fullname="my_catalog", - for_securable_type=SecurableType.TABLE, - to_principals=["analysts"], - except_principals=["gov_admin"], - column_mask=ColumnMaskOptions( - function_name="my_catalog.my_schema.mask_ssn", - on_column="masked_col", - ), - match_columns=[ - MatchColumn(alias="masked_col", condition="hasTagValue('pii_type', 'ssn')"), - ], -) -created = w.policies.create_policy(policy_info=policy_info) - -# Schema-level column mask — applies to all tables in schema -policy_info = PolicyInfo( - name="mask_pii_schema", - policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, - 
on_securable_type=SecurableType.SCHEMA, - on_securable_fullname="my_catalog.my_schema", - for_securable_type=SecurableType.TABLE, - to_principals=["analysts"], - except_principals=["gov_admin"], - column_mask=ColumnMaskOptions( - function_name="my_catalog.my_schema.mask_ssn", - on_column="masked_col", - ), - match_columns=[ - MatchColumn(alias="masked_col", condition="hasTagValue('pii_type', 'ssn')"), - ], -) -created = w.policies.create_policy(policy_info=policy_info) - -# Table-level column mask — applies to a single table -policy_info = PolicyInfo( - name="mask_pii_table", - policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, - on_securable_type=SecurableType.TABLE, - on_securable_fullname="my_catalog.my_schema.my_table", - for_securable_type=SecurableType.TABLE, - to_principals=["analysts"], - except_principals=["gov_admin"], - column_mask=ColumnMaskOptions( - function_name="my_catalog.my_schema.mask_ssn", - on_column="masked_col", - ), - match_columns=[ - MatchColumn(alias="masked_col", condition="hasTagValue('pii_type', 'ssn')"), - ], -) -created = w.policies.create_policy(policy_info=policy_info) - -# Row filter — same three levels apply (CATALOG, SCHEMA, TABLE) -policy_info = PolicyInfo( - name="filter_eu_data", - policy_type=PolicyType.POLICY_TYPE_ROW_FILTER, - on_securable_type=SecurableType.SCHEMA, - on_securable_fullname="my_catalog.my_schema", - for_securable_type=SecurableType.TABLE, - to_principals=["us_team"], - except_principals=["gov_admin"], - row_filter=RowFilterOptions( - function_name="my_catalog.my_schema.is_not_eu_region", - ), - match_columns=[ - MatchColumn(alias="filter_col", condition="hasTagValue('region', 'eu')"), - ], -) -created = w.policies.create_policy(policy_info=policy_info) - -# Update policy principals (only principals and comment can be updated) -update_info = PolicyInfo( - to_principals=["analysts", "new_team"], - except_principals=["gov_admin"], - for_securable_type=SecurableType.TABLE, - 
policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, -) -w.policies.update_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - policy_info=update_info, - update_mask="to_principals,except_principals", -) - -# Delete policy -w.policies.delete_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", -) -``` - -**Note:** There is no `SHOW POLICIES` SQL. Use `w.policies.list_policies()` instead. There is no `ALTER POLICY` — drop and recreate. - ### Pipelines (Delta Live Tables) **Doc:** https://databricks-sdk-py.readthedocs.io/en/latest/workspace/pipelines/pipelines.html From 5b6f9d22e128daf185242ffc394b9aac53338f4d Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Wed, 11 Feb 2026 18:02:01 -0600 Subject: [PATCH 11/34] Add comprehensive FGAC readme with human-in-the-loop documentation Consolidates all FGAC feature details into a single root-level readme: governed tags, tag assignments, masking UDFs, policy management, Python SDK reference, MCP tools, and the full human-in-the-loop governance workflow with approval token internals and threat model. --- FGAC_README.md | 876 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 876 insertions(+) create mode 100644 FGAC_README.md diff --git a/FGAC_README.md b/FGAC_README.md new file mode 100644 index 00000000..259891c7 --- /dev/null +++ b/FGAC_README.md @@ -0,0 +1,876 @@ +# FGAC — Fine-Grained Access Control for Databricks Unity Catalog + +Fine-Grained Access Control (FGAC) policies bind governed tags to masking UDFs or row filters, scoped to catalogs, schemas, or tables, and targeted at specific principals. This document covers the complete FGAC feature set: governed tags, tag assignments, masking UDFs, policy management, the Python SDK, MCP tools, and the human-in-the-loop governance workflow. 
+ +**Databricks Docs:** +- [FGAC Overview](https://docs.databricks.com/data-governance/unity-catalog/abac/) +- [FGAC Policies](https://docs.databricks.com/data-governance/unity-catalog/abac/policies) +- [FGAC Tutorial](https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial) +- [UDF Best Practices](https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices) +- [Governed Tags](https://docs.databricks.com/admin/governed-tags/) + +--- + +## Table of Contents + +- [Architecture Overview](#architecture-overview) +- [Step 1: Governed Tags](#step-1-governed-tags) +- [Step 2: Tag Assignments](#step-2-tag-assignments) +- [Step 3: Masking UDFs](#step-3-masking-udfs) +- [Step 4: FGAC Policies](#step-4-fgac-policies) +- [Policy Quotas](#policy-quotas) +- [SQL That Does NOT Exist](#sql-that-does-not-exist) +- [Discovery Queries](#discovery-queries) +- [Python SDK Reference](#python-sdk-reference) +- [MCP Tools Reference](#mcp-tools-reference) +- [Human-in-the-Loop Governance Workflow](#human-in-the-loop-governance-workflow) +- [Approval Token Internals](#approval-token-internals) +- [Environment Variables](#environment-variables) +- [Threat Model](#threat-model) +- [Common Errors](#common-errors) +- [Best Practices](#best-practices) +- [Source Files](#source-files) + +--- + +## Architecture Overview + +FGAC policies follow a 4-step setup: + +``` +┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ +│ Governed Tags│───>│ Tag │───>│ Masking │───>│ FGAC │ +│ (UI only) │ │ Assignments │ │ UDFs │ │ Policies │ +└──────────────┘ └──────────────┘ └──────────────┘ └──────────────┘ + Step 1 Step 2 Step 3 Step 4 +``` + +1. **Governed Tags** — Define a classification taxonomy (e.g., `pii_type` with values `ssn`, `email`, `phone`) +2. **Tag Assignments** — Apply tags to columns or tables via SQL +3. **Masking UDFs** — Create deterministic functions that transform sensitive values +4. 
**FGAC Policies** — Bind tags to UDFs with principal scoping (who sees masked data, who is exempt) + +--- + +## Step 1: Governed Tags + +Governed tags **cannot** be created via SQL or API. They must be created in the Databricks UI. + +### Creating a Governed Tag (UI Steps) + +1. Navigate to **Catalog** in the workspace +2. Select **Governed Tags** from the left panel +3. Click **Create governed tag** +4. Configure: + - **Tag Key**: e.g., `pii_type` + - **Allowed Values**: e.g., `ssn`, `email`, `phone`, `credit_card`, `address` + - **Description**: e.g., "PII classification for FGAC policies" + +> **Note:** Tag data is stored as plain text and may be replicated globally. Avoid putting sensitive information in tag names or values. + +> **Propagation delay:** Newly created governed tags need ~30 seconds to propagate before they can be used in tag assignments. + +--- + +## Step 2: Tag Assignments + +### Modern Syntax (DBR 16.1+) + +```sql +-- Set tag on column +SET TAG ON COLUMN catalog.schema.table.column_name 'pii_type' = 'ssn'; + +-- Set tag on table +SET TAG ON TABLE catalog.schema.table 'data_classification' = 'confidential'; + +-- Set tag on schema +SET TAG ON SCHEMA catalog.schema 'environment' = 'production'; + +-- Set tag on catalog +SET TAG ON CATALOG my_catalog 'department' = 'finance'; + +-- Remove tag +UNSET TAG ON COLUMN catalog.schema.table.column_name 'pii_type'; +UNSET TAG ON TABLE catalog.schema.table 'data_classification'; +``` + +### Legacy Syntax (all versions) + +```sql +-- Set tag on column +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name SET TAGS ('pii_type' = 'ssn'); + +-- Set tag on table +ALTER TABLE catalog.schema.table +SET TAGS ('data_classification' = 'confidential'); + +-- Remove tag +ALTER TABLE catalog.schema.table +ALTER COLUMN column_name UNSET TAGS ('pii_type'); +``` + +### Querying Existing Tags + +```sql +-- Column tags +SELECT tag_name, tag_value, column_name +FROM system.information_schema.column_tags +WHERE 
catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; + +-- Table tags +SELECT tag_name, tag_value +FROM system.information_schema.table_tags +WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; + +-- All tag assignments in a catalog +SELECT 'COLUMN' AS securable_type, + CONCAT(catalog_name, '.', schema_name, '.', table_name, '.', column_name) AS securable_name, + tag_name, tag_value +FROM system.information_schema.column_tags +WHERE catalog_name = 'my_catalog'; +``` + +--- + +## Step 3: Masking UDFs + +Masking UDFs must be `DETERMINISTIC` and use simple `CASE` statements. No external calls or nested UDFs. + +> **Cross-catalog UDFs:** Masking UDFs do not need to be in the same catalog/schema as the policy scope. A common pattern is a shared governance schema (e.g., `governance.masking_udfs`) containing all masking functions, referenced by policies across multiple catalogs. + +### Column Mask UDFs + +```sql +-- Full mask: replaces all characters with * +CREATE OR REPLACE FUNCTION catalog.schema.mask_full(value STRING) +RETURNS STRING +DETERMINISTIC +RETURN CASE + WHEN value IS NULL THEN NULL + ELSE REPEAT('*', LENGTH(value)) +END; + +-- Partial mask: show last 4 characters +CREATE OR REPLACE FUNCTION catalog.schema.mask_partial(value STRING) +RETURNS STRING +DETERMINISTIC +RETURN CASE + WHEN value IS NULL THEN NULL + WHEN LENGTH(value) <= 4 THEN REPEAT('*', LENGTH(value)) + ELSE CONCAT(REPEAT('*', LENGTH(value) - 4), RIGHT(value, 4)) +END; + +-- SSN: ***-**-XXXX +CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) +RETURNS STRING +DETERMINISTIC +RETURN CASE + WHEN ssn IS NULL THEN NULL + WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 + THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) + ELSE '***-**-****' +END; + +-- Email: j***@example.com +CREATE OR REPLACE FUNCTION catalog.schema.mask_email(email STRING) +RETURNS STRING +DETERMINISTIC +RETURN CASE + 
 WHEN email IS NULL THEN NULL
+ WHEN INSTR(email, '@') > 1
+ THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1))
+ ELSE '***@***.***'
+END;
+
+-- Credit card: ****-****-****-1234
+CREATE OR REPLACE FUNCTION catalog.schema.mask_credit_card(card_number STRING)
+RETURNS STRING
+DETERMINISTIC
+RETURN CASE
+ WHEN card_number IS NULL THEN NULL
+ WHEN LENGTH(REGEXP_REPLACE(card_number, '[^0-9]', '')) >= 4
+ THEN CONCAT('****-****-****-', RIGHT(REGEXP_REPLACE(card_number, '[^0-9]', ''), 4))
+ ELSE '****-****-****-****'
+END;
+
+-- Hash: SHA256 with version prefix
+CREATE OR REPLACE FUNCTION catalog.schema.mask_hash(value STRING)
+RETURNS STRING
+DETERMINISTIC
+RETURN CASE
+ WHEN value IS NULL THEN NULL
+ ELSE CONCAT('HASH_v1_', SUBSTRING(SHA2(CONCAT(value, ':v1'), 256), 1, 16))
+END;
+
+-- Redact: replace with [REDACTED]
+CREATE OR REPLACE FUNCTION catalog.schema.mask_redact(value STRING)
+RETURNS STRING
+DETERMINISTIC
+RETURN CASE
+ WHEN value IS NULL THEN NULL
+ ELSE '[REDACTED]'
+END;
+```
+
+### Row Filter UDFs
+
+Row filter UDFs return `BOOLEAN`: `TRUE` to include the row, `FALSE` to exclude it. The columns named in the policy's `USING COLUMNS` clause are passed to the UDF as arguments, so the UDF's parameter list must match those columns in number and type (see `is_not_eu_region` below, which takes the single matched column).
+
+```sql
+-- Region-based filter: hide EU rows
+CREATE OR REPLACE FUNCTION catalog.schema.is_not_eu_region(region_value STRING)
+RETURNS BOOLEAN
+DETERMINISTIC
+RETURN CASE
+ WHEN region_value IS NULL THEN TRUE
+ WHEN LOWER(region_value) LIKE '%eu%' THEN FALSE
+ WHEN LOWER(region_value) LIKE '%europe%' THEN FALSE
+ ELSE TRUE
+END;
+```
+
+---
+
+## Step 4: FGAC Policies
+
+Policies are scoped to a **catalog**, **schema**, or **table**. The clause `FOR TABLES` is always present. The `for_securable_type` is always `TABLE`.
+ +### Column Mask Policy + +```sql +-- Catalog level — masks matching columns in ALL tables in the catalog +CREATE OR REPLACE POLICY mask_pii_ssn +ON CATALOG my_catalog +COMMENT 'Mask SSN columns catalog-wide' +COLUMN MASK my_catalog.my_schema.mask_ssn +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; + +-- Schema level — masks matching columns in all tables in the schema +CREATE OR REPLACE POLICY mask_pii_ssn +ON SCHEMA my_catalog.my_schema +COMMENT 'Mask SSN columns in schema' +COLUMN MASK my_catalog.my_schema.mask_ssn +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; + +-- Table level — masks matching columns on a single table +CREATE OR REPLACE POLICY mask_pii_ssn +ON TABLE my_catalog.my_schema.my_table +COMMENT 'Mask SSN columns on specific table' +COLUMN MASK my_catalog.my_schema.mask_ssn +TO `analysts`, `data_scientists` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; + +-- Cross-catalog UDF — UDF in governance catalog, policy on prod +CREATE OR REPLACE POLICY mask_ssn_finance +ON SCHEMA prod.finance +COMMENT 'Mask SSN using shared governance UDF' +COLUMN MASK governance.masking_udfs.mask_ssn +TO `analysts` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col +ON COLUMN masked_col; + +-- Match any column with a tag key (regardless of value) +CREATE OR REPLACE POLICY mask_all_pii +ON SCHEMA my_catalog.my_schema +COLUMN MASK my_catalog.my_schema.mask_full +TO `external_users` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTag('pii_type') AS masked_col +ON COLUMN masked_col; +``` + +### Row Filter Policy + +```sql +-- Catalog level +CREATE OR REPLACE POLICY filter_eu_data +ON CATALOG my_catalog +COMMENT 'Filter EU rows catalog-wide' +ROW FILTER 
my_catalog.my_schema.is_not_eu_region +TO `us_team` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col +USING COLUMNS (filter_col); + +-- Schema level +CREATE OR REPLACE POLICY filter_eu_data +ON SCHEMA my_catalog.my_schema +COMMENT 'Filter EU rows in schema' +ROW FILTER my_catalog.my_schema.is_not_eu_region +TO `us_team` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col +USING COLUMNS (filter_col); + +-- Table level +CREATE OR REPLACE POLICY filter_eu_data +ON TABLE my_catalog.my_schema.my_table +COMMENT 'Filter EU rows on specific table' +ROW FILTER my_catalog.my_schema.is_not_eu_region +TO `us_team` +EXCEPT `gov_admin` +FOR TABLES +MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col +USING COLUMNS (filter_col); +``` + +### Drop Policy + +```sql +DROP POLICY mask_pii_ssn ON CATALOG my_catalog; +DROP POLICY mask_pii_ssn ON SCHEMA my_catalog.my_schema; +DROP POLICY mask_pii_ssn ON TABLE my_catalog.my_schema.my_table; +``` + +> There is no `ALTER POLICY`. To modify a policy's UDF, tag matching, or scope, drop and recreate it. Only principals and comment can be updated in-place via the SDK. + +--- + +## Policy Quotas + +| Scope | Max Policies | +|-------|-------------| +| Per Catalog | 10 | +| Per Schema | 10 | +| Per Table | 5 | + +--- + +## SQL That Does NOT Exist + +These SQL commands do **not** exist in Databricks. Do not use them. 
+ +| Invalid SQL | What to Use Instead | +|---|---| +| `SHOW POLICIES` | SDK: `w.policies.list_policies()` or MCP tool `list_fgac_policies` | +| `DESCRIBE POLICY` | SDK: `w.policies.get_policy()` or MCP tool `get_fgac_policy` | +| `ALTER POLICY` | Drop and recreate the policy | +| `ALTER USER SET ATTRIBUTES` | SCIM API for user attributes | + +--- + +## Discovery Queries + +```sql +-- List catalogs, schemas, tables +SHOW CATALOGS; +SHOW SCHEMAS IN my_catalog; +SHOW TABLES IN my_catalog.my_schema; + +-- Describe table with extended metadata +DESCRIBE TABLE EXTENDED my_catalog.my_schema.my_table; + +-- List UDFs in a schema +SHOW USER FUNCTIONS IN my_catalog.my_schema; + +-- Describe a UDF +DESCRIBE FUNCTION EXTENDED my_catalog.my_schema.mask_ssn; + +-- Column tags in a table +SELECT tag_name, tag_value, column_name +FROM system.information_schema.column_tags +WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'my_table'; +``` + +--- + +## Python SDK Reference + +### Setup + +```python +from databricks.sdk import WorkspaceClient +from databricks.sdk.service.catalog import ( + ColumnMaskOptions, + MatchColumn, + PolicyInfo, + PolicyType, + RowFilterOptions, + SecurableType, +) + +w = WorkspaceClient() # Auto-detects credentials +``` + +### List Policies + +```python +policies = list(w.policies.list_policies( + on_securable_type="CATALOG", + on_securable_fullname="my_catalog", + include_inherited=True, +)) + +for p in policies: + print(f"{p.name}: {p.policy_type} on {p.on_securable_fullname}") + +# Filter by type +column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] +row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] +``` + +### Get Policy + +```python +policy = w.policies.get_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", +) +``` + +### Create Column Mask Policy + +```python +policy_info = PolicyInfo( + name="mask_pii_ssn", + 
policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, + on_securable_type=SecurableType.SCHEMA, + on_securable_fullname="my_catalog.my_schema", + for_securable_type=SecurableType.TABLE, + to_principals=["analysts", "data_scientists"], + except_principals=["gov_admin"], + comment="Mask SSN columns in schema", + column_mask=ColumnMaskOptions( + function_name="my_catalog.my_schema.mask_ssn", + on_column="masked_col", + ), + match_columns=[ + MatchColumn( + alias="masked_col", + condition="hasTagValue('pii_type', 'ssn')", + ) + ], +) +policy = w.policies.create_policy(policy_info=policy_info) +``` + +### Create Row Filter Policy + +```python +policy_info = PolicyInfo( + name="filter_eu_data", + policy_type=PolicyType.POLICY_TYPE_ROW_FILTER, + on_securable_type=SecurableType.SCHEMA, + on_securable_fullname="my_catalog.my_schema", + for_securable_type=SecurableType.TABLE, + to_principals=["us_team"], + except_principals=["gov_admin"], + comment="Filter EU rows in schema", + row_filter=RowFilterOptions( + function_name="my_catalog.my_schema.is_not_eu_region", + ), + match_columns=[ + MatchColumn( + alias="filter_col", + condition="hasTagValue('region', 'eu')", + ) + ], +) +policy = w.policies.create_policy(policy_info=policy_info) +``` + +### Update Policy + +Only principals and comment can be updated. To change the UDF, tag matching, or scope, drop and recreate. 
+ +```python +update_info = PolicyInfo( + to_principals=["analysts", "data_scientists", "new_team"], + except_principals=["gov_admin", "senior_admins"], + comment="Updated: added new_team", + for_securable_type=SecurableType.TABLE, + policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, +) +updated = w.policies.update_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", + policy_info=update_info, + update_mask="to_principals,except_principals,comment", +) +``` + +### Delete Policy + +```python +w.policies.delete_policy( + name="mask_pii_ssn", + on_securable_type="SCHEMA", + on_securable_fullname="my_catalog.my_schema", +) +``` + +### Error Handling + +```python +from databricks.sdk.errors import NotFound, PermissionDenied, BadRequest + +try: + policy = w.policies.get_policy(name="nonexistent", ...) +except NotFound: + print("Policy not found") +except PermissionDenied: + print("Insufficient permissions - need MANAGE on securable") +except BadRequest as e: + print(f"Invalid request: {e}") +``` + +--- + +## MCP Tools Reference + +All FGAC operations are exposed through a single MCP tool: `manage_uc_fgac_policies`. The `action` parameter selects the operation. 
+ +### Discovery Actions + +| Action | Description | Key Parameters | +|--------|-------------|----------------| +| `list` | List policies on a securable | `securable_type`, `securable_fullname`, `include_inherited`, `policy_type` | +| `get` | Get a specific policy by name | `policy_name`, `securable_type`, `securable_fullname` | +| `get_table_policies` | Get column masks and row filters on a table | `catalog`, `schema`, `table` | +| `get_masking_functions` | List masking UDFs in a schema | `catalog`, `schema` (or `udf_catalog`, `udf_schema` for cross-catalog) | +| `check_quota` | Check policy quota on a securable | `securable_type`, `securable_fullname` | + +### Preview Action (Human-in-the-Loop Gate) + +| Action | Description | Key Parameters | +|--------|-------------|----------------| +| `preview` | Preview changes without executing; returns `approval_token` | `preview_action` (`CREATE`/`UPDATE`/`DELETE`), `policy_name`, `securable_type`, `securable_fullname`, plus policy params for CREATE | + +### Mutation Actions (Require Approval Token) + +| Action | Description | Key Parameters | +|--------|-------------|----------------| +| `create` | Create a new FGAC policy | `policy_name`, `policy_type`, `securable_type`, `securable_fullname`, `function_name`, `to_principals`, `tag_name`, `tag_value`, `approval_token` | +| `update` | Update policy principals or comment | `policy_name`, `securable_type`, `securable_fullname`, `to_principals`, `except_principals`, `comment`, `approval_token` | +| `delete` | Delete a policy | `policy_name`, `securable_type`, `securable_fullname`, `approval_token` | + +--- + +## Human-in-the-Loop Governance Workflow + +FGAC policies control who can see sensitive data like SSNs, emails, and salaries. Because misconfigured policies can expose private data or lock out administrators, all mutating operations go through a governed workflow with two safety gates. + +### Why Human-in-the-Loop? 
+ +An AI agent that can freely create, change, or delete access control policies is dangerous. It could: + +- Accidentally expose PII to the wrong group +- Remove masking from sensitive columns +- Lock administrators out of their own data + +The human-in-the-loop pattern ensures **no policy change happens without explicit human approval**. + +### The Two Safety Gates + +#### Gate 1: Preview + Approval Token + +Every mutating operation (create, update, delete) requires a two-step process: + +1. **Preview** — The agent calls `preview_policy_changes()` which generates the exact SQL that *would* run, but **does not execute anything**. It also returns a cryptographic **approval token**. + +2. **Execute** — Only after the human reviews and approves does the agent call the mutation (e.g., `create_fgac_policy()`), passing the approval token from the preview step. + +The approval token is an **HMAC-SHA256 signed receipt** that binds the exact parameters from the preview to a timestamp: + +| Protection | How It Works | +|-----------|--------------| +| Parameter tampering | The token encodes every parameter (policy name, type, principals, UDF, tags). If the agent passes different parameters at execution time, the signature won't match and the operation is rejected. | +| Replay attacks | The token includes a timestamp and **expires after 10 minutes**. Old approvals cannot be reused. | +| Token forgery | The token is signed with an HMAC secret (`FGAC_APPROVAL_SECRET`). Without the secret, a valid token cannot be forged. | + +#### Gate 2: Admin Group Check + +Every mutating operation also verifies that the current Databricks user belongs to the configured admin group (env var `FGAC_ADMIN_GROUP`, defaults to `admins`). Even with a valid approval token, a non-admin user cannot make changes. 
+ +### The 6-Step Workflow + +``` +ANALYZE --> RECOMMEND --> PREVIEW --> APPROVE --> EXECUTE --> VERIFY + | | | | | | + v v v v v v + Discover Generate Show SQL Human Run SDK Confirm + current policy & impact confirms call w/ changes + state proposals preview changes token applied +``` + +#### Step 1: ANALYZE — Discover Current State + +The agent gathers information without making any changes: + +``` +list_fgac_policies() --> What policies already exist? +get_masking_functions() --> What masking UDFs are available? +get_column_tags_api() --> What columns are tagged with PII labels? +execute_sql(DESCRIBE) --> What does the table schema look like? +``` + +#### Step 2: RECOMMEND — Generate Proposals + +Based on the analysis, the agent identifies gaps and recommends new policies: + +> "The `email` column is tagged `pii=email` but has no masking policy. I recommend creating a column mask policy using a `mask_email` UDF." + +If a required UDF doesn't exist yet, the agent creates it first (UDF creation is a non-destructive SQL operation). + +#### Step 3: PREVIEW — Human-in-the-Loop Gate + +The agent calls `preview_policy_changes()` with the proposed parameters. **This does NOT execute anything.** It returns: + +```json +{ + "success": true, + "action": "CREATE", + "preview": { + "policy_name": "mask_email_for_non_admins", + "equivalent_sql": "CREATE OR REPLACE POLICY mask_email_for_non_admins\nON SCHEMA ai_dev_kit_test.test_schema\n..." + }, + "requires_approval": true, + "approval_token": "da70b6c3...:" +} +``` + +The agent presents the equivalent SQL and impact summary to the human. + +#### Step 4: APPROVE — Human Decision + +The human reviews: +- The exact SQL that will run +- Which principals are affected +- Which columns/tables will be masked +- Any warnings + +Then explicitly replies **"approve"** or requests changes. 
+ +#### Step 5: EXECUTE — Apply With Token + +Only after approval, the agent passes the approval token to the mutation: + +```python +create_fgac_policy( + policy_name="mask_email_for_non_admins", + policy_type="COLUMN_MASK", + securable_type="SCHEMA", + securable_fullname="ai_dev_kit_test.test_schema", + function_name="ai_dev_kit_test.test_schema.mask_email", + to_principals=["account users"], + tag_name="pii", + tag_value="email", + approval_token="da70b6c3...:eyJhY3Rpb24i..." +) +``` + +Internally, the function: +1. Checks admin group membership (`_check_admin_group()`) +2. Validates the approval token signature matches the parameters +3. Verifies the token hasn't expired (10-minute TTL) +4. Only then calls the Databricks SDK to create the policy + +#### Step 6: VERIFY — Confirm Changes + +The agent verifies the policy was applied correctly: + +```python +get_fgac_policy(policy_name="mask_email_for_non_admins", ...) +execute_sql("SELECT email FROM employee_pii LIMIT 5") +# Expected: a***@acme.com, b***@acme.com, etc. +``` + +### Sequence Diagram + +``` +Agent Human Databricks + | | | + |--- preview(CREATE, ...) -----------------------------------> | + |<-- SQL + approval_token ------------------------------------ | + | | | + |--- "Here's what I'll do:" --> | | + | [shows SQL + details] | | + | | | + |<-- "approve" ---------------- | | + | | | + |--- create(... token) --------------------------------------> | + | [1] check admin group | | + | [2] verify token signature | | + | [3] verify params match | | + | [4] verify not expired | | + |<-- policy created ------------------------------------------ | + | | | + |--- verify(get_policy) --------------------------------------> | + |<-- confirmed ------------------------------------------------ | +``` + +--- + +## Approval Token Internals + +### Token Structure + +``` +<signature>:<base64_payload> +``` + +Example: +``` +da70b6c3455944a3...:eyJhY3Rpb24iOiAiQ1JFQVRFIiwgInBvbGljeV9uYW1lIjog...
+``` + +### Generation (during preview) + +```python +def _generate_approval_token(params: dict) -> str: + # 1. Remove null values, add current timestamp + clean_params = {k: v for k, v in params.items() if v is not None} + clean_params["timestamp"] = int(time.time()) + + # 2. Serialize to deterministic JSON (sorted keys for consistency) + payload = json.dumps(clean_params, sort_keys=True) + + # 3. Sign with HMAC-SHA256 + signature = hmac.new( + APPROVAL_SECRET.encode(), payload.encode(), hashlib.sha256 + ).hexdigest() + + # 4. Encode payload as base64 + b64_payload = base64.b64encode(payload.encode()).decode() + + return f"{signature}:{b64_payload}" +``` + +### Validation (during execute) + +```python +def _validate_approval_token(approval_token: str, current_params: dict) -> None: + # 1. Split token into signature and payload + signature, b64_payload = approval_token.split(":", 1) + + # 2. Decode payload and re-compute expected signature + payload = base64.b64decode(b64_payload).decode() + expected_sig = hmac.new( + APPROVAL_SECRET.encode(), payload.encode(), hashlib.sha256 + ).hexdigest() + + # 3. Verify signature matches (constant-time comparison) + if not hmac.compare_digest(signature, expected_sig): + raise ValueError("Invalid or expired approval token") + + # 4. Check timestamp (10-minute TTL) + token_data = json.loads(payload) + ts = token_data.pop("timestamp", 0) + if abs(time.time() - ts) > 600: + raise ValueError("Invalid or expired approval token") + + # 5. 
Verify all parameters match what was previewed + if token_data != current_params: + raise ValueError("Invalid or expired approval token") +``` + +### Token Payload Example + +For a CREATE action, the token payload contains: + +```json +{ + "action": "CREATE", + "policy_name": "mask_email_for_non_admins", + "policy_type": "COLUMN_MASK", + "securable_type": "SCHEMA", + "securable_fullname": "ai_dev_kit_test.test_schema", + "function_name": "ai_dev_kit_test.test_schema.mask_email", + "to_principals": ["account users"], + "tag_name": "pii", + "tag_value": "email", + "comment": "Masks email columns for all non-admin users", + "timestamp": 1770853648 +} +``` + +Every single field must match between preview and execution, or the token is rejected. + +--- + +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `FGAC_APPROVAL_SECRET` | `fgac-default-dev-secret` | HMAC secret for signing approval tokens. **Set to a strong random value in production.** | +| `FGAC_ADMIN_GROUP` | `admins` | Databricks group required for mutating operations. 
| + +--- + +## Threat Model + +| Attack Vector | Protection | +|--------------|-----------| +| Agent changes parameters after human approval | Token signature binds exact params; mismatch = rejected | +| Stale approval reused hours/days later | Token expires after 10 minutes | +| Non-admin user attempts policy mutation | `_check_admin_group()` verifies group membership | +| Token forged without the signing secret | HMAC-SHA256 verification fails | +| Timing attack on signature comparison | `hmac.compare_digest()` uses constant-time comparison | + +--- + +## Common Errors + +| Error | Cause | Solution | +|-------|-------|----------| +| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Consolidate policies or use broader scope | +| `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | Check governed tag configuration in UI | +| `UDF_NOT_FOUND` | Masking UDF doesn't exist | Create UDF first, use fully qualified name | +| `POLICY_ALREADY_EXISTS` | Policy name conflict | Use `CREATE OR REPLACE POLICY` or delete existing first | +| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | Grant `MANAGE` permission to policy creator | +| `SHOW POLICIES is not supported` | Used invalid SQL | Use SDK `w.policies.list_policies()` instead | +| `Could not find principal` | Principal group doesn't exist in workspace | Verify group name exists in account/workspace | +| `Invalid or expired approval token` | Token expired, params changed, or forged | Re-run preview to get a fresh token | + +--- + +## Best Practices + +1. **Use governed tags** (not ad-hoc tags) for FGAC policy matching +2. **Always include an admin exception** (`EXCEPT \`gov_admin\``) in every policy to prevent lockout +3. **Use deterministic UDFs** with simple CASE statements — no external calls or nested UDFs +4. **Preview before executing** any policy change — never auto-execute +5. **Start at schema scope** and narrow to table only when needed +6. 
**Name policies descriptively**: `mask_{what}_{scope}` or `filter_{what}_{scope}` +7. **Test UDFs independently** before binding to policies (e.g., `SELECT mask_ssn('123-45-6789')`) +8. **Monitor policy quotas** — consolidate when approaching limits (10 per catalog/schema, 5 per table) +9. **Set `FGAC_APPROVAL_SECRET`** to a strong random value in production +10. **Grant to groups, not users** — easier to manage and audit + +--- + +## Source Files + +| File | Description | +|------|-------------| +| `databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py` | Core implementation (token generation, validation, CRUD) | +| `databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py` | MCP tool dispatcher (routes actions to core functions) | +| `databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py` | Integration tests | +| `ai-dev-project/.claude/skills/databricks-unity-catalog/7-fgac-overview.md` | FGAC workflow overview and SQL syntax | +| `ai-dev-project/.claude/skills/databricks-unity-catalog/8-fgac-sql-generation.md` | SQL generation reference | +| `ai-dev-project/.claude/skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md` | SDK patterns and MCP tool reference | From a47fdef47e6bf1f6619007f47c142a7ba7159cca Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Wed, 11 Feb 2026 18:04:07 -0600 Subject: [PATCH 12/34] Update unity-catalog skill description and add UC ACLs doc - Update databricks-unity-catalog skill description to include access controls and FGAC policy governance in CLAUDE.md and setup.sh - Remove model-serving from CLAUDE.md skill list - Add 10-uc-acls.md to unity-catalog skill extra files in install_skills.sh --- ai-dev-project/CLAUDE.md | 3 +-- ai-dev-project/setup.sh | 2 +- databricks-skills/install_skills.sh | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/ai-dev-project/CLAUDE.md b/ai-dev-project/CLAUDE.md index c77b4588..8bf4aadf 100644 --- a/ai-dev-project/CLAUDE.md +++ 
b/ai-dev-project/CLAUDE.md @@ -18,9 +18,8 @@ Load skills for detailed guidance: - `skill: "databricks-docs"` - Documentation reference - `skill: "databricks-jobs"` - Lakeflow Jobs and workflows - `skill: "databricks-python-sdk"` - Python SDK patterns -- `skill: "databricks-unity-catalog"` - System tables for lineage, audit, billing +- `skill: "databricks-unity-catalog"` - System tables for lineage, audit, billing, access controls, and FGAC policy governance. - `skill: "mlflow-evaluation"` - MLflow evaluation and trace analysis -- `skill: "model-serving"` - Model Serving deployment and endpoint management - `skill: "spark-declarative-pipelines"` - Spark Declarative Pipelines - `skill: "synthetic-data-generation"` - Test data generation - `skill: "unstructured-pdf-generation"` - Generate synthetic PDFs for RAG diff --git a/ai-dev-project/setup.sh b/ai-dev-project/setup.sh index 09ad3e9b..9c490240 100755 --- a/ai-dev-project/setup.sh +++ b/ai-dev-project/setup.sh @@ -134,7 +134,7 @@ Load skills for detailed guidance: - `skill: "databricks-docs"` - Documentation reference - `skill: "databricks-jobs"` - Lakeflow Jobs and workflows - `skill: "databricks-python-sdk"` - Python SDK patterns -- `skill: "databricks-unity-catalog"` - System tables for lineage, audit, billing +- `skill: "databricks-unity-catalog"` - System tables for lineage, audit, billing, access controls, and FGAC policy governance. 
- `skill: "mlflow-evaluation"` - MLflow evaluation and trace analysis - `skill: "spark-declarative-pipelines"` - Spark Declarative Pipelines - `skill: "synthetic-data-generation"` - Test data generation diff --git a/databricks-skills/install_skills.sh b/databricks-skills/install_skills.sh index c8db69b4..4bf16342 100755 --- a/databricks-skills/install_skills.sh +++ b/databricks-skills/install_skills.sh @@ -99,7 +99,7 @@ get_skill_extra_files() { "databricks-app-python") echo "dash.md streamlit.md README.md" ;; "databricks-jobs") echo "task-types.md triggers-schedules.md notifications-monitoring.md examples.md" ;; "databricks-python-sdk") echo "doc-index.md examples/1-authentication.py examples/2-clusters-and-jobs.py examples/3-sql-and-warehouses.py examples/4-unity-catalog.py examples/5-serving-and-vector-search.py" ;; - "databricks-unity-catalog") echo "5-system-tables.md 6-volumes.md 7-fgac-overview.md 8-fgac-sql-generation.md 9-fgac-sdk-and-tools.md" ;; + "databricks-unity-catalog") echo "5-system-tables.md 6-volumes.md 7-fgac-overview.md 8-fgac-sql-generation.md 9-fgac-sdk-and-tools.md 10-uc-acls.md" ;; "lakebase-autoscale") echo "projects.md branches.md computes.md connection-patterns.md reverse-etl.md" ;; "lakebase-provisioned") echo "connection-patterns.md reverse-etl.md" ;; "metric-views") echo "yaml-reference.md patterns.md" ;; From c6f41549670789921c7a22f8acbb40107ec558ce Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 09:26:27 -0600 Subject: [PATCH 13/34] Remove .claude skills and FGAC_README.md per PR review These files should not be in the repo - .claude/skills are local and FGAC_README.md is not needed at the root level. 
--- .../databricks-unity-catalog/10-uc-acls.md | 219 ----- .../7-fgac-overview.md | 342 ------- .../8-fgac-sql-generation.md | 420 --------- .../9-fgac-sdk-and-tools.md | 705 -------------- .../skills/databricks-unity-catalog/SKILL.md | 242 ----- FGAC_README.md | 876 ------------------ 6 files changed, 2804 deletions(-) delete mode 100644 .claude/skills/databricks-unity-catalog/10-uc-acls.md delete mode 100644 .claude/skills/databricks-unity-catalog/7-fgac-overview.md delete mode 100644 .claude/skills/databricks-unity-catalog/8-fgac-sql-generation.md delete mode 100644 .claude/skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md delete mode 100644 .claude/skills/databricks-unity-catalog/SKILL.md delete mode 100644 FGAC_README.md diff --git a/.claude/skills/databricks-unity-catalog/10-uc-acls.md b/.claude/skills/databricks-unity-catalog/10-uc-acls.md deleted file mode 100644 index 038ceb85..00000000 --- a/.claude/skills/databricks-unity-catalog/10-uc-acls.md +++ /dev/null @@ -1,219 +0,0 @@ -# Unity Catalog Access Controls (ACLs) - -Comprehensive reference for Unity Catalog privilege management: GRANT/REVOKE, ownership, and permission patterns across securables. - -## Securable Hierarchy - -``` -METASTORE - └── CATALOG - └── SCHEMA - ├── TABLE / VIEW / MATERIALIZED VIEW - ├── VOLUME - ├── FUNCTION - └── MODEL -``` - -Privileges **inherit** down the hierarchy. Granting `USE CATALOG` on a catalog grants access to all schemas within it (but not data access — that requires `SELECT`, `MODIFY`, etc.). 
- -## Privilege Reference - -### Catalog-Level - -| Privilege | Description | -|-----------|-------------| -| `USE CATALOG` | Required to access any object within the catalog | -| `CREATE SCHEMA` | Create schemas within the catalog | -| `ALL PRIVILEGES` | All catalog-level privileges | - -### Schema-Level - -| Privilege | Description | -|-----------|-------------| -| `USE SCHEMA` | Required to access any object within the schema | -| `CREATE TABLE` | Create tables and views | -| `CREATE VOLUME` | Create volumes | -| `CREATE FUNCTION` | Create functions | -| `CREATE MODEL` | Create registered models | -| `ALL PRIVILEGES` | All schema-level privileges | - -### Table/View-Level - -| Privilege | Description | -|-----------|-------------| -| `SELECT` | Read data from the table or view | -| `MODIFY` | Insert, update, delete data | -| `ALL PRIVILEGES` | All table-level privileges | - -### Volume-Level - -| Privilege | Description | -|-----------|-------------| -| `READ VOLUME` | Read files from the volume | -| `WRITE VOLUME` | Write files to the volume | -| `ALL PRIVILEGES` | All volume-level privileges | - -### Function-Level - -| Privilege | Description | -|-----------|-------------| -| `EXECUTE` | Execute the function | -| `ALL PRIVILEGES` | All function-level privileges | - -## SQL Syntax - -### GRANT - -```sql --- Catalog access -GRANT USE CATALOG ON CATALOG my_catalog TO `group_name`; -GRANT CREATE SCHEMA ON CATALOG my_catalog TO `group_name`; - --- Schema access -GRANT USE SCHEMA ON SCHEMA my_catalog.my_schema TO `group_name`; -GRANT CREATE TABLE ON SCHEMA my_catalog.my_schema TO `group_name`; -GRANT CREATE VOLUME ON SCHEMA my_catalog.my_schema TO `group_name`; -GRANT CREATE FUNCTION ON SCHEMA my_catalog.my_schema TO `group_name`; - --- Table/View access -GRANT SELECT ON TABLE my_catalog.my_schema.my_table TO `group_name`; -GRANT MODIFY ON TABLE my_catalog.my_schema.my_table TO `group_name`; - --- Volume access -GRANT READ VOLUME ON VOLUME 
my_catalog.my_schema.my_volume TO `group_name`; -GRANT WRITE VOLUME ON VOLUME my_catalog.my_schema.my_volume TO `group_name`; - --- Function access -GRANT EXECUTE ON FUNCTION my_catalog.my_schema.my_function TO `group_name`; - --- All privileges shorthand -GRANT ALL PRIVILEGES ON SCHEMA my_catalog.my_schema TO `admin_group`; -``` - -### REVOKE - -```sql -REVOKE SELECT ON TABLE my_catalog.my_schema.my_table FROM `group_name`; -REVOKE MODIFY ON TABLE my_catalog.my_schema.my_table FROM `group_name`; -REVOKE ALL PRIVILEGES ON SCHEMA my_catalog.my_schema FROM `group_name`; -``` - -### Show Grants - -```sql --- Show all grants on a securable -SHOW GRANTS ON CATALOG my_catalog; -SHOW GRANTS ON SCHEMA my_catalog.my_schema; -SHOW GRANTS ON TABLE my_catalog.my_schema.my_table; -SHOW GRANTS ON VOLUME my_catalog.my_schema.my_volume; - --- Show grants for a specific principal -SHOW GRANTS `group_name` ON CATALOG my_catalog; -SHOW GRANTS `user@example.com` ON SCHEMA my_catalog.my_schema; -``` - -## Ownership - -Every securable has exactly one **owner**. The owner has all privileges on the object and can grant/revoke privileges to others. 
- -```sql --- Transfer ownership -ALTER CATALOG my_catalog OWNER TO `new_owner`; -ALTER SCHEMA my_catalog.my_schema OWNER TO `new_owner`; -ALTER TABLE my_catalog.my_schema.my_table OWNER TO `new_owner`; -ALTER VOLUME my_catalog.my_schema.my_volume OWNER TO `new_owner`; -``` - -## Python SDK Patterns - -```python -from databricks.sdk import WorkspaceClient - -w = WorkspaceClient() - -# Grant privileges -w.grants.update( - securable_type="TABLE", - full_name="my_catalog.my_schema.my_table", - changes=[{ - "principal": "data_readers", - "add": ["SELECT"], - }] -) - -# Revoke privileges -w.grants.update( - securable_type="TABLE", - full_name="my_catalog.my_schema.my_table", - changes=[{ - "principal": "data_readers", - "remove": ["SELECT"], - }] -) - -# Get current grants -grants = w.grants.get( - securable_type="TABLE", - full_name="my_catalog.my_schema.my_table" -) -for assignment in grants.privilege_assignments: - print(f"{assignment.principal}: {assignment.privileges}") - -# Get effective grants (includes inherited) -effective = w.grants.get_effective( - securable_type="TABLE", - full_name="my_catalog.my_schema.my_table", - principal="data_readers" -) -``` - -## Common Patterns - -### Read-Only Data Consumer - -```sql --- Minimal access for data readers -GRANT USE CATALOG ON CATALOG analytics TO `data_readers`; -GRANT USE SCHEMA ON SCHEMA analytics.gold TO `data_readers`; -GRANT SELECT ON SCHEMA analytics.gold TO `data_readers`; -``` - -### Data Engineer (Read + Write) - -```sql -GRANT USE CATALOG ON CATALOG analytics TO `data_engineers`; -GRANT USE SCHEMA ON SCHEMA analytics.silver TO `data_engineers`; -GRANT SELECT ON SCHEMA analytics.silver TO `data_engineers`; -GRANT MODIFY ON SCHEMA analytics.silver TO `data_engineers`; -GRANT CREATE TABLE ON SCHEMA analytics.silver TO `data_engineers`; -``` - -### Schema Admin - -```sql -GRANT USE CATALOG ON CATALOG analytics TO `schema_admins`; -GRANT ALL PRIVILEGES ON SCHEMA analytics.gold TO `schema_admins`; -``` - -### ML 
Engineer (Models + Functions) - -```sql -GRANT USE CATALOG ON CATALOG ml TO `ml_engineers`; -GRANT USE SCHEMA ON SCHEMA ml.models TO `ml_engineers`; -GRANT CREATE MODEL ON SCHEMA ml.models TO `ml_engineers`; -GRANT CREATE FUNCTION ON SCHEMA ml.models TO `ml_engineers`; -GRANT SELECT ON SCHEMA ml.features TO `ml_engineers`; -``` - -## MCP Tool - -Use `mcp__databricks__manage_uc_grants` for grant operations, or `mcp__databricks__execute_sql` for SQL-based grant management. - -## Best Practices - -1. **Grant to groups, not users** — Easier to manage and audit -2. **Use least privilege** — Grant only the minimum permissions needed -3. **Leverage inheritance** — Grant at schema level when all tables need the same access -4. **Audit regularly** — Query `system.access.audit` for grant/revoke events -5. **Prefer `USE CATALOG` + `USE SCHEMA` + `SELECT`** over `ALL PRIVILEGES` -6. **Document ownership** — Keep track of who owns each catalog/schema diff --git a/.claude/skills/databricks-unity-catalog/7-fgac-overview.md b/.claude/skills/databricks-unity-catalog/7-fgac-overview.md deleted file mode 100644 index b692eea0..00000000 --- a/.claude/skills/databricks-unity-catalog/7-fgac-overview.md +++ /dev/null @@ -1,342 +0,0 @@ -# FGAC Policy Governance Overview - -Guidance for Fine-Grained Access Control (FGAC) policies in Databricks Unity Catalog. Covers governed tags, tag assignments, masking UDFs, CREATE/DROP POLICY syntax, and the human-in-the-loop governance workflow. 
- -**Databricks Docs:** -- FGAC overview: https://docs.databricks.com/data-governance/unity-catalog/abac/ -- FGAC policies: https://docs.databricks.com/data-governance/unity-catalog/abac/policies -- FGAC tutorial: https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial - -## When to Use This Skill - -Use this skill when: -- Creating or managing **FGAC policies** (column masks, row filters) -- Working with **governed tags** (creating via UI, applying via SQL) -- Building **masking UDFs** for PII protection (SSN, email, credit card, etc.) -- Implementing **human-in-the-loop governance** workflows -- Querying tag assignments via `information_schema` -- Managing policy lifecycle (create, update, delete, preview) - -## Reference Files - -| Topic | File | Description | -|-------|------|-------------| -| SQL Generation | [8-fgac-sql-generation.md](8-fgac-sql-generation.md) | SET/UNSET TAG, CREATE FUNCTION, CREATE/DROP POLICY, discovery queries | -| SDK & MCP Tools | [9-fgac-sdk-and-tools.md](9-fgac-sdk-and-tools.md) | Python SDK patterns and 12 MCP tools for policy management | - ---- - -## FGAC Workflow Overview - -FGAC policies in Databricks follow a 4-step setup: - -1. **Governed Tags** - Define classification taxonomy (UI only) -2. **Tag Assignments** - Apply tags to columns/tables via SQL -3. **Masking UDFs** - Create deterministic functions for data masking -4. **FGAC Policies** - Bind tags to UDFs with principal scoping - -``` -┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ -│ Governed Tags│───>│ Tag │───>│ Masking │───>│ FGAC │ -│ (UI only) │ │ Assignments │ │ UDFs │ │ Policies │ -└──────────────┘ └──────────────┘ └──────────────┘ └──────────────┘ -``` - ---- - -## IMPORTANT: SQL That Does NOT Exist - -These SQL commands do **not** exist in Databricks. Do not generate them. 
- -| Invalid SQL | What to use instead | -|---|---| -| `SHOW POLICIES` | REST API: `w.policies.list_policies()` | -| `DESCRIBE POLICY` | REST API: `w.policies.get_policy()` | -| `ALTER POLICY` | Drop and recreate the policy | -| `ALTER USER SET ATTRIBUTES` | SCIM API for user attributes | -| `SHOW USER ATTRIBUTES` | SCIM API for user attributes | - ---- - -## Step 1: Governed Tags - -Governed tags **cannot** be created via SQL. They must be created in the Databricks UI. - -### Creating a Governed Tag (UI Steps) - -1. Navigate to **Catalog** in the workspace -2. Select **Governed Tags** from the left panel -3. Click **Create governed tag** -4. Configure: - - **Tag Key**: e.g., `pii_type` - - **Allowed Values**: e.g., `ssn`, `email`, `phone`, `credit_card`, `address` - - **Description**: e.g., "PII classification for FGAC policies" - -> **Note:** Tag data is stored as plain text and may be replicated globally. Avoid sensitive information in tag names or values. - -**Docs:** https://docs.databricks.com/admin/governed-tags/ - ---- - -## Step 2: Applying Tags to Columns - -### Legacy Syntax (all versions) - -```sql --- Set tag on column -ALTER TABLE catalog.schema.table -ALTER COLUMN column_name SET TAGS ('pii_type' = 'ssn'); - --- Set tag on table -ALTER TABLE catalog.schema.table -SET TAGS ('data_classification' = 'confidential'); - --- Remove tag -ALTER TABLE catalog.schema.table -ALTER COLUMN column_name UNSET TAGS ('pii_type'); -``` - -### Modern Syntax (DBR 16.1+) - -```sql -SET TAG ON COLUMN catalog.schema.table.column_name 'pii_type' = 'ssn'; -SET TAG ON TABLE catalog.schema.table 'data_classification' = 'confidential'; -SET TAG ON SCHEMA catalog.schema 'environment' = 'production'; -SET TAG ON CATALOG catalog 'department' = 'finance'; - -UNSET TAG ON COLUMN catalog.schema.table.column_name 'pii_type'; -``` - -### Querying Existing Tags - -```sql --- Column tags -SELECT tag_name, tag_value, column_name -FROM system.information_schema.column_tags -WHERE 
catalog_name = 'my_catalog' - AND schema_name = 'my_schema' - AND table_name = 'my_table'; - --- Table tags -SELECT tag_name, tag_value -FROM system.information_schema.table_tags -WHERE catalog_name = 'my_catalog' - AND schema_name = 'my_schema' - AND table_name = 'my_table'; -``` - ---- - -## Step 3: Masking UDFs - -Masking UDFs must be `DETERMINISTIC` and use simple `CASE` statements. No external calls or nested UDFs. - -```sql --- Full mask: replaces all characters with * -CREATE OR REPLACE FUNCTION catalog.schema.mask_full(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Full masking - replaces all characters with *' -RETURN CASE - WHEN value IS NULL THEN NULL - ELSE REPEAT('*', LENGTH(value)) -END; - --- Partial mask: show last 4 characters -CREATE OR REPLACE FUNCTION catalog.schema.mask_partial(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Partial masking - shows last 4 characters' -RETURN CASE - WHEN value IS NULL THEN NULL - WHEN LENGTH(value) <= 4 THEN REPEAT('*', LENGTH(value)) - ELSE CONCAT(REPEAT('*', LENGTH(value) - 4), RIGHT(value, 4)) -END; - --- SSN mask: ***-**-XXXX format -CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Masks SSN showing only last 4 digits' -RETURN CASE - WHEN ssn IS NULL THEN NULL - WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 - THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) - ELSE '***-**-****' -END; - --- Email mask: j***@example.com -CREATE OR REPLACE FUNCTION catalog.schema.mask_email(email STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Masks email showing first char and domain' -RETURN CASE - WHEN email IS NULL THEN NULL - WHEN INSTR(email, '@') > 1 - THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1)) - ELSE '***@***.***' -END; -``` - -**Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices - -> **Cross-catalog UDFs:** Masking UDFs do not need to be in the same 
catalog/schema as the policy scope. A common pattern is a shared governance schema (e.g., `governance.masking_udfs`) containing all masking functions, referenced by policies across multiple catalogs. The UDF name in a policy is always fully qualified (e.g., `governance.masking_udfs.mask_ssn`). - ---- - -## Step 4: FGAC Policies - -Policies are scoped to a **catalog**, **schema**, or **table**. `FOR TABLES` is always present. - -### Column Mask Policy - -```sql --- Catalog level — masks matching columns in ALL tables in the catalog -CREATE OR REPLACE POLICY mask_pii_catalog -ON CATALOG my_catalog -COMMENT 'Mask PII columns catalog-wide' -COLUMN MASK my_catalog.my_schema.mask_partial -TO `analysts`, `data_scientists` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col -ON COLUMN masked_col; - --- Schema level — masks matching columns in all tables in the schema -CREATE OR REPLACE POLICY mask_pii_schema -ON SCHEMA my_catalog.my_schema -COMMENT 'Mask PII columns in schema' -COLUMN MASK my_catalog.my_schema.mask_partial -TO `analysts`, `data_scientists` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col -ON COLUMN masked_col; - --- Table level — masks matching columns on a single table -CREATE OR REPLACE POLICY mask_pii_table -ON TABLE my_catalog.my_schema.my_table -COMMENT 'Mask PII columns on specific table' -COLUMN MASK my_catalog.my_schema.mask_partial -TO `analysts`, `data_scientists` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col -ON COLUMN masked_col; -``` - -### Row Filter Policy - -```sql --- Catalog level — filters rows in ALL tables in the catalog -CREATE OR REPLACE POLICY filter_eu_catalog -ON CATALOG my_catalog -COMMENT 'Filter EU rows catalog-wide' -ROW FILTER my_catalog.my_schema.is_not_eu_region -TO `us_team` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col -USING COLUMNS (filter_col); - --- 
Schema level — filters rows in all tables in the schema -CREATE OR REPLACE POLICY filter_eu_schema -ON SCHEMA my_catalog.my_schema -COMMENT 'Filter EU rows in schema' -ROW FILTER my_catalog.my_schema.is_not_eu_region -TO `us_team` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col -USING COLUMNS (filter_col); - --- Table level — filters rows on a single table -CREATE OR REPLACE POLICY filter_eu_table -ON TABLE my_catalog.my_schema.my_table -COMMENT 'Filter EU rows on specific table' -ROW FILTER my_catalog.my_schema.is_not_eu_region -TO `us_team` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col -USING COLUMNS (filter_col); -``` - -### Drop Policy - -```sql --- Drop at each scope level -DROP POLICY mask_pii_catalog ON CATALOG my_catalog; -DROP POLICY mask_pii_schema ON SCHEMA my_catalog.my_schema; -DROP POLICY mask_pii_table ON TABLE my_catalog.my_schema.my_table; -``` - -### CRITICAL: Always Exclude `gov_admin` - -Every FGAC policy **MUST** include `EXCEPT \`gov_admin\`` to protect administrator access. Without this, admins could be locked out of data. - -### Policy Quotas - -| Scope | Max Policies | -|-------|-------------| -| Per Catalog | 10 | -| Per Schema | 10 | -| Per Table | 5 | - -https://docs.databricks.com/gcp/en/data-governance/unity-catalog/abac/policies#policy-quotas ---- - -## Human-in-the-Loop Governance Workflow - -FGAC policy changes should follow a governed workflow: - -``` -ANALYZE → RECOMMEND → PREVIEW → APPROVE → EXECUTE → VERIFY - │ │ │ │ │ │ - ▼ ▼ ▼ ▼ ▼ ▼ - Discover Generate Show SQL Human Run SQL Confirm - current policy & impact confirms or SDK changes - state proposals preview changes call applied -``` - -1. **ANALYZE**: Discover current tags, policies, and UDFs -2. **RECOMMEND**: Generate policy proposals based on requirements -3. **PREVIEW**: Use `preview_policy_changes` to show exact SQL and impact -4. **APPROVE**: Human reviews and explicitly approves -5. 
**EXECUTE**: Create/update/delete policies via SDK or SQL -6. **VERIFY**: Confirm policies are applied correctly - -**Never auto-execute policy changes.** Always preview and wait for human approval. - ---- - -## Common Errors - -| Error | Cause | Solution | -|-------|-------|----------| -| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Consolidate policies or use broader scope | -| `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | Check governed tag configuration in UI | -| `UDF_NOT_FOUND` | Masking UDF doesn't exist | Create UDF first, use fully qualified name | -| `POLICY_ALREADY_EXISTS` | Policy name conflict | Use `CREATE OR REPLACE POLICY` | -| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | Grant `MANAGE` permission to policy creator | -| `SHOW POLICIES is not supported` | Used invalid SQL | Use REST API `w.policies.list_policies()` instead | - -## Best Practices - -1. **Use governed tags** (not ad-hoc tags) for FGAC policy matching -2. **Always include `EXCEPT \`gov_admin\``** in every policy -3. **Use deterministic UDFs** with simple CASE statements -4. **Preview before executing** any policy change -5. **Start at schema scope** and narrow to table only when needed -6. **Name policies descriptively**: `mask_{what}_{scope}` or `filter_{what}_{scope}` -7. **Test UDFs independently** before binding to policies -8. 
**Monitor policy quotas** — consolidate when approaching limits - -## Resources - -- [FGAC Overview](https://docs.databricks.com/data-governance/unity-catalog/abac/) -- [FGAC Policies](https://docs.databricks.com/data-governance/unity-catalog/abac/policies) -- [FGAC Tutorial](https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial) -- [UDF Best Practices](https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices) -- [Governed Tags](https://docs.databricks.com/admin/governed-tags/) -- [Column Masks & Row Filters](https://docs.databricks.com/data-governance/unity-catalog/filters-and-masks/) diff --git a/.claude/skills/databricks-unity-catalog/8-fgac-sql-generation.md b/.claude/skills/databricks-unity-catalog/8-fgac-sql-generation.md deleted file mode 100644 index b1cf729e..00000000 --- a/.claude/skills/databricks-unity-catalog/8-fgac-sql-generation.md +++ /dev/null @@ -1,420 +0,0 @@ -# SQL Generation Reference - -Pure SQL patterns for Unity Catalog FGAC governance operations. All SQL follows Databricks syntax. 
- ---- - -## Tag Operations - -### SET TAG on Column - -```sql --- Legacy syntax (all versions) -ALTER TABLE catalog.schema.table -ALTER COLUMN column_name SET TAGS ('pii_type' = 'ssn'); - --- Modern syntax (DBR 16.1+) -SET TAG ON COLUMN catalog.schema.table.column_name 'pii_type' = 'ssn'; -``` - -### SET TAG on Table - -```sql --- Legacy syntax -ALTER TABLE catalog.schema.table -SET TAGS ('data_classification' = 'confidential'); - --- Modern syntax -SET TAG ON TABLE catalog.schema.table 'data_classification' = 'confidential'; -``` - -### SET TAG on Schema / Catalog - -```sql -SET TAG ON SCHEMA catalog.schema 'environment' = 'production'; -SET TAG ON CATALOG my_catalog 'department' = 'finance'; -``` - -### UNSET TAG - -```sql --- Column (legacy) -ALTER TABLE catalog.schema.table -ALTER COLUMN column_name UNSET TAGS ('pii_type'); - --- Column (modern) -UNSET TAG ON COLUMN catalog.schema.table.column_name 'pii_type'; - --- Table (legacy) -ALTER TABLE catalog.schema.table -UNSET TAGS ('data_classification'); - --- Table (modern) -UNSET TAG ON TABLE catalog.schema.table 'data_classification'; -``` - -**Docs:** -- SET TAG: https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-set-tag.html -- UNSET TAG: https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-unset-tag.html - ---- - -## Tag Discovery Queries - -### Query Column Tags - -```sql -SELECT tag_name, tag_value, column_name -FROM system.information_schema.column_tags -WHERE catalog_name = 'my_catalog' - AND schema_name = 'my_schema' - AND table_name = 'my_table'; -``` - -### Query Table Tags - -```sql -SELECT tag_name, tag_value -FROM system.information_schema.table_tags -WHERE catalog_name = 'my_catalog' - AND schema_name = 'my_schema' - AND table_name = 'my_table'; -``` - -### All Tag Assignments in a Catalog - -```sql --- Table-level tags -SELECT 'TABLE' as securable_type, - CONCAT(catalog_name, '.', schema_name, '.', table_name) as securable_name, - tag_name as tag_key, - tag_value 
-FROM system.information_schema.table_tags -WHERE catalog_name = 'my_catalog'; - --- Column-level tags -SELECT 'COLUMN' as securable_type, - CONCAT(catalog_name, '.', schema_name, '.', table_name, '.', column_name) as securable_name, - tag_name as tag_key, - tag_value -FROM system.information_schema.column_tags -WHERE catalog_name = 'my_catalog'; -``` - -**Docs:** -- information_schema.column_tags: https://docs.databricks.com/sql/language-manual/information-schema/column_tags.html -- information_schema.table_tags: https://docs.databricks.com/sql/language-manual/information-schema/table_tags.html - ---- - -## Masking UDF Creation - -All masking UDFs must be `DETERMINISTIC` with simple `CASE` statements. No external calls or nested UDFs. - -### Generic Masking Strategies - -```sql --- Full mask: replaces all characters with * -CREATE OR REPLACE FUNCTION catalog.schema.mask_full(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Full masking - replaces all characters with *' -RETURN CASE - WHEN value IS NULL THEN NULL - ELSE REPEAT('*', LENGTH(value)) -END; - --- Partial mask: show last 4 characters -CREATE OR REPLACE FUNCTION catalog.schema.mask_partial(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Partial masking - shows last 4 characters' -RETURN CASE - WHEN value IS NULL THEN NULL - WHEN LENGTH(value) <= 4 THEN REPEAT('*', LENGTH(value)) - ELSE CONCAT(REPEAT('*', LENGTH(value) - 4), RIGHT(value, 4)) -END; - --- Hash: SHA256 with version prefix -CREATE OR REPLACE FUNCTION catalog.schema.mask_hash(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Hash masking - SHA256 with version prefix' -RETURN CASE - WHEN value IS NULL THEN NULL - ELSE CONCAT('HASH_v1_', SUBSTRING(SHA2(CONCAT(value, ':v1'), 256), 1, 16)) -END; - --- Redact: replace with [REDACTED] -CREATE OR REPLACE FUNCTION catalog.schema.mask_redact(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Redaction - replaces value with [REDACTED]' -RETURN CASE - WHEN value IS NULL THEN NULL - 
ELSE '[REDACTED]' -END; - --- Nullify: always returns NULL -CREATE OR REPLACE FUNCTION catalog.schema.mask_nullify(value STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Nullify - always returns NULL' -RETURN NULL; -``` - -### Specialized Masking UDFs - -```sql --- SSN: ***-**-XXXX -CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Masks SSN showing only last 4 digits in XXX-XX-XXXX format' -RETURN CASE - WHEN ssn IS NULL THEN NULL - WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 - THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) - ELSE '***-**-****' -END; - --- Email: j***@example.com -CREATE OR REPLACE FUNCTION catalog.schema.mask_email(email STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Masks email showing first char and domain' -RETURN CASE - WHEN email IS NULL THEN NULL - WHEN INSTR(email, '@') > 1 - THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1)) - ELSE '***@***.***' -END; - --- Credit card: ****-****-****-1234 -CREATE OR REPLACE FUNCTION catalog.schema.mask_credit_card(card_number STRING) -RETURNS STRING -DETERMINISTIC -COMMENT 'Masks credit card showing only last 4 digits' -RETURN CASE - WHEN card_number IS NULL THEN NULL - WHEN LENGTH(REGEXP_REPLACE(card_number, '[^0-9]', '')) >= 4 - THEN CONCAT('****-****-****-', RIGHT(REGEXP_REPLACE(card_number, '[^0-9]', ''), 4)) - ELSE '****-****-****-****' -END; -``` - -### Row Filter UDFs - -Row filter UDFs return `BOOLEAN`: `TRUE` to include, `FALSE` to exclude. 
- -```sql --- Region-based filter: hide EU rows -CREATE OR REPLACE FUNCTION catalog.schema.is_not_eu_region(region_value STRING) -RETURNS BOOLEAN -DETERMINISTIC -COMMENT 'Row filter - returns FALSE for EU regions' -RETURN CASE - WHEN region_value IS NULL THEN TRUE - WHEN LOWER(region_value) LIKE '%eu%' THEN FALSE - WHEN LOWER(region_value) LIKE '%europe%' THEN FALSE - ELSE TRUE -END; - --- Array membership filter -CREATE OR REPLACE FUNCTION catalog.schema.is_in_allowed_values( - row_value STRING, - allowed_values ARRAY -) -RETURNS BOOLEAN -DETERMINISTIC -COMMENT 'Row filter based on array membership' -RETURN CASE - WHEN allowed_values IS NULL THEN FALSE - WHEN ARRAY_CONTAINS(TRANSFORM(allowed_values, x -> LOWER(x)), LOWER(row_value)) THEN TRUE - ELSE FALSE -END; -``` - -**Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices - ---- - -## Policy Creation - -Policies are scoped to a **catalog**, **schema**, or **table**. `FOR TABLES` is always present. - -> **Cross-catalog UDFs:** The UDF referenced in a policy is always fully qualified (`catalog.schema.function`) and can reside in any catalog/schema — it does not need to be in the same catalog or schema as the policy scope. A common pattern is a shared governance schema (e.g., `governance.masking_udfs`) containing all masking functions. 
- -### Column Mask Policy - -```sql --- Catalog level — masks matching columns in ALL tables in the catalog -CREATE OR REPLACE POLICY mask_pii_ssn_catalog -ON CATALOG my_catalog -COMMENT 'Mask SSN columns catalog-wide' -COLUMN MASK my_catalog.my_schema.mask_ssn -TO `analysts`, `data_scientists` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col -ON COLUMN masked_col; - --- Schema level — masks matching columns in all tables in the schema -CREATE OR REPLACE POLICY mask_pii_ssn_schema -ON SCHEMA my_catalog.my_schema -COMMENT 'Mask SSN columns in schema' -COLUMN MASK my_catalog.my_schema.mask_ssn -TO `analysts`, `data_scientists` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col -ON COLUMN masked_col; - --- Table level — masks matching columns on a single table -CREATE OR REPLACE POLICY mask_pii_ssn_table -ON TABLE my_catalog.my_schema.my_table -COMMENT 'Mask SSN columns on specific table' -COLUMN MASK my_catalog.my_schema.mask_ssn -TO `analysts`, `data_scientists` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col -ON COLUMN masked_col; - --- Cross-catalog UDF — UDF in governance catalog, policy on prod -CREATE OR REPLACE POLICY mask_ssn_finance -ON SCHEMA prod.finance -COMMENT 'Mask SSN using shared governance UDF' -COLUMN MASK governance.masking_udfs.mask_ssn -TO `analysts` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col -ON COLUMN masked_col; -``` - -### Row Filter Policy - -```sql --- Catalog level — filters rows in ALL tables in the catalog -CREATE OR REPLACE POLICY filter_eu_data_catalog -ON CATALOG my_catalog -COMMENT 'Filter EU rows catalog-wide' -ROW FILTER my_catalog.my_schema.is_not_eu_region -TO `us_team` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col -USING COLUMNS (filter_col); - --- Schema level — filters rows in all tables in the schema -CREATE 
OR REPLACE POLICY filter_eu_data_schema -ON SCHEMA my_catalog.my_schema -COMMENT 'Filter EU rows in schema' -ROW FILTER my_catalog.my_schema.is_not_eu_region -TO `us_team` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col -USING COLUMNS (filter_col); - --- Table level — filters rows on a single table -CREATE OR REPLACE POLICY filter_eu_data_table -ON TABLE my_catalog.my_schema.my_table -COMMENT 'Filter EU rows on specific table' -ROW FILTER my_catalog.my_schema.is_not_eu_region -TO `us_team` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col -USING COLUMNS (filter_col); -``` - -### Policy with Tag Key Only (any value) - -```sql --- Match any column with tag 'pii_type' regardless of value --- Works at any scope: ON CATALOG, ON SCHEMA, or ON TABLE -CREATE OR REPLACE POLICY mask_all_pii -ON SCHEMA my_catalog.my_schema -COLUMN MASK my_catalog.my_schema.mask_full -TO `external_users` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTag('pii_type') AS masked_col -ON COLUMN masked_col; -``` - -### Drop Policy - -```sql --- Drop at each scope level -DROP POLICY mask_pii_ssn_catalog ON CATALOG my_catalog; -DROP POLICY mask_pii_ssn_schema ON SCHEMA my_catalog.my_schema; -DROP POLICY mask_pii_ssn_table ON TABLE my_catalog.my_schema.my_table; -``` - -> **Note:** There is no `ALTER POLICY`. To modify a policy, drop and recreate it. 
- ---- - -## Discovery Queries - -```sql --- List catalogs -SHOW CATALOGS; - --- List schemas in a catalog -SHOW SCHEMAS IN my_catalog; - --- List tables in a schema -SHOW TABLES IN my_catalog.my_schema; - --- Describe table with extended metadata -DESCRIBE TABLE EXTENDED my_catalog.my_schema.my_table; - --- List UDFs in a schema -SHOW USER FUNCTIONS IN my_catalog.my_schema; - --- Describe a UDF -DESCRIBE FUNCTION EXTENDED my_catalog.my_schema.mask_ssn; - --- Sample column values -SELECT DISTINCT column_name -FROM my_catalog.my_schema.my_table -LIMIT 20; -``` - ---- - -## Enums Reference - -### PII Types (governed tag values) - -`ssn`, `email`, `phone`, `credit_card`, `date_of_birth`, `address`, `name`, `ip_address`, `national_id`, `medical_record`, `generic` - -### Masking Strategies - -| Strategy | Description | -|----------|-------------| -| `full_mask` | Replace all characters with `*` | -| `partial_mask` | Show last 4 characters | -| `hash` | SHA256 with version prefix | -| `redact` | Replace with `[REDACTED]` | -| `nullify` | Always return NULL | -| `custom` | User-supplied SQL (requires manual UDF) | - -### Policy Scopes - -| Scope | Description | -|-------|-------------| -| `CATALOG` | Policy applies to all tables in catalog | -| `SCHEMA` | Policy applies to all tables in schema | -| `TABLE` | Policy applies to a single table | - -### Tag Syntax Variants - -| Variant | Availability | Example | -|---------|-------------|---------| -| `LEGACY` | All versions | `ALTER TABLE t ALTER COLUMN c SET TAGS ('k'='v')` | -| `MODERN` | DBR 16.1+ | `SET TAG ON COLUMN t.c 'k' = 'v'` | diff --git a/.claude/skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md b/.claude/skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md deleted file mode 100644 index dddda9e9..00000000 --- a/.claude/skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md +++ /dev/null @@ -1,705 +0,0 @@ -# FGAC Policy SDK & MCP Tools - -Python SDK patterns and MCP tool reference for managing FGAC 
policies in Unity Catalog. - -**SDK Docs:** https://databricks-sdk-py.readthedocs.io/en/latest/ -**FGAC Docs:** https://docs.databricks.com/data-governance/unity-catalog/abac/policies - ---- - -## Policy Scopes - -`on_securable_type` sets the **scope** of the policy. `for_securable_type` is always `TABLE`. - -| Scope | `on_securable_type` | `on_securable_fullname` | Effect | -|---|---|---|---| -| Catalog | `CATALOG` | `"my_catalog"` | Applies to all tables in the catalog | -| Schema | `SCHEMA` | `"my_catalog.my_schema"` | Applies to all tables in the schema | -| Table | `TABLE` | `"my_catalog.my_schema.my_table"` | Applies to a single table | - -### Important: Always Include `gov_admin` - -Every policy **MUST** include `"gov_admin"` in `except_principals`: - -```python -# CORRECT -except_principals=["gov_admin"] - -# CORRECT - additional admin groups -except_principals=["gov_admin", "platform_admins"] - -# WRONG - missing gov_admin -except_principals=["platform_admins"] # gov_admin must be included! -``` - ---- - -## Guardrails - -FGAC mutating operations (`create`, `update`, `delete`) enforce two programmatic guardrails: - -### Approval Token - -Every mutating call **requires** a valid `approval_token` obtained from `preview_policy_changes()`. The token is an HMAC-SHA256 signature binding the previewed parameters to a timestamp. - -- Token TTL: **10 minutes** (configurable via `_TOKEN_TTL_SECONDS`) -- Parameters must match exactly between preview and mutation -- Action mapping: preview `CREATE` → mutation `create`, `UPDATE` → `update`, `DELETE` → `delete` - -### Admin Group Check - -The caller must be a member of the configured admin group. Membership is verified via `w.current_user.me().groups`. 
- -### Environment Variables - -| Variable | Default | Description | -|----------|---------|-------------| -| `FGAC_APPROVAL_SECRET` | `fgac-default-dev-secret` | HMAC secret for token signing | -| `FGAC_ADMIN_GROUP` | `admins` | Required group membership for mutations | - -> **Important:** In production, always set `FGAC_APPROVAL_SECRET` to a strong random value. - ---- - -## MCP Tools - -### Discovery Tools - -#### `list_fgac_policies` - -List FGAC policies on a catalog, schema, or table. - -```python -list_fgac_policies( - securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" - securable_fullname: str, # e.g., "my_catalog.my_schema" - include_inherited: bool = True, - policy_type: str = None, # "COLUMN_MASK" or "ROW_FILTER" (optional filter) -) -``` - -**Returns:** -```json -{ - "success": true, - "securable_type": "SCHEMA", - "securable_fullname": "my_catalog.my_schema", - "policy_count": 3, - "policies": [ - { - "name": "mask_pii_ssn", - "policy_type": "COLUMN_MASK", - "to_principals": ["analysts"], - "except_principals": ["gov_admin"], - "on_securable_fullname": "my_catalog.my_schema", - "column_mask": {"function_name": "my_catalog.my_schema.mask_ssn", "on_column": "masked_col"}, - "match_columns": [{"alias": "masked_col", "condition": "hasTagValue('pii_type', 'ssn')"}] - } - ] -} -``` - -#### `get_fgac_policy` - -Get details for a specific policy by name. 
- -```python -get_fgac_policy( - policy_name: str, # Policy name - securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" - securable_fullname: str, # Fully qualified securable name -) -``` - -**Returns:** -```json -{ - "success": true, - "policy": { - "name": "mask_pii_ssn", - "policy_type": "COLUMN_MASK", - "comment": "Mask SSN columns for analysts", - "to_principals": ["analysts", "data_scientists"], - "except_principals": ["gov_admin"], - "on_securable_type": "SCHEMA", - "on_securable_fullname": "my_catalog.my_schema", - "for_securable_type": "TABLE", - "column_mask": {"function_name": "my_catalog.my_schema.mask_ssn", "on_column": "masked_col"}, - "match_columns": [{"alias": "masked_col", "condition": "hasTagValue('pii_type', 'ssn')"}] - } -} -``` - -#### `get_table_policies` - -Get column masks and row filters for a specific table via Unity Catalog API. - -```python -get_table_policies( - catalog: str, - schema: str, - table: str, -) -``` - -**Returns:** -```json -{ - "success": true, - "table": "my_catalog.my_schema.my_table", - "column_masks": [ - { - "column_name": "ssn", - "column_type": "STRING", - "mask_functions": ["my_catalog.my_schema.mask_ssn"] - } - ], - "row_filters": [ - { - "function_name": "my_catalog.my_schema.is_not_eu_region", - "input_column_names": ["region"] - } - ] -} -``` - -#### `get_masking_functions` - -List masking UDFs in a schema. - -> **Cross-catalog UDFs:** Masking UDFs can reside in any catalog/schema, not just the policy scope. Use `udf_catalog` and `udf_schema` to discover UDFs stored in a shared governance schema (e.g., `governance.masking_udfs`). These default to `catalog`/`schema` when not specified. 
- -```python -get_masking_functions( - catalog: str, - schema: str, - # To discover UDFs in a different catalog/schema: - udf_catalog: str = None, # defaults to catalog - udf_schema: str = None, # defaults to schema -) -``` - -**Returns:** -```json -{ - "success": true, - "catalog": "my_catalog", - "schema": "my_schema", - "functions": [ - { - "name": "mask_ssn", - "full_name": "my_catalog.my_schema.mask_ssn", - "return_type": "STRING", - "comment": "Masks SSN showing only last 4 digits", - "is_deterministic": true - } - ] -} -``` - -#### `get_column_tags_api` - -Get column-level tags via the Tags API. - -```python -get_column_tags_api( - catalog: str, - schema: str, - table: str, -) -``` - -#### `get_schema_info` / `get_catalog_info` - -Get schema or catalog metadata via Unity Catalog API. - -```python -get_schema_info(catalog: str, schema: str) -get_catalog_info(catalog: str) -``` - -#### `list_table_policies_in_schema` - -List all tables in a schema with their column masks and row filters. - -```python -list_table_policies_in_schema( - catalog: str, - schema: str, -) -``` - -### Preview Tool (Human-in-the-Loop Gate) - -#### `preview_policy_changes` - -Preview policy changes without executing. This is the critical human-in-the-loop gate. 
- -```python -preview_policy_changes( - action: str, # "CREATE", "UPDATE", or "DELETE" - policy_name: str, - securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" - securable_fullname: str, - policy_type: str = None, # "COLUMN_MASK" or "ROW_FILTER" (for CREATE) - to_principals: list = None, - except_principals: list = None, - function_name: str = None, - tag_name: str = None, - tag_value: str = None, - comment: str = None, -) -``` - -**Returns:** -```json -{ - "success": true, - "action": "CREATE", - "preview": { - "policy_name": "mask_pii_ssn", - "policy_type": "COLUMN_MASK", - "securable": "SCHEMA my_catalog.my_schema", - "to_principals": ["analysts"], - "except_principals": ["gov_admin"], - "function": "my_catalog.my_schema.mask_ssn", - "tag_match": "hasTagValue('pii_type', 'ssn')", - "equivalent_sql": "CREATE OR REPLACE POLICY mask_pii_ssn\nON SCHEMA my_catalog.my_schema\n..." - }, - "warnings": [], - "requires_approval": true, - "approval_token": "a1b2c3...:eyJhY3Rpb24i...", - "message": "Review the preview above. Reply 'approve' to execute, passing the approval_token." -} -``` - -**Usage in workflow:** - -1. Call `preview_policy_changes` with proposed changes -2. Present preview to user (includes `approval_token`) -3. Wait for explicit approval -4. Pass `approval_token` to `create_fgac_policy`, `update_fgac_policy`, or `delete_fgac_policy` - -### Management Tools - -#### `create_fgac_policy` - -Create a new FGAC policy (COLUMN_MASK or ROW_FILTER). 
- -```python -create_fgac_policy( - policy_name: str, - policy_type: str, # "COLUMN_MASK" or "ROW_FILTER" - securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" - securable_fullname: str, - function_name: str, # Fully qualified UDF name - to_principals: list, # Users/groups the policy applies to - tag_name: str, # Tag key to match - approval_token: str, # Token from preview_policy_changes() - tag_value: str = None, # Tag value (optional, uses hasTag vs hasTagValue) - except_principals: list = None, # Excluded principals (gov_admin auto-added) - comment: str = "", -) -``` - -**Returns:** -```json -{ - "success": true, - "policy_name": "mask_pii_ssn", - "action": "created", - "details": { - "policy_type": "COLUMN_MASK", - "on_securable": "SCHEMA my_catalog.my_schema", - "function": "my_catalog.my_schema.mask_ssn", - "to_principals": ["analysts"], - "except_principals": ["gov_admin"] - } -} -``` - -#### `update_fgac_policy` - -Update an existing policy's principals or comment. - -```python -update_fgac_policy( - policy_name: str, - securable_type: str, - securable_fullname: str, - approval_token: str, # Token from preview_policy_changes() - to_principals: list = None, - except_principals: list = None, - comment: str = None, -) -``` - -**Returns:** -```json -{ - "success": true, - "policy_name": "mask_pii_ssn", - "action": "updated", - "changes": { - "to_principals": ["analysts", "data_scientists", "new_team"], - "comment": "Updated: added new_team" - } -} -``` - -> **Note:** To change the UDF, tag matching, or scope, drop and recreate the policy. - -#### `delete_fgac_policy` - -Delete an FGAC policy. 
- -```python -delete_fgac_policy( - policy_name: str, - securable_type: str, - securable_fullname: str, - approval_token: str, # Token from preview_policy_changes() -) -``` - -**Returns:** -```json -{ - "success": true, - "policy_name": "mask_pii_ssn", - "action": "deleted" -} -``` - ---- - -## Human-in-the-Loop Workflow Example - -Complete workflow using MCP tools: - -``` -Step 1: ANALYZE -───────────────────────────────── -→ list_fgac_policies(securable_type="SCHEMA", securable_fullname="prod.finance") -→ get_column_tags_api(catalog="prod", schema="finance", table="customers") -→ get_masking_functions(catalog="prod", schema="finance") - # If UDFs are in a shared governance schema: -→ get_masking_functions(catalog="prod", schema="finance", - udf_catalog="governance", udf_schema="masking_udfs") - -Step 2: RECOMMEND -───────────────────────────────── -→ Agent generates policy recommendations based on discovered tags and UDFs - -Step 3: PREVIEW (returns approval_token) -───────────────────────────────── -→ result = preview_policy_changes( - action="CREATE", - policy_name="mask_ssn_finance", - securable_type="SCHEMA", - securable_fullname="prod.finance", - policy_type="COLUMN_MASK", - function_name="governance.masking_udfs.mask_ssn", - to_principals=["analysts"], - tag_name="pii_type", - tag_value="ssn" - ) -→ token = result["approval_token"] - -Step 4: APPROVE -───────────────────────────────── -→ Human reviews preview and replies "approve" - -Step 5: EXECUTE (pass approval_token) -───────────────────────────────── -→ create_fgac_policy( - policy_name="mask_ssn_finance", - policy_type="COLUMN_MASK", - securable_type="SCHEMA", - securable_fullname="prod.finance", - function_name="governance.masking_udfs.mask_ssn", - to_principals=["analysts"], - tag_name="pii_type", - tag_value="ssn", - approval_token=token - ) - -Step 6: VERIFY -───────────────────────────────── -→ get_fgac_policy( - policy_name="mask_ssn_finance", - securable_type="SCHEMA", - 
securable_fullname="prod.finance" - ) -``` - ---- - -## Python SDK Direct Usage - -For writing custom code outside MCP tools, use the Databricks Python SDK directly. - -### Setup - -```python -from databricks.sdk import WorkspaceClient - -w = WorkspaceClient() # Auto-detects credentials -``` - -### SDK Types - -```python -from databricks.sdk.service.catalog import ( - ColumnMaskOptions, - MatchColumn, - PolicyInfo, - PolicyType, - RowFilterOptions, - SecurableType, -) -``` - -### List Policies - -```python -policies = w.policies.list_policies( - on_securable_type="CATALOG", - on_securable_fullname="my_catalog", - include_inherited=True, -) - -for p in policies: - print(f"{p.name}: {p.policy_type} on {p.on_securable_fullname}") - -# Filter by type -column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] -row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] -``` - -### Get Policy - -```python -policy = w.policies.get_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", -) - -print(f"Policy: {policy.name}") -print(f"Type: {policy.policy_type}") -print(f"Principals: {policy.to_principals}") -print(f"Except: {policy.except_principals}") -``` - -### Create Column Mask Policy - -```python -policy_info = PolicyInfo( - name="mask_pii_ssn_schema", - policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, - on_securable_type=SecurableType.SCHEMA, - on_securable_fullname="my_catalog.my_schema", - for_securable_type=SecurableType.TABLE, - to_principals=["analysts", "data_scientists"], - except_principals=["gov_admin"], - comment="Mask SSN columns in schema", - column_mask=ColumnMaskOptions( - function_name="my_catalog.my_schema.mask_ssn", - on_column="masked_col", - ), - match_columns=[ - MatchColumn( - alias="masked_col", - condition="hasTagValue('pii_type', 'ssn')", - ) - ], -) -policy = w.policies.create_policy(policy_info=policy_info) -``` - -Change `on_securable_type` and `on_securable_fullname` 
to target catalog or table scope. - -### Create Column Mask Policy (Cross-Catalog UDF) - -The UDF can live in a separate governance catalog/schema from the policy scope: - -```python -# UDF in governance.masking_udfs, policy on prod.finance -policy_info = PolicyInfo( - name="mask_ssn_finance", - policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, - on_securable_type=SecurableType.SCHEMA, - on_securable_fullname="prod.finance", - for_securable_type=SecurableType.TABLE, - to_principals=["analysts"], - except_principals=["gov_admin"], - comment="Mask SSN columns in prod.finance using shared governance UDF", - column_mask=ColumnMaskOptions( - function_name="governance.masking_udfs.mask_ssn", - on_column="masked_col", - ), - match_columns=[ - MatchColumn( - alias="masked_col", - condition="hasTagValue('pii_type', 'ssn')", - ) - ], -) -policy = w.policies.create_policy(policy_info=policy_info) -``` - -### Create Row Filter Policy - -```python -policy_info = PolicyInfo( - name="filter_eu_data_schema", - policy_type=PolicyType.POLICY_TYPE_ROW_FILTER, - on_securable_type=SecurableType.SCHEMA, - on_securable_fullname="my_catalog.my_schema", - for_securable_type=SecurableType.TABLE, - to_principals=["us_team"], - except_principals=["gov_admin"], - comment="Filter EU rows in schema", - row_filter=RowFilterOptions( - function_name="my_catalog.my_schema.is_not_eu_region", - ), - match_columns=[ - MatchColumn( - alias="filter_col", - condition="hasTagValue('region', 'eu')", - ) - ], -) -policy = w.policies.create_policy(policy_info=policy_info) -``` - -### Update Policy - -Update principals or comment on an existing policy. 
- -```python -update_info = PolicyInfo( - to_principals=["analysts", "data_scientists", "new_team"], - except_principals=["gov_admin", "senior_admins"], - comment="Updated: added new_team to masked principals", - for_securable_type=SecurableType.TABLE, - policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, -) -updated = w.policies.update_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - policy_info=update_info, - update_mask="to_principals,except_principals,comment", -) -``` - -> **Note:** To change the UDF, tag matching, or scope, you must drop and recreate the policy. `update_policy` only modifies principals and comment via `update_mask`. - -### Delete Policy - -```python -w.policies.delete_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", -) -``` - ---- - -## Error Handling - -```python -from databricks.sdk.errors import NotFound, PermissionDenied, BadRequest - -try: - policy = w.policies.get_policy( - name="nonexistent_policy", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - ) -except NotFound: - print("Policy not found") -except PermissionDenied: - print("Insufficient permissions - need MANAGE on securable") -except BadRequest as e: - print(f"Invalid request: {e}") -``` - -| Error | Cause | Solution | -|-------|-------|----------| -| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Consolidate policies or use broader scope | -| `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | Check governed tag config in UI | -| `UDF_NOT_FOUND` | Masking UDF doesn't exist | Create UDF first, use fully qualified name | -| `POLICY_ALREADY_EXISTS` | Duplicate policy name | Use different name or delete existing first | -| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | Grant `MANAGE` permission | -| `INVALID_SECURABLE_TYPE` | Wrong securable type string | Use `"CATALOG"`, `"SCHEMA"`, or `"TABLE"` 
| - ---- - -## Common Patterns - -### Policy Summary with Counts - -```python -def get_policy_summary(w, catalog: str): - """Get a summary of all FGAC policies in a catalog.""" - policies = list(w.policies.list_policies( - on_securable_type="CATALOG", - on_securable_fullname=catalog, - include_inherited=True, - )) - - column_masks = [p for p in policies if p.policy_type == "COLUMN_MASK"] - row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] - - return { - "total": len(policies), - "column_masks": len(column_masks), - "row_filters": len(row_filters), - "policies": [p.as_dict() for p in policies], - } -``` - -### Check Policy Quotas Before Creating - -```python -def check_quota(w, securable_type: str, securable_fullname: str): - """Check if policy quota allows creating a new policy.""" - quotas = {"CATALOG": 10, "SCHEMA": 10, "TABLE": 5} - max_policies = quotas.get(securable_type, 10) - - existing = list(w.policies.list_policies( - on_securable_type=securable_type, - on_securable_fullname=securable_fullname, - )) - - # Count only direct policies (not inherited) - direct = [p for p in existing - if p.on_securable_fullname == securable_fullname] - - return { - "current": len(direct), - "max": max_policies, - "can_create": len(direct) < max_policies, - } -``` - -### Async Usage (FastAPI, etc.) - -The Databricks SDK is synchronous. 
In async applications, wrap calls with `asyncio.to_thread()`: - -```python -import asyncio - -async def list_policies_async(w, catalog: str): - return await asyncio.to_thread( - lambda: list(w.policies.list_policies( - on_securable_type="CATALOG", - on_securable_fullname=catalog, - include_inherited=True, - )) - ) -``` diff --git a/.claude/skills/databricks-unity-catalog/SKILL.md b/.claude/skills/databricks-unity-catalog/SKILL.md deleted file mode 100644 index ede7db9f..00000000 --- a/.claude/skills/databricks-unity-catalog/SKILL.md +++ /dev/null @@ -1,242 +0,0 @@ ---- -name: databricks-unity-catalog -description: "Unity Catalog: system tables, volumes, access controls (ACLs), and FGAC governance. Use when querying system tables (audit, lineage, billing), working with volume file operations, managing UC permissions (GRANT/REVOKE), or managing FGAC policies (column masks, row filters, governed tags, masking UDFs)." ---- - -# Unity Catalog - -Guidance for Unity Catalog across four areas: system tables, volumes, access controls, and FGAC policy governance. 
- -## When to Use This Skill - -Use this skill when working with any of these four categories: - -### System Tables -- Querying **lineage** (table dependencies, column-level lineage) -- Analyzing **audit logs** (who accessed what, permission changes) -- Monitoring **billing and usage** (DBU consumption, cost analysis) -- Tracking **compute resources** (cluster usage, warehouse metrics) -- Reviewing **job execution** (run history, success rates, failures) -- Analyzing **query performance** (slow queries, warehouse utilization) - -### Volumes -- Working with **volumes** (upload, download, list files in `/Volumes/`) -- Managing volume **directories** and file operations -- Configuring volume **permissions** (READ VOLUME, WRITE VOLUME) - -### UC Access Controls (ACLs) -- **Granting or revoking** privileges on catalogs, schemas, tables, volumes, functions -- Managing **ownership** transfers -- Setting up **role-based access** patterns (data readers, engineers, admins) -- Auditing **current permissions** (SHOW GRANTS) - -### FGAC (Fine-Grained Access Control) -- Creating or managing **FGAC policies** (column masks, row filters) -- Working with **governed tags** (creating via UI, applying via SQL) -- Building **masking UDFs** for PII protection (SSN, email, credit card, etc.) 
-- Implementing **human-in-the-loop governance** workflows -- Managing **policy lifecycle** (create, update, delete, preview) -- Querying **tag assignments** via `information_schema` - ---- - -## Reference Files - -### System Tables - -| File | Description | -|------|-------------| -| [5-system-tables.md](5-system-tables.md) | Lineage, audit, billing, compute, jobs, query history | - -### Volumes - -| File | Description | -|------|-------------| -| [6-volumes.md](6-volumes.md) | Volume file operations, permissions, best practices | - -### UC Access Controls (ACLs) - -| File | Description | -|------|-------------| -| [10-uc-acls.md](10-uc-acls.md) | GRANT/REVOKE, ownership, privilege reference, SDK patterns, common role patterns | - -### FGAC (Fine-Grained Access Control) - -| File | Description | -|------|-------------| -| [7-fgac-overview.md](7-fgac-overview.md) | FGAC workflow, governed tags, masking UDFs, policy syntax, errors, best practices | -| [8-fgac-sql-generation.md](8-fgac-sql-generation.md) | SET/UNSET TAG, CREATE FUNCTION, CREATE/DROP POLICY, discovery queries | -| [9-fgac-sdk-and-tools.md](9-fgac-sdk-and-tools.md) | Python SDK patterns and 12 MCP tools for policy management | - ---- - -## Quick Start: System Tables - -### Enable Access - -```sql --- Grant access to system tables -GRANT USE CATALOG ON CATALOG system TO `data_engineers`; -GRANT USE SCHEMA ON SCHEMA system.access TO `data_engineers`; -GRANT SELECT ON SCHEMA system.access TO `data_engineers`; -``` - -### Common Queries - -```sql --- Table lineage: What tables feed into this table? 
-SELECT source_table_full_name, source_column_name -FROM system.access.table_lineage -WHERE target_table_full_name = 'catalog.schema.table' - AND event_date >= current_date() - 7; - --- Audit: Recent permission changes -SELECT event_time, user_identity.email, action_name, request_params -FROM system.access.audit -WHERE action_name LIKE '%GRANT%' OR action_name LIKE '%REVOKE%' -ORDER BY event_time DESC -LIMIT 100; - --- Billing: DBU usage by workspace -SELECT workspace_id, sku_name, SUM(usage_quantity) AS total_dbus -FROM system.billing.usage -WHERE usage_date >= current_date() - 30 -GROUP BY workspace_id, sku_name; -``` - -### MCP Tool Integration - -```python -mcp__databricks__execute_sql( - sql_query=""" - SELECT source_table_full_name, target_table_full_name - FROM system.access.table_lineage - WHERE event_date >= current_date() - 7 - """, - catalog="system" -) -``` - ---- - -## Quick Start: Volumes - -```python -# List files in a volume -list_volume_files(volume_path="/Volumes/catalog/schema/volume/folder/") - -# Upload file to volume -upload_to_volume( - local_path="/tmp/data.csv", - volume_path="/Volumes/catalog/schema/volume/data.csv" -) - -# Download file from volume -download_from_volume( - volume_path="/Volumes/catalog/schema/volume/data.csv", - local_path="/tmp/downloaded.csv" -) - -# Create directory -create_volume_directory(volume_path="/Volumes/catalog/schema/volume/new_folder") -``` - -See [6-volumes.md](6-volumes.md) for full volume operations, permissions, and troubleshooting. 
- ---- - -## Quick Start: UC Access Controls (ACLs) - -```sql --- Read-only access pattern -GRANT USE CATALOG ON CATALOG analytics TO `data_readers`; -GRANT USE SCHEMA ON SCHEMA analytics.gold TO `data_readers`; -GRANT SELECT ON SCHEMA analytics.gold TO `data_readers`; - --- Data engineer access pattern -GRANT USE CATALOG ON CATALOG analytics TO `data_engineers`; -GRANT USE SCHEMA ON SCHEMA analytics.silver TO `data_engineers`; -GRANT SELECT ON SCHEMA analytics.silver TO `data_engineers`; -GRANT MODIFY ON SCHEMA analytics.silver TO `data_engineers`; -GRANT CREATE TABLE ON SCHEMA analytics.silver TO `data_engineers`; - --- Show current grants -SHOW GRANTS ON SCHEMA analytics.gold; - --- Transfer ownership -ALTER SCHEMA analytics.gold OWNER TO `new_owner`; -``` - -See [10-uc-acls.md](10-uc-acls.md) for full privilege reference, SDK patterns, and common role patterns. - ---- - -## Quick Start: FGAC - -```sql --- 1. Apply governed tag to a column (tag must exist in UI first) -SET TAG ON COLUMN catalog.schema.table.ssn_column 'pii_type' = 'ssn'; - --- 2. Create a masking UDF -CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) -RETURNS STRING -DETERMINISTIC -RETURN CASE - WHEN ssn IS NULL THEN NULL - WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 - THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) - ELSE '***-**-****' -END; - --- 3. Create an FGAC column mask policy -CREATE OR REPLACE POLICY mask_pii_ssn -ON SCHEMA catalog.schema -COMMENT 'Mask SSN columns for analysts' -COLUMN MASK catalog.schema.mask_ssn -TO `analysts` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col -ON COLUMN masked_col; -``` - -See [7-fgac-overview.md](7-fgac-overview.md) for the full FGAC workflow, policy syntax, and best practices. - ---- - -## Best Practices - -### System Tables -1. **Filter by date** — System tables can be large; always use date filters -2. 
**Use appropriate retention** — Check your workspace's retention settings -3. **Schedule reports** — Create scheduled queries for regular monitoring - -### Volumes -4. **Organize by purpose** — Use directory structure within volumes -5. **Grant minimal access** — Use `READ VOLUME` vs `WRITE VOLUME` appropriately - -### UC Access Controls (ACLs) -6. **Grant to groups, not users** — Easier to manage and audit -7. **Use least privilege** — Grant only the minimum permissions needed -8. **Leverage inheritance** — Grant at schema level when all tables need the same access -9. **Audit regularly** — Query `system.access.audit` for grant/revoke events - -### FGAC -10. **Always include `EXCEPT \`gov_admin\``** in every FGAC policy -11. **Preview before executing** any FGAC policy change -12. **Use governed tags** (not ad-hoc tags) for FGAC policy matching - -## Resources - -### System Tables & Volumes -- [Unity Catalog System Tables](https://docs.databricks.com/administration-guide/system-tables/) -- [Audit Log Reference](https://docs.databricks.com/administration-guide/account-settings/audit-logs.html) - -### UC Access Controls -- [UC Privileges](https://docs.databricks.com/data-governance/unity-catalog/manage-privileges/) - -### FGAC -- [FGAC Overview](https://docs.databricks.com/data-governance/unity-catalog/abac/) -- [FGAC Policies](https://docs.databricks.com/data-governance/unity-catalog/abac/policies) -- [FGAC Tutorial](https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial) -- [Governed Tags](https://docs.databricks.com/admin/governed-tags/) diff --git a/FGAC_README.md b/FGAC_README.md deleted file mode 100644 index 259891c7..00000000 --- a/FGAC_README.md +++ /dev/null @@ -1,876 +0,0 @@ -# FGAC — Fine-Grained Access Control for Databricks Unity Catalog - -Fine-Grained Access Control (FGAC) policies bind governed tags to masking UDFs or row filters, scoped to catalogs, schemas, or tables, and targeted at specific principals. 
This document covers the complete FGAC feature set: governed tags, tag assignments, masking UDFs, policy management, the Python SDK, MCP tools, and the human-in-the-loop governance workflow. - -**Databricks Docs:** -- [FGAC Overview](https://docs.databricks.com/data-governance/unity-catalog/abac/) -- [FGAC Policies](https://docs.databricks.com/data-governance/unity-catalog/abac/policies) -- [FGAC Tutorial](https://docs.databricks.com/data-governance/unity-catalog/abac/tutorial) -- [UDF Best Practices](https://docs.databricks.com/data-governance/unity-catalog/abac/udf-best-practices) -- [Governed Tags](https://docs.databricks.com/admin/governed-tags/) - ---- - -## Table of Contents - -- [Architecture Overview](#architecture-overview) -- [Step 1: Governed Tags](#step-1-governed-tags) -- [Step 2: Tag Assignments](#step-2-tag-assignments) -- [Step 3: Masking UDFs](#step-3-masking-udfs) -- [Step 4: FGAC Policies](#step-4-fgac-policies) -- [Policy Quotas](#policy-quotas) -- [SQL That Does NOT Exist](#sql-that-does-not-exist) -- [Discovery Queries](#discovery-queries) -- [Python SDK Reference](#python-sdk-reference) -- [MCP Tools Reference](#mcp-tools-reference) -- [Human-in-the-Loop Governance Workflow](#human-in-the-loop-governance-workflow) -- [Approval Token Internals](#approval-token-internals) -- [Environment Variables](#environment-variables) -- [Threat Model](#threat-model) -- [Common Errors](#common-errors) -- [Best Practices](#best-practices) -- [Source Files](#source-files) - ---- - -## Architecture Overview - -FGAC policies follow a 4-step setup: - -``` -┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ -│ Governed Tags│───>│ Tag │───>│ Masking │───>│ FGAC │ -│ (UI only) │ │ Assignments │ │ UDFs │ │ Policies │ -└──────────────┘ └──────────────┘ └──────────────┘ └──────────────┘ - Step 1 Step 2 Step 3 Step 4 -``` - -1. **Governed Tags** — Define a classification taxonomy (e.g., `pii_type` with values `ssn`, `email`, `phone`) -2. 
**Tag Assignments** — Apply tags to columns or tables via SQL -3. **Masking UDFs** — Create deterministic functions that transform sensitive values -4. **FGAC Policies** — Bind tags to UDFs with principal scoping (who sees masked data, who is exempt) - ---- - -## Step 1: Governed Tags - -Governed tags **cannot** be created via SQL or API. They must be created in the Databricks UI. - -### Creating a Governed Tag (UI Steps) - -1. Navigate to **Catalog** in the workspace -2. Select **Governed Tags** from the left panel -3. Click **Create governed tag** -4. Configure: - - **Tag Key**: e.g., `pii_type` - - **Allowed Values**: e.g., `ssn`, `email`, `phone`, `credit_card`, `address` - - **Description**: e.g., "PII classification for FGAC policies" - -> **Note:** Tag data is stored as plain text and may be replicated globally. Avoid putting sensitive information in tag names or values. - -> **Propagation delay:** Newly created governed tags need ~30 seconds to propagate before they can be used in tag assignments. 
- ---- - -## Step 2: Tag Assignments - -### Modern Syntax (DBR 16.1+) - -```sql --- Set tag on column -SET TAG ON COLUMN catalog.schema.table.column_name 'pii_type' = 'ssn'; - --- Set tag on table -SET TAG ON TABLE catalog.schema.table 'data_classification' = 'confidential'; - --- Set tag on schema -SET TAG ON SCHEMA catalog.schema 'environment' = 'production'; - --- Set tag on catalog -SET TAG ON CATALOG my_catalog 'department' = 'finance'; - --- Remove tag -UNSET TAG ON COLUMN catalog.schema.table.column_name 'pii_type'; -UNSET TAG ON TABLE catalog.schema.table 'data_classification'; -``` - -### Legacy Syntax (all versions) - -```sql --- Set tag on column -ALTER TABLE catalog.schema.table -ALTER COLUMN column_name SET TAGS ('pii_type' = 'ssn'); - --- Set tag on table -ALTER TABLE catalog.schema.table -SET TAGS ('data_classification' = 'confidential'); - --- Remove tag -ALTER TABLE catalog.schema.table -ALTER COLUMN column_name UNSET TAGS ('pii_type'); -``` - -### Querying Existing Tags - -```sql --- Column tags -SELECT tag_name, tag_value, column_name -FROM system.information_schema.column_tags -WHERE catalog_name = 'my_catalog' - AND schema_name = 'my_schema' - AND table_name = 'my_table'; - --- Table tags -SELECT tag_name, tag_value -FROM system.information_schema.table_tags -WHERE catalog_name = 'my_catalog' - AND schema_name = 'my_schema' - AND table_name = 'my_table'; - --- All tag assignments in a catalog -SELECT 'COLUMN' AS securable_type, - CONCAT(catalog_name, '.', schema_name, '.', table_name, '.', column_name) AS securable_name, - tag_name, tag_value -FROM system.information_schema.column_tags -WHERE catalog_name = 'my_catalog'; -``` - ---- - -## Step 3: Masking UDFs - -Masking UDFs must be `DETERMINISTIC` and use simple `CASE` statements. No external calls or nested UDFs. - -> **Cross-catalog UDFs:** Masking UDFs do not need to be in the same catalog/schema as the policy scope. 
A common pattern is a shared governance schema (e.g., `governance.masking_udfs`) containing all masking functions, referenced by policies across multiple catalogs. - -### Column Mask UDFs - -```sql --- Full mask: replaces all characters with * -CREATE OR REPLACE FUNCTION catalog.schema.mask_full(value STRING) -RETURNS STRING -DETERMINISTIC -RETURN CASE - WHEN value IS NULL THEN NULL - ELSE REPEAT('*', LENGTH(value)) -END; - --- Partial mask: show last 4 characters -CREATE OR REPLACE FUNCTION catalog.schema.mask_partial(value STRING) -RETURNS STRING -DETERMINISTIC -RETURN CASE - WHEN value IS NULL THEN NULL - WHEN LENGTH(value) <= 4 THEN REPEAT('*', LENGTH(value)) - ELSE CONCAT(REPEAT('*', LENGTH(value) - 4), RIGHT(value, 4)) -END; - --- SSN: ***-**-XXXX -CREATE OR REPLACE FUNCTION catalog.schema.mask_ssn(ssn STRING) -RETURNS STRING -DETERMINISTIC -RETURN CASE - WHEN ssn IS NULL THEN NULL - WHEN LENGTH(REGEXP_REPLACE(ssn, '[^0-9]', '')) >= 4 - THEN CONCAT('***-**-', RIGHT(REGEXP_REPLACE(ssn, '[^0-9]', ''), 4)) - ELSE '***-**-****' -END; - --- Email: j***@example.com -CREATE OR REPLACE FUNCTION catalog.schema.mask_email(email STRING) -RETURNS STRING -DETERMINISTIC -RETURN CASE - WHEN email IS NULL THEN NULL - WHEN INSTR(email, '@') > 1 - THEN CONCAT(LEFT(email, 1), '***@', SUBSTRING(email, INSTR(email, '@') + 1)) - ELSE '***@***.***' -END; - --- Credit card: ****-****-****-1234 -CREATE OR REPLACE FUNCTION catalog.schema.mask_credit_card(card_number STRING) -RETURNS STRING -DETERMINISTIC -RETURN CASE - WHEN card_number IS NULL THEN NULL - WHEN LENGTH(REGEXP_REPLACE(card_number, '[^0-9]', '')) >= 4 - THEN CONCAT('****-****-****-', RIGHT(REGEXP_REPLACE(card_number, '[^0-9]', ''), 4)) - ELSE '****-****-****-****' -END; - --- Hash: SHA256 with version prefix -CREATE OR REPLACE FUNCTION catalog.schema.mask_hash(value STRING) -RETURNS STRING -DETERMINISTIC -RETURN CASE - WHEN value IS NULL THEN NULL - ELSE CONCAT('HASH_v1_', SUBSTRING(SHA2(CONCAT(value, ':v1'), 256), 1, 
16))
-END;
-
--- Redact: replace with [REDACTED]
-CREATE OR REPLACE FUNCTION catalog.schema.mask_redact(value STRING)
-RETURNS STRING
-DETERMINISTIC
-RETURN CASE
- WHEN value IS NULL THEN NULL
- ELSE '[REDACTED]'
-END;
-```
-
-### Row Filter UDFs
-
-Row filter UDFs return `BOOLEAN`: `TRUE` to include the row, `FALSE` to exclude it. A row filter UDF's parameters must match the columns bound to it by the policy's `USING COLUMNS` clause (one parameter per matched column), just as a column mask UDF takes the single column bound via `ON COLUMN`.
-
-```sql
--- Region-based filter: hide EU rows
-CREATE OR REPLACE FUNCTION catalog.schema.is_not_eu_region(region_value STRING)
-RETURNS BOOLEAN
-DETERMINISTIC
-RETURN CASE
- WHEN region_value IS NULL THEN TRUE
- WHEN LOWER(region_value) LIKE '%eu%' THEN FALSE
- WHEN LOWER(region_value) LIKE '%europe%' THEN FALSE
- ELSE TRUE
-END;
-```
-
----
-
-## Step 4: FGAC Policies
-
-Policies are scoped to a **catalog**, **schema**, or **table**. The clause `FOR TABLES` is always present. The `for_securable_type` is always `TABLE`.
-
-### Column Mask Policy
-
-```sql
--- Catalog level — masks matching columns in ALL tables in the catalog
-CREATE OR REPLACE POLICY mask_pii_ssn
-ON CATALOG my_catalog
-COMMENT 'Mask SSN columns catalog-wide'
-COLUMN MASK my_catalog.my_schema.mask_ssn
-TO `analysts`, `data_scientists`
-EXCEPT `gov_admin`
-FOR TABLES
-MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col
-ON COLUMN masked_col;
-
--- Schema level — masks matching columns in all tables in the schema
-CREATE OR REPLACE POLICY mask_pii_ssn
-ON SCHEMA my_catalog.my_schema
-COMMENT 'Mask SSN columns in schema'
-COLUMN MASK my_catalog.my_schema.mask_ssn
-TO `analysts`, `data_scientists`
-EXCEPT `gov_admin`
-FOR TABLES
-MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col
-ON COLUMN masked_col;
-
--- Table level — masks matching columns on a single table
-CREATE OR REPLACE POLICY mask_pii_ssn
-ON TABLE my_catalog.my_schema.my_table
-COMMENT 'Mask SSN columns on specific table'
-COLUMN MASK my_catalog.my_schema.mask_ssn
-TO `analysts`, `data_scientists`
-EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col -ON COLUMN masked_col; - --- Cross-catalog UDF — UDF in governance catalog, policy on prod -CREATE OR REPLACE POLICY mask_ssn_finance -ON SCHEMA prod.finance -COMMENT 'Mask SSN using shared governance UDF' -COLUMN MASK governance.masking_udfs.mask_ssn -TO `analysts` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col -ON COLUMN masked_col; - --- Match any column with a tag key (regardless of value) -CREATE OR REPLACE POLICY mask_all_pii -ON SCHEMA my_catalog.my_schema -COLUMN MASK my_catalog.my_schema.mask_full -TO `external_users` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTag('pii_type') AS masked_col -ON COLUMN masked_col; -``` - -### Row Filter Policy - -```sql --- Catalog level -CREATE OR REPLACE POLICY filter_eu_data -ON CATALOG my_catalog -COMMENT 'Filter EU rows catalog-wide' -ROW FILTER my_catalog.my_schema.is_not_eu_region -TO `us_team` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col -USING COLUMNS (filter_col); - --- Schema level -CREATE OR REPLACE POLICY filter_eu_data -ON SCHEMA my_catalog.my_schema -COMMENT 'Filter EU rows in schema' -ROW FILTER my_catalog.my_schema.is_not_eu_region -TO `us_team` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col -USING COLUMNS (filter_col); - --- Table level -CREATE OR REPLACE POLICY filter_eu_data -ON TABLE my_catalog.my_schema.my_table -COMMENT 'Filter EU rows on specific table' -ROW FILTER my_catalog.my_schema.is_not_eu_region -TO `us_team` -EXCEPT `gov_admin` -FOR TABLES -MATCH COLUMNS hasTagValue('region', 'eu') AS filter_col -USING COLUMNS (filter_col); -``` - -### Drop Policy - -```sql -DROP POLICY mask_pii_ssn ON CATALOG my_catalog; -DROP POLICY mask_pii_ssn ON SCHEMA my_catalog.my_schema; -DROP POLICY mask_pii_ssn ON TABLE my_catalog.my_schema.my_table; -``` - -> There is no `ALTER POLICY`. 
To modify a policy's UDF, tag matching, or scope, drop and recreate it. Only principals and comment can be updated in-place via the SDK. - ---- - -## Policy Quotas - -| Scope | Max Policies | -|-------|-------------| -| Per Catalog | 10 | -| Per Schema | 10 | -| Per Table | 5 | - ---- - -## SQL That Does NOT Exist - -These SQL commands do **not** exist in Databricks. Do not use them. - -| Invalid SQL | What to Use Instead | -|---|---| -| `SHOW POLICIES` | SDK: `w.policies.list_policies()` or MCP tool `list_fgac_policies` | -| `DESCRIBE POLICY` | SDK: `w.policies.get_policy()` or MCP tool `get_fgac_policy` | -| `ALTER POLICY` | Drop and recreate the policy | -| `ALTER USER SET ATTRIBUTES` | SCIM API for user attributes | - ---- - -## Discovery Queries - -```sql --- List catalogs, schemas, tables -SHOW CATALOGS; -SHOW SCHEMAS IN my_catalog; -SHOW TABLES IN my_catalog.my_schema; - --- Describe table with extended metadata -DESCRIBE TABLE EXTENDED my_catalog.my_schema.my_table; - --- List UDFs in a schema -SHOW USER FUNCTIONS IN my_catalog.my_schema; - --- Describe a UDF -DESCRIBE FUNCTION EXTENDED my_catalog.my_schema.mask_ssn; - --- Column tags in a table -SELECT tag_name, tag_value, column_name -FROM system.information_schema.column_tags -WHERE catalog_name = 'my_catalog' - AND schema_name = 'my_schema' - AND table_name = 'my_table'; -``` - ---- - -## Python SDK Reference - -### Setup - -```python -from databricks.sdk import WorkspaceClient -from databricks.sdk.service.catalog import ( - ColumnMaskOptions, - MatchColumn, - PolicyInfo, - PolicyType, - RowFilterOptions, - SecurableType, -) - -w = WorkspaceClient() # Auto-detects credentials -``` - -### List Policies - -```python -policies = list(w.policies.list_policies( - on_securable_type="CATALOG", - on_securable_fullname="my_catalog", - include_inherited=True, -)) - -for p in policies: - print(f"{p.name}: {p.policy_type} on {p.on_securable_fullname}") - -# Filter by type -column_masks = [p for p in policies if 
p.policy_type == "COLUMN_MASK"] -row_filters = [p for p in policies if p.policy_type == "ROW_FILTER"] -``` - -### Get Policy - -```python -policy = w.policies.get_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", -) -``` - -### Create Column Mask Policy - -```python -policy_info = PolicyInfo( - name="mask_pii_ssn", - policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, - on_securable_type=SecurableType.SCHEMA, - on_securable_fullname="my_catalog.my_schema", - for_securable_type=SecurableType.TABLE, - to_principals=["analysts", "data_scientists"], - except_principals=["gov_admin"], - comment="Mask SSN columns in schema", - column_mask=ColumnMaskOptions( - function_name="my_catalog.my_schema.mask_ssn", - on_column="masked_col", - ), - match_columns=[ - MatchColumn( - alias="masked_col", - condition="hasTagValue('pii_type', 'ssn')", - ) - ], -) -policy = w.policies.create_policy(policy_info=policy_info) -``` - -### Create Row Filter Policy - -```python -policy_info = PolicyInfo( - name="filter_eu_data", - policy_type=PolicyType.POLICY_TYPE_ROW_FILTER, - on_securable_type=SecurableType.SCHEMA, - on_securable_fullname="my_catalog.my_schema", - for_securable_type=SecurableType.TABLE, - to_principals=["us_team"], - except_principals=["gov_admin"], - comment="Filter EU rows in schema", - row_filter=RowFilterOptions( - function_name="my_catalog.my_schema.is_not_eu_region", - ), - match_columns=[ - MatchColumn( - alias="filter_col", - condition="hasTagValue('region', 'eu')", - ) - ], -) -policy = w.policies.create_policy(policy_info=policy_info) -``` - -### Update Policy - -Only principals and comment can be updated. To change the UDF, tag matching, or scope, drop and recreate. 
- -```python -update_info = PolicyInfo( - to_principals=["analysts", "data_scientists", "new_team"], - except_principals=["gov_admin", "senior_admins"], - comment="Updated: added new_team", - for_securable_type=SecurableType.TABLE, - policy_type=PolicyType.POLICY_TYPE_COLUMN_MASK, -) -updated = w.policies.update_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", - policy_info=update_info, - update_mask="to_principals,except_principals,comment", -) -``` - -### Delete Policy - -```python -w.policies.delete_policy( - name="mask_pii_ssn", - on_securable_type="SCHEMA", - on_securable_fullname="my_catalog.my_schema", -) -``` - -### Error Handling - -```python -from databricks.sdk.errors import NotFound, PermissionDenied, BadRequest - -try: - policy = w.policies.get_policy(name="nonexistent", ...) -except NotFound: - print("Policy not found") -except PermissionDenied: - print("Insufficient permissions - need MANAGE on securable") -except BadRequest as e: - print(f"Invalid request: {e}") -``` - ---- - -## MCP Tools Reference - -All FGAC operations are exposed through a single MCP tool: `manage_uc_fgac_policies`. The `action` parameter selects the operation. 
- -### Discovery Actions - -| Action | Description | Key Parameters | -|--------|-------------|----------------| -| `list` | List policies on a securable | `securable_type`, `securable_fullname`, `include_inherited`, `policy_type` | -| `get` | Get a specific policy by name | `policy_name`, `securable_type`, `securable_fullname` | -| `get_table_policies` | Get column masks and row filters on a table | `catalog`, `schema`, `table` | -| `get_masking_functions` | List masking UDFs in a schema | `catalog`, `schema` (or `udf_catalog`, `udf_schema` for cross-catalog) | -| `check_quota` | Check policy quota on a securable | `securable_type`, `securable_fullname` | - -### Preview Action (Human-in-the-Loop Gate) - -| Action | Description | Key Parameters | -|--------|-------------|----------------| -| `preview` | Preview changes without executing; returns `approval_token` | `preview_action` (`CREATE`/`UPDATE`/`DELETE`), `policy_name`, `securable_type`, `securable_fullname`, plus policy params for CREATE | - -### Mutation Actions (Require Approval Token) - -| Action | Description | Key Parameters | -|--------|-------------|----------------| -| `create` | Create a new FGAC policy | `policy_name`, `policy_type`, `securable_type`, `securable_fullname`, `function_name`, `to_principals`, `tag_name`, `tag_value`, `approval_token` | -| `update` | Update policy principals or comment | `policy_name`, `securable_type`, `securable_fullname`, `to_principals`, `except_principals`, `comment`, `approval_token` | -| `delete` | Delete a policy | `policy_name`, `securable_type`, `securable_fullname`, `approval_token` | - ---- - -## Human-in-the-Loop Governance Workflow - -FGAC policies control who can see sensitive data like SSNs, emails, and salaries. Because misconfigured policies can expose private data or lock out administrators, all mutating operations go through a governed workflow with two safety gates. - -### Why Human-in-the-Loop? 
- -An AI agent that can freely create, change, or delete access control policies is dangerous. It could: - -- Accidentally expose PII to the wrong group -- Remove masking from sensitive columns -- Lock administrators out of their own data - -The human-in-the-loop pattern ensures **no policy change happens without explicit human approval**. - -### The Two Safety Gates - -#### Gate 1: Preview + Approval Token - -Every mutating operation (create, update, delete) requires a two-step process: - -1. **Preview** — The agent calls `preview_policy_changes()` which generates the exact SQL that *would* run, but **does not execute anything**. It also returns a cryptographic **approval token**. - -2. **Execute** — Only after the human reviews and approves does the agent call the mutation (e.g., `create_fgac_policy()`), passing the approval token from the preview step. - -The approval token is an **HMAC-SHA256 signed receipt** that binds the exact parameters from the preview to a timestamp: - -| Protection | How It Works | -|-----------|--------------| -| Parameter tampering | The token encodes every parameter (policy name, type, principals, UDF, tags). If the agent passes different parameters at execution time, the signature won't match and the operation is rejected. | -| Replay attacks | The token includes a timestamp and **expires after 10 minutes**. Old approvals cannot be reused. | -| Token forgery | The token is signed with an HMAC secret (`FGAC_APPROVAL_SECRET`). Without the secret, a valid token cannot be forged. | - -#### Gate 2: Admin Group Check - -Every mutating operation also verifies that the current Databricks user belongs to the configured admin group (env var `FGAC_ADMIN_GROUP`, defaults to `admins`). Even with a valid approval token, a non-admin user cannot make changes. 
- -### The 6-Step Workflow - -``` -ANALYZE --> RECOMMEND --> PREVIEW --> APPROVE --> EXECUTE --> VERIFY - | | | | | | - v v v v v v - Discover Generate Show SQL Human Run SDK Confirm - current policy & impact confirms call w/ changes - state proposals preview changes token applied -``` - -#### Step 1: ANALYZE — Discover Current State - -The agent gathers information without making any changes: - -``` -list_fgac_policies() --> What policies already exist? -get_masking_functions() --> What masking UDFs are available? -get_column_tags_api() --> What columns are tagged with PII labels? -execute_sql(DESCRIBE) --> What does the table schema look like? -``` - -#### Step 2: RECOMMEND — Generate Proposals - -Based on the analysis, the agent identifies gaps and recommends new policies: - -> "The `email` column is tagged `pii=email` but has no masking policy. I recommend creating a column mask policy using a `mask_email` UDF." - -If a required UDF doesn't exist yet, the agent creates it first (UDF creation is a non-destructive SQL operation). - -#### Step 3: PREVIEW — Human-in-the-Loop Gate - -The agent calls `preview_policy_changes()` with the proposed parameters. **This does NOT execute anything.** It returns: - -```json -{ - "success": true, - "action": "CREATE", - "preview": { - "policy_name": "mask_email_for_non_admins", - "equivalent_sql": "CREATE OR REPLACE POLICY mask_email_for_non_admins\nON SCHEMA ai_dev_kit_test.test_schema\n..." - }, - "requires_approval": true, - "approval_token": "da70b6c3...:" -} -``` - -The agent presents the equivalent SQL and impact summary to the human. - -#### Step 4: APPROVE — Human Decision - -The human reviews: -- The exact SQL that will run -- Which principals are affected -- Which columns/tables will be masked -- Any warnings - -Then explicitly replies **"approve"** or requests changes. 
- -#### Step 5: EXECUTE — Apply With Token - -Only after approval, the agent passes the approval token to the mutation: - -```python -create_fgac_policy( - policy_name="mask_email_for_non_admins", - policy_type="COLUMN_MASK", - securable_type="SCHEMA", - securable_fullname="ai_dev_kit_test.test_schema", - function_name="ai_dev_kit_test.test_schema.mask_email", - to_principals=["account users"], - tag_name="pii", - tag_value="email", - approval_token="da70b6c3...:" -) -``` - -Internally, the function: -1. Checks admin group membership (`_check_admin_group()`) -2. Validates the approval token signature matches the parameters -3. Verifies the token hasn't expired (10-minute TTL) -4. Only then calls the Databricks SDK to create the policy - -#### Step 6: VERIFY — Confirm Changes - -The agent verifies the policy was applied correctly: - -```python -get_fgac_policy(policy_name="mask_email_for_non_admins", ...) -execute_sql("SELECT email FROM employee_pii LIMIT 5") -# Expected: a***@acme.com, b***@acme.com, etc. -``` - -### Sequence Diagram - -``` -Agent Human Databricks - | | | - |--- preview(CREATE, ...) -----------------------------------> | - |<-- SQL + approval_token ------------------------------------ | - | | | - |--- "Here's what I'll do:" --> | | - | [shows SQL + details] | | - | | | - |<-- "approve" ---------------- | | - | | | - |--- create(... token) --------------------------------------> | - | [1] check admin group | | - | [2] verify token signature | | - | [3] verify params match | | - | [4] verify not expired | | - |<-- policy created ------------------------------------------ | - | | | - |--- verify(get_policy) --------------------------------------> | - |<-- confirmed ------------------------------------------------ | -``` - ---- - -## Approval Token Internals - -### Token Structure - -``` -: -``` - -Example: -``` -da70b6c3455944a3...:eyJhY3Rpb24iOiAiQ1JFQVRFIiwgInBvbGljeV9uYW1lIjog... 
-``` - -### Generation (during preview) - -```python -def _generate_approval_token(params: dict) -> str: - # 1. Remove null values, add current timestamp - clean_params = {k: v for k, v in params.items() if v is not None} - clean_params["timestamp"] = int(time.time()) - - # 2. Serialize to deterministic JSON (sorted keys for consistency) - payload = json.dumps(clean_params, sort_keys=True) - - # 3. Sign with HMAC-SHA256 - signature = hmac.new( - APPROVAL_SECRET.encode(), payload.encode(), hashlib.sha256 - ).hexdigest() - - # 4. Encode payload as base64 - b64_payload = base64.b64encode(payload.encode()).decode() - - return f"{signature}:{b64_payload}" -``` - -### Validation (during execute) - -```python -def _validate_approval_token(approval_token: str, current_params: dict) -> None: - # 1. Split token into signature and payload - signature, b64_payload = approval_token.split(":", 1) - - # 2. Decode payload and re-compute expected signature - payload = base64.b64decode(b64_payload).decode() - expected_sig = hmac.new( - APPROVAL_SECRET.encode(), payload.encode(), hashlib.sha256 - ).hexdigest() - - # 3. Verify signature matches (constant-time comparison) - if not hmac.compare_digest(signature, expected_sig): - raise ValueError("Invalid or expired approval token") - - # 4. Check timestamp (10-minute TTL) - token_data = json.loads(payload) - ts = token_data.pop("timestamp", 0) - if abs(time.time() - ts) > 600: - raise ValueError("Invalid or expired approval token") - - # 5. 
Verify all parameters match what was previewed - if token_data != current_params: - raise ValueError("Invalid or expired approval token") -``` - -### Token Payload Example - -For a CREATE action, the token payload contains: - -```json -{ - "action": "CREATE", - "policy_name": "mask_email_for_non_admins", - "policy_type": "COLUMN_MASK", - "securable_type": "SCHEMA", - "securable_fullname": "ai_dev_kit_test.test_schema", - "function_name": "ai_dev_kit_test.test_schema.mask_email", - "to_principals": ["account users"], - "tag_name": "pii", - "tag_value": "email", - "comment": "Masks email columns for all non-admin users", - "timestamp": 1770853648 -} -``` - -Every single field must match between preview and execution, or the token is rejected. - ---- - -## Environment Variables - -| Variable | Default | Description | -|----------|---------|-------------| -| `FGAC_APPROVAL_SECRET` | `fgac-default-dev-secret` | HMAC secret for signing approval tokens. **Set to a strong random value in production.** | -| `FGAC_ADMIN_GROUP` | `admins` | Databricks group required for mutating operations. 
| - ---- - -## Threat Model - -| Attack Vector | Protection | -|--------------|-----------| -| Agent changes parameters after human approval | Token signature binds exact params; mismatch = rejected | -| Stale approval reused hours/days later | Token expires after 10 minutes | -| Non-admin user attempts policy mutation | `_check_admin_group()` verifies group membership | -| Token forged without the signing secret | HMAC-SHA256 verification fails | -| Timing attack on signature comparison | `hmac.compare_digest()` uses constant-time comparison | - ---- - -## Common Errors - -| Error | Cause | Solution | -|-------|-------|----------| -| `POLICY_QUOTA_EXCEEDED` | Too many policies on scope | Consolidate policies or use broader scope | -| `INVALID_TAG_VALUE` | Tag value not in governed tag's allowed values | Check governed tag configuration in UI | -| `UDF_NOT_FOUND` | Masking UDF doesn't exist | Create UDF first, use fully qualified name | -| `POLICY_ALREADY_EXISTS` | Policy name conflict | Use `CREATE OR REPLACE POLICY` or delete existing first | -| `INSUFFICIENT_PERMISSIONS` | Missing `MANAGE` on securable | Grant `MANAGE` permission to policy creator | -| `SHOW POLICIES is not supported` | Used invalid SQL | Use SDK `w.policies.list_policies()` instead | -| `Could not find principal` | Principal group doesn't exist in workspace | Verify group name exists in account/workspace | -| `Invalid or expired approval token` | Token expired, params changed, or forged | Re-run preview to get a fresh token | - ---- - -## Best Practices - -1. **Use governed tags** (not ad-hoc tags) for FGAC policy matching -2. **Always include an admin exception** (`EXCEPT \`gov_admin\``) in every policy to prevent lockout -3. **Use deterministic UDFs** with simple CASE statements — no external calls or nested UDFs -4. **Preview before executing** any policy change — never auto-execute -5. **Start at schema scope** and narrow to table only when needed -6. 
**Name policies descriptively**: `mask_{what}_{scope}` or `filter_{what}_{scope}` -7. **Test UDFs independently** before binding to policies (e.g., `SELECT mask_ssn('123-45-6789')`) -8. **Monitor policy quotas** — consolidate when approaching limits (10 per catalog/schema, 5 per table) -9. **Set `FGAC_APPROVAL_SECRET`** to a strong random value in production -10. **Grant to groups, not users** — easier to manage and audit - ---- - -## Source Files - -| File | Description | -|------|-------------| -| `databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py` | Core implementation (token generation, validation, CRUD) | -| `databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py` | MCP tool dispatcher (routes actions to core functions) | -| `databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py` | Integration tests | -| `ai-dev-project/.claude/skills/databricks-unity-catalog/7-fgac-overview.md` | FGAC workflow overview and SQL syntax | -| `ai-dev-project/.claude/skills/databricks-unity-catalog/8-fgac-sql-generation.md` | SQL generation reference | -| `ai-dev-project/.claude/skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md` | SDK patterns and MCP tool reference | From 5609236b852e3ef33c86919a3dabd79a7c9a3ea0 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 10:05:34 -0600 Subject: [PATCH 14/34] Fix ruff lint errors: line length and unused import --- .../databricks_mcp_server/tools/fgac_policies.py | 15 +++++++++------ .../unity_catalog/fgac_policies.py | 3 ++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py b/databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py index 9738e7fc..3c0d0197 100644 --- a/databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py +++ b/databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py @@ -5,7 +5,7 @@ Dispatches to core functions in 
databricks-tools-core based on the action parameter. """ -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List from databricks_tools_core.unity_catalog import ( list_fgac_policies as _list_fgac_policies, @@ -54,12 +54,15 @@ def manage_uc_fgac_policies( - list: List policies on a securable. Params: securable_type, securable_fullname, include_inherited, policy_type - get: Get a specific policy. Params: policy_name, securable_type, securable_fullname - get_table_policies: Get column masks and row filters on a table. Params: catalog, schema, table - - get_masking_functions: List masking UDFs in a schema. Params: catalog, schema (or udf_catalog, udf_schema to discover UDFs in a different catalog/schema) + - get_masking_functions: List masking UDFs in a schema. Params: catalog, schema + (or udf_catalog, udf_schema for UDFs in a different catalog/schema) - check_quota: Check policy quota on a securable. Params: securable_type, securable_fullname - - preview: Preview policy changes without executing. Params: preview_action ("CREATE"/"UPDATE"/"DELETE"), - policy_name, securable_type, securable_fullname, plus policy_type/function_name/tag_name/to_principals for CREATE - - create: Create an FGAC policy. Params: policy_name, policy_type ("COLUMN_MASK"/"ROW_FILTER"), - securable_type, securable_fullname, function_name, to_principals, tag_name, tag_value, except_principals, comment, + - preview: Preview policy changes without executing. Params: preview_action + ("CREATE"/"UPDATE"/"DELETE"), policy_name, securable_type, securable_fullname, + plus policy_type/function_name/tag_name/to_principals for CREATE + - create: Create an FGAC policy. Params: policy_name, + policy_type ("COLUMN_MASK"/"ROW_FILTER"), securable_type, securable_fullname, + function_name, to_principals, tag_name, tag_value, except_principals, comment, approval_token (required, from preview) - update: Update policy principals or comment. 
Params: policy_name, securable_type, securable_fullname, to_principals, except_principals, comment, approval_token (required, from preview) diff --git a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py index 1496a3b7..6c4eea6b 100644 --- a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py @@ -553,7 +553,8 @@ def preview_policy_changes( "securable": f"{stype} {securable_fullname}", "changes": changes, "equivalent_sql": f"-- UPDATE via SDK: w.policies.update_policy(name='{policy_name}', ...)", - "note": "update_policy only modifies principals and comment. To change UDF, tags, or scope, drop and recreate.", + "note": "update_policy only modifies principals and comment. " + "To change UDF, tags, or scope, drop and recreate.", } else: # DELETE From cd1487ff5f30cda6d9c35d96ee2c6a4d4bda6252 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 10:08:59 -0600 Subject: [PATCH 15/34] Apply ruff formatting and remove unused import --- .../unity_catalog/fgac_policies.py | 63 ++++++++++++------- .../unity_catalog/test_fgac_policies.py | 22 ++++--- 2 files changed, 55 insertions(+), 30 deletions(-) diff --git a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py index 6c4eea6b..799d1e71 100644 --- a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py @@ -36,6 +36,8 @@ _APPROVAL_SECRET = os.environ.get("FGAC_APPROVAL_SECRET", "fgac-default-dev-secret") _ADMIN_GROUP = os.environ.get("FGAC_ADMIN_GROUP", "admins") _TOKEN_TTL_SECONDS = 600 # 10 minutes + + def _generate_approval_token(params: dict) -> str: """Generate an HMAC-based approval token binding preview 
params to a timestamp.""" clean_params = {k: v for k, v in params.items() if v is not None} @@ -215,9 +217,11 @@ def list_fgac_policies( # SDK returns POLICY_TYPE_COLUMN_MASK / POLICY_TYPE_ROW_FILTER sdk_ptype = f"POLICY_TYPE_{ptype}" policies = [ - p for p in policies + p + for p in policies if str(getattr(p, "policy_type", "")) in (ptype, sdk_ptype) - or (p.as_dict() if hasattr(p, "as_dict") else {}).get("policy_type") in (ptype, sdk_ptype) + or (p.as_dict() if hasattr(p, "as_dict") else {}).get("policy_type") + in (ptype, sdk_ptype) ] policy_dicts = [_policy_to_dict(p) for p in policies] @@ -303,20 +307,24 @@ def get_table_policies( if fn and fn not in mask_functions: mask_functions.append(fn) - column_masks.append({ - "column_name": col.get("name"), - "column_type": col.get("type_name"), - "mask_functions": mask_functions, - }) + column_masks.append( + { + "column_name": col.get("name"), + "column_type": col.get("type_name"), + "mask_functions": mask_functions, + } + ) row_filters = [] row_filters_data = result.get("row_filters", {}) if row_filters_data: for rf in row_filters_data.get("row_filters", []): - row_filters.append({ - "function_name": rf.get("function_name"), - "input_column_names": rf.get("input_column_names", []), - }) + row_filters.append( + { + "function_name": rf.get("function_name"), + "input_column_names": rf.get("input_column_names", []), + } + ) return { "success": True, @@ -351,13 +359,15 @@ def get_masking_functions( func_list = [] for f in functions: - func_list.append({ - "name": f.name, - "full_name": f.full_name, - "return_type": str(f.data_type) if f.data_type else None, - "comment": getattr(f, "comment", None), - "is_deterministic": getattr(f, "is_deterministic", None), - }) + func_list.append( + { + "name": f.name, + "full_name": f.full_name, + "return_type": str(f.data_type) if f.data_type else None, + "comment": getattr(f, "comment", None), + "is_deterministic": getattr(f, "is_deterministic", None), + } + ) return { "success": 
True, @@ -402,7 +412,8 @@ def check_policy_quota( # Count only direct policies (not inherited) direct = [ - p for p in existing + p + for p in existing if getattr(p, "on_securable_fullname", None) == securable_fullname ] @@ -460,7 +471,9 @@ def preview_policy_changes( """ action = action.upper() if action not in ("CREATE", "UPDATE", "DELETE"): - raise ValueError(f"Invalid action: '{action}'. Must be CREATE, UPDATE, or DELETE") + raise ValueError( + f"Invalid action: '{action}'. Must be CREATE, UPDATE, or DELETE" + ) stype = _validate_securable_type(securable_type) _validate_identifier(securable_fullname) @@ -480,7 +493,8 @@ def preview_policy_changes( raise ValueError("to_principals is required for CREATE action") tag_match = ( - f"hasTagValue('{tag_name}', '{tag_value}')" if tag_value + f"hasTagValue('{tag_name}', '{tag_value}')" + if tag_value else f"hasTag('{tag_name}')" ) @@ -564,7 +578,9 @@ def preview_policy_changes( "securable": f"{stype} {securable_fullname}", "equivalent_sql": equivalent_sql, } - warnings.append("This action is irreversible. The policy will be permanently removed.") + warnings.append( + "This action is irreversible. The policy will be permanently removed." 
+ ) # Generate approval token binding these params token_params = { @@ -676,7 +692,8 @@ def create_fgac_policy( # Build tag match condition tag_condition = ( - f"hasTagValue('{tag_name}', '{tag_value}')" if tag_value + f"hasTagValue('{tag_name}', '{tag_value}')" + if tag_value else f"hasTag('{tag_name}')" ) alias = "masked_col" if ptype == "COLUMN_MASK" else "filter_col" diff --git a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py index eae1f3e4..ccff950b 100644 --- a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py +++ b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py @@ -18,7 +18,6 @@ """ import logging -import os import time import pytest @@ -68,7 +67,9 @@ def test_list_policies_on_catalog(self, test_catalog: str): assert result["securable_fullname"] == test_catalog assert isinstance(result["policies"], list) assert isinstance(result["policy_count"], int) - logger.info(f"Found {result['policy_count']} policies on catalog {test_catalog}") + logger.info( + f"Found {result['policy_count']} policies on catalog {test_catalog}" + ) def test_list_policies_on_schema(self, test_catalog: str, uc_test_schema: str): """Should list policies on a schema.""" @@ -113,7 +114,9 @@ def test_list_policies_without_inherited(self, test_catalog: str): class TestGetTablePolicies: """Tests for getting column masks and row filters on a table.""" - def test_get_table_policies(self, test_catalog: str, uc_test_schema: str, uc_test_table: str): + def test_get_table_policies( + self, test_catalog: str, uc_test_schema: str, uc_test_table: str + ): """Should return column masks and row filters for a table.""" # uc_test_table is "catalog.schema.table" parts = uc_test_table.split(".") @@ -527,7 +530,9 @@ def test_full_preview_then_create_workflow( TestFgacPolicyCRUD._create_governed_tag(tag_key, [tag_value]) try: - fn_name = 
f"{test_catalog}.{uc_test_schema}.{UC_TEST_PREFIX}_tok_fn_{unique_name}" + fn_name = ( + f"{test_catalog}.{uc_test_schema}.{UC_TEST_PREFIX}_tok_fn_{unique_name}" + ) cleanup_functions(fn_name) create_security_function( @@ -821,7 +826,9 @@ def test_create_get_update_delete_column_mask_policy( securable_fullname=full_schema, ) policy_names = [p.get("name") for p in list_result["policies"]] - assert policy_name in policy_names, f"Expected {policy_name} in {policy_names}" + assert policy_name in policy_names, ( + f"Expected {policy_name} in {policy_names}" + ) logger.info(f"Policy found in list ({list_result['policy_count']} total)") # --- PREVIEW DELETE --- @@ -874,7 +881,9 @@ def test_create_row_filter_policy( self._create_governed_tag(tag_key, [tag_value]) try: - fn_name = f"{test_catalog}.{uc_test_schema}.{UC_TEST_PREFIX}_rf_fn_{unique_name}" + fn_name = ( + f"{test_catalog}.{uc_test_schema}.{UC_TEST_PREFIX}_rf_fn_{unique_name}" + ) cleanup_functions(fn_name) execute_sql( @@ -953,4 +962,3 @@ def test_create_row_filter_policy( finally: self._delete_governed_tag(tag_key) - From 5a45e6e0ab5dbcb103dbdc47a0ff2bb3f08fff1b Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 10:12:26 -0600 Subject: [PATCH 16/34] Reformat with line-length=120 to match CI settings --- .../unity_catalog/fgac_policies.py | 45 +++++-------------- .../unity_catalog/test_fgac_policies.py | 25 +++-------- 2 files changed, 16 insertions(+), 54 deletions(-) diff --git a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py index 799d1e71..29148ece 100644 --- a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py @@ -43,9 +43,7 @@ def _generate_approval_token(params: dict) -> str: clean_params = {k: v for k, v in params.items() if v is not None} clean_params["timestamp"] = 
int(time.time()) payload = json.dumps(clean_params, sort_keys=True) - signature = hmac.new( - _APPROVAL_SECRET.encode(), payload.encode(), hashlib.sha256 - ).hexdigest() + signature = hmac.new(_APPROVAL_SECRET.encode(), payload.encode(), hashlib.sha256).hexdigest() b64_payload = base64.b64encode(payload.encode()).decode() return f"{signature}:{b64_payload}" @@ -65,9 +63,7 @@ def _validate_approval_token(approval_token: str, current_params: dict) -> None: except Exception: raise ValueError("Invalid or expired approval token") - expected_sig = hmac.new( - _APPROVAL_SECRET.encode(), payload.encode(), hashlib.sha256 - ).hexdigest() + expected_sig = hmac.new(_APPROVAL_SECRET.encode(), payload.encode(), hashlib.sha256).hexdigest() if not hmac.compare_digest(signature, expected_sig): raise ValueError("Invalid or expired approval token") @@ -122,8 +118,7 @@ def _validate_securable_type(securable_type: str) -> str: normalized = securable_type.upper() if normalized not in _VALID_SECURABLE_TYPES: raise ValueError( - f"Invalid securable_type: '{securable_type}'. " - f"Must be one of: {sorted(_VALID_SECURABLE_TYPES)}" + f"Invalid securable_type: '{securable_type}'. Must be one of: {sorted(_VALID_SECURABLE_TYPES)}" ) return normalized @@ -132,10 +127,7 @@ def _validate_policy_type(policy_type: str) -> str: """Validate and normalize policy type.""" normalized = policy_type.upper().replace("POLICY_TYPE_", "") if normalized not in _VALID_POLICY_TYPES: - raise ValueError( - f"Invalid policy_type: '{policy_type}'. " - f"Must be one of: {sorted(_VALID_POLICY_TYPES)}" - ) + raise ValueError(f"Invalid policy_type: '{policy_type}'. 
Must be one of: {sorted(_VALID_POLICY_TYPES)}") return normalized @@ -220,8 +212,7 @@ def list_fgac_policies( p for p in policies if str(getattr(p, "policy_type", "")) in (ptype, sdk_ptype) - or (p.as_dict() if hasattr(p, "as_dict") else {}).get("policy_type") - in (ptype, sdk_ptype) + or (p.as_dict() if hasattr(p, "as_dict") else {}).get("policy_type") in (ptype, sdk_ptype) ] policy_dicts = [_policy_to_dict(p) for p in policies] @@ -411,11 +402,7 @@ def check_policy_quota( ) # Count only direct policies (not inherited) - direct = [ - p - for p in existing - if getattr(p, "on_securable_fullname", None) == securable_fullname - ] + direct = [p for p in existing if getattr(p, "on_securable_fullname", None) == securable_fullname] max_policies = _POLICY_QUOTAS.get(stype, 10) return { @@ -471,9 +458,7 @@ def preview_policy_changes( """ action = action.upper() if action not in ("CREATE", "UPDATE", "DELETE"): - raise ValueError( - f"Invalid action: '{action}'. Must be CREATE, UPDATE, or DELETE" - ) + raise ValueError(f"Invalid action: '{action}'. Must be CREATE, UPDATE, or DELETE") stype = _validate_securable_type(securable_type) _validate_identifier(securable_fullname) @@ -492,11 +477,7 @@ def preview_policy_changes( if not to_principals: raise ValueError("to_principals is required for CREATE action") - tag_match = ( - f"hasTagValue('{tag_name}', '{tag_value}')" - if tag_value - else f"hasTag('{tag_name}')" - ) + tag_match = f"hasTagValue('{tag_name}', '{tag_value}')" if tag_value else f"hasTag('{tag_name}')" principals_sql = ", ".join(f"`{p}`" for p in to_principals) except_sql = ", ".join(f"`{p}`" for p in safe_except) if safe_except else "" @@ -578,9 +559,7 @@ def preview_policy_changes( "securable": f"{stype} {securable_fullname}", "equivalent_sql": equivalent_sql, } - warnings.append( - "This action is irreversible. The policy will be permanently removed." - ) + warnings.append("This action is irreversible. 
The policy will be permanently removed.") # Generate approval token binding these params token_params = { @@ -691,11 +670,7 @@ def create_fgac_policy( ) # Build tag match condition - tag_condition = ( - f"hasTagValue('{tag_name}', '{tag_value}')" - if tag_value - else f"hasTag('{tag_name}')" - ) + tag_condition = f"hasTagValue('{tag_name}', '{tag_value}')" if tag_value else f"hasTag('{tag_name}')" alias = "masked_col" if ptype == "COLUMN_MASK" else "filter_col" match_columns = [MatchColumn(alias=alias, condition=tag_condition)] diff --git a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py index ccff950b..fe9ac280 100644 --- a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py +++ b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py @@ -67,9 +67,7 @@ def test_list_policies_on_catalog(self, test_catalog: str): assert result["securable_fullname"] == test_catalog assert isinstance(result["policies"], list) assert isinstance(result["policy_count"], int) - logger.info( - f"Found {result['policy_count']} policies on catalog {test_catalog}" - ) + logger.info(f"Found {result['policy_count']} policies on catalog {test_catalog}") def test_list_policies_on_schema(self, test_catalog: str, uc_test_schema: str): """Should list policies on a schema.""" @@ -114,9 +112,7 @@ def test_list_policies_without_inherited(self, test_catalog: str): class TestGetTablePolicies: """Tests for getting column masks and row filters on a table.""" - def test_get_table_policies( - self, test_catalog: str, uc_test_schema: str, uc_test_table: str - ): + def test_get_table_policies(self, test_catalog: str, uc_test_schema: str, uc_test_table: str): """Should return column masks and row filters for a table.""" # uc_test_table is "catalog.schema.table" parts = uc_test_table.split(".") @@ -130,10 +126,7 @@ def test_get_table_policies( assert 
result["table"] == uc_test_table assert isinstance(result["column_masks"], list) assert isinstance(result["row_filters"], list) - logger.info( - f"Table {uc_test_table}: {len(result['column_masks'])} masks, " - f"{len(result['row_filters'])} filters" - ) + logger.info(f"Table {uc_test_table}: {len(result['column_masks'])} masks, {len(result['row_filters'])} filters") @pytest.mark.integration @@ -530,9 +523,7 @@ def test_full_preview_then_create_workflow( TestFgacPolicyCRUD._create_governed_tag(tag_key, [tag_value]) try: - fn_name = ( - f"{test_catalog}.{uc_test_schema}.{UC_TEST_PREFIX}_tok_fn_{unique_name}" - ) + fn_name = f"{test_catalog}.{uc_test_schema}.{UC_TEST_PREFIX}_tok_fn_{unique_name}" cleanup_functions(fn_name) create_security_function( @@ -826,9 +817,7 @@ def test_create_get_update_delete_column_mask_policy( securable_fullname=full_schema, ) policy_names = [p.get("name") for p in list_result["policies"]] - assert policy_name in policy_names, ( - f"Expected {policy_name} in {policy_names}" - ) + assert policy_name in policy_names, f"Expected {policy_name} in {policy_names}" logger.info(f"Policy found in list ({list_result['policy_count']} total)") # --- PREVIEW DELETE --- @@ -881,9 +870,7 @@ def test_create_row_filter_policy( self._create_governed_tag(tag_key, [tag_value]) try: - fn_name = ( - f"{test_catalog}.{uc_test_schema}.{UC_TEST_PREFIX}_rf_fn_{unique_name}" - ) + fn_name = f"{test_catalog}.{uc_test_schema}.{UC_TEST_PREFIX}_rf_fn_{unique_name}" cleanup_functions(fn_name) execute_sql( From dd311855f87b8ba9b446d5d6f1cf8ab7a5acd6cf Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 10:49:55 -0600 Subject: [PATCH 17/34] Restore model-serving skill entry in CLAUDE.md Accidentally removed in earlier commit. Unrelated to FGAC changes. 
--- ai-dev-project/CLAUDE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/ai-dev-project/CLAUDE.md b/ai-dev-project/CLAUDE.md index 8bf4aadf..fb679825 100644 --- a/ai-dev-project/CLAUDE.md +++ b/ai-dev-project/CLAUDE.md @@ -20,6 +20,7 @@ Load skills for detailed guidance: - `skill: "databricks-python-sdk"` - Python SDK patterns - `skill: "databricks-unity-catalog"` - System tables for lineage, audit, billing, access controls, and FGAC policy governance. - `skill: "mlflow-evaluation"` - MLflow evaluation and trace analysis +- `skill: "model-serving"` - Model Serving deployment and endpoint management - `skill: "spark-declarative-pipelines"` - Spark Declarative Pipelines - `skill: "synthetic-data-generation"` - Test data generation - `skill: "unstructured-pdf-generation"` - Generate synthetic PDFs for RAG From bd938d695a9a7fbcb6a37657f118951ccbb81582 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 10:50:51 -0600 Subject: [PATCH 18/34] Remove duplicate .claude/ entry in .gitignore --- .gitignore | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitignore b/.gitignore index dfcda712..385994fa 100644 --- a/.gitignore +++ b/.gitignore @@ -2,9 +2,6 @@ .ai-dev-kit/ .claude/ -# Claude Code local skills (installed via install_skills.sh) -.claude/ - # Python __pycache__/ From 18fd4df6609ecf85438661517ee5a0fd5ba8ce8e Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 10:53:02 -0600 Subject: [PATCH 19/34] Split invalid token test into separate ValueError and PermissionError tests Reviewer noted the combined test could pass for the wrong reason. Now tests token validation and admin group check independently. 
--- .../integration/unity_catalog/test_fgac_policies.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py index fe9ac280..3bddb962 100644 --- a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py +++ b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py @@ -468,9 +468,17 @@ def test_create_without_token_raises(self): tag_name="pii", ) - def test_create_with_invalid_token_raises(self): + def test_create_with_invalid_token_raises_value_error(self): """create_fgac_policy with an invalid token should raise ValueError.""" - with pytest.raises((ValueError, PermissionError)): + with pytest.raises(ValueError, match="Invalid or expired approval token"): + # Call the token validator directly to isolate from admin group check + from databricks_tools_core.unity_catalog.fgac_policies import _validate_approval_token + + _validate_approval_token("garbage", {"action": "create"}) + + def test_create_without_admin_group_raises_permission_error(self): + """create_fgac_policy should raise PermissionError if user is not in admin group.""" + with pytest.raises(PermissionError, match="not a member of admin group"): create_fgac_policy( policy_name="test_bad_token", policy_type="COLUMN_MASK", From f2b5afc4227ba6065bf82296f1ddd3c0f70d600d Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 10:54:55 -0600 Subject: [PATCH 20/34] Run cheap validation before expensive admin group network call Reorder create/update/delete_fgac_policy to validate token and params before calling _check_admin_group(). Updated tests to match new validation order. 
--- .../unity_catalog/fgac_policies.py | 14 ++++----- .../unity_catalog/test_fgac_policies.py | 31 ++++++++++++++----- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py index 29148ece..d1aaad8f 100644 --- a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py @@ -638,9 +638,10 @@ def create_fgac_policy( Returns: Dict with creation status and policy details """ - _check_admin_group() ptype = _validate_policy_type(policy_type) stype = _validate_securable_type(securable_type) + _validate_identifier(securable_fullname) + _validate_identifier(function_name) current_params = { "action": "create", "policy_name": policy_name, @@ -658,9 +659,7 @@ def create_fgac_policy( if comment: current_params["comment"] = comment _validate_approval_token(approval_token, current_params) - - _validate_identifier(securable_fullname) - _validate_identifier(function_name) + _check_admin_group() from databricks.sdk.service.catalog import ( ColumnMaskOptions, @@ -746,8 +745,8 @@ def update_fgac_policy( Returns: Dict with update status and applied changes """ - _check_admin_group() stype = _validate_securable_type(securable_type) + _validate_identifier(securable_fullname) current_params = { "action": "update", "policy_name": policy_name, @@ -761,8 +760,7 @@ def update_fgac_policy( if comment is not None: current_params["comment"] = comment _validate_approval_token(approval_token, current_params) - - _validate_identifier(securable_fullname) + _check_admin_group() from databricks.sdk.service.catalog import PolicyInfo @@ -840,7 +838,6 @@ def delete_fgac_policy( Returns: Dict with deletion status """ - _check_admin_group() stype = _validate_securable_type(securable_type) _validate_identifier(securable_fullname) current_params = { @@ 
-850,6 +847,7 @@ def delete_fgac_policy( "securable_fullname": securable_fullname, } _validate_approval_token(approval_token, current_params) + _check_admin_group() w = get_workspace_client() w.policies.delete_policy( diff --git a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py index 3bddb962..780f5850 100644 --- a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py +++ b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py @@ -469,25 +469,42 @@ def test_create_without_token_raises(self): ) def test_create_with_invalid_token_raises_value_error(self): - """create_fgac_policy with an invalid token should raise ValueError.""" + """create_fgac_policy with an invalid token should raise ValueError before admin check.""" with pytest.raises(ValueError, match="Invalid or expired approval token"): - # Call the token validator directly to isolate from admin group check - from databricks_tools_core.unity_catalog.fgac_policies import _validate_approval_token - - _validate_approval_token("garbage", {"action": "create"}) + create_fgac_policy( + policy_name="test_bad_token", + policy_type="COLUMN_MASK", + securable_type="SCHEMA", + securable_fullname="cat.sch", + function_name="cat.sch.fn", + to_principals=["analysts"], + tag_name="pii", + approval_token="garbage", + ) def test_create_without_admin_group_raises_permission_error(self): """create_fgac_policy should raise PermissionError if user is not in admin group.""" + # Get a valid token via preview so we pass token validation + preview = preview_policy_changes( + action="CREATE", + policy_name="test_admin_check", + securable_type="SCHEMA", + securable_fullname="cat.sch", + policy_type="COLUMN_MASK", + to_principals=["analysts"], + function_name="cat.sch.fn", + tag_name="pii", + ) with pytest.raises(PermissionError, match="not a member of admin group"): create_fgac_policy( - 
policy_name="test_bad_token", + policy_name="test_admin_check", policy_type="COLUMN_MASK", securable_type="SCHEMA", securable_fullname="cat.sch", function_name="cat.sch.fn", to_principals=["analysts"], tag_name="pii", - approval_token="garbage", + approval_token=preview["approval_token"], ) def test_preview_returns_approval_token(self): From 23391495fedd058a1506ad31971d464793aade41 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 10:58:05 -0600 Subject: [PATCH 21/34] Use random secret for approval tokens instead of hardcoded default Generate a unique HMAC secret per process via os.urandom(32).hex() when FGAC_APPROVAL_SECRET env var is not set. Prevents token forgery in the default configuration. --- .../databricks_tools_core/unity_catalog/fgac_policies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py index d1aaad8f..a5f5fcdb 100644 --- a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py @@ -33,7 +33,7 @@ _VALID_POLICY_TYPES = {"COLUMN_MASK", "ROW_FILTER"} _POLICY_QUOTAS = {"CATALOG": 10, "SCHEMA": 10, "TABLE": 5} -_APPROVAL_SECRET = os.environ.get("FGAC_APPROVAL_SECRET", "fgac-default-dev-secret") +_APPROVAL_SECRET = os.environ.get("FGAC_APPROVAL_SECRET") or os.urandom(32).hex() _ADMIN_GROUP = os.environ.get("FGAC_ADMIN_GROUP", "admins") _TOKEN_TTL_SECONDS = 600 # 10 minutes From 55121bf6c764b79e6c1f033b20a383090202a1e7 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 10:59:46 -0600 Subject: [PATCH 22/34] Update FGAC_APPROVAL_SECRET docs to reflect auto-generated default --- .../databricks-unity-catalog/9-fgac-sdk-and-tools.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md b/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md index dddda9e9..73cbcab1 100644 --- a/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md +++ b/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md @@ -54,10 +54,10 @@ The caller must be a member of the configured admin group. Membership is verifie | Variable | Default | Description | |----------|---------|-------------| -| `FGAC_APPROVAL_SECRET` | `fgac-default-dev-secret` | HMAC secret for token signing | +| `FGAC_APPROVAL_SECRET` | Random per process | Optional. HMAC secret for token signing. Auto-generated if unset. | | `FGAC_ADMIN_GROUP` | `admins` | Required group membership for mutations | -> **Important:** In production, always set `FGAC_APPROVAL_SECRET` to a strong random value. +> **Note:** `FGAC_APPROVAL_SECRET` is auto-generated per process using `os.urandom(32)`. Only set it if you need tokens to persist across process restarts. --- From d9fa8cbfb1f0588d31e0cd352ebacfb7c60b5b29 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 11:06:44 -0600 Subject: [PATCH 23/34] Remove FGAC_APPROVAL_SECRET env var, always use random secret MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Token only lives within a single MCP process — no need for a configurable secret. Simplifies setup and eliminates any risk of a leaked/guessable default. 
--- .../databricks-unity-catalog/9-fgac-sdk-and-tools.md | 3 --- .../databricks_tools_core/unity_catalog/fgac_policies.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md b/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md index 73cbcab1..8655b02f 100644 --- a/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md +++ b/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md @@ -54,11 +54,8 @@ The caller must be a member of the configured admin group. Membership is verifie | Variable | Default | Description | |----------|---------|-------------| -| `FGAC_APPROVAL_SECRET` | Random per process | Optional. HMAC secret for token signing. Auto-generated if unset. | | `FGAC_ADMIN_GROUP` | `admins` | Required group membership for mutations | -> **Note:** `FGAC_APPROVAL_SECRET` is auto-generated per process using `os.urandom(32)`. Only set it if you need tokens to persist across process restarts. 
- --- ## MCP Tools diff --git a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py index a5f5fcdb..7d10187d 100644 --- a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py @@ -33,7 +33,7 @@ _VALID_POLICY_TYPES = {"COLUMN_MASK", "ROW_FILTER"} _POLICY_QUOTAS = {"CATALOG": 10, "SCHEMA": 10, "TABLE": 5} -_APPROVAL_SECRET = os.environ.get("FGAC_APPROVAL_SECRET") or os.urandom(32).hex() +_APPROVAL_SECRET = os.urandom(32).hex() _ADMIN_GROUP = os.environ.get("FGAC_ADMIN_GROUP", "admins") _TOKEN_TTL_SECONDS = 600 # 10 minutes From 62dc429055587b155e0e6b9ff854af5e7def3039 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 11:10:39 -0600 Subject: [PATCH 24/34] Document that approval token does not guarantee human review The token ensures preview-mutation parameter integrity, but actual human-in-the-loop confirmation depends on the MCP client behavior (e.g., Claude Code prompts between tool calls). Added clear notes in module docstring and skill documentation. 
--- .../databricks-unity-catalog/9-fgac-sdk-and-tools.md | 2 ++ .../unity_catalog/fgac_policies.py | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md b/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md index 8655b02f..02307764 100644 --- a/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md +++ b/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md @@ -46,6 +46,8 @@ Every mutating call **requires** a valid `approval_token` obtained from `preview - Parameters must match exactly between preview and mutation - Action mapping: preview `CREATE` → mutation `create`, `UPDATE` → `update`, `DELETE` → `delete` +> **Design note:** The approval token ensures mutations match what was previewed and prevents parameter tampering, but it does **not** guarantee a human reviewed the preview. Human-in-the-loop confirmation depends on the MCP client — for example, Claude Code prompts the user to approve each tool call, creating a natural pause between preview and mutation. If using a client that auto-approves tool calls, consider adding explicit confirmation logic. + ### Admin Group Check The caller must be a member of the configured admin group. Membership is verified via `w.current_user.me().groups`. diff --git a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py index 7d10187d..eb92e1f6 100644 --- a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py @@ -7,6 +7,17 @@ FGAC policies bind governed tags to masking UDFs or row filters, scoped to catalogs, schemas, or tables, and targeted at specific principals. +Human-in-the-loop design: + Mutations (create/update/delete) require an approval token from + preview_policy_changes(). 
The token is an HMAC-signed binding of + preview parameters to a timestamp — it ensures mutations match what + was previewed and prevents parameter tampering. + + IMPORTANT: The token does NOT guarantee a human reviewed the preview. + That responsibility falls on the MCP client (e.g., Claude Code prompts + the user for confirmation between tool calls). The token only ensures + that whatever was approved matches what gets executed. + Policy quotas: - Catalog: 10 policies max - Schema: 10 policies max From faff55fa6905d074a69c710376aa08828c3deb60 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 11:14:50 -0600 Subject: [PATCH 25/34] Avoid mutating caller's dict in _validate_approval_token --- .../databricks_tools_core/unity_catalog/fgac_policies.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py index eb92e1f6..16331d82 100644 --- a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py @@ -64,6 +64,8 @@ def _validate_approval_token(approval_token: str, current_params: dict) -> None: Raises ValueError if the token is invalid, expired, or params don't match. 
""" + params = dict(current_params) # work on a copy to avoid mutating caller's dict + try: signature, b64_payload = approval_token.split(":", 1) except (ValueError, AttributeError): @@ -90,13 +92,13 @@ def _validate_approval_token(approval_token: str, current_params: dict) -> None: # Map preview action to mutation action action_map = {"CREATE": "create", "UPDATE": "update", "DELETE": "delete"} token_action = token_data.pop("action", None) - current_action = current_params.pop("action", None) + current_action = params.pop("action", None) if token_action and current_action: if action_map.get(token_action) != current_action: raise ValueError("Invalid or expired approval token") # Compare remaining params - clean_current = {k: v for k, v in current_params.items() if v is not None} + clean_current = {k: v for k, v in params.items() if v is not None} if token_data != clean_current: raise ValueError("Invalid or expired approval token") From 9f99e3a5642d2eb470749b1512e879f4ad6fda2e Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 11:17:55 -0600 Subject: [PATCH 26/34] Add setup guidance for FGAC_ADMIN_GROUP env var in skill docs --- .../9-fgac-sdk-and-tools.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md b/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md index 02307764..8f3e0edf 100644 --- a/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md +++ b/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md @@ -50,13 +50,25 @@ Every mutating call **requires** a valid `approval_token` obtained from `preview ### Admin Group Check -The caller must be a member of the configured admin group. Membership is verified via `w.current_user.me().groups`. +The caller must be a member of the configured admin group before any mutating operation (create/update/delete) is allowed. 
Membership is verified via `w.current_user.me().groups`. + +Set the `FGAC_ADMIN_GROUP` environment variable to your workspace admin group name: + +```bash +# Example: use your workspace's governance admin group +export FGAC_ADMIN_GROUP="governance_admins" + +# Or use the workspace admins group +export FGAC_ADMIN_GROUP="admins" +``` + +If unset, defaults to `"admins"`. This should match an existing group in your Databricks workspace that contains users authorized to manage FGAC policies. ### Environment Variables | Variable | Default | Description | |----------|---------|-------------| -| `FGAC_ADMIN_GROUP` | `admins` | Required group membership for mutations | +| `FGAC_ADMIN_GROUP` | `admins` | Databricks workspace group whose members can create/update/delete FGAC policies | --- From 175d32bc579af0f1bc415743092c52345ccce84e Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 11:54:49 -0600 Subject: [PATCH 27/34] Add FGAC analysis & discovery functions Implement 5 new functions for the FGAC analysis workflow: - get_column_tags_api: query column-level tags via information_schema - get_schema_info / get_catalog_info: retrieve UC metadata - list_table_policies_in_schema: enumerate tables with their policies - analyze_fgac_coverage: cross-reference tags, policies, and UDFs to identify coverage gaps and suggest policy creation Wire up MCP dispatcher actions, export from __init__.py, document return schemas in skill docs, and add integration tests. 
--- .../ground_truth.yaml | 184 +++++++++++ .../tools/fgac_policies.py | 38 ++- .../9-fgac-sdk-and-tools.md | 126 +++++++- .../unity_catalog/__init__.py | 10 + .../unity_catalog/fgac_policies.py | 295 ++++++++++++++++++ .../unity_catalog/test_fgac_policies.py | 147 +++++++++ 6 files changed, 795 insertions(+), 5 deletions(-) diff --git a/.test/skills/databricks-unity-catalog/ground_truth.yaml b/.test/skills/databricks-unity-catalog/ground_truth.yaml index dcb9bacf..cebf609f 100644 --- a/.test/skills/databricks-unity-catalog/ground_truth.yaml +++ b/.test/skills/databricks-unity-catalog/ground_truth.yaml @@ -820,3 +820,187 @@ test_cases: difficulty: "medium" source: "manual" tags: ["error", "invalid-sql", "show-policies", "edge-case"] + + # ============================================================================ + # MCP Tool — Analyze FGAC Coverage + # ============================================================================ + - id: "fgac_mcp_analyze_coverage_001" + inputs: + prompt: "Analyze FGAC policy coverage for the prod.finance schema and identify gaps" + outputs: + response: | + Here's how to analyze FGAC policy coverage and identify gaps: + + **Step 1: Run coverage analysis** + + ``` + analyze_fgac_coverage( + catalog="prod", + schema="finance" + ) + ``` + + This returns: + - `summary` — tables scanned, tagged columns, existing policies, available UDFs + - `covered_tags` — tag/value pairs already covered by policies + - `uncovered_tags` — tag/value pairs with tagged columns but no policy + - `gaps` — detailed list of uncovered tags with affected columns and suggestions + - `existing_policies` — current FGAC policies + - `available_udfs` — masking functions available in the schema + + **Step 2: Review gaps** + + Each gap entry identifies: + - The uncovered `tag_name` and `tag_value` + - Which `columns` (table + column) have that tag but no policy + - A `suggestion` for what kind of policy to create + + **Step 3: For each gap, create policies using the 
human-in-the-loop workflow** + + ``` + result = preview_policy_changes( + action="CREATE", + policy_name="mask_email_finance", + securable_type="SCHEMA", + securable_fullname="prod.finance", + policy_type="COLUMN_MASK", + function_name="prod.finance.mask_email", + to_principals=["analysts"], + tag_name="pii_type", + tag_value="email" + ) + + # After human approval: + create_fgac_policy( + ..., + approval_token=result["approval_token"] + ) + ``` + + You can also analyze an entire catalog by omitting the schema: + ``` + analyze_fgac_coverage(catalog="prod") + ``` + execution_success: null + expectations: + expected_facts: + - "analyze_fgac_coverage" + - "gaps" + - "uncovered_tags" + - "covered_tags" + - "summary" + expected_patterns: + - pattern: "analyze_fgac_coverage\\(" + min_count: 1 + description: "coverage analysis tool" + - pattern: "gaps" + min_count: 1 + description: "identifies coverage gaps" + - pattern: "uncovered_tags" + min_count: 1 + description: "uncovered tags in output" + - pattern: "preview_policy_changes\\(" + min_count: 1 + description: "follows up with human-in-the-loop workflow" + guidelines: + - "Must use analyze_fgac_coverage as the primary analysis tool" + - "Must explain the output structure (summary, gaps, covered/uncovered tags)" + - "Must show follow-up with human-in-the-loop workflow to create policies for gaps" + - "Should mention catalog-level analysis option" + metadata: + category: "happy_path" + difficulty: "medium" + source: "manual" + tags: ["mcp", "analyze-coverage", "gap-analysis", "discovery"] + + # ============================================================================ + # MCP Tool — Discovery: Column Tags, Schema Info, Table Policies + # ============================================================================ + - id: "fgac_mcp_discovery_tools_001" + inputs: + prompt: "Discover the current FGAC state of prod.finance — what tags exist, what policies are applied, and what UDFs are available" + outputs: + response: | + 
Here's how to discover the full FGAC state of a schema using MCP tools: + + **1. Get schema metadata** + + ``` + get_schema_info(catalog="prod", schema="finance") + ``` + Returns: schema name, owner, comment, creation/update timestamps. + + **2. Get catalog metadata** + + ``` + get_catalog_info(catalog="prod") + ``` + Returns: catalog name, owner, comment, creation/update timestamps. + + **3. List all tables with their current policies** + + ``` + list_table_policies_in_schema(catalog="prod", schema="finance") + ``` + Returns: each table with its `column_masks` and `row_filters`. + + **4. Check column tags on a specific table** + + ``` + get_column_tags_api(catalog="prod", schema="finance", table="customers") + ``` + Returns: column-level tags (tag_name, tag_value, column_name) for the table. + + **5. List available masking UDFs** + + ``` + get_masking_functions(catalog="prod", schema="finance") + ``` + Returns: functions with name, full_name, return_type, comment. + + **6. List existing FGAC policies** + + ``` + list_fgac_policies( + securable_type="SCHEMA", + securable_fullname="prod.finance" + ) + ``` + Returns: all policies on the schema with their configuration. + + Or use `analyze_fgac_coverage(catalog="prod", schema="finance")` to get a combined analysis with gap detection in a single call. 
+ execution_success: null + expectations: + expected_facts: + - "get_schema_info" + - "get_column_tags_api" + - "list_table_policies_in_schema" + - "get_masking_functions" + - "list_fgac_policies" + expected_patterns: + - pattern: "get_schema_info\\(" + min_count: 1 + description: "schema metadata discovery" + - pattern: "get_column_tags_api\\(" + min_count: 1 + description: "column tag discovery" + - pattern: "list_table_policies_in_schema\\(" + min_count: 1 + description: "schema-wide policy discovery" + - pattern: "get_masking_functions\\(" + min_count: 1 + description: "UDF discovery" + - pattern: "list_fgac_policies\\(" + min_count: 1 + description: "policy listing" + guidelines: + - "Must show get_schema_info or get_catalog_info for metadata" + - "Must use get_column_tags_api to discover column tags" + - "Must use list_table_policies_in_schema for schema-wide policy view" + - "Must include get_masking_functions for UDF discovery" + - "Should mention analyze_fgac_coverage as a combined alternative" + metadata: + category: "happy_path" + difficulty: "medium" + source: "manual" + tags: ["mcp", "discovery", "column-tags", "schema-info", "table-policies"] diff --git a/databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py b/databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py index 3c0d0197..13638f1e 100644 --- a/databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py +++ b/databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py @@ -12,6 +12,11 @@ get_fgac_policy as _get_fgac_policy, get_table_policies as _get_table_policies, get_masking_functions as _get_masking_functions, + get_column_tags_api as _get_column_tags_api, + get_schema_info as _get_schema_info, + get_catalog_info as _get_catalog_info, + list_table_policies_in_schema as _list_table_policies_in_schema, + analyze_fgac_coverage as _analyze_fgac_coverage, check_policy_quota as _check_policy_quota, preview_policy_changes as _preview_policy_changes, 
create_fgac_policy as _create_fgac_policy, @@ -56,6 +61,11 @@ def manage_uc_fgac_policies( - get_table_policies: Get column masks and row filters on a table. Params: catalog, schema, table - get_masking_functions: List masking UDFs in a schema. Params: catalog, schema (or udf_catalog, udf_schema for UDFs in a different catalog/schema) + - get_column_tags: Get column-level tags for a table. Params: catalog, schema, table + - get_schema_info: Get schema metadata. Params: catalog, schema + - get_catalog_info: Get catalog metadata. Params: catalog + - list_table_policies_in_schema: List all tables in a schema with their policies. Params: catalog, schema + - analyze_coverage: Analyze FGAC policy coverage gaps. Params: catalog, schema (optional) - check_quota: Check policy quota on a securable. Params: securable_type, securable_fullname - preview: Preview policy changes without executing. Params: preview_action ("CREATE"/"UPDATE"/"DELETE"), policy_name, securable_type, securable_fullname, @@ -119,6 +129,31 @@ def manage_uc_fgac_policies( catalog=udf_catalog or catalog, schema=udf_schema or schema, ) + elif act == "get_column_tags": + return _get_column_tags_api( + catalog=catalog, + schema=schema, + table=table, + ) + elif act == "get_schema_info": + return _get_schema_info( + catalog=catalog, + schema=schema, + ) + elif act == "get_catalog_info": + return _get_catalog_info( + catalog=catalog, + ) + elif act == "list_table_policies_in_schema": + return _list_table_policies_in_schema( + catalog=catalog, + schema=schema, + ) + elif act == "analyze_coverage": + return _analyze_fgac_coverage( + catalog=catalog, + schema=schema, + ) elif act == "check_quota": return _check_policy_quota( securable_type=securable_type, @@ -174,5 +209,6 @@ def manage_uc_fgac_policies( raise ValueError( f"Invalid action: '{action}'. 
Valid actions: list, get, get_table_policies, " - f"get_masking_functions, check_quota, preview, create, update, delete" + f"get_masking_functions, get_column_tags, get_schema_info, get_catalog_info, " + f"list_table_policies_in_schema, analyze_coverage, check_quota, preview, create, update, delete" ) diff --git a/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md b/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md index 8f3e0edf..867ff60e 100644 --- a/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md +++ b/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md @@ -210,7 +210,7 @@ get_masking_functions( #### `get_column_tags_api` -Get column-level tags via the Tags API. +Get column-level tags for a table via the Tags API (queries `system.information_schema.column_tags`). ```python get_column_tags_api( @@ -220,15 +220,70 @@ get_column_tags_api( ) ``` -#### `get_schema_info` / `get_catalog_info` +**Returns:** +```json +{ + "success": true, + "table": "my_catalog.my_schema.my_table", + "tags": [ + { + "catalog_name": "my_catalog", + "schema_name": "my_schema", + "table_name": "my_table", + "column_name": "ssn", + "tag_name": "pii_type", + "tag_value": "ssn" + } + ] +} +``` + +#### `get_schema_info` -Get schema or catalog metadata via Unity Catalog API. +Get schema metadata via Unity Catalog API. ```python get_schema_info(catalog: str, schema: str) +``` + +**Returns:** +```json +{ + "success": true, + "schema": { + "name": "my_schema", + "full_name": "my_catalog.my_schema", + "catalog_name": "my_catalog", + "owner": "admin_user", + "comment": "Production finance schema", + "created_at": 1700000000000, + "updated_at": 1700100000000 + } +} +``` + +#### `get_catalog_info` + +Get catalog metadata via Unity Catalog API. 
+ +```python get_catalog_info(catalog: str) ``` +**Returns:** +```json +{ + "success": true, + "catalog": { + "name": "my_catalog", + "owner": "admin_user", + "comment": "Production catalog", + "created_at": 1700000000000, + "updated_at": 1700100000000 + } +} +``` + #### `list_table_policies_in_schema` List all tables in a schema with their column masks and row filters. @@ -240,6 +295,67 @@ list_table_policies_in_schema( ) ``` +**Returns:** +```json +{ + "success": true, + "catalog": "my_catalog", + "schema": "my_schema", + "table_count": 3, + "tables": [ + { + "table": "customers", + "column_masks": [ + {"column_name": "ssn", "column_type": "STRING", "mask_functions": ["my_catalog.my_schema.mask_ssn"]} + ], + "row_filters": [] + }, + { + "table": "orders", + "column_masks": [], + "row_filters": [] + } + ] +} +``` + +#### `analyze_fgac_coverage` + +Analyze FGAC policy coverage for a catalog or schema. Identifies tagged columns that lack policy coverage and suggests actions. + +```python +analyze_fgac_coverage( + catalog: str, + schema: str = None, # Optional; omit to analyze entire catalog +) +``` + +**Returns:** +```json +{ + "success": true, + "scope": "SCHEMA my_catalog.my_schema", + "summary": { + "tables_scanned": 10, + "tagged_columns": 5, + "existing_policies": 2, + "available_udfs": 3, + "covered_tags": ["pii_type:ssn"], + "uncovered_tags": ["pii_type:email"] + }, + "gaps": [ + { + "tag_name": "pii_type", + "tag_value": "email", + "columns": [{"table": "my_catalog.my_schema.customers", "column": "email"}], + "suggestion": "No policy covers this tag. Consider creating a COLUMN_MASK policy." 
+ } + ], + "existing_policies": [{"name": "mask_pii_ssn", "policy_type": "COLUMN_MASK", "...": "..."}], + "available_udfs": [{"name": "mask_ssn", "full_name": "my_catalog.my_schema.mask_ssn", "...": "..."}] +} +``` + ### Preview Tool (Human-in-the-Loop Gate) #### `preview_policy_changes` @@ -391,6 +507,8 @@ Complete workflow using MCP tools: ``` Step 1: ANALYZE ───────────────────────────────── +→ analyze_fgac_coverage(catalog="prod", schema="finance") + # Or analyze individual components: → list_fgac_policies(securable_type="SCHEMA", securable_fullname="prod.finance") → get_column_tags_api(catalog="prod", schema="finance", table="customers") → get_masking_functions(catalog="prod", schema="finance") @@ -400,7 +518,7 @@ Step 1: ANALYZE Step 2: RECOMMEND ───────────────────────────────── -→ Agent generates policy recommendations based on discovered tags and UDFs +→ Agent generates policy recommendations based on coverage gaps and available UDFs Step 3: PREVIEW (returns approval_token) ───────────────────────────────── diff --git a/databricks-tools-core/databricks_tools_core/unity_catalog/__init__.py b/databricks-tools-core/databricks_tools_core/unity_catalog/__init__.py index b676954a..c2f4005e 100644 --- a/databricks-tools-core/databricks_tools_core/unity_catalog/__init__.py +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/__init__.py @@ -118,6 +118,11 @@ get_fgac_policy, get_table_policies, get_masking_functions, + get_column_tags_api, + get_schema_info, + get_catalog_info, + list_table_policies_in_schema, + analyze_fgac_coverage, check_policy_quota, preview_policy_changes, create_fgac_policy, @@ -244,6 +249,11 @@ "get_fgac_policy", "get_table_policies", "get_masking_functions", + "get_column_tags_api", + "get_schema_info", + "get_catalog_info", + "list_table_policies_in_schema", + "analyze_fgac_coverage", "check_policy_quota", "preview_policy_changes", "create_fgac_policy", diff --git 
a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py index 16331d82..9afbe93f 100644 --- a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py @@ -382,6 +382,301 @@ def get_masking_functions( } +# --------------------------------------------------------------------------- +# Analysis & Discovery +# --------------------------------------------------------------------------- + + +def get_column_tags_api( + catalog: str, + schema: str, + table: str, +) -> Dict[str, Any]: + """ + Get column-level tags for a table via the Tags API. + + Queries system.information_schema.column_tags to return governed and + metadata tags applied to columns on the specified table. + + Args: + catalog: Catalog name + schema: Schema name + table: Table name + + Returns: + Dict with table name and list of column tag entries + """ + _validate_identifier(catalog) + _validate_identifier(schema) + _validate_identifier(table) + + from .tags import query_column_tags + + tags = query_column_tags(catalog_filter=catalog, table_name=table) + # Filter to the specific schema (query_column_tags filters by catalog and table but not schema) + tags = [t for t in tags if t.get("schema_name") == schema] + + return { + "success": True, + "table": f"{catalog}.{schema}.{table}", + "tags": tags, + } + + +def get_schema_info( + catalog: str, + schema: str, +) -> Dict[str, Any]: + """ + Get schema metadata via the Unity Catalog API. 
+ + Args: + catalog: Catalog name + schema: Schema name + + Returns: + Dict with serialized schema metadata + """ + _validate_identifier(catalog) + _validate_identifier(schema) + + from .schemas import get_schema + + schema_obj = get_schema(f"{catalog}.{schema}") + return { + "success": True, + "schema": { + "name": schema_obj.name, + "full_name": schema_obj.full_name, + "catalog_name": schema_obj.catalog_name, + "owner": schema_obj.owner, + "comment": schema_obj.comment, + "created_at": schema_obj.created_at, + "updated_at": schema_obj.updated_at, + }, + } + + +def get_catalog_info( + catalog: str, +) -> Dict[str, Any]: + """ + Get catalog metadata via the Unity Catalog API. + + Args: + catalog: Catalog name + + Returns: + Dict with serialized catalog metadata + """ + _validate_identifier(catalog) + + from .catalogs import get_catalog + + catalog_obj = get_catalog(catalog) + return { + "success": True, + "catalog": { + "name": catalog_obj.name, + "owner": catalog_obj.owner, + "comment": catalog_obj.comment, + "created_at": catalog_obj.created_at, + "updated_at": catalog_obj.updated_at, + }, + } + + +def list_table_policies_in_schema( + catalog: str, + schema: str, +) -> Dict[str, Any]: + """ + List all tables in a schema with their column masks and row filters. + + Enumerates tables in the schema and calls get_table_policies() on each. 
+ + Args: + catalog: Catalog name + schema: Schema name + + Returns: + Dict with table count and per-table policy details + """ + _validate_identifier(catalog) + _validate_identifier(schema) + + from .tables import list_tables + + tables = list_tables(catalog_name=catalog, schema_name=schema) + table_results = [] + for t in tables: + try: + policies = get_table_policies(catalog=catalog, schema=schema, table=t.name) + table_results.append( + { + "table": t.name, + "column_masks": policies.get("column_masks", []), + "row_filters": policies.get("row_filters", []), + } + ) + except Exception as e: + logger.warning(f"Failed to get policies for table {t.name}: {e}") + table_results.append( + { + "table": t.name, + "column_masks": [], + "row_filters": [], + "error": str(e), + } + ) + + return { + "success": True, + "catalog": catalog, + "schema": schema, + "table_count": len(table_results), + "tables": table_results, + } + + +def analyze_fgac_coverage( + catalog: str, + schema: Optional[str] = None, +) -> Dict[str, Any]: + """ + Analyze FGAC policy coverage for a catalog or schema. + + Examines tagged columns, existing policies, and available masking UDFs + to identify gaps where tagged columns lack policy coverage. Useful for + the "analyze this catalog/schema and suggest FGAC policies" workflow. + + Args: + catalog: Catalog name + schema: Optional schema name. If omitted, analyzes all schemas in the catalog. + + Returns: + Dict with coverage summary, gaps, existing policies, and available UDFs + """ + _validate_identifier(catalog) + if schema: + _validate_identifier(schema) + + from .schemas import list_schemas + from .tables import list_tables + from .tags import query_column_tags + + # Determine schemas to scan + if schema: + schema_names = [schema] + scope = f"SCHEMA {catalog}.{schema}" + else: + schema_objs = list_schemas(catalog) + schema_names = [s.name for s in schema_objs if s.name != "information_schema"] + scope = f"CATALOG {catalog}" + + # 1. 
Enumerate tables across schemas + all_tables = [] + for s in schema_names: + try: + tables = list_tables(catalog_name=catalog, schema_name=s) + all_tables.extend(tables) + except Exception as e: + logger.warning(f"Failed to list tables in {catalog}.{s}: {e}") + + # 2. Query column tags + tagged_columns = query_column_tags(catalog_filter=catalog) + if schema: + tagged_columns = [t for t in tagged_columns if t.get("schema_name") == schema] + + # 3. List existing FGAC policies + securable_type = "SCHEMA" if schema else "CATALOG" + securable_fullname = f"{catalog}.{schema}" if schema else catalog + policies_result = list_fgac_policies( + securable_type=securable_type, + securable_fullname=securable_fullname, + include_inherited=True, + ) + existing_policies = policies_result.get("policies", []) + + # 4. List masking UDFs across scanned schemas + all_udfs = [] + for s in schema_names: + try: + udfs_result = get_masking_functions(catalog=catalog, schema=s) + all_udfs.extend(udfs_result.get("functions", [])) + except Exception as e: + logger.warning(f"Failed to list UDFs in {catalog}.{s}: {e}") + + # 5. 
Cross-reference: determine which tag/value pairs are covered by policies + covered_tags = set() + for p in existing_policies: + for mc in p.get("match_columns") or []: + condition = mc.get("condition", "") + # Parse hasTagValue('key', 'value') or hasTag('key') + if "hasTagValue" in condition: + parts = condition.replace("hasTagValue(", "").rstrip(")").replace("'", "").split(", ") + if len(parts) == 2: + covered_tags.add(f"{parts[0]}:{parts[1]}") + elif "hasTag" in condition: + tag = condition.replace("hasTag(", "").rstrip(")").replace("'", "") + covered_tags.add(tag) + + # Build tag -> columns mapping for uncovered tags + tag_columns: Dict[str, List[Dict[str, str]]] = {} + for tc in tagged_columns: + tag_key = f"{tc.get('tag_name')}:{tc.get('tag_value')}" if tc.get("tag_value") else tc.get("tag_name", "") + if tag_key not in covered_tags: + tag_columns.setdefault(tag_key, []).append( + { + "table": f"{tc.get('catalog_name')}.{tc.get('schema_name')}.{tc.get('table_name')}", + "column": tc.get("column_name", ""), + } + ) + + # Build unique tag keys for summary + all_tag_keys = set() + for tc in tagged_columns: + tag_key = f"{tc.get('tag_name')}:{tc.get('tag_value')}" if tc.get("tag_value") else tc.get("tag_name", "") + all_tag_keys.add(tag_key) + + uncovered_tags = all_tag_keys - covered_tags + + # Build gaps + gaps = [] + for tag_key in sorted(uncovered_tags): + if ":" in tag_key: + t_name, t_value = tag_key.split(":", 1) + else: + t_name, t_value = tag_key, None + + columns = tag_columns.get(tag_key, []) + suggestion = "No policy covers this tag. Consider creating a COLUMN_MASK policy." 
+ gaps.append( + { + "tag_name": t_name, + "tag_value": t_value, + "columns": columns, + "suggestion": suggestion, + } + ) + + return { + "success": True, + "scope": scope, + "summary": { + "tables_scanned": len(all_tables), + "tagged_columns": len(tagged_columns), + "existing_policies": len(existing_policies), + "available_udfs": len(all_udfs), + "covered_tags": sorted(covered_tags), + "uncovered_tags": sorted(uncovered_tags), + }, + "gaps": gaps, + "existing_policies": existing_policies, + "available_udfs": all_udfs, + } + + # --------------------------------------------------------------------------- # Quota checking # --------------------------------------------------------------------------- diff --git a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py index 780f5850..5892fddf 100644 --- a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py +++ b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py @@ -33,6 +33,11 @@ get_fgac_policy, get_table_policies, get_masking_functions, + get_column_tags_api, + get_schema_info, + get_catalog_info, + list_table_policies_in_schema, + analyze_fgac_coverage, check_policy_quota, preview_policy_changes, create_fgac_policy, @@ -173,6 +178,148 @@ def test_get_masking_functions( logger.info(f"Found {result['function_count']} functions in schema") +# --------------------------------------------------------------------------- +# Analysis & Discovery tests +# --------------------------------------------------------------------------- + + +@pytest.mark.integration +class TestGetColumnTagsApi: + """Tests for get_column_tags_api.""" + + def test_get_column_tags_api( + self, + test_catalog: str, + uc_test_schema: str, + uc_test_table: str, + unique_name: str, + warehouse_id: str, + ): + """Should return column tags for a table.""" + parts = uc_test_table.split(".") + tag_key = 
f"uc_test_tag_{unique_name}" + + # Tag a column so there's something to find + set_tags( + object_type="column", + full_name=uc_test_table, + column_name="email", + tags={tag_key: "test_val"}, + warehouse_id=warehouse_id, + ) + + result = get_column_tags_api( + catalog=parts[0], + schema=parts[1], + table=parts[2], + ) + + assert result["success"] is True + assert result["table"] == uc_test_table + assert isinstance(result["tags"], list) + logger.info(f"Found {len(result['tags'])} column tags on {uc_test_table}") + + +@pytest.mark.integration +class TestGetSchemaInfo: + """Tests for get_schema_info.""" + + def test_get_schema_info(self, test_catalog: str, uc_test_schema: str): + """Should return schema metadata.""" + result = get_schema_info( + catalog=test_catalog, + schema=uc_test_schema, + ) + + assert result["success"] is True + assert result["schema"]["name"] == uc_test_schema + assert result["schema"]["catalog_name"] == test_catalog + assert result["schema"]["full_name"] == f"{test_catalog}.{uc_test_schema}" + assert "owner" in result["schema"] + logger.info(f"Schema info: {result['schema']['full_name']} owned by {result['schema']['owner']}") + + +@pytest.mark.integration +class TestGetCatalogInfo: + """Tests for get_catalog_info.""" + + def test_get_catalog_info(self, test_catalog: str): + """Should return catalog metadata.""" + result = get_catalog_info(catalog=test_catalog) + + assert result["success"] is True + assert result["catalog"]["name"] == test_catalog + assert "owner" in result["catalog"] + logger.info(f"Catalog info: {result['catalog']['name']} owned by {result['catalog']['owner']}") + + +@pytest.mark.integration +class TestListTablePoliciesInSchema: + """Tests for list_table_policies_in_schema.""" + + def test_list_table_policies_in_schema(self, test_catalog: str, uc_test_schema: str, uc_test_table: str): + """Should list tables with their policies.""" + result = list_table_policies_in_schema( + catalog=test_catalog, + schema=uc_test_schema, + ) 
+ + assert result["success"] is True + assert result["catalog"] == test_catalog + assert result["schema"] == uc_test_schema + assert isinstance(result["tables"], list) + assert result["table_count"] > 0 + + # Each table should have column_masks and row_filters keys + for t in result["tables"]: + assert "table" in t + assert "column_masks" in t + assert "row_filters" in t + logger.info(f"Found {result['table_count']} tables in {test_catalog}.{uc_test_schema}") + + +@pytest.mark.integration +class TestAnalyzeFgacCoverage: + """Tests for analyze_fgac_coverage.""" + + def test_analyze_coverage_schema_scope(self, test_catalog: str, uc_test_schema: str, uc_test_table: str): + """Should return coverage analysis for a schema.""" + result = analyze_fgac_coverage( + catalog=test_catalog, + schema=uc_test_schema, + ) + + assert result["success"] is True + assert result["scope"] == f"SCHEMA {test_catalog}.{uc_test_schema}" + + summary = result["summary"] + assert isinstance(summary["tables_scanned"], int) + assert isinstance(summary["tagged_columns"], int) + assert isinstance(summary["existing_policies"], int) + assert isinstance(summary["available_udfs"], int) + assert isinstance(summary["covered_tags"], list) + assert isinstance(summary["uncovered_tags"], list) + assert isinstance(result["gaps"], list) + assert isinstance(result["existing_policies"], list) + assert isinstance(result["available_udfs"], list) + logger.info( + f"Coverage analysis: {summary['tables_scanned']} tables, " + f"{summary['tagged_columns']} tagged cols, " + f"{summary['existing_policies']} policies, " + f"{len(result['gaps'])} gaps" + ) + + def test_analyze_coverage_catalog_scope(self, test_catalog: str, uc_test_schema: str, uc_test_table: str): + """Should return coverage analysis for an entire catalog.""" + result = analyze_fgac_coverage(catalog=test_catalog) + + assert result["success"] is True + assert result["scope"] == f"CATALOG {test_catalog}" + assert 
isinstance(result["summary"]["tables_scanned"], int) + assert isinstance(result["gaps"], list) + logger.info(f"Catalog coverage: {result['summary']['tables_scanned']} tables scanned") + + # --------------------------------------------------------------------------- # Quota check tests # --------------------------------------------------------------------------- From 03f7120535a8964df664405ce51c5a003fc50f5d Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 12:43:58 -0600 Subject: [PATCH 28/34] Fix PR review comments: type annotations, token tests, docs URL - Add Optional[] to all None-defaulted params in MCP dispatcher - Add tests for expired tokens and cross-action replay attacks - Fix GCP-specific docs URL to AWS-specific --- .../tools/fgac_policies.py | 36 ++++++------ .../7-fgac-overview.md | 2 +- .../unity_catalog/test_fgac_policies.py | 56 +++++++++++++++++++ 3 files changed, 75 insertions(+), 19 deletions(-) diff --git a/databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py b/databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py index 13638f1e..6ef0e24a 100644 --- a/databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py +++ b/databricks-mcp-server/databricks_mcp_server/tools/fgac_policies.py @@ -5,7 +5,7 @@ Dispatches to core functions in databricks-tools-core based on the action parameter. 
""" -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from databricks_tools_core.unity_catalog import ( list_fgac_policies as _list_fgac_policies, @@ -30,24 +30,24 @@ @mcp.tool def manage_uc_fgac_policies( action: str, - securable_type: str = None, - securable_fullname: str = None, - policy_name: str = None, - policy_type: str = None, - to_principals: List[str] = None, - except_principals: List[str] = None, - function_name: str = None, - tag_name: str = None, - tag_value: str = None, - comment: str = None, + securable_type: Optional[str] = None, + securable_fullname: Optional[str] = None, + policy_name: Optional[str] = None, + policy_type: Optional[str] = None, + to_principals: Optional[List[str]] = None, + except_principals: Optional[List[str]] = None, + function_name: Optional[str] = None, + tag_name: Optional[str] = None, + tag_value: Optional[str] = None, + comment: Optional[str] = None, include_inherited: bool = True, - catalog: str = None, - schema: str = None, - table: str = None, - udf_catalog: str = None, - udf_schema: str = None, - preview_action: str = None, - approval_token: str = None, + catalog: Optional[str] = None, + schema: Optional[str] = None, + table: Optional[str] = None, + udf_catalog: Optional[str] = None, + udf_schema: Optional[str] = None, + preview_action: Optional[str] = None, + approval_token: Optional[str] = None, ) -> Dict[str, Any]: """ Manage FGAC (Fine-Grained Access Control) policies on Unity Catalog securables. 
diff --git a/databricks-skills/databricks-unity-catalog/7-fgac-overview.md b/databricks-skills/databricks-unity-catalog/7-fgac-overview.md index b692eea0..1b07ddbf 100644 --- a/databricks-skills/databricks-unity-catalog/7-fgac-overview.md +++ b/databricks-skills/databricks-unity-catalog/7-fgac-overview.md @@ -283,7 +283,7 @@ Every FGAC policy **MUST** include `EXCEPT \`gov_admin\`` to protect administrat | Per Schema | 10 | | Per Table | 5 | -https://docs.databricks.com/gcp/en/data-governance/unity-catalog/abac/policies#policy-quotas +https://docs.databricks.com/aws/en/data-governance/unity-catalog/abac/policies#policy-quotas --- ## Human-in-the-Loop Governance Workflow diff --git a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py index 5892fddf..026e123e 100644 --- a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py +++ b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py @@ -785,6 +785,62 @@ def test_token_with_mismatched_params_raises(self): approval_token=token, ) + def test_expired_token_raises(self): + """Token past TTL should be rejected.""" + import databricks_tools_core.unity_catalog.fgac_policies as fgac_mod + + preview = preview_policy_changes( + action="CREATE", + policy_name="test_expire", + securable_type="SCHEMA", + securable_fullname="cat.sch", + policy_type="COLUMN_MASK", + to_principals=["analysts"], + function_name="cat.sch.mask", + tag_name="pii", + ) + token = preview["approval_token"] + + # Temporarily set TTL to 0 so the token is already expired + original_ttl = fgac_mod._TOKEN_TTL_SECONDS + try: + fgac_mod._TOKEN_TTL_SECONDS = 0 + with pytest.raises(ValueError, match="Invalid or expired approval token"): + create_fgac_policy( + policy_name="test_expire", + policy_type="COLUMN_MASK", + securable_type="SCHEMA", + securable_fullname="cat.sch", + function_name="cat.sch.mask", + 
to_principals=["analysts"], + tag_name="pii", + approval_token=token, + ) + finally: + fgac_mod._TOKEN_TTL_SECONDS = original_ttl + + def test_cross_action_replay_raises(self): + """DELETE preview token should not work for CREATE operation.""" + delete_preview = preview_policy_changes( + action="DELETE", + policy_name="test_replay", + securable_type="SCHEMA", + securable_fullname="cat.sch", + ) + token = delete_preview["approval_token"] + + with pytest.raises(ValueError, match="Invalid or expired approval token"): + create_fgac_policy( + policy_name="test_replay", + policy_type="COLUMN_MASK", + securable_type="SCHEMA", + securable_fullname="cat.sch", + function_name="cat.sch.mask", + to_principals=["analysts"], + tag_name="pii", + approval_token=token, + ) + # --------------------------------------------------------------------------- # Admin group check tests From ad0c34f8f9686fbf00130f0cfd89aa2a228ee76a Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 12:46:41 -0600 Subject: [PATCH 29/34] Add ACL test coverage for ground truth and routing Add 4 ground truth test cases for UC ACL operations: read-only access, data engineer access, revoke/show grants, and SDK grant/revoke patterns. Add 3 routing test cases to route ACL prompts to the databricks-unity-catalog skill. 
--- .test/skills/_routing/ground_truth.yaml | 34 +++ .../ground_truth.yaml | 269 ++++++++++++++++++ 2 files changed, 303 insertions(+) diff --git a/.test/skills/_routing/ground_truth.yaml b/.test/skills/_routing/ground_truth.yaml index f7fba36c..793626ad 100644 --- a/.test/skills/_routing/ground_truth.yaml +++ b/.test/skills/_routing/ground_truth.yaml @@ -147,6 +147,40 @@ test_cases: difficulty: "medium" reasoning: "Mentions 'CREATE POLICY' and 'hasTagValue' - UC FGAC governance" + # Single-skill routing - UC ACLs + - id: "routing_acl_001" + inputs: + prompt: "Grant SELECT access on a table to the data_readers group" + expectations: + expected_skills: ["databricks-unity-catalog"] + is_multi_skill: false + metadata: + category: "single_skill" + difficulty: "easy" + reasoning: "Mentions 'Grant SELECT' - UC ACL operation" + + - id: "routing_acl_002" + inputs: + prompt: "How do I revoke write permissions from a group on a schema?" + expectations: + expected_skills: ["databricks-unity-catalog"] + is_multi_skill: false + metadata: + category: "single_skill" + difficulty: "easy" + reasoning: "Mentions 'revoke' and 'permissions' on 'schema' - UC ACL operation" + + - id: "routing_acl_003" + inputs: + prompt: "Set up read-only access for the analytics team on the gold schema" + expectations: + expected_skills: ["databricks-unity-catalog"] + is_multi_skill: false + metadata: + category: "single_skill" + difficulty: "medium" + reasoning: "Access setup on schema with role-based pattern - UC ACL operation" + # Multi-skill routing - id: "routing_multi_001" inputs: diff --git a/.test/skills/databricks-unity-catalog/ground_truth.yaml b/.test/skills/databricks-unity-catalog/ground_truth.yaml index cebf609f..d82f91be 100644 --- a/.test/skills/databricks-unity-catalog/ground_truth.yaml +++ b/.test/skills/databricks-unity-catalog/ground_truth.yaml @@ -1004,3 +1004,272 @@ test_cases: difficulty: "medium" source: "manual" tags: ["mcp", "discovery", "column-tags", "schema-info", 
"table-policies"] + + # ============================================================================ + # UC ACLs — Read-Only Access Pattern + # ============================================================================ + - id: "uc_acl_read_only_001" + inputs: + prompt: "Grant the data_readers group read-only access to the analytics.gold schema" + outputs: + response: | + Here's how to grant read-only access to a schema: + + ```sql + -- Step 1: Grant catalog access (required to see anything inside) + GRANT USE CATALOG ON CATALOG analytics TO `data_readers`; + + -- Step 2: Grant schema access + GRANT USE SCHEMA ON SCHEMA analytics.gold TO `data_readers`; + + -- Step 3: Grant SELECT on all tables in the schema + GRANT SELECT ON SCHEMA analytics.gold TO `data_readers`; + ``` + + Key points: + - `USE CATALOG` is required before any schema-level access + - `USE SCHEMA` is required before any table-level access + - `SELECT` at schema level applies to all current and future tables in the schema + - Grant to **groups**, not individual users — easier to manage and audit + + To verify the grants: + ```sql + SHOW GRANTS ON SCHEMA analytics.gold; + ``` + execution_success: null + expectations: + expected_facts: + - "USE CATALOG" + - "USE SCHEMA" + - "SELECT" + - "GRANT" + expected_patterns: + - pattern: "GRANT USE CATALOG" + min_count: 1 + description: "catalog access grant" + - pattern: "GRANT USE SCHEMA" + min_count: 1 + description: "schema access grant" + - pattern: "GRANT SELECT" + min_count: 1 + description: "read access grant" + - pattern: "data_readers" + min_count: 2 + description: "principal group reference" + guidelines: + - "Must include USE CATALOG, USE SCHEMA, and SELECT grants" + - "Must use group name, not individual user" + - "Should explain privilege inheritance" + - "Should show how to verify grants" + metadata: + category: "happy_path" + difficulty: "easy" + source: "manual" + tags: ["sql", "acl", "grant", "read-only", "schema"] + + # 
============================================================================ + # UC ACLs — Data Engineer Access Pattern + # ============================================================================ + - id: "uc_acl_data_engineer_001" + inputs: + prompt: "Set up data engineer access to the analytics.silver schema with read, write, and create table privileges" + outputs: + response: | + Here's how to set up data engineer access with read, write, and create table privileges: + + ```sql + -- Catalog access + GRANT USE CATALOG ON CATALOG analytics TO `data_engineers`; + + -- Schema access + GRANT USE SCHEMA ON SCHEMA analytics.silver TO `data_engineers`; + + -- Read access + GRANT SELECT ON SCHEMA analytics.silver TO `data_engineers`; + + -- Write access (insert, update, delete) + GRANT MODIFY ON SCHEMA analytics.silver TO `data_engineers`; + + -- Create new tables + GRANT CREATE TABLE ON SCHEMA analytics.silver TO `data_engineers`; + ``` + + Key points: + - `MODIFY` allows INSERT, UPDATE, DELETE operations on tables + - `CREATE TABLE` allows creating new tables and views in the schema + - Granting at schema level covers all current and future tables + - For volume access, add `GRANT CREATE VOLUME` and `GRANT WRITE VOLUME` separately + + To verify: + ```sql + SHOW GRANTS ON SCHEMA analytics.silver; + ``` + execution_success: null + expectations: + expected_facts: + - "USE CATALOG" + - "USE SCHEMA" + - "SELECT" + - "MODIFY" + - "CREATE TABLE" + expected_patterns: + - pattern: "GRANT USE CATALOG" + min_count: 1 + description: "catalog access" + - pattern: "GRANT MODIFY" + min_count: 1 + description: "write access" + - pattern: "GRANT CREATE TABLE" + min_count: 1 + description: "table creation privilege" + guidelines: + - "Must include USE CATALOG, USE SCHEMA, SELECT, MODIFY, and CREATE TABLE" + - "Must explain what MODIFY allows (insert, update, delete)" + - "Should mention schema-level grants cover all tables" + metadata: + category: "happy_path" + difficulty: "easy" + 
source: "manual" + tags: ["sql", "acl", "grant", "data-engineer", "schema"] + + # ============================================================================ + # UC ACLs — Revoke and Show Grants + # ============================================================================ + - id: "uc_acl_revoke_001" + inputs: + prompt: "Revoke write access from the contractors group on analytics.silver and show remaining grants" + outputs: + response: | + Here's how to revoke write access and verify the remaining grants: + + ```sql + -- Revoke write access + REVOKE MODIFY ON SCHEMA analytics.silver FROM `contractors`; + + -- Optionally also revoke CREATE TABLE if they shouldn't create new tables + REVOKE CREATE TABLE ON SCHEMA analytics.silver FROM `contractors`; + + -- Verify remaining grants + SHOW GRANTS ON SCHEMA analytics.silver; + + -- Check grants for specific principal + SHOW GRANTS `contractors` ON SCHEMA analytics.silver; + ``` + + Key points: + - `REVOKE` removes only the specified privilege — other grants remain + - `SHOW GRANTS` lists all grants on the securable + - `SHOW GRANTS ` filters to a specific user or group + - To fully remove access, revoke `USE SCHEMA` and `USE CATALOG` as well + execution_success: null + expectations: + expected_facts: + - "REVOKE" + - "MODIFY" + - "SHOW GRANTS" + expected_patterns: + - pattern: "REVOKE MODIFY" + min_count: 1 + description: "revoke write access" + - pattern: "SHOW GRANTS" + min_count: 1 + description: "verify grants" + - pattern: "FROM `contractors`" + min_count: 1 + description: "revoke from correct principal" + guidelines: + - "Must use REVOKE with FROM syntax" + - "Must show how to verify remaining grants with SHOW GRANTS" + - "Should explain that REVOKE only removes the specified privilege" + metadata: + category: "happy_path" + difficulty: "easy" + source: "manual" + tags: ["sql", "acl", "revoke", "show-grants"] + + # ============================================================================ + # UC ACLs — SDK 
Grant/Revoke + # ============================================================================ + - id: "uc_acl_sdk_001" + inputs: + prompt: "Grant SELECT on a table to a group using the Python SDK" + outputs: + response: | + Here's how to grant SELECT on a table using the Databricks Python SDK: + + ```python + from databricks.sdk import WorkspaceClient + + w = WorkspaceClient() + + # Grant SELECT to data_readers + w.grants.update( + securable_type="TABLE", + full_name="my_catalog.my_schema.my_table", + changes=[{ + "principal": "data_readers", + "add": ["SELECT"], + }] + ) + ``` + + To verify the grant: + + ```python + # Get current grants + grants = w.grants.get( + securable_type="TABLE", + full_name="my_catalog.my_schema.my_table" + ) + for assignment in grants.privilege_assignments: + print(f"{assignment.principal}: {assignment.privileges}") + ``` + + To revoke: + + ```python + w.grants.update( + securable_type="TABLE", + full_name="my_catalog.my_schema.my_table", + changes=[{ + "principal": "data_readers", + "remove": ["SELECT"], + }] + ) + ``` + + Key points: + - `w.grants.update()` handles both grant (via `add`) and revoke (via `remove`) + - `securable_type` can be `"CATALOG"`, `"SCHEMA"`, `"TABLE"`, `"VOLUME"`, `"FUNCTION"` + - `w.grants.get()` returns current grants; `w.grants.get_effective()` includes inherited + execution_success: null + expectations: + expected_facts: + - "w.grants.update" + - "securable_type" + - "principal" + - "add" + - "SELECT" + expected_patterns: + - pattern: "w\\.grants\\.update\\(" + min_count: 1 + description: "SDK grant update call" + - pattern: '"add".*\\[.*"SELECT".*\\]' + min_count: 1 + description: "add SELECT privilege" + - pattern: "securable_type.*TABLE" + min_count: 1 + description: "securable type specified" + - pattern: "w\\.grants\\.get\\(" + min_count: 1 + description: "verify grants" + guidelines: + - "Must use w.grants.update() with add list" + - "Must specify securable_type and full_name" + - "Should show how to 
verify grants with w.grants.get()" + - "Should show how to revoke with remove list" + metadata: + category: "happy_path" + difficulty: "medium" + source: "manual" + tags: ["python", "sdk", "acl", "grant", "table"] From a8ba605b987766464ad19c2957b51982f511d1d3 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 13:10:57 -0600 Subject: [PATCH 30/34] Address low-priority nits from PR review - Add distinct error messages for each token validation failure mode - Remove redundant _validate_identifier calls in create/update/delete - Document why get_table_policies uses raw REST API (SDK lacks effective_masks) - Move routing_multi_004 to single-skill section as routing_fgac_007 - Extract TAG_PROPAGATION_DELAY_SECONDS constant and module-level tag helpers - Remove unused cleanup_governed_tags fixture from conftest - Use PermissionsChange/Privilege/SecurableType in SDK grant examples - Add 8 ground truth test cases (tags, drop, governed tags, quotas, etc.) --- .test/skills/_routing/ground_truth.yaml | 22 +- .../ground_truth.yaml | 485 +++++++++++++++++- .../databricks-unity-catalog/10-uc-acls.md | 25 +- .../unity_catalog/fgac_policies.py | 29 +- .../integration/unity_catalog/conftest.py | 30 -- .../unity_catalog/test_fgac_policies.py | 78 +-- 6 files changed, 544 insertions(+), 125 deletions(-) diff --git a/.test/skills/_routing/ground_truth.yaml b/.test/skills/_routing/ground_truth.yaml index 793626ad..e6c7d7bc 100644 --- a/.test/skills/_routing/ground_truth.yaml +++ b/.test/skills/_routing/ground_truth.yaml @@ -147,6 +147,17 @@ test_cases: difficulty: "medium" reasoning: "Mentions 'CREATE POLICY' and 'hasTagValue' - UC FGAC governance" + - id: "routing_fgac_007" + inputs: + prompt: "Set up FGAC policies for PII masking and query the audit logs to verify who accessed the masked columns" + expectations: + expected_skills: ["databricks-unity-catalog"] + is_multi_skill: false + metadata: + category: "single_skill" + difficulty: "hard" + reasoning: "Both FGAC 
policies and audit logs are in databricks-unity-catalog" + # Single-skill routing - UC ACLs - id: "routing_acl_001" inputs: @@ -221,17 +232,6 @@ test_cases: difficulty: "hard" reasoning: "Combines 'medallion' (SDP) with 'Genie space' (agent-bricks)" - - id: "routing_multi_004" - inputs: - prompt: "Set up FGAC policies for PII masking and query the audit logs to verify who accessed the masked columns" - expectations: - expected_skills: ["databricks-unity-catalog"] - is_multi_skill: false - metadata: - category: "single_skill" - difficulty: "hard" - reasoning: "Both FGAC policies and audit logs are in databricks-unity-catalog" - # No skill match - id: "routing_no_match_001" inputs: diff --git a/.test/skills/databricks-unity-catalog/ground_truth.yaml b/.test/skills/databricks-unity-catalog/ground_truth.yaml index d82f91be..2060654a 100644 --- a/.test/skills/databricks-unity-catalog/ground_truth.yaml +++ b/.test/skills/databricks-unity-catalog/ground_truth.yaml @@ -1005,6 +1005,446 @@ test_cases: source: "manual" tags: ["mcp", "discovery", "column-tags", "schema-info", "table-policies"] + # ============================================================================ + # SQL Generation — Tag Application + # ============================================================================ + - id: "fgac_tag_application_sql_001" + inputs: + prompt: "Apply PII governed tags to columns on my customers table" + outputs: + response: | + Here's how to apply governed tags to columns: + + ```sql + -- Modern syntax (DBR 16.1+) + SET TAG ON COLUMN my_catalog.my_schema.customers.ssn 'pii_type' = 'ssn'; + SET TAG ON COLUMN my_catalog.my_schema.customers.email 'pii_type' = 'email'; + SET TAG ON COLUMN my_catalog.my_schema.customers.phone 'pii_type' = 'phone'; + + -- Legacy syntax (all versions) + ALTER TABLE my_catalog.my_schema.customers + ALTER COLUMN ssn SET TAGS ('pii_type' = 'ssn'); + ``` + + To verify tags were applied: + ```sql + SELECT tag_name, tag_value, column_name + FROM 
system.information_schema.column_tags + WHERE catalog_name = 'my_catalog' + AND schema_name = 'my_schema' + AND table_name = 'customers'; + ``` + + Important: Tags must be **governed tags** (created via the Databricks UI, not SQL). + execution_success: null + expectations: + expected_facts: + - "SET TAG" + - "pii_type" + - "governed tags" + expected_patterns: + - pattern: "SET TAG|SET TAGS" + min_count: 1 + description: "tag application syntax" + - pattern: "pii_type" + min_count: 2 + description: "PII tag key usage" + - pattern: "column_tags" + min_count: 1 + description: "information_schema query for verification" + guidelines: + - "Must use SET TAG ON COLUMN or ALTER TABLE ALTER COLUMN SET TAGS syntax" + - "Must mention governed tags cannot be created via SQL" + - "Should show how to verify tags via information_schema" + metadata: + category: "happy_path" + difficulty: "easy" + source: "manual" + tags: ["sql", "tags", "column-tags", "pii"] + + # ============================================================================ + # SQL Generation — Drop Policy + # ============================================================================ + - id: "fgac_drop_policy_sql_001" + inputs: + prompt: "Drop an FGAC policy from a schema" + outputs: + response: | + Here's how to drop an FGAC policy: + + ```sql + -- Drop at schema level + DROP POLICY mask_pii_schema ON SCHEMA my_catalog.my_schema; + + -- Drop at catalog level + DROP POLICY mask_pii_catalog ON CATALOG my_catalog; + + -- Drop at table level + DROP POLICY mask_pii_table ON TABLE my_catalog.my_schema.my_table; + ``` + + Important notes: + - `DROP POLICY` is **irreversible** — the policy is permanently removed + - There is no `ALTER POLICY` — to change a policy, drop and recreate it + - Use `preview_policy_changes(action="DELETE", ...)` for the human-in-the-loop workflow + execution_success: null + expectations: + expected_facts: + - "DROP POLICY" + - "irreversible" + expected_patterns: + - pattern: "DROP POLICY" + 
min_count: 1 + description: "drop policy syntax" + - pattern: "ON (SCHEMA|CATALOG|TABLE)" + min_count: 1 + description: "scope specifier" + guidelines: + - "Must use DROP POLICY ... ON syntax" + - "Must warn that the operation is irreversible" + - "Must mention ALTER POLICY does not exist" + metadata: + category: "happy_path" + difficulty: "easy" + source: "manual" + tags: ["sql", "drop-policy", "delete"] + + # ============================================================================ + # Error/Edge Cases — Governed Tags Cannot Be Created via SQL + # ============================================================================ + - id: "fgac_error_governed_tags_001" + inputs: + prompt: "How do I create a governed tag using SQL?" + outputs: + response: | + **Governed tags cannot be created via SQL.** They must be created through the Databricks UI. + + To create a governed tag: + 1. Navigate to **Catalog** in your workspace + 2. Select **Governed Tags** from the left panel + 3. Click **Create governed tag** + 4. 
Configure the tag key and allowed values + + Example configuration: + - **Tag Key**: `pii_type` + - **Allowed Values**: `ssn`, `email`, `phone`, `credit_card` + + Once the governed tag exists, you can **apply** it to columns via SQL: + ```sql + SET TAG ON COLUMN catalog.schema.table.column 'pii_type' = 'ssn'; + ``` + execution_success: null + expectations: + expected_facts: + - "cannot be created via SQL" + - "Databricks UI" + - "Governed Tags" + expected_patterns: + - pattern: "cannot.*SQL|not.*SQL|UI" + min_count: 1 + description: "explains SQL limitation" + - pattern: "SET TAG" + min_count: 1 + description: "shows SQL tag application as alternative" + guidelines: + - "Must clearly state governed tags cannot be created via SQL" + - "Must describe the UI workflow for creating governed tags" + - "Should show how to apply tags via SQL after creation" + metadata: + category: "error_handling" + difficulty: "easy" + source: "manual" + tags: ["error", "governed-tags", "ui-only"] + + # ============================================================================ + # SQL Generation — hasTag vs hasTagValue + # ============================================================================ + - id: "fgac_has_tag_vs_value_001" + inputs: + prompt: "What is the difference between hasTag and hasTagValue in FGAC policies?" + outputs: + response: | + `hasTag` and `hasTagValue` are used in FGAC policies to match columns based on governed tags: + + **`hasTagValue('tag_name', 'tag_value')`** — Matches columns with a specific tag AND value: + ```sql + MATCH COLUMNS hasTagValue('pii_type', 'ssn') AS masked_col + ``` + This only matches columns tagged with `pii_type = ssn`. + + **`hasTag('tag_name')`** — Matches columns with a specific tag, regardless of value: + ```sql + MATCH COLUMNS hasTag('pii_type') AS masked_col + ``` + This matches ALL columns tagged with `pii_type`, regardless of whether the value is `ssn`, `email`, `phone`, etc. 
+ + Best practices: + - Use `hasTagValue` when different tag values need different masking (e.g., SSN mask vs email mask) + - Use `hasTag` when all values should be masked the same way (e.g., full redaction for any PII) + execution_success: null + expectations: + expected_facts: + - "hasTag" + - "hasTagValue" + - "regardless of value" + expected_patterns: + - pattern: "hasTagValue\\s*\\(" + min_count: 1 + description: "hasTagValue example" + - pattern: "hasTag\\s*\\(" + min_count: 1 + description: "hasTag example" + - pattern: "MATCH COLUMNS" + min_count: 2 + description: "both match patterns shown" + guidelines: + - "Must explain the difference between hasTag and hasTagValue" + - "Must show SQL examples for both" + - "Should explain when to use each" + metadata: + category: "happy_path" + difficulty: "easy" + source: "manual" + tags: ["sql", "has-tag", "tag-matching", "fgac"] + + # ============================================================================ + # MCP Tool — Delete Policy Workflow + # ============================================================================ + - id: "fgac_mcp_delete_workflow_001" + inputs: + prompt: "Delete an FGAC policy using the human-in-the-loop MCP workflow" + outputs: + response: | + Here's the human-in-the-loop workflow for deleting an FGAC policy: + + **Step 1: PREVIEW** — Get a delete preview and approval token + + ``` + result = preview_policy_changes( + action="DELETE", + policy_name="mask_ssn_finance", + securable_type="SCHEMA", + securable_fullname="prod.finance" + ) + ``` + + This returns: + - `equivalent_sql`: `DROP POLICY mask_ssn_finance ON SCHEMA prod.finance;` + - `warnings`: ["This action is irreversible. 
The policy will be permanently removed."] + - `approval_token`: Token to authorize the deletion + + **Step 2: APPROVE** — Human reviews the preview and confirms + + **Step 3: EXECUTE** — Delete with the approval token + + ``` + delete_fgac_policy( + policy_name="mask_ssn_finance", + securable_type="SCHEMA", + securable_fullname="prod.finance", + approval_token=result["approval_token"] + ) + ``` + + The approval token ensures the deletion matches exactly what was previewed. + execution_success: null + expectations: + expected_facts: + - "preview_policy_changes" + - "DELETE" + - "delete_fgac_policy" + - "approval_token" + - "irreversible" + expected_patterns: + - pattern: 'action.*=.*"DELETE"' + min_count: 1 + description: "DELETE action in preview" + - pattern: "delete_fgac_policy\\(" + min_count: 1 + description: "delete execution step" + - pattern: "approval_token" + min_count: 2 + description: "token in both preview and execute" + guidelines: + - "Must use preview_policy_changes with action=DELETE first" + - "Must pass approval_token to delete_fgac_policy" + - "Must warn the action is irreversible" + metadata: + category: "happy_path" + difficulty: "medium" + source: "manual" + tags: ["mcp", "delete", "workflow", "human-in-the-loop"] + + # ============================================================================ + # SQL Generation — Catalog-Scope Policy + # ============================================================================ + - id: "fgac_catalog_scope_sql_001" + inputs: + prompt: "Create a column mask policy at catalog level to mask all PII across the entire catalog" + outputs: + response: | + Here's a catalog-level column mask policy: + + ```sql + CREATE OR REPLACE POLICY mask_pii_catalog + ON CATALOG my_catalog + COMMENT 'Mask PII columns catalog-wide' + COLUMN MASK my_catalog.governance.mask_full + TO `analysts`, `data_scientists` + EXCEPT `gov_admin` + FOR TABLES + MATCH COLUMNS hasTag('pii_type') AS masked_col + ON COLUMN masked_col; + ``` + + 
Key points: + - `ON CATALOG` applies the policy to ALL tables in ALL schemas within the catalog + - Uses `hasTag('pii_type')` (without value) to match any PII column regardless of type + - Policy quotas: max 10 policies per catalog + - `EXCEPT `gov_admin`` protects admin access + execution_success: null + expectations: + expected_facts: + - "ON CATALOG" + - "COLUMN MASK" + - "FOR TABLES" + - "EXCEPT `gov_admin`" + expected_patterns: + - pattern: "ON CATALOG" + min_count: 1 + description: "catalog scope" + - pattern: "COLUMN MASK" + min_count: 1 + description: "column mask type" + - pattern: "EXCEPT\\s+`gov_admin`" + min_count: 1 + description: "admin exclusion" + guidelines: + - "Must use ON CATALOG scope" + - "Must include EXCEPT `gov_admin`" + - "Must include FOR TABLES" + - "Should mention catalog-level policy quotas" + metadata: + category: "happy_path" + difficulty: "medium" + source: "manual" + tags: ["sql", "column-mask", "catalog-scope"] + + # ============================================================================ + # Error/Edge Cases — Policy Quota + # ============================================================================ + - id: "fgac_error_quota_001" + inputs: + prompt: "I'm getting POLICY_QUOTA_EXCEEDED when creating a policy on my table. How do I fix this?" + outputs: + response: | + The `POLICY_QUOTA_EXCEEDED` error means you've reached the maximum number of policies on that securable. + + **Policy quotas:** + | Scope | Max Policies | + |-------|-------------| + | Per Catalog | 10 | + | Per Schema | 10 | + | Per Table | 5 | + + **How to fix:** + + 1. **Check current quota** using MCP: + ``` + check_policy_quota( + securable_type="TABLE", + securable_fullname="my_catalog.my_schema.my_table" + ) + ``` + + 2. **Consolidate policies** — Combine multiple policies that use the same UDF into one with broader tag matching (`hasTag` instead of multiple `hasTagValue` policies) + + 3. 
**Move to broader scope** — Instead of 5 table-level policies, create 1 schema-level policy + + 4. **Remove unused policies**: + ```sql + DROP POLICY old_policy_name ON TABLE my_catalog.my_schema.my_table; + ``` + execution_success: null + expectations: + expected_facts: + - "POLICY_QUOTA_EXCEEDED" + - "10" + - "5" + - "check_policy_quota" + expected_patterns: + - pattern: "10" + min_count: 1 + description: "catalog/schema quota" + - pattern: "5" + min_count: 1 + description: "table quota" + - pattern: "check_policy_quota|consolidat|broader scope" + min_count: 1 + description: "solution recommendation" + guidelines: + - "Must list quota limits for all three scopes" + - "Must suggest consolidation or broader scope as solutions" + - "Should show check_policy_quota MCP tool" + metadata: + category: "error_handling" + difficulty: "medium" + source: "manual" + tags: ["error", "quota", "policy-limits"] + + # ============================================================================ + # UC ACLs — Ownership Transfer + # ============================================================================ + - id: "uc_acl_ownership_001" + inputs: + prompt: "Transfer ownership of a schema to a new group" + outputs: + response: | + Here's how to transfer ownership of a schema: + + ```sql + ALTER SCHEMA my_catalog.my_schema OWNER TO `platform_team`; + ``` + + You can also transfer ownership of other securables: + ```sql + -- Catalog ownership + ALTER CATALOG my_catalog OWNER TO `platform_team`; + + -- Table ownership + ALTER TABLE my_catalog.my_schema.my_table OWNER TO `platform_team`; + + -- Volume ownership + ALTER VOLUME my_catalog.my_schema.my_volume OWNER TO `platform_team`; + ``` + + Key points: + - Only the current owner or a metastore admin can transfer ownership + - The owner has all privileges on the object + - The owner can grant/revoke privileges to others + - There is exactly one owner per securable + execution_success: null + expectations: + expected_facts: + - "ALTER" + 
- "OWNER TO" + expected_patterns: + - pattern: "ALTER SCHEMA.*OWNER TO" + min_count: 1 + description: "schema ownership transfer" + - pattern: "OWNER TO" + min_count: 1 + description: "ownership transfer syntax" + guidelines: + - "Must use ALTER ... OWNER TO syntax" + - "Must explain who can transfer ownership" + - "Should show examples for multiple securable types" + metadata: + category: "happy_path" + difficulty: "easy" + source: "manual" + tags: ["sql", "acl", "ownership", "transfer"] + # ============================================================================ # UC ACLs — Read-Only Access Pattern # ============================================================================ @@ -1199,17 +1639,18 @@ test_cases: ```python from databricks.sdk import WorkspaceClient + from databricks.sdk.service.catalog import Privilege, PermissionsChange, SecurableType w = WorkspaceClient() # Grant SELECT to data_readers w.grants.update( - securable_type="TABLE", + securable_type=SecurableType.TABLE, full_name="my_catalog.my_schema.my_table", - changes=[{ - "principal": "data_readers", - "add": ["SELECT"], - }] + changes=[PermissionsChange( + principal="data_readers", + add=[Privilege.SELECT], + )] ) ``` @@ -1218,7 +1659,7 @@ test_cases: ```python # Get current grants grants = w.grants.get( - securable_type="TABLE", + securable_type=SecurableType.TABLE, full_name="my_catalog.my_schema.my_table" ) for assignment in grants.privilege_assignments: @@ -1229,43 +1670,45 @@ test_cases: ```python w.grants.update( - securable_type="TABLE", + securable_type=SecurableType.TABLE, full_name="my_catalog.my_schema.my_table", - changes=[{ - "principal": "data_readers", - "remove": ["SELECT"], - }] + changes=[PermissionsChange( + principal="data_readers", + remove=[Privilege.SELECT], + )] ) ``` Key points: - `w.grants.update()` handles both grant (via `add`) and revoke (via `remove`) - - `securable_type` can be `"CATALOG"`, `"SCHEMA"`, `"TABLE"`, `"VOLUME"`, `"FUNCTION"` + - Use `SecurableType` 
enum: `CATALOG`, `SCHEMA`, `TABLE`, `VOLUME`, `FUNCTION` + - Use `Privilege` enum: `SELECT`, `MODIFY`, `CREATE_TABLE`, etc. + - Use `PermissionsChange` objects instead of raw dicts - `w.grants.get()` returns current grants; `w.grants.get_effective()` includes inherited execution_success: null expectations: expected_facts: - "w.grants.update" - - "securable_type" - - "principal" - - "add" + - "PermissionsChange" + - "SecurableType" + - "Privilege" - "SELECT" expected_patterns: - pattern: "w\\.grants\\.update\\(" min_count: 1 description: "SDK grant update call" - - pattern: '"add".*\\[.*"SELECT".*\\]' + - pattern: "PermissionsChange\\(" min_count: 1 - description: "add SELECT privilege" - - pattern: "securable_type.*TABLE" + description: "typed permissions change object" + - pattern: "Privilege\\.SELECT" min_count: 1 - description: "securable type specified" + description: "typed privilege enum" - pattern: "w\\.grants\\.get\\(" min_count: 1 description: "verify grants" guidelines: - - "Must use w.grants.update() with add list" - - "Must specify securable_type and full_name" + - "Must use w.grants.update() with PermissionsChange objects" + - "Must use SecurableType and Privilege enums, not raw strings/dicts" - "Should show how to verify grants with w.grants.get()" - "Should show how to revoke with remove list" metadata: diff --git a/databricks-skills/databricks-unity-catalog/10-uc-acls.md b/databricks-skills/databricks-unity-catalog/10-uc-acls.md index 038ceb85..f6560198 100644 --- a/databricks-skills/databricks-unity-catalog/10-uc-acls.md +++ b/databricks-skills/databricks-unity-catalog/10-uc-acls.md @@ -128,32 +128,33 @@ ALTER VOLUME my_catalog.my_schema.my_volume OWNER TO `new_owner`; ```python from databricks.sdk import WorkspaceClient +from databricks.sdk.service.catalog import Privilege, PermissionsChange, SecurableType w = WorkspaceClient() # Grant privileges w.grants.update( - securable_type="TABLE", + securable_type=SecurableType.TABLE, 
full_name="my_catalog.my_schema.my_table", - changes=[{ - "principal": "data_readers", - "add": ["SELECT"], - }] + changes=[PermissionsChange( + principal="data_readers", + add=[Privilege.SELECT], + )] ) # Revoke privileges w.grants.update( - securable_type="TABLE", + securable_type=SecurableType.TABLE, full_name="my_catalog.my_schema.my_table", - changes=[{ - "principal": "data_readers", - "remove": ["SELECT"], - }] + changes=[PermissionsChange( + principal="data_readers", + remove=[Privilege.SELECT], + )] ) # Get current grants grants = w.grants.get( - securable_type="TABLE", + securable_type=SecurableType.TABLE, full_name="my_catalog.my_schema.my_table" ) for assignment in grants.privilege_assignments: @@ -161,7 +162,7 @@ for assignment in grants.privilege_assignments: # Get effective grants (includes inherited) effective = w.grants.get_effective( - securable_type="TABLE", + securable_type=SecurableType.TABLE, full_name="my_catalog.my_schema.my_table", principal="data_readers" ) diff --git a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py index 9afbe93f..7e2f84db 100644 --- a/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/fgac_policies.py @@ -69,25 +69,25 @@ def _validate_approval_token(approval_token: str, current_params: dict) -> None: try: signature, b64_payload = approval_token.split(":", 1) except (ValueError, AttributeError): - raise ValueError("Invalid or expired approval token") + raise ValueError("Malformed approval token: expected 'signature:payload' format") try: payload = base64.b64decode(b64_payload).decode() except Exception: - raise ValueError("Invalid or expired approval token") + raise ValueError("Malformed approval token: payload is not valid base64") expected_sig = hmac.new(_APPROVAL_SECRET.encode(), payload.encode(), hashlib.sha256).hexdigest() if 
not hmac.compare_digest(signature, expected_sig): - raise ValueError("Invalid or expired approval token") + raise ValueError("Invalid approval token: signature verification failed") try: token_data = json.loads(payload) except json.JSONDecodeError: - raise ValueError("Invalid or expired approval token") + raise ValueError("Malformed approval token: payload is not valid JSON") ts = token_data.pop("timestamp", 0) if abs(time.time() - ts) > _TOKEN_TTL_SECONDS: - raise ValueError("Invalid or expired approval token") + raise ValueError("Expired approval token: please run preview again to get a new token") # Map preview action to mutation action action_map = {"CREATE": "create", "UPDATE": "update", "DELETE": "delete"} @@ -95,12 +95,15 @@ def _validate_approval_token(approval_token: str, current_params: dict) -> None: current_action = params.pop("action", None) if token_action and current_action: if action_map.get(token_action) != current_action: - raise ValueError("Invalid or expired approval token") + raise ValueError( + f"Approval token action mismatch: token is for '{token_action}'" + f" but current action is '{current_action}'" + ) # Compare remaining params clean_current = {k: v for k, v in params.items() if v is not None} if token_data != clean_current: - raise ValueError("Invalid or expired approval token") + raise ValueError("Approval token parameter mismatch: params differ from what was previewed") def _check_admin_group() -> dict: @@ -278,8 +281,10 @@ def get_table_policies( """ Get column masks and row filters applied to a specific table. - Uses the Unity Catalog REST API directly to retrieve effective - column masks and row filters, including those derived from FGAC policies. + Uses the Unity Catalog REST API directly because the Python SDK's + TableInfo does not expose ``effective_masks`` (FGAC-derived masks). + The ``/api/2.1/unity-catalog/tables/`` endpoint returns both direct + column masks and effective masks from FGAC policies. 
Args: catalog: Catalog name @@ -948,8 +953,8 @@ def create_fgac_policy( """ ptype = _validate_policy_type(policy_type) stype = _validate_securable_type(securable_type) - _validate_identifier(securable_fullname) - _validate_identifier(function_name) + # Identifier validation is handled by preview_policy_changes() — the token + # binding ensures these values match what was already validated at preview time. current_params = { "action": "create", "policy_name": policy_name, @@ -1054,7 +1059,6 @@ def update_fgac_policy( Dict with update status and applied changes """ stype = _validate_securable_type(securable_type) - _validate_identifier(securable_fullname) current_params = { "action": "update", "policy_name": policy_name, @@ -1147,7 +1151,6 @@ def delete_fgac_policy( Dict with deletion status """ stype = _validate_securable_type(securable_type) - _validate_identifier(securable_fullname) current_params = { "action": "delete", "policy_name": policy_name, diff --git a/databricks-tools-core/tests/integration/unity_catalog/conftest.py b/databricks-tools-core/tests/integration/unity_catalog/conftest.py index dbff949b..339bc616 100644 --- a/databricks-tools-core/tests/integration/unity_catalog/conftest.py +++ b/databricks-tools-core/tests/integration/unity_catalog/conftest.py @@ -266,33 +266,3 @@ def register(policy_tuple: tuple): ) except Exception as e: logger.warning(f"Failed to cleanup policy {name}: {e}") - - -@pytest.fixture(scope="function") -def cleanup_governed_tags(): - """ - Track and cleanup governed tags (tag policies) created during tests. - - Uses the Tag Policies API (w.tag_policies) to delete governed tags. - - Usage: - def test_create_tag(cleanup_governed_tags): - w.tag_policies.create_tag_policy(...) 
- cleanup_governed_tags("my_tag_key") - """ - tags_to_cleanup = [] - - def register(tag_key: str): - if tag_key not in tags_to_cleanup: - tags_to_cleanup.append(tag_key) - logger.info(f"Registered governed tag for cleanup: {tag_key}") - - yield register - - w = get_workspace_client() - for tag_key in tags_to_cleanup: - try: - logger.info(f"Cleaning up governed tag: {tag_key}") - w.tag_policies.delete_tag_policy(tag_key=tag_key) - except Exception as e: - logger.warning(f"Failed to cleanup governed tag {tag_key}: {e}") diff --git a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py index 026e123e..dd0879f0 100644 --- a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py +++ b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py @@ -50,6 +50,37 @@ UC_TEST_PREFIX = "uc_test" +# Governed tags need time to propagate to the FGAC policy system after creation. 
+TAG_PROPAGATION_DELAY_SECONDS = 30 + + +def _create_governed_tag(tag_key: str, allowed_values: list[str]) -> None: + """Create a governed tag via the Tag Policies API and wait for propagation.""" + from databricks.sdk.service.tags import TagPolicy, Value + + w = get_workspace_client() + w.tag_policies.create_tag_policy( + tag_policy=TagPolicy( + tag_key=tag_key, + description=f"Integration test tag ({tag_key})", + values=[Value(name=v) for v in allowed_values], + ) + ) + logger.info(f"Created governed tag: {tag_key} (values={allowed_values})") + + logger.info(f"Waiting {TAG_PROPAGATION_DELAY_SECONDS}s for governed tag propagation...") + time.sleep(TAG_PROPAGATION_DELAY_SECONDS) + + +def _delete_governed_tag(tag_key: str) -> None: + """Delete a governed tag via the Tag Policies API.""" + try: + w = get_workspace_client() + w.tag_policies.delete_tag_policy(tag_key=tag_key) + logger.info(f"Deleted governed tag: {tag_key}") + except Exception as e: + logger.warning(f"Failed to delete governed tag {tag_key}: {e}") + # --------------------------------------------------------------------------- # Discovery tests @@ -617,7 +648,7 @@ def test_create_without_token_raises(self): def test_create_with_invalid_token_raises_value_error(self): """create_fgac_policy with an invalid token should raise ValueError before admin check.""" - with pytest.raises(ValueError, match="Invalid or expired approval token"): + with pytest.raises(ValueError, match="Malformed approval token"): create_fgac_policy( policy_name="test_bad_token", policy_type="COLUMN_MASK", @@ -692,7 +723,7 @@ def test_full_preview_then_create_workflow( cleanup_policies((policy_name, "SCHEMA", full_schema)) - TestFgacPolicyCRUD._create_governed_tag(tag_key, [tag_value]) + _create_governed_tag(tag_key, [tag_value]) try: fn_name = f"{test_catalog}.{uc_test_schema}.{UC_TEST_PREFIX}_tok_fn_{unique_name}" @@ -757,7 +788,7 @@ def test_full_preview_then_create_workflow( ) finally: - 
TestFgacPolicyCRUD._delete_governed_tag(tag_key) + _delete_governed_tag(tag_key) def test_token_with_mismatched_params_raises(self): """Token from preview with name A should not work for create with name B.""" @@ -805,7 +836,7 @@ def test_expired_token_raises(self): original_ttl = fgac_mod._TOKEN_TTL_SECONDS try: fgac_mod._TOKEN_TTL_SECONDS = 0 - with pytest.raises(ValueError, match="Invalid or expired approval token"): + with pytest.raises(ValueError, match="Expired approval token"): create_fgac_policy( policy_name="test_expire", policy_type="COLUMN_MASK", @@ -829,7 +860,7 @@ def test_cross_action_replay_raises(self): ) token = delete_preview["approval_token"] - with pytest.raises(ValueError, match="Invalid or expired approval token"): + with pytest.raises(ValueError, match="action mismatch"): create_fgac_policy( policy_name="test_replay", policy_type="COLUMN_MASK", @@ -887,35 +918,6 @@ class TestFgacPolicyCRUD: then cleans it up afterwards. No manual UI setup is required. """ - @staticmethod - def _create_governed_tag(tag_key: str, allowed_values: list[str]) -> None: - """Create a governed tag via the Tag Policies API.""" - from databricks.sdk.service.tags import TagPolicy, Value - - w = get_workspace_client() - w.tag_policies.create_tag_policy( - tag_policy=TagPolicy( - tag_key=tag_key, - description=f"Integration test tag ({tag_key})", - values=[Value(name=v) for v in allowed_values], - ) - ) - logger.info(f"Created governed tag: {tag_key} (values={allowed_values})") - - # Wait for governed tag to propagate to the FGAC policy system - logger.info("Waiting 30s for governed tag propagation...") - time.sleep(30) - - @staticmethod - def _delete_governed_tag(tag_key: str) -> None: - """Delete a governed tag via the Tag Policies API.""" - try: - w = get_workspace_client() - w.tag_policies.delete_tag_policy(tag_key=tag_key) - logger.info(f"Deleted governed tag: {tag_key}") - except Exception as e: - logger.warning(f"Failed to delete governed tag {tag_key}: {e}") - def 
test_create_get_update_delete_column_mask_policy( self, test_catalog: str, @@ -938,7 +940,7 @@ def test_create_get_update_delete_column_mask_policy( cleanup_policies((policy_name, "SCHEMA", full_schema)) # --- Setup: governed tag, masking UDF, column tag --- - self._create_governed_tag(tag_key, [tag_value]) + _create_governed_tag(tag_key, [tag_value]) try: fn_name = f"{test_catalog}.{uc_test_schema}.{UC_TEST_PREFIX}_mask_fn_{unique_name}" @@ -1072,7 +1074,7 @@ def test_create_get_update_delete_column_mask_policy( logger.info("Policy deleted") finally: - self._delete_governed_tag(tag_key) + _delete_governed_tag(tag_key) def test_create_row_filter_policy( self, @@ -1095,7 +1097,7 @@ def test_create_row_filter_policy( cleanup_policies((policy_name, "SCHEMA", full_schema)) # --- Setup: governed tag, zero-arg UDF, column tag --- - self._create_governed_tag(tag_key, [tag_value]) + _create_governed_tag(tag_key, [tag_value]) try: fn_name = f"{test_catalog}.{uc_test_schema}.{UC_TEST_PREFIX}_rf_fn_{unique_name}" @@ -1176,4 +1178,4 @@ def test_create_row_filter_policy( logger.info("Row filter policy deleted") finally: - self._delete_governed_tag(tag_key) + _delete_governed_tag(tag_key) From a28987b7045582c38458f2f5b1ce640205cc4b02 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 14:04:43 -0600 Subject: [PATCH 31/34] Expand SQL keywords in test scorers and fix FGAC test reliability Add DDL/DCL keywords (ALTER, DROP, GRANT, REVOKE, etc.) to SQL validation in executor and universal scorer. Add UC governance routing triggers. Fix admin group test to reliably trigger PermissionError and accept SDK-prefixed policy_type values. 
--- .test/src/skill_test/grp/executor.py | 5 ++- .test/src/skill_test/scorers/routing.py | 21 ++++++++++++ .test/src/skill_test/scorers/universal.py | 3 +- .../unity_catalog/test_fgac_policies.py | 33 ++++++++++++------- 4 files changed, 48 insertions(+), 14 deletions(-) diff --git a/.test/src/skill_test/grp/executor.py b/.test/src/skill_test/grp/executor.py index c5f99927..c8843bd4 100644 --- a/.test/src/skill_test/grp/executor.py +++ b/.test/src/skill_test/grp/executor.py @@ -156,7 +156,10 @@ def verify_sql_structure(code: str) -> ExecutionResult: issues = [] # Check for valid SQL statements - statements = ["SELECT", "CREATE", "INSERT", "UPDATE", "DELETE", "WITH", "MERGE"] + statements = [ + "SELECT", "CREATE", "INSERT", "UPDATE", "DELETE", "WITH", "MERGE", + "ALTER", "DROP", "GRANT", "REVOKE", "SET", "SHOW", "DESCRIBE", "MATCH", + ] has_statement = any(stmt in code.upper() for stmt in statements) if not has_statement: issues.append("No recognizable SQL statement found") diff --git a/.test/src/skill_test/scorers/routing.py b/.test/src/skill_test/scorers/routing.py index 4b38e84b..b7b3ad7c 100644 --- a/.test/src/skill_test/scorers/routing.py +++ b/.test/src/skill_test/scorers/routing.py @@ -30,6 +30,27 @@ "synthetic-data-generation": ["synthetic data", "fake data", "generate data", "mock data", "faker"], "mlflow-evaluation": ["mlflow eval", "evaluate agent", "scorer", "genai.evaluate", "llm judge"], "agent-bricks": ["agent brick", "knowledge assistant", "genie", "multi-agent", "supervisor"], + "databricks-unity-catalog": [ + "fgac", + "column mask", + "row filter", + "governed tag", + "masking udf", + "create policy", + "drop policy", + "hastagvalue", + "hastag", + "pii classification", + "grant select", + "grant use", + "revoke", + "permissions", + "acl", + "access control", + "read-only access", + "ownership", + "owner to", + ], } diff --git a/.test/src/skill_test/scorers/universal.py b/.test/src/skill_test/scorers/universal.py index c44c76d0..827d4178 100644 
--- a/.test/src/skill_test/scorers/universal.py +++ b/.test/src/skill_test/scorers/universal.py @@ -44,7 +44,8 @@ def sql_syntax(outputs: Dict[str, Any]) -> Feedback: errors = [] for i, block in enumerate(sql_blocks): - if not re.search(r"(SELECT|CREATE|INSERT|UPDATE|DELETE|WITH|MERGE)", block, re.I): + sql_kw = r"(SELECT|CREATE|INSERT|UPDATE|DELETE|WITH|MERGE|ALTER|DROP|GRANT|REVOKE|SET|SHOW|DESCRIBE|MATCH)" + if not re.search(sql_kw, block, re.I): errors.append(f"Block {i + 1}: No recognizable SQL statement") if block.count("(") != block.count(")"): errors.append(f"Block {i + 1}: Unbalanced parentheses") diff --git a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py index dd0879f0..bc6a506e 100644 --- a/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py +++ b/databricks-tools-core/tests/integration/unity_catalog/test_fgac_policies.py @@ -128,7 +128,8 @@ def test_list_policies_with_type_filter(self, test_catalog: str): assert result["success"] is True for p in result["policies"]: - assert p.get("policy_type") == "COLUMN_MASK" + # SDK returns POLICY_TYPE_COLUMN_MASK; accept both forms + assert p.get("policy_type") in ("COLUMN_MASK", "POLICY_TYPE_COLUMN_MASK") logger.info(f"Found {result['policy_count']} COLUMN_MASK policies") def test_list_policies_without_inherited(self, test_catalog: str): @@ -662,6 +663,8 @@ def test_create_with_invalid_token_raises_value_error(self): def test_create_without_admin_group_raises_permission_error(self): """create_fgac_policy should raise PermissionError if user is not in admin group.""" + import databricks_tools_core.unity_catalog.fgac_policies as fgac_mod + # Get a valid token via preview so we pass token validation preview = preview_policy_changes( action="CREATE", @@ -673,17 +676,23 @@ def test_create_without_admin_group_raises_permission_error(self): function_name="cat.sch.fn", tag_name="pii", ) - 
with pytest.raises(PermissionError, match="not a member of admin group"): - create_fgac_policy( - policy_name="test_admin_check", - policy_type="COLUMN_MASK", - securable_type="SCHEMA", - securable_fullname="cat.sch", - function_name="cat.sch.fn", - to_principals=["analysts"], - tag_name="pii", - approval_token=preview["approval_token"], - ) + # Temporarily set admin group to a non-existent group so the check fails + original = fgac_mod._ADMIN_GROUP + try: + fgac_mod._ADMIN_GROUP = "nonexistent_group_xyz_12345" + with pytest.raises(PermissionError, match="not a member of admin group"): + create_fgac_policy( + policy_name="test_admin_check", + policy_type="COLUMN_MASK", + securable_type="SCHEMA", + securable_fullname="cat.sch", + function_name="cat.sch.fn", + to_principals=["analysts"], + tag_name="pii", + approval_token=preview["approval_token"], + ) + finally: + fgac_mod._ADMIN_GROUP = original def test_preview_returns_approval_token(self): """preview_policy_changes should return an approval_token.""" From 0e55553f9483d0375b1cd2f9436f6765e64730c3 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 14:12:12 -0600 Subject: [PATCH 32/34] Remove unused imports and fix ruff formatting --- .test/src/skill_test/grp/executor.py | 21 +++++++++++++++---- .test/src/skill_test/runners/compare.py | 2 +- .test/src/skill_test/runners/evaluate.py | 3 +-- .test/src/skill_test/scorers/trace.py | 2 -- .../skill_test/trace/mlflow_integration.py | 2 +- .test/src/skill_test/trace/source.py | 1 - .../unity_catalog/volume_files.py | 1 - 7 files changed, 20 insertions(+), 12 deletions(-) diff --git a/.test/src/skill_test/grp/executor.py b/.test/src/skill_test/grp/executor.py index c8843bd4..28100892 100644 --- a/.test/src/skill_test/grp/executor.py +++ b/.test/src/skill_test/grp/executor.py @@ -3,8 +3,8 @@ import ast import re import time -from dataclasses import dataclass, field -from typing import List, Tuple, Optional, Dict, Any, Callable, Protocol +from dataclasses 
import dataclass +from typing import List, Tuple, Optional, Dict, Any, Protocol @dataclass @@ -157,8 +157,21 @@ def verify_sql_structure(code: str) -> ExecutionResult: # Check for valid SQL statements statements = [ - "SELECT", "CREATE", "INSERT", "UPDATE", "DELETE", "WITH", "MERGE", - "ALTER", "DROP", "GRANT", "REVOKE", "SET", "SHOW", "DESCRIBE", "MATCH", + "SELECT", + "CREATE", + "INSERT", + "UPDATE", + "DELETE", + "WITH", + "MERGE", + "ALTER", + "DROP", + "GRANT", + "REVOKE", + "SET", + "SHOW", + "DESCRIBE", + "MATCH", ] has_statement = any(stmt in code.upper() for stmt in statements) if not has_statement: diff --git a/.test/src/skill_test/runners/compare.py b/.test/src/skill_test/runners/compare.py index 460d03db..d2d95b56 100644 --- a/.test/src/skill_test/runners/compare.py +++ b/.test/src/skill_test/runners/compare.py @@ -3,7 +3,7 @@ import json from datetime import datetime from pathlib import Path -from typing import Dict, Any, Optional, List +from typing import Dict, Optional, List from dataclasses import dataclass, asdict diff --git a/.test/src/skill_test/runners/evaluate.py b/.test/src/skill_test/runners/evaluate.py index 1dff1009..dc2530f4 100644 --- a/.test/src/skill_test/runners/evaluate.py +++ b/.test/src/skill_test/runners/evaluate.py @@ -4,7 +4,7 @@ from typing import Optional, Dict, Any, List import yaml import mlflow -from mlflow.genai.scorers import Guidelines, Safety +from mlflow.genai.scorers import Safety from ..config import SkillTestConfig from ..dataset import get_dataset_source @@ -25,7 +25,6 @@ file_existence, tool_sequence, category_limits, - get_trace_scorers, ) diff --git a/.test/src/skill_test/scorers/trace.py b/.test/src/skill_test/scorers/trace.py index 2532a01d..8cd6bf13 100644 --- a/.test/src/skill_test/scorers/trace.py +++ b/.test/src/skill_test/scorers/trace.py @@ -12,8 +12,6 @@ from mlflow.entities import Feedback from mlflow.genai.scorers import scorer -from ..trace.models import TraceMetrics - @scorer def tool_count( diff 
--git a/.test/src/skill_test/trace/mlflow_integration.py b/.test/src/skill_test/trace/mlflow_integration.py index 62f8ffb1..179391f7 100644 --- a/.test/src/skill_test/trace/mlflow_integration.py +++ b/.test/src/skill_test/trace/mlflow_integration.py @@ -13,7 +13,7 @@ from pathlib import Path from typing import Any, Dict, List, Optional, Union -from .models import TraceMetrics, ToolCall, FileOperation +from .models import TraceMetrics, ToolCall from .parser import parse_and_compute_metrics diff --git a/.test/src/skill_test/trace/source.py b/.test/src/skill_test/trace/source.py index 74918875..8b7abef9 100644 --- a/.test/src/skill_test/trace/source.py +++ b/.test/src/skill_test/trace/source.py @@ -5,7 +5,6 @@ 2. Local fallback (~/.claude/projects/{hash}/*.jsonl) """ -import os import subprocess from dataclasses import dataclass from pathlib import Path diff --git a/databricks-tools-core/databricks_tools_core/unity_catalog/volume_files.py b/databricks-tools-core/databricks_tools_core/unity_catalog/volume_files.py index 0c1fd84a..fca8cedc 100644 --- a/databricks-tools-core/databricks_tools_core/unity_catalog/volume_files.py +++ b/databricks-tools-core/databricks_tools_core/unity_catalog/volume_files.py @@ -12,7 +12,6 @@ from pathlib import Path from typing import List, Optional -from databricks.sdk.service.files import DirectoryEntry from ..auth import get_workspace_client From 404fe6e06f884f0d177029c4d0b6beebafc8401c Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Thu, 12 Feb 2026 14:30:58 -0600 Subject: [PATCH 33/34] Document check_policy_quota and add FGAC to READMEs Add check_policy_quota to skill docs. Add FGAC tools section to MCP server README with architecture diagram update. Update root, tools-core, and unity-catalog skill READMEs to reference FGAC governance. 
--- README.md | 2 +- databricks-mcp-server/README.md | 9 +++++++ .../9-fgac-sdk-and-tools.md | 25 +++++++++++++++++++ .../databricks-unity-catalog/README.md | 13 +++++++--- databricks-tools-core/README.md | 2 +- 5 files changed, 46 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 4cfa745d..701fe862 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ AI-Driven Development (vibe coding) on Databricks just got a whole lot better. T - **Spark Declarative Pipelines** (streaming tables, CDC, SCD Type 2, Auto Loader) - **Databricks Jobs** (scheduled workflows, multi-task DAGs) - **AI/BI Dashboards** (visualizations, KPIs, analytics) -- **Unity Catalog** (tables, volumes, governance) +- **Unity Catalog** (tables, volumes, governance, FGAC column masks & row filters) - **Genie Spaces** (natural language data exploration) - **Knowledge Assistants** (RAG-based document Q&A) - **MLflow Experiments** (evaluation, scoring, traces) diff --git a/databricks-mcp-server/README.md b/databricks-mcp-server/README.md index fdaa460e..17e1f260 100644 --- a/databricks-mcp-server/README.md +++ b/databricks-mcp-server/README.md @@ -172,6 +172,14 @@ Claude now has both: | `publish_dashboard` | Publish a dashboard to make it accessible | | `unpublish_dashboard` | Unpublish a dashboard | +### Unity Catalog FGAC Governance + +| Tool | Description | +|------|-------------| +| `manage_uc_fgac_policies` | Unified tool for FGAC policy governance — dispatches to discovery, analysis, preview, and management actions via the `action` parameter | + +**Actions:** `list_fgac_policies`, `get_fgac_policy`, `get_table_policies`, `get_masking_functions`, `get_column_tags_api`, `get_schema_info`, `get_catalog_info`, `list_table_policies_in_schema`, `analyze_fgac_coverage`, `check_policy_quota`, `preview_policy_changes`, `create_fgac_policy`, `update_fgac_policy`, `delete_fgac_policy` + ### Model Serving | Tool | Description | @@ -204,6 +212,7 @@ Claude now has both: │ 
tools/pipelines.py ────────┤ │ │ tools/agent_bricks.py ─────┤ │ │ tools/aibi_dashboards.py ──┤ │ +│ tools/fgac_policies.py ────┤ │ │ tools/serving.py ──────────┘ │ └──────────────────────────────┬──────────────────────────────┘ │ Python imports diff --git a/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md b/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md index 867ff60e..f82cf774 100644 --- a/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md +++ b/databricks-skills/databricks-unity-catalog/9-fgac-sdk-and-tools.md @@ -356,6 +356,31 @@ analyze_fgac_coverage( } ``` +#### `check_policy_quota` + +Check if the policy quota allows creating a new policy on a securable. + +```python +check_policy_quota( + securable_type: str, # "CATALOG", "SCHEMA", or "TABLE" + securable_fullname: str, # e.g., "my_catalog.my_schema" +) +``` + +**Returns:** +```json +{ + "success": true, + "securable_type": "SCHEMA", + "securable_fullname": "my_catalog.my_schema", + "current": 3, + "max": 10, + "can_create": true +} +``` + +**Quotas:** CATALOG=10, SCHEMA=10, TABLE=5. + ### Preview Tool (Human-in-the-Loop Gate) #### `preview_policy_changes` diff --git a/databricks-skills/databricks-unity-catalog/README.md b/databricks-skills/databricks-unity-catalog/README.md index a0ea1219..f790969a 100644 --- a/databricks-skills/databricks-unity-catalog/README.md +++ b/databricks-skills/databricks-unity-catalog/README.md @@ -1,10 +1,10 @@ # Databricks Unity Catalog -Unity Catalog system tables and volumes -- query audit logs, lineage, billing, and manage volume file operations. +Unity Catalog system tables, volumes, access controls (ACLs), and FGAC policy governance (column masks, row filters, governed tags). ## Overview -This skill provides guidance for working with Unity Catalog system tables and volumes. 
It activates when users query system tables (audit, lineage, billing, compute, jobs, query history), perform volume file operations (upload, download, list files), or need to understand governance and access controls. The skill covers the `system` catalog schemas, SQL grant patterns, and MCP tool integration for both system table queries and volume management. +This skill provides guidance for working with Unity Catalog system tables, volumes, access controls, and Fine-Grained Access Control (FGAC) policies. It activates when users query system tables (audit, lineage, billing, compute, jobs, query history), perform volume file operations (upload, download, list files), manage UC permissions (GRANT/REVOKE), or manage FGAC policies (column masks, row filters, governed tags, masking UDFs). ## What's Included @@ -12,7 +12,11 @@ This skill provides guidance for working with Unity Catalog system tables and vo databricks-unity-catalog/ ├── SKILL.md ├── 5-system-tables.md -└── 6-volumes.md +├── 6-volumes.md +├── 7-fgac-overview.md +├── 8-fgac-sql-generation.md +├── 9-fgac-sdk-and-tools.md +└── 10-uc-acls.md ``` ## Key Topics @@ -36,6 +40,9 @@ databricks-unity-catalog/ - Tracking table dependencies and column-level lineage - Reviewing job execution history and query performance - Setting up governance and access controls for system data +- Managing FGAC policies: column masks, row filters, governed tags +- Analyzing FGAC coverage gaps and policy quotas +- Granting and revoking UC permissions (ACLs) ## Related Skills diff --git a/databricks-tools-core/README.md b/databricks-tools-core/README.md index d7831456..e5bb9f60 100644 --- a/databricks-tools-core/README.md +++ b/databricks-tools-core/README.md @@ -12,7 +12,7 @@ The `databricks-tools-core` package provides reusable, opinionated functions for |--------|-------------| | **sql/** | SQL execution, warehouse management, and table statistics | | **jobs/** | Job management and run operations (serverless by default) | -| 
**unity_catalog/** | Unity Catalog operations (catalogs, schemas, tables) | +| **unity_catalog/** | Unity Catalog operations (catalogs, schemas, tables, FGAC policies) | | **compute/** | Compute and execution context operations | | **spark_declarative_pipelines/** | Spark Declarative Pipeline management | | **synthetic_data_generation/** | Test data generation utilities | From 8ab21168ba68b9d8990aae710e337eb0c82011a1 Mon Sep 17 00:00:00 2001 From: Sreeram Thoom Date: Fri, 27 Feb 2026 11:03:39 -0600 Subject: [PATCH 34/34] Fix ruff formatting with line-length=120 to match CI --- .test/src/skill_test/grp/executor.py | 8 ++------ .test/src/skill_test/scorers/routing.py | 4 +--- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/.test/src/skill_test/grp/executor.py b/.test/src/skill_test/grp/executor.py index 32ab99d1..f0a30fd5 100644 --- a/.test/src/skill_test/grp/executor.py +++ b/.test/src/skill_test/grp/executor.py @@ -91,9 +91,7 @@ def verify_python_syntax(code: str) -> Tuple[bool, Optional[str]]: return False, f"Line {e.lineno}: {e.msg}" -def execute_python_block( - code: str, timeout_seconds: int = 30, verify_imports: bool = True -) -> ExecutionResult: +def execute_python_block(code: str, timeout_seconds: int = 30, verify_imports: bool = True) -> ExecutionResult: """ Execute Python code block. 
@@ -573,7 +571,5 @@ def execute_code_blocks_on_databricks( passed_blocks=passed, details=details, context_id=current_context_id, - execution_mode="databricks" - if any(d.get("execution_mode") == "databricks" for d in details) - else "local", + execution_mode="databricks" if any(d.get("execution_mode") == "databricks" for d in details) else "local", ) diff --git a/.test/src/skill_test/scorers/routing.py b/.test/src/skill_test/scorers/routing.py index 9b5976bc..667b330e 100644 --- a/.test/src/skill_test/scorers/routing.py +++ b/.test/src/skill_test/scorers/routing.py @@ -123,9 +123,7 @@ def detect_skills_from_prompt(prompt: str) -> Set[str]: @scorer -def skill_routing_accuracy( - inputs: Dict[str, Any], expectations: Dict[str, Any] -) -> Feedback: +def skill_routing_accuracy(inputs: Dict[str, Any], expectations: Dict[str, Any]) -> Feedback: """ Score skill routing accuracy.