diff --git a/.gitignore b/.gitignore index 1e834015454..3096e5ffe50 100644 --- a/.gitignore +++ b/.gitignore @@ -78,3 +78,4 @@ cli/azd/extensions/microsoft.azd.concurx/concurx.exe cli/azd/extensions/azure.appservice/azureappservice cli/azd/extensions/azure.appservice/azureappservice.exe .squad/ +cli/azd/coverage-* diff --git a/.vscode/cspell.misc.yaml b/.vscode/cspell.misc.yaml index 9b6242d0f58..30a201a835a 100644 --- a/.vscode/cspell.misc.yaml +++ b/.vscode/cspell.misc.yaml @@ -36,6 +36,21 @@ overrides: - Entra - CODEOWNERS - weikanglim + - filename: ./docs/azd-down-resource-group-safety/** + words: + - azapi + - TOCTOU + - goroutines + - Footgun + - Errorf + - vhvb + - nicklhw + - Breza + - wbreza + - armlocks + - hemarina + - underspecified + - Stringly - filename: ./README.md words: - VSIX diff --git a/cli/azd/.vscode/cspell-azd-dictionary.txt b/cli/azd/.vscode/cspell-azd-dictionary.txt index a1b73e669dd..c117b2a86ab 100644 --- a/cli/azd/.vscode/cspell-azd-dictionary.txt +++ b/cli/azd/.vscode/cspell-azd-dictionary.txt @@ -54,6 +54,7 @@ armappplatform armcognitiveservices armcosmos armdeploymentstacks +armlocks armmachinelearning armmsi armoperationalinsights diff --git a/cli/azd/.vscode/cspell.yaml b/cli/azd/.vscode/cspell.yaml index d9e20981c39..6acad88afe0 100644 --- a/cli/azd/.vscode/cspell.yaml +++ b/cli/azd/.vscode/cspell.yaml @@ -23,6 +23,7 @@ words: - cooldown - customtype - devcontainers + - diagnosticsettings - errgroup - errorhandler - extendee @@ -56,6 +57,7 @@ words: - structpb - syncmap - syscall + - reprovisioning - tsx - Retryable - runcontext diff --git a/cli/azd/go.mod b/cli/azd/go.mod index a2caceeb076..79857f8b031 100644 --- a/cli/azd/go.mod +++ b/cli/azd/go.mod @@ -22,6 +22,7 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/operationalinsights/armoperationalinsights/v2 v2.0.2 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resourcegraph/armresourcegraph v0.9.0 
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armdeploymentstacks v1.0.1 + github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armlocks v1.2.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armsubscriptions v1.3.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/sql/armsql/v2 v2.0.0-beta.7 diff --git a/cli/azd/go.sum b/cli/azd/go.sum index 77ff368a5a0..406235d01e7 100644 --- a/cli/azd/go.sum +++ b/cli/azd/go.sum @@ -49,6 +49,8 @@ github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resourcegraph/armresourceg github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resourcegraph/armresourcegraph v0.9.0/go.mod h1:wVEOJfGTj0oPAUGA1JuRAvz/lxXQsWW16axmHPP47Bk= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armdeploymentstacks v1.0.1 h1:bcgO/crpp7wqI0Froi/I4C2fme7Vk/WLusbV399Do8I= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armdeploymentstacks v1.0.1/go.mod h1:kvfPmsE8gpOwwC1qrO1FeyBDDNfnwBN5UU3MPNiWW7I= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armlocks v1.2.0 h1:CMp8GwmUfS/Stg5KBgduD8rPIk9GNj1HMaID/gUAJYg= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armlocks v1.2.0/go.mod h1:GE1wqa9Ny9eZ8wHtHqbCE7mMsFfVbdEY0itmzYV8JEg= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0 h1:Dd+RhdJn0OTtVGaeDLZpcumkIVCtA/3/Fo42+eoYvVM= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0/go.mod h1:5kakwfW5CjC9KK+Q4wjXAg+ShuIm2mBMua0ZFj2C8PE= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armsubscriptions v1.3.0 h1:wxQx2Bt4xzPIKvW59WQf1tJNx/ZZKPfN+EhPX3Z6CYY= diff --git a/cli/azd/pkg/azapi/deployments.go b/cli/azd/pkg/azapi/deployments.go index 1e079370a4c..b0f956f4a3d 100644 --- a/cli/azd/pkg/azapi/deployments.go +++ b/cli/azd/pkg/azapi/deployments.go @@ -226,6 +226,14 @@ type DeploymentService 
interface { options map[string]any, progress *async.Progress[DeleteDeploymentProgress], ) error + // VoidSubscriptionDeploymentState deploys an empty template to void the deployment state + // without deleting any resource groups. Used after classification-aware deletion. + VoidSubscriptionDeploymentState( + ctx context.Context, + subscriptionId string, + deploymentName string, + options map[string]any, + ) error } type DeleteResourceState string diff --git a/cli/azd/pkg/azapi/resource_group_classifier.go b/cli/azd/pkg/azapi/resource_group_classifier.go new file mode 100644 index 00000000000..60817de3410 --- /dev/null +++ b/cli/azd/pkg/azapi/resource_group_classifier.go @@ -0,0 +1,460 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package azapi + +import ( + "context" + "errors" + "fmt" + "log" + "slices" + "strings" + "sync" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" +) + +// ClassifyResult holds the outcome of resource group classification. +type ClassifyResult struct { + Owned []string // RGs classified as created by azd — safe to delete + Skipped []ClassifiedSkip // RGs classified as external/unknown/vetoed — not deleted +} + +// ClassifiedSkip represents a resource group that will NOT be deleted, with the reason. +type ClassifiedSkip struct { + Name string + Reason string // Human-readable, e.g. "external (snapshot: not in predictedResources)" +} + +// ResourceWithTags is a resource with its ARM tags, used for extra-resource checks. +type ResourceWithTags struct { + Name string + Type string // ARM resource type, e.g. "Microsoft.Compute/virtualMachines" + Tags map[string]*string // ARM tags on the resource; nil if none are set +} + +// ManagementLock represents an ARM management lock on a resource. +type ManagementLock struct { + Name string + LockType string // "CanNotDelete" or "ReadOnly" +} + +// ClassifyOptions configures the classification pipeline. 
+type ClassifyOptions struct { + // SnapshotPredictedRGs is the set of resource group names (lowercased) that the + // Bicep template declares as created resources (not 'existing' references). + // Populated from `bicep snapshot` → predictedResources filtered by RG type. + // + // When non-nil, snapshot-based classification is used: + // - RG in set → owned (template creates it) + // - RG not in set → external (template references it as existing) + // - Tier 4 still runs on all owned candidates (defense-in-depth) + // + // When nil, a simplified guard applies: + // - ForceMode: all RGs treated as owned (backward compat, zero API calls) + // - Interactive + Prompter: user prompted for each RG + // - Otherwise: all RGs skipped (cannot classify without snapshot) + SnapshotPredictedRGs map[string]bool + + // ForceMode skips interactive prompts and API-calling safety checks. + // + // With snapshot available: snapshot classifies RGs (deterministic, offline), + // Tier 4 vetoes are skipped (zero API calls, consistent with --force contract). + // + // Without snapshot: all RGs are treated as owned (backward compat, zero API + // calls). This is the only path where an external RG could be deleted — it + // requires both snapshot failure AND explicit --force. + ForceMode bool + // Interactive enables per-RG prompts for unknown and foreign-resource RGs. + // When false, unknown/unverified RGs are always skipped without deletion. + Interactive bool + EnvName string // Current azd environment name for tag matching + + // ListResourceGroupResources returns all resources in a resource group. + ListResourceGroupResources func(ctx context.Context, rgName string) ([]*ResourceWithTags, error) + // ListResourceGroupLocks returns management locks on a resource group. + ListResourceGroupLocks func(ctx context.Context, rgName string) ([]*ManagementLock, error) + // Prompter asks the user whether to delete an unknown RG. Returns true to delete. 
+ Prompter func(rgName, reason string) (bool, error) +} + +const ( + cAzdEnvNameTag = "azd-env-name" + cLockCanNotDelete = "CanNotDelete" + cLockReadOnly = "ReadOnly" + cTier4Parallelism = 5 +) + +// LockLevelCanNotDelete and LockLevelReadOnly are the ARM lock levels that block deletion. +const ( + LockLevelCanNotDelete = cLockCanNotDelete + LockLevelReadOnly = cLockReadOnly +) + +// ClassifyResourceGroups determines which resource groups from a deployment are +// safe to delete (owned by azd) vs which should be skipped (external/unknown/vetoed). +// +// When SnapshotPredictedRGs is set, snapshot-based classification is used as the +// primary signal, with Tier 4 (locks + foreign resources) as defense-in-depth. +// +// When SnapshotPredictedRGs is nil (snapshot unavailable): +// - ForceMode: all RGs returned as owned (backward compat, zero API calls) +// - Interactive + Prompter: user prompted for each RG +// - Otherwise: all RGs skipped with reason "snapshot unavailable" +func ClassifyResourceGroups( + ctx context.Context, + rgNames []string, + opts ClassifyOptions, +) (*ClassifyResult, error) { + if len(rgNames) == 0 { + return &ClassifyResult{}, nil + } + + result := &ClassifyResult{} + + // --- Snapshot path: deterministic classification from bicep snapshot --- + if opts.SnapshotPredictedRGs != nil { + return classifyFromSnapshot(ctx, rgNames, opts, result) + } + + // --- Snapshot unavailable: simplified guard --- + + // ForceMode without snapshot: return all RGs as owned (backward compat). + if opts.ForceMode { + result.Owned = slices.Clone(rgNames) + return result, nil + } + + // Interactive without snapshot: prompt user for each RG. 
+ if opts.Interactive && opts.Prompter != nil { + var owned []string + for _, rg := range rgNames { + accept, err := opts.Prompter( + rg, + "snapshot unavailable — cannot verify ownership", + ) + if err != nil { + return nil, fmt.Errorf( + "classify rg=%s prompt: %w", rg, err) + } + if accept { + owned = append(owned, rg) + } else { + result.Skipped = append(result.Skipped, + ClassifiedSkip{ + Name: rg, + Reason: "skipped (snapshot unavailable" + + " — user declined)", + }) + } + } + return runTier4Vetoes(ctx, owned, opts, result) + } + + // Non-interactive without snapshot: skip all RGs. + for _, rg := range rgNames { + result.Skipped = append(result.Skipped, ClassifiedSkip{ + Name: rg, + Reason: "skipped (snapshot unavailable" + + " — cannot classify without snapshot)", + }) + } + return result, nil +} + +// classifyFromSnapshot uses the Bicep snapshot predictedResources to classify RGs. +// RGs whose names appear in the predicted set are owned (the template creates them). +// RGs not in the predicted set are external (referenced via the `existing` keyword). +// +// Tier 4 (lock + foreign-resource veto) still runs on owned candidates unless ForceMode +// is active, providing defense-in-depth even when snapshot says "owned." +func classifyFromSnapshot( + ctx context.Context, + rgNames []string, + opts ClassifyOptions, + result *ClassifyResult, +) (*ClassifyResult, error) { + var owned []string + for _, rg := range rgNames { + if opts.SnapshotPredictedRGs[strings.ToLower(rg)] { + owned = append(owned, rg) + } else { + result.Skipped = append(result.Skipped, ClassifiedSkip{ + Name: rg, + Reason: "external (snapshot: not in predictedResources)", + }) + } + } + + // ForceMode + snapshot: deterministic classification, zero API calls, no Tier 4. 
+ if opts.ForceMode { + result.Owned = owned + return result, nil + } + + // --- Tier 4: veto checks on all snapshot-owned candidates (defense-in-depth) --- + // Even if the snapshot says "owned," a management lock or foreign resources + // should still prevent deletion. + return runTier4Vetoes(ctx, owned, opts, result) +} + +// tier4Veto represents a resource group vetoed by a Tier 4 safety check. +type tier4Veto struct { + rg string + reason string +} + +// tier4PendingPrompt represents a Tier 4 foreign-resource finding that needs +// interactive confirmation (or becomes a hard veto in non-interactive mode). +type tier4PendingPrompt struct { + rg string + reason string +} + +// runTier4Vetoes runs lock + foreign-resource veto checks on all owned candidates +// in parallel (capped by cTier4Parallelism). Foreign-resource prompts are collected +// and executed sequentially on the caller's goroutine to avoid concurrent terminal +// output. Returns the final ClassifyResult with vetoed RGs moved to Skipped. +func runTier4Vetoes( + ctx context.Context, + owned []string, + opts ClassifyOptions, + result *ClassifyResult, +) (*ClassifyResult, error) { + // Goroutine invariant: every RG either (a) enters wg.Go — which sends at + // most once to vetoCh or promptCh (clean RGs send to neither) — or (b) sends + // to vetoCh directly (cancelled context). Both channels are buffered to + // len(owned) so sends never block and goroutines never leak. + vetoCh := make(chan tier4Veto, len(owned)) + promptCh := make(chan tier4PendingPrompt, len(owned)) + sem := make(chan struct{}, cTier4Parallelism) + var wg sync.WaitGroup + for _, rg := range owned { + // Context-aware semaphore: bail out if context is cancelled while waiting. + select { + case sem <- struct{}{}: + // Re-check cancellation after acquiring the semaphore. + // Go's select is non-deterministic when both cases are ready, + // so ctx.Done may have fired but the semaphore case was chosen. 
+ if ctx.Err() != nil { + <-sem + vetoCh <- tier4Veto{ + rg: rg, + reason: "error during safety check: " + ctx.Err().Error(), + } + continue + } + case <-ctx.Done(): + vetoCh <- tier4Veto{ + rg: rg, + reason: "error during safety check: " + ctx.Err().Error(), + } + continue + } + wg.Go(func() { + defer func() { <-sem }() + reason, vetoed, needsPrompt, err := classifyTier4(ctx, rg, opts) + if err != nil { + // Fail safe: treat errors as vetoes to avoid accidental deletion. + log.Printf( + "ERROR: classify rg=%s tier=4: safety check failed: %v "+ + "(treating as veto)", rg, err, + ) + vetoCh <- tier4Veto{ + rg: rg, + reason: fmt.Sprintf( + "error during safety check: %s", err.Error()), + } + return + } + if needsPrompt { + promptCh <- tier4PendingPrompt{rg: rg, reason: reason} + return + } + if vetoed { + vetoCh <- tier4Veto{rg: rg, reason: reason} + } + }) + } + wg.Wait() + close(vetoCh) + close(promptCh) + + vetoedSet := make(map[string]string, len(owned)) + for v := range vetoCh { + vetoedSet[v.rg] = v.reason + } + + // Process foreign-resource prompts sequentially on the main goroutine + // to avoid concurrent terminal output. + for p := range promptCh { + if opts.Interactive && opts.Prompter != nil { + accept, err := opts.Prompter(p.rg, p.reason) + if err != nil { + return nil, fmt.Errorf( + "classify rg=%s tier=4 prompt: %w", p.rg, err) + } + if !accept { + vetoedSet[p.rg] = p.reason + } + } else { + // Non-interactive: foreign resources are a hard veto. + log.Printf( + "classify rg=%s tier=4: non-interactive veto: %s", + p.rg, p.reason, + ) + vetoedSet[p.rg] = p.reason + } + } + + for _, rg := range owned { + if reason, vetoed := vetoedSet[rg]; vetoed { + result.Skipped = append(result.Skipped, ClassifiedSkip{ + Name: rg, Reason: reason, + }) + } else { + result.Owned = append(result.Owned, rg) + } + } + + return result, nil +} + +// classifyTier4 runs lock and extra-resource veto checks on an owned RG. +// Returns (reason, vetoed, needsPrompt, error). 
+// When needsPrompt is true, the caller should prompt the user sequentially (not from a goroutine) +// and veto if the user declines. +func classifyTier4(ctx context.Context, rgName string, opts ClassifyOptions) (string, bool, bool, error) { + // Lock check — best-effort: 403 = no veto. + // Rationale: locks are an additive protection layer; inability to read + // them does not imply the RG is unsafe to delete. A user who can delete + // the RG but cannot read its locks should not be blocked by a permission + // gap in a defense-in-depth check. Contrast with resource 403 below. + if opts.ListResourceGroupLocks != nil { + lockVetoed, lockReason, lockErr := checkTier4Locks(ctx, rgName, opts) + if lockErr != nil { + return "", false, false, lockErr + } + if lockVetoed { + return lockReason, true, false, nil + } + } + + // Extra-resource check — strict: 403 = hard veto. + // Rationale: if we cannot enumerate resources in a resource group, we + // cannot verify that all resources belong to this azd environment. + // Deleting a resource group with unknown contents risks destroying + // foreign resources. Unlike lock 403 (where inability to read is + // benign), resource 403 means we lack visibility into what we'd delete. + if opts.ListResourceGroupResources != nil { + // When EnvName is empty, foreign-resource detection cannot distinguish owned from + // untagged resources. Veto to be safe rather than silently allowing deletion. + if opts.EnvName == "" { + return "vetoed (Tier 4: cannot verify resource ownership" + + " without environment name)", true, false, nil + } + + resources, err := opts.ListResourceGroupResources(ctx, rgName) + if err != nil { + if respErr, ok := errors.AsType[*azcore.ResponseError](err); ok { + switch respErr.StatusCode { + case 404: + // RG already deleted — no veto needed. + return "", false, false, nil + case 403: + // Cannot enumerate resources due to auth failure — veto to be safe. 
+ reason := "vetoed (Tier 4: unable to enumerate resource group" + + " resources due to authorization failure)" + return reason, true, false, nil + } + } + return "", false, false, fmt.Errorf("classify rg=%s tier=4 resources: %w", rgName, err) + } + var foreign []string + for _, res := range resources { + // Skip known extension resource types that don't support tags + // (e.g. roleAssignments, diagnosticSettings). These are commonly + // created by azd scaffold templates and never carry azd-env-name. + if isExtensionResourceType(res.Type) { + continue + } + tv := tagValue(res.Tags, cAzdEnvNameTag) + if !strings.EqualFold(tv, opts.EnvName) { + foreign = append(foreign, res.Name) + } + } + if len(foreign) > 0 { + reason := fmt.Sprintf( + "vetoed (Tier 4: %d foreign resource(s) without azd-env-name=%q)", len(foreign), opts.EnvName, + ) + log.Printf("classify rg=%s tier=4: foreign resources: %v", rgName, foreign) + return reason, true, true, nil + } + } + + return "", false, false, nil +} + +// checkTier4Locks checks management locks on an RG. +// Returns (vetoed, reason, error). On 403/404, logs and returns no veto (best-effort). 
+func checkTier4Locks( + ctx context.Context, rgName string, opts ClassifyOptions, +) (bool, string, error) { + locks, err := opts.ListResourceGroupLocks(ctx, rgName) + if err != nil { + if respErr, ok := errors.AsType[*azcore.ResponseError](err); ok { + if respErr.StatusCode == 403 || respErr.StatusCode == 404 { + log.Printf("classify rg=%s tier=4: lock check skipped (HTTP %d)", rgName, respErr.StatusCode) + return false, "", nil + } + } + return false, "", fmt.Errorf("classify rg=%s tier=4 locks: %w", rgName, err) + } + for _, lock := range locks { + if strings.EqualFold(lock.LockType, cLockCanNotDelete) || + strings.EqualFold(lock.LockType, cLockReadOnly) { + reason := fmt.Sprintf( + "vetoed (Tier 4: management lock %q of type %q)", lock.Name, lock.LockType, + ) + return true, reason, nil + } + } + return false, "", nil +} + +// tagValue returns the dereferenced value of a tag, or "" if the key is absent or nil. +func tagValue(tags map[string]*string, key string) string { + if tags == nil { + return "" + } + for k, v := range tags { + if strings.EqualFold(k, key) { + if v != nil { + return *v + } + return "" + } + } + return "" +} + +// extensionResourceTypePrefixes lists ARM resource type prefixes for extension +// resources that don't support tags. These are skipped during Tier 4 +// foreign-resource detection to avoid false-positive vetoes on resources +// commonly created by azd scaffold templates. +// All values are pre-lowercased for efficient case-insensitive comparison. +var extensionResourceTypePrefixes = []string{ + "microsoft.authorization/", + "microsoft.insights/diagnosticsettings", + "microsoft.resources/links", +} + +// isExtensionResourceType returns true if the given ARM resource type is a +// known extension resource that does not support tags. 
+func isExtensionResourceType(resourceType string) bool { + lower := strings.ToLower(resourceType) + return slices.ContainsFunc(extensionResourceTypePrefixes, func(prefix string) bool { + return strings.HasPrefix(lower, prefix) + }) +} diff --git a/cli/azd/pkg/azapi/resource_group_classifier_test.go b/cli/azd/pkg/azapi/resource_group_classifier_test.go new file mode 100644 index 00000000000..7bff2898737 --- /dev/null +++ b/cli/azd/pkg/azapi/resource_group_classifier_test.go @@ -0,0 +1,777 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package azapi + +import ( + "context" + "fmt" + "net/http" + "sync/atomic" + "testing" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// makeResponseError builds an *azcore.ResponseError with the given HTTP status code. +func makeResponseError(statusCode int) error { + return &azcore.ResponseError{StatusCode: statusCode} +} + +// strPtr returns a pointer to the given string. +func strPtr(s string) *string { return new(s) } + +// noopOpts returns a ClassifyOptions wired to a specific env name. +func noopOpts(envName string) ClassifyOptions { + return ClassifyOptions{EnvName: envName} +} + +// snapshotOwned returns a ClassifyOptions with SnapshotPredictedRGs set to +// own the given resource group names (lowercased). 
+func snapshotOwned(envName string, rgs ...string) ClassifyOptions { + m := make(map[string]bool, len(rgs)) + for _, rg := range rgs { + m[rg] = true + } + return ClassifyOptions{ + EnvName: envName, + SnapshotPredictedRGs: m, + } +} + +func TestClassifyResourceGroups(t *testing.T) { + t.Parallel() + + const ( + rgA = "rg-alpha" + rgB = "rg-beta" + rgC = "rg-gamma" + envName = "myenv" + ) + + t.Run("empty RG list returns empty result", func(t *testing.T) { + t.Parallel() + res, err := ClassifyResourceGroups( + t.Context(), nil, noopOpts(envName)) + require.NoError(t, err) + assert.Empty(t, res.Owned) + assert.Empty(t, res.Skipped) + }) + + // --- Snapshot unavailable guard --- + + t.Run("snapshot unavailable non-interactive skips all", func(t *testing.T) { + t.Parallel() + opts := ClassifyOptions{ + EnvName: envName, + Interactive: false, + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA, rgB}, opts) + require.NoError(t, err) + assert.Empty(t, res.Owned) + require.Len(t, res.Skipped, 2) + assert.Contains(t, res.Skipped[0].Reason, "snapshot unavailable") + assert.Contains(t, res.Skipped[1].Reason, "snapshot unavailable") + }) + + t.Run("snapshot unavailable interactive prompts user", func(t *testing.T) { + t.Parallel() + var prompted []string + opts := ClassifyOptions{ + EnvName: envName, + Interactive: true, + Prompter: func(rg, reason string) (bool, error) { + prompted = append(prompted, rg) + return rg == rgA, nil // accept A, decline B + }, + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA, rgB}, opts) + require.NoError(t, err) + assert.Equal(t, []string{rgA}, res.Owned) + require.Len(t, res.Skipped, 1) + assert.Equal(t, rgB, res.Skipped[0].Name) + assert.Contains(t, res.Skipped[0].Reason, "user declined") + assert.Equal(t, []string{rgA, rgB}, prompted) + }) + + t.Run("snapshot unavailable interactive prompt error", func(t *testing.T) { + t.Parallel() + opts := ClassifyOptions{ + EnvName: envName, + Interactive: true, + 
Prompter: func(_, _ string) (bool, error) { + return false, fmt.Errorf("terminal closed") + }, + } + _, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.Error(t, err) + assert.Contains(t, err.Error(), "terminal closed") + }) + + // --- Snapshot-based classification --- + + t.Run("snapshot owned goes through Tier4", func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + return []*ResourceWithTags{ + { + Name: "vm1", + Type: "Microsoft.Compute/virtualMachines", + Tags: map[string]*string{ + cAzdEnvNameTag: strPtr(envName), + }, + }, + }, nil + } + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, nil + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Equal(t, []string{rgA}, res.Owned) + assert.Empty(t, res.Skipped) + }) + + t.Run("snapshot external skips RG", func(t *testing.T) { + t.Parallel() + // snapshot contains rgA but not rgB + opts := snapshotOwned(envName, rgA) + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + return nil, nil + } + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, nil + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA, rgB}, opts) + require.NoError(t, err) + assert.Equal(t, []string{rgA}, res.Owned) + require.Len(t, res.Skipped, 1) + assert.Equal(t, rgB, res.Skipped[0].Name) + assert.Contains(t, res.Skipped[0].Reason, "snapshot") + }) + + // --- Tier 4: Lock veto --- + + t.Run("Tier4 lock CanNotDelete vetoes owned RG", func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return []*ManagementLock{ + 
{Name: "my-lock", LockType: cLockCanNotDelete}, + }, nil + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Empty(t, res.Owned) + require.Len(t, res.Skipped, 1) + assert.Contains(t, res.Skipped[0].Reason, "lock") + }) + + t.Run("Tier4 lock ReadOnly vetoes owned RG", func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return []*ManagementLock{ + {Name: "ro-lock", LockType: cLockReadOnly}, + }, nil + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Empty(t, res.Owned) + require.Len(t, res.Skipped, 1) + assert.Contains(t, res.Skipped[0].Reason, "lock") + }) + + t.Run("Tier4 lock check 403 does not veto", func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, makeResponseError(http.StatusForbidden) + } + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + return nil, nil + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Equal(t, []string{rgA}, res.Owned) + }) + + t.Run("Tier4 lock check 404 does not veto", func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, makeResponseError(http.StatusNotFound) + } + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + return nil, nil + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Equal(t, []string{rgA}, res.Owned) + }) + + t.Run("Tier4 lock check 500 vetoes as safety", func(t 
*testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, makeResponseError(http.StatusInternalServerError) + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Empty(t, res.Owned) + require.Len(t, res.Skipped, 1) + assert.Contains(t, res.Skipped[0].Reason, "error during safety check") + }) + + // --- Tier 4: Foreign resource veto --- + + t.Run("Tier4 foreign resources vetoes non-interactive", func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) + opts.Interactive = false + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, nil + } + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + return []*ResourceWithTags{ + { + Name: "alien-vm", + Type: "Microsoft.Compute/virtualMachines", + Tags: map[string]*string{ + cAzdEnvNameTag: strPtr("other-env"), + }, + }, + }, nil + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Empty(t, res.Owned) + require.Len(t, res.Skipped, 1) + assert.Contains(t, res.Skipped[0].Reason, "foreign") + }) + + t.Run("Tier4 foreign resources prompts interactive", func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) + opts.Interactive = true + opts.Prompter = func(_, _ string) (bool, error) { + return true, nil + } + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, nil + } + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + return []*ResourceWithTags{ + { + Name: "alien-vm", + Type: "Microsoft.Compute/virtualMachines", + Tags: map[string]*string{ + cAzdEnvNameTag: strPtr("other-env"), + }, + }, + }, nil + } + res, err := 
ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Equal(t, []string{rgA}, res.Owned) + assert.Empty(t, res.Skipped) + }) + + t.Run( + "Tier4 foreign resource prompt declined vetoes", + func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) + opts.Interactive = true + opts.Prompter = func(_, _ string) (bool, error) { + return false, nil + } + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, nil + } + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + return []*ResourceWithTags{ + { + Name: "alien-vm", + Type: "Microsoft.Compute/virtualMachines", + Tags: map[string]*string{ + cAzdEnvNameTag: strPtr("other-env"), + }, + }, + }, nil + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Empty(t, res.Owned) + require.Len(t, res.Skipped, 1) + assert.Contains(t, res.Skipped[0].Reason, "foreign") + }, + ) + + t.Run("Tier4 resource list 404 does not veto", func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, nil + } + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + return nil, makeResponseError(http.StatusNotFound) + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Equal(t, []string{rgA}, res.Owned) + }) + + t.Run("Tier4 resource list 403 vetoes as safety", func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, nil + } + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + return nil, 
makeResponseError(http.StatusForbidden) + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Empty(t, res.Owned) + require.Len(t, res.Skipped, 1) + assert.Contains(t, res.Skipped[0].Reason, "authorization") + }) + + t.Run("Tier4 resource list 500 vetoes as safety", func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, nil + } + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + return nil, makeResponseError(http.StatusInternalServerError) + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Empty(t, res.Owned) + require.Len(t, res.Skipped, 1) + assert.Contains(t, res.Skipped[0].Reason, "error during safety check") + }) + + t.Run("Tier4 empty envName vetoes for safety", func(t *testing.T) { + t.Parallel() + opts := snapshotOwned("", rgA) + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, nil + } + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + return nil, nil + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Empty(t, res.Owned) + require.Len(t, res.Skipped, 1) + assert.Contains(t, res.Skipped[0].Reason, "cannot verify") + }) + + t.Run("Tier4 extension resources are skipped", func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, nil + } + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + return []*ResourceWithTags{ + { + Name: "role-assignment", + Type: 
"Microsoft.Authorization/roleAssignments", + // No azd-env-name tag — should be skipped, not treated as foreign + }, + }, nil + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Equal(t, []string{rgA}, res.Owned) + assert.Empty(t, res.Skipped) + }) + + // --- Tag case insensitivity --- + + t.Run("tag matching is case insensitive", func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, nil + } + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + return []*ResourceWithTags{ + { + Name: "vm1", + Type: "Microsoft.Compute/virtualMachines", + Tags: map[string]*string{ + "AZD-ENV-NAME": strPtr("MYENV"), + }, + }, + }, nil + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Equal(t, []string{rgA}, res.Owned) + assert.Empty(t, res.Skipped) + }) + + // --- Multi-RG parallelism --- + + t.Run("multiple RGs classified in parallel", func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA, rgB, rgC) + var lockCalls atomic.Int32 + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + lockCalls.Add(1) + return nil, nil + } + var resCalls atomic.Int32 + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + resCalls.Add(1) + return []*ResourceWithTags{ + { + Name: "vm", + Type: "Microsoft.Compute/virtualMachines", + Tags: map[string]*string{ + cAzdEnvNameTag: strPtr(envName), + }, + }, + }, nil + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA, rgB, rgC}, opts) + require.NoError(t, err) + assert.Len(t, res.Owned, 3) + assert.Empty(t, res.Skipped) + assert.Equal(t, int32(3), lockCalls.Load()) + assert.Equal(t, int32(3), resCalls.Load()) + 
}) + + t.Run("cancelled context vetoes remaining RGs", func(t *testing.T) { + t.Parallel() + ctx, cancel := context.WithCancel(t.Context()) + cancel() // cancel immediately + opts := snapshotOwned(envName, rgA) + res, err := ClassifyResourceGroups(ctx, []string{rgA}, opts) + require.NoError(t, err) + assert.Empty(t, res.Owned) + require.Len(t, res.Skipped, 1) + assert.Contains(t, res.Skipped[0].Reason, "error during safety check") + }) +} + +func TestIsExtensionResourceType(t *testing.T) { + t.Parallel() + tests := []struct { + name string + resType string + expected bool + }{ + { + name: "role assignment", + resType: "Microsoft.Authorization/roleAssignments", + expected: true, + }, + { + name: "role definition", + resType: "Microsoft.Authorization/roleDefinitions", + expected: true, + }, + { + name: "diagnostic setting", + resType: "Microsoft.Insights/diagnosticSettings", + expected: true, + }, + { + name: "resource link", + resType: "Microsoft.Resources/links", + expected: true, + }, + { + name: "case insensitive", + resType: "MICROSOFT.AUTHORIZATION/ROLEASSIGNMENTS", + expected: true, + }, + { + name: "compute VM is not extension", + resType: "Microsoft.Compute/virtualMachines", + expected: false, + }, + { + name: "storage account is not extension", + resType: "Microsoft.Storage/storageAccounts", + expected: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.expected, isExtensionResourceType(tt.resType)) + }) + } +} + +func TestClassifyResourceGroups_ForceMode(t *testing.T) { + t.Parallel() + + const ( + rgA = "rg-alpha" + rgB = "rg-beta" + envName = "myenv" + ) + + t.Run("without snapshot treats all as owned", func(t *testing.T) { + t.Parallel() + opts := ClassifyOptions{ + EnvName: envName, + ForceMode: true, + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA, rgB}, opts) + require.NoError(t, err) + assert.Equal(t, []string{rgA, rgB}, res.Owned) + assert.Empty(t, 
res.Skipped) + }) + + t.Run("without snapshot skips all callbacks", func(t *testing.T) { + t.Parallel() + opts := ClassifyOptions{ + EnvName: envName, + ForceMode: true, + ListResourceGroupLocks: func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + t.Fatal("should not be called") + return nil, nil + }, + ListResourceGroupResources: func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + t.Fatal("should not be called") + return nil, nil + }, + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Equal(t, []string{rgA}, res.Owned) + }) + + t.Run("with snapshot uses deterministic classification", func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) + opts.ForceMode = true + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA, rgB}, opts) + require.NoError(t, err) + assert.Equal(t, []string{rgA}, res.Owned) + require.Len(t, res.Skipped, 1) + assert.Equal(t, rgB, res.Skipped[0].Name) + }) + + t.Run( + "with snapshot skips Tier4 callbacks", + func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) + opts.ForceMode = true + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + t.Fatal("should not be called") + return nil, nil + } + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + t.Fatal("should not be called") + return nil, nil + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Equal(t, []string{rgA}, res.Owned) + }, + ) +} + +func TestClassifyResourceGroups_Snapshot(t *testing.T) { + t.Parallel() + + const ( + rgA = "rg-alpha" + rgB = "rg-beta" + envName = "myenv" + ) + + t.Run("nil snapshot falls back to guard", func(t *testing.T) { + t.Parallel() + opts := ClassifyOptions{ + EnvName: envName, + Interactive: false, + // No SnapshotPredictedRGs + } + res, err 
:= ClassifyResourceGroups( + t.Context(), []string{rgA}, opts) + require.NoError(t, err) + assert.Empty(t, res.Owned) + require.Len(t, res.Skipped, 1) + assert.Contains(t, res.Skipped[0].Reason, "snapshot unavailable") + }) + + t.Run("empty snapshot map classifies all as external", func(t *testing.T) { + t.Parallel() + opts := ClassifyOptions{ + EnvName: envName, + SnapshotPredictedRGs: map[string]bool{}, + ListResourceGroupResources: func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + return nil, nil + }, + ListResourceGroupLocks: func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, nil + }, + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA, rgB}, opts) + require.NoError(t, err) + assert.Empty(t, res.Owned) + require.Len(t, res.Skipped, 2) + assert.Contains(t, res.Skipped[0].Reason, "snapshot") + }) + + t.Run("snapshot case-insensitive lookup", func(t *testing.T) { + t.Parallel() + // predictedRGs has lowercase "rg-alpha" + opts := snapshotOwned(envName, "rg-alpha") + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + return nil, nil + } + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, nil + } + // Query with "rg-alpha" — should match + res, err := ClassifyResourceGroups( + t.Context(), []string{"rg-alpha"}, opts) + require.NoError(t, err) + assert.Equal(t, []string{"rg-alpha"}, res.Owned) + }) + + t.Run( + "snapshot mixed owned and external", + func(t *testing.T) { + t.Parallel() + opts := snapshotOwned(envName, rgA) // only rgA is owned + opts.ListResourceGroupResources = func( + _ context.Context, _ string, + ) ([]*ResourceWithTags, error) { + return nil, nil + } + opts.ListResourceGroupLocks = func( + _ context.Context, _ string, + ) ([]*ManagementLock, error) { + return nil, nil + } + res, err := ClassifyResourceGroups( + t.Context(), []string{rgA, rgB}, opts) + 
require.NoError(t, err) + assert.Equal(t, []string{rgA}, res.Owned) + require.Len(t, res.Skipped, 1) + assert.Equal(t, rgB, res.Skipped[0].Name) + assert.Contains(t, res.Skipped[0].Reason, + "not in predictedResources") + }, + ) +} diff --git a/cli/azd/pkg/azapi/stack_deployments.go b/cli/azd/pkg/azapi/stack_deployments.go index 7a7b0ba71ec..d8da2b24f92 100644 --- a/cli/azd/pkg/azapi/stack_deployments.go +++ b/cli/azd/pkg/azapi/stack_deployments.go @@ -660,6 +660,17 @@ func (d *StackDeployments) CalculateTemplateHash( return d.standardDeployments.CalculateTemplateHash(ctx, subscriptionId, template) } +// VoidSubscriptionDeploymentState is a no-op for deployment stacks. +// Deployment stacks manage their own state; voiding is not applicable. +func (d *StackDeployments) VoidSubscriptionDeploymentState( + _ context.Context, + _ string, + _ string, + _ map[string]any, +) error { + return nil +} + func (d *StackDeployments) createClient(ctx context.Context, subscriptionId string) (*armdeploymentstacks.Client, error) { credential, err := d.credentialProvider.CredentialForSubscription(ctx, subscriptionId) if err != nil { diff --git a/cli/azd/pkg/azapi/standard_deployments.go b/cli/azd/pkg/azapi/standard_deployments.go index efc55ed44cb..6725d86080e 100644 --- a/cli/azd/pkg/azapi/standard_deployments.go +++ b/cli/azd/pkg/azapi/standard_deployments.go @@ -399,6 +399,12 @@ func resourceGroupsFromDeployment(deployment *ResourceDeployment) []string { return slices.Collect(maps.Keys(resourceGroups)) } +// ResourceGroupsFromDeployment extracts the unique resource group names from a deployment. +// This is the public version of the internal helper, used by the classification pipeline. 
+func ResourceGroupsFromDeployment(deployment *ResourceDeployment) []string { + return resourceGroupsFromDeployment(deployment) +} + func (ds *StandardDeployments) ListResourceGroupDeploymentResources( ctx context.Context, subscriptionId string, @@ -476,6 +482,17 @@ func (ds *StandardDeployments) DeleteSubscriptionDeployment( return ds.voidSubscriptionDeploymentState(ctx, subscriptionId, deploymentName, options) } +// VoidSubscriptionDeploymentState deploys an empty template to void the deployment state +// without deleting any resource groups. Used after classification-aware deletion. +func (ds *StandardDeployments) VoidSubscriptionDeploymentState( + ctx context.Context, + subscriptionId string, + deploymentName string, + options map[string]any, +) error { + return ds.voidSubscriptionDeploymentState(ctx, subscriptionId, deploymentName, options) +} + // voidSubscriptionDeploymentState deploys an empty template to void the provision state // and keep deployment history instead of deleting previous deployments. 
func (ds *StandardDeployments) voidSubscriptionDeploymentState( @@ -491,7 +508,7 @@ func (ds *StandardDeployments) voidSubscriptionDeploymentState( } envName, has := deployment.Tags[azure.TagKeyAzdEnvName] - if has { + if has && envName != nil && *envName != "" { var emptyTemplate json.RawMessage = []byte(emptySubscriptionArmTemplate) emptyDeploymentName := ds.GenerateDeploymentName(*envName) tags := map[string]*string{ diff --git a/cli/azd/pkg/azapi/standard_deployments_test.go b/cli/azd/pkg/azapi/standard_deployments_test.go index 9bbdc895048..181ce723db9 100644 --- a/cli/azd/pkg/azapi/standard_deployments_test.go +++ b/cli/azd/pkg/azapi/standard_deployments_test.go @@ -5,7 +5,7 @@ package azapi import ( "context" - "sort" + "slices" "testing" "time" @@ -120,7 +120,48 @@ func TestResourceGroupsFromDeployment(t *testing.T) { groups := resourceGroupsFromDeployment(&mockDeployment) - sort.Strings(groups) + slices.Sort(groups) require.Equal(t, []string{"groupA", "groupB", "groupC"}, groups) }) } + +func Test_StandardDeployments_VoidSubscriptionDeploymentState(t *testing.T) { + t.Parallel() + + // This test verifies that VoidSubscriptionDeploymentState is a valid public method + // that delegates to the private voidSubscriptionDeploymentState implementation. + // The method signature and delegation are verified at compile time. + mockContext := mocks.NewMockContext(context.Background()) + + deploymentService := NewStandardDeployments( + mockContext.SubscriptionCredentialProvider, + mockContext.ArmClientOptions, + NewResourceService(mockContext.SubscriptionCredentialProvider, mockContext.ArmClientOptions), + cloud.AzurePublic(), + mockContext.Clock, + ) + + // Verify the method exists and is callable (compilation check). + // A full integration test would require HTTP mocks for the ARM deployment API. 
+ _ = deploymentService.VoidSubscriptionDeploymentState +} + +func TestResourceGroupsFromDeployment_Public(t *testing.T) { + t.Parallel() + + // Verify public wrapper returns same result as private function. + mockDeployment := &ResourceDeployment{ + Resources: []*armresources.ResourceReference{ + {ID: new("/subscriptions/sub-id/resourceGroups/myRG")}, + }, + ProvisioningState: DeploymentProvisioningStateSucceeded, + Timestamp: time.Now(), + } + + public := ResourceGroupsFromDeployment(mockDeployment) + private := resourceGroupsFromDeployment(mockDeployment) + + slices.Sort(public) + slices.Sort(private) + require.Equal(t, private, public) +} diff --git a/cli/azd/pkg/infra/provisioning/bicep/bicep_destroy.go b/cli/azd/pkg/infra/provisioning/bicep/bicep_destroy.go new file mode 100644 index 00000000000..76b5ef5f261 --- /dev/null +++ b/cli/azd/pkg/infra/provisioning/bicep/bicep_destroy.go @@ -0,0 +1,517 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package bicep + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "log" + "maps" + "os" + "path/filepath" + "slices" + "strings" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armlocks" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources" + "github.com/azure/azure-dev/cli/azd/pkg/account" + "github.com/azure/azure-dev/cli/azd/pkg/alpha" + "github.com/azure/azure-dev/cli/azd/pkg/async" + "github.com/azure/azure-dev/cli/azd/pkg/azapi" + "github.com/azure/azure-dev/cli/azd/pkg/convert" + "github.com/azure/azure-dev/cli/azd/pkg/environment" + "github.com/azure/azure-dev/cli/azd/pkg/infra" + "github.com/azure/azure-dev/cli/azd/pkg/infra/provisioning" + "github.com/azure/azure-dev/cli/azd/pkg/input" + "github.com/azure/azure-dev/cli/azd/pkg/output" + "github.com/azure/azure-dev/cli/azd/pkg/tools/bicep" +) + +// errUserCancelled is returned when the user declines the resource group deletion confirmation. +// The caller uses this to distinguish user cancellation from successful completion. +var errUserCancelled = errors.New("user cancelled resource group deletion") + +// forceDeleteLogAnalyticsIfPurge force-deletes Log Analytics Workspaces in the given resource +// groups when purge is enabled. This must happen while the workspaces still exist — force-delete +// is not possible after the containing resource group is deleted. 
+func (p *BicepProvider) forceDeleteLogAnalyticsIfPurge( + ctx context.Context, + resources map[string][]*azapi.Resource, + options provisioning.DestroyOptions, +) error { + if !options.Purge() { + return nil + } + workspaces, err := p.getLogAnalyticsWorkspacesToPurge(ctx, resources) + if err != nil { + return fmt.Errorf("getting log analytics workspaces to purge: %w", err) + } + if len(workspaces) > 0 { + if err := p.forceDeleteLogAnalyticsWorkspaces(ctx, workspaces); err != nil { + return fmt.Errorf( + "force deleting log analytics workspaces: %w", err, + ) + } + } + return nil +} + +// classifyResourceGroups classifies each resource group as owned or external +// using snapshot-based classification with Tier 4 vetoes as defense-in-depth. +// +// When a Bicep snapshot is available (bicepparam mode), snapshot-based classification +// is used as the primary mechanism: RGs in predictedResources are owned, others are external. +// Tier 4 (locks + foreign resources) still runs on owned candidates as defense-in-depth. +// +// When snapshot is unavailable (non-bicepparam mode, older Bicep CLI, or snapshot error): +// - ForceMode: all RGs returned as owned (backward compat, zero API calls) +// - Interactive: user prompted for each RG +// - Otherwise: all RGs skipped (cannot classify without snapshot) +// +// This function does NOT delete any resource groups — the caller is responsible +// for deletion after collecting purge targets (which require the RGs to still exist). +// +// Log Analytics Workspaces in owned RGs are force-deleted before the RG if purge is enabled, +// since force-delete requires the workspace to still exist. +// Returns the list of owned RG names and any skipped RG info. 
+func (p *BicepProvider) classifyResourceGroups( + ctx context.Context, + deployment infra.Deployment, + groupedResources map[string][]*azapi.Resource, + options provisioning.DestroyOptions, +) (owned []string, skipped []azapi.ClassifiedSkip, err error) { + // Extract RG names from the grouped resources map. + rgNames := slices.Collect(maps.Keys(groupedResources)) + + // Get deployment info for classification (used for logging and hash derivation). + deploymentInfo, deployInfoErr := deployment.Get(ctx) + if deployInfoErr == nil { + log.Printf("classifying resource groups for deployment: %s", deploymentInfo.Name) + } + + // Build classification options. + subscriptionId := deployment.SubscriptionId() + classifyOpts := azapi.ClassifyOptions{ + Interactive: !p.console.IsNoPromptMode(), + ForceMode: options.Force(), + EnvName: p.env.Name(), + SnapshotPredictedRGs: p.getSnapshotPredictedRGs(ctx), + } + + // Only wire Tier 4 callbacks when not --force (they won't be invoked in ForceMode). + if !options.Force() { + classifyOpts.ListResourceGroupLocks = func(ctx context.Context, rgName string) ([]*azapi.ManagementLock, error) { + return p.listResourceGroupLocks(ctx, subscriptionId, rgName) + } + classifyOpts.ListResourceGroupResources = func( + ctx context.Context, rgName string, + ) ([]*azapi.ResourceWithTags, error) { + return p.listResourceGroupResourcesWithTags(ctx, subscriptionId, rgName) + } + classifyOpts.Prompter = func(rgName, reason string) (bool, error) { + return p.console.Confirm(ctx, input.ConsoleOptions{ + Message: fmt.Sprintf("Delete resource group '%s'? (%s)", rgName, reason), + DefaultValue: false, + }) + } + } + + // Run classification. + result, err := azapi.ClassifyResourceGroups(ctx, rgNames, classifyOpts) + if err != nil { + return nil, nil, fmt.Errorf("classifying resource groups: %w", err) + } + + // Log classification results (user-facing display handled by caller). 
+ for _, skip := range result.Skipped { + log.Printf("classify rg=%s decision=skip reason=%q", skip.Name, skip.Reason) + } + for _, owned := range result.Owned { + log.Printf("classify rg=%s decision=owned", owned) + } + + // Overall confirmation prompt for owned RGs (interactive only, not --force). + if len(result.Owned) > 0 && !options.Force() && !p.console.IsNoPromptMode() { + confirmMsg := fmt.Sprintf( + "Delete %d resource group(s): %s?", + len(result.Owned), + strings.Join(result.Owned, ", "), + ) + confirmed, confirmErr := p.console.Confirm(ctx, input.ConsoleOptions{ + Message: confirmMsg, + DefaultValue: false, + }) + if confirmErr != nil { + return nil, result.Skipped, fmt.Errorf("confirming resource group deletion: %w", confirmErr) + } + if !confirmed { + return nil, result.Skipped, errUserCancelled + } + } + + return result.Owned, result.Skipped, nil +} + +// deleteRGList deletes a list of resource groups, force-deleting Log Analytics Workspaces first +// in each RG when purge is enabled. +func (p *BicepProvider) deleteRGList( + ctx context.Context, + subscriptionId string, + rgNames []string, + groupedResources map[string][]*azapi.Resource, + options provisioning.DestroyOptions, +) (deleted []string, err error) { + var deleteErrors []error + for _, rgName := range rgNames { + // Force-delete Log Analytics Workspaces in this RG before deleting the RG. 
+ rgResources := map[string][]*azapi.Resource{rgName: groupedResources[rgName]} + if laErr := p.forceDeleteLogAnalyticsIfPurge(ctx, rgResources, options); laErr != nil { + deleteErrors = append(deleteErrors, + fmt.Errorf("log analytics purge for %s: %w", rgName, laErr)) + continue + } + + p.console.ShowSpinner( + ctx, + fmt.Sprintf("Deleting resource group %s", output.WithHighLightFormat(rgName)), + input.Step, + ) + + if delErr := p.resourceService.DeleteResourceGroup(ctx, subscriptionId, rgName); delErr != nil { + p.console.StopSpinner( + ctx, + fmt.Sprintf("Failed deleting resource group %s", output.WithHighLightFormat(rgName)), + input.StepFailed, + ) + deleteErrors = append(deleteErrors, fmt.Errorf("deleting resource group %s: %w", rgName, delErr)) + continue + } + + p.console.StopSpinner( + ctx, + fmt.Sprintf("Deleted resource group %s", output.WithHighLightFormat(rgName)), + input.StepDone, + ) + deleted = append(deleted, rgName) + } + + if len(deleteErrors) > 0 { + return deleted, errors.Join(deleteErrors...) + } + return deleted, nil +} + +// listResourceGroupLocks retrieves management locks on a resource group using the ARM API. +// Returns an error if dependencies cannot be resolved — the classifier treats +// errors as vetoes (fail-safe) to avoid deleting locked resources without verification. 
+func (p *BicepProvider) listResourceGroupLocks( + ctx context.Context, + subscriptionId string, + rgName string, +) ([]*azapi.ManagementLock, error) { + var credProvider account.SubscriptionCredentialProvider + if err := p.serviceLocator.Resolve(&credProvider); err != nil { + return nil, fmt.Errorf( + "classify locks: credential provider unavailable for rg=%s: %w", + rgName, err, + ) + } + + var armOpts *arm.ClientOptions + _ = p.serviceLocator.Resolve(&armOpts) // optional; nil is a valid default + + credential, err := credProvider.CredentialForSubscription(ctx, subscriptionId) + if err != nil { + return nil, fmt.Errorf( + "classify locks: credential error for rg=%s: %w", rgName, err, + ) + } + + client, err := armlocks.NewManagementLocksClient(subscriptionId, credential, armOpts) + if err != nil { + return nil, fmt.Errorf( + "classify locks: ARM client error for rg=%s: %w", rgName, err, + ) + } + + var locks []*azapi.ManagementLock + pager := client.NewListAtResourceGroupLevelPager(rgName, nil) + for pager.More() { + page, err := pager.NextPage(ctx) + if err != nil { + return nil, err // propagate so caller can handle 404/403 + } + for _, lock := range page.Value { + if lock == nil || lock.Properties == nil { + continue + } + name := "" + if lock.Name != nil { + name = *lock.Name + } + lockType := "" + if lock.Properties.Level != nil { + lockType = string(*lock.Properties.Level) + } + ml := &azapi.ManagementLock{Name: name, LockType: lockType} + locks = append(locks, ml) + // Short-circuit: one blocking lock is enough to veto. + if strings.EqualFold(lockType, azapi.LockLevelCanNotDelete) || + strings.EqualFold(lockType, azapi.LockLevelReadOnly) { + return locks, nil + } + } + } + return locks, nil +} + +// listResourceGroupResourcesWithTags retrieves all resources in a resource group +// with their tags, used for Tier 4 foreign-resource detection. 
+// Returns an error if dependencies cannot be resolved — the classifier treats +// errors as vetoes (fail-safe) to avoid deleting resources without verification. +func (p *BicepProvider) listResourceGroupResourcesWithTags( + ctx context.Context, + subscriptionId string, + rgName string, +) ([]*azapi.ResourceWithTags, error) { + var credProvider account.SubscriptionCredentialProvider + if err := p.serviceLocator.Resolve(&credProvider); err != nil { + return nil, fmt.Errorf( + "classify resources: credential provider unavailable for rg=%s: %w", + rgName, err, + ) + } + + var armOpts *arm.ClientOptions + _ = p.serviceLocator.Resolve(&armOpts) // optional; nil is a valid default + + credential, err := credProvider.CredentialForSubscription(ctx, subscriptionId) + if err != nil { + return nil, fmt.Errorf( + "classify resources: credential error for rg=%s: %w", rgName, err, + ) + } + + client, err := armresources.NewClient(subscriptionId, credential, armOpts) + if err != nil { + return nil, fmt.Errorf( + "classify resources: ARM client error for rg=%s: %w", rgName, err, + ) + } + + // Tags are included by default in GenericResourceExpanded — no $expand needed. + var resources []*azapi.ResourceWithTags + pager := client.NewListByResourceGroupPager(rgName, nil) + for pager.More() { + page, err := pager.NextPage(ctx) + if err != nil { + return nil, err // propagate so caller can handle 404/403 + } + for _, res := range page.Value { + if res == nil { + continue + } + name := "" + if res.Name != nil { + name = *res.Name + } + resType := "" + if res.Type != nil { + resType = *res.Type + } + resources = append(resources, &azapi.ResourceWithTags{ + Name: name, + Type: resType, + Tags: res.Tags, + }) + } + } + return resources, nil +} + +// voidDeploymentState voids the deployment state by deploying an empty template. +// This ensures subsequent azd provision commands work correctly after a destroy, +// by establishing a new baseline deployment. 
+func (p *BicepProvider) voidDeploymentState(ctx context.Context, deployment infra.Deployment) error { + p.console.ShowSpinner(ctx, "Voiding deployment state...", input.Step) + + optionsMap, err := convert.ToMap(p.options) + if err != nil { + p.console.StopSpinner(ctx, "Failed to void deployment state", input.StepFailed) + return err + } + + if err := deployment.VoidState(ctx, optionsMap); err != nil { + p.console.StopSpinner(ctx, "Failed to void deployment state", input.StepFailed) + return fmt.Errorf("voiding deployment state: %w", err) + } + + p.console.StopSpinner(ctx, "Deployment state voided", input.StepDone) + return nil +} + +// isDeploymentStacksEnabled checks if the deployment stacks alpha feature is enabled. +// Used to determine whether to use the stack-based delete path (deployment.Delete) or +// the standard classification-based path (classifyResourceGroups + deleteRGList). +func (p *BicepProvider) isDeploymentStacksEnabled() bool { + var featureManager *alpha.FeatureManager + if err := p.serviceLocator.Resolve(&featureManager); err != nil { + return false + } + return featureManager.IsEnabled(azapi.FeatureDeploymentStacks) +} + +// getSnapshotPredictedRGs invokes `bicep snapshot` on the current template and extracts +// the set of resource group names from predictedResources. Returns a map of lowercased +// RG names (for case-insensitive lookup), or nil if snapshot is unavailable. +// +// Snapshot is only available in bicepparam mode (the modern default) because the Bicep CLI +// requires a .bicepparam file as input. In non-bicepparam mode with available parameters, +// a temporary .bicepparam file is generated. +// +// On any error (older Bicep CLI, compilation failure, etc.), logs a warning and returns nil, +// which causes the classifier to use the simplified guard (ForceMode, interactive prompt, or skip). 
+func (p *BicepProvider) getSnapshotPredictedRGs(ctx context.Context) map[string]bool { + if p.snapshotPredictedRGsOverride != nil { + return p.snapshotPredictedRGsOverride + } + compileResult := p.compileBicepMemoryCache + if compileResult == nil { + log.Printf("snapshot classification: compileBicep cache unavailable, skipping snapshot") + return nil + } + + // Determine the .bicepparam file to use for the snapshot. + var bicepParamFile string + var cleanupFn func() + + if p.mode == bicepparamMode { + // In bicepparam mode, p.path IS the .bicepparam file — use it directly. + bicepParamFile = p.path + } else if len(compileResult.Parameters) > 0 { + // Non-bicepparam mode with available parameters: generate a temp .bicepparam file. + bicepFileName := filepath.Base(p.path) + moduleDir := filepath.Dir(p.path) + + bicepParamContent := generateBicepParam(bicepFileName, compileResult.Parameters) + + tmpFile, err := os.CreateTemp(moduleDir, "snapshot-*.bicepparam") + if err != nil { + log.Printf("snapshot classification: failed to create temp bicepparam: %v", err) + return nil + } + bicepParamFile = tmpFile.Name() + cleanupFn = func() { + tmpFile.Close() + os.Remove(bicepParamFile) + } + + if _, err := tmpFile.WriteString(bicepParamContent); err != nil { + cleanupFn() + log.Printf("snapshot classification: failed to write temp bicepparam: %v", err) + return nil + } + if err := tmpFile.Close(); err != nil { + cleanupFn() + log.Printf("snapshot classification: failed to close temp bicepparam: %v", err) + return nil + } + } else { + // Non-bicepparam mode without parameters: cannot generate .bicepparam for snapshot. + log.Printf("snapshot classification: non-bicepparam mode without parameters, skipping snapshot") + return nil + } + if cleanupFn != nil { + defer cleanupFn() + } + + // Build snapshot options from environment. + snapshotOpts := bicep.NewSnapshotOptions(). 
+ WithSubscriptionID(p.env.GetSubscriptionId()) + + if loc := p.env.GetLocation(); loc != "" { + snapshotOpts = snapshotOpts.WithLocation(loc) + } + if rg := p.env.Getenv(environment.ResourceGroupEnvVarName); rg != "" { + snapshotOpts = snapshotOpts.WithResourceGroup(rg) + } + + // Run the Bicep snapshot command. + data, err := p.bicepCli.Snapshot(ctx, bicepParamFile, snapshotOpts) + if err != nil { + log.Printf("snapshot classification: bicep snapshot unavailable: %v", err) + return nil + } + + // Parse and extract resource group names. + var snapshot snapshotResult + if err := json.Unmarshal(data, &snapshot); err != nil { + log.Printf("snapshot classification: failed to parse snapshot: %v", err) + return nil + } + + predictedRGs := make(map[string]bool) + for _, res := range snapshot.PredictedResources { + if strings.EqualFold(res.Type, "Microsoft.Resources/resourceGroups") && res.Name != "" { + predictedRGs[strings.ToLower(res.Name)] = true + } + } + + if len(predictedRGs) == 0 { + // No RGs in predictedResources — could mean a resource-group-scoped deployment + // where RGs aren't declared as resources. Fall back to tier system. + log.Printf("snapshot classification: no resource groups found in predictedResources, falling back to guard") + return nil + } + + log.Printf("snapshot classification: found %d predicted resource group(s): %v", + len(predictedRGs), slices.Collect(maps.Keys(predictedRGs))) + return predictedRGs +} + +// destroyViaDeploymentDelete deletes resources using deployment.Delete(), which routes +// through the deployment service (standard or stacks). For deployment stacks, this deletes +// the stack object which cascades to managed resources. This path does NOT perform +// resource group classification — it is the pre-existing behavior preserved for +// deployment stacks where the stack manages resource lifecycle. 
+func (p *BicepProvider) destroyViaDeploymentDelete( + ctx context.Context, + deployment infra.Deployment, + groupedResources map[string][]*azapi.Resource, + options provisioning.DestroyOptions, +) error { + // Force-delete Log Analytics Workspaces before deleting the deployment/stack, + // since force-delete requires the workspace to still exist. + if err := p.forceDeleteLogAnalyticsIfPurge(ctx, groupedResources, options); err != nil { + return fmt.Errorf("log analytics purge before deployment delete: %w", err) + } + + // Delete via the deployment service (standard: deletes RGs; stacks: deletes the stack). + err := async.RunWithProgressE(func(progressMessage azapi.DeleteDeploymentProgress) { + switch progressMessage.State { + case azapi.DeleteResourceStateInProgress: + p.console.ShowSpinner(ctx, progressMessage.Message, input.Step) + case azapi.DeleteResourceStateSucceeded: + p.console.StopSpinner(ctx, progressMessage.Message, input.StepDone) + case azapi.DeleteResourceStateFailed: + p.console.StopSpinner(ctx, progressMessage.Message, input.StepFailed) + } + }, func(progress *async.Progress[azapi.DeleteDeploymentProgress]) error { + optionsMap, err := convert.ToMap(p.options) + if err != nil { + return err + } + return deployment.Delete(ctx, optionsMap, progress) + }) + + if err != nil { + return err + } + + p.console.Message(ctx, "") + return nil +} diff --git a/cli/azd/pkg/infra/provisioning/bicep/bicep_destroy_test.go b/cli/azd/pkg/infra/provisioning/bicep/bicep_destroy_test.go new file mode 100644 index 00000000000..2814fedc853 --- /dev/null +++ b/cli/azd/pkg/infra/provisioning/bicep/bicep_destroy_test.go @@ -0,0 +1,643 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package bicep + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources" + "github.com/azure/azure-dev/cli/azd/pkg/account" + "github.com/azure/azure-dev/cli/azd/pkg/azure" + "github.com/azure/azure-dev/cli/azd/pkg/environment" + "github.com/azure/azure-dev/cli/azd/pkg/exec" + "github.com/azure/azure-dev/cli/azd/pkg/infra/provisioning" + "github.com/azure/azure-dev/cli/azd/pkg/tools/bicep" + "github.com/azure/azure-dev/cli/azd/test/mocks" + "github.com/azure/azure-dev/cli/azd/test/mocks/mockaccount" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// snapshotJSON builds a JSON byte string for a snapshotResult containing +// the given resource group names. +func snapshotJSON(rgNames ...string) []byte { + type resource struct { + Type string `json:"type"` + Name string `json:"name"` + } + type snapshot struct { + PredictedResources []resource `json:"predictedResources"` + } + s := snapshot{} + for _, rg := range rgNames { + s.PredictedResources = append( + s.PredictedResources, + resource{ + Type: "Microsoft.Resources/resourceGroups", + Name: rg, + }, + ) + } + b, _ := json.Marshal(s) + return b +} + +// mockSnapshotCommand registers a mock command runner response for +// "bicep snapshot" that writes the provided data to a .snapshot.json +// file, simulating the real bicep CLI behavior. 
+func mockSnapshotCommand( + mockContext *mocks.MockContext, + snapshotData []byte, +) { + mockContext.CommandRunner.When(func(args exec.RunArgs, command string) bool { + return strings.Contains(args.Cmd, "bicep") && + len(args.Args) > 0 && args.Args[0] == "snapshot" + }).RespondFn(func(args exec.RunArgs) (exec.RunResult, error) { + // The bicep CLI writes .snapshot.json next to the input. + inputFile := args.Args[1] + snapshotFile := strings.TrimSuffix( + inputFile, filepath.Ext(inputFile), + ) + ".snapshot.json" + if writeErr := os.WriteFile( + snapshotFile, snapshotData, 0600, + ); writeErr != nil { + return exec.RunResult{ExitCode: 1}, writeErr + } + return exec.NewRunResult(0, "", ""), nil + }) +} + +// mockBicepVersion registers a mock for "bicep --version". +func mockBicepVersion(mockContext *mocks.MockContext) { + mockContext.CommandRunner.When(func(args exec.RunArgs, command string) bool { + return strings.Contains(args.Cmd, "bicep") && + len(args.Args) > 0 && args.Args[0] == "--version" + }).RespondFn(func(args exec.RunArgs) (exec.RunResult, error) { + return exec.NewRunResult( + 0, + fmt.Sprintf( + "Bicep CLI version %s (abcdef0123)", + bicep.Version, + ), + "", + ), nil + }) +} + +// newTestBicepProvider builds a minimal *BicepProvider suitable for +// testing getSnapshotPredictedRGs. Only the fields accessed by that +// method are populated. 
+func newTestBicepProvider( + mockContext *mocks.MockContext, + mode bicepFileMode, + path string, + compileCache *compileBicepResult, + envValues map[string]string, +) *BicepProvider { + cli := bicep.NewCli( + mockContext.Console, mockContext.CommandRunner, + ) + env := environment.NewWithValues("test-env", envValues) + return &BicepProvider{ + bicepCli: cli, + env: env, + mode: mode, + path: path, + compileBicepMemoryCache: compileCache, + } +} + +func TestGetSnapshotPredictedRGs(t *testing.T) { + t.Parallel() + + envValues := map[string]string{ + environment.SubscriptionIdEnvVarName: "sub-123", + environment.LocationEnvVarName: "westus2", + } + + t.Run("nil compileBicep cache returns nil", func(t *testing.T) { + t.Parallel() + mockCtx := mocks.NewMockContext(t.Context()) + p := newTestBicepProvider( + mockCtx, bicepparamMode, "main.bicepparam", + nil, envValues, + ) + result := p.getSnapshotPredictedRGs(t.Context()) + assert.Nil(t, result) + }) + + t.Run("bicepparam mode returns predicted RGs", func(t *testing.T) { + t.Parallel() + mockCtx := mocks.NewMockContext(t.Context()) + + // Create a temp .bicepparam file (Snapshot reads its path). 
+ dir := t.TempDir() + paramFile := filepath.Join(dir, "main.bicepparam") + require.NoError(t, os.WriteFile( + paramFile, []byte("using 'main.bicep'"), 0600, + )) + + mockBicepVersion(mockCtx) + mockSnapshotCommand( + mockCtx, + snapshotJSON("rg-app", "rg-data"), + ) + + p := newTestBicepProvider( + mockCtx, bicepparamMode, paramFile, + &compileBicepResult{}, + envValues, + ) + result := p.getSnapshotPredictedRGs(t.Context()) + + require.NotNil(t, result) + assert.True(t, result["rg-app"]) + assert.True(t, result["rg-data"]) + assert.Len(t, result, 2) + }) + + t.Run("non-bicepparam with params generates temp file", + func(t *testing.T) { + t.Parallel() + mockCtx := mocks.NewMockContext(t.Context()) + + // The .bicep file needs to exist in a writable directory + // because getSnapshotPredictedRGs creates a temp file + // next to it. + dir := t.TempDir() + bicepFile := filepath.Join(dir, "main.bicep") + require.NoError(t, os.WriteFile( + bicepFile, []byte("// bicep"), 0600, + )) + + mockBicepVersion(mockCtx) + mockSnapshotCommand( + mockCtx, + snapshotJSON("rg-infra"), + ) + + cache := &compileBicepResult{ + Parameters: azure.ArmParameters{ + "location": {Value: "westus2"}, + }, + } + p := newTestBicepProvider( + mockCtx, bicepMode, bicepFile, + cache, + envValues, + ) + result := p.getSnapshotPredictedRGs(t.Context()) + + require.NotNil(t, result) + assert.True(t, result["rg-infra"]) + assert.Len(t, result, 1) + }) + + t.Run("non-bicepparam without params returns nil", + func(t *testing.T) { + t.Parallel() + mockCtx := mocks.NewMockContext(t.Context()) + + p := newTestBicepProvider( + mockCtx, bicepMode, "main.bicep", + &compileBicepResult{Parameters: nil}, + envValues, + ) + result := p.getSnapshotPredictedRGs(t.Context()) + assert.Nil(t, result) + }) + + t.Run("snapshot CLI error returns nil", func(t *testing.T) { + t.Parallel() + mockCtx := mocks.NewMockContext(t.Context()) + + dir := t.TempDir() + paramFile := filepath.Join(dir, "main.bicepparam") + 
require.NoError(t, os.WriteFile( + paramFile, []byte("using 'main.bicep'"), 0600, + )) + + mockBicepVersion(mockCtx) + // Mock snapshot to return an error. + mockCtx.CommandRunner.When(func( + args exec.RunArgs, command string, + ) bool { + return strings.Contains(args.Cmd, "bicep") && + len(args.Args) > 0 && + args.Args[0] == "snapshot" + }).RespondFn(func( + args exec.RunArgs, + ) (exec.RunResult, error) { + return exec.RunResult{ExitCode: 1}, + errors.New("bicep snapshot not supported") + }) + + p := newTestBicepProvider( + mockCtx, bicepparamMode, paramFile, + &compileBicepResult{}, + envValues, + ) + result := p.getSnapshotPredictedRGs(t.Context()) + assert.Nil(t, result) + }) + + t.Run("JSON parse error returns nil", func(t *testing.T) { + t.Parallel() + mockCtx := mocks.NewMockContext(t.Context()) + + dir := t.TempDir() + paramFile := filepath.Join(dir, "main.bicepparam") + require.NoError(t, os.WriteFile( + paramFile, []byte("using 'main.bicep'"), 0600, + )) + + mockBicepVersion(mockCtx) + // Return invalid JSON from the snapshot command. + mockSnapshotCommand(mockCtx, []byte("not-json{{{")) + + p := newTestBicepProvider( + mockCtx, bicepparamMode, paramFile, + &compileBicepResult{}, + envValues, + ) + result := p.getSnapshotPredictedRGs(t.Context()) + assert.Nil(t, result) + }) + + t.Run("zero RGs in predicted resources returns nil", + func(t *testing.T) { + t.Parallel() + mockCtx := mocks.NewMockContext(t.Context()) + + dir := t.TempDir() + paramFile := filepath.Join(dir, "main.bicepparam") + require.NoError(t, os.WriteFile( + paramFile, []byte("using 'main.bicep'"), 0600, + )) + + mockBicepVersion(mockCtx) + // Return a valid snapshot with only non-RG resources. 
+ noRGSnapshot, _ := json.Marshal(map[string]any{ + "predictedResources": []map[string]string{ + { + "type": "Microsoft.Storage/storageAccounts", + "name": "mystorageacct", + }, + }, + }) + mockSnapshotCommand(mockCtx, noRGSnapshot) + + p := newTestBicepProvider( + mockCtx, bicepparamMode, paramFile, + &compileBicepResult{}, + envValues, + ) + result := p.getSnapshotPredictedRGs(t.Context()) + assert.Nil(t, result) + }) + + t.Run("RG names are lowercased in result", func(t *testing.T) { + t.Parallel() + mockCtx := mocks.NewMockContext(t.Context()) + + dir := t.TempDir() + paramFile := filepath.Join(dir, "main.bicepparam") + require.NoError(t, os.WriteFile( + paramFile, []byte("using 'main.bicep'"), 0600, + )) + + mockBicepVersion(mockCtx) + mockSnapshotCommand( + mockCtx, + snapshotJSON("RG-MyApp", "RG-DATA"), + ) + + p := newTestBicepProvider( + mockCtx, bicepparamMode, paramFile, + &compileBicepResult{}, + envValues, + ) + result := p.getSnapshotPredictedRGs(t.Context()) + + require.NotNil(t, result) + assert.True(t, result["rg-myapp"]) + assert.True(t, result["rg-data"]) + assert.False(t, result["RG-MyApp"], + "keys should be lowercased") + }) + + t.Run("env resource group passed to snapshot options", + func(t *testing.T) { + t.Parallel() + mockCtx := mocks.NewMockContext(t.Context()) + + dir := t.TempDir() + paramFile := filepath.Join(dir, "main.bicepparam") + require.NoError(t, os.WriteFile( + paramFile, []byte("using 'main.bicep'"), 0600, + )) + + mockBicepVersion(mockCtx) + + // Capture snapshot args to verify options. 
+ var capturedArgs []string + mockCtx.CommandRunner.When(func( + args exec.RunArgs, command string, + ) bool { + return strings.Contains(args.Cmd, "bicep") && + len(args.Args) > 0 && + args.Args[0] == "snapshot" + }).RespondFn(func( + args exec.RunArgs, + ) (exec.RunResult, error) { + capturedArgs = args.Args + inputFile := args.Args[1] + sf := strings.TrimSuffix( + inputFile, filepath.Ext(inputFile), + ) + ".snapshot.json" + data := snapshotJSON("rg-test") + _ = os.WriteFile(sf, data, 0600) + return exec.NewRunResult(0, "", ""), nil + }) + + vals := map[string]string{ + environment.SubscriptionIdEnvVarName: "sub-123", + environment.LocationEnvVarName: "westus2", + environment.ResourceGroupEnvVarName: "my-rg", + } + p := newTestBicepProvider( + mockCtx, bicepparamMode, paramFile, + &compileBicepResult{}, + vals, + ) + result := p.getSnapshotPredictedRGs(t.Context()) + + require.NotNil(t, result) + // Verify --resource-group was passed. + assert.Contains(t, capturedArgs, "--resource-group") + assert.Contains(t, capturedArgs, "my-rg") + // Verify --subscription-id was passed. + assert.Contains(t, capturedArgs, "--subscription-id") + assert.Contains(t, capturedArgs, "sub-123") + // Verify --location was passed. + assert.Contains(t, capturedArgs, "--location") + assert.Contains(t, capturedArgs, "westus2") + }) +} + +// prepareForceModeDestroyMocks registers all HTTP mocks needed for +// force-mode destroy tests: deployment GET/list, per-RG resources, +// RG deletion tracking, locks, LRO polling, and void state PUT. +// Returns a map of per-RG delete counters. +func prepareForceModeDestroyMocks( + t *testing.T, + mockContext *mocks.MockContext, + rgNames []string, +) map[string]*atomic.Int32 { + t.Helper() + + // Register SubscriptionCredentialProvider + ARM client options + // so Tier 4 helpers can resolve credentials. 
+ mockContext.Container.MustRegisterSingleton( + func() account.SubscriptionCredentialProvider { + return mockaccount.SubscriptionCredentialProviderFunc( + func( + _ context.Context, _ string, + ) (azcore.TokenCredential, error) { + return mockContext.Credentials, nil + }, + ) + }, + ) + mockContext.Container.MustRegisterSingleton( + func() *arm.ClientOptions { + return mockContext.ArmClientOptions + }, + ) + + // Build a deployment referencing all RGs. + outputResources := make( + []*armresources.ResourceReference, len(rgNames), + ) + for i, rg := range rgNames { + outputResources[i] = &armresources.ResourceReference{ + ID: new(fmt.Sprintf( + "/subscriptions/SUBSCRIPTION_ID/"+ + "resourceGroups/%s", rg, + )), + } + } + + deployment := armresources.DeploymentExtended{ + ID: new("DEPLOYMENT_ID"), + Name: new("test-env"), + Location: new("eastus2"), + Tags: map[string]*string{ + "azd-env-name": new("test-env"), + }, + Type: new("Microsoft.Resources/deployments"), + Properties: &armresources.DeploymentPropertiesExtended{ + Outputs: map[string]any{ + "WEBSITE_URL": map[string]any{ + "value": "http://myapp.azurewebsites.net", + "type": "string", + }, + }, + OutputResources: outputResources, + ProvisioningState: new( + armresources.ProvisioningStateSucceeded, + ), + Timestamp: new(time.Now()), + }, + } + deployBytes, _ := json.Marshal(deployment) + + // GET single deployment + mockContext.HttpClient.When(func(r *http.Request) bool { + return r.Method == http.MethodGet && strings.HasSuffix( + r.URL.Path, + "/subscriptions/SUBSCRIPTION_ID/providers/"+ + "Microsoft.Resources/deployments/test-env", + ) + }).RespondFn(func(r *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser( + bytes.NewBuffer(deployBytes), + ), + }, nil + }) + + // GET list deployments + page := &armresources.DeploymentListResult{ + Value: []*armresources.DeploymentExtended{&deployment}, + } + pageBytes, _ := json.Marshal(page) + 
mockContext.HttpClient.When(func(r *http.Request) bool { + return r.Method == http.MethodGet && strings.HasSuffix( + r.URL.Path, + "/SUBSCRIPTION_ID/providers/"+ + "Microsoft.Resources/deployments/", + ) + }).RespondFn(func(r *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser( + bytes.NewBuffer(pageBytes), + ), + }, nil + }) + + // Per-RG resource listing (empty resources). + for _, rgName := range rgNames { + resList := armresources.ResourceListResult{ + Value: []*armresources.GenericResourceExpanded{}, + } + mockContext.HttpClient.When(func(r *http.Request) bool { + return r.Method == http.MethodGet && + strings.Contains( + r.URL.Path, + fmt.Sprintf( + "resourceGroups/%s/resources", + rgName, + ), + ) + }).RespondFn( + func(r *http.Request) (*http.Response, error) { + return mocks.CreateHttpResponseWithBody( + r, http.StatusOK, resList, + ) + }) + } + + // RG deletion mocks (tracked). + deleteCounters := map[string]*atomic.Int32{} + for _, rgName := range rgNames { + deleteCounters[rgName] = &atomic.Int32{} + counter := deleteCounters[rgName] + mockContext.HttpClient.When(func(r *http.Request) bool { + return r.Method == http.MethodDelete && + strings.HasSuffix( + r.URL.Path, + fmt.Sprintf( + "subscriptions/SUBSCRIPTION_ID/"+ + "resourcegroups/%s", rgName, + ), + ) + }).RespondFn( + func(r *http.Request) (*http.Response, error) { + counter.Add(1) + return httpRespondFn(r) + }) + } + + // Lock listing (empty). + for _, rgName := range rgNames { + mockContext.HttpClient.When(func(r *http.Request) bool { + return r.Method == http.MethodGet && + strings.Contains( + r.URL.Path, + fmt.Sprintf( + "resourceGroups/%s/providers/"+ + "Microsoft.Authorization/locks", + rgName, + ), + ) + }).RespondFn( + func(r *http.Request) (*http.Response, error) { + return mocks.CreateHttpResponseWithBody( + r, http.StatusOK, + azure.ArmTemplate{}, + ) + }) + } + + // LRO polling endpoint. 
+ mockContext.HttpClient.When(func(r *http.Request) bool { + return r.Method == http.MethodGet && + strings.Contains( + r.URL.String(), "url-to-poll.net", + ) + }).RespondFn(func(r *http.Request) (*http.Response, error) { + return mocks.CreateEmptyHttpResponse(r, 204) + }) + + // Void state PUT. + mockContext.HttpClient.When(func(r *http.Request) bool { + return r.Method == http.MethodPut && + strings.Contains( + r.URL.Path, + "/subscriptions/SUBSCRIPTION_ID/providers/"+ + "Microsoft.Resources/deployments/", + ) + }).RespondFn(func(r *http.Request) (*http.Response, error) { + result := &armresources.DeploymentsClientCreateOrUpdateAtSubscriptionScopeResponse{ + DeploymentExtended: armresources.DeploymentExtended{ + ID: new("DEPLOYMENT_ID"), + Name: new("test-env"), + Location: new("eastus2"), + Tags: map[string]*string{ + "azd-env-name": new("test-env"), + }, + Type: new( + "Microsoft.Resources/deployments", + ), + Properties: &armresources.DeploymentPropertiesExtended{ + ProvisioningState: new( + armresources.ProvisioningStateSucceeded, + ), + Timestamp: new(time.Now()), + }, + }, + } + return mocks.CreateHttpResponseWithBody( + r, http.StatusOK, result, + ) + }) + + return deleteCounters +} + +// TestForceWithNoSnapshot verifies that when --force is set and +// snapshot is unavailable (nil), all resource groups are treated as +// owned (backward compatibility). This is the integration path in +// BicepProvider.classifyResourceGroups. 
+func TestForceWithNoSnapshot(t *testing.T) { + mockContext := mocks.NewMockContext(t.Context()) + prepareBicepMocks(mockContext) + + rgNames := []string{"rg-one", "rg-two"} + deleteCounters := prepareForceModeDestroyMocks( + t, mockContext, rgNames, + ) + + infraProvider := createBicepProvider(t, mockContext) + destroyOptions := provisioning.NewDestroyOptions(true, false) + result, err := infraProvider.Destroy( + *mockContext.Context, destroyOptions, + ) + + require.NoError(t, err) + require.NotNil(t, result) + + // Both RGs deleted — force + no snapshot = all owned. + assert.Equal(t, int32(1), deleteCounters["rg-one"].Load(), + "rg-one should be deleted (force+no snapshot → all owned)") + assert.Equal(t, int32(1), deleteCounters["rg-two"].Load(), + "rg-two should be deleted (force+no snapshot → all owned)") +} diff --git a/cli/azd/pkg/infra/provisioning/bicep/bicep_provider.go b/cli/azd/pkg/infra/provisioning/bicep/bicep_provider.go index d49d347b050..784913f0273 100644 --- a/cli/azd/pkg/infra/provisioning/bicep/bicep_provider.go +++ b/cli/azd/pkg/infra/provisioning/bicep/bicep_provider.go @@ -32,7 +32,6 @@ import ( "github.com/azure/azure-dev/cli/azd/internal/tracing/fields" "github.com/azure/azure-dev/cli/azd/pkg/account" "github.com/azure/azure-dev/cli/azd/pkg/ai" - "github.com/azure/azure-dev/cli/azd/pkg/async" "github.com/azure/azure-dev/cli/azd/pkg/azapi" "github.com/azure/azure-dev/cli/azd/pkg/azure" "github.com/azure/azure-dev/cli/azd/pkg/azureutil" @@ -95,6 +94,10 @@ type BicepProvider struct { // Internal state // compileBicepResult is cached to avoid recompiling the same bicep file multiple times in the same azd run. compileBicepMemoryCache *compileBicepResult + + // snapshotPredictedRGsOverride, when non-nil, bypasses the bicep CLI + // snapshot pipeline in getSnapshotPredictedRGs. Used by tests. 
+ snapshotPredictedRGsOverride map[string]bool } // Name gets the name of the infra provider @@ -924,6 +927,89 @@ type itemToPurge struct { cognitiveAccounts []cognitiveAccount } +// collectPurgeItems gathers soft-deleted resources from the given resource groups and +// returns them as a list of itemToPurge entries ready for purgeItems. Used by both the +// deployment-stacks path (all RGs) and the classification path (owned RGs only). +func (p *BicepProvider) collectPurgeItems( + ctx context.Context, + resources map[string][]*azapi.Resource, +) ([]itemToPurge, error) { + keyVaults, err := p.getKeyVaultsToPurge(ctx, resources) + if err != nil { + return nil, fmt.Errorf("getting key vaults to purge: %w", err) + } + + managedHSMs, err := p.getManagedHSMsToPurge(ctx, resources) + if err != nil { + return nil, fmt.Errorf("getting managed hsms to purge: %w", err) + } + + appConfigs, err := p.getAppConfigsToPurge(ctx, resources) + if err != nil { + return nil, fmt.Errorf("getting app configurations to purge: %w", err) + } + + apiManagements, err := p.getApiManagementsToPurge(ctx, resources) + if err != nil { + return nil, fmt.Errorf("getting API managements to purge: %w", err) + } + + cognitiveAccounts, err := p.getCognitiveAccountsToPurge(ctx, resources) + if err != nil { + return nil, fmt.Errorf("getting cognitive accounts to purge: %w", err) + } + + var items []itemToPurge + for _, item := range []itemToPurge{ + { + resourceType: "Key Vault", + count: len(keyVaults), + purge: func(skipPurge bool, self *itemToPurge) error { + return p.purgeKeyVaults(ctx, keyVaults, skipPurge) + }, + }, + { + resourceType: "Managed HSM", + count: len(managedHSMs), + purge: func(skipPurge bool, self *itemToPurge) error { + return p.purgeManagedHSMs(ctx, managedHSMs, skipPurge) + }, + }, + { + resourceType: "App Configuration", + count: len(appConfigs), + purge: func(skipPurge bool, self *itemToPurge) error { + return p.purgeAppConfigs(ctx, appConfigs, skipPurge) + }, + }, + { + 
resourceType: "API Management", + count: len(apiManagements), + purge: func(skipPurge bool, self *itemToPurge) error { + return p.purgeAPIManagement(ctx, apiManagements, skipPurge) + }, + }, + } { + if item.count > 0 { + items = append(items, item) + } + } + + groupByKind := cognitiveAccountsByKind(cognitiveAccounts) + for name, cogAccounts := range groupByKind { + items = append(items, itemToPurge{ + resourceType: name, + count: len(cogAccounts), + cognitiveAccounts: cogAccounts, + purge: func(skipPurge bool, self *itemToPurge) error { + return p.purgeCognitiveAccounts(ctx, self.cognitiveAccounts, skipPurge) + }, + }) + } + + return items, nil +} + func (p *BicepProvider) scopeForTemplate(t azure.ArmTemplate) (infra.Scope, error) { deploymentScope, err := t.TargetScope() if err != nil { @@ -950,7 +1036,10 @@ func (p *BicepProvider) inferScopeFromEnv() (infra.Scope, error) { } } -// Destroys the specified deployment by deleting all azure resources, resource groups & deployments that are referenced. +// Destroy tears down the deployment by classifying each resource group and +// deleting only those azd created. External and unknown RGs are preserved. +// When deployment stacks are active, deletion is delegated to deployment.Delete(). +// Void deployment state is applied only after all intended deletions succeed. func (p *BicepProvider) Destroy( ctx context.Context, options provisioning.DestroyOptions, @@ -1005,120 +1094,113 @@ func (p *BicepProvider) Destroy( return nil, fmt.Errorf("mapping resources to resource groups: %w", err) } - // If no resources found, we still need to void the deployment state. - // This can happen when resources have been manually deleted before running azd down. - // Voiding the state ensures that subsequent azd provision commands work correctly - // by creating a new empty deployment that becomes the last successful deployment. 
- if len(groupedResources) == 0 { + // Deployment stacks must be checked FIRST, even when groupedResources is empty. + // A stack can have zero ARM-visible resources after manual cleanup, but the stack + // itself still needs to be deleted via deployment.Delete() to remove deny assignments. + if p.isDeploymentStacksEnabled() { p.console.StopSpinner(ctx, "", input.StepDone) - // Call deployment.Delete to void the state even though there are no resources to delete - if err := p.destroyDeployment(ctx, deploymentToDelete); err != nil { - return nil, fmt.Errorf("voiding deployment state: %w", err) - } - } else { - keyVaults, err := p.getKeyVaultsToPurge(ctx, groupedResources) - if err != nil { - return nil, fmt.Errorf("getting key vaults to purge: %w", err) - } - - managedHSMs, err := p.getManagedHSMsToPurge(ctx, groupedResources) - if err != nil { - return nil, fmt.Errorf("getting managed hsms to purge: %w", err) - } - - appConfigs, err := p.getAppConfigsToPurge(ctx, groupedResources) - if err != nil { - return nil, fmt.Errorf("getting app configurations to purge: %w", err) - } - apiManagements, err := p.getApiManagementsToPurge(ctx, groupedResources) + // Collect purge targets BEFORE stack deletion while RGs still exist. + // getKeyVaults, getManagedHSMs, etc. query live resources via ARM. 
+ purgeItem, err := p.collectPurgeItems(ctx, groupedResources) if err != nil { - return nil, fmt.Errorf("getting API managements to purge: %w", err) + return nil, fmt.Errorf("collecting purge targets: %w", err) } - cognitiveAccounts, err := p.getCognitiveAccountsToPurge(ctx, groupedResources) - if err != nil { - return nil, fmt.Errorf("getting cognitive accounts to purge: %w", err) + if err := p.destroyViaDeploymentDelete(ctx, deploymentToDelete, groupedResources, options); err != nil { + return nil, fmt.Errorf("error deleting Azure resources: %w", err) } - logAnalyticsWorkspaces, err := p.getLogAnalyticsWorkspacesToPurge(ctx, groupedResources) - if err != nil { - return nil, fmt.Errorf("getting log analytics workspaces to purge: %w", err) + if err := p.purgeItems(ctx, purgeItem, options); err != nil { + return nil, fmt.Errorf("purging resources: %w", err) } - + } else if len(groupedResources) == 0 { + // No resources found — void the deployment state directly. + // This can happen when resources have been manually deleted before running azd down. + // Voiding the state ensures that subsequent azd provision commands work correctly + // by creating a new empty deployment that becomes the last successful deployment. p.console.StopSpinner(ctx, "", input.StepDone) - - // Prompt for confirmation before deleting resources - if err := p.promptDeletion(ctx, options, groupedResources, len(resourcesToDelete)); err != nil { - return nil, err + if err := p.voidDeploymentState(ctx, deploymentToDelete); err != nil { + return nil, fmt.Errorf("voiding deployment state: %w", err) } + } else { + p.console.StopSpinner(ctx, "", input.StepDone) - p.console.Message(ctx, output.WithGrayFormat("Deleting your resources can take some time.\n")) + // Step 1: Classify resource groups (no deletion yet). 
+ owned, skipped, classifyErr := p.classifyResourceGroups( + ctx, deploymentToDelete, groupedResources, options, + ) - // Force delete Log Analytics Workspaces first if purge is enabled - // This must happen before deleting resource groups since force delete requires the workspace to exist - if options.Purge() && len(logAnalyticsWorkspaces) > 0 { - if err := p.forceDeleteLogAnalyticsWorkspaces(ctx, logAnalyticsWorkspaces); err != nil { - return nil, fmt.Errorf("force deleting log analytics workspaces: %w", err) + // If user cancelled the confirmation prompt, show skipped RGs and return without + // voiding deployment state or invalidating env keys. + if errors.Is(classifyErr, errUserCancelled) { + for _, skip := range skipped { + p.console.Message(ctx, fmt.Sprintf(" Skipped: %s (%s)", skip.Name, skip.Reason)) } + return nil, errUserCancelled } - - if err := p.destroyDeployment(ctx, deploymentToDelete); err != nil { - return nil, fmt.Errorf("deleting resource groups: %w", err) + if classifyErr != nil { + return nil, fmt.Errorf("classifying resource groups: %w", classifyErr) } - keyVaultsPurge := itemToPurge{ - resourceType: "Key Vault", - count: len(keyVaults), - purge: func(skipPurge bool, self *itemToPurge) error { - return p.purgeKeyVaults(ctx, keyVaults, skipPurge) - }, - } - managedHSMsPurge := itemToPurge{ - resourceType: "Managed HSM", - count: len(managedHSMs), - purge: func(skipPurge bool, self *itemToPurge) error { - return p.purgeManagedHSMs(ctx, managedHSMs, skipPurge) - }, - } - appConfigsPurge := itemToPurge{ - resourceType: "App Configuration", - count: len(appConfigs), - purge: func(skipPurge bool, self *itemToPurge) error { - return p.purgeAppConfigs(ctx, appConfigs, skipPurge) - }, + // Step 2: Collect purge targets from owned RGs while they still exist. + // Must happen BEFORE deletion because getKeyVaults, getManagedHSMs, etc. + // query live resources via ARM which requires the RG to exist. 
+ ownedGroupedResources := make(map[string][]*azapi.Resource, len(owned)) + for _, rgName := range owned { + if resources, ok := groupedResources[rgName]; ok { + ownedGroupedResources[rgName] = resources + } } - aPIManagement := itemToPurge{ - resourceType: "API Management", - count: len(apiManagements), - purge: func(skipPurge bool, self *itemToPurge) error { - return p.purgeAPIManagement(ctx, apiManagements, skipPurge) - }, + purgeItem, err := p.collectPurgeItems(ctx, ownedGroupedResources) + if err != nil { + return nil, fmt.Errorf("collecting purge targets: %w", err) } - var purgeItem []itemToPurge - for _, item := range []itemToPurge{keyVaultsPurge, managedHSMsPurge, appConfigsPurge, aPIManagement} { - if item.count > 0 { - purgeItem = append(purgeItem, item) + // Step 3: Delete owned RGs. + // Log Analytics Workspaces are force-deleted inside deleteRGList + // (before each owned RG deletion) when purge is enabled. + _, deleteErr := p.deleteRGList( + ctx, deploymentToDelete.SubscriptionId(), owned, groupedResources, options, + ) + + // Void deployment state after successful deletion. + // This ensures subsequent azd provision works correctly even if all RGs were skipped. + if deleteErr == nil { + if err := p.voidDeploymentState(ctx, deploymentToDelete); err != nil { + return nil, fmt.Errorf("voiding deployment state: %w", err) } } - // cognitive services are grouped by resource group because the name of the resource group is required to purge - groupByKind := cognitiveAccountsByKind(cognitiveAccounts) - for name, cogAccounts := range groupByKind { - addPurgeItem := itemToPurge{ - resourceType: name, - count: len(cogAccounts), - purge: func(skipPurge bool, self *itemToPurge) error { - return p.purgeCognitiveAccounts(ctx, self.cognitiveAccounts, skipPurge) - }, - cognitiveAccounts: groupByKind[name], - } - purgeItem = append(purgeItem, addPurgeItem) + // Show skipped resource groups. 
+ for _, skip := range skipped { + p.console.Message(ctx, fmt.Sprintf(" Skipped: %s (%s)", skip.Name, skip.Reason)) } - if err := p.purgeItems(ctx, purgeItem, options); err != nil { - return nil, fmt.Errorf("purging resources: %w", err) + // Step 4: Purge soft-deleted resources. + // Always attempt purge even after partial deletion failure — some RGs + // may have been deleted successfully, and their soft-deleted resources + // (Key Vaults, Managed HSMs, etc.) need purging to avoid name collisions + // on reprovisioning. On retry, deleted RGs will be classified as + // "already deleted" (Tier 2: 404) and their purge targets would be lost. + // + // Known limitation: purge items are collected from ALL owned RGs before + // deletion. On partial failure, purge attempts may fail for resources in + // non-deleted RGs (still live, not soft-deleted). Since purge functions + // abort on first error, iteration order may prevent some deleted-RG + // resources from being purged. This is strictly better than the previous + // behavior (no purge at all on partial failure). A future improvement + // could collect purge items per-RG and filter by the deleted set. + purgeErr := p.purgeItems(ctx, purgeItem, options) + + // Report deletion errors first — they're the primary failure. + // Purge errors after partial deletion are expected (resources in + // non-deleted RGs are still live and cannot be purged yet). 
+ if deleteErr != nil { + return nil, fmt.Errorf("deleting resource groups: %w", deleteErr) + } + if purgeErr != nil { + return nil, fmt.Errorf("purging resources: %w", purgeErr) } } @@ -1181,119 +1263,6 @@ func getDeploymentOptions(deployments []*azapi.ResourceDeployment) []string { return promptValues } -func (p *BicepProvider) generateResourcesToDelete( - ctx context.Context, - groupedResources map[string][]*azapi.Resource, -) []string { - lines := []string{"Resource(s) to be deleted:"} - - for resourceGroupName, resources := range groupedResources { - lines = append(lines, "") - - // Resource Group - resourceGroupLink := fmt.Sprintf("%s/#@/resource/subscriptions/%s/resourceGroups/%s/overview", - p.portalUrlBase, - p.env.GetSubscriptionId(), - resourceGroupName, - ) - - lines = append(lines, - fmt.Sprintf("%s %s", - output.WithHighLightFormat("Resource Group:"), - output.WithHyperlink(resourceGroupLink, resourceGroupName), - ), - ) - - // Resources in each group - for _, resource := range resources { - resourceTypeName, err := p.resourceManager.GetResourceTypeDisplayName( - ctx, - p.env.GetSubscriptionId(), - resource.Id, - azapi.AzureResourceType(resource.Type), - ) - if err != nil { - // Fall back to static lookup if dynamic lookup fails - resourceTypeName = azapi.GetResourceTypeDisplayName(azapi.AzureResourceType(resource.Type)) - } - if resourceTypeName == "" { - continue - } - - lines = append(lines, fmt.Sprintf(" • %s: %s", resourceTypeName, resource.Name)) - } - } - - return append(lines, "\n") -} - -// promptDeletion prompts the user for confirmation before deleting resources. -// Returns nil if the user confirms, or an error if they deny or an error occurs. 
-func (p *BicepProvider) promptDeletion( - ctx context.Context, - options provisioning.DestroyOptions, - groupedResources map[string][]*azapi.Resource, - resourceCount int, -) error { - if options.Force() { - return nil - } - - p.console.MessageUxItem(ctx, &ux.MultilineMessage{ - Lines: p.generateResourcesToDelete(ctx, groupedResources)}, - ) - confirmDestroy, err := p.console.Confirm(ctx, input.ConsoleOptions{ - Message: fmt.Sprintf( - "Total resources to %s: %d, are you sure you want to continue?", - output.WithErrorFormat("delete"), - resourceCount, - ), - DefaultValue: false, - }) - - if err != nil { - return fmt.Errorf("prompting for delete confirmation: %w", err) - } - - if !confirmDestroy { - return errors.New("user denied delete confirmation") - } - - return nil -} - -// destroyDeployment deletes the azure resources within the deployment and voids the deployment state. -func (p *BicepProvider) destroyDeployment( - ctx context.Context, - deployment infra.Deployment, -) error { - err := async.RunWithProgressE(func(progressMessage azapi.DeleteDeploymentProgress) { - switch progressMessage.State { - case azapi.DeleteResourceStateInProgress: - p.console.ShowSpinner(ctx, progressMessage.Message, input.Step) - case azapi.DeleteResourceStateSucceeded: - p.console.StopSpinner(ctx, progressMessage.Message, input.StepDone) - case azapi.DeleteResourceStateFailed: - p.console.StopSpinner(ctx, progressMessage.Message, input.StepFailed) - } - }, func(progress *async.Progress[azapi.DeleteDeploymentProgress]) error { - optionsMap, err := convert.ToMap(p.options) - if err != nil { - return err - } - - return deployment.Delete(ctx, optionsMap, progress) - }) - - if err != nil { - return err - } - - p.console.Message(ctx, "") - - return nil -} - func itemsCountAsText(items []itemToPurge) string { count := len(items) if count < 1 { diff --git a/cli/azd/pkg/infra/provisioning/bicep/bicep_provider_test.go b/cli/azd/pkg/infra/provisioning/bicep/bicep_provider_test.go index 
97f8a7d634b..4669ca88f2f 100644 --- a/cli/azd/pkg/infra/provisioning/bicep/bicep_provider_test.go +++ b/cli/azd/pkg/infra/provisioning/bicep/bicep_provider_test.go @@ -15,14 +15,16 @@ import ( "os" "path/filepath" "strings" + "sync/atomic" "testing" "time" "github.com/Azure/azure-sdk-for-go/sdk/azcore" - "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/apimanagement/armapimanagement" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/appconfiguration/armappconfiguration" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/keyvault/armkeyvault" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armlocks" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources" "github.com/azure/azure-dev/cli/azd/internal/tracing" "github.com/azure/azure-dev/cli/azd/pkg/account" @@ -181,18 +183,49 @@ func TestBicepDestroy(t *testing.T) { prepareStateMocks(mockContext) prepareDestroyMocks(mockContext) - // Setup console mocks - mockContext.Console.WhenConfirm(func(options input.ConsoleOptions) bool { - return strings.Contains(options.Message, "are you sure you want to continue") - }).Respond(true) + // Register credential provider so Tier 4 lock/resource checks work. + mockContext.Container.MustRegisterSingleton( + func() account.SubscriptionCredentialProvider { + return mockaccount.SubscriptionCredentialProviderFunc( + func(_ context.Context, _ string) (azcore.TokenCredential, error) { + return mockContext.Credentials, nil + }, + ) + }, + ) + + // Register ARM client options so Tier 4 helpers use mock HTTP transport. + mockContext.Container.MustRegisterSingleton( + func() *arm.ClientOptions { + return mockContext.ArmClientOptions + }, + ) + + // Tier 4 lock check: no locks on the RG. 
+ mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.Contains(request.URL.Path, "providers/Microsoft.Authorization/locks") + }).RespondFn(func(request *http.Request) (*http.Response, error) { + emptyLocks := armlocks.ManagementLockListResult{ + Value: []*armlocks.ManagementLockObject{}, + } + return mocks.CreateHttpResponseWithBody( + request, http.StatusOK, emptyLocks, + ) + }) + // Snapshot unavailable → prompts user for each unknown RG. mockContext.Console.WhenConfirm(func(options input.ConsoleOptions) bool { return strings.Contains( - options.Message, - "Would you like to permanently delete these resources instead", + options.Message, "Delete resource group 'RESOURCE_GROUP'?", ) }).Respond(true) + // After classification, an overall confirmation prompt fires for all owned RGs. + mockContext.Console.WhenConfirm(func(options input.ConsoleOptions) bool { + return strings.Contains(options.Message, "Delete 1 resource group(s)") + }).Respond(true) + infraProvider := createBicepProvider(t, mockContext) destroyOptions := provisioning.NewDestroyOptions(false, false) @@ -201,9 +234,11 @@ func TestBicepDestroy(t *testing.T) { require.Nil(t, err) require.NotNil(t, destroyResult) - // Verify console prompts + // Verify both prompts fired: snapshot-unavailable per-RG + overall confirmation. consoleOutput := mockContext.Console.Output() - require.Len(t, consoleOutput, 4) + require.Len(t, consoleOutput, 2) + require.Contains(t, consoleOutput[0], "Delete resource group 'RESOURCE_GROUP'?") + require.Contains(t, consoleOutput[1], "Delete 1 resource group(s)") }) t.Run("InteractiveForceAndPurge", func(t *testing.T) { @@ -220,11 +255,9 @@ func TestBicepDestroy(t *testing.T) { require.Nil(t, err) require.NotNil(t, destroyResult) - // Verify console prompts + // Verify console prompts — force+purge bypasses classification prompt and purge prompt. 
consoleOutput := mockContext.Console.Output() - require.Len(t, consoleOutput, 2) - require.Contains(t, consoleOutput[0], "Deleting your resources can take some time") - require.Contains(t, consoleOutput[1], "") + require.Len(t, consoleOutput, 0) }) } @@ -244,9 +277,272 @@ func TestBicepDestroyLogAnalyticsWorkspace(t *testing.T) { require.NotNil(t, destroyResult) consoleOutput := mockContext.Console.Output() - require.Len(t, consoleOutput, 2) - require.Contains(t, consoleOutput[0], "Deleting your resources can take some time") - require.Contains(t, consoleOutput[1], "") + require.Len(t, consoleOutput, 0) + }) +} + +// TestBicepDestroyClassifyAndDelete tests the classifyResourceGroups + deleteRGList orchestration, +// including force-bypass, snapshot classification, void-state lifecycle, and purge scoping. +func TestBicepDestroyClassifyAndDelete(t *testing.T) { + t.Run("ForceProtectsExternalRGs", func(t *testing.T) { + // When --force is set with a snapshot, snapshot still protects external RGs. + // Owned RGs are deleted, external RGs (not in snapshot) are skipped. + mockContext := mocks.NewMockContext(context.Background()) + prepareBicepMocks(mockContext) + + tracker, snapshot := prepareClassifyDestroyMocks(mockContext, classifyMockCfg{ + rgNames: []string{"rg-created", "rg-existing"}, + ownedRGs: []string{"rg-created"}, + }) + + infraProvider := createBicepProvider(t, mockContext) + infraProvider.snapshotPredictedRGsOverride = snapshot + + destroyOptions := provisioning.NewDestroyOptions(true, false) // force=true, purge=false + result, err := infraProvider.Destroy(*mockContext.Context, destroyOptions) + + require.NoError(t, err) + require.NotNil(t, result) + + // Created RG is deleted (snapshot owned). + assert.Equal(t, int32(1), tracker.rgDeletes["rg-created"].Load(), + "rg-created should be deleted when force=true (snapshot owned)") + // External RG is protected even with --force (not in snapshot). 
+ assert.Equal(t, int32(0), tracker.rgDeletes["rg-existing"].Load(), + "rg-existing should be SKIPPED when force=true (snapshot external)") + }) + + t.Run("ClassificationFiltersDeletion", func(t *testing.T) { + // Snapshot classification: owned RG deleted, external RG skipped. + mockContext := mocks.NewMockContext(context.Background()) + prepareBicepMocks(mockContext) + + tracker, snapshot := prepareClassifyDestroyMocks(mockContext, classifyMockCfg{ + rgNames: []string{"rg-created", "rg-existing"}, + ownedRGs: []string{"rg-created"}, + }) + + // Overall confirmation prompt fires for owned RGs. + mockContext.Console.WhenConfirm(func(options input.ConsoleOptions) bool { + return strings.Contains(options.Message, "Delete 1 resource group(s)") + }).Respond(true) + + infraProvider := createBicepProvider(t, mockContext) + infraProvider.snapshotPredictedRGsOverride = snapshot + + destroyOptions := provisioning.NewDestroyOptions(false, false) + result, err := infraProvider.Destroy(*mockContext.Context, destroyOptions) + + require.NoError(t, err) + require.NotNil(t, result) + + // Only the owned RG should be deleted. + assert.Equal(t, int32(1), tracker.rgDeletes["rg-created"].Load(), + "rg-created (snapshot owned) should be deleted") + // External RG should be skipped. + assert.Equal(t, int32(0), tracker.rgDeletes["rg-existing"].Load(), + "rg-existing (snapshot external) should be skipped") + }) + + t.Run("VoidStateCalledOnSuccess", func(t *testing.T) { + // After successful classification + deletion, voidDeploymentState must be called. + mockContext := mocks.NewMockContext(context.Background()) + prepareBicepMocks(mockContext) + + tracker, snapshot := prepareClassifyDestroyMocks(mockContext, classifyMockCfg{ + rgNames: []string{"rg-created"}, + ownedRGs: []string{"rg-created"}, + }) + + // Overall confirmation prompt fires for owned RGs. 
+ mockContext.Console.WhenConfirm(func(options input.ConsoleOptions) bool { + return strings.Contains(options.Message, "Delete 1 resource group(s)") + }).Respond(true) + + infraProvider := createBicepProvider(t, mockContext) + infraProvider.snapshotPredictedRGsOverride = snapshot + + destroyOptions := provisioning.NewDestroyOptions(false, false) + result, err := infraProvider.Destroy(*mockContext.Context, destroyOptions) + + require.NoError(t, err) + require.NotNil(t, result) + + // Void state should be called exactly once after successful deletion. + assert.Equal(t, int32(1), tracker.voidStatePUTs.Load(), + "voidDeploymentState should be called after successful classification") + }) + + t.Run("VoidStateCalledWhenAllRGsSkipped", func(t *testing.T) { + // Even when all RGs are classified as external (all skipped), + // voidDeploymentState must still be called to maintain deployment state. + mockContext := mocks.NewMockContext(context.Background()) + prepareBicepMocks(mockContext) + + tracker, snapshot := prepareClassifyDestroyMocks(mockContext, classifyMockCfg{ + rgNames: []string{"rg-ext-1", "rg-ext-2"}, + ownedRGs: []string{}, // all external per snapshot + }) + + infraProvider := createBicepProvider(t, mockContext) + infraProvider.snapshotPredictedRGsOverride = snapshot + + destroyOptions := provisioning.NewDestroyOptions(false, false) + result, err := infraProvider.Destroy(*mockContext.Context, destroyOptions) + + require.NoError(t, err) + require.NotNil(t, result) + + // Zero RGs deleted (all external). + assert.Equal(t, int32(0), tracker.rgDeletes["rg-ext-1"].Load()) + assert.Equal(t, int32(0), tracker.rgDeletes["rg-ext-2"].Load()) + + // Void state STILL called even though no RGs were deleted. + assert.Equal(t, int32(1), tracker.voidStatePUTs.Load(), + "voidDeploymentState should be called even when all RGs are skipped") + }) + + t.Run("PurgeTargetsScopedToOwnedRGs", func(t *testing.T) { + // Purge targets (KeyVaults, etc.) 
should only be collected from + // owned (deleted) RGs, not from skipped (external) RGs. + // kv-ext is intentionally NOT mocked — if the code incorrectly + // includes it in the purge set, the mock framework panics. + mockContext := mocks.NewMockContext(context.Background()) + prepareBicepMocks(mockContext) + + tracker, snapshot := prepareClassifyDestroyMocks(mockContext, classifyMockCfg{ + rgNames: []string{"rg-created", "rg-existing"}, + ownedRGs: []string{"rg-created"}, + withPurgeResources: true, // adds a KeyVault to each RG + }) + + // Overall confirmation prompt fires for owned RGs. + mockContext.Console.WhenConfirm(func(options input.ConsoleOptions) bool { + return strings.Contains(options.Message, "Delete 1 resource group(s)") + }).Respond(true) + + infraProvider := createBicepProvider(t, mockContext) + infraProvider.snapshotPredictedRGsOverride = snapshot + + destroyOptions := provisioning.NewDestroyOptions(false, true) // purge=true + result, err := infraProvider.Destroy(*mockContext.Context, destroyOptions) + + require.NoError(t, err) + require.NotNil(t, result) + + // Only the owned RG's KeyVault should be inspected for purge properties. + assert.Equal(t, int32(1), tracker.kvGETs["kv-owned"].Load(), + "owned RG's KeyVault should be inspected for purge properties") + + // Owned RG's KeyVault should be purged (soft-delete enabled, purge protection off). + assert.Equal(t, int32(1), tracker.kvPurges["kv-owned"].Load(), + "owned RG's KeyVault should be purged") + }) + + t.Run("UserCancelPreservesDeploymentState", func(t *testing.T) { + // When user declines the "Delete N resource group(s)?" confirmation, + // voidDeploymentState must NOT be called and env keys must NOT be invalidated. 
+ mockContext := mocks.NewMockContext(context.Background()) + prepareBicepMocks(mockContext) + + tracker, snapshot := prepareClassifyDestroyMocks(mockContext, classifyMockCfg{ + rgNames: []string{"rg-created"}, + ownedRGs: []string{"rg-created"}, + }) + + // User declines the overall confirmation prompt. + mockContext.Console.WhenConfirm(func(options input.ConsoleOptions) bool { + return strings.Contains(options.Message, "Delete 1 resource group(s)") + }).Respond(false) + + infraProvider := createBicepProvider(t, mockContext) + infraProvider.snapshotPredictedRGsOverride = snapshot + + destroyOptions := provisioning.NewDestroyOptions(false, false) + result, err := infraProvider.Destroy(*mockContext.Context, destroyOptions) + + require.Error(t, err, "user cancellation should return an error") + require.ErrorIs(t, err, errUserCancelled) + require.Nil(t, result, "result should be nil on user cancellation") + + // No RGs should be deleted — user cancelled. + assert.Equal(t, int32(0), tracker.rgDeletes["rg-created"].Load(), + "rg-created should NOT be deleted when user cancels") + + // Void state should NOT be called — user cancelled. + assert.Equal(t, int32(0), tracker.voidStatePUTs.Load(), + "voidDeploymentState should NOT be called when user cancels confirmation") + }) + + t.Run("Tier4LockVetoPreventsDeletion", func(t *testing.T) { + // A RG with a CanNotDelete lock is vetoed by Tier 4, even though snapshot says owned. 
+ mockContext := mocks.NewMockContext(context.Background()) + prepareBicepMocks(mockContext) + + tracker, snapshot := prepareClassifyDestroyMocks(mockContext, classifyMockCfg{ + rgNames: []string{"rg-unlocked", "rg-locked"}, + ownedRGs: []string{"rg-unlocked", "rg-locked"}, + rgLocks: map[string][]*armlocks.ManagementLockObject{ + "rg-locked": { + { + Name: new("no-delete"), + Properties: &armlocks.ManagementLockProperties{ + Level: new(armlocks.LockLevelCanNotDelete), + }, + }, + }, + }, + }) + + // Confirmation prompt for owned RGs (only rg-unlocked should reach confirmation). + mockContext.Console.WhenConfirm(func(options input.ConsoleOptions) bool { + return strings.Contains(options.Message, "Delete") + }).Respond(true) + + infraProvider := createBicepProvider(t, mockContext) + infraProvider.snapshotPredictedRGsOverride = snapshot + + destroyOptions := provisioning.NewDestroyOptions(false, false) + result, err := infraProvider.Destroy(*mockContext.Context, destroyOptions) + + require.NoError(t, err) + require.NotNil(t, result) + + // Unlocked RG should be deleted. + assert.Equal(t, int32(1), tracker.rgDeletes["rg-unlocked"].Load(), + "rg-unlocked should be deleted (no lock)") + // Locked RG should NOT be deleted (Tier 4 veto). + assert.Equal(t, int32(0), tracker.rgDeletes["rg-locked"].Load(), + "rg-locked should NOT be deleted (Tier 4 CanNotDelete lock veto)") + }) + + t.Run("MixedOwnedExternalOnlyOwnedDeleted", func(t *testing.T) { + // End-to-end: 3 RGs — 1 owned (snapshot), 2 external (not in snapshot). + // Only the owned RG should be deleted. 
+ mockContext := mocks.NewMockContext(context.Background()) + prepareBicepMocks(mockContext) + mockContext.Console.SetNoPromptMode(true) // non-interactive + + tracker, snapshot := prepareClassifyDestroyMocks(mockContext, classifyMockCfg{ + rgNames: []string{"rg-mine", "rg-shared", "rg-mystery"}, + ownedRGs: []string{"rg-mine"}, + }) + + infraProvider := createBicepProvider(t, mockContext) + infraProvider.snapshotPredictedRGsOverride = snapshot + + destroyOptions := provisioning.NewDestroyOptions(false, false) + result, err := infraProvider.Destroy(*mockContext.Context, destroyOptions) + + require.NoError(t, err) + require.NotNil(t, result) + + assert.Equal(t, int32(1), tracker.rgDeletes["rg-mine"].Load(), + "rg-mine (snapshot owned) should be deleted") + assert.Equal(t, int32(0), tracker.rgDeletes["rg-shared"].Load(), + "rg-shared (snapshot external) should be skipped") + assert.Equal(t, int32(0), tracker.rgDeletes["rg-mystery"].Load(), + "rg-mystery (snapshot external) should be skipped") }) } @@ -523,7 +819,7 @@ var testEnvDeployment armresources.DeploymentExtended = armresources.DeploymentE ID: new("/subscriptions/SUBSCRIPTION_ID/resourceGroups/RESOURCE_GROUP"), }, }, - ProvisioningState: to.Ptr(armresources.ProvisioningStateSucceeded), + ProvisioningState: new(armresources.ProvisioningStateSucceeded), Timestamp: new(time.Now()), }, } @@ -575,6 +871,7 @@ func prepareDestroyMocks(mockContext *mocks.MockContext) { Name: new(resourceName), Type: new(string(resourceType)), Location: new("eastus2"), + Tags: map[string]*string{"azd-env-name": new("test-env")}, } } @@ -596,7 +893,7 @@ func prepareDestroyMocks(mockContext *mocks.MockContext) { ID: new(azure.ResourceGroupRID("SUBSCRIPTION_ID", "RESOURCE_GROUP")), Location: new("eastus2"), Name: new("RESOURCE_GROUP"), - Type: to.Ptr(string(azapi.AzureResourceTypeResourceGroup)), + Type: new(string(azapi.AzureResourceTypeResourceGroup)), Tags: map[string]*string{ "azd-env-name": new("test-env"), }, @@ -615,6 +912,14 @@ 
func prepareDestroyMocks(mockContext *mocks.MockContext) { return mocks.CreateHttpResponseWithBody(request, http.StatusOK, result) }) + // GET individual resource group by name (kept for HTTP mock coverage). + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.HasSuffix(request.URL.Path, "subscriptions/SUBSCRIPTION_ID/resourcegroups/RESOURCE_GROUP") + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return mocks.CreateHttpResponseWithBody(request, http.StatusOK, *resourceGroup) + }) + // Get list of resources to delete mockContext.HttpClient.When(func(request *http.Request) bool { return request.Method == http.MethodGet && strings.Contains(request.URL.Path, "/resources") @@ -622,6 +927,17 @@ func prepareDestroyMocks(mockContext *mocks.MockContext) { return mocks.CreateHttpResponseWithBody(request, http.StatusOK, resourceList) }) + // Tier 4 lock check: no management locks on the RG. + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.Contains(request.URL.Path, "providers/Microsoft.Authorization/locks") + }).RespondFn(func(request *http.Request) (*http.Response, error) { + emptyLocks := armlocks.ManagementLockListResult{ + Value: []*armlocks.ManagementLockObject{}, + } + return mocks.CreateHttpResponseWithBody(request, http.StatusOK, emptyLocks) + }) + // Get Key Vault getKeyVaultMock(mockContext, "/vaults/kv-123", "kv-123", "eastus2") getKeyVaultMock(mockContext, "/vaults/kv2-123", "kv2-123", "eastus2") @@ -711,7 +1027,7 @@ func prepareDestroyMocks(mockContext *mocks.MockContext) { }, Type: new("Microsoft.Resources/deployments"), Properties: &armresources.DeploymentPropertiesExtended{ - ProvisioningState: to.Ptr(armresources.ProvisioningStateSucceeded), + ProvisioningState: new(armresources.ProvisioningStateSucceeded), Timestamp: new(time.Now()), }, }, @@ -869,7 +1185,7 @@ func 
prepareLogAnalyticsDestroyMocks(mockContext *mocks.MockContext) { ID: new(azure.ResourceGroupRID("SUBSCRIPTION_ID", "RESOURCE_GROUP")), Location: new("eastus2"), Name: new("RESOURCE_GROUP"), - Type: to.Ptr(string(azapi.AzureResourceTypeResourceGroup)), + Type: new(string(azapi.AzureResourceTypeResourceGroup)), Tags: map[string]*string{ "azd-env-name": new("test-env"), }, @@ -937,7 +1253,7 @@ func prepareLogAnalyticsDestroyMocks(mockContext *mocks.MockContext) { }, Type: new("Microsoft.Resources/deployments"), Properties: &armresources.DeploymentPropertiesExtended{ - ProvisioningState: to.Ptr(armresources.ProvisioningStateSucceeded), + ProvisioningState: new(armresources.ProvisioningStateSucceeded), Timestamp: new(time.Now()), }, }, @@ -956,81 +1272,347 @@ func httpRespondFn(request *http.Request) (*http.Response, error) { }, nil } -// From a mocked list of deployments where there are multiple deployments with the matching tag, expect to pick the most -// recent one. -func TestFindCompletedDeployments(t *testing.T) { - mockContext := mocks.NewMockContext(context.Background()) - mockContext.CommandRunner.When(func(args exec.RunArgs, command string) bool { - return strings.Contains(args.Cmd, "bicep") && strings.Contains(command, "--version") - }).RespondFn(func(args exec.RunArgs) (exec.RunResult, error) { - return exec.NewRunResult(0, fmt.Sprintf("Bicep CLI version %s (abcdef0123)", bicep.Version), ""), nil - }) - // Have `bicep build` return a ARM template that targets a resource group. 
- mockContext.CommandRunner.When(func(args exec.RunArgs, command string) bool { - return strings.Contains(args.Cmd, "bicep") && args.Args[0] == "build" - }).RespondFn(func(args exec.RunArgs) (exec.RunResult, error) { - armTemplate := azure.ArmTemplate{ - Schema: "https://schema.management.azure.com/schemas/2018-05-01/subscriptionDeploymentTemplate.json#", - ContentVersion: "1.0.0.0", - Parameters: azure.ArmTemplateParameterDefinitions{ - "environmentName": {Type: "string"}, - "location": {Type: "string"}, - }, - Outputs: azure.ArmTemplateOutputs{ - "WEBSITE_URL": {Type: "string"}, - }, - } +// --- Multi-RG classification destroy test helpers --- - bicepBytes, _ := json.Marshal(armTemplate) +// classifyMockCfg configures a multi-RG destroy test scenario. +type classifyMockCfg struct { + rgNames []string // RG names referenced in the deployment + ownedRGs []string // RG names the snapshot considers owned + withPurgeResources bool // adds a KeyVault to each RG for purge testing + rgLocks map[string][]*armlocks.ManagementLockObject // per-RG locks (nil key = empty locks) +} - return exec.RunResult{ - Stdout: string(bicepBytes), - }, nil - }) +// classifyCallTracker tracks HTTP calls made during classification integration tests. +type classifyCallTracker struct { + rgDeletes map[string]*atomic.Int32 // per-RG DELETE call counts + voidStatePUTs atomic.Int32 // void state PUT calls + kvGETs map[string]*atomic.Int32 // per-KeyVault GET calls (purge property inspection) + kvPurges map[string]*atomic.Int32 // per-KeyVault purge POST calls +} - bicepProvider := createBicepProvider(t, mockContext) +// prepareClassifyDestroyMocks sets up HTTP mocks for multi-RG destroy + classification tests. +// It registers deployment state, per-RG resource listing, RG deletion, +// void state, and optionally KeyVault purge mocks. Returns a tracker for asserting call counts +// and a snapshot map that must be injected into the provider via snapshotPredictedRGsOverride. 
+func prepareClassifyDestroyMocks( + mockContext *mocks.MockContext, + cfg classifyMockCfg, +) (*classifyCallTracker, map[string]bool) { + // Register SubscriptionCredentialProvider in the mock container so Tier 4 + // helpers (listResourceGroupLocks, listResourceGroupResourcesWithTags) can + // resolve credentials. Without this, the fail-safe error handling vetoes all RGs. + mockContext.Container.MustRegisterSingleton( + func() account.SubscriptionCredentialProvider { + return mockaccount.SubscriptionCredentialProviderFunc( + func(_ context.Context, _ string) (azcore.TokenCredential, error) { + return mockContext.Credentials, nil + }, + ) + }, + ) - baseDate := "1989-10-31" - envTag := "env-tag" - layerName := "" + // Register ARM client options so Tier 4 helpers use the mock HTTP transport. + mockContext.Container.MustRegisterSingleton( + func() *arm.ClientOptions { + return mockContext.ArmClientOptions + }, + ) - deployments, err := bicepProvider.deploymentManager.CompletedDeployments( - *mockContext.Context, &mockedScope{ - baseDate: baseDate, - envTag: envTag, - }, envTag, layerName, "") - require.NoError(t, err) - require.Equal(t, 1, len(deployments)) - // should take the base date + 2 years - expectedDate, err := time.Parse(time.DateOnly, baseDate) - require.NoError(t, err) - expectedDate = expectedDate.Add(time.Hour * 24 * 365 * 2) + tracker := &classifyCallTracker{ + rgDeletes: make(map[string]*atomic.Int32, len(cfg.rgNames)), + kvGETs: make(map[string]*atomic.Int32), + kvPurges: make(map[string]*atomic.Int32), + } + for _, rg := range cfg.rgNames { + tracker.rgDeletes[rg] = &atomic.Int32{} + } - deploymentDate := deployments[0].Timestamp - require.Equal(t, expectedDate, deploymentDate) -} + // --- Build multi-RG deployment with OutputResources referencing each RG --- + outputResources := make([]*armresources.ResourceReference, len(cfg.rgNames)) + for i, rg := range cfg.rgNames { + id := fmt.Sprintf("/subscriptions/SUBSCRIPTION_ID/resourceGroups/%s", rg) + 
outputResources[i] = &armresources.ResourceReference{ID: &id} + } -type mockedScope struct { - envTag string - baseDate string -} + deployment := armresources.DeploymentExtended{ + ID: new("DEPLOYMENT_ID"), + Name: new("test-env"), + Location: new("eastus2"), + Tags: map[string]*string{"azd-env-name": new("test-env")}, + Type: new("Microsoft.Resources/deployments"), + Properties: &armresources.DeploymentPropertiesExtended{ + Outputs: map[string]any{ + "WEBSITE_URL": map[string]any{"value": "http://myapp.azurewebsites.net", "type": "string"}, + }, + OutputResources: outputResources, + ProvisioningState: new(armresources.ProvisioningStateSucceeded), + Timestamp: new(time.Now()), + }, + } -type mockResourceManager struct{} + deployResultBytes, _ := json.Marshal(deployment) -func (m *mockResourceManager) WalkDeploymentOperations( - ctx context.Context, - deployment infra.Deployment, - fn infra.WalkDeploymentOperationFunc, -) error { - return nil -} + // GET single deployment (used by Resources(), VoidState(), and Get()) + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && strings.HasSuffix( + request.URL.Path, + "/subscriptions/SUBSCRIPTION_ID/providers/Microsoft.Resources/deployments/test-env", + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewBuffer(deployResultBytes)), + }, nil + }) -func (m *mockResourceManager) GetResourceTypeDisplayName( - ctx context.Context, - subscriptionId string, - resourceId string, - resourceType azapi.AzureResourceType, -) (string, error) { + // GET list deployments (used by CompletedDeployments) + deploymentsPage := &armresources.DeploymentListResult{ + Value: []*armresources.DeploymentExtended{&deployment}, + } + deploymentsPageBytes, _ := json.Marshal(deploymentsPage) + + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet 
&& strings.HasSuffix( + request.URL.Path, + "/SUBSCRIPTION_ID/providers/Microsoft.Resources/deployments/", + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewBuffer(deploymentsPageBytes)), + }, nil + }) + + // --- Per-RG resource listing --- + // When withPurgeResources is true, the first RG gets "kv-owned" and the second gets "kv-ext". + kvMapping := map[string]string{} // rgName -> kvName + if cfg.withPurgeResources && len(cfg.rgNames) >= 2 { + kvMapping[cfg.rgNames[0]] = "kv-owned" + kvMapping[cfg.rgNames[1]] = "kv-ext" + } + + for _, rgName := range cfg.rgNames { + resources := []*armresources.GenericResourceExpanded{} + + if kvName, ok := kvMapping[rgName]; ok { + kvID := fmt.Sprintf( + "/subscriptions/SUBSCRIPTION_ID/resourceGroups/%s/providers/%s/%s", + rgName, string(azapi.AzureResourceTypeKeyVault), kvName, + ) + resources = append(resources, &armresources.GenericResourceExpanded{ + ID: &kvID, + Name: new(kvName), + Type: new(string(azapi.AzureResourceTypeKeyVault)), + Location: new("eastus2"), + Tags: map[string]*string{"azd-env-name": new("test-env")}, + }) + } + + resList := armresources.ResourceListResult{Value: resources} + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.Contains(request.URL.Path, fmt.Sprintf("resourceGroups/%s/resources", rgName)) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return mocks.CreateHttpResponseWithBody(request, http.StatusOK, resList) + }) + } + + // --- Per-RG deletion mocks (tracked) --- + for _, rgName := range cfg.rgNames { + counter := tracker.rgDeletes[rgName] + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodDelete && + strings.HasSuffix( + request.URL.Path, + fmt.Sprintf("subscriptions/SUBSCRIPTION_ID/resourcegroups/%s", rgName), + ) + }).RespondFn(func(request 
*http.Request) (*http.Response, error) { + counter.Add(1) + return httpRespondFn(request) + }) + } + + // --- Tier 4 lock listing mocks (return configured locks or empty for each RG) --- + for _, rgName := range cfg.rgNames { + locks := cfg.rgLocks[rgName] // nil = empty locks + lockResult := armlocks.ManagementLockListResult{Value: locks} + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.Contains( + request.URL.Path, + fmt.Sprintf( + "resourceGroups/%s/providers/Microsoft.Authorization/locks", + rgName, + ), + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return mocks.CreateHttpResponseWithBody(request, http.StatusOK, lockResult) + }) + } + + // --- LRO polling endpoint --- + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.Contains(request.URL.String(), "url-to-poll.net") + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return mocks.CreateEmptyHttpResponse(request, 204) + }) + + // --- Void state: PUT empty deployment (tracked) --- + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodPut && + strings.Contains( + request.URL.Path, + "/subscriptions/SUBSCRIPTION_ID/providers/Microsoft.Resources/deployments/", + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + tracker.voidStatePUTs.Add(1) + result := &armresources.DeploymentsClientCreateOrUpdateAtSubscriptionScopeResponse{ + DeploymentExtended: armresources.DeploymentExtended{ + ID: new("DEPLOYMENT_ID"), + Name: new("test-env"), + Location: new("eastus2"), + Tags: map[string]*string{"azd-env-name": new("test-env")}, + Type: new("Microsoft.Resources/deployments"), + Properties: &armresources.DeploymentPropertiesExtended{ + ProvisioningState: new(armresources.ProvisioningStateSucceeded), + Timestamp: new(time.Now()), + }, + }, + } + return 
mocks.CreateHttpResponseWithBody(request, http.StatusOK, result) + }) + + // --- KeyVault mocks (for purge scoping test) --- + if cfg.withPurgeResources { + // Only mock the owned RG's KeyVault (kv-owned). + // kv-ext is intentionally NOT mocked — if the code incorrectly includes it + // in the purge set, the mock framework panics (which fails the test). + kvOwnedGetCounter := &atomic.Int32{} + tracker.kvGETs["kv-owned"] = kvOwnedGetCounter + + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.HasSuffix(request.URL.Path, "/vaults/kv-owned") + }).RespondFn(func(request *http.Request) (*http.Response, error) { + kvOwnedGetCounter.Add(1) + kvResponse := armkeyvault.VaultsClientGetResponse{ + Vault: armkeyvault.Vault{ + ID: new(fmt.Sprintf( + "/subscriptions/SUBSCRIPTION_ID/resourceGroups/%s/providers/%s/kv-owned", + cfg.rgNames[0], string(azapi.AzureResourceTypeKeyVault), + )), + Name: new("kv-owned"), + Location: new("eastus2"), + Properties: &armkeyvault.VaultProperties{ + EnableSoftDelete: new(true), + EnablePurgeProtection: new(false), + }, + }, + } + kvBytes, _ := json.Marshal(kvResponse) + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewBuffer(kvBytes)), + }, nil + }) + + // Purge mock for kv-owned (tracked) + kvPurgeCounter := &atomic.Int32{} + tracker.kvPurges["kv-owned"] = kvPurgeCounter + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodPost && + strings.HasSuffix(request.URL.Path, "deletedVaults/kv-owned/purge") + }).RespondFn(func(request *http.Request) (*http.Response, error) { + kvPurgeCounter.Add(1) + return httpRespondFn(request) + }) + } + + // Build snapshot map from ownedRGs. 
+ snapshotMap := make(map[string]bool, len(cfg.ownedRGs)) + for _, rg := range cfg.ownedRGs { + snapshotMap[strings.ToLower(rg)] = true + } + + return tracker, snapshotMap +} + +// From a mocked list of deployments where there are multiple deployments with the matching tag, expect to pick the most +// recent one. +func TestFindCompletedDeployments(t *testing.T) { + mockContext := mocks.NewMockContext(context.Background()) + mockContext.CommandRunner.When(func(args exec.RunArgs, command string) bool { + return strings.Contains(args.Cmd, "bicep") && strings.Contains(command, "--version") + }).RespondFn(func(args exec.RunArgs) (exec.RunResult, error) { + return exec.NewRunResult(0, fmt.Sprintf("Bicep CLI version %s (abcdef0123)", bicep.Version), ""), nil + }) + // Have `bicep build` return an ARM template that targets a resource group. + mockContext.CommandRunner.When(func(args exec.RunArgs, command string) bool { + return strings.Contains(args.Cmd, "bicep") && args.Args[0] == "build" + }).RespondFn(func(args exec.RunArgs) (exec.RunResult, error) { + armTemplate := azure.ArmTemplate{ + Schema: "https://schema.management.azure.com/schemas/2018-05-01/subscriptionDeploymentTemplate.json#", + ContentVersion: "1.0.0.0", + Parameters: azure.ArmTemplateParameterDefinitions{ + "environmentName": {Type: "string"}, + "location": {Type: "string"}, + }, + Outputs: azure.ArmTemplateOutputs{ + "WEBSITE_URL": {Type: "string"}, + }, + } + + bicepBytes, _ := json.Marshal(armTemplate) + + return exec.RunResult{ + Stdout: string(bicepBytes), + }, nil + }) + + bicepProvider := createBicepProvider(t, mockContext) + + baseDate := "1989-10-31" + envTag := "env-tag" + layerName := "" + + deployments, err := bicepProvider.deploymentManager.CompletedDeployments( + *mockContext.Context, &mockedScope{ + baseDate: baseDate, + envTag: envTag, + }, envTag, layerName, "") + require.NoError(t, err) + require.Equal(t, 1, len(deployments)) + // should take the base date + 2 years + expectedDate, err := 
time.Parse(time.DateOnly, baseDate) + require.NoError(t, err) + expectedDate = expectedDate.Add(time.Hour * 24 * 365 * 2) + + deploymentDate := deployments[0].Timestamp + require.Equal(t, expectedDate, deploymentDate) +} + +type mockedScope struct { + envTag string + baseDate string +} + +type mockResourceManager struct{} + +func (m *mockResourceManager) WalkDeploymentOperations( + ctx context.Context, + deployment infra.Deployment, + fn infra.WalkDeploymentOperationFunc, +) error { + return nil +} + +func (m *mockResourceManager) GetResourceTypeDisplayName( + ctx context.Context, + subscriptionId string, + resourceId string, + resourceType azapi.AzureResourceType, +) (string, error) { return azapi.GetResourceTypeDisplayName(resourceType), nil } @@ -1592,7 +2174,7 @@ func TestPreviewWithNilResourceState(t *testing.T) { Changes: []*armresources.WhatIfChange{ // Create scenario: Before is nil, After has value { - ChangeType: to.Ptr(armresources.ChangeTypeCreate), + ChangeType: new(armresources.ChangeTypeCreate), ResourceID: new("/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Web/sites/app1"), Before: nil, After: map[string]any{ @@ -1602,7 +2184,7 @@ func TestPreviewWithNilResourceState(t *testing.T) { }, // Delete scenario: After is nil, Before has value { - ChangeType: to.Ptr(armresources.ChangeTypeDelete), + ChangeType: new(armresources.ChangeTypeDelete), ResourceID: new("/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Web/sites/app2"), Before: map[string]any{ "type": "Microsoft.Web/sites", @@ -1612,7 +2194,7 @@ func TestPreviewWithNilResourceState(t *testing.T) { }, // Modify scenario: Both Before and After have values { - ChangeType: to.Ptr(armresources.ChangeTypeModify), + ChangeType: new(armresources.ChangeTypeModify), ResourceID: new("/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Web/sites/app3"), Before: map[string]any{ "type": "Microsoft.Web/sites", @@ -1625,7 +2207,7 @@ func TestPreviewWithNilResourceState(t *testing.T) { }, // 
Edge case: Both Before and After are nil (should be skipped) { - ChangeType: to.Ptr(armresources.ChangeTypeUnsupported), + ChangeType: new(armresources.ChangeTypeUnsupported), ResourceID: new("/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Unknown/unknown"), Before: nil, After: nil, @@ -2245,3 +2827,835 @@ func TestPlannedOutputsSkipsSecureOutputs(t *testing.T) { {Name: "config"}, }, outputs) } + +// --------------------------------------------------------------------------- +// Coverage-gap tests for destroyViaDeploymentDelete, isDeploymentStacksEnabled, +// deleteRGList error accumulation, and ARM-wiring credential failures. +// --------------------------------------------------------------------------- + +// enableDeploymentStacks enables the deployment.stacks alpha feature via environment +// variable for the duration of the test. Uses t.Setenv for automatic cleanup. +func enableDeploymentStacks(t *testing.T) { + t.Setenv("AZD_ALPHA_ENABLE_DEPLOYMENT_STACKS", "true") +} + +// TestBicepDestroyViaDeploymentStacks tests the deployment-stacks branch of +// Destroy(), covering destroyViaDeploymentDelete (previously 0%) and the +// isDeploymentStacksEnabled true-path (previously 75%). +func TestBicepDestroyViaDeploymentStacks(t *testing.T) { + t.Run("SuccessNoPurge", func(t *testing.T) { + // With deployment stacks enabled and no purge resources, the destroy flow + // should call deployment.Delete() (which deletes each RG), void state, + // and skip purge entirely. 
+ enableDeploymentStacks(t) + mockContext := mocks.NewMockContext(context.Background()) + prepareBicepMocks(mockContext) + + tracker, _ := prepareClassifyDestroyMocks(mockContext, classifyMockCfg{ + rgNames: []string{"rg-alpha", "rg-beta"}, + withPurgeResources: false, + }) + + infraProvider := createBicepProvider(t, mockContext) + destroyOptions := provisioning.NewDestroyOptions(false, false) // force=false, purge=false + result, err := infraProvider.Destroy(*mockContext.Context, destroyOptions) + + require.NoError(t, err) + require.NotNil(t, result) + + // Both RGs deleted via deployment.Delete → DeleteSubscriptionDeployment. + assert.Equal(t, int32(1), tracker.rgDeletes["rg-alpha"].Load(), + "rg-alpha should be deleted via deployment.Delete") + assert.Equal(t, int32(1), tracker.rgDeletes["rg-beta"].Load(), + "rg-beta should be deleted via deployment.Delete") + + // Void state called once (inside DeleteSubscriptionDeployment). + assert.Equal(t, int32(1), tracker.voidStatePUTs.Load(), + "void state should be called once inside DeleteSubscriptionDeployment") + }) + + t.Run("SuccessWithPurge", func(t *testing.T) { + // With deployment stacks enabled AND purge, the deployment-stacks path + // deletes RGs, then collects and purges soft-delete resources from ALL RGs. + enableDeploymentStacks(t) + mockContext := mocks.NewMockContext(context.Background()) + prepareBicepMocks(mockContext) + + tracker, _ := prepareClassifyDestroyMocks(mockContext, classifyMockCfg{ + rgNames: []string{"rg-alpha", "rg-beta"}, + withPurgeResources: true, + }) + + // In the deployment-stacks path, ALL RGs are purged (not just owned ones). + // prepareClassifyDestroyMocks intentionally omits the kv-ext mock (to catch + // incorrect inclusion in the classification path). Add it here for stacks path. 
+ kvExtGetCounter := &atomic.Int32{} + tracker.kvGETs["kv-ext"] = kvExtGetCounter + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.HasSuffix(request.URL.Path, "/vaults/kv-ext") + }).RespondFn(func(request *http.Request) (*http.Response, error) { + kvExtGetCounter.Add(1) + kvResponse := armkeyvault.VaultsClientGetResponse{ + Vault: armkeyvault.Vault{ + ID: new(fmt.Sprintf( + "/subscriptions/SUBSCRIPTION_ID/resourceGroups/rg-beta/providers/%s/kv-ext", + string(azapi.AzureResourceTypeKeyVault), + )), + Name: new("kv-ext"), + Location: new("eastus2"), + Properties: &armkeyvault.VaultProperties{ + EnableSoftDelete: new(true), + EnablePurgeProtection: new(false), + }, + }, + } + kvBytes, _ := json.Marshal(kvResponse) + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewBuffer(kvBytes)), + }, nil + }) + + kvExtPurgeCounter := &atomic.Int32{} + tracker.kvPurges["kv-ext"] = kvExtPurgeCounter + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodPost && + strings.HasSuffix(request.URL.Path, "deletedVaults/kv-ext/purge") + }).RespondFn(func(request *http.Request) (*http.Response, error) { + kvExtPurgeCounter.Add(1) + return httpRespondFn(request) + }) + + // The purge prompt: "Would you like to permanently delete these resources instead?" + mockContext.Console.WhenConfirm(func(options input.ConsoleOptions) bool { + return strings.Contains(options.Message, "permanently delete") + }).Respond(true) + + infraProvider := createBicepProvider(t, mockContext) + destroyOptions := provisioning.NewDestroyOptions(false, true) // force=false, purge=true + result, err := infraProvider.Destroy(*mockContext.Context, destroyOptions) + + require.NoError(t, err) + require.NotNil(t, result) + + // Both RGs deleted. 
+ assert.Equal(t, int32(1), tracker.rgDeletes["rg-alpha"].Load()) + assert.Equal(t, int32(1), tracker.rgDeletes["rg-beta"].Load()) + + // Both KeyVaults inspected and purged (deployment stacks purges ALL RGs). + assert.Equal(t, int32(1), tracker.kvGETs["kv-owned"].Load(), + "kv-owned should be inspected for purge in deployment-stacks path") + assert.Equal(t, int32(1), tracker.kvPurges["kv-owned"].Load(), + "kv-owned should be purged in deployment-stacks path") + assert.Equal(t, int32(1), tracker.kvGETs["kv-ext"].Load(), + "kv-ext should be inspected for purge in deployment-stacks path (ALL RGs)") + assert.Equal(t, int32(1), tracker.kvPurges["kv-ext"].Load(), + "kv-ext should be purged in deployment-stacks path (ALL RGs)") + }) + + t.Run("DeploymentDeleteFailure", func(t *testing.T) { + // When deployment.Delete() fails (simulated here with a non-retryable HTTP 409 on RG deletion), + // destroyViaDeploymentDelete propagates the error and Destroy returns it. + enableDeploymentStacks(t) + mockContext := mocks.NewMockContext(context.Background()) + prepareBicepMocks(mockContext) + + // Register credential/ARM providers that prepareClassifyDestroyMocks normally sets up. + mockContext.Container.MustRegisterSingleton( + func() account.SubscriptionCredentialProvider { + return mockaccount.SubscriptionCredentialProviderFunc( + func(_ context.Context, _ string) (azcore.TokenCredential, error) { + return mockContext.Credentials, nil + }, + ) + }, + ) + mockContext.Container.MustRegisterSingleton( + func() *arm.ClientOptions { + return mockContext.ArmClientOptions + }, + ) + + // Build deployment referencing a single RG. 
+ rgName := "rg-fail" + rgID := fmt.Sprintf("/subscriptions/SUBSCRIPTION_ID/resourceGroups/%s", rgName) + deployment := armresources.DeploymentExtended{ + ID: new("DEPLOYMENT_ID"), + Name: new("test-env"), + Location: new("eastus2"), + Tags: map[string]*string{"azd-env-name": new("test-env")}, + Type: new("Microsoft.Resources/deployments"), + Properties: &armresources.DeploymentPropertiesExtended{ + Outputs: map[string]any{ + "WEBSITE_URL": map[string]any{"value": "http://myapp.azurewebsites.net", "type": "string"}, + }, + OutputResources: []*armresources.ResourceReference{{ID: &rgID}}, + ProvisioningState: new(armresources.ProvisioningStateSucceeded), + Timestamp: new(time.Now()), + }, + } + deployResultBytes, _ := json.Marshal(deployment) + + // GET single deployment + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && strings.HasSuffix( + request.URL.Path, + "/subscriptions/SUBSCRIPTION_ID/providers/Microsoft.Resources/deployments/test-env", + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewBuffer(deployResultBytes)), + }, nil + }) + + // GET list deployments + deploymentsPage := &armresources.DeploymentListResult{ + Value: []*armresources.DeploymentExtended{&deployment}, + } + deploymentsPageBytes, _ := json.Marshal(deploymentsPage) + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && strings.HasSuffix( + request.URL.Path, + "/SUBSCRIPTION_ID/providers/Microsoft.Resources/deployments/", + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewBuffer(deploymentsPageBytes)), + }, nil + }) + + // Per-RG resource listing: empty + resList := armresources.ResourceListResult{Value: []*armresources.GenericResourceExpanded{}} + 
mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.Contains(request.URL.Path, fmt.Sprintf("resourceGroups/%s/resources", rgName)) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return mocks.CreateHttpResponseWithBody(request, http.StatusOK, resList) + }) + + // DELETE RG returns 409 Conflict. + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodDelete && + strings.HasSuffix( + request.URL.Path, + fmt.Sprintf("subscriptions/SUBSCRIPTION_ID/resourcegroups/%s", rgName), + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + // Use 409 Conflict (non-retryable) to avoid SDK retry delays. + return &http.Response{ + Request: request, + Header: http.Header{}, + StatusCode: http.StatusConflict, + Body: io.NopCloser(strings.NewReader(`{"error":{"code":"Conflict","message":"simulated failure"}}`)), + }, nil + }) + + // LRO polling endpoint (needed for mock framework). + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.Contains(request.URL.String(), "url-to-poll.net") + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return mocks.CreateEmptyHttpResponse(request, 204) + }) + + infraProvider := createBicepProvider(t, mockContext) + destroyOptions := provisioning.NewDestroyOptions(false, false) + result, err := infraProvider.Destroy(*mockContext.Context, destroyOptions) + + require.Error(t, err) + require.Nil(t, result) + assert.Contains(t, err.Error(), "error deleting Azure resources") + }) + + t.Run("ZeroResourcesStillDeletesStack", func(t *testing.T) { + // When deployment stacks are enabled and zero resources are found + // (e.g., after manual cleanup), the stack itself must still be deleted + // via deployment.Delete(). 
Regression: previously the zero-resources + // fast-path ran before the stacks check, causing a no-op VoidState + // and leaving the stack/deny-assignments behind. + enableDeploymentStacks(t) + mockContext := mocks.NewMockContext(context.Background()) + prepareBicepMocks(mockContext) + + tracker, _ := prepareClassifyDestroyMocks(mockContext, classifyMockCfg{ + rgNames: []string{}, // zero resource groups + withPurgeResources: false, + }) + + infraProvider := createBicepProvider(t, mockContext) + destroyOptions := provisioning.NewDestroyOptions(false, false) + result, err := infraProvider.Destroy(*mockContext.Context, destroyOptions) + + require.NoError(t, err) + require.NotNil(t, result) + + // Void state called via deployment.Delete (inside DeleteSubscriptionDeployment). + assert.Equal(t, int32(1), tracker.voidStatePUTs.Load(), + "void state should be called via deployment.Delete even with zero resources") + }) +} + +// TestBicepDestroyDeleteRGListPartialFailure tests that deleteRGList continues +// attempting remaining RGs when one delete fails, and returns a joined error +// containing all individual failures. This covers the error-accumulation loop +// at deleteRGList lines 175-183 (previously 65% coverage). +func TestBicepDestroyDeleteRGListPartialFailure(t *testing.T) { + mockContext := mocks.NewMockContext(context.Background()) + prepareBicepMocks(mockContext) + + // Register credential/ARM providers. + mockContext.Container.MustRegisterSingleton( + func() account.SubscriptionCredentialProvider { + return mockaccount.SubscriptionCredentialProviderFunc( + func(_ context.Context, _ string) (azcore.TokenCredential, error) { + return mockContext.Credentials, nil + }, + ) + }, + ) + mockContext.Container.MustRegisterSingleton( + func() *arm.ClientOptions { + return mockContext.ArmClientOptions + }, + ) + + rgNames := []string{"rg-ok", "rg-fail", "rg-ok2"} + + // Build deployment referencing three RGs. 
+ outputResources := make([]*armresources.ResourceReference, len(rgNames)) + for i, rg := range rgNames { + id := fmt.Sprintf("/subscriptions/SUBSCRIPTION_ID/resourceGroups/%s", rg) + outputResources[i] = &armresources.ResourceReference{ID: &id} + } + + deployment := armresources.DeploymentExtended{ + ID: new("DEPLOYMENT_ID"), + Name: new("test-env"), + Location: new("eastus2"), + Tags: map[string]*string{"azd-env-name": new("test-env")}, + Type: new("Microsoft.Resources/deployments"), + Properties: &armresources.DeploymentPropertiesExtended{ + Outputs: map[string]any{ + "WEBSITE_URL": map[string]any{"value": "http://myapp.azurewebsites.net", "type": "string"}, + }, + OutputResources: outputResources, + ProvisioningState: new(armresources.ProvisioningStateSucceeded), + Timestamp: new(time.Now()), + }, + } + deployResultBytes, _ := json.Marshal(deployment) + + // GET single deployment + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && strings.HasSuffix( + request.URL.Path, + "/subscriptions/SUBSCRIPTION_ID/providers/Microsoft.Resources/deployments/test-env", + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewBuffer(deployResultBytes)), + }, nil + }) + + // GET list deployments + deploymentsPage := &armresources.DeploymentListResult{ + Value: []*armresources.DeploymentExtended{&deployment}, + } + deploymentsPageBytes, _ := json.Marshal(deploymentsPage) + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && strings.HasSuffix( + request.URL.Path, + "/SUBSCRIPTION_ID/providers/Microsoft.Resources/deployments/", + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewBuffer(deploymentsPageBytes)), + }, nil + }) + + // Per-RG resource listing: empty + for 
_, rgName := range rgNames { + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.Contains(request.URL.Path, fmt.Sprintf("resourceGroups/%s/resources", rgName)) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + resList := armresources.ResourceListResult{Value: []*armresources.GenericResourceExpanded{}} + return mocks.CreateHttpResponseWithBody(request, http.StatusOK, resList) + }) + } + + // Deployment operations mocks removed — classification now uses snapshot. + + // Tier 4 lock listing: no locks for each RG. + for _, rgName := range rgNames { + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.Contains( + request.URL.Path, + fmt.Sprintf("resourceGroups/%s/providers/Microsoft.Authorization/locks", rgName), + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + emptyLocks := armlocks.ManagementLockListResult{Value: []*armlocks.ManagementLockObject{}} + return mocks.CreateHttpResponseWithBody(request, http.StatusOK, emptyLocks) + }) + } + + // DELETE mocks: rg-ok and rg-ok2 succeed, rg-fail returns 409 Conflict. + rgDeleteCounts := map[string]*atomic.Int32{ + "rg-ok": {}, + "rg-fail": {}, + "rg-ok2": {}, + } + + for _, rg := range rgNames { + counter := rgDeleteCounts[rg] + failRG := rg == "rg-fail" + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodDelete && + strings.HasSuffix( + request.URL.Path, + fmt.Sprintf("subscriptions/SUBSCRIPTION_ID/resourcegroups/%s", rg), + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + counter.Add(1) + if failRG { + // Use 409 Conflict (non-retryable) to avoid SDK retry noise. 
+ return &http.Response{ + Request: request, + Header: http.Header{}, + StatusCode: http.StatusConflict, + Body: io.NopCloser(strings.NewReader( + `{"error":{"code":"Conflict","message":"simulated RG delete failure"}}`, + )), + }, nil + } + return httpRespondFn(request) + }) + } + + // LRO polling endpoint. + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.Contains(request.URL.String(), "url-to-poll.net") + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return mocks.CreateEmptyHttpResponse(request, 204) + }) + + // Void state PUT. + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodPut && + strings.Contains( + request.URL.Path, + "/subscriptions/SUBSCRIPTION_ID/providers/Microsoft.Resources/deployments/", + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + voidResult := &armresources.DeploymentsClientCreateOrUpdateAtSubscriptionScopeResponse{ + DeploymentExtended: armresources.DeploymentExtended{ + ID: new("DEPLOYMENT_ID"), + Name: new("test-env"), + Location: new("eastus2"), + Tags: map[string]*string{"azd-env-name": new("test-env")}, + Type: new("Microsoft.Resources/deployments"), + Properties: &armresources.DeploymentPropertiesExtended{ + ProvisioningState: new(armresources.ProvisioningStateSucceeded), + Timestamp: new(time.Now()), + }, + }, + } + return mocks.CreateHttpResponseWithBody(request, http.StatusOK, voidResult) + }) + + // force=true: snapshot injection makes classification deterministic. 
+ infraProvider := createBicepProvider(t, mockContext) + infraProvider.snapshotPredictedRGsOverride = map[string]bool{ + "rg-ok": true, "rg-fail": true, "rg-ok2": true, + } + destroyOptions := provisioning.NewDestroyOptions(true, false) // force=true, purge=false + result, err := infraProvider.Destroy(*mockContext.Context, destroyOptions) + + // The partial failure in deleteRGList should propagate as an error. + require.Error(t, err) + require.Nil(t, result) + assert.Contains(t, err.Error(), "rg-fail", + "error should mention the failed resource group") + + // Verify ALL RGs were attempted (deleteRGList doesn't stop on first failure). + assert.Equal(t, int32(1), rgDeleteCounts["rg-ok"].Load(), + "rg-ok should be attempted") + assert.Equal(t, int32(1), rgDeleteCounts["rg-fail"].Load(), + "rg-fail should be attempted") + assert.Equal(t, int32(1), rgDeleteCounts["rg-ok2"].Load(), + "rg-ok2 should still be attempted after rg-fail fails") +} + +// TestBicepDestroyPartialDeleteAttemptsPurge verifies that when deleteRGList +// partially fails (some RGs deleted, some not), purgeItems still runs and +// purges soft-deleted resources from successfully-deleted RGs. +// Regression test for: purge was skipped entirely when deleteErr != nil, +// causing soft-deleted resources (Key Vaults, etc.) to become unreachable +// on retry (deleted RGs no longer exist, losing their purge targets). +func TestBicepDestroyPartialDeleteAttemptsPurge(t *testing.T) { + mockContext := mocks.NewMockContext(context.Background()) + prepareBicepMocks(mockContext) + + // Register credential/ARM providers. 
+ mockContext.Container.MustRegisterSingleton( + func() account.SubscriptionCredentialProvider { + return mockaccount.SubscriptionCredentialProviderFunc( + func(_ context.Context, _ string) (azcore.TokenCredential, error) { + return mockContext.Credentials, nil + }, + ) + }, + ) + mockContext.Container.MustRegisterSingleton( + func() *arm.ClientOptions { + return mockContext.ArmClientOptions + }, + ) + + rgNames := []string{"rg-ok", "rg-fail"} + + // Build deployment referencing two RGs (both owned via snapshot). + outputResources := make([]*armresources.ResourceReference, len(rgNames)) + for i, rg := range rgNames { + id := fmt.Sprintf("/subscriptions/SUBSCRIPTION_ID/resourceGroups/%s", rg) + outputResources[i] = &armresources.ResourceReference{ID: &id} + } + + deployment := armresources.DeploymentExtended{ + ID: new("DEPLOYMENT_ID"), + Name: new("test-env"), + Location: new("eastus2"), + Tags: map[string]*string{"azd-env-name": new("test-env")}, + Type: new("Microsoft.Resources/deployments"), + Properties: &armresources.DeploymentPropertiesExtended{ + Outputs: map[string]any{ + "WEBSITE_URL": map[string]any{"value": "http://myapp.azurewebsites.net", "type": "string"}, + }, + OutputResources: outputResources, + ProvisioningState: new(armresources.ProvisioningStateSucceeded), + Timestamp: new(time.Now()), + }, + } + deployResultBytes, _ := json.Marshal(deployment) + + // GET single deployment + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && strings.HasSuffix( + request.URL.Path, + "/subscriptions/SUBSCRIPTION_ID/providers/Microsoft.Resources/deployments/test-env", + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewBuffer(deployResultBytes)), + }, nil + }) + + // GET list deployments + deploymentsPage := &armresources.DeploymentListResult{ + Value: []*armresources.DeploymentExtended{&deployment}, + } + 
deploymentsPageBytes, _ := json.Marshal(deploymentsPage) + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && strings.HasSuffix( + request.URL.Path, + "/SUBSCRIPTION_ID/providers/Microsoft.Resources/deployments/", + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewBuffer(deploymentsPageBytes)), + }, nil + }) + + // Per-RG resource listing: rg-ok has a KeyVault, rg-fail is empty. + kvID := fmt.Sprintf( + "/subscriptions/SUBSCRIPTION_ID/resourceGroups/rg-ok/providers/%s/kv-ok", + string(azapi.AzureResourceTypeKeyVault), + ) + rgResources := map[string][]*armresources.GenericResourceExpanded{ + "rg-ok": { + { + ID: &kvID, + Name: new("kv-ok"), + Type: new(string(azapi.AzureResourceTypeKeyVault)), + Location: new("eastus2"), + Tags: map[string]*string{"azd-env-name": new("test-env")}, + }, + }, + "rg-fail": {}, + } + + for _, rgName := range rgNames { + resources := rgResources[rgName] + resList := armresources.ResourceListResult{Value: resources} + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.Contains(request.URL.Path, fmt.Sprintf("resourceGroups/%s/resources", rgName)) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return mocks.CreateHttpResponseWithBody(request, http.StatusOK, resList) + }) + } + + // Deployment operations mocks removed — classification now uses snapshot. + + // Tier 4 lock listing: no locks. 
+ for _, rgName := range rgNames { + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.Contains( + request.URL.Path, + fmt.Sprintf("resourceGroups/%s/providers/Microsoft.Authorization/locks", rgName), + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + emptyLocks := armlocks.ManagementLockListResult{Value: []*armlocks.ManagementLockObject{}} + return mocks.CreateHttpResponseWithBody(request, http.StatusOK, emptyLocks) + }) + } + + // DELETE mocks: rg-ok succeeds, rg-fail returns 409 Conflict. + for _, rg := range rgNames { + failRG := rg == "rg-fail" + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodDelete && + strings.HasSuffix( + request.URL.Path, + fmt.Sprintf("subscriptions/SUBSCRIPTION_ID/resourcegroups/%s", rg), + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + if failRG { + return &http.Response{ + Request: request, + Header: http.Header{}, + StatusCode: http.StatusConflict, + Body: io.NopCloser(strings.NewReader( + `{"error":{"code":"Conflict","message":"simulated RG delete failure"}}`, + )), + }, nil + } + return httpRespondFn(request) + }) + } + + // LRO polling endpoint. + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.Contains(request.URL.String(), "url-to-poll.net") + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return mocks.CreateEmptyHttpResponse(request, 204) + }) + + // Void state PUT (should NOT be called — partial deletion skips void state). 
+ voidStateCalls := atomic.Int32{} + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodPut && + strings.Contains( + request.URL.Path, + "/subscriptions/SUBSCRIPTION_ID/providers/Microsoft.Resources/deployments/", + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + voidStateCalls.Add(1) + result := &armresources.DeploymentsClientCreateOrUpdateAtSubscriptionScopeResponse{ + DeploymentExtended: armresources.DeploymentExtended{ + ID: new("DEPLOYMENT_ID"), + Name: new("test-env"), + Location: new("eastus2"), + Tags: map[string]*string{"azd-env-name": new("test-env")}, + Type: new("Microsoft.Resources/deployments"), + Properties: &armresources.DeploymentPropertiesExtended{ + ProvisioningState: new(armresources.ProvisioningStateSucceeded), + Timestamp: new(time.Now()), + }, + }, + } + return mocks.CreateHttpResponseWithBody(request, http.StatusOK, result) + }) + + // KeyVault GET mock (for collectPurgeItems — inspects soft-delete properties). + kvGetCalls := atomic.Int32{} + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.HasSuffix(request.URL.Path, "/vaults/kv-ok") + }).RespondFn(func(request *http.Request) (*http.Response, error) { + kvGetCalls.Add(1) + kvResponse := armkeyvault.VaultsClientGetResponse{ + Vault: armkeyvault.Vault{ + ID: &kvID, + Name: new("kv-ok"), + Location: new("eastus2"), + Properties: &armkeyvault.VaultProperties{ + EnableSoftDelete: new(true), + EnablePurgeProtection: new(false), + }, + }, + } + kvBytes, _ := json.Marshal(kvResponse) + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewBuffer(kvBytes)), + }, nil + }) + + // KeyVault purge mock (the critical assertion: this MUST be called even after partial delete). 
+ kvPurgeCalls := atomic.Int32{} + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodPost && + strings.HasSuffix(request.URL.Path, "deletedVaults/kv-ok/purge") + }).RespondFn(func(request *http.Request) (*http.Response, error) { + kvPurgeCalls.Add(1) + return httpRespondFn(request) + }) + + infraProvider := createBicepProvider(t, mockContext) + infraProvider.snapshotPredictedRGsOverride = map[string]bool{ + "rg-ok": true, "rg-fail": true, + } + destroyOptions := provisioning.NewDestroyOptions(true, true) // force=true, purge=true + result, err := infraProvider.Destroy(*mockContext.Context, destroyOptions) + + // Partial failure should propagate as an error. + require.Error(t, err) + require.Nil(t, result) + assert.Contains(t, err.Error(), "rg-fail", + "error should mention the failed resource group") + + // Key assertion: purge was attempted despite partial deletion failure. + // This verifies the fix: purgeItems runs BEFORE checking deleteErr. + assert.Equal(t, int32(1), kvGetCalls.Load(), + "kv-ok should be inspected for purge properties (collectPurgeItems runs before deletion)") + assert.Equal(t, int32(1), kvPurgeCalls.Load(), + "kv-ok should be purged even after partial RG deletion failure") + + // Void state should NOT be called when deletion partially failed. + assert.Equal(t, int32(0), voidStateCalls.Load(), + "voidDeploymentState should be skipped when deletion partially fails") +} + +// TestBicepDestroyCredentialResolutionFailure tests that when the credential +// provider is NOT registered in the container, the ARM wiring fails gracefully +// for listResourceGroupLocks (returns error → fail-safe veto). +// This covers the credential-failure branches in listResourceGroupLocks. 
+func TestBicepDestroyCredentialResolutionFailure(t *testing.T) { + mockContext := mocks.NewMockContext(context.Background()) + prepareBicepMocks(mockContext) + + // Intentionally do NOT register SubscriptionCredentialProvider or arm.ClientOptions. + // This causes listResourceGroupLocks to fail on credential resolution. + + rgNames := []string{"rg-alpha"} + + // Build deployment referencing one RG. + rgID := fmt.Sprintf("/subscriptions/SUBSCRIPTION_ID/resourceGroups/%s", rgNames[0]) + deployment := armresources.DeploymentExtended{ + ID: new("DEPLOYMENT_ID"), + Name: new("test-env"), + Location: new("eastus2"), + Tags: map[string]*string{"azd-env-name": new("test-env")}, + Type: new("Microsoft.Resources/deployments"), + Properties: &armresources.DeploymentPropertiesExtended{ + Outputs: map[string]any{ + "WEBSITE_URL": map[string]any{"value": "http://myapp.azurewebsites.net", "type": "string"}, + }, + OutputResources: []*armresources.ResourceReference{{ID: &rgID}}, + ProvisioningState: new(armresources.ProvisioningStateSucceeded), + Timestamp: new(time.Now()), + }, + } + deployResultBytes, _ := json.Marshal(deployment) + + // GET single deployment + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && strings.HasSuffix( + request.URL.Path, + "/subscriptions/SUBSCRIPTION_ID/providers/Microsoft.Resources/deployments/test-env", + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewBuffer(deployResultBytes)), + }, nil + }) + + // GET list deployments + deploymentsPage := &armresources.DeploymentListResult{ + Value: []*armresources.DeploymentExtended{&deployment}, + } + deploymentsPageBytes, _ := json.Marshal(deploymentsPage) + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && strings.HasSuffix( + request.URL.Path, + 
"/SUBSCRIPTION_ID/providers/Microsoft.Resources/deployments/", + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewBuffer(deploymentsPageBytes)), + }, nil + }) + + // Per-RG resource listing: empty + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.Contains(request.URL.Path, fmt.Sprintf("resourceGroups/%s/resources", rgNames[0])) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + resList := armresources.ResourceListResult{Value: []*armresources.GenericResourceExpanded{}} + return mocks.CreateHttpResponseWithBody(request, http.StatusOK, resList) + }) + + // LRO polling endpoint. + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && + strings.Contains(request.URL.String(), "url-to-poll.net") + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return mocks.CreateEmptyHttpResponse(request, 204) + }) + + // Void state PUT (after classification completes with all RGs skipped). 
+ mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodPut && + strings.Contains( + request.URL.Path, + "/subscriptions/SUBSCRIPTION_ID/providers/Microsoft.Resources/deployments/", + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + voidResult := &armresources.DeploymentsClientCreateOrUpdateAtSubscriptionScopeResponse{ + DeploymentExtended: armresources.DeploymentExtended{ + ID: new("DEPLOYMENT_ID"), + Name: new("test-env"), + Location: new("eastus2"), + Tags: map[string]*string{"azd-env-name": new("test-env")}, + Type: new("Microsoft.Resources/deployments"), + Properties: &armresources.DeploymentPropertiesExtended{ + ProvisioningState: new(armresources.ProvisioningStateSucceeded), + Timestamp: new(time.Now()), + }, + }, + } + return mocks.CreateHttpResponseWithBody(request, http.StatusOK, voidResult) + }) + + infraProvider := createBicepProvider(t, mockContext) + + // Inject snapshot so the RG is classified as owned, triggering Tier 4 checks + // where the credential resolution failure will be exercised. + infraProvider.snapshotPredictedRGsOverride = map[string]bool{ + "rg-alpha": true, + } + + destroyOptions := provisioning.NewDestroyOptions(false, false) // force=false, purge=false + result, err := infraProvider.Destroy(*mockContext.Context, destroyOptions) + + // Tier 4 listResourceGroupLocks fails on credential resolution. + // fail-safe behavior vetoes all RGs → all RGs skipped, no RGs deleted. + // Since ALL RGs are vetoed, classifyResourceGroups returns (nil, skipped, nil). + // Then voidDeploymentState runs (no classify error), so Destroy succeeds. 
+ require.NoError(t, err) + require.NotNil(t, result) +} diff --git a/cli/azd/pkg/infra/scope.go b/cli/azd/pkg/infra/scope.go index 303766d2d95..28c953b134e 100644 --- a/cli/azd/pkg/infra/scope.go +++ b/cli/azd/pkg/infra/scope.go @@ -54,6 +54,8 @@ type Deployment interface { options map[string]any, progress *async.Progress[azapi.DeleteDeploymentProgress], ) error + // VoidState deploys an empty template to void the deployment state without deleting resources. + VoidState(ctx context.Context, options map[string]any) error // Deploy a given template with a set of parameters. DeployPreview( ctx context.Context, @@ -114,6 +116,12 @@ func (s *ResourceGroupDeployment) Delete( ) } +// VoidState is a no-op for resource group-scoped deployments. +// The deployment lives within the resource group itself; voiding state is not applicable. +func (s *ResourceGroupDeployment) VoidState(_ context.Context, _ map[string]any) error { + return nil +} + func (s *ResourceGroupDeployment) DeployPreview( ctx context.Context, template azure.RawArmTemplate, @@ -324,6 +332,11 @@ func (s *SubscriptionDeployment) Delete( return s.deploymentService.DeleteSubscriptionDeployment(ctx, s.subscriptionId, s.name, options, progress) } +// VoidState deploys an empty template to void the deployment state without deleting resources. +func (s *SubscriptionDeployment) VoidState(ctx context.Context, options map[string]any) error { + return s.deploymentService.VoidSubscriptionDeploymentState(ctx, s.subscriptionId, s.name, options) +} + // Deploy a given template with a set of parameters. 
func (s *SubscriptionDeployment) DeployPreview( ctx context.Context, diff --git a/cli/azd/pkg/infra/scope_test.go b/cli/azd/pkg/infra/scope_test.go index 0ba0961b082..1997184653d 100644 --- a/cli/azd/pkg/infra/scope_test.go +++ b/cli/azd/pkg/infra/scope_test.go @@ -332,3 +332,47 @@ var testArmTemplate string = `{ "value": "[reference('Microsoft.Compute/availabilitySets/availabilitySet1')]" } }}` + +func TestVoidState(t *testing.T) { + t.Parallel() + + t.Run("SubscriptionDeploymentVoidStateNotFound", func(t *testing.T) { + t.Parallel() + // VoidState on SubscriptionDeployment returns an error when the deployment does not exist. + // Verifies the method delegates to VoidSubscriptionDeploymentState. + mockContext := mocks.NewMockContext(context.Background()) + deploymentService := mockazapi.NewDeploymentsServiceFromMockContext(mockContext) + + mockContext.HttpClient.When(func(request *http.Request) bool { + return request.Method == http.MethodGet && strings.Contains( + request.URL.Path, + "/subscriptions/SUBSCRIPTION_ID/providers/Microsoft.Resources/deployments/DEPLOYMENT_NAME", + ) + }).RespondFn(func(request *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusNotFound, + Body: io.NopCloser(strings.NewReader(`{"error":{"code":"DeploymentNotFound"}}`)), + Header: http.Header{"Content-Type": []string{"application/json"}}, + }, nil + }) + + scope := newSubscriptionScope(deploymentService, "SUBSCRIPTION_ID", "eastus2") + target := NewSubscriptionDeployment(scope, "DEPLOYMENT_NAME") + + err := target.VoidState(t.Context(), nil) + require.Error(t, err) + }) + + t.Run("ResourceGroupDeploymentVoidStateNoOp", func(t *testing.T) { + t.Parallel() + // VoidState on ResourceGroupDeployment is a no-op and always returns nil. 
+ mockContext := mocks.NewMockContext(context.Background()) + deploymentService := mockazapi.NewDeploymentsServiceFromMockContext(mockContext) + + scope := newResourceGroupScope(deploymentService, "SUBSCRIPTION_ID", "RESOURCE_GROUP") + target := NewResourceGroupDeployment(scope, "DEPLOYMENT_NAME") + + err := target.VoidState(t.Context(), nil) + require.NoError(t, err) + }) +} diff --git a/docs/azd-down-resource-group-safety/architecture.md b/docs/azd-down-resource-group-safety/architecture.md new file mode 100644 index 00000000000..94e1e440252 --- /dev/null +++ b/docs/azd-down-resource-group-safety/architecture.md @@ -0,0 +1,432 @@ + +# Architecture Design: Snapshot-Based Resource Group Safety for `azd down` + +## Overview + +### Problem Statement + +`azd down` deletes pre-existing resource groups that were merely referenced (via +Bicep `existing` keyword) but not created by the deployment. This causes +catastrophic, unrecoverable data loss. + +**Root cause**: `resourceGroupsFromDeployment()` in `standard_deployments.go` +extracts ALL resource groups from ARM's `outputResources` and `dependencies` +fields without distinguishing created-vs-referenced resources. +`DeleteSubscriptionDeployment()` then calls `DeleteResourceGroup()` on every +discovered RG indiscriminately. + +**Real-world impact**: A user with a subscription-scoped Bicep template that +creates `rg-lego2` for Container Apps and references pre-existing `rg-lego-db` +(via `existing`) to assign a Cosmos DB role ran `azd down`. Both resource groups +were deleted — destroying a Cosmos DB account, PostgreSQL Flexible Server, role +assignments, and the resource group itself. + +**Permission-dependent behavior**: With `Contributor` role, RG deletion may fail +(masking the bug). With `Owner` role, it succeeds silently. 
+ +### Scope + +This design covers the `azd down` command's resource group deletion logic for +**Standard Deployments** (non-deployment-stacks), including **layered +provisioning** (multi-layer `azure.yaml` configurations). + +**In scope**: +- `StandardDeployments.DeleteSubscriptionDeployment()` — subscription-scoped +- `StandardDeployments.DeleteResourceGroupDeployment()` — RG-scoped +- Layered provisioning (`infra.layers[]` in `azure.yaml`) — cross-layer + resource group safety +- `ClassifyResourceGroups` pipeline + +**Out of scope — Deployment Stacks**: +- `StackDeployments` (`stack_deployments.go`) is **not functionally modified** by this design (it only gains a no-op VoidState stub). + Deployment stacks natively track managed vs unmanaged resources via ARM + Deployment Stacks and already handle this correctly. When + `FeatureDeploymentStacks` is enabled, the classification pipeline is + bypassed entirely. This design exclusively targets the `StandardDeployments` + code path, which is the default behavior for all azd users. + +### Constraints + +- **No deployment stacks dependency** — the fix must work with the default + standard deployment path, not behind an alpha flag +- **Machine-independent** — must work when `azd up` runs on machine A and + `azd down` runs on machine B +- **Graceful degradation** — must handle API failures, missing snapshot data, + etc. without defaulting to "delete everything" +- **Backward compatible** — resources provisioned before this change must not + become undeletable; the system must degrade gracefully for pre-existing + deployments +- **No new Azure permissions** — must work within the same permission set + currently required by `azd down` + +## Architecture + +### Design Principle: Fail Safe + +Every failure mode is **"skip deletion"** — never "delete anyway." The only path +to deleting a resource group requires positive confirmation from the snapshot +classification with no vetoes from the defense-in-depth safeguards.
The correct +failure direction for a destructive operation is "we didn't delete something we +could have" not "we deleted something we shouldn't have." + +### Classification Approach: Bicep Snapshot + +`bicep snapshot` produces a `predictedResources` list containing **only resources +the template will CREATE** — resources declared with the Bicep `existing` keyword +are excluded by design. This provides a deterministic, offline, zero-API-call +answer to the question "does this template own this resource group?" + +| Aspect | Snapshot | +|--------|----------| +| Data source | Template intent (deterministic, compile-time) | +| API calls | 0 (offline, local bicep CLI) | +| Handles template changes | Reflects current template (not stale deploy history) | +| `existing` handling | Excluded by design | +| Nested modules | Normalized — all predicted resources flattened | +| Conditional resources | Evaluated with provided parameter values | + +### Component Design + +#### 1. ClassifyResourceGroups + +**Location**: `cli/azd/pkg/azapi/resource_group_classifier.go` + +**Responsibility**: Determines whether azd owns each resource group by consulting +the Bicep snapshot and running defense-in-depth safety checks. + +```go +func ClassifyResourceGroups( + ctx context.Context, + rgNames []string, + opts ClassifyOptions, +) (*ClassifyResult, error) +``` + +The classifier operates in two modes: + +1. **Snapshot available** (`SnapshotPredictedRGs != nil`): RGs in the predicted + set are owned; RGs absent are external. Tier 4 (locks + foreign resources) + runs on all owned candidates as defense-in-depth (skipped under `--force`). + +2. **Snapshot unavailable** (`SnapshotPredictedRGs == nil`): Conservative guard: + - `--force`: all RGs treated as owned (backward compat, zero API calls) + - Interactive: user prompted per-RG ("snapshot unavailable — cannot verify + ownership") + - Non-interactive: all RGs skipped + +#### 2.
Restructured Destroy Flow + +**Location**: `cli/azd/pkg/infra/provisioning/bicep/bicep_destroy.go` + +The deletion loop has been moved out of `DeleteSubscriptionDeployment()` into +`BicepProvider.Destroy()`, which now orchestrates: + +1. `compileBicep()` → template + parameters (existing) +2. `scopeForTemplate()` → deployment scope (existing) +3. `completedDeployments()` → find most recent deployment (existing) +4. `deployment.Resources()` → grouped resources (existing) +5. **`getSnapshotPredictedRGs()`** → set of RG names from `bicep snapshot` +6. **`classifyResourceGroups()`** → snapshot classification + Tier 4 +7. Delete only owned RGs, skip external/unknown +8. Purge soft-deleted resources (Key Vault, etc.) in owned RGs only +9. `VoidSubscriptionDeploymentState()` only after all deletions succeed + +### Data Flow + +``` +azd down + │ + ├─ BicepProvider.Destroy() + │ │ + │ ├─ CompletedDeployments() ─── find most recent deployment + │ │ + │ ├─ deployment.Resources() ─── get all resources (existing behavior) + │ │ + │ ├─ GroupByResourceGroup() ─── group resources by RG name + │ │ + │ ├─ getSnapshotPredictedRGs() + │ │ ├─ Invoke `bicep snapshot` on current template + │ │ ├─ Extract RGs from predictedResources (type = Microsoft.Resources/resourceGroups) + │ │ └─ Return lowercased RG name set (nil on any error → triggers guard) + │ │ + │ ├─ ClassifyResourceGroups() + │ │ │ + │ │ ├─ [Snapshot Path] ─── when SnapshotPredictedRGs is non-nil + │ │ │ ├─ RG in predicted set? → classified "owned" + │ │ │ ├─ RG NOT in predicted set? → classified "external" → SKIP + │ │ │ └─ Tier 4 runs on owned candidates (defense-in-depth) + │ │ │ + │ │ └─ [Snapshot Unavailable Guard] + │ │ ├─ ForceMode? → all RGs owned (backward compat) + │ │ ├─ Interactive? → prompt user per RG + │ │ └─ Non-interactive? 
→ skip all + │ │ + │ ├─ Delete only "owned" RGs + │ │ ├─ Delete resource group + │ │ ├─ Purge soft-deleted resources (Key Vault, Cognitive, AppConfig) + │ │ └─ Report skipped RGs to progress callback + │ │ + │ └─ VoidSubscriptionDeploymentState() ─── only after all deletions succeed + │ + └─ Done +``` + +### Classification Flow Diagram + +```mermaid +flowchart TD + Start([azd down]) --> Snapshot{Snapshot
available?} + + Snapshot -->|Yes| SnapClass[Snapshot Classification] + SnapClass --> SnapResult{RG in
predictedResources?} + SnapResult -->|Yes| Owned[Classified: Owned] + SnapResult -->|No| SnapSkip[External → SKIP ✓] + + Owned --> ForceCheck{--force?} + ForceCheck -->|Yes| ForceDelete[DELETE
skip Tier 4] + ForceCheck -->|No| Tier4[Tier 4: Defense-in-Depth] + + Tier4 --> LockCheck{Management
lock?} + LockCheck -->|CanNotDelete/ReadOnly| LockSkip[SKIP ✓
lock veto] + LockCheck -->|None/403/404| ForeignCheck{Foreign
resources?} + + ForeignCheck -->|None| Clean[DELETE] + ForeignCheck -->|Found| Interactive{Interactive?} + Interactive -->|Yes| Prompt[Prompt user] + Prompt -->|Accept| PromptDelete[DELETE] + Prompt -->|Decline| PromptSkip[SKIP ✓] + Interactive -->|No| HardVeto[SKIP ✓
hard veto] + + Snapshot -->|No| GuardMode{Mode?} + GuardMode -->|--force| ForceAll[All → owned
DELETE] + GuardMode -->|Interactive| PromptAll[Prompt each RG] + PromptAll -->|Accept| Tier4 + PromptAll -->|Decline| GuardSkip[SKIP ✓] + GuardMode -->|Non-interactive| SkipAll[SKIP all ✓] + + ForeignCheck -->|Error| ErrSkip[SKIP ✓
fail-safe] + + style SnapSkip fill:#2d6,stroke:#333,color:#000 + style LockSkip fill:#2d6,stroke:#333,color:#000 + style PromptSkip fill:#2d6,stroke:#333,color:#000 + style HardVeto fill:#2d6,stroke:#333,color:#000 + style GuardSkip fill:#2d6,stroke:#333,color:#000 + style SkipAll fill:#2d6,stroke:#333,color:#000 + style ErrSkip fill:#2d6,stroke:#333,color:#000 + style Clean fill:#f66,stroke:#333,color:#000 + style ForceDelete fill:#f66,stroke:#333,color:#000 + style ForceAll fill:#f96,stroke:#333,color:#000 + style PromptDelete fill:#f66,stroke:#333,color:#000 +``` + +## Key Decisions + +### Decision 1: Bicep Snapshot as Primary Classification Signal + +**Pattern**: Leverage compile-time intent over runtime history + +**Why**: `bicep snapshot` → `predictedResources` provides the single best answer +to "does this template own this resource group?" It reflects the template's +*current intent* — resources declared with `existing` are excluded by design. +Unlike deployment operations (which reflect the *last deploy* and can be stale, +incomplete, or purged), the snapshot is deterministic, offline, and always +current. + +**How it works**: +1. `getSnapshotPredictedRGs()` invokes `bicep snapshot` with the template and + parameters +2. Filters `predictedResources` for `type == "Microsoft.Resources/resourceGroups"` +3. Returns a lowercased set of RG names +4. `nil` return signals snapshot failure → triggers conservative guard + +**Edge cases handled**: +- Conditional RGs (`if (condition)`) — evaluated with provided parameters +- Nested modules — snapshot normalizes to a flat list +- ARM expression names (`rg-${env}`) — resolved to concrete values +- Case-insensitive comparison via `strings.EqualFold` / lowercased set + +### Decision 2: `--force` Uses Snapshot (Deterministic, Zero API Calls) + +**Pattern**: Minimal-overhead safety even in CI/CD automation + +**Why**: `--force` is used in CI/CD pipelines where operators want teardown +without prompts. 
With snapshot available, classification is deterministic and +free — no extra API calls. External RGs identified by the snapshot are still +protected (skipped). + +**Behavior**: +- **With snapshot**: Snapshot classifies RGs. Tier 4 is skipped (zero API calls, + consistent with `--force` contract of no interactive checks). +- **Without snapshot**: All RGs treated as owned (backward compat). This is the + only path where an external RG could be deleted — it requires *both* snapshot + failure *and* explicit `--force`. + +### Decision 3: Tier 4 Defense-in-Depth (Locks + Foreign Resources) + +**Pattern**: Defense in depth / Fail safe + +**Why**: Even when the snapshot says "owned," a management lock or foreign +resources should prevent deletion. Tier 4 catches edge cases the snapshot cannot: +user-added locks, resources deployed outside azd into an azd-owned RG, etc. + +**Lock check (best-effort)**: +- `CanNotDelete` or `ReadOnly` lock → hard veto (skip deletion) +- 403 → no veto (best-effort: locks are additive protection; inability to read + them does not imply the RG is unsafe to delete) +- 404 → no veto (RG already deleted) + +**Foreign resource check (strict)**: +- Resources without matching `azd-env-name` tag → prompt if interactive, hard + veto otherwise +- 403 → hard veto (cannot enumerate resources = cannot verify safety; unlike + lock 403 where inability to read is benign, resource 403 means we lack + visibility into what we'd delete) +- Extension resource types (roleAssignments, diagnosticSettings, resource links) + → skipped (these commonly lack tags and are created by azd scaffold templates) + +**Errors → veto**: Any unexpected error in Tier 4 is treated as a veto +(fail-safe). We log the error and skip deletion rather than risk destroying +unknown resources. + +### Decision 4: Skip Classification When Deployment Stacks Active + +Deployment stacks natively track managed vs unmanaged resources via ARM +Deployment Stacks. 
When `FeatureDeploymentStacks` is enabled, the snapshot +classification pipeline is bypassed entirely — ARM handles it correctly already. + +### Decision 5: VoidState Only After Full Success + +`VoidSubscriptionDeploymentState()` clears the deployment from ARM, destroying +the evidence needed for future classification. This MUST only happen after all +intended deletions succeed. On partial failure, the deployment state is preserved +so a subsequent `azd down` can retry. + +## Layered Provisioning Support + +### Background + +azd supports **layered provisioning** where `azure.yaml` defines multiple +infrastructure layers under `infra.layers[]`. Each layer is a separate Bicep +module with its own ARM deployment. During `azd down`, layers are processed in +**reverse order** — the last layer provisioned is the first layer destroyed. + +Each layer gets its own deployment name (`{envName}-{layerName}`), its own ARM +deployment with tags, and its own independent `Destroy()` cycle. + +### Cross-Layer Resource Group Scenarios + +The classification pipeline runs per-layer (each layer processes independently). +The snapshot for each layer reflects that layer's template. + +**Scenario 1: Layer 1 creates RG, Layer 2 references it via `existing`** + +Processing order: Layer 2 first, then Layer 1. + +1. Layer 2 snapshot: RG not in `predictedResources` → external → **SKIP** +2. Layer 1 snapshot: RG in `predictedResources` → owned → **DELETE** + +Result: Correct. The creating layer deletes the RG after the referencing layer. + +**Scenario 2: Both layers reference a pre-existing RG** + +1. Layer 2: not in snapshot → external → SKIP +2. Layer 1: not in snapshot → external → SKIP + +Result: Correct. Pre-existing RG is preserved. + +**Scenario 3: Layer 1 creates RG, Layer 2 deploys resources into it** + +Layer 2 processes first and skips the RG (not in Layer 2's snapshot). When +Layer 1 processes, the RG contains resources from both layers. 
Tier 4's +foreign-resource check could find Layer 2's resources. + +**Solution: `azd-env-name`-aware foreign resource check** + +Tier 4's extra-resource check distinguishes truly foreign resources from +sibling-layer resources: + +- For each resource in the RG: check its `azd-env-name` tag +- If tag matches current environment → sibling-layer resource → not foreign +- If tag missing or mismatched → truly foreign → triggers veto/prompt + +This works because azd tags resources with `azd-env-name` during provisioning. +No pre-scan pass across layers is needed. + +## Risks & Trade-offs + +### Risk 1: Snapshot Unavailable + +**When**: Older Bicep CLI without `snapshot` support, non-bicepparam mode, +snapshot errors. + +**Impact**: Falls back to conservative guard (skip all in non-interactive, +prompt in interactive, all-owned in --force). + +**Mitigation**: azd bundles Bicep 0.42.1+ which supports snapshot. +`generateBicepParam()` handles non-bicepparam case. Snapshot failure is +effectively unreachable in normal azd flows. + +### Risk 2: Snapshot Excludes a Created RG (False Negative) + +**When**: Bug in `bicep snapshot` implementation. + +**Impact**: Medium — the created RG would not be deleted, requiring manual +cleanup. + +**Mitigation**: This is the safe failure direction. Users can re-run with +`--force` if needed. + +### Risk 3: Snapshot Includes an Existing RG (False Positive) + +**When**: Bug in `bicep snapshot` where `existing` resources appear in +`predictedResources`. + +**Impact**: High — would classify an external RG as owned. + +**Mitigation**: Tier 4 defense-in-depth catches this: management locks block +deletion, and foreign-resource detection triggers a veto/prompt for resources +without matching `azd-env-name` tags. + +### Risk 4: Backward Compatibility with Pre-Existing Deployments + +**When**: User provisioned with older azd (no snapshot tag support), now runs +`azd down` with new azd. 
+ +**Impact**: None — snapshot is computed from the *current* template, not from +stored deployment state. If the template still exists locally, snapshot works. +If it doesn't, the snapshot-unavailable guard applies. + +### Risk 5: Performance + +**When**: `bicep snapshot` adds latency to `azd down`. + +**Impact**: Low — snapshot runs locally (~1-3s), no Azure API calls. + +**Mitigation**: The snapshot replaces what would have been API calls (deployment +operations, tag fetches). Net performance is likely better. + +## Affected Files + +### New Files +- `cli/azd/pkg/azapi/resource_group_classifier.go` — Snapshot classification + + Tier 4 defense-in-depth (~460 lines) +- `cli/azd/pkg/azapi/resource_group_classifier_test.go` — 31 classifier subtests +- `cli/azd/pkg/infra/provisioning/bicep/bicep_destroy.go` — Snapshot extraction + + classify-then-delete orchestrator (~520 lines) +- `cli/azd/pkg/infra/provisioning/bicep/bicep_destroy_test.go` — Destroy + orchestrator tests + +### Modified Files +- `cli/azd/pkg/azapi/deployments.go` — `VoidSubscriptionDeploymentState` method +- `cli/azd/pkg/azapi/standard_deployments.go` — Public `VoidSubscriptionDeploymentState`, + `ResourceGroupsFromDeployment` +- `cli/azd/pkg/azapi/stack_deployments.go` — VoidState no-op stub +- `cli/azd/pkg/tools/bicep/bicep.go` — `Snapshot()` method + `SnapshotOptions` + builder +- `cli/azd/pkg/infra/provisioning/bicep/bicep_provider.go` — Restructured + `Destroy()` flow, snapshot override for testing +- `cli/azd/pkg/infra/provisioning/bicep/bicep_provider_test.go` — Integration + tests +- `cli/azd/pkg/infra/provisioning/bicep/local_preflight.go` — Shared + `snapshotResult` struct used by both preflight and destroy +- `cli/azd/pkg/infra/scope.go` — `VoidState` on Deployment interface