From a82eb44b751bbb1be2866b2b257d9f1d90917d3f Mon Sep 17 00:00:00 2001
From: Mark Old
Date: Thu, 12 Feb 2026 13:34:41 -0800
Subject: [PATCH] Debounce the degraded status for flaky conditions

---
 .../credentialsrequest_controller_test.go | 62 ++++++++++++++++++-
 pkg/operator/credentialsrequest/status.go |  5 +-
 .../podidentitywebhook_controller.go      | 21 ++++---
 3 files changed, 79 insertions(+), 9 deletions(-)

diff --git a/pkg/operator/credentialsrequest/credentialsrequest_controller_test.go b/pkg/operator/credentialsrequest/credentialsrequest_controller_test.go
index 09953918ea..bddd440521 100644
--- a/pkg/operator/credentialsrequest/credentialsrequest_controller_test.go
+++ b/pkg/operator/credentialsrequest/credentialsrequest_controller_test.go
@@ -311,7 +311,7 @@ func TestCredentialsRequestReconcile(t *testing.T) {
 		},
 		{
 			// This indicates an error state.
-			name: "new credential no root creds available",
+			name: "new credential no root creds available (initially)",
 			existing: []runtime.Object{
 				testOperatorConfig(""),
 				createTestNamespace(testNamespace),
@@ -337,6 +337,60 @@ func TestCredentialsRequestReconcile(t *testing.T) {
 				assert.False(t, cr.Status.Provisioned)
 			},
 			expectErr: true,
+			expectedConditions: []ExpectedCondition{
+				{
+					conditionType: minterv1.CredentialsProvisionFailure,
+					reason:        "CredentialsProvisionFailure",
+					status:        corev1.ConditionTrue,
+				},
+			},
+			expectedCOConditions: []ExpectedCOCondition{
+				{
+					conditionType: configv1.OperatorProgressing,
+					status:        corev1.ConditionTrue,
+				},
+			},
+		},
+		{
+			// This indicates an error state.
+			name: "new credential no root creds available (after waiting period)",
+			existing: []runtime.Object{
+				testOperatorConfig(""),
+				createTestNamespace(testNamespace),
+				createTestNamespace(testSecretNamespace),
+				testCredentialsRequestWithCondition(t, minterv1.CredentialsRequestCondition{
+					Type:               minterv1.CredentialsProvisionFailure,
+					Reason:             "CredentialsProvisionFailure",
+					Status:             corev1.ConditionTrue,
+					LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)),
+				}),
+				testAWSCredsSecret("openshift-cloud-credential-operator", "cloud-credential-operator-iam-ro-creds", testReadAWSAccessKeyID, testReadAWSSecretAccessKey),
+				testClusterVersion(),
+				testInfrastructure(testInfraName),
+			},
+			existingAdmin: []runtime.Object{},
+			mockRootAWSClient: func(mockCtrl *gomock.Controller) *mockaws.MockClient {
+				mockAWSClient := mockaws.NewMockClient(mockCtrl)
+				return mockAWSClient
+			},
+			mockReadAWSClient: func(mockCtrl *gomock.Controller) *mockaws.MockClient {
+				mockAWSClient := mockaws.NewMockClient(mockCtrl)
+				return mockAWSClient
+			},
+			validate: func(c client.Client, t *testing.T) {
+				targetSecret := getSecret(c)
+				assert.Nil(t, targetSecret)
+				cr := getCR(c)
+				assert.False(t, cr.Status.Provisioned)
+			},
+			expectErr: true,
+			expectedConditions: []ExpectedCondition{
+				{
+					conditionType: minterv1.CredentialsProvisionFailure,
+					reason:        "CredentialsProvisionFailure",
+					status:        corev1.ConditionTrue,
+				},
+			},
 			expectedCOConditions: []ExpectedCOCondition{
 				{
 					conditionType: configv1.OperatorProgressing,
@@ -1683,6 +1737,12 @@ func testProvisionedCredentialsRequest(t *testing.T) *minterv1.CredentialsReques
 	return cr
 }
 
+func testCredentialsRequestWithCondition(t *testing.T, condition minterv1.CredentialsRequestCondition) *minterv1.CredentialsRequest {
+	cr := testCredentialsRequest(t)
+	cr.Status.Conditions = append(cr.Status.Conditions, condition)
+	return cr
+}
+
 func createTestNamespace(namespace string) *corev1.Namespace {
 	return &corev1.Namespace{
 		ObjectMeta: metav1.ObjectMeta{
diff --git a/pkg/operator/credentialsrequest/status.go b/pkg/operator/credentialsrequest/status.go
index a433dc0b15..cec89fb5d5 100644
--- a/pkg/operator/credentialsrequest/status.go
+++ b/pkg/operator/credentialsrequest/status.go
@@ -3,6 +3,7 @@ package credentialsrequest
 import (
 	"context"
 	"fmt"
+	"time"
 
 	log "github.com/sirupsen/logrus"
 
@@ -131,7 +132,9 @@ func computeStatusConditions(
 		for _, t := range minterv1.FailureConditionTypes {
 			failureCond := utils.FindCredentialsRequestCondition(cr.Status.Conditions, t)
 			if failureCond != nil && failureCond.Status == corev1.ConditionTrue {
-				foundFailure = true
+				if time.Since(failureCond.LastTransitionTime.Time) > 5*time.Minute {
+					foundFailure = true
+				}
 				break
 			}
 		}
diff --git a/pkg/operator/podidentity/podidentitywebhook_controller.go b/pkg/operator/podidentity/podidentitywebhook_controller.go
index 134f51e01f..bff45c7533 100644
--- a/pkg/operator/podidentity/podidentitywebhook_controller.go
+++ b/pkg/operator/podidentity/podidentitywebhook_controller.go
@@ -285,6 +285,7 @@ type staticResourceReconciler struct {
 	conditions      []configv1.ClusterOperatorStatusCondition
 	cache           resourceapply.ResourceCache
 	podIdentityType PodIdentityManifestSource
+	degradedSince   time.Time
 }
 
 var _ reconcile.Reconciler = &staticResourceReconciler{}
@@ -294,16 +295,22 @@ func (r *staticResourceReconciler) Reconcile(ctx context.Context, request reconc
 	err := r.ReconcileResources(ctx)
 	if err != nil {
 		r.logger.Errorf("reconciliation failed, retrying in %s", retryInterval.String())
-		r.conditions = []configv1.ClusterOperatorStatusCondition{
-			{
-				Type:    configv1.OperatorDegraded,
-				Status:  configv1.ConditionTrue,
-				Reason:  reasonStaticResourceReconcileFailed,
-				Message: fmt.Sprintf("static resource reconciliation failed: %v", err),
-			},
+		if r.degradedSince.IsZero() {
+			r.degradedSince = time.Now()
+		} else if time.Since(r.degradedSince) > 5*time.Minute {
+			r.conditions = []configv1.ClusterOperatorStatusCondition{
+				{
+					Type:    configv1.OperatorDegraded,
+					Status:  configv1.ConditionTrue,
+					Reason:  reasonStaticResourceReconcileFailed,
+					Message: fmt.Sprintf("static resource reconciliation failed: %v", err),
+				},
+			}
 		}
 		return reconcile.Result{RequeueAfter: retryInterval}, err
 	}
+
+	r.degradedSince = time.Time{}
 	r.conditions = []configv1.ClusterOperatorStatusCondition{}
 	return reconcile.Result{}, nil
 }