From 068f7985527e6ce9a8bcac8ab318cffe0ea72018 Mon Sep 17 00:00:00 2001 From: lukasmetzner Date: Mon, 16 Mar 2026 10:05:51 +0100 Subject: [PATCH 1/7] test: refactor e2e test suite --- tests/e2e/cloud_test.go | 111 +++++++++++++++++++++------------------ tests/e2e/e2e_test.go | 5 +- tests/e2e/helper_test.go | 106 ++++++++++++++++++++----------------- tests/e2e/robot_test.go | 20 +++---- 4 files changed, 132 insertions(+), 110 deletions(-) diff --git a/tests/e2e/cloud_test.go b/tests/e2e/cloud_test.go index 5dab6cd00..772134795 100644 --- a/tests/e2e/cloud_test.go +++ b/tests/e2e/cloud_test.go @@ -12,6 +12,7 @@ import ( "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" @@ -25,12 +26,10 @@ import ( func TestNodeSetCorrectNodeLabelsAndIPAddresses(t *testing.T) { t.Parallel() - ctx := context.Background() - - node, err := testCluster.k8sClient.CoreV1().Nodes().Get(ctx, testCluster.ControlNodeName(), metav1.GetOptions{}) + node, err := testCluster.k8sClient.CoreV1().Nodes().Get(t.Context(), testCluster.ControlNodeName(), metav1.GetOptions{}) assert.NoError(t, err) - server, _, err := testCluster.hcloud.Server.Get(ctx, testCluster.ControlNodeName()) + server, _, err := testCluster.hcloud.Server.Get(t.Context(), testCluster.ControlNodeName()) if err != nil { return } @@ -77,32 +76,41 @@ func TestServiceLoadBalancersMinimalSetup(t *testing.T) { t: t, podName: "loadbalancer-minimal", } + t.Cleanup(func() { + lbTest.TearDown() + }) - pod := lbTest.DeployTestPod() + pod, err := lbTest.DeployTestPod() + require.NoError(t, err) lbSvc := lbTest.ServiceDefinition(pod, map[string]string{ string(annotation.LBLocation): "nbg1", }) - lbSvc, err := lbTest.CreateService(lbSvc) - if assert.NoError(t, err, "deploying test svc") { - WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, false) - } + lbSvc, err = lbTest.CreateService(lbSvc) + require.NoError(t, err) - lbTest.TearDown() + err = lbTest.WaitForHTTPAvailable(lbSvc.Status.LoadBalancer.Ingress[0].IP, false) + require.NoError(t, err) } func TestServiceLoadBalancersHTTPS(t *testing.T) { t.Parallel() - cert := testCluster.CreateTLSCertificate(t, "loadbalancer-https") lbTest := lbTestHelper{ t: t, podName: "loadbalancer-https", port: 443, } + t.Cleanup(func() { + lbTest.TearDown() + }) + + cert, err := testCluster.CreateTLSCertificate(t, "loadbalancer-https") + require.NoError(t, err) - pod := lbTest.DeployTestPod() + pod, err := lbTest.DeployTestPod() + require.NoError(t, err) lbSvc := lbTest.ServiceDefinition(pod, map[string]string{ string(annotation.LBLocation): "nbg1", @@ -110,19 +118,16 @@ func TestServiceLoadBalancersHTTPS(t *testing.T) { string(annotation.LBSvcProtocol): "https", }) - lbSvc, err := lbTest.CreateService(lbSvc) - if assert.NoError(t, err, "deploying test svc") { - WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, true) - } + lbSvc, err = lbTest.CreateService(lbSvc) + require.NoError(t, err) - lbTest.TearDown() + err = lbTest.WaitForHTTPAvailable(lbSvc.Status.LoadBalancer.Ingress[0].IP, false) + require.NoError(t, err) } func TestServiceLoadBalancersHTTPSWithManagedCertificate(t *testing.T) { t.Parallel() - ctx := context.Background() - if testCluster.certDomain == "" { t.Skip("Skipping because CERT_DOMAIN is not set") } @@ -133,8 +138,12 @@ func TestServiceLoadBalancersHTTPSWithManagedCertificate(t *testing.T) { podName: "loadbalancer-https", port: 443, } + t.Cleanup(func() { + lbTest.TearDown() + }) - pod := lbTest.DeployTestPod() + pod, err := lbTest.DeployTestPod() + require.NoError(t, err) lbSvc := lbTest.ServiceDefinition(pod, map[string]string{ string(annotation.LBLocation): "nbg1", @@ -144,62 +153,62 @@ func TestServiceLoadBalancersHTTPSWithManagedCertificate(t *testing.T) { string(annotation.LBSvcHTTPManagedCertificateUseACMEStaging): "true", }) - lbSvc, err := lbTest.CreateService(lbSvc) - if assert.NoError(t, err, "deploying test svc") { - certs, err := testCluster.hcloud.Certificate.AllWithOpts(ctx, hcloud.CertificateListOpts{ - ListOpts: hcloud.ListOpts{ - LabelSelector: fmt.Sprintf("%s=%s", hcops.LabelServiceUID, lbSvc.ObjectMeta.UID), - }, - }) - assert.NoError(t, err) - if assert.Len(t, certs, 1) { - testCluster.certificates.Add(certs[0].ID) - } - } + lbSvc, err = lbTest.CreateService(lbSvc) + require.NoError(t, err) - lbTest.TearDown() + certs, err := testCluster.hcloud.Certificate.AllWithOpts(t.Context(), hcloud.CertificateListOpts{ + ListOpts: hcloud.ListOpts{ + LabelSelector: fmt.Sprintf("%s=%s", hcops.LabelServiceUID, lbSvc.ObjectMeta.UID), + }, + }) + assert.NoError(t, err) + if assert.Len(t, certs, 1) { + testCluster.certificates.Add(certs[0].ID) + } } func TestServiceLoadBalancersWithPrivateNetwork(t *testing.T) { t.Parallel() lbTest := lbTestHelper{t: t, podName: "loadbalancer-private-network"} + t.Cleanup(func() { + lbTest.TearDown() + }) - pod := lbTest.DeployTestPod() + pod, err := lbTest.DeployTestPod() + require.NoError(t, err) ipRange := &net.IPNet{ IP: net.IPv4(10, 0, 0, 0), Mask: net.CIDRMask(24, 32), } - lbSvcDefinition := lbTest.ServiceDefinition(pod, map[string]string{ + lbSvc := lbTest.ServiceDefinition(pod, map[string]string{ string(annotation.LBLocation): "nbg1", string(annotation.LBUsePrivateIP): "true", string(annotation.PrivateSubnetIPRange): ipRange.String(), }) - lbSvc, err := lbTest.CreateService(lbSvcDefinition) - if assert.NoError(t, err, "deploying test svc") { - WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, false) + lbSvc, err = lbTest.CreateService(lbSvc) + require.NoError(t, err) - anyInIPRange := slices.ContainsFunc(lbSvc.Status.LoadBalancer.Ingress, func(ingress corev1.LoadBalancerIngress) bool { - ip := net.ParseIP(ingress.IP) - if ip == nil { - return false - } - return ipRange.Contains(ip) - }) - - assert.True(t, anyInIPRange) - } + err = lbTest.WaitForHTTPAvailable(lbSvc.Status.LoadBalancer.Ingress[0].IP, false) + require.NoError(t, err) - lbTest.TearDown() + anyInIPRange := slices.ContainsFunc(lbSvc.Status.LoadBalancer.Ingress, func(ingress corev1.LoadBalancerIngress) bool { + ip := net.ParseIP(ingress.IP) + if ip == nil { + return false + } + return ipRange.Contains(ip) + }) + assert.True(t, anyInIPRange) } func TestRouteNetworksPodIPsAreAccessible(t *testing.T) { t.Parallel() - err := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { + err := wait.PollUntilContextTimeout(t.Context(), 1*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { node, err := testCluster.k8sClient.CoreV1().Nodes().Get(ctx, testCluster.ControlNodeName(), metav1.GetOptions{}) if err != nil { return false, err @@ -222,6 +231,6 @@ func TestRouteNetworksPodIPsAreAccessible(t *testing.T) { return false, nil }) if err != nil { - t.Fatal(err) + t.Errorf("error waiting for pod IPs being accessible: %v", err) } } diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go index 2bce4a6f5..2d76d9fae 100644 --- a/tests/e2e/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -3,7 +3,6 @@ package e2e import ( - "context" "fmt" "os" "strings" @@ -34,7 +33,7 @@ func TestPodIsPresent(t *testing.T) { t.Parallel() t.Run("hcloud-cloud-controller-manager pod is present in kube-system", func(t *testing.T) { - pods, err := testCluster.k8sClient.CoreV1().Pods("kube-system").List(context.Background(), metav1.ListOptions{}) + pods, err := testCluster.k8sClient.CoreV1().Pods("kube-system").List(t.Context(), metav1.ListOptions{}) assert.NoError(t, err) found := false @@ -51,7 +50,7 @@ func TestPodIsPresent(t *testing.T) { t.Run("pod with app=hcloud-cloud-controller-manager is present in kube-system", func(t *testing.T) { pods, err := testCluster.k8sClient.CoreV1().Pods("kube-system"). - List(context.Background(), metav1.ListOptions{ + List(t.Context(), metav1.ListOptions{ LabelSelector: "app.kubernetes.io/name=hcloud-cloud-controller-manager", }) assert.NoError(t, err) diff --git a/tests/e2e/helper_test.go b/tests/e2e/helper_test.go index 5957cf39b..2292ca487 100644 --- a/tests/e2e/helper_test.go +++ b/tests/e2e/helper_test.go @@ -126,7 +126,7 @@ func (tc *TestCluster) Stop() error { // // The baseName of the certificate gets a random number suffix attached. // baseName and suffix are separated by a single "-" character. -func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) *hcloud.Certificate { +func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) (*hcloud.Certificate, error) { rndInt := rng.Int() name := fmt.Sprintf("%s-%d", baseName, rndInt) @@ -138,15 +138,15 @@ func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) *hclo } cert, _, err := tc.hcloud.Certificate.Create(context.Background(), opts) if err != nil { - t.Fatalf("%s: %v", name, err) + return nil, fmt.Errorf("%s: %v", name, err) } if cert == nil { - t.Fatalf("no certificate created") + return nil, fmt.Errorf("no certificate created") } tc.certificates.Add(cert.ID) - return cert + return cert, nil } // NetworkName returns the network name. @@ -174,7 +174,7 @@ type lbTestHelper struct { // DeployTestPod deploys a basic nginx pod within the k8s cluster // and waits until it is "ready". -func (l *lbTestHelper) DeployTestPod() *corev1.Pod { +func (l *lbTestHelper) DeployTestPod() (*corev1.Pod, error) { l.t.Helper() ctx := context.Background() @@ -188,7 +188,7 @@ func (l *lbTestHelper) DeployTestPod() *corev1.Pod { }, }, metav1.CreateOptions{}) if err != nil && !k8serrors.IsAlreadyExists(err) { - l.t.Fatal(err) + return nil, fmt.Errorf("error deploying test pod: %w", err) } podName := fmt.Sprintf("pod-%s", l.podName) @@ -217,8 +217,9 @@ func (l *lbTestHelper) DeployTestPod() *corev1.Pod { pod, err := testCluster.k8sClient.CoreV1().Pods(l.namespace).Create(ctx, &testPod, metav1.CreateOptions{}) if err != nil { - l.t.Fatalf("could not create test pod: %s", err) + return nil, fmt.Errorf("could not create test pod: %w", err) } + err = wait.PollUntilContextTimeout(ctx, 1*time.Second, 1*time.Minute, false, func(ctx context.Context) (done bool, err error) { p, err := testCluster.k8sClient.CoreV1().Pods(l.namespace).Get(ctx, podName, metav1.GetOptions{}) if err != nil { @@ -233,9 +234,10 @@ func (l *lbTestHelper) DeployTestPod() *corev1.Pod { return false, nil }) if err != nil { - l.t.Fatalf("pod %s did not come up after 1 minute: %s", podName, err) + return nil, fmt.Errorf("pod %s did not come up after 1 minute: %w", podName, err) } - return pod + + return pod, nil } // ServiceDefinition returns a service definition for a Hetzner Cloud Load Balancer (k8s service). @@ -274,37 +276,36 @@ func (l *lbTestHelper) ServiceDefinition(pod *corev1.Pod, annotations map[string func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, error) { l.t.Helper() - ctx := context.Background() - - // Default is 15s interval, 10s timeout, 3 retries => 45 seconds until up - // With these changes it should be 1 seconds until up - // lbSvc.Annotations[string(annotation.LBSvcHealthCheckInterval)] = "1s" - // lbSvc.Annotations[string(annotation.LBSvcHealthCheckTimeout)] = "2s" - // lbSvc.Annotations[string(annotation.LBSvcHealthCheckRetries)] = "1" - // lbSvc.Annotations[string(annotation.LBSvcHealthCheckProtocol)] = "tcp" - - _, err := testCluster.k8sClient.CoreV1().Services(l.namespace).Create(ctx, lbSvc, metav1.CreateOptions{}) + lbSvc, err := testCluster.k8sClient.CoreV1().Services(l.namespace).Create(l.t.Context(), lbSvc, metav1.CreateOptions{}) if err != nil { return nil, fmt.Errorf("could not create service: %s", err) } - err = wait.PollUntilContextTimeout(ctx, 1*time.Second, 5*time.Minute, false, func(ctx context.Context) (done bool, err error) { + ctx, cancel := context.WithTimeout(l.t.Context(), 4*time.Minute) + defer cancel() + + backoffFunc := hcloud.ExponentialBackoff(2.0, time.Second) + retries := 0 + for { svc, err := testCluster.k8sClient.CoreV1().Services(l.namespace).Get(ctx, lbSvc.Name, metav1.GetOptions{}) if err != nil { - return false, err + return nil, fmt.Errorf("error fetching load balancer service: %w", err) } ingressIPs := svc.Status.LoadBalancer.Ingress if len(ingressIPs) > 0 { lbSvc = svc - return true, nil + return lbSvc, nil + } + + select { + case <-ctx.Done(): + return nil, fmt.Errorf("timed out waiting for load balancer service to receive ingress IPs") + case <-time.After(backoffFunc(retries)): + retries++ + continue } - return false, nil - }) - if err != nil { - return nil, fmt.Errorf("test service (load balancer) did not come up after 5 minute: %s", err) } - return lbSvc, nil } // TearDown deletes the created pod and service. @@ -323,12 +324,14 @@ func (l *lbTestHelper) TearDown() { } } -// WaitForHTTPAvailable tries to connect to the given IP via http -// It tries it for 2 minutes, if after two minutes the connection -// wasn't successful and it wasn't a HTTP 200 response it will fail. -func WaitForHTTPAvailable(t *testing.T, ingressIP string, useHTTPS bool) { +// WaitForHTTPAvailable tries to connect to the given IP via HTTP. +// It uses exponential backoff starting at 1s and capping at 30s, +// waiting up to 6 minutes for a successful HTTP 200 response. +func (l *lbTestHelper) WaitForHTTPAvailable(ingressIP string, useHTTPS bool) error { + l.t.Helper() + client := &http.Client{ - Timeout: 1 * time.Second, + Timeout: 5 * time.Second, Transport: &http.Transport{ TLSClientConfig: &tls.Config{ InsecureSkipVerify: true, // nolint @@ -340,24 +343,33 @@ func WaitForHTTPAvailable(t *testing.T, ingressIP string, useHTTPS bool) { proto = "https" } - err := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 4*time.Minute, false, func(_ context.Context) (bool, error) { + ctx, cancel := context.WithTimeout(l.t.Context(), 4*time.Minute) + defer cancel() + + backoffFunc := hcloud.ExponentialBackoff(2.0, time.Second) + retries := 0 + for { resp, err := client.Get(fmt.Sprintf("%s://%s", proto, ingressIP)) if err != nil { - return false, nil + l.t.Log("service still unavailable, keep waiting") + } else { + resp.Body.Close() + switch resp.StatusCode { + case http.StatusOK: + return nil + case http.StatusServiceUnavailable: + l.t.Log("service still unavailable, keep waiting") + default: + return fmt.Errorf("got unexpected HTTP status %d", resp.StatusCode) + } } - defer resp.Body.Close() - switch resp.StatusCode { - case http.StatusOK: - // Success - return true, nil - case http.StatusServiceUnavailable: - // Health checks are still evaluating - return false, nil - default: - return false, fmt.Errorf("got HTTP Code %d instead of 200", resp.StatusCode) + + select { + case <-ctx.Done(): + return fmt.Errorf("timed out after 6m waiting for %s to be available", ingressIP) + case <-time.After(backoffFunc(retries)): + retries++ + continue } - }) - if err != nil { - t.Errorf("%s not available: %s", ingressIP, err) } } diff --git a/tests/e2e/robot_test.go b/tests/e2e/robot_test.go index 66d11581d..97d5b0886 100644 --- a/tests/e2e/robot_test.go +++ b/tests/e2e/robot_test.go @@ -3,10 +3,10 @@ package e2e import ( - "context" "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -20,10 +20,9 @@ func TestRobotClientIsAvailable(t *testing.T) { func TestNodeSetCorrectNodeLabelsAndIPAddressesRobot(t *testing.T) { t.Parallel() - ctx := context.Background() // Get a random Robot server from all Nodes in the cluster - nodes, err := testCluster.k8sClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{ + nodes, err := testCluster.k8sClient.CoreV1().Nodes().List(t.Context(), metav1.ListOptions{ LabelSelector: "instance.hetzner.cloud/is-root-server=true", }) assert.NoError(t, err) @@ -75,8 +74,12 @@ func TestServiceLoadBalancersRobot(t *testing.T) { t: t, podName: "loadbalancer-robot-only", } + t.Cleanup(func() { + lbTest.TearDown() + }) - pod := lbTest.DeployTestPod() + pod, err := lbTest.DeployTestPod() + require.NoError(t, err) lbSvc := lbTest.ServiceDefinition(pod, map[string]string{ string(annotation.LBLocation): "nbg1", @@ -84,10 +87,9 @@ func TestServiceLoadBalancersRobot(t *testing.T) { string(annotation.LBNodeSelector): "instance.hetzner.cloud/is-root-server=true", }) - lbSvc, err := lbTest.CreateService(lbSvc) - if assert.NoError(t, err, "deploying test svc") { - WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, false) - } + lbSvc, err = lbTest.CreateService(lbSvc) + require.NoError(t, err) - lbTest.TearDown() + err = lbTest.WaitForHTTPAvailable(lbSvc.Status.LoadBalancer.Ingress[0].IP, false) + require.NoError(t, err) } From a5ecde49c180a071cb1f78220863a6031bcf839e Mon Sep 17 00:00:00 2001 From: lukasmetzner Date: Wed, 15 Apr 2026 07:46:25 +0200 Subject: [PATCH 2/7] test: fix regression --- tests/e2e/cloud_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/cloud_test.go b/tests/e2e/cloud_test.go index 772134795..973a5e4d6 100644 --- a/tests/e2e/cloud_test.go +++ b/tests/e2e/cloud_test.go @@ -121,7 +121,7 @@ func TestServiceLoadBalancersHTTPS(t *testing.T) { lbSvc, err = lbTest.CreateService(lbSvc) require.NoError(t, err) - err = lbTest.WaitForHTTPAvailable(lbSvc.Status.LoadBalancer.Ingress[0].IP, false) + err = lbTest.WaitForHTTPAvailable(lbSvc.Status.LoadBalancer.Ingress[0].IP, true) require.NoError(t, err) } From 4fb5a3b3cfc1e7b377c3fb6d98227875ee409b97 Mon Sep 17 00:00:00 2001 From: lukasmetzner Date: Wed, 15 Apr 2026 07:46:44 +0200 Subject: [PATCH 3/7] test: refactor error messages --- tests/e2e/helper_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/e2e/helper_test.go b/tests/e2e/helper_test.go index 2292ca487..ffcce022d 100644 --- a/tests/e2e/helper_test.go +++ b/tests/e2e/helper_test.go @@ -138,7 +138,7 @@ func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) (*hcl } cert, _, err := tc.hcloud.Certificate.Create(context.Background(), opts) if err != nil { - return nil, fmt.Errorf("%s: %v", name, err) + return nil, fmt.Errorf("%s: %w", name, err) } if cert == nil { return nil, fmt.Errorf("no certificate created") @@ -278,7 +278,7 @@ func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, er lbSvc, err := testCluster.k8sClient.CoreV1().Services(l.namespace).Create(l.t.Context(), lbSvc, metav1.CreateOptions{}) if err != nil { - return nil, fmt.Errorf("could not create service: %s", err) + return nil, fmt.Errorf("could not create service: %w", err) } ctx, cancel := context.WithTimeout(l.t.Context(), 4*time.Minute) @@ -351,7 +351,7 @@ func (l *lbTestHelper) WaitForHTTPAvailable(ingressIP string, useHTTPS bool) err for { resp, err := client.Get(fmt.Sprintf("%s://%s", proto, ingressIP)) if err != nil { - l.t.Log("service still unavailable, keep waiting") + l.t.Logf("request to %s failed, keep waiting: %v", ingressIP, err) } else { resp.Body.Close() switch resp.StatusCode { From 9b935e3dc32d96bca32410f66fb073bfc9799fe3 Mon Sep 17 00:00:00 2001 From: lukasmetzner Date: Wed, 15 Apr 2026 07:50:56 +0200 Subject: [PATCH 4/7] test: adjust backoff settings --- tests/e2e/helper_test.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/e2e/helper_test.go b/tests/e2e/helper_test.go index ffcce022d..210517a32 100644 --- a/tests/e2e/helper_test.go +++ b/tests/e2e/helper_test.go @@ -324,9 +324,10 @@ func (l *lbTestHelper) TearDown() { } } -// WaitForHTTPAvailable tries to connect to the given IP via HTTP. -// It uses exponential backoff starting at 1s and capping at 30s, -// waiting up to 6 minutes for a successful HTTP 200 response. +// WaitForHTTPAvailable tries to connect to the given IP via HTTP or HTTPS +// (controlled by useHTTPS). It uses exponential backoff starting at 1s and +// capping at 30s, waiting up to 6 minutes for a successful HTTP 200 response. +// Each individual request has a 5s timeout. func (l *lbTestHelper) WaitForHTTPAvailable(ingressIP string, useHTTPS bool) error { l.t.Helper() @@ -343,10 +344,14 @@ func (l *lbTestHelper) WaitForHTTPAvailable(ingressIP string, useHTTPS bool) err proto = "https" } - ctx, cancel := context.WithTimeout(l.t.Context(), 4*time.Minute) + ctx, cancel := context.WithTimeout(l.t.Context(), 6*time.Minute) defer cancel() - backoffFunc := hcloud.ExponentialBackoff(2.0, time.Second) + backoffFunc := hcloud.ExponentialBackoffWithOpts(hcloud.ExponentialBackoffOpts{ + Base: time.Second, + Multiplier: 2, + Cap: 30 * time.Second, + }) retries := 0 for { resp, err := client.Get(fmt.Sprintf("%s://%s", proto, ingressIP)) From 9ef5c70eec697469ca82ec4aac153f7be4100f6f Mon Sep 17 00:00:00 2001 From: lukasmetzner Date: Wed, 15 Apr 2026 08:00:54 +0200 Subject: [PATCH 5/7] test: refactor use require assert --- tests/e2e/cloud_test.go | 2 +- tests/e2e/e2e_test.go | 13 +++++-------- tests/e2e/helper_test.go | 11 ++++++++--- tests/e2e/robot_test.go | 8 ++++---- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/tests/e2e/cloud_test.go b/tests/e2e/cloud_test.go index 973a5e4d6..586237a6c 100644 --- a/tests/e2e/cloud_test.go +++ b/tests/e2e/cloud_test.go @@ -27,7 +27,7 @@ func TestNodeSetCorrectNodeLabelsAndIPAddresses(t *testing.T) { t.Parallel() node, err := testCluster.k8sClient.CoreV1().Nodes().Get(t.Context(), testCluster.ControlNodeName(), metav1.GetOptions{}) - assert.NoError(t, err) + require.NoError(t, err) server, _, err := testCluster.hcloud.Server.Get(t.Context(), testCluster.ControlNodeName()) if err != nil { diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go index 2d76d9fae..d7273077c 100644 --- a/tests/e2e/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -9,6 +9,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -34,7 +35,7 @@ func TestPodIsPresent(t *testing.T) { t.Run("hcloud-cloud-controller-manager pod is present in kube-system", func(t *testing.T) { pods, err := testCluster.k8sClient.CoreV1().Pods("kube-system").List(t.Context(), metav1.ListOptions{}) - assert.NoError(t, err) + require.NoError(t, err) found := false for _, pod := range pods.Items { @@ -43,9 +44,7 @@ func TestPodIsPresent(t *testing.T) { break } } - if !found { - t.Error("kube-system does not contain a pod named hcloud-cloud-controller-manager") - } + assert.True(t, found, "kube-system does not contain a pod named hcloud-cloud-controller-manager") }) t.Run("pod with app=hcloud-cloud-controller-manager is present in kube-system", func(t *testing.T) { @@ -53,10 +52,8 @@ func TestPodIsPresent(t *testing.T) { List(t.Context(), metav1.ListOptions{ LabelSelector: "app.kubernetes.io/name=hcloud-cloud-controller-manager", }) - assert.NoError(t, err) + require.NoError(t, err) - if len(pods.Items) == 0 { - t.Fatal("kube-system does not contain a pod with label app=hcloud-cloud-controller-manager") - } + require.NotEmpty(t, pods.Items, "kube-system does not contain a pod with label app=hcloud-cloud-controller-manager") }) } diff --git a/tests/e2e/helper_test.go b/tests/e2e/helper_test.go index 210517a32..b4642f498 100644 --- a/tests/e2e/helper_test.go +++ b/tests/e2e/helper_test.go @@ -14,6 +14,7 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" hrobot "github.com/syself/hrobot-go" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" @@ -312,6 +313,12 @@ func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, er func (l *lbTestHelper) TearDown() { l.t.Helper() + // No namespace was created yet (e.g. DeployTestPod never ran because a + // prior step failed); nothing to clean up. + if l.namespace == "" { + return + } + err := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 3*time.Minute, true, func(ctx context.Context) (bool, error) { err := testCluster.k8sClient.CoreV1().Namespaces().Delete(ctx, l.namespace, metav1.DeleteOptions{}) if err != nil && !k8serrors.IsNotFound(err) { @@ -319,9 +326,7 @@ func (l *lbTestHelper) TearDown() { } return k8serrors.IsNotFound(err), nil }) - if err != nil { - l.t.Fatal(err) - } + require.NoError(l.t, err) } // WaitForHTTPAvailable tries to connect to the given IP via HTTP or HTTPS diff --git a/tests/e2e/robot_test.go b/tests/e2e/robot_test.go index 97d5b0886..1de81ebe4 100644 --- a/tests/e2e/robot_test.go +++ b/tests/e2e/robot_test.go @@ -25,18 +25,18 @@ func TestNodeSetCorrectNodeLabelsAndIPAddressesRobot(t *testing.T) { nodes, err := testCluster.k8sClient.CoreV1().Nodes().List(t.Context(), metav1.ListOptions{ LabelSelector: "instance.hetzner.cloud/is-root-server=true", }) - assert.NoError(t, err) - assert.GreaterOrEqual(t, len(nodes.Items), 1) + require.NoError(t, err) + require.GreaterOrEqual(t, len(nodes.Items), 1) node := nodes.Items[0] // Parse the server number from the ProviderID id, isCloudServer, err := providerid.ToServerID(node.Spec.ProviderID) - assert.NoError(t, err) + require.NoError(t, err) assert.False(t, isCloudServer) // Get the server from the Robot API to cross-check Labels server, err := testCluster.hrobot.ServerGet(int(id)) - assert.NoError(t, err) + require.NoError(t, err) labels := node.Labels expectedLabels := map[string]string{ From 54d921c994716a86e4bfbeae5106d24d3fe59ec4 Mon Sep 17 00:00:00 2001 From: lukasmetzner Date: Wed, 15 Apr 2026 08:09:54 +0200 Subject: [PATCH 6/7] test: refactor wip --- tests/e2e/cloud_test.go | 8 ++------ tests/e2e/helper_test.go | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/tests/e2e/cloud_test.go b/tests/e2e/cloud_test.go index 586237a6c..df31c63c1 100644 --- a/tests/e2e/cloud_test.go +++ b/tests/e2e/cloud_test.go @@ -30,9 +30,7 @@ func TestNodeSetCorrectNodeLabelsAndIPAddresses(t *testing.T) { require.NoError(t, err) server, _, err := testCluster.hcloud.Server.Get(t.Context(), testCluster.ControlNodeName()) - if err != nil { - return - } + require.NoError(t, err) labels := node.Labels expectedLabels := map[string]string{ @@ -230,7 +228,5 @@ func TestRouteNetworksPodIPsAreAccessible(t *testing.T) { } return false, nil }) - if err != nil { - t.Errorf("error waiting for pod IPs being accessible: %v", err) - } + assert.NoError(t, err, "error waiting for pod IPs being accessible") } diff --git a/tests/e2e/helper_test.go b/tests/e2e/helper_test.go index b4642f498..ad8621f44 100644 --- a/tests/e2e/helper_test.go +++ b/tests/e2e/helper_test.go @@ -137,12 +137,12 @@ func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) (*hcl Certificate: p.Cert, PrivateKey: p.Key, } - cert, _, err := tc.hcloud.Certificate.Create(context.Background(), opts) + cert, _, err := tc.hcloud.Certificate.Create(t.Context(), opts) if err != nil { return nil, fmt.Errorf("%s: %w", name, err) } if cert == nil { - return nil, fmt.Errorf("no certificate created") + return nil, errors.New("no certificate created") } tc.certificates.Add(cert.ID) @@ -178,7 +178,7 @@ type lbTestHelper struct { func (l *lbTestHelper) DeployTestPod() (*corev1.Pod, error) { l.t.Helper() - ctx := context.Background() + ctx := l.t.Context() if l.namespace == "" { l.namespace = "hccm-test-" + strconv.Itoa(rand.Int()) @@ -285,7 +285,11 @@ func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, er ctx, cancel := context.WithTimeout(l.t.Context(), 4*time.Minute) defer cancel() - backoffFunc := hcloud.ExponentialBackoff(2.0, time.Second) + backoffFunc := hcloud.ExponentialBackoffWithOpts(hcloud.ExponentialBackoffOpts{ + Base: time.Second, + Multiplier: 2, + Cap: 30 * time.Second, + }) retries := 0 for { svc, err := testCluster.k8sClient.CoreV1().Services(l.namespace).Get(ctx, lbSvc.Name, metav1.GetOptions{}) @@ -293,10 +297,8 @@ func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, er return nil, fmt.Errorf("error fetching load balancer service: %w", err) } - ingressIPs := svc.Status.LoadBalancer.Ingress - if len(ingressIPs) > 0 { - lbSvc = svc - return lbSvc, nil + if len(svc.Status.LoadBalancer.Ingress) > 0 { + return svc, nil } select { @@ -304,7 +306,6 @@ func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, er return nil, fmt.Errorf("timed out waiting for load balancer service to receive ingress IPs") case <-time.After(backoffFunc(retries)): retries++ - continue } } } @@ -319,6 +320,8 @@ func (l *lbTestHelper) TearDown() { return } + // Use context.Background() rather than t.Context(): cleanup must run to + // completion even when the test has already been cancelled or failed. err := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 3*time.Minute, true, func(ctx context.Context) (bool, error) { err := testCluster.k8sClient.CoreV1().Namespaces().Delete(ctx, l.namespace, metav1.DeleteOptions{}) if err != nil && !k8serrors.IsNotFound(err) { @@ -379,7 +382,6 @@ func (l *lbTestHelper) WaitForHTTPAvailable(ingressIP string, useHTTPS bool) err return fmt.Errorf("timed out after 6m waiting for %s to be available", ingressIP) case <-time.After(backoffFunc(retries)): retries++ - continue } } } From 82702999f81ea74060426d5d8560042037908cb9 Mon Sep 17 00:00:00 2001 From: lukasmetzner Date: Wed, 15 Apr 2026 08:17:02 +0200 Subject: [PATCH 7/7] test: refactor wip --- tests/e2e/cloud_test.go | 46 ++++++++++++++++++---------------------- tests/e2e/helper_test.go | 22 +++++++++---------- 2 files changed, 31 insertions(+), 37 deletions(-) diff --git a/tests/e2e/cloud_test.go b/tests/e2e/cloud_test.go index df31c63c1..931164eaf 100644 --- a/tests/e2e/cloud_test.go +++ b/tests/e2e/cloud_test.go @@ -32,7 +32,6 @@ func TestNodeSetCorrectNodeLabelsAndIPAddresses(t *testing.T) { server, _, err := testCluster.hcloud.Server.Get(t.Context(), testCluster.ControlNodeName()) require.NoError(t, err) - labels := node.Labels expectedLabels := map[string]string{ "node.kubernetes.io/instance-type": server.ServerType.Name, "topology.kubernetes.io/region": server.Location.Name, @@ -43,26 +42,15 @@ func TestNodeSetCorrectNodeLabelsAndIPAddresses(t *testing.T) { "instance.hetzner.cloud/provided-by": "cloud", } for expectedLabel, expectedValue := range expectedLabels { - if labelValue, ok := labels[expectedLabel]; !ok || labelValue != expectedValue { - t.Errorf("node have a not expected label %s, ok: %v, given value %s, expected value %s", expectedLabel, ok, labelValue, expectedValue) - } - } - - for _, address := range node.Status.Addresses { - if address.Type == corev1.NodeExternalIP { - expectedIP := server.PublicNet.IPv4.IP.String() - if expectedIP != address.Address { - t.Errorf("Got %s as NodeExternalIP but expected %s", address.Address, expectedIP) - } - } + assert.Equal(t, expectedValue, node.Labels[expectedLabel], "unexpected value for label %s", expectedLabel) } for _, address := range node.Status.Addresses { - if address.Type == corev1.NodeInternalIP { - expectedIP := server.PrivateNet[0].IP.String() - if expectedIP != address.Address { - t.Errorf("Got %s as NodeInternalIP but expected %s", address.Address, expectedIP) - } + switch address.Type { + case corev1.NodeExternalIP: + assert.Equal(t, server.PublicNet.IPv4.IP.String(), address.Address, "unexpected NodeExternalIP") + case corev1.NodeInternalIP: + assert.Equal(t, server.PrivateNet[0].IP.String(), address.Address, "unexpected NodeInternalIP") } } } @@ -206,6 +194,10 @@ func TestServiceLoadBalancersWithPrivateNetwork(t *testing.T) { func TestRouteNetworksPodIPsAreAccessible(t *testing.T) { t.Parallel() + var ( + nodeInternalIP string + routeGateway string + ) err := wait.PollUntilContextTimeout(t.Context(), 1*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { node, err := testCluster.k8sClient.CoreV1().Nodes().Get(ctx, testCluster.ControlNodeName(), metav1.GetOptions{}) if err != nil { @@ -217,16 +209,20 @@ func TestRouteNetworksPodIPsAreAccessible(t *testing.T) { return false, err } for _, route := range network.Routes { - if route.Destination.String() == node.Spec.PodCIDR { - for _, a := range node.Status.Addresses { - if a.Type == corev1.NodeInternalIP { - assert.Equal(t, a.Address, route.Gateway.String()) - } + if route.Destination.String() != node.Spec.PodCIDR { + continue + } + routeGateway = route.Gateway.String() + for _, a := range node.Status.Addresses { + if a.Type == corev1.NodeInternalIP { + nodeInternalIP = a.Address + break } - return true, nil } + return true, nil } return false, nil }) - assert.NoError(t, err, "error waiting for pod IPs being accessible") + require.NoError(t, err, "error waiting for pod IPs being accessible") + assert.Equal(t, nodeInternalIP, routeGateway, "route gateway should match node internal IP") } diff --git a/tests/e2e/helper_test.go b/tests/e2e/helper_test.go index ad8621f44..9d9a6726d 100644 --- a/tests/e2e/helper_test.go +++ b/tests/e2e/helper_test.go @@ -35,6 +35,14 @@ func init() { rng = rand.New(rand.NewSource(time.Now().UnixNano())) } +// pollBackoff is the standard exponential backoff used while polling for +// k8s/hcloud state in this suite: 1s base, doubling, capped at 30s. +var pollBackoff = hcloud.ExponentialBackoffWithOpts(hcloud.ExponentialBackoffOpts{ + Base: time.Second, + Multiplier: 2, + Cap: 30 * time.Second, +}) + type TestCluster struct { hcloud *hcloud.Client hrobot hrobot.RobotClient @@ -285,11 +293,6 @@ func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, er ctx, cancel := context.WithTimeout(l.t.Context(), 4*time.Minute) defer cancel() - backoffFunc := hcloud.ExponentialBackoffWithOpts(hcloud.ExponentialBackoffOpts{ - Base: time.Second, - Multiplier: 2, - Cap: 30 * time.Second, - }) retries := 0 for { svc, err := testCluster.k8sClient.CoreV1().Services(l.namespace).Get(ctx, lbSvc.Name, metav1.GetOptions{}) @@ -304,7 +307,7 @@ func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, er select { case <-ctx.Done(): return nil, fmt.Errorf("timed out waiting for load balancer service to receive ingress IPs") - case <-time.After(backoffFunc(retries)): + case <-time.After(pollBackoff(retries)): retries++ } } @@ -355,11 +358,6 @@ func (l *lbTestHelper) WaitForHTTPAvailable(ingressIP string, useHTTPS bool) err ctx, cancel := context.WithTimeout(l.t.Context(), 6*time.Minute) defer cancel() - backoffFunc := hcloud.ExponentialBackoffWithOpts(hcloud.ExponentialBackoffOpts{ - Base: time.Second, - Multiplier: 2, - Cap: 30 * time.Second, - }) retries := 0 for { resp, err := client.Get(fmt.Sprintf("%s://%s", proto, ingressIP)) @@ -380,7 +378,7 @@ func (l *lbTestHelper) WaitForHTTPAvailable(ingressIP string, useHTTPS bool) err select { case <-ctx.Done(): return fmt.Errorf("timed out after 6m waiting for %s to be available", ingressIP) - case <-time.After(backoffFunc(retries)): + case <-time.After(pollBackoff(retries)): retries++ } }