From 08225f90a6d0c406de19dbe0670cb641e088348a Mon Sep 17 00:00:00 2001 From: Alejandro Gullon Date: Fri, 13 Mar 2026 10:06:21 +0100 Subject: [PATCH] add: test to add an extra worker on TNA cluster pre-commit.check-secrets: ENABLED --- test/extended/two_node/tna_extra_worker.go | 451 +++++++++++++++++++++ 1 file changed, 451 insertions(+) create mode 100644 test/extended/two_node/tna_extra_worker.go diff --git a/test/extended/two_node/tna_extra_worker.go b/test/extended/two_node/tna_extra_worker.go new file mode 100644 index 000000000000..c20462a4778f --- /dev/null +++ b/test/extended/two_node/tna_extra_worker.go @@ -0,0 +1,451 @@ +package two_node + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "time" + + g "github.com/onsi/ginkgo/v2" + o "github.com/onsi/gomega" + + v1 "github.com/openshift/api/config/v1" + machineconfigclient "github.com/openshift/client-go/machineconfiguration/clientset/versioned" + "github.com/openshift/origin/test/extended/baremetal" + "github.com/openshift/origin/test/extended/two_node/utils" + "github.com/openshift/origin/test/extended/two_node/utils/apis" + exutil "github.com/openshift/origin/test/extended/util" + "github.com/openshift/origin/test/extended/util/image" + corev1 "k8s.io/api/core/v1" + kerrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/dynamic" + e2e "k8s.io/kubernetes/test/e2e/framework" +) + +const ( + extraWorkerProvisioningTimeout = 15 * time.Minute +) + +// initialState holds the cluster state recorded before the test begins, +// so that post-test assertions can compare against the baseline. +type initialState struct { + nodes *corev1.NodeList + nodeCount int + mcoClient machineconfigclient.Interface + workerMCPCount int32 +} + +var bmhGVR = schema.GroupVersionResource{ + Group: "metal3.io", Resource: "baremetalhosts", Version: "v1alpha1", +} + +var _ = g.Describe("[sig-node][apigroup:config.openshift.io][OCPFeatureGate:HighlyAvailableArbiter][Serial] Extra worker scaling in HighlyAvailableArbiterMode", func() { + defer g.GinkgoRecover() + + oc := exutil.NewCLIWithoutNamespace("").AsAdmin() + var helper *baremetal.BaremetalTestHelper + + g.BeforeEach(func() { + g.By("SETUP: Validating cluster topology is HighlyAvailableArbiter") + utils.SkipIfNotTopology(oc, v1.HighlyAvailableArbiterMode) + + // We intentionally do NOT call helper.Setup() here: its internal waitForDeletion + // has a hardcoded 5-minute timeout with o.Expect, which fails the test if a stale + // BMH from a previous (killed) run is still deprovisioning. Instead, we use our + // own cleanup with a 30-minute timeout that logs instead of failing. + g.By("SETUP: Cleaning up stale extra worker BMHs from previous runs") + bmhClient := oc.AdminDynamicClient().Resource(bmhGVR).Namespace(machineAPINamespace) + deleteExtraWorkerBMHs(bmhClient) + + g.By("SETUP: Initializing baremetal test helper") + helper = baremetal.NewBaremetalTestHelper(oc.AdminDynamicClient()) + + g.By("SETUP: Ensuring extra worker data is available") + if !helper.CanDeployExtraWorkers() { + g.Skip("Extra worker data not available - deploy with NUM_EXTRA_WORKERS=1 in dev-scripts config") + } + }) + + g.AfterEach(func() { + if helper == nil { + return + } + // Do NOT call helper.DeleteAllExtraWorkers() here: its internal waitForDeletion has a + // hardcoded 5-minute timeout, which is insufficient for a provisioned BMH — Metal3 must + // complete a full deprovisioning cycle (power-off + disk clean via Redfish) before the + // object is removed, which routinely takes >5 minutes. + // + // Instead, we delete only the extraworker BMHs (by name prefix) and wait up to 30 minutes. + g.By("CLEANUP: Deleting extra worker BMHs and waiting for Metal3 deprovisioning") + bmhClient := oc.AdminDynamicClient().Resource(bmhGVR).Namespace(machineAPINamespace) + deleteExtraWorkerBMHs(bmhClient) + + g.By("CLEANUP: Removing stale extra worker Node objects from the cluster") + deleteExtraWorkerNodes(oc) + }) + + g.It("should deploy an extra worker node that joins the cluster and becomes Ready [Timeout:60m][Slow]", func() { + g.By("STEP-01: Recording initial values for the test") + state := recordInitialState(oc) + + g.By("STEP-02: Provisioning extra worker BMH") + provisionExtraWorker(oc, helper) + + g.By("STEP-03: Approving pending CSRs for the new worker") + approveWorkerCSRs(oc) + + g.By("STEP-04: Waiting for a new worker node to become Ready") + newWorkerName := waitForNewWorkerReady(oc, state.nodes) + + g.By("STEP-05: Verifying new node membership and worker label") + newNode := verifyNodeMembership(oc, newWorkerName, state.nodeCount) + + g.By("STEP-06: Verifying the new node has no pressure conditions") + verifyNodeHealth(newNode) + + g.By("STEP-07: Verifying a pod can be scheduled and run on the new node") + verifyPodSchedulable(oc, newWorkerName) + + g.By("STEP-08: Verifying the worker MachineConfigPool converges with the new node") + waitForWorkerMCPConvergence(state.mcoClient, state.workerMCPCount) + + g.By("STEP-09: Verifying the new node's kubelet version matches existing nodes") + verifyKubeletVersion(oc, newNode) + + e2e.Logf("Extra worker %s joined the cluster successfully", newWorkerName) + }) +}) + +// --- Test step helpers --- + +func recordInitialState(oc *exutil.CLI) initialState { + nodes, err := oc.AdminKubeClient().CoreV1().Nodes().List(context.Background(), metav1.ListOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + mcoClient := machineconfigclient.NewForConfigOrDie(oc.AdminConfig()) + mcpCount := getWorkerMCPMachineCount(mcoClient) + e2e.Logf("Initial state: %d nodes, %d machines in worker MCP", len(nodes.Items), mcpCount) + return initialState{ + nodes: nodes, + nodeCount: len(nodes.Items), + mcoClient: mcoClient, + workerMCPCount: mcpCount, + } +} + +func getWorkerMCPMachineCount(mcoClient machineconfigclient.Interface) int32 { + mcp, err := mcoClient.MachineconfigurationV1().MachineConfigPools().Get(context.Background(), "worker", metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred(), "Failed to get worker MachineConfigPool") + return mcp.Status.MachineCount +} + +// provisionExtraWorker deploys the extra worker BMH from extraworkers-secret, patches it with +// the fields needed for provisioning, and waits for it to reach the "provisioned" state. +func provisionExtraWorker(oc *exutil.CLI, helper *baremetal.BaremetalTestHelper) { + // DeployExtraWorker returns (BMH, BMC secret) — we only need the BMH object. + host, _ := helper.DeployExtraWorker(0) + e2e.Logf("Extra worker BMH %s deployed and reached available state", host.GetName()) + + bmhClient := oc.AdminDynamicClient().Resource(bmhGVR).Namespace(machineAPINamespace) + + triggerBMHProvisioning(bmhClient, host.GetName()) + waitForBMHProvisioned(bmhClient, host.GetName()) +} + +func approveWorkerCSRs(oc *exutil.CLI) { + approvedCount := apis.ApproveCSRs(oc, 10*time.Minute, 1*time.Minute, 2) + o.Expect(approvedCount).To(o.BeNumerically(">=", 2), + fmt.Sprintf("Expected at least 2 CSRs (kubelet client + serving) but only approved %d", approvedCount)) +} + +// waitForNewWorkerReady polls until a new worker node (not present in initialNodes) becomes Ready. +func waitForNewWorkerReady(oc *exutil.CLI, initialNodes *corev1.NodeList) string { + var newWorkerName string + + err := wait.PollUntilContextTimeout(context.Background(), utils.ThirtySecondPollInterval, 10*time.Minute, true, func(ctx context.Context) (bool, error) { + nodes, err := oc.AdminKubeClient().CoreV1().Nodes().List(ctx, metav1.ListOptions{ + LabelSelector: utils.LabelNodeRoleWorker, + }) + if err != nil { + e2e.Logf("Failed to list worker nodes: %v", err) + return false, nil + } + for i := range nodes.Items { + node := &nodes.Items[i] + if isNodeInList(node.Name, initialNodes.Items) { + continue + } + if utils.IsNodeReady(oc, node.Name) { + newWorkerName = node.Name + e2e.Logf("New worker node %s is Ready", node.Name) + return true, nil + } + e2e.Logf("New worker node %s found but not yet Ready", node.Name) + } + return false, nil + }) + o.Expect(err).NotTo(o.HaveOccurred(), "Timed out waiting for new worker node to become Ready") + o.Expect(newWorkerName).NotTo(o.BeEmpty(), "Expected to find a new worker node") + return newWorkerName +} + +// verifyNodeMembership checks that the new node increased the cluster node count and has the worker label. +func verifyNodeMembership(oc *exutil.CLI, newWorkerName string, initialNodeCount int) *corev1.Node { + finalNodes, err := oc.AdminKubeClient().CoreV1().Nodes().List(context.Background(), metav1.ListOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(len(finalNodes.Items)).To(o.Equal(initialNodeCount+1), + fmt.Sprintf("Expected %d nodes but found %d", initialNodeCount+1, len(finalNodes.Items))) + + newNode, err := oc.AdminKubeClient().CoreV1().Nodes().Get(context.Background(), newWorkerName, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + _, hasWorkerLabel := newNode.Labels[utils.LabelNodeRoleWorker] + o.Expect(hasWorkerLabel).To(o.BeTrue(), + fmt.Sprintf("Expected node %s to have label %s", newWorkerName, utils.LabelNodeRoleWorker)) + + return newNode +} + +func verifyNodeHealth(newNode *corev1.Node) { + for _, condition := range newNode.Status.Conditions { + switch condition.Type { + case corev1.NodeMemoryPressure, corev1.NodeDiskPressure, corev1.NodePIDPressure: + o.Expect(condition.Status).To(o.Equal(corev1.ConditionFalse), + fmt.Sprintf("Expected node %s condition %s to be False but got %s", + newNode.Name, condition.Type, condition.Status)) + } + } + e2e.Logf("Node %s has no pressure conditions", newNode.Name) +} + +func verifyPodSchedulable(oc *exutil.CLI, nodeName string) { + testPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "extra-worker-sched-", + Namespace: "default", + }, + Spec: corev1.PodSpec{ + NodeName: nodeName, + RestartPolicy: corev1.RestartPolicyNever, + Containers: []corev1.Container{ + { + Name: "test", + Image: image.ShellImage(), + Command: []string{"/bin/bash", "-c", "echo schedulability-ok"}, + }, + }, + }, + } + createdPod, err := oc.AdminKubeClient().CoreV1().Pods("default").Create(context.Background(), testPod, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred(), "Failed to create test pod on node %s", nodeName) + defer func() { + _ = oc.AdminKubeClient().CoreV1().Pods("default").Delete(context.Background(), createdPod.Name, metav1.DeleteOptions{}) + }() + + err = wait.PollUntilContextTimeout(context.Background(), utils.FiveSecondPollInterval, 5*time.Minute, true, func(ctx context.Context) (bool, error) { + pod, err := oc.AdminKubeClient().CoreV1().Pods("default").Get(ctx, createdPod.Name, metav1.GetOptions{}) + if err != nil { + return false, nil + } + e2e.Logf("Test pod %s phase: %s", createdPod.Name, pod.Status.Phase) + return pod.Status.Phase == corev1.PodSucceeded, nil + }) + o.Expect(err).NotTo(o.HaveOccurred(), "Test pod did not reach Succeeded on node %s", nodeName) + + logs := getPodLogs(oc, "default", createdPod.Name) + o.Expect(logs).To(o.ContainSubstring("schedulability-ok"), + fmt.Sprintf("Expected test pod output to contain 'schedulability-ok' but got: %s", logs)) + e2e.Logf("Pod successfully scheduled and ran on node %s", nodeName) +} + +func getPodLogs(oc *exutil.CLI, namespace, podName string) string { + req := oc.AdminKubeClient().CoreV1().Pods(namespace).GetLogs(podName, &corev1.PodLogOptions{}) + logBytes, err := req.DoRaw(context.Background()) + if err != nil { + e2e.Logf("Failed to get logs for pod %s/%s: %v", namespace, podName, err) + return "" + } + return string(logBytes) +} + +func waitForWorkerMCPConvergence(mcoClient machineconfigclient.Interface, initialMachineCount int32) { + expectedCount := initialMachineCount + 1 + err := wait.PollUntilContextTimeout(context.Background(), utils.ThirtySecondPollInterval, 20*time.Minute, true, func(ctx context.Context) (bool, error) { + mcp, err := mcoClient.MachineconfigurationV1().MachineConfigPools().Get(ctx, "worker", metav1.GetOptions{}) + if err != nil { + e2e.Logf("Failed to get worker MCP: %v", err) + return false, nil + } + s := mcp.Status + e2e.Logf("Worker MCP: machineCount=%d, readyMachineCount=%d, updatedMachineCount=%d, degradedMachineCount=%d", + s.MachineCount, s.ReadyMachineCount, s.UpdatedMachineCount, s.DegradedMachineCount) + return s.MachineCount == expectedCount && + s.ReadyMachineCount == s.MachineCount && + s.UpdatedMachineCount == s.MachineCount && + s.DegradedMachineCount == 0, nil + }) + o.Expect(err).NotTo(o.HaveOccurred(), "Worker MachineConfigPool did not converge after adding new node") + e2e.Logf("Worker MachineConfigPool converged with %d machines", expectedCount) +} + +func verifyKubeletVersion(oc *exutil.CLI, newNode *corev1.Node) { + masterNodes, err := utils.GetNodes(oc, utils.LabelNodeRoleControlPlane) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(masterNodes.Items).NotTo(o.BeEmpty(), "Expected at least one control-plane node") + + workerVersion := newNode.Status.NodeInfo.KubeletVersion + masterVersions := make(map[string]bool) + for i := range masterNodes.Items { + masterVersions[masterNodes.Items[i].Status.NodeInfo.KubeletVersion] = true + } + e2e.Logf("Control-plane kubelet versions: %v, new worker kubelet version: %s", masterVersions, workerVersion) + o.Expect(masterVersions).To(o.HaveKey(workerVersion), + fmt.Sprintf("New node %s kubelet version %s does not match any control-plane kubelet version %v", + newNode.Name, workerVersion, masterVersions)) +} + +// --- BMH lifecycle helpers --- + +// triggerBMHProvisioning patches an available BMH with the fields needed to start provisioning: +// customDeploy method, bootMode, rootDeviceHints, and userData. The extraworkers-secret BMH only +// contains basic connectivity fields (bmc, bootMACAddress) — without customDeploy the BMH stays +// "available" forever, and without userData the node boots CoreOS but never fetches its ignition +// config from MCS, so no kubelet starts and no CSRs are generated. +// +// NOTE: bootMode=UEFI and rootDeviceHints=/dev/sda are specific to dev-scripts VMs (libvirt/QEMU). +// Real hardware deployments would need values derived from the actual hardware inventory. +func triggerBMHProvisioning(bmhClient dynamic.ResourceInterface, bmhName string) { + patch := map[string]interface{}{ + "spec": map[string]interface{}{ + "customDeploy": map[string]interface{}{ + "method": "install_coreos", + }, + "bootMode": "UEFI", + "rootDeviceHints": map[string]interface{}{ + "deviceName": "/dev/sda", + }, + "userData": map[string]interface{}{ + "name": "worker-user-data-managed", + "namespace": machineAPINamespace, + }, + }, + } + + patchBytes, err := json.Marshal(patch) + o.Expect(err).NotTo(o.HaveOccurred()) + + _, err = bmhClient.Patch(context.Background(), bmhName, types.MergePatchType, patchBytes, metav1.PatchOptions{}) + o.Expect(err).NotTo(o.HaveOccurred(), "Failed to patch BMH %s with provisioning config", bmhName) + + e2e.Logf("Patched BMH %s with customDeploy=install_coreos, bootMode=UEFI, rootDeviceHints=/dev/sda, userData=worker-user-data-managed", bmhName) +} + +// waitForBMHProvisioned polls until the BMH reaches the "provisioned" state. +func waitForBMHProvisioned(bmhClient dynamic.ResourceInterface, bmhName string) { + err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, extraWorkerProvisioningTimeout, true, func(ctx context.Context) (bool, error) { + host, err := bmhClient.Get(ctx, bmhName, metav1.GetOptions{}) + if err != nil { + e2e.Logf("Failed to get BMH %s: %v", bmhName, err) + return false, nil + } + + state, found, err := unstructured.NestedString(host.Object, "status", "provisioning", "state") + if err != nil || !found { + e2e.Logf("BMH %s provisioning state not found yet", bmhName) + return false, nil + } + + e2e.Logf("BMH %s provisioning state: %s", bmhName, state) + + if state == "provisioned" { + return true, nil + } + if state == "error" { + errMsg, _, _ := unstructured.NestedString(host.Object, "status", "errorMessage") + return false, fmt.Errorf("BMH %s entered error state: %s", bmhName, errMsg) + } + return false, nil + }) + o.Expect(err).NotTo(o.HaveOccurred(), "Timed out waiting for BMH %s to reach provisioned state", bmhName) +} + +// --- Cleanup helpers --- + +// deleteExtraWorkerBMHs deletes all BMHs whose name contains "extraworker" and waits up to +// 30 minutes for each to disappear. The long timeout is necessary because Metal3 deprovisioning +// a provisioned BMH (power-off + disk clean via Redfish) routinely takes >5 minutes. +// +// This intentionally does NOT touch arbiter/master BMHs: a name-prefix filter avoids the +// mistake of targeting all provisioned BMHs in the namespace. +func deleteExtraWorkerBMHs(bmhClient dynamic.ResourceInterface) { + bmhList, err := bmhClient.List(context.Background(), metav1.ListOptions{}) + if err != nil { + e2e.Logf("Failed to list BMHs for cleanup: %v", err) + return + } + + for i := range bmhList.Items { + name := bmhList.Items[i].GetName() + if !strings.Contains(name, "extraworker") { + continue + } + + e2e.Logf("Deleting extraworker BMH %s", name) + if err := bmhClient.Delete(context.Background(), name, metav1.DeleteOptions{}); err != nil { + if kerrors.IsNotFound(err) { + e2e.Logf("BMH %s already deleted", name) + continue + } + e2e.Logf("Failed to delete BMH %s: %v", name, err) + continue + } + + err := wait.PollUntilContextTimeout(context.Background(), 15*time.Second, 30*time.Minute, true, func(ctx context.Context) (bool, error) { + _, err := bmhClient.Get(ctx, name, metav1.GetOptions{}) + if kerrors.IsNotFound(err) { + e2e.Logf("BMH %s deleted", name) + return true, nil + } + e2e.Logf("Waiting for BMH %s to be deleted...", name) + return false, nil + }) + if err != nil { + e2e.Logf("BMH %s was not deleted within timeout: %v", name, err) + } + } +} + +// deleteExtraWorkerNodes removes any Node objects whose name contains "extraworker" from the +// cluster. After BMH deletion, Metal3 powers off the host but the Node object persists in +// NotReady state. Cleaning it up prevents stale nodes from accumulating on long-lived clusters. +func deleteExtraWorkerNodes(oc *exutil.CLI) { + nodes, err := oc.AdminKubeClient().CoreV1().Nodes().List(context.Background(), metav1.ListOptions{}) + if err != nil { + e2e.Logf("Failed to list nodes for cleanup: %v", err) + return + } + for i := range nodes.Items { + name := nodes.Items[i].Name + if !strings.Contains(name, "extraworker") { + continue + } + e2e.Logf("Deleting stale extra worker node %s", name) + if err := oc.AdminKubeClient().CoreV1().Nodes().Delete(context.Background(), name, metav1.DeleteOptions{}); err != nil && !kerrors.IsNotFound(err) { + e2e.Logf("Failed to delete node %s: %v", name, err) + } + } +} + +// isNodeInList checks whether a node name exists in the given node list. +func isNodeInList(name string, nodes []corev1.Node) bool { + for i := range nodes { + if nodes[i].Name == name { + return true + } + } + return false +}