From ee11fac0411f576b9519f7b3a5bae146e0073470 Mon Sep 17 00:00:00 2001 From: Waleed Mousa <32266980+wmousa@users.noreply.github.com> Date: Mon, 30 Mar 2026 12:39:42 +0200 Subject: [PATCH] R25.10 hotfix fix cpu topology (#939) * Workaround the SMT issue for chess by setting the cpu to 2 for spdk-proxy-container * fix: enforce HT-aware SPDK core alignment and dynamic CPU limits - build_unisolated_stride: add nodes_per_socket param; pre-adjust num_unisolated so remaining SPDK cores are divisible by 4 (nodes_per_socket>=2, HT enabled) or 2 (HT enabled), preserving pool-stride and HT pair selection; return adjusted num_unisolated alongside the core list - generate_core_allocation: unpack new return value and pass nodes_per_socket into build_unisolated_stride - storage_deploy_spdk.yaml.j2: set cpu for spdk-proxy-container and copy-script to 2 if HT enabled and CPU topology enabled, 1 if CPU topology enabled without HT, omit cpu limit/request entirely when CPU topology is disabled - kubernetes.py: pass HT_ENABLED (via is_hyperthreading_enabled_via_siblings) to the pod template --- simplyblock_core/utils/__init__.py | 35 ++++++++++++++++--- .../api/internal/storage_node/kubernetes.py | 1 + .../templates/storage_deploy_spdk.yaml.j2 | 23 ++++++++---- 3 files changed, 48 insertions(+), 11 deletions(-) mode change 100644 => 100755 simplyblock_core/utils/__init__.py mode change 100644 => 100755 simplyblock_web/api/internal/storage_node/kubernetes.py mode change 100644 => 100755 simplyblock_web/templates/storage_deploy_spdk.yaml.j2 diff --git a/simplyblock_core/utils/__init__.py b/simplyblock_core/utils/__init__.py old mode 100644 new mode 100755 index f874db5eb..f68435ee8 --- a/simplyblock_core/utils/__init__.py +++ b/simplyblock_core/utils/__init__.py @@ -1459,6 +1459,7 @@ def build_unisolated_stride( num_unisolated: int, client_qpair_count: int, pool_stride: int = 2, + nodes_per_socket: int = 1, ) -> List[int]: """ Build a list of 'unisolated' CPUs by picking from per-qpair 
pools. @@ -1473,6 +1474,10 @@ def build_unisolated_stride( If hyper_thread=True, append sibling right after each selected core, where sibling is defined by *index pairing* across halves of the sorted list: sibling(cores[i]) = cores[i + half] if i < half else cores[i - half] + + When hyper_thread=True and nodes_per_socket >= 2, the remaining SPDK cores + (all_cores - unisolated) are trimmed to a multiple of 4 so that when split + across two nodes each node gets an even (paired) count. """ hyper_thread = is_hyperthreading_enabled_via_siblings() @@ -1496,9 +1501,27 @@ def build_unisolated_stride( raise ValueError(f"hyper_thread=True but total logical CPUs ({total}) is not even") half = total // 2 - # If you REQUIRE strict pairing (cpu+sibling always together), uncomment: - # if hyper_thread and (num_unisolated % 2 != 0): - # raise ValueError("num_unisolated must be even when hyper_thread=True") + # Cores are always selected in complete HT pairs (cpu + sibling), so + # num_unisolated must be even — round down if needed. + if num_unisolated % 2 != 0: + num_unisolated -= 1 + + if nodes_per_socket >= 2: + # When splitting across 2 nodes each SPDK core must pair with its HT + # sibling, so the SPDK pool (total - num_unisolated) must be a multiple + # of 4. Both values are already even, so the only possible misalignment + # is a remainder of 2 — corrected by adjusting num_unisolated by one + # HT pair (2 cores). 
+ spdk_count = total - num_unisolated + if spdk_count % 4 != 0: + # spdk_count % 4 == 2: give one extra HT pair to SPDK + if num_unisolated >= 2: + num_unisolated -= 2 + else: + # No room to shrink unisolated; take one pair from SPDK instead + num_unisolated += 2 + + num_unisolated = max(0, min(num_unisolated, total)) core_to_idx = {c: i for i, c in enumerate(cores)} @@ -1568,7 +1591,7 @@ def add_cpu(cpu: int) -> bool: break - return out[:num_unisolated] + return out[:num_unisolated], num_unisolated def generate_core_allocation(cores_by_numa, sockets_to_use, nodes_per_socket, cores_percentage=0): node_distribution: dict = {} @@ -1578,7 +1601,9 @@ def generate_core_allocation(cores_by_numa, sockets_to_use, nodes_per_socket, co continue all_cores = sorted(cores_by_numa[numa_node]) num_unisolated = calculate_unisolated_cores(all_cores, cores_percentage) - unisolated = build_unisolated_stride(all_cores, num_unisolated, constants.CLIENT_QPAIR_COUNT) + unisolated, num_unisolated = build_unisolated_stride( + all_cores, num_unisolated, constants.CLIENT_QPAIR_COUNT, nodes_per_socket=nodes_per_socket + ) available_cores = [c for c in all_cores if c not in unisolated] q1 = len(available_cores) // 4 diff --git a/simplyblock_web/api/internal/storage_node/kubernetes.py b/simplyblock_web/api/internal/storage_node/kubernetes.py old mode 100644 new mode 100755 index b26879faa..b1ec67252 --- a/simplyblock_web/api/internal/storage_node/kubernetes.py +++ b/simplyblock_web/api/internal/storage_node/kubernetes.py @@ -371,6 +371,7 @@ def spdk_process_start(body: SPDKParams): 'NSOCKET': body.socket, 'FW_PORT': body.firewall_port, 'CPU_TOPOLOGY_ENABLED': cpu_topology_enabled, + 'HT_ENABLED': core_utils.is_hyperthreading_enabled_via_siblings(), 'RESERVED_SYSTEM_CPUS': reserved_system_cpus } diff --git a/simplyblock_web/templates/storage_deploy_spdk.yaml.j2 b/simplyblock_web/templates/storage_deploy_spdk.yaml.j2 old mode 100644 new mode 100755 index df152797a..7d7ee1361 --- 
a/simplyblock_web/templates/storage_deploy_spdk.yaml.j2 +++ b/simplyblock_web/templates/storage_deploy_spdk.yaml.j2 @@ -68,10 +68,16 @@ spec: mountPath: /etc/foundationdb resources: requests: - cpu: "100m" + {% if CPU_TOPOLOGY_ENABLED %} + cpu: {{ 2 if HT_ENABLED else 1 }} + numa-align/numa-{{ NSOCKET }}: 1 + {% endif %} memory: "64Mi" limits: - cpu: "100m" + {% if CPU_TOPOLOGY_ENABLED %} + cpu: {{ 2 if HT_ENABLED else 1 }} + numa-align/numa-{{ NSOCKET }}: 1 + {% endif %} memory: "64Mi" containers: @@ -138,6 +144,7 @@ spec: {% endif %} requests: hugepages-2Mi: {{ MEM_MEGA }}Mi + cpu: {{ CORES }} {% if CPU_TOPOLOGY_ENABLED %} memory: {{ MEM2_MEGA }}Mi numa-align/numa-{{ NSOCKET }}: 1 @@ -163,15 +170,19 @@ spec: value: "True" - name: TIMEOUT value: "300" - {% if CPU_TOPOLOGY_ENABLED %} resources: limits: - cpu: 1 + {% if CPU_TOPOLOGY_ENABLED %} + cpu: {{ 2 if HT_ENABLED else 1 }} + numa-align/numa-{{ NSOCKET }}: 1 + {% endif %} memory: "128Mi" requests: - cpu: 1 + {% if CPU_TOPOLOGY_ENABLED %} + cpu: {{ 2 if HT_ENABLED else 1 }} + numa-align/numa-{{ NSOCKET }}: 1 + {% endif %} memory: "128Mi" - {% endif %} {% if MODE == "docker" %} ---