feat: add qws section model with allreduce scaling [code:qws] [system:RC_GH200]

yoshifuminakamura · yoshifuminakamura · commit c2ffcd10a77b · 2026-04-03T23:31:03.000+09:00
diff --git a/programs/qws/estimate.sh b/programs/qws/estimate.sh
@@ -3,11 +3,14 @@
 
 source scripts/estimate_common.sh
 
-BK_ESTIMATION_PACKAGE="${BK_ESTIMATION_PACKAGE:-lightweight_fom_scaling}"
+BK_ESTIMATION_PACKAGE="${BK_ESTIMATION_PACKAGE:-instrumented_app_sections_dummy}"
 BK_ESTIMATION_BASELINE_SYSTEM="Fugaku"
 BK_ESTIMATION_BASELINE_EXP="CASE0"
 BK_ESTIMATION_FUTURE_SYSTEM="FugakuNEXT"
-BK_ESTIMATION_FUTURE_FOM_FACTOR="${BK_ESTIMATION_FUTURE_FOM_FACTOR:-1}"
+BK_ESTIMATION_CURRENT_TARGET_NODES="${BK_ESTIMATION_CURRENT_TARGET_NODES:-1024}"
+BK_ESTIMATION_FUTURE_TARGET_NODES="${BK_ESTIMATION_FUTURE_TARGET_NODES:-256}"
+BK_ESTIMATION_SECTION_DEFAULT_FACTOR="${BK_ESTIMATION_SECTION_DEFAULT_FACTOR:-0.5}"
+BK_ESTIMATION_LOGP_SECTION_NAME="${BK_ESTIMATION_LOGP_SECTION_NAME:-allreduce}"
 case "$BK_ESTIMATION_PACKAGE" in
   lightweight_fom_scaling)
     BK_ESTIMATION_MODEL_NAME="${BK_ESTIMATION_MODEL_NAME:-scale-mock}"
@@ -28,9 +31,6 @@ source "scripts/estimation/packages/${BK_ESTIMATION_PACKAGE}.sh"
 
 read_values "$BK_ESTIMATION_INPUT_JSON"
 
-BK_ESTIMATION_CURRENT_TARGET_NODES="${BK_ESTIMATION_CURRENT_TARGET_NODES:-$est_node_count}"
-BK_ESTIMATION_FUTURE_TARGET_NODES="${BK_ESTIMATION_FUTURE_TARGET_NODES:-$est_node_count}"
-
 if ! bk_estimation_package_check_applicability; then
   echo "ERROR: estimation package ${BK_ESTIMATION_PACKAGE} is not applicable for input ${BK_ESTIMATION_INPUT_JSON}" >&2
   exit 1
diff --git a/programs/qws/run.sh b/programs/qws/run.sh
@@ -35,18 +35,21 @@ print_results() {
     local section_compute_hopping
     local section_compute_solver
     local section_halo_exchange
+    local section_allreduce
     local section_write_result
     local overlap_compute_halo
-    section_prepare_rhs=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.18}')
-    section_compute_hopping=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.34}')
-    section_compute_solver=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.20}')
-    section_halo_exchange=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.26}')
-    section_write_result=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.10}')
-    overlap_compute_halo=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.08}')
+    section_prepare_rhs=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.16}')
+    section_compute_hopping=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.28}')
+    section_compute_solver=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.18}')
+    section_halo_exchange=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.18}')
+    section_allreduce=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.16}')
+    section_write_result=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.08}')
+    overlap_compute_halo=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.04}')
     bk_emit_section prepare_rhs "$section_prepare_rhs"
     bk_emit_section compute_hopping "$section_compute_hopping"
     bk_emit_section compute_solver "$section_compute_solver"
     bk_emit_section halo_exchange "$section_halo_exchange"
+    bk_emit_section allreduce "$section_allreduce"
     bk_emit_section write_result "$section_write_result"
     bk_emit_overlap compute_hopping,halo_exchange "$overlap_compute_halo"
 }
diff --git a/scripts/estimation/packages/instrumented_app_sections_dummy.sh b/scripts/estimation/packages/instrumented_app_sections_dummy.sh
@@ -47,6 +47,48 @@ bk_estimation_package_check_applicability() {
   return 0
 }
 
+# Print the net time of a breakdown: the sum of its section timings minus the
+# sum of its overlap timings. Each entry counts .time, else .bench_time, else 0;
+# a missing or empty list counts as 0.
+# Arguments: $1 - breakdown JSON; "" or "null" prints an empty line instead.
+_bk_breakdown_total_time() {
+  local doc="$1"
+
+  case "$doc" in
+    "" | null)
+      echo ""
+      return 0
+      ;;
+  esac
+
+  echo "$doc" | jq -r '
+    # Total of one timing list (the list is the input of this helper).
+    def summed: (. // []) | map(.time // .bench_time // 0) | add // 0;
+    (.sections | summed) - (.overlaps | summed)
+  '
+}
+
+# Rescale a breakdown so its net time (sections minus overlaps) equals $2.
+# Arguments: $1 - breakdown JSON ("" or "null" prints an empty line)
+#            $2 - desired total time
+# Outputs:   the rescaled JSON; $1 is echoed unchanged when the current net
+#            time is empty/zero or $2 is not a number, since no usable factor
+#            exists (a non-numeric $2 would otherwise zero every timing).
+_bk_scale_breakdown_to_total() {
+  local breakdown_json="$1" target_total="$2" source_total factor
+
+  if [[ -z "$breakdown_json" || "$breakdown_json" == "null" ]]; then
+    echo ""
+    return 0
+  fi
+
+  source_total=$(_bk_breakdown_total_time "$breakdown_json")
+  factor=$(awk -v target="$target_total" -v source="$source_total" 'BEGIN {
+    if (target !~ /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/ || source + 0 == 0) exit 1
+    printf "%.12f", target / source }') || { echo "$breakdown_json"; return 0; }
+  _bk_scale_breakdown_times "$breakdown_json" "$factor"
+}
+
 _bk_attach_section_package_name() {
   local breakdown_json="$1"
   local package_name="$2"
@@ -86,15 +128,71 @@ _bk_scale_breakdown_times() {
   '
 }
 
+# Print log2(target)/log2(bench) with 12 decimals ($1 target, $2 bench nodes).
+# Both counts are coerced to numbers and clamped to >= 2 before taking logs.
+_bk_logp_factor() {
+  awk -v target="$1" -v bench="$2" '
+    # x += 0 forces a numeric compare, so empty or non-numeric input clamps to 2.
+    function safe_nodes(x) { x += 0; return (x < 2 ? 2 : x) }
+    function lg2(x) { return log(x) / log(2) }
+    BEGIN {
+      printf "%.12f", lg2(safe_nodes(target)) / lg2(safe_nodes(bench))
+    }'
+}
+
+# Scale every section and overlap timing of a breakdown JSON document.
+# Arguments: $1 breakdown JSON ("" or "null" prints an empty line)
+#            $2 target node count, $3 benchmarked node count
+#            $4 fixed factor applied to overlaps and to all other sections
+#            $5 name of the section scaled by the _bk_logp_factor result
+# Outputs:   compact transformed JSON on stdout; a missing or null
+#            .sections/.overlaps list is treated as an empty list.
+_bk_transform_breakdown_for_qws_demo() {
+  local breakdown_json="$1"
+  local target_nodes="$2"
+  local bench_nodes="$3"
+  local default_factor="$4"
+  local logp_section_name="$5"
+
+  if [[ -z "$breakdown_json" || "$breakdown_json" == "null" ]]; then
+    echo ""
+    return 0
+  fi
+
+  local logp_factor
+  logp_factor=$(_bk_logp_factor "$target_nodes" "$bench_nodes")
+
+  printf '%s\n' "$breakdown_json" | jq -c \
+    --arg logp_section_name "$logp_section_name" \
+    --argjson default_factor "$default_factor" \
+    --argjson logp_factor "$logp_factor" '
+      # Multiply both timing fields by f and record the rule as method.
+      def scaled(f; method):
+        . + {
+          time: ((.time // .bench_time // 0) * f),
+          bench_time: ((.bench_time // .time // 0) * f),
+          scaling_method: method
+        };
+      .sections = ((.sections // []) | map(
+        if .name == $logp_section_name then scaled($logp_factor; "logP")
+        else scaled($default_factor; "fixed-factor") end
+      ))
+      | .overlaps = ((.overlaps // []) | map(scaled($default_factor; "fixed-factor")))
+    '
+}
+
 bk_estimation_package_run() {
   local baseline_system="${BK_ESTIMATION_BASELINE_SYSTEM:-Fugaku}"
   local baseline_exp="${BK_ESTIMATION_BASELINE_EXP:-CASE0}"
   local future_system="${BK_ESTIMATION_FUTURE_SYSTEM:-FugakuNEXT}"
   local current_target_nodes="${BK_ESTIMATION_CURRENT_TARGET_NODES:-$est_node_count}"
   local future_target_nodes="${BK_ESTIMATION_FUTURE_TARGET_NODES:-$est_node_count}"
-  local future_fom_factor="${BK_ESTIMATION_FUTURE_FOM_FACTOR:-1}"
   local model_name="${BK_ESTIMATION_MODEL_NAME:-instrumented-app-sections-dummy}"
   local model_version="${BK_ESTIMATION_MODEL_VERSION:-0.1}"
+  local default_section_factor="${BK_ESTIMATION_SECTION_DEFAULT_FACTOR:-0.5}"
+  local logp_section_name="${BK_ESTIMATION_LOGP_SECTION_NAME:-allreduce}"
+  local breakdown_template
+  local baseline_breakdown
 
   est_future_bench_system="$est_system"
   est_future_bench_fom="$est_fom"
@@ -105,18 +203,39 @@ bk_estimation_package_run() {
 
   est_current_system="$baseline_system"
   fetch_current_fom "$baseline_system" "$est_code" "$baseline_exp"
+  baseline_breakdown="$est_current_fom_breakdown"
+  if [[ -z "$baseline_breakdown" || "$baseline_breakdown" == "null" ]]; then
+    baseline_breakdown="$est_input_fom_breakdown"
+  elif ! echo "$baseline_breakdown" | jq -e --arg section_name "$logp_section_name" '.sections // [] | any(.name == $section_name)' >/dev/null; then
+    baseline_breakdown="$est_input_fom_breakdown"
+  fi
+
+  if [[ -n "$baseline_breakdown" && "$baseline_breakdown" != "null" ]]; then
+    breakdown_template=$(_bk_scale_breakdown_to_total "$baseline_breakdown" "$est_current_fom")
+    est_current_fom_breakdown=$(_bk_transform_breakdown_for_qws_demo \
+      "$breakdown_template" \
+      "$current_target_nodes" \
+      "${est_current_bench_nodes:-1}" \
+      "$default_section_factor" \
+      "$logp_section_name")
+    est_current_fom_breakdown=$(_bk_attach_section_package_name "$est_current_fom_breakdown" "instrumented_app_sections_dummy")
+    est_current_fom=$(_bk_breakdown_total_time "$est_current_fom_breakdown")
+  fi
   est_current_target_nodes="$current_target_nodes"
-  est_current_scaling_method="measured"
+  est_current_scaling_method="$model_name"
 
   est_future_system="$future_system"
-  est_future_fom=$(awk -v fom="$est_fom" -v factor="$future_fom_factor" 'BEGIN {printf "%.3f", fom * factor}')
+  est_future_fom_breakdown=$(_bk_transform_breakdown_for_qws_demo \
+    "$est_input_fom_breakdown" \
+    "$future_target_nodes" \
+    "$est_node_count" \
+    "$default_section_factor" \
+    "$logp_section_name")
+  est_future_fom_breakdown=$(_bk_attach_section_package_name "$est_future_fom_breakdown" "instrumented_app_sections_dummy")
+  est_future_fom=$(_bk_breakdown_total_time "$est_future_fom_breakdown")
   est_future_target_nodes="$future_target_nodes"
   est_future_scaling_method="$model_name"
 
-  est_current_fom_breakdown=$(_bk_attach_section_package_name "$est_current_fom_breakdown" "instrumented_app_sections_dummy")
-  est_future_fom_breakdown=$(_bk_scale_breakdown_times "$est_input_fom_breakdown" "${future_fom_factor}")
-  est_future_fom_breakdown=$(_bk_attach_section_package_name "$est_future_fom_breakdown" "instrumented_app_sections_dummy")
-
   est_measurement_json=$(jq -cn '
     {
       tool: "application-section-timer",
@@ -131,20 +250,17 @@ bk_estimation_package_run() {
     --arg baseline_system "$baseline_system" \
     --arg current_target_nodes "$current_target_nodes" \
     --arg future_target_nodes "$future_target_nodes" \
-    --arg future_fom_factor "$future_fom_factor" \
+    --arg default_section_factor "$default_section_factor" \
+    --arg logp_section_name "$logp_section_name" \
     '{
       scaling_assumption: "weak-scaling",
       future_system_assumption: $future_system,
       baseline_system: $baseline_system,
       current_target_nodes: $current_target_nodes,
       future_target_nodes: $future_target_nodes,
-      section_model_rule: (
-        if $future_fom_factor == "1" then
-          "carry measured section and overlap timings through unchanged as a dummy reference implementation"
-        else
-          ($future_fom_factor + "x uniform scaling is applied to section and overlap timings as a dummy reference implementation")
-        end
-      )
+      default_section_rule: ("sections except " + $logp_section_name + " are scaled by " + $default_section_factor),
+      logp_section_rule: ($logp_section_name + " is scaled with logP"),
+      overlap_rule: "overlap timings are scaled by the default section factor"
     }')
 
   est_model_json=$(jq -cn \
@@ -161,7 +277,7 @@ bk_estimation_package_run() {
 
   est_confidence_json='{"level":"experimental","score":0.20}'
   est_notes_json=$(jq -cn \
-    --arg note "Reference implementation for application-defined section timings in BenchKit." \
+    --arg note "Reference implementation for qws-style application-defined section timings in BenchKit." \
     '{summary: $note}')
 }