Skip to content

Commit c2ffcd1

Browse files
feat: add qws section model with allreduce scaling [code:qws] [system:RC_GH200]
1 parent b822512 commit c2ffcd1

3 files changed

Lines changed: 146 additions & 27 deletions

File tree

programs/qws/estimate.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,14 @@
33

44
source scripts/estimate_common.sh
55

6-
BK_ESTIMATION_PACKAGE="${BK_ESTIMATION_PACKAGE:-lightweight_fom_scaling}"
6+
BK_ESTIMATION_PACKAGE="${BK_ESTIMATION_PACKAGE:-instrumented_app_sections_dummy}"
77
BK_ESTIMATION_BASELINE_SYSTEM="Fugaku"
88
BK_ESTIMATION_BASELINE_EXP="CASE0"
99
BK_ESTIMATION_FUTURE_SYSTEM="FugakuNEXT"
10-
BK_ESTIMATION_FUTURE_FOM_FACTOR="${BK_ESTIMATION_FUTURE_FOM_FACTOR:-1}"
10+
BK_ESTIMATION_CURRENT_TARGET_NODES="${BK_ESTIMATION_CURRENT_TARGET_NODES:-1024}"
11+
BK_ESTIMATION_FUTURE_TARGET_NODES="${BK_ESTIMATION_FUTURE_TARGET_NODES:-256}"
12+
BK_ESTIMATION_SECTION_DEFAULT_FACTOR="${BK_ESTIMATION_SECTION_DEFAULT_FACTOR:-0.5}"
13+
BK_ESTIMATION_LOGP_SECTION_NAME="${BK_ESTIMATION_LOGP_SECTION_NAME:-allreduce}"
1114
case "$BK_ESTIMATION_PACKAGE" in
1215
lightweight_fom_scaling)
1316
BK_ESTIMATION_MODEL_NAME="${BK_ESTIMATION_MODEL_NAME:-scale-mock}"
@@ -28,9 +31,6 @@ source "scripts/estimation/packages/${BK_ESTIMATION_PACKAGE}.sh"
2831

2932
read_values "$BK_ESTIMATION_INPUT_JSON"
3033

31-
BK_ESTIMATION_CURRENT_TARGET_NODES="${BK_ESTIMATION_CURRENT_TARGET_NODES:-$est_node_count}"
32-
BK_ESTIMATION_FUTURE_TARGET_NODES="${BK_ESTIMATION_FUTURE_TARGET_NODES:-$est_node_count}"
33-
3434
if ! bk_estimation_package_check_applicability; then
3535
echo "ERROR: estimation package ${BK_ESTIMATION_PACKAGE} is not applicable for input ${BK_ESTIMATION_INPUT_JSON}" >&2
3636
exit 1

programs/qws/run.sh

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,21 @@ print_results() {
3535
local section_compute_hopping
3636
local section_compute_solver
3737
local section_halo_exchange
38+
local section_allreduce
3839
local section_write_result
3940
local overlap_compute_halo
40-
section_prepare_rhs=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.18}')
41-
section_compute_hopping=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.34}')
42-
section_compute_solver=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.20}')
43-
section_halo_exchange=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.26}')
44-
section_write_result=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.10}')
45-
overlap_compute_halo=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.08}')
41+
section_prepare_rhs=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.16}')
42+
section_compute_hopping=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.28}')
43+
section_compute_solver=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.18}')
44+
section_halo_exchange=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.18}')
45+
section_allreduce=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.16}')
46+
section_write_result=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.08}')
47+
overlap_compute_halo=$(awk -v x="$fom" 'BEGIN {printf "%.3f", x * 0.04}')
4648
bk_emit_section prepare_rhs "$section_prepare_rhs"
4749
bk_emit_section compute_hopping "$section_compute_hopping"
4850
bk_emit_section compute_solver "$section_compute_solver"
4951
bk_emit_section halo_exchange "$section_halo_exchange"
52+
bk_emit_section allreduce "$section_allreduce"
5053
bk_emit_section write_result "$section_write_result"
5154
bk_emit_overlap compute_hopping,halo_exchange "$overlap_compute_halo"
5255
}

scripts/estimation/packages/instrumented_app_sections_dummy.sh

Lines changed: 132 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,48 @@ bk_estimation_package_check_applicability() {
4747
return 0
4848
}
4949

50+
#######################################
# Print the net time of a FOM breakdown: the sum of all section times minus
# the sum of all overlap times.
# Each entry contributes its .time, falling back to .bench_time, then to 0.
# A missing .sections or .overlaps key is treated as an empty list.
# Arguments:
#   $1 - breakdown JSON text; may be empty or the literal string "null"
# Outputs:
#   the net total on stdout (an empty line when $1 is empty or "null")
# Returns:
#   0 on the empty/"null" path, otherwise the exit status of jq
#######################################
_bk_breakdown_total_time() {
  local breakdown_json="$1"

  # Nothing to sum: report "no value" rather than 0 so callers can tell
  # an absent breakdown apart from a breakdown whose total is zero.
  if [[ -z "$breakdown_json" || "$breakdown_json" == "null" ]]; then
    echo ""
    return 0
  fi

  printf '%s\n' "$breakdown_json" | jq -r '
    # Sum the per-entry time of one list; null list or empty list gives 0.
    def net(entries):
      (entries // [])
      | map(.time // .bench_time // 0)
      | add // 0;
    net(.sections) - net(.overlaps)
  '
}
70+
71+
#######################################
# Compute the factor target_total / current_net_total for a breakdown and
# hand the breakdown plus that factor to _bk_scale_breakdown_times.
# The current net total comes from _bk_breakdown_total_time. Both helpers
# are defined elsewhere in this file; _bk_scale_breakdown_times is presumably
# what applies the factor to the individual timings.
# Arguments:
#   $1 - breakdown JSON text; may be empty or the literal string "null"
#   $2 - desired net total
# Outputs:
#   an empty line when $1 is empty or "null";
#   the breakdown unchanged when its current total is empty, non-numeric or
#   numerically zero (no meaningful factor exists);
#   otherwise whatever _bk_scale_breakdown_times prints
#######################################
_bk_scale_breakdown_to_total() {
  local breakdown_json="$1"
  local target_total="$2"

  if [[ -z "$breakdown_json" || "$breakdown_json" == "null" ]]; then
    echo ""
    return 0
  fi

  local source_total
  source_total=$(_bk_breakdown_total_time "$breakdown_json")

  # Decide "is the divisor zero?" numerically inside awk. A string compare
  # against "0" would let 0.0, -0, 0e0 or garbage through to the division,
  # where awk aborts and leaves the factor empty.
  local factor
  if ! factor=$(LC_ALL=C awk -v tgt="$target_total" -v src="$source_total" '
    BEGIN {
      if (src + 0 == 0) exit 1
      printf "%.12f", tgt / src
    }'); then
    printf '%s\n' "$breakdown_json"
    return 0
  fi

  _bk_scale_breakdown_times "$breakdown_json" "$factor"
}
91+
5092
_bk_attach_section_package_name() {
5193
local breakdown_json="$1"
5294
local package_name="$2"
@@ -86,15 +128,71 @@ _bk_scale_breakdown_times() {
86128
'
87129
}
88130

131+
#######################################
# Print the ratio log2(target_nodes) / log2(bench_nodes) with 12 decimals.
# Each node count is clamped to a minimum of 2 before the logarithm, so the
# denominator is never log2(1) = 0.
# Arguments:
#   $1 - target node count
#   $2 - node count of the benchmark run
# Outputs:
#   the ratio on stdout, no trailing newline
#######################################
_bk_logp_factor() {
  local target_nodes="$1"
  local bench_nodes="$2"

  # LC_ALL=C keeps "." as the decimal separator; the caller in this file
  # feeds the result to jq --argjson, which needs a JSON number.
  LC_ALL=C awk -v target="$target_nodes" -v bench="$bench_nodes" '
    # "x += 0" forces a numeric value first. Without it a non-numeric
    # argument is compared as a string, can slip past the clamp, and
    # log() then yields -inf or nan.
    function safe_nodes(x) { x += 0; return (x < 2 ? 2 : x) }
    function lg2(x) { return log(x) / log(2) }
    BEGIN {
      printf "%.12f", lg2(safe_nodes(target)) / lg2(safe_nodes(bench))
    }'
}
142+
143+
#######################################
# Rescale every timing in a breakdown and tag each entry with the rule used.
#   - the section whose .name equals $5 is multiplied by the factor printed
#     by _bk_logp_factor "$2" "$3" and tagged scaling_method "logP"
#   - every other section, and every overlap, is multiplied by $4 and tagged
#     scaling_method "fixed-factor"
# For each entry both .time and .bench_time are written; each falls back to
# the other (then to 0) when missing, always reading the pre-scaling values.
# A .sections or .overlaps value that is not an array (for example an absent
# key) is left untouched instead of aborting the whole transformation.
# Arguments:
#   $1 - breakdown JSON text; may be empty or the literal string "null"
#   $2 - target node count
#   $3 - benchmark node count
#   $4 - default factor (must be a valid JSON number for jq --argjson)
#   $5 - name of the section that gets the logP factor
# Outputs:
#   compact JSON on stdout (an empty line when $1 is empty or "null")
# Returns:
#   0 on the empty/"null" path, otherwise the exit status of jq
#######################################
_bk_transform_breakdown_for_qws_demo() {
  local breakdown_json="$1"
  local target_nodes="$2"
  local bench_nodes="$3"
  local default_factor="$4"
  local logp_section_name="$5"

  if [[ -z "$breakdown_json" || "$breakdown_json" == "null" ]]; then
    echo ""
    return 0
  fi

  local logp_factor
  logp_factor=$(_bk_logp_factor "$target_nodes" "$bench_nodes")

  printf '%s\n' "$breakdown_json" | jq -c \
    --arg logp_section_name "$logp_section_name" \
    --argjson default_factor "$default_factor" \
    --argjson logp_factor "$logp_factor" '
    # Scale one entry. Every "." below is the original entry, so the
    # bench_time fallback reads the unscaled .time.
    def rescale(factor; method):
      .
      + {time: ((.time // .bench_time // 0) * factor)}
      + {bench_time: ((.bench_time // .time // 0) * factor)}
      + {scaling_method: method};

    (if (.sections | type) == "array" then
       .sections |= map(
         if .name == $logp_section_name then
           rescale($logp_factor; "logP")
         else
           rescale($default_factor; "fixed-factor")
         end
       )
     else . end)
    | (if (.overlaps | type) == "array" then
         .overlaps |= map(rescale($default_factor; "fixed-factor"))
       else . end)
  '
}
183+
89184
bk_estimation_package_run() {
90185
local baseline_system="${BK_ESTIMATION_BASELINE_SYSTEM:-Fugaku}"
91186
local baseline_exp="${BK_ESTIMATION_BASELINE_EXP:-CASE0}"
92187
local future_system="${BK_ESTIMATION_FUTURE_SYSTEM:-FugakuNEXT}"
93188
local current_target_nodes="${BK_ESTIMATION_CURRENT_TARGET_NODES:-$est_node_count}"
94189
local future_target_nodes="${BK_ESTIMATION_FUTURE_TARGET_NODES:-$est_node_count}"
95-
local future_fom_factor="${BK_ESTIMATION_FUTURE_FOM_FACTOR:-1}"
96190
local model_name="${BK_ESTIMATION_MODEL_NAME:-instrumented-app-sections-dummy}"
97191
local model_version="${BK_ESTIMATION_MODEL_VERSION:-0.1}"
192+
local default_section_factor="${BK_ESTIMATION_SECTION_DEFAULT_FACTOR:-0.5}"
193+
local logp_section_name="${BK_ESTIMATION_LOGP_SECTION_NAME:-allreduce}"
194+
local breakdown_template
195+
local baseline_breakdown
98196

99197
est_future_bench_system="$est_system"
100198
est_future_bench_fom="$est_fom"
@@ -105,18 +203,39 @@ bk_estimation_package_run() {
105203

106204
est_current_system="$baseline_system"
107205
fetch_current_fom "$baseline_system" "$est_code" "$baseline_exp"
206+
baseline_breakdown="$est_current_fom_breakdown"
207+
if [[ -z "$baseline_breakdown" || "$baseline_breakdown" == "null" ]]; then
208+
baseline_breakdown="$est_input_fom_breakdown"
209+
elif ! echo "$baseline_breakdown" | jq -e --arg section_name "$logp_section_name" '.sections // [] | any(.name == $section_name)' >/dev/null; then
210+
baseline_breakdown="$est_input_fom_breakdown"
211+
fi
212+
213+
if [[ -n "$baseline_breakdown" && "$baseline_breakdown" != "null" ]]; then
214+
breakdown_template=$(_bk_scale_breakdown_to_total "$baseline_breakdown" "$est_current_fom")
215+
est_current_fom_breakdown=$(_bk_transform_breakdown_for_qws_demo \
216+
"$breakdown_template" \
217+
"$current_target_nodes" \
218+
"${est_current_bench_nodes:-1}" \
219+
"$default_section_factor" \
220+
"$logp_section_name")
221+
est_current_fom_breakdown=$(_bk_attach_section_package_name "$est_current_fom_breakdown" "instrumented_app_sections_dummy")
222+
est_current_fom=$(_bk_breakdown_total_time "$est_current_fom_breakdown")
223+
fi
108224
est_current_target_nodes="$current_target_nodes"
109-
est_current_scaling_method="measured"
225+
est_current_scaling_method="$model_name"
110226

111227
est_future_system="$future_system"
112-
est_future_fom=$(awk -v fom="$est_fom" -v factor="$future_fom_factor" 'BEGIN {printf "%.3f", fom * factor}')
228+
est_future_fom_breakdown=$(_bk_transform_breakdown_for_qws_demo \
229+
"$est_input_fom_breakdown" \
230+
"$future_target_nodes" \
231+
"$est_node_count" \
232+
"$default_section_factor" \
233+
"$logp_section_name")
234+
est_future_fom_breakdown=$(_bk_attach_section_package_name "$est_future_fom_breakdown" "instrumented_app_sections_dummy")
235+
est_future_fom=$(_bk_breakdown_total_time "$est_future_fom_breakdown")
113236
est_future_target_nodes="$future_target_nodes"
114237
est_future_scaling_method="$model_name"
115238

116-
est_current_fom_breakdown=$(_bk_attach_section_package_name "$est_current_fom_breakdown" "instrumented_app_sections_dummy")
117-
est_future_fom_breakdown=$(_bk_scale_breakdown_times "$est_input_fom_breakdown" "${future_fom_factor}")
118-
est_future_fom_breakdown=$(_bk_attach_section_package_name "$est_future_fom_breakdown" "instrumented_app_sections_dummy")
119-
120239
est_measurement_json=$(jq -cn '
121240
{
122241
tool: "application-section-timer",
@@ -131,20 +250,17 @@ bk_estimation_package_run() {
131250
--arg baseline_system "$baseline_system" \
132251
--arg current_target_nodes "$current_target_nodes" \
133252
--arg future_target_nodes "$future_target_nodes" \
134-
--arg future_fom_factor "$future_fom_factor" \
253+
--arg default_section_factor "$default_section_factor" \
254+
--arg logp_section_name "$logp_section_name" \
135255
'{
136256
scaling_assumption: "weak-scaling",
137257
future_system_assumption: $future_system,
138258
baseline_system: $baseline_system,
139259
current_target_nodes: $current_target_nodes,
140260
future_target_nodes: $future_target_nodes,
141-
section_model_rule: (
142-
if $future_fom_factor == "1" then
143-
"carry measured section and overlap timings through unchanged as a dummy reference implementation"
144-
else
145-
($future_fom_factor + "x uniform scaling is applied to section and overlap timings as a dummy reference implementation")
146-
end
147-
)
261+
default_section_rule: ("sections except " + $logp_section_name + " are scaled by " + $default_section_factor),
262+
logp_section_rule: ($logp_section_name + " is scaled with logP"),
263+
overlap_rule: "overlap timings are scaled by the default section factor"
148264
}')
149265

150266
est_model_json=$(jq -cn \
@@ -161,7 +277,7 @@ bk_estimation_package_run() {
161277

162278
est_confidence_json='{"level":"experimental","score":0.20}'
163279
est_notes_json=$(jq -cn \
164-
--arg note "Reference implementation for application-defined section timings in BenchKit." \
280+
--arg note "Reference implementation for qws-style application-defined section timings in BenchKit." \
165281
'{summary: $note}')
166282
}
167283

0 commit comments

Comments
 (0)