Skip to content

Commit efb52e8

Browse files
refactor: align lightweight estimation with weak scaling [code:qws] [system:RC_GH200]
1 parent 4bb3874 commit efb52e8

3 files changed

Lines changed: 42 additions & 24 deletions

File tree

programs/qws/estimate.sh

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,13 +8,16 @@ BK_ESTIMATION_PACKAGE="lightweight_fom_scaling"
88
BK_ESTIMATION_BASELINE_SYSTEM="Fugaku"
99
BK_ESTIMATION_BASELINE_EXP="CASE0"
1010
BK_ESTIMATION_FUTURE_SYSTEM="FugakuNEXT"
11-
BK_ESTIMATION_SCALE_FACTOR="2"
11+
BK_ESTIMATION_FUTURE_FOM_FACTOR="${BK_ESTIMATION_FUTURE_FOM_FACTOR:-1}"
1212
BK_ESTIMATION_MODEL_NAME="scale-mock"
1313
BK_ESTIMATION_MODEL_VERSION="0.1"
1414
BK_ESTIMATION_INPUT_JSON="$1"
1515

1616
read_values "$BK_ESTIMATION_INPUT_JSON"
1717

18+
BK_ESTIMATION_CURRENT_TARGET_NODES="${BK_ESTIMATION_CURRENT_TARGET_NODES:-$est_node_count}"
19+
BK_ESTIMATION_FUTURE_TARGET_NODES="${BK_ESTIMATION_FUTURE_TARGET_NODES:-$est_node_count}"
20+
1821
if ! bk_estimation_package_check_applicability; then
1922
echo "ERROR: estimation package ${BK_ESTIMATION_PACKAGE} is not applicable for input ${BK_ESTIMATION_INPUT_JSON}" >&2
2023
exit 1

scripts/estimate_common.sh

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -186,19 +186,21 @@ bk_estimation_set_applicability() {
186186
}
187187

188188
# ---------------------------------------------------------------------------
189-
# fetch_current_fom — Fetch Fugaku FOM from result_server API
189+
# fetch_current_fom — Fetch baseline-system FOM from result_server API
190190
#
191191
# Arguments:
192-
# $1 code (e.g. qws)
193-
# $2 exp (optional, e.g. default)
192+
# $1 system (e.g. Fugaku)
193+
# $2 code (e.g. qws)
194+
# $3 exp (optional, e.g. default)
194195
#
195196
# Requires: RESULT_SERVER, RESULT_SERVER_KEY environment variables
196-
# Sets: est_current_fom (FOM value from Fugaku result)
197+
# Sets: est_current_fom (FOM value from the selected baseline-system result)
197198
# Exits with 1 on failure.
198199
# ---------------------------------------------------------------------------
199200
fetch_current_fom() {
200-
local code="$1"
201-
local exp="${2:-}"
201+
local system="$1"
202+
local code="$2"
203+
local exp="${3:-}"
202204

203205
if [[ -z "${RESULT_SERVER:-}" ]]; then
204206
echo "ERROR: RESULT_SERVER is not set" >&2
@@ -209,7 +211,7 @@ fetch_current_fom() {
209211
exit 1
210212
fi
211213

212-
local url="${RESULT_SERVER}/api/query/result?system=Fugaku&code=${code}"
214+
local url="${RESULT_SERVER}/api/query/result?system=${system}&code=${code}"
213215
if [[ -n "$exp" ]]; then
214216
url="${url}&exp=${exp}"
215217
fi
@@ -221,26 +223,26 @@ fetch_current_fom() {
221223
curl_exit=$?
222224
set -e
223225
if [[ $curl_exit -ne 0 || -z "$response" ]]; then
224-
echo "ERROR: Failed to fetch Fugaku result for code=${code}, exp=${exp} (curl exit=$curl_exit)" >&2
226+
echo "ERROR: Failed to fetch baseline result for system=${system}, code=${code}, exp=${exp} (curl exit=$curl_exit)" >&2
225227
echo "ERROR: URL was: ${url}" >&2
226228
exit 1
227229
fi
228230

229231
est_current_fom=$(echo "$response" | jq -r '.FOM')
230232
if [[ -z "$est_current_fom" || "$est_current_fom" == "null" ]]; then
231-
echo "ERROR: FOM not found in Fugaku result for code=${code}, exp=${exp}" >&2
233+
echo "ERROR: FOM not found in baseline result for system=${system}, code=${code}, exp=${exp}" >&2
232234
exit 1
233235
fi
234236

235237
# Populate benchmark sub-object variables for current_system
236-
est_current_bench_system="Fugaku"
238+
est_current_bench_system="$system"
237239
est_current_bench_fom="$est_current_fom"
238240
est_current_bench_nodes=$(echo "$response" | jq -r '.node_count // empty')
239241
est_current_bench_numproc_node=$(echo "$response" | jq -r '.numproc_node // empty')
240242
est_current_bench_timestamp=$(echo "$response" | jq -r '._meta.timestamp // empty')
241243
est_current_bench_uuid=$(echo "$response" | jq -r '._meta.uuid // empty')
242244

243-
echo "Fetched Fugaku FOM for ${code}: ${est_current_fom}"
245+
echo "Fetched baseline FOM for ${system}/${code}: ${est_current_fom}"
244246
}
245247

246248
# ---------------------------------------------------------------------------

scripts/estimation/packages/lightweight_fom_scaling.sh

Lines changed: 25 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ bk_estimation_package_metadata() {
99
"method_class": "lightweight",
1010
"detail_level": "basic",
1111
"required_inputs": {
12-
"mandatory": ["result_json", "fom", "node_count"],
12+
"mandatory": ["result_json", "fom", "target_nodes_current", "target_nodes_future"],
1313
"optional": ["fom_breakdown"],
1414
"external": []
1515
},
@@ -46,7 +46,9 @@ bk_estimation_package_run() {
4646
local baseline_system="${BK_ESTIMATION_BASELINE_SYSTEM:-Fugaku}"
4747
local baseline_exp="${BK_ESTIMATION_BASELINE_EXP:-CASE0}"
4848
local future_system="${BK_ESTIMATION_FUTURE_SYSTEM:-FugakuNEXT}"
49-
local scale_factor="${BK_ESTIMATION_SCALE_FACTOR:-2}"
49+
local current_target_nodes="${BK_ESTIMATION_CURRENT_TARGET_NODES:-$est_node_count}"
50+
local future_target_nodes="${BK_ESTIMATION_FUTURE_TARGET_NODES:-$est_node_count}"
51+
local future_fom_factor="${BK_ESTIMATION_FUTURE_FOM_FACTOR:-${BK_ESTIMATION_SCALE_FACTOR:-1}}"
5052
local model_name="${BK_ESTIMATION_MODEL_NAME:-scale-mock}"
5153
local model_version="${BK_ESTIMATION_MODEL_VERSION:-0.1}"
5254

@@ -60,14 +62,14 @@ bk_estimation_package_run() {
6062

6163
# Current/baseline side.
6264
est_current_system="$baseline_system"
63-
fetch_current_fom "$est_code" "$baseline_exp"
64-
est_current_target_nodes="$est_node_count"
65+
fetch_current_fom "$baseline_system" "$est_code" "$baseline_exp"
66+
est_current_target_nodes="$current_target_nodes"
6567
est_current_scaling_method="measured"
6668

6769
# Future/predicted side.
6870
est_future_system="$future_system"
69-
est_future_fom=$(awk -v fom="$est_fom" -v factor="$scale_factor" 'BEGIN {printf "%.3f", fom * factor}')
70-
est_future_target_nodes="$est_node_count"
71+
est_future_fom=$(awk -v fom="$est_fom" -v factor="$future_fom_factor" 'BEGIN {printf "%.3f", fom * factor}')
72+
est_future_target_nodes="$future_target_nodes"
7173
est_future_scaling_method="$model_name"
7274

7375
est_measurement_json=$(jq -cn \
@@ -87,11 +89,22 @@ bk_estimation_package_run() {
8789
est_assumptions_json=$(jq -cn \
8890
--arg future_system "$future_system" \
8991
--arg baseline_system "$baseline_system" \
90-
--arg scale_factor "$scale_factor" \
92+
--arg current_target_nodes "$current_target_nodes" \
93+
--arg future_target_nodes "$future_target_nodes" \
94+
--arg future_fom_factor "$future_fom_factor" \
9195
'{
96+
scaling_assumption: "weak-scaling",
9297
future_system_assumption: $future_system,
9398
baseline_system: $baseline_system,
94-
future_fom_rule: ($scale_factor + "x benchmark FOM when no detailed model is available")
99+
current_target_nodes: $current_target_nodes,
100+
future_target_nodes: $future_target_nodes,
101+
future_fom_rule: (
102+
if $future_fom_factor == "1" then
103+
"keep FOM constant unless additional correction terms are explicitly introduced"
104+
else
105+
($future_fom_factor + "x benchmark FOM with explicit lightweight correction")
106+
end
107+
)
95108
}')
96109

97110
est_model_json=$(jq -cn \
@@ -117,9 +130,9 @@ bk_estimation_package_run() {
117130
fi
118131

119132
if [[ -n "$raw_breakdown" ]]; then
120-
est_future_fom_breakdown=$(echo "$raw_breakdown" | jq -c --arg scale_factor "$scale_factor" --arg model_name "$model_name" '{
121-
sections: [.sections[] | {name, bench_time: .time, scaling_method: $model_name, time: (.time * ($scale_factor | tonumber))}],
122-
overlaps: [(.overlaps // [])[] | {sections, bench_time: .time, scaling_method: $model_name, time: (.time * ($scale_factor | tonumber))}]
133+
est_future_fom_breakdown=$(echo "$raw_breakdown" | jq -c --arg future_fom_factor "$future_fom_factor" --arg model_name "$model_name" '{
134+
sections: [.sections[] | {name, bench_time: .time, scaling_method: $model_name, time: (.time * ($future_fom_factor | tonumber))}],
135+
overlaps: [(.overlaps // [])[] | {sections, bench_time: .time, scaling_method: $model_name, time: (.time * ($future_fom_factor | tonumber))}]
123136
}')
124137

125138
est_current_fom_breakdown=$(echo "$raw_breakdown" | jq -c '{
@@ -138,7 +151,7 @@ bk_estimation_package_run() {
138151
bk_estimation_package_apply_metadata() {
139152
bk_estimation_set_package_metadata \
140153
"lightweight_fom_scaling" \
141-
"${BK_ESTIMATION_MODEL_VERSION:-0.1}" \
154+
"0.1" \
142155
"lightweight" \
143156
"basic"
144157

0 commit comments

Comments
 (0)