Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions python/lsst/analysis/tools/tasks/gatherResourceUsage.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,10 @@ def run(self, **kwargs: Any) -> Struct:
).reset_index()
df["task"] = input_name.replace("_resource_usage", "")
df["quanta"] = len(ru_table)
df["integrated_runtime"] = ru_table["run_time"].sum()
df["integrated_runtime_hrs"] = ru_table["run_time"].sum() / 3600.0
# USDF nodes have 120 CPU and 480GiB RAM for ~4GiB per CPU
integrated = (ru_table["run_time"] * ru_table["memory"]).sum()
df["integrated_allocated_core_hrs"] = integrated / 4 / 1024**3 / 3600.0
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To make things slightly more future-proof, you might consider creating a memory-per-core variable rather than hard-coding the value 4 here. Granted, you still have to set it somewhere, but you would also be able to use it for other calculations, and if the value ever changes, you only have to change it in one place.


quantiles.append(
df[
Expand All @@ -139,7 +142,8 @@ def run(self, **kwargs: Any) -> Struct:
"init_time",
"run_time",
"wall_time",
"integrated_runtime",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be useful to keep the un-scaled integrated runtime around too? Granted, you can always multiply the hours value back out to recover it, and dropping it keeps the memory usage down. I certainly don't insist on it.

"integrated_runtime_hrs",
"integrated_allocated_core_hrs",
]
]
)
Expand All @@ -148,7 +152,6 @@ def run(self, **kwargs: Any) -> Struct:
full_quantiles["percentile"] = (full_quantiles["index"] * 100).astype(int)
full_quantiles["percentile_name"] = "p" + full_quantiles["percentile"].astype(str).str.zfill(3)
full_quantiles["memoryGB"] = full_quantiles["memory"] / 1024 / 1024 / 1024
full_quantiles["integrated_runtime_hrs"] = full_quantiles["integrated_runtime"] / 3600.0
memoryGB = pd.pivot_table(
full_quantiles, values="memoryGB", columns=["percentile_name"], index=["task"]
).add_prefix("mem_GB_")
Expand All @@ -170,7 +173,7 @@ def run(self, **kwargs: Any) -> Struct:
right_on="task",
)
memrun = pd.merge(
full_quantiles[["task", "quanta", "integrated_runtime_hrs"]]
full_quantiles[["task", "quanta", "integrated_runtime_hrs", "integrated_allocated_core_hrs"]]
.drop_duplicates()
.sort_values("task"),
memrun,
Expand Down
Loading