Skip to content

Commit ee64896

Browse files
Produce baseline csv files
1 parent 6bf443e commit ee64896

4 files changed

Lines changed: 28 additions & 46 deletions

File tree

.github/workflows/standalone-benchmark.yml

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -90,13 +90,13 @@ jobs:
9090
${STANDALONE_DIR}/ca -e 50kHz -g --memSize 15000000000 --sync --runs 20 --debug 1 --PROCtimingCSV ${BENCHMARK_CSV}
9191
rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
9292
93-
- name: Display table on GitHub web
94-
run: |
95-
source /etc/profile.d/modules.sh
96-
module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
97-
python3 ${GITHUB_WORKSPACE}/.github/scripts/merge_runs.py ${BENCHMARK_CSV} ${BENCHMARK_CSV}
98-
python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py ${BENCHMARK_CSV} ${STANDALONE_DIR}/baseline/${{ matrix.name }}.csv >> ${GITHUB_STEP_SUMMARY}
99-
rm -rf ${STANDALONE_DIR}/baseline
93+
# - name: Display table on GitHub web
94+
# run: |
95+
# source /etc/profile.d/modules.sh
96+
# module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
97+
# python3 ${GITHUB_WORKSPACE}/.github/scripts/merge_runs.py ${BENCHMARK_CSV} ${BENCHMARK_CSV}
98+
# python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py ${BENCHMARK_CSV} ${STANDALONE_DIR}/baseline/${{ matrix.name }}.csv >> ${GITHUB_STEP_SUMMARY}
99+
# rm -rf ${STANDALONE_DIR}/baseline
100100

101101
- name: Upload Artifact
102102
uses: actions/upload-artifact@v4

GPU/GPUTracking/Base/GPUReconstructionCPU.cxx

Lines changed: 18 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -266,42 +266,19 @@ int32_t GPUReconstructionCPU::RunChains()
266266
std::vector<double> kernelStepTimes(gpudatatypes::N_RECO_STEPS, 0.);
267267
std::ofstream timingCSVFile;
268268
if (!GetProcessingSettings().timingCSV.empty()) {
269-
bool needHeader = true;
270-
{
271-
std::ifstream timingCSVIn(GetProcessingSettings().timingCSV);
272-
needHeader = !timingCSVIn.good() || timingCSVIn.peek() == std::ifstream::traits_type::eof();
273-
}
274-
timingCSVFile.open(GetProcessingSettings().timingCSV, std::ios::out | std::ios::app);
275-
if (!timingCSVFile.is_open()) {
276-
GPUError("Could not open timing CSV file '%s' for writing", GetProcessingSettings().timingCSV.c_str());
277-
} else if (needHeader) {
278-
timingCSVFile << "name,time,count,events\n";
279-
}
269+
timingCSVFile.open(GetProcessingSettings().timingCSV, std::ios::binary | std::ofstream::app);
270+
if (mNEventsProcessed == 1) timingCSVFile << "name,time,count,type\n";
271+
if (!timingCSVFile.is_open()) GPUError("Could not open timing CSV file '%s' for writing", GetProcessingSettings().timingCSV.c_str());
280272
}
281-
auto writeCSVString = [](std::ostream& out, const std::string& s) {
282-
out << '"';
283-
for (char c : s) {
284-
if (c == '"') {
285-
out << "\"\"";
286-
} else {
287-
out << c;
288-
}
289-
}
290-
out << '"';
291-
};
292273

293274
if (GetProcessingSettings().debugLevel >= 1) {
294275
for (uint32_t i = 0; i < mTimers.size(); i++) {
295276
double time = 0;
296-
if (mTimers[i] == nullptr) {
297-
continue;
298-
}
277+
if (mTimers[i] == nullptr) continue;
299278
for (int32_t j = 0; j < mTimers[i]->num; j++) {
300279
HighResTimer& timer = mTimers[i]->timer[j];
301280
time += timer.GetElapsedTime();
302-
if (GetProcessingSettings().resetTimers) {
303-
timer.Reset();
304-
}
281+
if (GetProcessingSettings().resetTimers) timer.Reset();
305282
}
306283

307284
uint32_t type = mTimers[i]->type;
@@ -314,11 +291,9 @@ int32_t GPUReconstructionCPU::RunChains()
314291
if (mTimers[i]->memSize && mStatNEvents && time != 0.) {
315292
snprintf(bandwidth, 256, " (%8.3f GB/s - %'14zu bytes - %'14zu per call)", mTimers[i]->memSize / time * 1e-9, mTimers[i]->memSize / mStatNEvents, mTimers[i]->memSize / mStatNEvents / mTimers[i]->count);
316293
}
317-
printf("Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), time * 1000000 / mStatNEvents, bandwidth);
318-
if (timingCSVFile.is_open()) {
319-
writeCSVString(timingCSVFile, mTimers[i]->name);
320-
timingCSVFile << "," << (time * 1000000 / mStatNEvents) << "," << mTimers[i]->count << "," << mStatNEvents << "\n";
321-
}
294+
double elapsedTime_ms = time * 1000000 / mStatNEvents;
295+
printf("Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), elapsedTime_ms, bandwidth);
296+
if (timingCSVFile.is_open()) timingCSVFile << mTimers[i]->name << "," << elapsedTime_ms << "," << mTimers[i]->count << ",Task\n";
322297
if (GetProcessingSettings().resetTimers) {
323298
mTimers[i]->count = 0;
324299
mTimers[i]->memSize = 0;
@@ -328,8 +303,10 @@ int32_t GPUReconstructionCPU::RunChains()
328303
if (GetProcessingSettings().recoTaskTiming) {
329304
for (int32_t i = 0; i < gpudatatypes::N_RECO_STEPS; i++) {
330305
if (kernelStepTimes[i] != 0. || mTimersRecoSteps[i].timerTotal.GetElapsedTime() != 0.) {
306+
double elapsedTime_ms = kernelStepTimes[i] * 1000000 / mStatNEvents;
331307
printf("Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n", "Tasks",
332-
gpudatatypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents, "", mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU / mTimersRecoSteps[i].timerTotal.GetElapsedTime());
308+
gpudatatypes::RECO_STEP_NAMES[i], elapsedTime_ms, "", mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU / mTimersRecoSteps[i].timerTotal.GetElapsedTime());
309+
if (timingCSVFile.is_open()) timingCSVFile << gpudatatypes::RECO_STEP_NAMES[i] << "," << elapsedTime_ms << ",1,Step\n";
333310
}
334311
if (mTimersRecoSteps[i].bytesToGPU) {
335312
printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToGPU, "DMA to GPU", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1000000 / mStatNEvents,
@@ -350,17 +327,22 @@ int32_t GPUReconstructionCPU::RunChains()
350327
}
351328
}
352329
for (int32_t i = 0; i < gpudatatypes::N_GENERAL_STEPS; i++) {
353-
if (mTimersGeneralSteps[i].GetElapsedTime() != 0.) {
354-
printf("Execution Time: General Step : %50s Time: %'10.0f us\n", gpudatatypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps[i].GetElapsedTime() * 1000000 / mStatNEvents);
330+
double elapsedTime_ms = mTimersGeneralSteps[i].GetElapsedTime() * 1000000 / mStatNEvents;
331+
if (elapsedTime_ms != 0.) {
332+
printf("Execution Time: General Step : %50s Time: %'10.0f us\n", gpudatatypes::GENERAL_STEP_NAMES[i], elapsedTime_ms);
333+
if (timingCSVFile.is_open()) timingCSVFile << gpudatatypes::GENERAL_STEP_NAMES[i] << "," << elapsedTime_ms << ",1,Step\n";
355334
}
356335
}
357336
if (GetProcessingSettings().debugLevel >= 1) {
358337
mStatKernelTime = kernelTotal * 1000000 / mStatNEvents;
359338
printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Kernel", mStatKernelTime, nEventReport.c_str());
339+
if (timingCSVFile.is_open()) timingCSVFile << "Total Kernel" << "," << mStatKernelTime << ",1,Total\n";
360340
}
361341
printf("Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n", "Total Wall", mStatWallTime, mStatCPUTime * 1000000 / mStatNEvents, mStatCPUTime / mTimerTotal.GetElapsedTime(), nEventReport.c_str());
342+
if (timingCSVFile.is_open()) timingCSVFile << "Total Wall" << "," << mStatWallTime << ",1,Total\n";
362343
} else if (GetProcessingSettings().debugLevel >= 0) {
363344
GPUInfo("Total Wall Time: %10.0f us%s", mStatWallTime, nEventReport.c_str());
345+
if (timingCSVFile.is_open()) timingCSVFile << "Total Wall" << "," << mStatWallTime << ",1,Total\n";
364346
}
365347
if (GetProcessingSettings().resetTimers) {
366348
mStatNEvents = 0;

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ AddOption(debugLevel, int32_t, -1, "debug", 'd', "Set debug level (-2 = silent,
307307
AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for memory allocations (without messing with normal debug level)")
308308
AddOption(debugMask, uint32_t, (1 << 18) - 1, "debugMask", 0, "Mask for debug output dumps to file")
309309
AddOption(debugLogSuffix, std::string, "", "debugSuffix", 0, "Suffix for debug log files with --debug 6")
310-
AddOption(timingCSV, std::string, "", "", 0, "Append per-task timing rows to this CSV file")
310+
AddOption(timingCSV, std::string, "", "", 0, "CSV filename to append the benchmark results. Verbosity determined by parameter --debug.")
311311
AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify failures")
312312
AddOption(recoTaskTiming, bool, 0, "", 0, "Perform summary timing after whole reconstruction tasks")
313313
AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6 or deterministic compile flag set", def(1))

GPU/GPUTracking/Standalone/Benchmark/standalone.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -627,7 +627,7 @@ int32_t RunBenchmark(GPUReconstruction* recUse, GPUChainTracking* chainTrackingU
627627
if (configStandalone.runs > 1) {
628628
printf("Run %d (thread %d)\n", iteration + 1, threadId);
629629
}
630-
recUse->SetResetTimers(iRun < configStandalone.runsInit);
630+
recUse->SetResetTimers(iRun < configStandalone.runsInit || configStandalone.proc.resetTimers);
631631
if (configStandalone.outputcontrolmem) {
632632
recUse->SetOutputControl(threadId ? outputmemoryPipeline.get() : outputmemory.get(), configStandalone.outputcontrolmem);
633633
}
@@ -685,7 +685,7 @@ int32_t RunBenchmark(GPUReconstruction* recUse, GPUChainTracking* chainTrackingU
685685
chainTrackingAsync->mIOPtrs.nRawClusters[i] = 0;
686686
}
687687
chainTrackingAsync->mIOPtrs.clustersNative = nullptr;
688-
recAsync->SetResetTimers(iRun < configStandalone.runsInit);
688+
recAsync->SetResetTimers(iRun < configStandalone.runsInit || configStandalone.proc.resetTimers);
689689
tmpRetVal = recAsync->RunChains();
690690
if (tmpRetVal == 0 || tmpRetVal == 2) {
691691
OutputStat(chainTrackingAsync, nullptr, nullptr);

0 commit comments

Comments
 (0)