@@ -17,67 +17,113 @@ jobs:
1717 name : [nvidia-h100, nvidia-l40s, amd-mi300x, amd-w7900]
1818 include :
1919 - name : nvidia-h100
20+ vendor : nvidia
2021 runner : cern-nextgen-h100
2122 cmake_args : -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=90
22- ca_args : --gpuType CUDA # --RTCTECHloadLaunchBoundsFromFile genGPUArch/nvidia-h100.par
23+ profiler : nsys profile -o nvidia-h100
24+ profiler_post : nsys stats --report cuda_gpu_kern_sum --force-export=true --format csv nvidia-h100.nsys-rep >
2325 - name : nvidia-l40s
26+ vendor : nvidia
2427 runner : cern-nextgen-l40s
2528 cmake_args : -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=89
26- ca_args : --gpuType CUDA # --RTCTECHloadLaunchBoundsFromFile genGPUArch/nvidia-l40s.par
29+ profiler : nsys profile -o nvidia-l40s
30+ profiler_post : nsys stats --report cuda_gpu_kern_sum --force-export=true --format csv nvidia-l40s.nsys-rep >
2731 - name : amd-mi300x
32+ vendor : amd
2833 runner : cern-nextgen-mi300x
2934 cmake_args : -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx942
30- ca_args : --gpuType HIP
35+ profiler : rocprofv2 --basenames --output-directory /root --output-file-name amd-mi300x
36+ profiler_post : touch
3137 - name : amd-w7900
38+ vendor : amd
3239 runner : cern-nextgen-w7900
3340 cmake_args : -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx1100
34- ca_args : --gpuType HIP --RTCTECHloadLaunchBoundsFromFile genGPUArch/amd-w7900.par
41+ profiler : rocprofv2 --basenames --output-directory /root --output-file-name amd-w7900
42+ profiler_post : touch
43+ env :
44+ WORK_DIR : /cvmfs/alice.cern.ch
45+ ALIBUILD_ARCH_PREFIX : el9-x86_64/Packages
46+ MODULEPATH : /cvmfs/alice.cern.ch/etc/toolchain/modulefiles/el9-x86_64:/cvmfs/alice.cern.ch/el9-x86_64/Modules/modulefiles
47+ STANDALONE_DIR : /root/standalone
48+ BENCHMARK_CSV : ${{ matrix.name }}.csv
49+ PROFILER_CSV : results_${{ matrix.name }}.csv
50+ LD_LIBRARY_PATH : /usr/local/cuda-13.0/compat
3551
3652 name : ${{ matrix.name }}
3753 steps :
3854 - name : Checkout Repository
39- uses : actions/checkout@v4
55+ uses : actions/checkout@v6
4056
41- - name : Build and Run
57+ - name : Download Files
4258 run : |
4359 mkdir -p ${STANDALONE_DIR}
44- . ${WORK_DIR}/${ALIBUILD_ARCH_PREFIX}/ninja-fortran/fortran-v1.11.1.g9-3/etc/profile.d/init.sh
45- . ${WORK_DIR}/${ALIBUILD_ARCH_PREFIX}/O2/${O2_REVISION}/etc/profile.d/init.sh
60+
61+ if [[ "${{ matrix.vendor }}" == "nvidia" ]]; then
62+ curl -fL --retry 3 -o ${STANDALONE_DIR}/nsys.rpm https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/2026_2/NsightSystems-linux-cli-public-2026.2.1.210-3763964.rpm
63+ dnf install -y ${STANDALONE_DIR}/nsys.rpm
64+ rm -f ${STANDALONE_DIR}/nsys.rpm
65+ fi
4666
47- cmake -B ${BUILD_DIR} ${{ matrix.cmake_args }} -DENABLE_OPENCL=0 -DGPUCA_BUILD_EVENT_DISPLAY=0 -DGPUCA_DETERMINISTIC_MODE=GPU -DCMAKE_INSTALL_PREFIX=${STANDALONE_DIR} ${GITHUB_WORKSPACE}/GPU/GPUTracking/Standalone/
48- cd ${BUILD_DIR}
49- make install -j8
67+ curl -fL --retry 3 -o ${STANDALONE_DIR}/o2-simple-GPU.out https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/o2-simple-GPU.out
5068
51- cd ${STANDALONE_DIR}
52- mkdir -p ${STANDALONE_DIR}/genGPUArch
53- curl -v -o ${STANDALONE_DIR}/genGPUArch/${{ matrix.name }}.par https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/genGPUArch/${{ matrix.name }}.par
69+ mkdir -p ${STANDALONE_DIR}/baseline
70+ curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${PROFILER_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${PROFILER_CSV}
71+ curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${BENCHMARK_CSV}
5472
5573 mkdir -p ${STANDALONE_DIR}/events
74+ curl -fL --retry 3 -o ${STANDALONE_DIR}/events/o2-simple.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/o2-simple.tar.xz
75+ tar -xf ${STANDALONE_DIR}/events/o2-simple.tar.xz -C ${STANDALONE_DIR}/events
5676
57- curl -v -o ${STANDALONE_DIR}/events/50kHz.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/50kHz.tar.xz
77+ curl -fL --retry 3 -o ${STANDALONE_DIR}/events/50kHz.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/50kHz.tar.xz
5878 tar -xf ${STANDALONE_DIR}/events/50kHz.tar.xz -C ${STANDALONE_DIR}/events
59- ${STANDALONE_DIR}/ca -e 50kHz -g --seed 0 --memSize 15000000000 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptSpecialCode 1 --debug 1 ${{ matrix.ca_args }} > ${ARTIFACT_FILE}
6079
61- curl -v -o ${STANDALONE_DIR}/events/o2-simple.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/o2-simple.tar.xz
62- tar -xf ${STANDALONE_DIR}/events/o2-simple.tar.xz -C ${STANDALONE_DIR}/events
63- ${STANDALONE_DIR}/ca -e o2-simple -g --seed 0 --memSize 20000000000 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptSpecialCode 1 --debug 6 ${{ matrix.ca_args }}
80+ - name : Build Deterministic
81+ run : &build |
82+ source /etc/profile.d/modules.sh
83+ module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
6484
65- curl -v -o ${STANDALONE_DIR}/o2-simple-GPU.out https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/o2-simple-GPU.out
85+ mkdir -p ${STANDALONE_DIR}
86+ cmake -B ${STANDALONE_DIR}/build ${{ matrix.cmake_args }} -DENABLE_OPENCL=0 -DGPUCA_BUILD_EVENT_DISPLAY=0 -DGPUCA_DETERMINISTIC_MODE=${DETERMINISTIC_MODE} -DCMAKE_INSTALL_PREFIX=${STANDALONE_DIR} ${GITHUB_WORKSPACE}/GPU/GPUTracking/Standalone/
87+ cmake --build ${STANDALONE_DIR}/build --target install -j 8
88+ env :
89+ DETERMINISTIC_MODE : GPU
90+
91+ - name : Test GPU Track Reconstruction
92+ run : |
93+ source /etc/profile.d/modules.sh
94+ module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
95+ cd ${STANDALONE_DIR}
96+ ${STANDALONE_DIR}/ca -e o2-simple -g --seed 0 --memSize 20000000000 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptConstexpr 1 --RTCoptSpecialCode 1 --debug 6
6697 cmp ${STANDALONE_DIR}/GPU.out ${STANDALONE_DIR}/o2-simple-GPU.out
67- rm -rf ${STANDALONE_DIR}/GPU.out ${STANDALONE_DIR}/o2-simple-GPU.out
98+ rm -rf ${STANDALONE_DIR}/GPU.out ${STANDALONE_DIR}/o2-simple-GPU.out ${STANDALONE_DIR}/events/o2-simple ${STANDALONE_DIR}/build
6899
69- rm -rf ${STANDALONE_DIR}/events
100+ - name : Build Non-Deterministic
101+ run : *build
70102 env :
71- WORK_DIR : /cvmfs/alice.cern.ch
72- ALIBUILD_ARCH_PREFIX : el9-x86_64/Packages
73- O2_REVISION : daily-20260217-0000-1
74- STANDALONE_DIR : /root/standalone
75- BUILD_DIR : /root/standalone/build
76- ARTIFACT_FILE : /root/artifact.txt
77- LD_LIBRARY_PATH : /usr/local/cuda-13.0/compat
103+ DETERMINISTIC_MODE : OFF
104+
105+ - name : Benchmark GPU Track Reconstruction
106+ run : |
107+ source /etc/profile.d/modules.sh
108+ module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
109+ cd ${STANDALONE_DIR}
110+ ${{ matrix.profiler }} ${STANDALONE_DIR}/ca -e 50kHz -g --memSize 15000000000 --sync --debug 1 --runs 12 --runsInit 2 --debugMarkdown 1 --PROCresetTimers 1 --PROCdebugCSV /root/${BENCHMARK_CSV}
111+ ${{ matrix.profiler_post }} /root/${PROFILER_CSV}
112+ rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
113+
114+ - name : Display table on GitHub web
115+ run : |
116+ source /etc/profile.d/modules.sh
117+ module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
118+ python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_${{ matrix.vendor }}.py --runs 12 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
119+ python3 ${GITHUB_WORKSPACE}/.github/scripts/merge_runs.py --discard 2 --input /root/${BENCHMARK_CSV} --output /root/${BENCHMARK_CSV}
120+ python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${PROFILER_CSV} --current /root/${PROFILER_CSV} >> ${GITHUB_STEP_SUMMARY}
121+ echo -e "\n\n" >> ${GITHUB_STEP_SUMMARY}
122+ python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} --current /root/${BENCHMARK_CSV} >> ${GITHUB_STEP_SUMMARY}
123+ rm -rf ${STANDALONE_DIR}/baseline
78124
79125 - name : Upload Artifact
80- uses : actions/upload-artifact@v4
126+ uses : actions/upload-artifact@v6
81127 with :
82128 name : ${{ matrix.name }}-artifact
83- path : /root/artifact.txt
129+ path : "/root/*.csv"
0 commit comments