Skip to content

Commit 92cd379

Browse files
committed
ITS: finalize tracking code
Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent 821c93c commit 92cd379

23 files changed

+202
-169
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
5757
void loadUsedClustersDevice();
5858
void loadROFrameClustersDevice(const int, const int);
5959
void createROFrameClustersDeviceArray(const int);
60-
void loadMultiplicityCutMask(const int);
60+
void loadROFCutMask(const int);
6161
void loadVertices(const int);
6262
void loadROFOverlapTable(const int);
6363
void loadROFVertexLookupTable(const int);
@@ -181,8 +181,6 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
181181
ROFMaskTableN::View mDeviceROFMaskTableView;
182182

183183
// Hybrid pref
184-
uint8_t* mMultMaskDevice;
185-
int32_t* mMultMaskOffsetsDevice;
186184
Vertex* mPrimaryVerticesDevice;
187185
int* mROFramesPVDevice;
188186
std::array<Cluster*, NLayers> mClustersDevice;

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class ExternalAllocator;
3636

3737
template <int NLayers = 7>
3838
void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
39-
const typename ROFMaskTable<NLayers>::View& multMask,
39+
const typename ROFMaskTable<NLayers>::View& rofMask,
4040
const int layer,
4141
const typename ROFOverlapTable<NLayers>::View& rofOverlaps,
4242
const typename ROFVertexLookupTable<NLayers>::View& vertexLUT,
@@ -66,7 +66,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
6666

6767
template <int NLayers = 7>
6868
void computeTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
69-
const typename ROFMaskTable<NLayers>::View& multMask,
69+
const typename ROFMaskTable<NLayers>::View& rofMask,
7070
const int layer,
7171
const typename ROFOverlapTable<NLayers>::View& rofOverlaps,
7272
const typename ROFVertexLookupTable<NLayers>::View& vertexLUT,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -248,22 +248,24 @@ void TimeFrameGPU<NLayers>::loadTrackingFrameInfoDevice(const int iteration, con
248248
}
249249

250250
template <int NLayers>
251-
void TimeFrameGPU<NLayers>::loadMultiplicityCutMask(const int iteration)
251+
void TimeFrameGPU<NLayers>::loadROFCutMask(const int iteration)
252252
{
253253
if (!iteration || iteration == 3) { // we need to re-load the swapped mult-mask in upc iteration
254254
GPUTimer timer("loading multiplicity cut mask");
255-
const auto& hostTable = this->mMultiplicityCutMask;
255+
const auto& hostTable = *(this->mROFMask);
256256
const auto hostView = hostTable.getView();
257+
using TableEntry = ROFMaskTable<NLayers>::TableEntry;
258+
using TableIndex = ROFMaskTable<NLayers>::TableIndex;
259+
TableEntry* d_flatTable{nullptr};
260+
TableIndex* d_indices{nullptr};
257261
GPULog("gpu-transfer: iteration {} loading multiplicity cut mask with {} elements, for {:.2f} MB.",
258-
iteration, hostTable.getFlatMaskSize(), hostTable.getFlatMaskSize() * sizeof(uint8_t) / constants::MB);
259-
if (!iteration) { // only allocate on first call; offsets are stable across iterations
260-
allocMem(reinterpret_cast<void**>(&mMultMaskDevice), hostTable.getFlatMaskSize() * sizeof(uint8_t), this->hasFrameworkAllocator());
261-
allocMem(reinterpret_cast<void**>(&mMultMaskOffsetsDevice), NLayers * sizeof(int32_t), this->hasFrameworkAllocator());
262-
GPUChkErrS(cudaMemcpy(mMultMaskOffsetsDevice, hostView.mLayerROFOffsets, NLayers * sizeof(int32_t), cudaMemcpyHostToDevice));
263-
}
262+
iteration, hostTable.getFlatMaskSize(), hostTable.getFlatMaskSize() * sizeof(TableEntry) / constants::MB);
263+
allocMem(reinterpret_cast<void**>(&d_flatTable), hostTable.getFlatMaskSize() * sizeof(TableEntry), this->hasFrameworkAllocator());
264+
allocMem(reinterpret_cast<void**>(&d_indices), NLayers * sizeof(uint32_t), this->hasFrameworkAllocator());
265+
GPUChkErrS(cudaMemcpy(d_indices, hostView.mLayerROFOffsets, NLayers * sizeof(TableIndex), cudaMemcpyHostToDevice));
264266
// Re-copy the flat mask on every qualifying iteration (e.g. after swapMasks() for UPC)
265-
GPUChkErrS(cudaMemcpy(mMultMaskDevice, hostView.mFlatMask, hostTable.getFlatMaskSize() * sizeof(uint8_t), cudaMemcpyHostToDevice));
266-
mDeviceROFMaskTableView = hostTable.getDeviceView(mMultMaskDevice, mMultMaskOffsetsDevice);
267+
GPUChkErrS(cudaMemcpy(d_flatTable, hostView.mFlatMask, hostTable.getFlatMaskSize() * sizeof(TableEntry), cudaMemcpyHostToDevice));
268+
mDeviceROFMaskTableView = hostTable.getDeviceView(d_flatTable, d_indices);
267269
}
268270
}
269271

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ void TrackerTraitsGPU<NLayers>::initialiseTimeFrame(const int iteration)
3636
// once the tables are in persistent memory just update the vertex one
3737
// mTimeFrameGPU->updateROFVertexLookupTable(iteration);
3838
mTimeFrameGPU->loadIndexTableUtils(iteration);
39-
mTimeFrameGPU->loadMultiplicityCutMask(iteration);
39+
mTimeFrameGPU->loadROFCutMask(iteration);
4040
// pinned on host
4141
mTimeFrameGPU->createUsedClustersDeviceArray(iteration);
4242
mTimeFrameGPU->createClustersDeviceArray(iteration);

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,7 @@ GPUg() void __launch_bounds__(256, 1) computeLayerCellsKernel(
536536
template <bool initRun, int NLayers>
537537
GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel(
538538
const IndexTableUtils<NLayers>* utils,
539-
const typename ROFMaskTable<NLayers>::View multMask,
539+
const typename ROFMaskTable<NLayers>::View rofMask,
540540
const int layerIndex,
541541
const typename ROFOverlapTable<NLayers>::View rofOverlaps,
542542
const typename ROFVertexLookupTable<NLayers>::View vertexLUT,
@@ -565,7 +565,7 @@ GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel(
565565
const int totalROFs0 = rofOverlaps.getLayer(layerIndex).mNROFsTF;
566566
const int totalROFs1 = rofOverlaps.getLayer(layerIndex + 1).mNROFsTF;
567567
for (unsigned int pivotROF{blockIdx.x}; pivotROF < totalROFs0; pivotROF += gridDim.x) {
568-
if (!multMask.isROFEnabled(layerIndex, pivotROF)) {
568+
if (!rofMask.isROFEnabled(layerIndex, pivotROF)) {
569569
continue;
570570
}
571571

@@ -631,6 +631,9 @@ GPUg() void __launch_bounds__(256, 1) computeLayerTrackletsMultiROFKernel(
631631
}
632632

633633
for (short targetROF = rofOverlap.getFirstEntry(); targetROF < rofOverlap.getEntriesBound(); ++targetROF) {
634+
if (!rofMask.isROFEnabled(layerIndex + 1, pivotROF)) {
635+
continue;
636+
}
634637
auto clustersNextLayer = getClustersOnLayer(targetROF, totalROFs1, layerIndex + 1, ROFClusters, clusters);
635638
if (clustersNextLayer.empty()) {
636639
continue;
@@ -786,7 +789,7 @@ GPUg() void __launch_bounds__(256, 1) processNeighboursKernel(
786789

787790
template <int NLayers>
788791
void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
789-
const typename ROFMaskTable<NLayers>::View& multMask,
792+
const typename ROFMaskTable<NLayers>::View& rofMask,
790793
const int layer,
791794
const typename ROFOverlapTable<NLayers>::View& rofOverlaps,
792795
const typename ROFVertexLookupTable<NLayers>::View& vertexLUT,
@@ -816,7 +819,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
816819
{
817820
gpu::computeLayerTrackletsMultiROFKernel<true><<<nBlocks, nThreads, 0, streams[layer].get()>>>(
818821
utils,
819-
multMask,
822+
rofMask,
820823
layer,
821824
rofOverlaps,
822825
vertexLUT,
@@ -844,7 +847,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
844847

845848
template <int NLayers>
846849
void computeTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
847-
const typename ROFMaskTable<NLayers>::View& multMask,
850+
const typename ROFMaskTable<NLayers>::View& rofMask,
848851
const int layer,
849852
const typename ROFOverlapTable<NLayers>::View& rofOverlaps,
850853
const typename ROFVertexLookupTable<NLayers>::View& vertexLUT,
@@ -877,7 +880,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
877880
{
878881
gpu::computeLayerTrackletsMultiROFKernel<false><<<nBlocks, nThreads, 0, streams[layer].get()>>>(
879882
utils,
880-
multMask,
883+
rofMask,
881884
layer,
882885
rofOverlaps,
883886
vertexLUT,
@@ -1304,7 +1307,7 @@ void computeTrackSeedHandler(CellSeed<NLayers>* trackSeeds,
13041307

13051308
/// Explicit instantiation of ITS2 handlers
13061309
template void countTrackletsInROFsHandler<7>(const IndexTableUtils<7>* utils,
1307-
const ROFMaskTable<7>::View& multMask,
1310+
const ROFMaskTable<7>::View& rofMask,
13081311
const int layer,
13091312
const ROFOverlapTable<7>::View& rofOverlaps,
13101313
const ROFVertexLookupTable<7>::View& vertexLUT,
@@ -1333,7 +1336,7 @@ template void countTrackletsInROFsHandler<7>(const IndexTableUtils<7>* utils,
13331336
gpu::Streams& streams);
13341337

13351338
template void computeTrackletsInROFsHandler<7>(const IndexTableUtils<7>* utils,
1336-
const ROFMaskTable<7>::View& multMask,
1339+
const ROFMaskTable<7>::View& rofMask,
13371340
const int layer,
13381341
const ROFOverlapTable<7>::View& rofOverlaps,
13391342
const ROFVertexLookupTable<7>::View& vertexLUT,

Detectors/ITSMFT/ITS/tracking/include/ITStracking/FastMultEst.h

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,7 @@
2626
#include <gsl/span>
2727
#include <array>
2828

29-
namespace o2
30-
{
31-
namespace its
29+
namespace o2::its
3230
{
3331

3432
struct FastMultEst {
@@ -53,7 +51,7 @@ struct FastMultEst {
5351
bool doStaggering,
5452
const ROFOverlapTableN::View& overlapView,
5553
ROFMaskTableN& sel);
56-
void selectROFsWithVertices(const auto& vertices, const ROFOverlapTableN::View& overlapView, ROFMaskTableN& sel)
54+
void selectROFsWithVertices(const auto& vertices, const ROFOverlapTableN::View& overlapView, ROFMaskTableN& sel) const
5755
{
5856
const auto& multEstConf = FastMultEstConfig::Instance();
5957
if (!multEstConf.isVtxMultCutRequested()) {
@@ -90,7 +88,6 @@ struct FastMultEst {
9088
ClassDefNV(FastMultEst, 1);
9189
};
9290

93-
} // namespace its
94-
} // namespace o2
91+
} // namespace o2::its
9592

9693
#endif

Detectors/ITSMFT/ITS/tracking/include/ITStracking/FastMultEstConfig.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,7 @@
2020
#include "CommonUtils/ConfigurableParamHelper.h"
2121
#include "ITSMFTReconstruction/ChipMappingITS.h"
2222

23-
namespace o2
24-
{
25-
namespace its
23+
namespace o2::its
2624
{
2725
struct FastMultEstConfig : public o2::conf::ConfigurableParamHelper<FastMultEstConfig> {
2826
static constexpr int NLayers = o2::itsmft::ChipMappingITS::NLayers;
@@ -52,7 +50,6 @@ struct FastMultEstConfig : public o2::conf::ConfigurableParamHelper<FastMultEstC
5250
O2ParamDef(FastMultEstConfig, "fastMultConfig");
5351
};
5452

55-
} // namespace its
56-
} // namespace o2
53+
} // namespace o2::its
5754

5855
#endif

Detectors/ITSMFT/ITS/tracking/include/ITStracking/ROFLookupTables.h

Lines changed: 45 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,13 @@
1717
#include <limits>
1818
#include <string>
1919
#include <vector>
20+
#include <ranges>
21+
2022
#ifndef GPUCA_GPUCODE
2123
#include <format>
22-
2324
#include "Framework/Logger.h"
2425
#endif
26+
2527
#include "CommonConstants/LHCConstants.h"
2628
#include "CommonDataFormat/RangeReference.h"
2729
#include "DataFormatsITS/TimeEstBC.h"
@@ -159,15 +161,19 @@ struct ROFOverlapTableView {
159161
return mLayers[layer];
160162
}
161163

162-
GPUh() int getClock() const noexcept
164+
GPUh() int32_t getClock() const noexcept
163165
{
164166
// we take the fastest layer as clock
165-
int fastest = 0;
166-
uint32_t shortestROF{std::numeric_limits<uint32_t>::max()};
167-
for (int iL{0}; iL < NLayers; ++iL) {
167+
int32_t fastest = 0;
168+
uint32_t maxNROFs{0};
169+
for (int32_t iL{0}; iL < NLayers; ++iL) {
168170
const auto& layer = getLayer(iL);
169-
if (layer.mROFLength < shortestROF) {
171+
// by definition the fastest layer has the most ROFs
172+
// this also solves the problem of a delay large than ROFLength
173+
// if mNROFsTF is correct
174+
if (layer.mNROFsTF > maxNROFs) {
170175
fastest = iL;
176+
maxNROFs = layer.mNROFsTF;
171177
}
172178
}
173179
return fastest;
@@ -524,7 +530,6 @@ class ROFVertexLookupTable : public LayerTimingBase<NLayers>
524530
using BCType = LayerTiming::BCType;
525531
using TableEntry = dataformats::RangeReference<T, T>;
526532
using TableIndex = dataformats::RangeReference<T, T>;
527-
528533
using View = ROFVertexLookupTableView<NLayers, TableEntry, TableIndex>;
529534

530535
ROFVertexLookupTable() = default;
@@ -684,15 +689,15 @@ class ROFVertexLookupTable : public LayerTimingBase<NLayers>
684689
};
685690

686691
// GPU-friendly view of the ROF mask table
687-
template <int32_t NLayers>
692+
template <int32_t NLayers, typename TableEntry, typename TableIndex>
688693
struct ROFMaskTableView {
689-
const uint8_t* mFlatMask{nullptr};
690-
const int32_t* mLayerROFOffsets{nullptr}; // size NLayers+1
694+
const TableEntry* mFlatMask{nullptr};
695+
const TableIndex* mLayerROFOffsets{nullptr}; // size NLayers+1
691696

692697
GPUhdi() bool isROFEnabled(int32_t layer, int32_t rofId) const noexcept
693698
{
694699
assert(layer >= 0 && layer < NLayers);
695-
return mFlatMask[mLayerROFOffsets[layer] + rofId] != 0;
700+
return mFlatMask[mLayerROFOffsets[layer] + rofId] != 0u;
696701
}
697702

698703
#ifndef GPUCA_GPUCODE
@@ -715,6 +720,23 @@ struct ROFMaskTableView {
715720
LOGF(info, "%*d | %*d", w_rof, i, w_active, (int)isROFEnabled(layer, i));
716721
}
717722
}
723+
724+
GPUh() std::string asString(int32_t layer) const
725+
{
726+
int32_t nROFs = mLayerROFOffsets[layer + 1] - mLayerROFOffsets[layer];
727+
int32_t enabledROFs = 0;
728+
for (int32_t j = 0; j < nROFs; ++j) {
729+
if (isROFEnabled(layer, j)) {
730+
++enabledROFs;
731+
}
732+
}
733+
return std::format("ROFMask on Layer {} ROFs enabled: {}/{}", layer, enabledROFs, nROFs);
734+
}
735+
736+
GPUh() void print(int32_t layer) const
737+
{
738+
LOG(info) << asString(layer);
739+
}
718740
#endif
719741
};
720742

@@ -723,10 +745,13 @@ template <int32_t NLayers>
723745
class ROFMaskTable : public LayerTimingBase<NLayers>
724746
{
725747
public:
726-
using BCRange = dataformats::RangeReference<LayerTiming::BCType, LayerTiming::BCType>;
727-
using View = ROFMaskTableView<NLayers>;
748+
using T = LayerTimingBase<NLayers>::T;
749+
using BCRange = dataformats::RangeReference<T, T>;
750+
using TableIndex = uint32_t;
751+
using TableEntry = uint8_t;
752+
using View = ROFMaskTableView<NLayers, TableEntry, TableIndex>;
728753

729-
GPUdDefault() ROFMaskTable() = default;
754+
ROFMaskTable() = default;
730755
GPUh() explicit ROFMaskTable(const LayerTimingBase<NLayers>& timingBase) : LayerTimingBase<NLayers>(timingBase) { init(); }
731756

732757
GPUh() void init()
@@ -737,13 +762,11 @@ class ROFMaskTable : public LayerTimingBase<NLayers>
737762
totalROFs += this->getLayer(layer).mNROFsTF;
738763
}
739764
mLayerROFOffsets[NLayers] = totalROFs; // sentinel
740-
mFlatMask.resize(totalROFs, 1);
765+
mFlatMask.resize(totalROFs, 0u);
741766
}
742767

743768
GPUh() size_t getFlatMaskSize() const noexcept { return mFlatMask.size(); }
744769

745-
GPUh() bool isROFEnabled(int32_t layer, int32_t rofId) const noexcept { return mFlatMask[mLayerROFOffsets[layer] + rofId] != 0; }
746-
747770
GPUh() void setROFEnabled(int32_t layer, int32_t rofId, uint8_t state = 1) noexcept
748771
{
749772
assert(layer >= 0 && layer < NLayers);
@@ -770,7 +793,7 @@ class ROFMaskTable : public LayerTimingBase<NLayers>
770793
for (int32_t rofId{0}; rofId < lay.mNROFsTF; ++rofId) {
771794
if (static_cast<int32_t>(lay.getROFStartInBC(rofId)) < bcEnd &&
772795
static_cast<int32_t>(lay.getROFEndInBC(rofId)) > bcStart) {
773-
mFlatMask[offset + rofId] = 1;
796+
mFlatMask[offset + rofId] = 1u;
774797
}
775798
}
776799
}
@@ -785,7 +808,7 @@ class ROFMaskTable : public LayerTimingBase<NLayers>
785808
}
786809
}
787810

788-
GPUh() void resetMask(uint8_t s = 0)
811+
GPUh() void resetMask(uint8_t s = 0u)
789812
{
790813
std::memset(mFlatMask.data(), s, mFlatMask.size());
791814
}
@@ -809,39 +832,17 @@ class ROFMaskTable : public LayerTimingBase<NLayers>
809832
return view;
810833
}
811834

812-
GPUh() View getDeviceView(const uint8_t* deviceFlatMaskPtr, const int32_t* deviceOffsetPtr) const
835+
GPUh() View getDeviceView(const TableEntry* deviceFlatMaskPtr, const TableIndex* deviceOffsetPtr) const
813836
{
814837
View view;
815838
view.mFlatMask = deviceFlatMaskPtr;
816839
view.mLayerROFOffsets = deviceOffsetPtr;
817840
return view;
818841
}
819-
#ifndef GPUCA_GPUCODE
820-
GPUh() std::string asString() const
821-
{
822-
std::string mask_str;
823-
for (int32_t i = 0; i < NLayers; ++i) {
824-
int32_t nROFs = mLayerROFOffsets[i + 1] - mLayerROFOffsets[i];
825-
int32_t enabledROFs = 0;
826-
for (int32_t j = 0; j < nROFs; ++j) {
827-
if (isROFEnabled(i, j)) {
828-
++enabledROFs;
829-
}
830-
}
831-
mask_str += std::format("Layer {} ROFs enabled: {}/{} | ", i, enabledROFs, nROFs);
832-
}
833-
return mask_str;
834-
}
835-
836-
GPUh() void print() const
837-
{
838-
LOG(info) << asString();
839-
}
840-
#endif
841842

842843
private:
843-
int32_t mLayerROFOffsets[NLayers + 1]{}; // NLayers entries + 1 sentinel
844-
std::vector<uint8_t> mFlatMask;
844+
TableIndex mLayerROFOffsets[NLayers + 1] = {0};
845+
std::vector<TableEntry> mFlatMask;
845846
};
846847

847848
} // namespace o2::its

0 commit comments

Comments
 (0)