Skip to content

Commit dd608d6

Browse files
committed
Implement new kind of multiplicity mask
1 parent e48c4bd commit dd608d6

File tree

12 files changed

+490
-215
lines changed

12 files changed

+490
-215
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
3131
using typename TimeFrame<NLayers>::IndexTableUtilsN;
3232
using typename TimeFrame<NLayers>::ROFOverlapTableN;
3333
using typename TimeFrame<NLayers>::ROFVertexLookupTableN;
34+
using typename TimeFrame<NLayers>::ROFMaskTableN;
3435

3536
public:
3637
TimeFrameGPU() = default;
@@ -107,6 +108,7 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
107108
IndexTableUtilsN* getDeviceIndexTableUtils() { return mIndexTableUtilsDevice; }
108109
const auto getDeviceROFOverlapTableView() { return mDeviceROFOverlapTableView; }
109110
const auto getDeviceROFVertexLookupTableView() { return mDeviceROFVertexLookupTableView; }
111+
const auto getDeviceROFMaskTableView() { return mDeviceROFMaskTableView; }
110112
int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; }
111113
auto& getTrackITSExt() { return mTrackITSExt; }
112114
Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; }
@@ -177,9 +179,11 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
177179
// device navigation views
178180
ROFOverlapTableN::View mDeviceROFOverlapTableView;
179181
ROFVertexLookupTableN::View mDeviceROFVertexLookupTableView;
182+
ROFMaskTableN::View mDeviceROFMaskTableView;
180183

181184
// Hybrid pref
182185
uint8_t* mMultMaskDevice;
186+
int32_t* mMultMaskOffsetsDevice;
183187
Vertex* mPrimaryVerticesDevice;
184188
int* mROFramesPVDevice;
185189
std::array<Cluster*, NLayers> mClustersDevice;

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -252,11 +252,18 @@ void TimeFrameGPU<NLayers>::loadMultiplicityCutMask(const int iteration)
252252
{
253253
if (!iteration || iteration == 3) { // we need to re-load the swapped mult-mask in upc iteration
254254
GPUTimer timer("loading multiplicity cut mask");
255-
GPULog("gpu-transfer: iteration {} loading multiplicity cut mask with {} elements, for {:.2f} MB.", iteration, this->mMultiplicityCutMask.size(), this->mMultiplicityCutMask.size() * sizeof(uint8_t) / constants::MB);
256-
if (!iteration) { // only allocate on first call
257-
allocMem(reinterpret_cast<void**>(&mMultMaskDevice), this->mMultiplicityCutMask.size() * sizeof(uint8_t), this->hasFrameworkAllocator());
255+
const auto& hostTable = this->mMultiplicityCutMask;
256+
const auto hostView = hostTable.getView();
257+
GPULog("gpu-transfer: iteration {} loading multiplicity cut mask with {} elements, for {:.2f} MB.",
258+
iteration, hostTable.getFlatMaskSize(), hostTable.getFlatMaskSize() * sizeof(uint8_t) / constants::MB);
259+
if (!iteration) { // only allocate on first call; offsets are stable across iterations
260+
allocMem(reinterpret_cast<void**>(&mMultMaskDevice), hostTable.getFlatMaskSize() * sizeof(uint8_t), this->hasFrameworkAllocator());
261+
allocMem(reinterpret_cast<void**>(&mMultMaskOffsetsDevice), NLayers * sizeof(int32_t), this->hasFrameworkAllocator());
262+
GPUChkErrS(cudaMemcpy(mMultMaskOffsetsDevice, hostView.mLayerROFOffsets, NLayers * sizeof(int32_t), cudaMemcpyHostToDevice));
258263
}
259-
GPUChkErrS(cudaMemcpy(mMultMaskDevice, this->mMultiplicityCutMask.data(), this->mMultiplicityCutMask.size() * sizeof(uint8_t), cudaMemcpyHostToDevice));
264+
// Re-copy the flat mask on every qualifying iteration (e.g. after swapMasks() for UPC)
265+
GPUChkErrS(cudaMemcpy(mMultMaskDevice, hostView.mFlatMask, hostTable.getFlatMaskSize() * sizeof(uint8_t), cudaMemcpyHostToDevice));
266+
mDeviceROFMaskTableView = hostTable.getDeviceView(mMultMaskDevice, mMultMaskOffsetsDevice);
260267
}
261268
}
262269

Detectors/ITSMFT/ITS/tracking/include/ITStracking/FastMultEst.h

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717
#define ALICEO2_ITS_FASTMULTEST_
1818

1919
#include "ITSMFTReconstruction/ChipMappingITS.h"
20+
#include "DataFormatsITS/Vertex.h"
2021
#include "DataFormatsITSMFT/ROFRecord.h"
2122
#include "DataFormatsITSMFT/CompCluster.h"
22-
#include <DataFormatsITSMFT/PhysTrigger.h>
23+
#include "DataFormatsITSMFT/PhysTrigger.h"
2324
#include "ITStracking/FastMultEstConfig.h"
25+
#include "ITStracking/ROFLookupTables.h"
2426
#include <gsl/span>
2527
#include <array>
2628

@@ -32,32 +34,56 @@ namespace its
3234
struct FastMultEst {
3335

3436
static constexpr int NLayers = o2::itsmft::ChipMappingITS::NLayers;
37+
using ROFOverlapTableN = ROFOverlapTable<NLayers>;
38+
using ROFMaskTableN = ROFMaskTable<NLayers>;
3539

36-
float mult = 0.; /// estimated signal clusters multipliciy at reference (1st?) layer
37-
float noisePerChip = 0.; /// estimated or imposed noise per chip
38-
float cov[3] = {0.}; /// covariance matrix of estimation
39-
float chi2 = 0.; /// chi2
40-
int nLayersUsed = 0; /// number of layers actually used
40+
float mult = 0.; /// estimated signal clusters multiplicity on the selected multiplicity layer
41+
float noisePerChip = 0.; /// imposed noise per chip (when enabled by configuration)
42+
float cov[3] = {0.}; /// retained for compatibility; set to zero in single-layer mode
43+
float chi2 = 0.; /// retained for compatibility; set to zero in single-layer mode
44+
int nLayersUsed = 0; /// number of layers used by estimator (0/1 in single-layer mode)
4145
uint32_t lastRandomSeed = 0; /// state of the gRandom before
42-
43-
std::array<int, NLayers> nClPerLayer{0}; // measured N Cl per layer selectROFs
4446
FastMultEst();
4547

4648
static uint32_t getCurrentRandomSeed();
47-
int selectROFs(const gsl::span<const o2::itsmft::ROFRecord> rofs, const gsl::span<const o2::itsmft::CompClusterExt> clus,
48-
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<uint8_t>& sel);
49+
int selectROFs(const std::array<gsl::span<const o2::itsmft::ROFRecord>, NLayers>& rofs,
50+
const std::array<gsl::span<const o2::itsmft::CompClusterExt>, NLayers>& clus,
51+
const gsl::span<const o2::itsmft::PhysTrigger> trig,
52+
uint32_t firstTForbit,
53+
bool doStaggering,
54+
const ROFOverlapTableN::View& overlapView,
55+
ROFMaskTableN& sel);
56+
void selectROFsWithVertices(const auto& vertices, const ROFOverlapTableN::View& overlapView, ROFMaskTableN& sel)
57+
{
58+
const auto& multEstConf = FastMultEstConfig::Instance();
59+
if (!multEstConf.isVtxMultCutRequested()) {
60+
return;
61+
}
62+
63+
for (const auto& vertex : vertices) {
64+
if (!multEstConf.isPassingVtxMultCut(vertex.getNContributors())) {
65+
const auto& timestamp{vertex.getTimeStamp()};
66+
for (int layer = 0; layer < NLayers; ++layer) {
67+
uint32_t startROF = sel.getLayer(layer).getROF(timestamp.lower());
68+
uint32_t endROF = sel.getLayer(layer).getROF(timestamp.upper());
69+
for (uint32_t rof = startROF; rof <= endROF; ++rof) {
70+
sel.setROFsEnabled(layer, rof, 0);
71+
}
72+
}
73+
}
74+
}
75+
}
4976

50-
void fillNClPerLayer(const gsl::span<const o2::itsmft::CompClusterExt>& clusters);
51-
float process(const std::array<int, NLayers> ncl)
77+
int countClustersOnLayer(const gsl::span<const o2::itsmft::CompClusterExt>& clusters) const;
78+
float process(int nClusters)
5279
{
53-
return FastMultEstConfig::Instance().imposeNoisePerChip > 0 ? processNoiseImposed(ncl) : processNoiseFree(ncl);
80+
return FastMultEstConfig::Instance().imposeNoisePerChip > 0 ? processNoiseImposed(nClusters) : processNoiseFree(nClusters);
5481
}
55-
float processNoiseFree(const std::array<int, NLayers> ncl);
56-
float processNoiseImposed(const std::array<int, NLayers> ncl);
82+
float processNoiseFree(int nClusters);
83+
float processNoiseImposed(int nClusters);
5784
float process(const gsl::span<const o2::itsmft::CompClusterExt>& clusters)
5885
{
59-
fillNClPerLayer(clusters);
60-
return process(nClPerLayer);
86+
return process(countClustersOnLayer(clusters));
6187
}
6288
static bool sSeedSet;
6389

Detectors/ITSMFT/ITS/tracking/include/ITStracking/FastMultEstConfig.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ struct FastMultEstConfig : public o2::conf::ConfigurableParamHelper<FastMultEstC
3434
float imposeNoisePerChip = 1.e-9 * 1024 * 512; // assumed noise, free parameter if<0
3535

3636
// cuts to reject to low or too high mult events
37+
int cutMultClusLayer = NLayers - 1; /// layer used for cluster multiplicity selection (by default the outermost one)
3738
float cutMultClusLow = 0; /// reject ROF with estimated cluster mult. below this value (no cut if <0)
3839
float cutMultClusHigh = -1; /// reject ROF with estimated cluster mult. above this value (no cut if <0)
3940
float cutMultVtxLow = -1; /// reject seed vertex if its multiplicity below this value (no cut if <0)

Detectors/ITSMFT/ITS/tracking/include/ITStracking/ROFLookupTables.h

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,167 @@ class ROFVertexLookupTable : public LayerTimingBase<NLayers>
683683
std::vector<TableEntry> mFlatTable;
684684
};
685685

686+
// GPU-friendly view of the ROF mask table
687+
template <int32_t NLayers>
688+
struct ROFMaskTableView {
689+
const uint8_t* mFlatMask{nullptr};
690+
const int32_t* mLayerROFOffsets{nullptr}; // size NLayers+1
691+
692+
GPUhdi() bool isROFEnabled(int32_t layer, int32_t rofId) const noexcept
693+
{
694+
assert(layer >= 0 && layer < NLayers);
695+
return mFlatMask[mLayerROFOffsets[layer] + rofId] != 0;
696+
}
697+
698+
#ifndef GPUCA_GPUCODE
699+
GPUh() void printAll() const
700+
{
701+
for (int32_t i = 0; i < NLayers; ++i) {
702+
printLayer(i);
703+
}
704+
}
705+
706+
GPUh() void printLayer(int32_t layer) const
707+
{
708+
constexpr int w_rof = 10;
709+
constexpr int w_active = 10;
710+
int32_t nROFs = mLayerROFOffsets[layer + 1] - mLayerROFOffsets[layer];
711+
LOGF(info, "Mask table: Layer %d", layer);
712+
LOGF(info, "%*s | %*s", w_rof, "ROF", w_active, "Enabled");
713+
LOGF(info, "%.*s-+-%.*s", w_rof, "----------", w_active, "----------");
714+
for (int32_t i = 0; i < nROFs; ++i) {
715+
LOGF(info, "%*d | %*d", w_rof, i, w_active, (int)isROFEnabled(layer, i));
716+
}
717+
}
718+
#endif
719+
};
720+
721+
// Per-ROF per-layer boolean mask (uint8_t for GPU compatibility).
722+
template <int32_t NLayers>
723+
class ROFMaskTable : public LayerTimingBase<NLayers>
724+
{
725+
public:
726+
using BCRange = dataformats::RangeReference<LayerTiming::BCType, LayerTiming::BCType>;
727+
using View = ROFMaskTableView<NLayers>;
728+
729+
GPUdDefault() ROFMaskTable() = default;
730+
GPUh() explicit ROFMaskTable(const LayerTimingBase<NLayers>& timingBase) : LayerTimingBase<NLayers>(timingBase) { init(); }
731+
732+
GPUh() void init()
733+
{
734+
int32_t totalROFs = 0;
735+
for (int32_t layer{0}; layer < NLayers; ++layer) {
736+
mLayerROFOffsets[layer] = totalROFs;
737+
totalROFs += this->getLayer(layer).mNROFsTF;
738+
}
739+
mLayerROFOffsets[NLayers] = totalROFs; // sentinel
740+
mFlatMask.resize(totalROFs, 1);
741+
}
742+
743+
GPUh() size_t getFlatMaskSize() const noexcept { return mFlatMask.size(); }
744+
745+
GPUh() bool isROFEnabled(int32_t layer, int32_t rofId) const noexcept { return mFlatMask[mLayerROFOffsets[layer] + rofId] != 0; }
746+
747+
GPUh() void setROFEnabled(int32_t layer, int32_t rofId, uint8_t state = 1) noexcept
748+
{
749+
assert(layer >= 0 && layer < NLayers);
750+
assert(rofId >= 0 && rofId < mLayerROFOffsets[layer + 1] - mLayerROFOffsets[layer]);
751+
mFlatMask[mLayerROFOffsets[layer] + rofId] = state;
752+
}
753+
754+
GPUh() void setROFsEnabled(int32_t layer, int32_t firstRof, int32_t nRofs, uint8_t state = 1) noexcept
755+
{
756+
assert(layer >= 0 && layer < NLayers);
757+
assert(firstRof >= 0);
758+
assert(firstRof + nRofs <= mLayerROFOffsets[layer + 1] - mLayerROFOffsets[layer]);
759+
std::memset(mFlatMask.data() + mLayerROFOffsets[layer] + firstRof, state, nRofs);
760+
}
761+
762+
// Enable all ROFs in all layers that are time-compatible with the given BC range
763+
GPUh() void selectROF(const BCRange& t)
764+
{
765+
const int32_t bcStart = t.getFirstEntry();
766+
const int32_t bcEnd = t.getEntriesBound();
767+
for (int32_t layer{0}; layer < NLayers; ++layer) {
768+
const auto& lay = this->getLayer(layer);
769+
const int32_t offset = mLayerROFOffsets[layer];
770+
for (int32_t rofId{0}; rofId < lay.mNROFsTF; ++rofId) {
771+
if (static_cast<int32_t>(lay.getROFStartInBC(rofId)) < bcEnd &&
772+
static_cast<int32_t>(lay.getROFEndInBC(rofId)) > bcStart) {
773+
mFlatMask[offset + rofId] = 1;
774+
}
775+
}
776+
}
777+
}
778+
779+
// Reset mask to 0, then enable all ROFs compatible with any of the given BC ranges
780+
GPUh() void selectROFs(const std::vector<BCRange>& ts)
781+
{
782+
resetMask();
783+
for (const auto& t : ts) {
784+
selectROF(t);
785+
}
786+
}
787+
788+
GPUh() void resetMask(uint8_t s = 0)
789+
{
790+
std::memset(mFlatMask.data(), s, mFlatMask.size());
791+
}
792+
793+
GPUh() void invertMask()
794+
{
795+
std::ranges::transform(mFlatMask, mFlatMask.begin(), [](uint8_t x) { return 1 - x; });
796+
}
797+
798+
GPUh() void swap(ROFMaskTable& other) noexcept
799+
{
800+
std::swap(mFlatMask, other.mFlatMask);
801+
std::swap(mLayerROFOffsets, other.mLayerROFOffsets);
802+
}
803+
804+
GPUh() View getView() const
805+
{
806+
View view;
807+
view.mFlatMask = mFlatMask.data();
808+
view.mLayerROFOffsets = mLayerROFOffsets;
809+
return view;
810+
}
811+
812+
GPUh() View getDeviceView(const uint8_t* deviceFlatMaskPtr, const int32_t* deviceOffsetPtr) const
813+
{
814+
View view;
815+
view.mFlatMask = deviceFlatMaskPtr;
816+
view.mLayerROFOffsets = deviceOffsetPtr;
817+
return view;
818+
}
819+
#ifndef GPUCA_GPUCODE
820+
GPUh() std::string asString() const
821+
{
822+
std::string mask_str;
823+
for (int32_t i = 0; i < NLayers; ++i) {
824+
int32_t nROFs = mLayerROFOffsets[i + 1] - mLayerROFOffsets[i];
825+
int32_t enabledROFs = 0;
826+
for (int32_t j = 0; j < nROFs; ++j) {
827+
if (isROFEnabled(i, j)) {
828+
++enabledROFs;
829+
}
830+
}
831+
mask_str += std::format("Layer {} ROFs enabled: {}/{} | ", i, enabledROFs, nROFs);
832+
}
833+
return mask_str;
834+
}
835+
836+
GPUh() void print() const
837+
{
838+
LOG(info) << asString();
839+
}
840+
#endif
841+
842+
private:
843+
int32_t mLayerROFOffsets[NLayers + 1]{}; // NLayers entries + 1 sentinel
844+
std::vector<uint8_t> mFlatMask;
845+
};
846+
686847
} // namespace o2::its
687848

688849
#endif

Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ struct TimeFrame {
6767
using IndexTableUtilsN = IndexTableUtils<NLayers>;
6868
using ROFOverlapTableN = ROFOverlapTable<NLayers>;
6969
using ROFVertexLookupTableN = ROFVertexLookupTable<NLayers>;
70+
using ROFMaskTableN = ROFMaskTable<NLayers>;
7071
using CellSeedN = CellSeed<NLayers>;
7172
friend class gpu::TimeFrameGPU<NLayers>;
7273

@@ -220,8 +221,8 @@ struct TimeFrame {
220221
std::array<float, 2>& getBeamXY() { return mBeamPos; }
221222
// \Vertexer
222223

223-
void setMultiplicityCutMask(const std::vector<uint8_t>& cutMask) { mMultiplicityCutMask = cutMask; }
224-
void setROFMask(const std::vector<uint8_t>& rofMask) { mROFMask = rofMask; }
224+
void setMultiplicityCutMask(const ROFMaskTableN& cutMask) { mMultiplicityCutMask = cutMask; }
225+
void setROFMask(const ROFMaskTableN& rofMask) { mROFMask = rofMask; }
225226
void swapMasks() { mMultiplicityCutMask.swap(mROFMask); }
226227

227228
int hasBogusClusters() const { return std::accumulate(mBogusClusters.begin(), mBogusClusters.end(), 0); }
@@ -266,7 +267,7 @@ struct TimeFrame {
266267
bounded_vector<MCCompLabel> mTracksLabel;
267268
std::vector<bounded_vector<int>> mCellsNeighbours;
268269
std::vector<bounded_vector<int>> mCellsLookupTable;
269-
std::vector<uint8_t> mMultiplicityCutMask;
270+
ROFMaskTableN mMultiplicityCutMask;
270271

271272
const o2::base::PropagatorImpl<float>* mPropagatorDevice = nullptr; // Needed only for GPU
272273

@@ -290,7 +291,7 @@ struct TimeFrame {
290291
bounded_vector<float> mPositionResolution;
291292
std::array<bounded_vector<uint8_t>, NLayers> mClusterSize;
292293

293-
std::vector<uint8_t> mROFMask;
294+
ROFMaskTableN mROFMask;
294295
bounded_vector<std::array<float, 2>> mPValphaX; /// PV x and alpha for track propagation
295296
std::vector<bounded_vector<MCCompLabel>> mTrackletLabels;
296297
std::vector<bounded_vector<MCCompLabel>> mCellLabels;

0 commit comments

Comments
 (0)