Skip to content

Commit 821c93c

Browse files
authored
Merge pull request #59 from mpuccio/its/trk/stag
New fastmultEst and related reworking for UPC
2 parents 507223a + 32114cb commit 821c93c

File tree

22 files changed

+655
-375
lines changed

22 files changed

+655
-375
lines changed

Detectors/ITSMFT/ITS/reconstruction/CMakeLists.txt

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,11 @@
1111

1212
o2_add_library(ITSReconstruction
1313
SOURCES src/RecoGeomHelper.cxx
14-
src/FastMultEstConfig.cxx
15-
src/FastMultEst.cxx
1614
PUBLIC_LINK_LIBRARIES O2::ITSBase
1715
O2::ITSMFTReconstruction
1816
O2::DataFormatsITS
1917
O2::CommonUtils)
2018

2119
o2_target_root_dictionary(
2220
ITSReconstruction
23-
HEADERS include/ITSReconstruction/RecoGeomHelper.h
24-
include/ITSReconstruction/FastMultEst.h
25-
include/ITSReconstruction/FastMultEstConfig.h)
21+
HEADERS include/ITSReconstruction/RecoGeomHelper.h)

Detectors/ITSMFT/ITS/reconstruction/include/ITSReconstruction/FastMultEst.h

Lines changed: 0 additions & 70 deletions
This file was deleted.

Detectors/ITSMFT/ITS/reconstruction/src/FastMultEst.cxx

Lines changed: 0 additions & 189 deletions
This file was deleted.

Detectors/ITSMFT/ITS/reconstruction/src/ITSReconstructionLinkDef.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,5 @@
1616
#pragma link off all functions;
1717

1818
#pragma link C++ class o2::its::RecoGeomHelper + ;
19-
#pragma link C++ class o2::its::FastMultEst + ;
20-
#pragma link C++ class o2::its::FastMultEstConfig + ;
21-
#pragma link C++ class o2::conf::ConfigurableParamHelper < o2::its::FastMultEstConfig> + ;
2219

2320
#endif

Detectors/ITSMFT/ITS/tracking/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ o2_add_library(ITStracking
1414
SOURCES src/ClusterLines.cxx
1515
src/Cluster.cxx
1616
src/Configuration.cxx
17+
src/FastMultEstConfig.cxx
18+
src/FastMultEst.cxx
1719
src/TimeFrame.cxx
1820
src/IOUtils.cxx
1921
src/Tracker.cxx
@@ -28,6 +30,7 @@ o2_add_library(ITStracking
2830
O2::DataFormatsITSMFT
2931
O2::SimulationDataFormat
3032
O2::ITSBase
33+
O2::CommonUtils
3134
O2::ITSReconstruction
3235
O2::ITSMFTReconstruction
3336
O2::DataFormatsITS
@@ -49,6 +52,8 @@ o2_target_root_dictionary(ITStracking
4952
include/ITStracking/Tracklet.h
5053
include/ITStracking/Cluster.h
5154
include/ITStracking/Definitions.h
55+
include/ITStracking/FastMultEst.h
56+
include/ITStracking/FastMultEstConfig.h
5257
include/ITStracking/TrackingConfigParam.h
5358
LINKDEF src/TrackingLinkDef.h)
5459

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
3131
using typename TimeFrame<NLayers>::IndexTableUtilsN;
3232
using typename TimeFrame<NLayers>::ROFOverlapTableN;
3333
using typename TimeFrame<NLayers>::ROFVertexLookupTableN;
34+
using typename TimeFrame<NLayers>::ROFMaskTableN;
3435

3536
public:
3637
TimeFrameGPU() = default;
@@ -107,6 +108,7 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
107108
IndexTableUtilsN* getDeviceIndexTableUtils() { return mIndexTableUtilsDevice; }
108109
const auto getDeviceROFOverlapTableView() { return mDeviceROFOverlapTableView; }
109110
const auto getDeviceROFVertexLookupTableView() { return mDeviceROFVertexLookupTableView; }
111+
const auto getDeviceROFMaskTableView() { return mDeviceROFMaskTableView; }
110112
int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; }
111113
auto& getTrackITSExt() { return mTrackITSExt; }
112114
Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; }
@@ -141,7 +143,6 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
141143
o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
142144
float** getDeviceArrayTrackSeedsChi2() { return mCellSeedsChi2DeviceArray; }
143145
int* getDeviceNeighboursIndexTables(const int layer) { return mNeighboursIndexTablesDevice[layer]; }
144-
uint8_t* getDeviceMultCutMask() { return mMultMaskDevice; }
145146

146147
void setDevicePropagator(const o2::base::PropagatorImpl<float>* p) final { this->mPropagatorDevice = p; }
147148

@@ -177,9 +178,11 @@ class TimeFrameGPU final : public TimeFrame<NLayers>
177178
// device navigation views
178179
ROFOverlapTableN::View mDeviceROFOverlapTableView;
179180
ROFVertexLookupTableN::View mDeviceROFVertexLookupTableView;
181+
ROFMaskTableN::View mDeviceROFMaskTableView;
180182

181183
// Hybrid pref
182184
uint8_t* mMultMaskDevice;
185+
int32_t* mMultMaskOffsetsDevice;
183186
Vertex* mPrimaryVerticesDevice;
184187
int* mROFramesPVDevice;
185188
std::array<Cluster*, NLayers> mClustersDevice;

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class ExternalAllocator;
3636

3737
template <int NLayers = 7>
3838
void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
39-
const uint8_t* multMask,
39+
const typename ROFMaskTable<NLayers>::View& multMask,
4040
const int layer,
4141
const typename ROFOverlapTable<NLayers>::View& rofOverlaps,
4242
const typename ROFVertexLookupTable<NLayers>::View& vertexLUT,
@@ -66,7 +66,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
6666

6767
template <int NLayers = 7>
6868
void computeTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
69-
const uint8_t* multMask,
69+
const typename ROFMaskTable<NLayers>::View& multMask,
7070
const int layer,
7171
const typename ROFOverlapTable<NLayers>::View& rofOverlaps,
7272
const typename ROFVertexLookupTable<NLayers>::View& vertexLUT,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -252,11 +252,18 @@ void TimeFrameGPU<NLayers>::loadMultiplicityCutMask(const int iteration)
252252
{
253253
if (!iteration || iteration == 3) { // we need to re-load the swapped mult-mask in upc iteration
254254
GPUTimer timer("loading multiplicity cut mask");
255-
GPULog("gpu-transfer: iteration {} loading multiplicity cut mask with {} elements, for {:.2f} MB.", iteration, this->mMultiplicityCutMask.size(), this->mMultiplicityCutMask.size() * sizeof(uint8_t) / constants::MB);
256-
if (!iteration) { // only allocate on first call
257-
allocMem(reinterpret_cast<void**>(&mMultMaskDevice), this->mMultiplicityCutMask.size() * sizeof(uint8_t), this->hasFrameworkAllocator());
255+
const auto& hostTable = this->mMultiplicityCutMask;
256+
const auto hostView = hostTable.getView();
257+
GPULog("gpu-transfer: iteration {} loading multiplicity cut mask with {} elements, for {:.2f} MB.",
258+
iteration, hostTable.getFlatMaskSize(), hostTable.getFlatMaskSize() * sizeof(uint8_t) / constants::MB);
259+
if (!iteration) { // only allocate on first call; offsets are stable across iterations
260+
allocMem(reinterpret_cast<void**>(&mMultMaskDevice), hostTable.getFlatMaskSize() * sizeof(uint8_t), this->hasFrameworkAllocator());
261+
allocMem(reinterpret_cast<void**>(&mMultMaskOffsetsDevice), NLayers * sizeof(int32_t), this->hasFrameworkAllocator());
262+
GPUChkErrS(cudaMemcpy(mMultMaskOffsetsDevice, hostView.mLayerROFOffsets, NLayers * sizeof(int32_t), cudaMemcpyHostToDevice));
258263
}
259-
GPUChkErrS(cudaMemcpy(mMultMaskDevice, this->mMultiplicityCutMask.data(), this->mMultiplicityCutMask.size() * sizeof(uint8_t), cudaMemcpyHostToDevice));
264+
// Re-copy the flat mask on every qualifying iteration (e.g. after swapMasks() for UPC)
265+
GPUChkErrS(cudaMemcpy(mMultMaskDevice, hostView.mFlatMask, hostTable.getFlatMaskSize() * sizeof(uint8_t), cudaMemcpyHostToDevice));
266+
mDeviceROFMaskTableView = hostTable.getDeviceView(mMultMaskDevice, mMultMaskOffsetsDevice);
260267
}
261268
}
262269

0 commit comments

Comments
 (0)