Skip to content

Commit adf6dfb

Browse files
shahor02davidrohr
authored andcommitted
Add POD version of TPCFastTransform
The TPCFastTransformPOD is a pointerless version of the TPCFastTransform. It can be created from the original TPCFastTransform as e.g. auto lold = o2::gpu::TPCFastTransform::loadFromFile("o2-gpu-TPCFastTransform.root","ccdb_object"); // load original transform std::vector<char> v; // one has to provide a vector (could be a std or pmr), which later can be messaged via DPL auto* pod = o2::gpu::TPCFastTransformPOD::create(v, *lold); // pointer pod is just v.data() cast to TPCFastTransformPOD* // run test: pod->test(*lold); [INFO] (ns per call) original this Nmissmatch [INFO] getCorrection 1.330e+02 1.400e+02 0 [INFO] getCorrectionInvCorrectedX 8.856e+01 8.434e+01 0 [INFO] getCorrectionInvUV 6.266e+01 6.142e+01 0 It can also be created directly from the TPCFastSpaceChargeCorrection as TPCFastSpaceChargeCorrection& oldCorr = lold->getCorrection(); auto* pod = o2::gpu::TPCFastTransformPOD::create(v, oldCorr); but in this case one should afterwards set the vdrift and t0 using provided getters. TPCFastTransformPOD replicates all the methods of the TPCFastTransform (and of the TPCFastSpaceChargeCorrection), including those which allow to query rescaled corrections (by providing reference maps and scaling coefficients). Since the idea of this class is to create a final correction map as a weighted sum of different contributions and to distribute it to consumer processes via shared memory, also the query methods w/o rescaling are added, they have the suffix _new added. Eventually, the scalable legacy methods can be suppressed and the suffix new can be dropped.
1 parent 33cf955 commit adf6dfb

File tree

5 files changed

+1162
-0
lines changed

5 files changed

+1162
-0
lines changed

GPU/TPCFastTransformation/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ set(SRCS
2626
TPCFastSpaceChargeCorrectionMap.cxx
2727
TPCFastTransform.cxx
2828
CorrectionMapsHelper.cxx
29+
TPCFastTransformPOD.cxx
2930
)
3031

3132
if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone")

GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ namespace gpu
4141
///
4242
class TPCFastSpaceChargeCorrection : public FlatObject
4343
{
44+
friend class TPCFastTransformPOD;
45+
4446
public:
4547
// obsolete structure, declared here only for backward compatibility
4648
struct SliceInfo {
Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2+
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3+
// All rights not expressly granted are reserved.
4+
//
5+
// This software is distributed under the terms of the GNU General Public
6+
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7+
//
8+
// In applying this license CERN does not waive the privileges and immunities
9+
// granted to it by virtue of its status as an Intergovernmental Organization
10+
// or submit itself to any jurisdiction.
11+
12+
/// \file TPCFastTransformPOD.cxx
13+
/// \brief Implementation of POD correction map
14+
///
15+
/// \author ruben.shahoayn@cern.ch
16+
17+
#include "TPCFastTransformPOD.h"
18+
/// \brief Implementation of POD correction map
19+
///
20+
/// \author ruben.shahoayn@cern.ch
21+
22+
#include "TPCFastTransformPOD.h"
23+
#include "GPUDebugStreamer.h"
24+
#if !defined(GPUCA_GPUCODE)
25+
#include <TRandom.h>
26+
#endif
27+
28+
namespace o2
29+
{
30+
namespace gpu
31+
{
32+
33+
#if !defined(GPUCA_GPUCODE)
34+
35+
size_t TPCFastTransformPOD::estimateSize(const TPCFastSpaceChargeCorrection& origCorr)
36+
{
37+
// estimate size of own buffer
38+
const size_t selfSizeFix = sizeof(TPCFastTransformPOD);
39+
size_t nextDynOffs = alignOffset(selfSizeFix);
40+
nextDynOffs = alignOffset(nextDynOffs + origCorr.mNumberOfScenarios * sizeof(size_t)); // spline scenarios start here
41+
// space for splines
42+
for (int isc = 0; isc < origCorr.mNumberOfScenarios; isc++) {
43+
const auto& spline = origCorr.mScenarioPtr[isc];
44+
nextDynOffs = alignOffset(nextDynOffs + sizeof(spline));
45+
}
46+
// space for splines data
47+
for (int is = 0; is < 3; is++) {
48+
for (int sector = 0; sector < origCorr.mGeo.getNumberOfSectors(); sector++) {
49+
for (int row = 0; row < NROWS; row++) {
50+
const auto& spline = origCorr.getSpline(sector, row);
51+
int nPar = spline.getNumberOfParameters();
52+
if (is == 1) {
53+
nPar = nPar / 3;
54+
}
55+
if (is == 2) {
56+
nPar = nPar * 2 / 3;
57+
}
58+
nextDynOffs += nPar * sizeof(float);
59+
}
60+
}
61+
}
62+
nextDynOffs = alignOffset(nextDynOffs);
63+
return nextDynOffs;
64+
}
65+
66+
/// Build a TPCFastTransformPOD inside a caller-provided buffer from a space-charge correction.
///
/// Layout written into \p buff, in this order:
///   1. the fixed-size TPCFastTransformPOD object,
///   2. a table of mNumberOfScenarios size_t offsets (one per spline scenario),
///   3. the spline scenario objects, each starting at an aligned offset,
///   4. the spline data, grouped by data set (0, 1, 2), then by sector, then by row.
///
/// mVdrift and mT0 are set to 0 here, since the correction object does not provide them.
///
/// \param buff      destination buffer.
///                  NOTE(review): it is reinterpreted as a TPCFastTransformPOD, so it presumably
///                  has to be suitably aligned for that type - confirm against getNonConst().
/// \param buffSize  size of \p buff in bytes; must be exactly the size of the filled object
///                  (see estimateSize())
/// \param origCorr  correction to copy the geometry, row infos, scenarios and spline data from
/// \return pointer to the object created at the start of \p buff
/// \throws std::runtime_error if \p buff is null, if the buffer is too small for any part of the
///         object, or if its size differs from the size actually filled
TPCFastTransformPOD* TPCFastTransformPOD::create(char* buff, size_t buffSize, const TPCFastSpaceChargeCorrection& origCorr)
{
  // Validate the buffer BEFORE the first write. The fixed-size part and the scenario offsets table
  // are written unconditionally below, so an assert (compiled out in release builds) is not enough
  // to prevent an out-of-bounds write into an undersized buffer.
  const size_t offsScenariosOffsets = alignOffset(sizeof(TPCFastTransformPOD));
  const size_t offsFirstScenario = alignOffset(offsScenariosOffsets + origCorr.mNumberOfScenarios * sizeof(size_t));
  if (buff == nullptr || buffSize < offsFirstScenario) {
    throw std::runtime_error(fmt::format("buffer of size {} cannot hold the fixed-size part and the scenario offsets table of TPCFastTransformPOD, which need {} bytes", buffSize, offsFirstScenario));
  }

  // instantiate object in the already created buffer
  auto& podMap = getNonConst(buff);
  podMap.mApplyCorrection = true; // by default always apply corrections

  // copy fixed size data --- start
  podMap.mNumberOfScenarios = origCorr.mNumberOfScenarios;
  std::memcpy(&podMap.mGeo, &origCorr.mGeo, sizeof(TPCFastTransformGeo)); // copy geometry (fixed size)
  for (int sector = 0; sector < TPCFastTransformGeo::getNumberOfSectors(); sector++) {
    for (int row = 0; row < NROWS; row++) {
      podMap.mSectorRowInfos[NROWS * sector + row] = origCorr.getSectorRowInfo(sector, row);
    }
  }
  podMap.mTimeStamp = origCorr.mTimeStamp;
  // init data members coming from the TPCFastTransform
  podMap.mVdrift = 0.;
  podMap.mT0 = 0.;
  // copy fixed size data --- end

  // table of spline scenario offsets starts right after the (aligned) fixed-size part
  podMap.mOffsScenariosOffsets = offsScenariosOffsets;
  LOGP(debug, "Set mOffsScenariosOffsets = {}", podMap.mOffsScenariosOffsets);
  size_t nextDynOffs = offsFirstScenario; // spline scenarios start here

  // copy spline objects
  size_t* scenOffs = reinterpret_cast<size_t*>(buff + podMap.mOffsScenariosOffsets);
  for (int isc = 0; isc < origCorr.mNumberOfScenarios; isc++) {
    const auto& spline = origCorr.mScenarioPtr[isc];
    if (buffSize < nextDynOffs + sizeof(spline)) {
      throw std::runtime_error(fmt::format("attempt to copy {} bytes for spline for scenario {} to {}, overflowing the buffer of size {}", sizeof(spline), isc, nextDynOffs + sizeof(spline), buffSize));
    }
    scenOffs[isc] = nextDynOffs; // the offsets table itself was covered by the up-front check
    std::memcpy(buff + scenOffs[isc], &spline, sizeof(spline));
    nextDynOffs = alignOffset(nextDynOffs + sizeof(spline));
    LOGP(debug, "Copy {} bytes for spline scenario {} (ptr:{}) to offsset {}", sizeof(spline), isc, (void*)&spline, scenOffs[isc]);
  }

  // copy splines data: the three data sets are stored one after another, each one sector by sector
  for (int is = 0; is < 3; is++) {
    LOGP(debug, "splinID={} start offset {} -> {}", is, nextDynOffs, (void*)(buff + nextDynOffs));
    for (int sector = 0; sector < origCorr.mGeo.getNumberOfSectors(); sector++) {
      podMap.mSplineDataOffsets[sector][is] = nextDynOffs; // start of this sector's data for data set `is`
      size_t rowDataOffs = 0;                               // offset of the row data relative to the sector start
      for (int row = 0; row < NROWS; row++) {
        const auto& spline = origCorr.getSpline(sector, row);
        const float* dataOr = origCorr.getCorrectionData(sector, row, is);
        int nPar = spline.getNumberOfParameters();
        if (is == 1) {
          nPar = nPar / 3;
        }
        if (is == 2) {
          nPar = nPar * 2 / 3;
        }
        LOGP(debug, "Copying {} floats for spline{} of sector:{} row:{} to offset {}", nPar, is, sector, row, nextDynOffs);
        const size_t nbcopy = nPar * sizeof(float);
        if (buffSize < nextDynOffs + nbcopy) {
          throw std::runtime_error(fmt::format("attempt to copy {} bytes of data for spline{} of sector{}/row{} to {}, overflowing the buffer of size {}", nbcopy, is, sector, row, nextDynOffs, buffSize));
        }
        if (nbcopy > 0) { // never hand a possibly null source pointer to memcpy for an empty copy
          std::memcpy(buff + nextDynOffs, dataOr, nbcopy);
        }
        podMap.getSectorRowInfo(sector, row).dataOffsetBytes[is] = rowDataOffs;
        rowDataOffs += nbcopy;
        nextDynOffs += nbcopy;
      }
    }
  }

  // the buffer is expected to have exactly the size of the filled object
  podMap.mTotalSize = alignOffset(nextDynOffs);
  if (buffSize != podMap.mTotalSize) {
    throw std::runtime_error(fmt::format("Estimated buffer size {} differs from filled one {}", buffSize, podMap.mTotalSize));
  }
  return &getNonConst(buff);
}
144+
145+
/// Build a TPCFastTransformPOD inside a caller-provided buffer from a full TPCFastTransform.
///
/// The correction part is created by the overload taking a TPCFastSpaceChargeCorrection;
/// the drift velocity and the time offset are then taken over from \p src.
///
/// \param buff      destination buffer
/// \param buffSize  size of \p buff in bytes
/// \param src       transform to copy the correction, vdrift and t0 from
/// \return pointer to the object created in \p buff
TPCFastTransformPOD* TPCFastTransformPOD::create(char* buff, size_t buffSize, const TPCFastTransform& src)
{
  // build everything that comes from the space-charge correction
  TPCFastTransformPOD* const pod = create(buff, buffSize, src.getCorrection());

  // add the data members which only the TPCFastTransform provides
  pod->mT0 = src.getT0();
  pod->mVdrift = src.getVDrift();
  return pod;
}
155+
156+
bool TPCFastTransformPOD::test(const TPCFastSpaceChargeCorrection& origCorr, int npoints) const
157+
{
158+
if (npoints < 1) {
159+
return false;
160+
}
161+
std::vector<unsigned char> sector, row;
162+
std::vector<float> y, z;
163+
std::vector<std::array<float, 3>> corr0, corr1;
164+
std::vector<std::array<float, 2>> corrInv0, corrInv1;
165+
std::vector<float> corrInvX0, corrInvX1;
166+
167+
sector.reserve(npoints);
168+
row.reserve(npoints);
169+
y.reserve(npoints);
170+
z.reserve(npoints);
171+
corr0.resize(npoints);
172+
corr1.resize(npoints);
173+
corrInv0.resize(npoints);
174+
corrInv1.resize(npoints);
175+
corrInvX0.resize(npoints);
176+
corrInvX1.resize(npoints);
177+
178+
for (int i = 0; i < npoints; i++) {
179+
sector.push_back(gRandom->Integer(NSECTORS));
180+
row.push_back(gRandom->Integer(NROWS));
181+
y.push_back(2 * (gRandom->Rndm() - 0.5) * mGeo.getRowInfo(row.back()).getYmax());
182+
z.push_back((sector.back() < NSECTORS / 2 ? 1.f : -1.f) * gRandom->Rndm() * 240);
183+
}
184+
long origStart[3], origEnd[3], thisStart[3], thisEnd[3];
185+
origStart[0] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
186+
for (int i = 0; i < npoints; i++) {
187+
corr0.push_back(origCorr.getCorrectionLocal(sector[i], row[i], y[i], z[i]));
188+
}
189+
190+
origEnd[0] = origStart[1] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
191+
for (int i = 0; i < npoints; i++) {
192+
corrInv0.push_back(origCorr.getCorrectionYZatRealYZ(sector[i], row[i], y[i], z[i]));
193+
}
194+
195+
origEnd[1] = origStart[2] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
196+
for (int i = 0; i < npoints; i++) {
197+
corrInvX0.push_back(origCorr.getCorrectionXatRealYZ(sector[i], row[i], y[i], z[i]));
198+
}
199+
//
200+
origEnd[2] = thisStart[0] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
201+
for (int i = 0; i < npoints; i++) {
202+
corr1.push_back(this->getCorrectionLocal(sector[i], row[i], y[i], z[i]));
203+
}
204+
thisEnd[0] = thisStart[1] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
205+
for (int i = 0; i < npoints; i++) {
206+
corrInv1.push_back(this->getCorrectionYZatRealYZ(sector[i], row[i], y[i], z[i]));
207+
}
208+
209+
thisEnd[1] = thisStart[2] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
210+
for (int i = 0; i < npoints; i++) {
211+
corrInvX1.push_back(this->getCorrectionXatRealYZ(sector[i], row[i], y[i], z[i]));
212+
}
213+
thisEnd[2] = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
214+
//
215+
size_t ndiff[3] = {};
216+
for (int i = 0; i < npoints; i++) {
217+
if (corr0[i][0] != corr1[i][0] || corr0[i][1] != corr1[i][1] || corr0[i][2] != corr1[i][2]) {
218+
ndiff[0]++;
219+
}
220+
if (corrInv0[i][0] != corrInv1[i][0] || corrInv0[i][1] != corrInv1[i][1]) {
221+
ndiff[1]++;
222+
}
223+
if (corrInvX0[i] != corrInvX1[i]) {
224+
ndiff[2]++;
225+
}
226+
}
227+
//
228+
LOGP(info, " (ns per call) original this Nmissmatch");
229+
LOGP(info, "getCorrection {:.3e} {:.3e} {}", double(origEnd[0] - origStart[0]) / npoints * 1000., double(thisEnd[0] - thisStart[0]) / npoints * 1000., ndiff[0]);
230+
LOGP(info, "getCorrectionInvCorrectedX {:.3e} {:.3e} {}", double(origEnd[1] - origStart[1]) / npoints * 1000., double(thisEnd[1] - thisStart[1]) / npoints * 1000., ndiff[1]);
231+
LOGP(info, "getCorrectionInvUV {:.3e} {:.3e} {}", double(origEnd[2] - origStart[2]) / npoints * 1000., double(thisEnd[2] - thisStart[2]) / npoints * 1000., ndiff[2]);
232+
return ndiff[0] == 0 && ndiff[1] == 0 && ndiff[2] == 0;
233+
}
234+
235+
#endif
236+
237+
} // namespace gpu
238+
} // namespace o2

0 commit comments

Comments
 (0)