Skip to content

Commit ddec5e3

Browse files
authored
aodMerger: Special option to merge only folders of the same name (#15205)
Adding a special `merge-by-name` option which asks the merger to merge together only dataframe folders of the same name. This is needed only in special situations where we want to enforce the output structure of the AOD to be the same as that of a reference (data) AOD. An example is MC-DATA embedding.
1 parent 5a5010a commit ddec5e3

File tree

1 file changed

+41
-26
lines changed

1 file changed

+41
-26
lines changed

Framework/AODMerger/src/aodMerger.cxx

Lines changed: 41 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ int main(int argc, char* argv[])
3838
long maxDirSize = 100000000;
3939
bool skipNonExistingFiles = false;
4040
bool skipParentFilesList = false;
41+
bool mergeByName = false;
4142
int verbosity = 2;
4243
int exitCode = 0; // 0: success, >0: failure
4344
int compression = 505;
@@ -50,6 +51,7 @@ int main(int argc, char* argv[])
5051
{"skip-non-existing-files", no_argument, nullptr, 3},
5152
{"skip-parent-files-list", no_argument, nullptr, 4},
5253
{"compression", required_argument, nullptr, 5},
54+
{"merge-by-name", no_argument, nullptr, 6},
5355
{"verbosity", required_argument, nullptr, 'v'},
5456
{"help", no_argument, nullptr, 'h'},
5557
{nullptr, 0, nullptr, 0}};
@@ -70,6 +72,8 @@ int main(int argc, char* argv[])
7072
skipParentFilesList = true;
7173
} else if (c == 5) {
7274
compression = atoi(optarg);
75+
} else if (c == 6) {
76+
mergeByName = true;
7377
} else if (c == 'v') {
7478
verbosity = atoi(optarg);
7579
} else if (c == 'h') {
@@ -80,6 +84,7 @@ int main(int argc, char* argv[])
8084
printf(" --skip-non-existing-files Flag to allow skipping of non-existing files in the input list.\n");
8185
printf(" --skip-parent-files-list Flag to allow skipping the merging of the parent files list.\n");
8286
printf(" --compression <root compression id> Compression algorithm / level to use (default: %d)\n", compression);
87+
printf(" --merge-by-name Only merge TTrees from folders with the same name.\n");
8388
printf(" --verbosity <flag> Verbosity of output (default: %d).\n", verbosity);
8489
return -1;
8590
} else {
@@ -94,6 +99,9 @@ int main(int argc, char* argv[])
9499
if (skipNonExistingFiles) {
95100
printf(" WARNING: Skipping non-existing files.\n");
96101
}
102+
if (mergeByName) {
103+
printf(" Merging only folders with the same name.\n");
104+
}
97105

98106
std::map<std::string, TTree*> trees;
99107
std::map<std::string, uint64_t> sizeCompressed;
@@ -112,6 +120,28 @@ int main(int argc, char* argv[])
112120
TMap* parentFiles = nullptr;
113121
int totalMergedDFs = 0;
114122
int mergedDFs = 0;
123+
124+
// Write all accumulated trees to outputDir, update stats, and clean up state.
//
// Shared flush helper used when the size limit is reached, when the folder
// name changes in merge-by-name mode, and for the final write-out.
//
// resetState: when true, the per-folder bookkeeping (outputDir, trees, offsets,
//             mergedDFs, currentDirSize) is reset after the trees are written.
//             When false, only the write/stat/delete pass runs; the entries in
//             `trees` are left in the map although the TTree objects have been
//             deleted, so the map must not be dereferenced afterwards.
125+
auto flushTrees = [&](bool resetState) {
126+
// No output folder is currently open: nothing is written and no state is reset.
if (!outputDir) {
127+
return;
128+
}
129+
for (auto const& tree : trees) {
130+
// Make outputDir the current directory before writing each tree.
outputDir->cd();
131+
tree.second->Write();
132+
// Accumulate per-tree-name size statistics (keyed by the map key).
sizeCompressed[tree.first] += tree.second->GetZipBytes();
133+
sizeUncompressed[tree.first] += tree.second->GetTotBytes();
134+
// The tree is released here; the map entry itself is only removed by trees.clear() below.
delete tree.second;
135+
}
136+
if (resetState) {
137+
outputDir = nullptr;
138+
trees.clear();
139+
offsets.clear();
140+
mergedDFs = 0;
141+
currentDirSize = 0;
142+
}
143+
};
144+
115145
while (in.good() && exitCode == 0) {
116146
in >> line;
117147

@@ -182,6 +212,14 @@ int main(int argc, char* argv[])
182212

183213
auto dfName = ((TObjString*)key1)->GetString().Data();
184214

215+
// If merge-by-name is active, flush accumulated trees when the folder name changes
216+
if (mergeByName && outputDir && std::string(outputDir->GetName()) != std::string(dfName)) {
217+
if (verbosity > 0) {
218+
printf("Folder name changed: closing folder %s.\n", outputDir->GetName());
219+
}
220+
flushTrees(true);
221+
}
222+
185223
if (verbosity > 0) {
186224
printf(" Processing folder %s\n", dfName);
187225
}
@@ -396,21 +434,7 @@ int main(int argc, char* argv[])
396434
if (verbosity > 0) {
397435
printf("Maximum size reached: %ld. Closing folder %s.\n", currentDirSize, dfName);
398436
}
399-
for (auto const& tree : trees) {
400-
// printf("Writing %s\n", tree.first.c_str());
401-
outputDir->cd();
402-
tree.second->Write();
403-
404-
// stats
405-
sizeCompressed[tree.first] += tree.second->GetZipBytes();
406-
sizeUncompressed[tree.first] += tree.second->GetTotBytes();
407-
408-
delete tree.second;
409-
}
410-
outputDir = nullptr;
411-
trees.clear();
412-
offsets.clear();
413-
mergedDFs = 0;
437+
flushTrees(true);
414438
}
415439
}
416440
inputFile->Close();
@@ -421,16 +445,7 @@ int main(int argc, char* argv[])
421445
parentFiles->Write("parentFiles", TObject::kSingleKey);
422446
}
423447

424-
for (auto const& tree : trees) {
425-
outputDir->cd();
426-
tree.second->Write();
427-
428-
// stats
429-
sizeCompressed[tree.first] += tree.second->GetZipBytes();
430-
sizeUncompressed[tree.first] += tree.second->GetTotBytes();
431-
432-
delete tree.second;
433-
}
448+
flushTrees(false);
434449

435450
outputFile->Write();
436451
outputFile->Close();
@@ -462,4 +477,4 @@ int main(int argc, char* argv[])
462477
printf("\n");
463478

464479
return exitCode;
465-
}
480+
}

0 commit comments

Comments
 (0)