Skip to content

Commit b5beeee

Browse files
authored
Storage: Fix TableScan performance regression under wide-sparse table (#10379)
close #10361 Storage: Fix TableScan performance regression under wide-sparse table * Use merged_file_info.size as the buffer size when reading data (mark, min-max index, col-data) from merged file to minimize read amplification * Use merged_file_info.size as the buffer size when parsing data as ChecksumReadBuffer to minimize the memory allocation overhead * Introduce class `MinMaxIndexLoader` to tidy codes of reading min-max index Signed-off-by: JaySon-Huang <tshent@qq.com>
1 parent c077d8b commit b5beeee

8 files changed

Lines changed: 172 additions & 124 deletions

File tree

dbms/src/IO/FileProvider/ChecksumReadBufferBuilder.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,11 @@ std::unique_ptr<ReadBufferFromFileBase> ChecksumReadBufferBuilder::build(
5555
std::unique_ptr<ReadBufferFromFileBase> ChecksumReadBufferBuilder::build(
5656
String && data,
5757
const String & file_name,
58-
size_t estimated_size,
5958
ChecksumAlgo checksum_algorithm,
6059
size_t checksum_frame_size)
6160
{
61+
auto allocation_size = std::min(data.size(), checksum_frame_size);
6262
auto file = std::make_shared<MemoryRandomAccessFile>(file_name, std::forward<String>(data));
63-
auto allocation_size = std::min(estimated_size, checksum_frame_size);
6463
switch (checksum_algorithm)
6564
{
6665
case ChecksumAlgo::None:

dbms/src/IO/FileProvider/ChecksumReadBufferBuilder.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ class ChecksumReadBufferBuilder
4040
static std::unique_ptr<ReadBufferFromFileBase> build(
4141
String && data,
4242
const String & file_name,
43-
size_t estimated_size,
4443
ChecksumAlgo checksum_algorithm,
4544
size_t checksum_frame_size);
4645
};

dbms/src/IO/FileProvider/CompressedReadBufferFromFileBuilder.cpp

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,11 @@ std::unique_ptr<LegacyCompressedReadBufferFromFile> CompressedReadBufferFromFile
4040
std::unique_ptr<CompressedReadBufferFromFile> CompressedReadBufferFromFileBuilder::build(
4141
String && data,
4242
const String & file_name,
43-
size_t estimated_size,
4443
ChecksumAlgo checksum_algorithm,
4544
size_t checksum_frame_size)
4645
{
47-
auto file_in = ChecksumReadBufferBuilder::build(
48-
std::move(data),
49-
file_name,
50-
estimated_size,
51-
checksum_algorithm,
52-
checksum_frame_size);
46+
auto file_in
47+
= ChecksumReadBufferBuilder::build(std::move(data), file_name, checksum_algorithm, checksum_frame_size);
5348
return std::make_unique<CompressedReadBufferFromFileImpl<false>>(std::move(file_in));
5449
}
5550

dbms/src/IO/FileProvider/CompressedReadBufferFromFileBuilder.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ class CompressedReadBufferFromFileBuilder
3434
static std::unique_ptr<CompressedReadBufferFromFile> build(
3535
String && data,
3636
const String & file_name,
37-
size_t estimated_size,
3837
ChecksumAlgo checksum_algorithm,
3938
size_t checksum_frame_size);
4039

dbms/src/Server/DTTool/DTToolBench.cpp

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,7 @@ int benchEntry(const std::vector<std::string> & opts)
418418
opt.emplace(std::map<std::string, std::string>{}, frame, algorithm);
419419
if (version == 2)
420420
{
421+
// frame checksum
421422
DB::STORAGE_FORMAT_CURRENT = DB::STORAGE_FORMAT_V3;
422423
}
423424
else if (version == 3)
@@ -443,21 +444,21 @@ int benchEntry(const std::vector<std::string> & opts)
443444
= genBlocks(random_seed, num_rows, num_cols, field, sparse_ratio, logger);
444445
}
445446

446-
size_t write_cost_ms = 0;
447+
TableID table_id = 1;
447448
auto settings = DB::Settings();
448449
auto db_context = env.getContext();
449450
auto path_pool
450451
= std::make_shared<DB::StoragePathPool>(db_context->getPathPool().withTable("test", "t1", false));
451452
auto storage_pool
452-
= std::make_shared<DB::DM::StoragePool>(*db_context, NullspaceID, /*ns_id*/ 1, *path_pool, "test.t1");
453+
= std::make_shared<DB::DM::StoragePool>(*db_context, NullspaceID, table_id, *path_pool, "test.t1");
453454
auto dm_settings = DB::DM::DeltaMergeStore::Settings{};
454455
auto dm_context = DB::DM::DMContext::createUnique(
455456
*db_context,
456457
path_pool,
457458
storage_pool,
458459
/*min_version_*/ 0,
459460
NullspaceID,
460-
/*physical_table_id*/ 1,
461+
table_id,
461462
/*pk_col_id*/ 0,
462463
false,
463464
1,
@@ -469,6 +470,7 @@ int benchEntry(const std::vector<std::string> & opts)
469470
// Write
470471
if (write_repeat > 0)
471472
{
473+
size_t write_cost_ms = 0;
472474
LOG_INFO(logger, "start writing");
473475
for (size_t i = 0; i < write_repeat; ++i)
474476
{
@@ -489,15 +491,17 @@ int benchEntry(const std::vector<std::string> & opts)
489491
write_cost_ms += duration;
490492
LOG_INFO(logger, "attempt {} finished in {} ms", i, duration);
491493
}
494+
size_t effective_size_on_disk = dmfile->getBytesOnDisk();
492495
LOG_INFO(
493496
logger,
494497
"average write time: {} ms",
495498
(static_cast<double>(write_cost_ms) / static_cast<double>(repeat)));
496499
LOG_INFO(
497500
logger,
498-
"throughput (MB/s): {:.3f}",
499-
(static_cast<double>(effective_size) * 1'000 * static_cast<double>(repeat)
500-
/ static_cast<double>(write_cost_ms) / 1024 / 1024));
501+
"write throughput by uncompressed size: {:.3f}MiB/s;"
502+
" write throughput by compressed size: {:.3f}MiB/s",
503+
(effective_size * 1'000.0 * repeat / write_cost_ms / 1024 / 1024),
504+
(effective_size_on_disk * 1'000.0 * repeat / write_cost_ms / 1024 / 1024));
501505
}
502506

503507
// Read
@@ -548,12 +552,10 @@ int benchEntry(const std::vector<std::string> & opts)
548552
LOG_INFO(logger, "average read time: {} ms", (static_cast<double>(read_cost_ms) / static_cast<double>(repeat)));
549553
LOG_INFO(
550554
logger,
551-
"throughput by deserialized bytes (MB/s): {:.3f}"
552-
" throughput by compressed bytes (MB/s): {:.3f}",
553-
(static_cast<double>(effective_size_read) * 1'000 * static_cast<double>(repeat)
554-
/ static_cast<double>(read_cost_ms) / 1024 / 1024),
555-
(static_cast<double>(effective_size_on_disk) * 1'000 * static_cast<double>(repeat)
556-
/ static_cast<double>(read_cost_ms) / 1024 / 1024));
555+
"read throughput by uncompressed bytes: {:.3f}MiB/s;"
556+
" read throughput by compressed bytes: {:.3f}MiB/s",
557+
(effective_size_read * 1'000.0 * repeat / read_cost_ms / 1024 / 1024),
558+
(effective_size_on_disk * 1'000.0 * repeat / read_cost_ms / 1024 / 1024));
557559
}
558560
catch (const boost::wrapexcept<boost::bad_any_cast> & e)
559561
{

dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp

Lines changed: 23 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -106,34 +106,33 @@ class MarkLoader
106106
col_mark_fname);
107107

108108
const auto & merged_file_info = info_iter->second;
109-
auto file_path = dmfile_meta->mergedPath(merged_file_info.number);
110-
auto encrypt_path = dmfile_meta->encryptionMergedPath(merged_file_info.number);
111-
auto offset = merged_file_info.offset;
112-
auto data_size = merged_file_info.size;
109+
const auto file_path = dmfile_meta->mergedPath(merged_file_info.number);
110+
const auto offset = merged_file_info.offset;
111+
const auto data_size = merged_file_info.size;
113112

114113
if (data_size == 0)
115114
return res;
116115

117116
// First, read from merged file to get the raw data(contains the header)
117+
// Note that we use min(`data_size`, checksum_frame_size) as the size of buffer size in order
118+
// to minimize read amplification in the merged file.
118119
auto buffer = ReadBufferFromRandomAccessFileBuilder::build(
119120
reader.file_provider,
120121
file_path,
121-
encrypt_path,
122-
reader.dmfile->getConfiguration()->getChecksumFrameLength(),
122+
dmfile_meta->encryptionMergedPath(merged_file_info.number),
123+
std::min(data_size, reader.dmfile->getConfiguration()->getChecksumFrameLength()),
123124
read_limiter);
124125
buffer.seek(offset);
125126

126127
// Read the raw data into memory. It is OK because the mark merged into
127128
// merged_file is small enough.
128-
String raw_data;
129-
raw_data.resize(data_size);
129+
String raw_data(data_size, '\0');
130130
buffer.read(reinterpret_cast<char *>(raw_data.data()), data_size);
131131

132-
// Then read from the buffer based on the raw data
132+
// Then read from the buffer based on the raw data. The buffer size is min(data.size(), checksum_frame_size)
133133
auto buf = ChecksumReadBufferBuilder::build(
134134
std::move(raw_data),
135135
file_path, // just for debug, the buffer is part of the merged file
136-
reader.dmfile->getConfiguration()->getChecksumFrameLength(),
137136
reader.dmfile->getConfiguration()->getChecksumAlgorithm(),
138137
reader.dmfile->getConfiguration()->getChecksumFrameLength());
139138
buf->readBig(reinterpret_cast<char *>(res->data()), bytes_size);
@@ -234,9 +233,10 @@ std::unique_ptr<CompressedSeekableReaderBuffer> ColumnReadStream::buildColDataRe
234233
{
235234
const auto * dmfile_meta = typeid_cast<const DMFileMetaV2 *>(reader.dmfile->meta.get());
236235
assert(dmfile_meta != nullptr);
237-
const auto & info = dmfile_meta->merged_sub_file_infos.find(colDataFileName(file_name_base));
238-
if (info == dmfile_meta->merged_sub_file_infos.end())
236+
const auto & info_iter = dmfile_meta->merged_sub_file_infos.find(colDataFileName(file_name_base));
237+
if (info_iter == dmfile_meta->merged_sub_file_infos.end())
239238
{
239+
// Not merged into merged file, read from the original data file.
240240
return CompressedReadBufferFromFileBuilder::build(
241241
reader.file_provider,
242242
reader.dmfile->colDataPath(file_name_base),
@@ -247,32 +247,31 @@ std::unique_ptr<CompressedSeekableReaderBuffer> ColumnReadStream::buildColDataRe
247247
reader.dmfile->getConfiguration()->getChecksumFrameLength());
248248
}
249249

250-
assert(info != dmfile_meta->merged_sub_file_infos.end());
251-
auto file_path = dmfile_meta->mergedPath(info->second.number);
252-
auto encrypt_path = dmfile_meta->encryptionMergedPath(info->second.number);
253-
auto offset = info->second.offset;
254-
auto size = info->second.size;
250+
assert(info_iter != dmfile_meta->merged_sub_file_infos.end());
251+
auto file_path = dmfile_meta->mergedPath(info_iter->second.number);
252+
const auto offset = info_iter->second.offset;
253+
const auto data_size = info_iter->second.size;
255254

256255
// First, read from merged file to get the raw data(contains the header)
256+
// Note that we use min(`data_size`, checksum_frame_size) as the size of buffer size in order
257+
// to minimize read amplification in the merged file.
257258
auto buffer = ReadBufferFromRandomAccessFileBuilder::build(
258259
reader.file_provider,
259260
file_path,
260-
encrypt_path,
261-
reader.dmfile->getConfiguration()->getChecksumFrameLength(),
261+
dmfile_meta->encryptionMergedPath(info_iter->second.number),
262+
std::min(data_size, reader.dmfile->getConfiguration()->getChecksumFrameLength()),
262263
read_limiter);
263264
buffer.seek(offset);
264265

265266
// Read the raw data into memory. It is OK because the mark merged into
266267
// merged_file is small enough.
267-
String raw_data;
268-
raw_data.resize(size);
269-
buffer.read(reinterpret_cast<char *>(raw_data.data()), size);
268+
String raw_data(data_size, '\0');
269+
buffer.read(reinterpret_cast<char *>(raw_data.data()), data_size);
270270

271-
// Then read from the buffer based on the raw data
271+
// Then read from the buffer based on the raw data. The buffer size is min(data.size(), checksum_frame_size)
272272
return CompressedReadBufferFromFileBuilder::build(
273273
std::move(raw_data),
274274
file_path,
275-
reader.dmfile->getConfiguration()->getChecksumFrameLength(),
276275
reader.dmfile->getConfiguration()->getChecksumAlgorithm(),
277276
reader.dmfile->getConfiguration()->getChecksumFrameLength());
278277
}

dbms/src/Storages/DeltaMerge/File/DMFile.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ class DMFile : private boost::noncopyable
358358
friend class DMFileLocalIndexWriter;
359359
friend class DMFileReader;
360360
friend class MarkLoader;
361+
friend class MinMaxIndexLoader;
361362
friend class ColumnReadStream;
362363
friend class DMFilePackFilter;
363364
friend class DMFileBlockInputStreamBuilder;

0 commit comments

Comments
 (0)