From da281d554f6679293af4cee40e13aa65ebaa2e90 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Thu, 19 Mar 2026 17:44:49 +0800 Subject: [PATCH 1/3] feat: add roaring-based position bitmap --- src/iceberg/CMakeLists.txt | 2 + src/iceberg/deletes/CMakeLists.txt | 18 + src/iceberg/deletes/meson.build | 18 + .../deletes/roaring_position_bitmap.cc | 254 +++++++++ src/iceberg/deletes/roaring_position_bitmap.h | 109 ++++ src/iceberg/meson.build | 2 + src/iceberg/test/CMakeLists.txt | 2 + src/iceberg/test/resources/64map32bitvals.bin | Bin 0 -> 48 bytes src/iceberg/test/resources/64mapempty.bin | Bin 0 -> 8 bytes src/iceberg/test/resources/64maphighvals.bin | Bin 0 -> 1086 bytes .../test/resources/64mapspreadvals.bin | Bin 0 -> 408 bytes .../test/roaring_position_bitmap_test.cc | 490 ++++++++++++++++++ 12 files changed, 895 insertions(+) create mode 100644 src/iceberg/deletes/CMakeLists.txt create mode 100644 src/iceberg/deletes/meson.build create mode 100644 src/iceberg/deletes/roaring_position_bitmap.cc create mode 100644 src/iceberg/deletes/roaring_position_bitmap.h create mode 100644 src/iceberg/test/resources/64map32bitvals.bin create mode 100644 src/iceberg/test/resources/64mapempty.bin create mode 100644 src/iceberg/test/resources/64maphighvals.bin create mode 100644 src/iceberg/test/resources/64mapspreadvals.bin create mode 100644 src/iceberg/test/roaring_position_bitmap_test.cc diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index b503a41ea..2f34fd038 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -25,6 +25,7 @@ set(ICEBERG_SOURCES data/position_delete_writer.cc data/writer.cc delete_file_index.cc + deletes/roaring_position_bitmap.cc expression/aggregate.cc expression/binder.cc expression/evaluator.cc @@ -166,6 +167,7 @@ iceberg_install_all_headers(iceberg) add_subdirectory(catalog) add_subdirectory(data) +add_subdirectory(deletes) add_subdirectory(expression) add_subdirectory(manifest) add_subdirectory(puffin) diff --git a/src/iceberg/deletes/CMakeLists.txt b/src/iceberg/deletes/CMakeLists.txt new file mode 100644 index 000000000..2ce7ccf15 --- /dev/null +++ b/src/iceberg/deletes/CMakeLists.txt @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +iceberg_install_all_headers(iceberg/deletes) diff --git a/src/iceberg/deletes/meson.build b/src/iceberg/deletes/meson.build new file mode 100644 index 000000000..28a01de16 --- /dev/null +++ b/src/iceberg/deletes/meson.build @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +install_headers(['roaring_position_bitmap.h'], subdir: 'iceberg/deletes') diff --git a/src/iceberg/deletes/roaring_position_bitmap.cc b/src/iceberg/deletes/roaring_position_bitmap.cc new file mode 100644 index 000000000..2d812ef21 --- /dev/null +++ b/src/iceberg/deletes/roaring_position_bitmap.cc @@ -0,0 +1,254 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/deletes/roaring_position_bitmap.h" + +#include +#include +#include +#include +#include + +#include + +#include "iceberg/util/endian.h" +#include "iceberg/util/macros.h" + +namespace iceberg { + +namespace { + +constexpr size_t kBitmapCountSizeBytes = 8; +constexpr size_t kBitmapKeySizeBytes = 4; + +// Extracts high 32 bits from a 64-bit position (the key). +int32_t Key(int64_t pos) { return static_cast(pos >> 32); } + +// Extracts low 32 bits from a 64-bit position. +uint32_t Pos32Bits(int64_t pos) { return static_cast(pos); } + +// Combines key (high 32 bits) and pos32 (low 32 bits) into a 64-bit +// position. The low 32 bits are zero-extended to avoid sign extension. +int64_t ToPosition(int32_t key, uint32_t pos32) { + return (static_cast(key) << 32) | static_cast(pos32); +} + +void WriteLE64(char* buf, int64_t value) { + auto le = ToLittleEndian(static_cast(value)); + std::memcpy(buf, &le, sizeof(le)); +} + +void WriteLE32(char* buf, int32_t value) { + auto le = ToLittleEndian(static_cast(value)); + std::memcpy(buf, &le, sizeof(le)); +} + +int64_t ReadLE64(const char* buf) { + uint64_t v; + std::memcpy(&v, buf, sizeof(v)); + return static_cast(FromLittleEndian(v)); +} + +int32_t ReadLE32(const char* buf) { + uint32_t v; + std::memcpy(&v, buf, sizeof(v)); + return static_cast(FromLittleEndian(v)); +} + +Status ValidatePosition(int64_t pos) { + if (pos < 0 || pos > RoaringPositionBitmap::kMaxPosition) { + return InvalidArgument("Bitmap supports positions that are >= 0 and <= {}: {}", + RoaringPositionBitmap::kMaxPosition, pos); + } + return {}; +} + +} // namespace + +struct RoaringPositionBitmap::Impl { + std::vector bitmaps; + + void AllocateBitmapsIfNeeded(int32_t required_length) { + if (std::cmp_less(bitmaps.size(), required_length)) { + bitmaps.resize(static_cast(required_length)); + } + } +}; + +RoaringPositionBitmap::RoaringPositionBitmap() : impl_(std::make_unique()) {} + +RoaringPositionBitmap::~RoaringPositionBitmap() = default; + +RoaringPositionBitmap::RoaringPositionBitmap(RoaringPositionBitmap&&) noexcept = default; + +RoaringPositionBitmap& RoaringPositionBitmap::operator=( + RoaringPositionBitmap&&) noexcept = default; + +RoaringPositionBitmap::RoaringPositionBitmap(std::unique_ptr impl) + : impl_(std::move(impl)) {} + +Status RoaringPositionBitmap::Add(int64_t pos) { + ICEBERG_RETURN_UNEXPECTED(ValidatePosition(pos)); + int32_t key = Key(pos); + uint32_t pos32 = Pos32Bits(pos); + impl_->AllocateBitmapsIfNeeded(key + 1); + impl_->bitmaps[key].add(pos32); + return {}; +} + +Status RoaringPositionBitmap::AddRange(int64_t pos_start, int64_t pos_end) { + for (int64_t pos = pos_start; pos < pos_end; ++pos) { + ICEBERG_RETURN_UNEXPECTED(Add(pos)); + } + return {}; +} + +Result RoaringPositionBitmap::Contains(int64_t pos) const { + ICEBERG_RETURN_UNEXPECTED(ValidatePosition(pos)); + int32_t key = Key(pos); + uint32_t pos32 = Pos32Bits(pos); + return std::cmp_less(key, impl_->bitmaps.size()) && impl_->bitmaps[key].contains(pos32); +} + +bool RoaringPositionBitmap::IsEmpty() const { return Cardinality() == 0; } + +size_t RoaringPositionBitmap::Cardinality() const { + size_t total = 0; + for (const auto& bitmap : impl_->bitmaps) { + total += bitmap.cardinality(); + } + return total; +} + +void RoaringPositionBitmap::Or(const RoaringPositionBitmap& other) { + impl_->AllocateBitmapsIfNeeded(static_cast(other.impl_->bitmaps.size())); + for (size_t key = 0; key < other.impl_->bitmaps.size(); ++key) { + impl_->bitmaps[key] |= other.impl_->bitmaps[key]; + } +} + +bool RoaringPositionBitmap::Optimize() { + bool changed = false; + for (auto& bitmap : impl_->bitmaps) { + changed |= bitmap.runOptimize(); + } + return changed; +} + +void RoaringPositionBitmap::ForEach(const std::function& fn) const { + for (size_t key = 0; key < impl_->bitmaps.size(); ++key) { + for (uint32_t pos32 : impl_->bitmaps[key]) { + fn(ToPosition(static_cast(key), pos32)); + } + } +} + +size_t RoaringPositionBitmap::SerializedSizeInBytes() const { + size_t size = kBitmapCountSizeBytes; + for (const auto& bitmap : impl_->bitmaps) { + size += kBitmapKeySizeBytes + bitmap.getSizeInBytes(/*portable=*/true); + } + return size; +} + +Result RoaringPositionBitmap::Serialize() const { + size_t size = SerializedSizeInBytes(); + std::string result(size, '\0'); + char* buf = result.data(); + + // Write bitmap count (array length including empties) + WriteLE64(buf, static_cast(impl_->bitmaps.size())); + buf += kBitmapCountSizeBytes; + + // Write each bitmap with its key + for (int32_t key = 0; key < static_cast(impl_->bitmaps.size()); ++key) { + WriteLE32(buf, key); + buf += kBitmapKeySizeBytes; + size_t written = impl_->bitmaps[key].write(buf, /*portable=*/true); + buf += written; + } + + return result; +} + +Result RoaringPositionBitmap::Deserialize(std::string_view bytes) { + const char* buf = bytes.data(); + size_t remaining = bytes.size(); + + ICEBERG_PRECHECK(remaining >= kBitmapCountSizeBytes, + "Buffer too small for bitmap count: {} bytes", remaining); + + int64_t bitmap_count = ReadLE64(buf); + buf += kBitmapCountSizeBytes; + remaining -= kBitmapCountSizeBytes; + + ICEBERG_PRECHECK( + bitmap_count >= 0 && bitmap_count <= std::numeric_limits::max(), + "Invalid bitmap count: {}", bitmap_count); + + auto impl = std::make_unique(); + int32_t last_key = -1; + auto remaining_count = static_cast(bitmap_count); + + while (remaining_count > 0) { + ICEBERG_PRECHECK(remaining >= kBitmapKeySizeBytes, + "Buffer too small for bitmap key: {} bytes", remaining); + + int32_t key = ReadLE32(buf); + buf += kBitmapKeySizeBytes; + remaining -= kBitmapKeySizeBytes; + + ICEBERG_PRECHECK(key >= 0, "Invalid unsigned key: {}", key); + ICEBERG_PRECHECK(key < std::numeric_limits::max(), "Key is too large: {}", + key); + ICEBERG_PRECHECK(key > last_key, + "Keys must be sorted in ascending order, got key {} after {}", key, + last_key); + + // Fill gaps with empty bitmaps + while (last_key < key - 1) { + impl->bitmaps.emplace_back(); + ++last_key; + } + + // Read bitmap using portable safe deserialization. + // CRoaring's readSafe may throw on corrupted data. + roaring::Roaring bitmap; + try { + bitmap = roaring::Roaring::readSafe(buf, remaining); + } catch (const std::exception& e) { + return InvalidArgument("Failed to deserialize bitmap at key {}: {}", key, e.what()); + } + size_t bitmap_size = bitmap.getSizeInBytes(/*portable=*/true); + ICEBERG_PRECHECK( + bitmap_size <= remaining, + "Buffer too small for bitmap key {}: {} bytes needed, {} bytes available", key, + bitmap_size, remaining); + buf += bitmap_size; + remaining -= bitmap_size; + + impl->bitmaps.push_back(std::move(bitmap)); + last_key = key; + --remaining_count; + } + + return RoaringPositionBitmap(std::move(impl)); +} + +} // namespace iceberg diff --git a/src/iceberg/deletes/roaring_position_bitmap.h b/src/iceberg/deletes/roaring_position_bitmap.h new file mode 100644 index 000000000..df197740b --- /dev/null +++ b/src/iceberg/deletes/roaring_position_bitmap.h @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/deletes/roaring_position_bitmap.h +/// A 64-bit position bitmap using an array of 32-bit Roaring bitmaps. + +#include +#include +#include +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/result.h" + +namespace iceberg { + +/// \brief A bitmap that supports positive 64-bit positions, optimized +/// for cases where most positions fit in 32 bits. +/// +/// Incoming 64-bit positions are divided into a 32-bit "key" using the +/// most significant 4 bytes and a 32-bit position using the least +/// significant 4 bytes. For each key, a 32-bit Roaring bitmap is +/// maintained to store positions for that key. +/// +/// \note The Puffin deletion-vector-v1 wrapping (length prefix, magic +/// bytes, CRC-32) is handled by the Puffin writer/reader layer, not +/// this class. +class ICEBERG_EXPORT RoaringPositionBitmap { + public: + /// \brief Maximum supported position. + static constexpr int64_t kMaxPosition = 0x7FFFFFFE80000000LL; + + RoaringPositionBitmap(); + ~RoaringPositionBitmap(); + + RoaringPositionBitmap(RoaringPositionBitmap&& other) noexcept; + RoaringPositionBitmap& operator=(RoaringPositionBitmap&& other) noexcept; + + RoaringPositionBitmap(const RoaringPositionBitmap&) = delete; + RoaringPositionBitmap& operator=(const RoaringPositionBitmap&) = delete; + + /// \brief Sets a position in the bitmap. + /// \param pos the position (must be >= 0 and <= kMaxPosition) + /// \return Status indicating success or InvalidArgument error + [[nodiscard]] Status Add(int64_t pos); + + /// \brief Sets a range of positions [pos_start, pos_end). + /// \return Status indicating success or InvalidArgument error + [[nodiscard]] Status AddRange(int64_t pos_start, int64_t pos_end); + + /// \brief Checks if a position is set in the bitmap. + /// \return Result or InvalidArgument error + [[nodiscard]] Result Contains(int64_t pos) const; + + /// \brief Returns true if the bitmap has no positions set. + bool IsEmpty() const; + + /// \brief Returns the number of set positions in the bitmap. + size_t Cardinality() const; + + /// \brief Merges all positions from the other bitmap into this one + /// (in-place union). + void Or(const RoaringPositionBitmap& other); + + /// \brief Optimizes the bitmap by applying run-length encoding to + /// containers where it is more space efficient than array or bitset + /// representations. + /// \return true if any container was changed + bool Optimize(); + + /// \brief Iterates over all set positions in ascending order. + void ForEach(const std::function& fn) const; + + /// \brief Returns the serialized size in bytes. + size_t SerializedSizeInBytes() const; + + /// \brief Serializes using the portable format (little-endian). + Result Serialize() const; + + /// \brief Deserializes a bitmap from bytes. + static Result Deserialize(std::string_view bytes); + + private: + struct Impl; + std::unique_ptr impl_; + + explicit RoaringPositionBitmap(std::unique_ptr impl); +}; + +} // namespace iceberg diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build index 2cf1065b0..42f46d5a9 100644 --- a/src/iceberg/meson.build +++ b/src/iceberg/meson.build @@ -43,6 +43,7 @@ iceberg_sources = files( 'arrow_c_data_guard_internal.cc', 'catalog/memory/in_memory_catalog.cc', 'delete_file_index.cc', + 'deletes/roaring_position_bitmap.cc', 'expression/aggregate.cc', 'expression/binder.cc', 'expression/evaluator.cc', @@ -221,6 +222,7 @@ install_headers( ) subdir('catalog') +subdir('deletes') subdir('expression') subdir('manifest') subdir('puffin') diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 768e0507e..995579b20 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -124,6 +124,8 @@ add_iceberg_test(util_test add_iceberg_test(roaring_test SOURCES roaring_test.cc) +add_iceberg_test(roaring_position_bitmap_test SOURCES roaring_position_bitmap_test.cc) + if(ICEBERG_BUILD_BUNDLE) add_iceberg_test(avro_test USE_BUNDLE diff --git a/src/iceberg/test/resources/64map32bitvals.bin b/src/iceberg/test/resources/64map32bitvals.bin new file mode 100644 index 0000000000000000000000000000000000000000..475b894417e44cff61d8810057fc1530cef05718 GIT binary patch literal 48 ocmZQ%KmaQP1_nkjmy9 literal 0 HcmV?d00001 diff --git a/src/iceberg/test/resources/64maphighvals.bin b/src/iceberg/test/resources/64maphighvals.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4312b8d22713991026a36d5d1293cf1960d89ed GIT binary patch literal 1086 zcmd;PfPnY=_rj5t0RsagP#7Y>#UKD@!S*SERgUMnOxf@r{zi~~v PF5QrBO1Grj(uH&%!J7vn literal 0 HcmV?d00001 diff --git a/src/iceberg/test/roaring_position_bitmap_test.cc b/src/iceberg/test/roaring_position_bitmap_test.cc new file mode 100644 index 000000000..e3428784e --- /dev/null +++ b/src/iceberg/test/roaring_position_bitmap_test.cc @@ -0,0 +1,490 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/deletes/roaring_position_bitmap.h" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "iceberg/test/matchers.h" +#include "iceberg/test/test_config.h" + +namespace iceberg { + +namespace { + +constexpr int64_t kBitmapSize = 0xFFFFFFFFL; +constexpr int64_t kBitmapOffset = kBitmapSize + 1L; +constexpr int64_t kContainerSize = 0xFFFF; // Character.MAX_VALUE +constexpr int64_t kContainerOffset = kContainerSize + 1L; + +// Helper to construct a position from bitmap index, container index, and value +int64_t Position(int32_t bitmap_index, int32_t container_index, int64_t value) { + return bitmap_index * kBitmapOffset + container_index * kContainerOffset + value; +} + +std::string ReadTestResource(const std::string& filename) { + std::filesystem::path path = std::filesystem::path(ICEBERG_TEST_RESOURCES) / filename; + std::ifstream file(path, std::ios::binary); + EXPECT_TRUE(file.good()) << "Cannot open: " << path; + return {std::istreambuf_iterator(file), std::istreambuf_iterator()}; +} + +RoaringPositionBitmap RoundTripSerialize(const RoaringPositionBitmap& bitmap) { + auto serialized = bitmap.Serialize(); + EXPECT_THAT(serialized, IsOk()); + auto deserialized = RoaringPositionBitmap::Deserialize(serialized.value()); + EXPECT_THAT(deserialized, IsOk()); + return std::move(deserialized.value()); +} + +void AssertEqualContent(const RoaringPositionBitmap& bitmap, + const std::set& positions) { + ASSERT_EQ(bitmap.Cardinality(), positions.size()); + for (int64_t pos : positions) { + auto result = bitmap.Contains(pos); + ASSERT_THAT(result, IsOk()) << "Error for pos: " << pos; + ASSERT_TRUE(result.value()) << "Missing position: " << pos; + } + bitmap.ForEach([&](int64_t pos) { + ASSERT_TRUE(positions.count(pos) > 0) << "Unexpected position: " << pos; + }); +} + +void AssertEqual(RoaringPositionBitmap& bitmap, const std::set& positions) { + AssertEqualContent(bitmap, positions); + + auto copy1 = RoundTripSerialize(bitmap); + AssertEqualContent(copy1, positions); + + bitmap.Optimize(); + auto copy2 = RoundTripSerialize(bitmap); + AssertEqualContent(copy2, positions); +} + +} // namespace + +TEST(RoaringPositionBitmapTest, TestAdd) { + RoaringPositionBitmap bitmap; + + EXPECT_THAT(bitmap.Add(10L), IsOk()); + EXPECT_TRUE(bitmap.Contains(10L).value()); + + EXPECT_THAT(bitmap.Add(0L), IsOk()); + EXPECT_TRUE(bitmap.Contains(0L).value()); + + EXPECT_THAT(bitmap.Add(10L), IsOk()); // duplicate + EXPECT_TRUE(bitmap.Contains(10L).value()); +} + +TEST(RoaringPositionBitmapTest, TestAddPositionsRequiringMultipleBitmaps) { + RoaringPositionBitmap bitmap; + + int64_t pos1 = (static_cast(0) << 32) | 10L; + int64_t pos2 = (static_cast(1) << 32) | 20L; + int64_t pos3 = (static_cast(2) << 32) | 30L; + int64_t pos4 = (static_cast(100) << 32) | 40L; + + EXPECT_THAT(bitmap.Add(pos1), IsOk()); + EXPECT_THAT(bitmap.Add(pos2), IsOk()); + EXPECT_THAT(bitmap.Add(pos3), IsOk()); + EXPECT_THAT(bitmap.Add(pos4), IsOk()); + + EXPECT_TRUE(bitmap.Contains(pos1).value()); + EXPECT_TRUE(bitmap.Contains(pos2).value()); + EXPECT_TRUE(bitmap.Contains(pos3).value()); + EXPECT_TRUE(bitmap.Contains(pos4).value()); + EXPECT_EQ(bitmap.Cardinality(), 4); + EXPECT_GT(bitmap.SerializedSizeInBytes(), 4); +} + +TEST(RoaringPositionBitmapTest, TestAddRange) { + RoaringPositionBitmap bitmap; + + int64_t start = 10; + int64_t end = 20; + EXPECT_THAT(bitmap.AddRange(start, end), IsOk()); + + for (int64_t pos = start; pos < end; ++pos) { + EXPECT_TRUE(bitmap.Contains(pos).value()); + } + EXPECT_FALSE(bitmap.Contains(9).value()); + EXPECT_FALSE(bitmap.Contains(20).value()); + EXPECT_EQ(bitmap.Cardinality(), 10); +} + +TEST(RoaringPositionBitmapTest, TestAddRangeAcrossKeys) { + RoaringPositionBitmap bitmap; + + int64_t start = (static_cast(1) << 32) - 5; + int64_t end = (static_cast(1) << 32) + 5; + EXPECT_THAT(bitmap.AddRange(start, end), IsOk()); + + for (int64_t pos = start; pos < end; ++pos) { + EXPECT_TRUE(bitmap.Contains(pos).value()); + } + EXPECT_FALSE(bitmap.Contains(0).value()); + EXPECT_FALSE(bitmap.Contains(end).value()); + EXPECT_EQ(bitmap.Cardinality(), 10); +} + +TEST(RoaringPositionBitmapTest, TestAddEmptyRange) { + RoaringPositionBitmap bitmap; + EXPECT_THAT(bitmap.AddRange(10, 10), IsOk()); + EXPECT_TRUE(bitmap.IsEmpty()); +} + +TEST(RoaringPositionBitmapTest, TestOr) { + RoaringPositionBitmap bitmap1; + EXPECT_THAT(bitmap1.Add(10L), IsOk()); + EXPECT_THAT(bitmap1.Add(20L), IsOk()); + + RoaringPositionBitmap bitmap2; + EXPECT_THAT(bitmap2.Add(30L), IsOk()); + EXPECT_THAT(bitmap2.Add(40L), IsOk()); + EXPECT_THAT(bitmap2.Add(static_cast(2) << 32), IsOk()); + + bitmap1.Or(bitmap2); + + EXPECT_TRUE(bitmap1.Contains(10L).value()); + EXPECT_TRUE(bitmap1.Contains(20L).value()); + EXPECT_TRUE(bitmap1.Contains(30L).value()); + EXPECT_TRUE(bitmap1.Contains(40L).value()); + EXPECT_TRUE(bitmap1.Contains(static_cast(2) << 32).value()); + EXPECT_EQ(bitmap1.Cardinality(), 5); + + // bitmap2 should be unchanged + EXPECT_FALSE(bitmap2.Contains(10L).value()); + EXPECT_FALSE(bitmap2.Contains(20L).value()); + EXPECT_EQ(bitmap2.Cardinality(), 3); +} + +TEST(RoaringPositionBitmapTest, TestOrWithEmptyBitmap) { + RoaringPositionBitmap bitmap1; + EXPECT_THAT(bitmap1.Add(10L), IsOk()); + EXPECT_THAT(bitmap1.Add(20L), IsOk()); + + RoaringPositionBitmap empty_bitmap; + bitmap1.Or(empty_bitmap); + + EXPECT_TRUE(bitmap1.Contains(10L).value()); + EXPECT_TRUE(bitmap1.Contains(20L).value()); + EXPECT_EQ(bitmap1.Cardinality(), 2); + + EXPECT_FALSE(empty_bitmap.Contains(10L).value()); + EXPECT_FALSE(empty_bitmap.Contains(20L).value()); + EXPECT_EQ(empty_bitmap.Cardinality(), 0); + EXPECT_TRUE(empty_bitmap.IsEmpty()); +} + +TEST(RoaringPositionBitmapTest, TestOrWithOverlapping) { + RoaringPositionBitmap bitmap1; + EXPECT_THAT(bitmap1.Add(10L), IsOk()); + EXPECT_THAT(bitmap1.Add(20L), IsOk()); + EXPECT_THAT(bitmap1.Add(30L), IsOk()); + + RoaringPositionBitmap bitmap2; + EXPECT_THAT(bitmap2.Add(20L), IsOk()); + EXPECT_THAT(bitmap2.Add(40L), IsOk()); + + bitmap1.Or(bitmap2); + + EXPECT_TRUE(bitmap1.Contains(10L).value()); + EXPECT_TRUE(bitmap1.Contains(20L).value()); + EXPECT_TRUE(bitmap1.Contains(30L).value()); + EXPECT_TRUE(bitmap1.Contains(40L).value()); + EXPECT_EQ(bitmap1.Cardinality(), 4); + + EXPECT_FALSE(bitmap2.Contains(10L).value()); + EXPECT_TRUE(bitmap2.Contains(20L).value()); + EXPECT_FALSE(bitmap2.Contains(30L).value()); + EXPECT_TRUE(bitmap2.Contains(40L).value()); + EXPECT_EQ(bitmap2.Cardinality(), 2); +} + +TEST(RoaringPositionBitmapTest, TestOrSparseBitmaps) { + RoaringPositionBitmap bitmap1; + EXPECT_THAT(bitmap1.Add((static_cast(0) << 32) | 100L), IsOk()); + EXPECT_THAT(bitmap1.Add((static_cast(1) << 32) | 200L), IsOk()); + + RoaringPositionBitmap bitmap2; + EXPECT_THAT(bitmap2.Add((static_cast(2) << 32) | 300L), IsOk()); + EXPECT_THAT(bitmap2.Add((static_cast(3) << 32) | 400L), IsOk()); + + bitmap1.Or(bitmap2); + + EXPECT_TRUE(bitmap1.Contains((static_cast(0) << 32) | 100L).value()); + EXPECT_TRUE(bitmap1.Contains((static_cast(1) << 32) | 200L).value()); + EXPECT_TRUE(bitmap1.Contains((static_cast(2) << 32) | 300L).value()); + EXPECT_TRUE(bitmap1.Contains((static_cast(3) << 32) | 400L).value()); + EXPECT_EQ(bitmap1.Cardinality(), 4); +} + +TEST(RoaringPositionBitmapTest, TestCardinality) { + RoaringPositionBitmap bitmap; + + EXPECT_EQ(bitmap.Cardinality(), 0); + + EXPECT_THAT(bitmap.Add(10L), IsOk()); + EXPECT_THAT(bitmap.Add(20L), IsOk()); + EXPECT_THAT(bitmap.Add(30L), IsOk()); + EXPECT_EQ(bitmap.Cardinality(), 3); + + EXPECT_THAT(bitmap.Add(10L), IsOk()); // already exists + EXPECT_EQ(bitmap.Cardinality(), 3); +} + +TEST(RoaringPositionBitmapTest, TestCardinalitySparseBitmaps) { + RoaringPositionBitmap bitmap; + + EXPECT_THAT(bitmap.Add((static_cast(0) << 32) | 100L), IsOk()); + EXPECT_THAT(bitmap.Add((static_cast(0) << 32) | 101L), IsOk()); + EXPECT_THAT(bitmap.Add((static_cast(0) << 32) | 105L), IsOk()); + EXPECT_THAT(bitmap.Add((static_cast(1) << 32) | 200L), IsOk()); + EXPECT_THAT(bitmap.Add((static_cast(100) << 32) | 300L), IsOk()); + + EXPECT_EQ(bitmap.Cardinality(), 5); +} + +TEST(RoaringPositionBitmapTest, TestSerializeDeserializeRoundTrip) { + RoaringPositionBitmap bitmap; + EXPECT_THAT(bitmap.Add(10L), IsOk()); + EXPECT_THAT(bitmap.Add(20L), IsOk()); + EXPECT_THAT(bitmap.Add((static_cast(1) << 32) | 30L), IsOk()); + + auto copy = RoundTripSerialize(bitmap); + + EXPECT_EQ(copy.Cardinality(), bitmap.Cardinality()); + EXPECT_TRUE(copy.Contains(10L).value()); + EXPECT_TRUE(copy.Contains(20L).value()); + EXPECT_TRUE(copy.Contains((static_cast(1) << 32) | 30L).value()); +} + +TEST(RoaringPositionBitmapTest, TestSerializeDeserializeEmpty) { + RoaringPositionBitmap bitmap; + auto copy = RoundTripSerialize(bitmap); + EXPECT_TRUE(copy.IsEmpty()); + EXPECT_EQ(copy.Cardinality(), 0); +} + +TEST(RoaringPositionBitmapTest, TestSerializeDeserializeAllContainerBitmap) { + RoaringPositionBitmap bitmap; + + // bitmap 0, container 0 (array - few elements) + EXPECT_THAT(bitmap.Add(Position(0, 0, 5)), IsOk()); + EXPECT_THAT(bitmap.Add(Position(0, 0, 7)), IsOk()); + + // bitmap 0, container 1 (array that can be compressed) + EXPECT_THAT(bitmap.AddRange(Position(0, 1, 1), Position(0, 1, 1000)), IsOk()); + + // bitmap 0, container 2 (bitset - nearly full container) + EXPECT_THAT(bitmap.AddRange(Position(0, 2, 1), Position(0, 2, kContainerOffset - 1)), + IsOk()); + + // bitmap 1, container 0 (array) + EXPECT_THAT(bitmap.Add(Position(1, 0, 10)), IsOk()); + EXPECT_THAT(bitmap.Add(Position(1, 0, 20)), IsOk()); + + // bitmap 1, container 1 (array that can be compressed) + EXPECT_THAT(bitmap.AddRange(Position(1, 1, 10), Position(1, 1, 500)), IsOk()); + + // bitmap 1, container 2 (bitset) + EXPECT_THAT(bitmap.AddRange(Position(1, 2, 1), Position(1, 2, kContainerOffset - 1)), + IsOk()); + + EXPECT_TRUE(bitmap.Optimize()); + + auto copy = RoundTripSerialize(bitmap); + + EXPECT_EQ(copy.Cardinality(), bitmap.Cardinality()); + copy.ForEach([&](int64_t pos) { EXPECT_TRUE(bitmap.Contains(pos).value()); }); + bitmap.ForEach([&](int64_t pos) { EXPECT_TRUE(copy.Contains(pos).value()); }); +} + +TEST(RoaringPositionBitmapTest, TestForEach) { + RoaringPositionBitmap bitmap; + EXPECT_THAT(bitmap.Add(30L), IsOk()); + EXPECT_THAT(bitmap.Add(10L), IsOk()); + EXPECT_THAT(bitmap.Add(20L), IsOk()); + EXPECT_THAT(bitmap.Add((static_cast(1) << 32) | 5L), IsOk()); + + std::vector positions; + bitmap.ForEach([&](int64_t pos) { positions.push_back(pos); }); + + ASSERT_EQ(positions.size(), 4u); + EXPECT_EQ(positions[0], 10L); + EXPECT_EQ(positions[1], 20L); + EXPECT_EQ(positions[2], 30L); + EXPECT_EQ(positions[3], (static_cast(1) << 32) | 5L); +} + +TEST(RoaringPositionBitmapTest, TestIsEmpty) { + RoaringPositionBitmap bitmap; + EXPECT_TRUE(bitmap.IsEmpty()); + + EXPECT_THAT(bitmap.Add(10L), IsOk()); + EXPECT_FALSE(bitmap.IsEmpty()); +} + +TEST(RoaringPositionBitmapTest, TestOptimize) { + RoaringPositionBitmap bitmap; + EXPECT_THAT(bitmap.AddRange(0, 10000), IsOk()); + + bool changed = bitmap.Optimize(); + EXPECT_TRUE(changed); + + // Content should be unchanged after RLE optimization + EXPECT_EQ(bitmap.Cardinality(), 10000); + for (int64_t i = 0; i < 10000; ++i) { + EXPECT_TRUE(bitmap.Contains(i).value()); + } + + // Round-trip should preserve content after RLE + auto copy = RoundTripSerialize(bitmap); + EXPECT_EQ(copy.Cardinality(), 10000); +} + +TEST(RoaringPositionBitmapTest, TestUnsupportedPositions) { + RoaringPositionBitmap bitmap; + + // Negative position + EXPECT_THAT(bitmap.Add(-1L), IsError(ErrorKind::kInvalidArgument)); + + // Contains with negative position + EXPECT_THAT(bitmap.Contains(-1L), IsError(ErrorKind::kInvalidArgument)); + + // Position exceeding MAX_POSITION + EXPECT_THAT(bitmap.Add(RoaringPositionBitmap::kMaxPosition + 1L), + IsError(ErrorKind::kInvalidArgument)); + + // Contains with position exceeding MAX_POSITION + EXPECT_THAT(bitmap.Contains(RoaringPositionBitmap::kMaxPosition + 1L), + IsError(ErrorKind::kInvalidArgument)); +} + +TEST(RoaringPositionBitmapTest, TestDeserializeSupportedRoaringExamples) { + for (const auto& file : + {"64map32bitvals.bin", "64mapempty.bin", "64mapspreadvals.bin"}) { + std::string data = ReadTestResource(file); + auto result = RoaringPositionBitmap::Deserialize(data); + EXPECT_THAT(RoaringPositionBitmap::Deserialize(data), IsOk()); + } +} + +TEST(RoaringPositionBitmapTest, TestDeserializeUnsupportedRoaringExample) { + // This file contains a value with key larger than max supported + std::string data = ReadTestResource("64maphighvals.bin"); + auto result = RoaringPositionBitmap::Deserialize(data); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); + EXPECT_THAT(result, HasErrorMessage("Invalid unsigned key")); +} + +TEST(RoaringPositionBitmapTest, TestRandomSparseBitmap) { + std::mt19937_64 rng(42); + RoaringPositionBitmap bitmap; + std::set positions; + + std::uniform_int_distribution dist(0, static_cast(5) << 32); + + for (int i = 0; i < 100000; ++i) { + int64_t pos = dist(rng); + positions.insert(pos); + ASSERT_THAT(bitmap.Add(pos), IsOk()); + } + + AssertEqual(bitmap, positions); + + // Random lookups + std::mt19937_64 rng2(123); + std::uniform_int_distribution lookup_dist(0, + RoaringPositionBitmap::kMaxPosition); + for (int i = 0; i < 20000; ++i) { + int64_t pos = lookup_dist(rng2); + auto result = bitmap.Contains(pos); + ASSERT_THAT(result, IsOk()); + EXPECT_EQ(result.value(), positions.count(pos) > 0); + } +} + +TEST(RoaringPositionBitmapTest, TestRandomDenseBitmap) { + RoaringPositionBitmap bitmap; + std::set positions; + + // Create dense ranges across multiple bitmap keys + for (int64_t offset : {static_cast(0), static_cast(2) << 32, + static_cast(5) << 32}) { + for (int64_t i = 0; i < 10000; ++i) { + ASSERT_THAT(bitmap.Add(offset + i), IsOk()); + positions.insert(offset + i); + } + } + + AssertEqual(bitmap, positions); +} + +TEST(RoaringPositionBitmapTest, TestRandomMixedBitmap) { + std::mt19937_64 rng(42); + RoaringPositionBitmap bitmap; + std::set positions; + + // Sparse positions in [3<<32, 5<<32) + std::uniform_int_distribution dist1(static_cast(3) << 32, + static_cast(5) << 32); + for (int i = 0; i < 50000; ++i) { + int64_t pos = dist1(rng); + positions.insert(pos); + ASSERT_THAT(bitmap.Add(pos), IsOk()); + } + + // Dense range in [0, 10000) + for (int64_t i = 0; i < 10000; ++i) { + ASSERT_THAT(bitmap.Add(i), IsOk()); + positions.insert(i); + } + + // More sparse positions in [0, 1<<32) + std::uniform_int_distribution dist2(0, static_cast(1) << 32); + for (int i = 0; i < 5000; ++i) { + int64_t pos = dist2(rng); + positions.insert(pos); + ASSERT_THAT(bitmap.Add(pos), IsOk()); + } + + AssertEqual(bitmap, positions); +} + +TEST(RoaringPositionBitmapTest, TestDeserializeInvalidData) { + // Buffer too small + auto result = RoaringPositionBitmap::Deserialize(""); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); + + // Invalid bitmap count (very large) + std::string buf(8, '\xFF'); + result = RoaringPositionBitmap::Deserialize(buf); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); +} + +} // namespace iceberg From c3d16d8ae7bcc6b48e9701123fef13f52db31066 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Tue, 24 Mar 2026 10:14:26 +0800 Subject: [PATCH 2/3] address comments and polish test --- .../deletes/roaring_position_bitmap.cc | 22 +- src/iceberg/deletes/roaring_position_bitmap.h | 6 +- src/iceberg/test/CMakeLists.txt | 3 +- src/iceberg/test/meson.build | 1 + .../test/roaring_position_bitmap_test.cc | 505 ++++++++++-------- 5 files changed, 314 insertions(+), 223 deletions(-) diff --git a/src/iceberg/deletes/roaring_position_bitmap.cc b/src/iceberg/deletes/roaring_position_bitmap.cc index 2d812ef21..e47c2551f 100644 --- a/src/iceberg/deletes/roaring_position_bitmap.cc +++ b/src/iceberg/deletes/roaring_position_bitmap.cc @@ -41,12 +41,12 @@ constexpr size_t kBitmapKeySizeBytes = 4; int32_t Key(int64_t pos) { return static_cast(pos >> 32); } // Extracts low 32 bits from a 64-bit position. -uint32_t Pos32Bits(int64_t pos) { return static_cast(pos); } +uint32_t Pos32Bits(int64_t pos) { return static_cast(0xFFFFFFFF & pos); } // Combines key (high 32 bits) and pos32 (low 32 bits) into a 64-bit // position. The low 32 bits are zero-extended to avoid sign extension. int64_t ToPosition(int32_t key, uint32_t pos32) { - return (static_cast(key) << 32) | static_cast(pos32); + return (int64_t{key} << 32) | int64_t{pos32}; } void WriteLE64(char* buf, int64_t value) { @@ -95,6 +95,20 @@ RoaringPositionBitmap::RoaringPositionBitmap() : impl_(std::make_unique()) RoaringPositionBitmap::~RoaringPositionBitmap() = default; +RoaringPositionBitmap::RoaringPositionBitmap(const RoaringPositionBitmap& other) + : impl_(other.impl_ != nullptr ? std::make_unique(*other.impl_) + : std::make_unique()) {} + +RoaringPositionBitmap& RoaringPositionBitmap::operator=( + const RoaringPositionBitmap& other) { + if (this == &other) { + return *this; + } + impl_ = other.impl_ != nullptr ? std::make_unique(*other.impl_) + : std::make_unique(); + return *this; +} + RoaringPositionBitmap::RoaringPositionBitmap(RoaringPositionBitmap&&) noexcept = default; RoaringPositionBitmap& RoaringPositionBitmap::operator=( @@ -167,6 +181,8 @@ size_t RoaringPositionBitmap::SerializedSizeInBytes() const { return size; } +// Serializes using the portable format (little-endian). +// See https://iceberg.apache.org/puffin-spec/#deletion-vector-v1-blob-type Result RoaringPositionBitmap::Serialize() const { size_t size = SerializedSizeInBytes(); std::string result(size, '\0'); @@ -243,7 +259,7 @@ Result RoaringPositionBitmap::Deserialize(std::string_vie buf += bitmap_size; remaining -= bitmap_size; - impl->bitmaps.push_back(std::move(bitmap)); + impl->bitmaps.emplace_back(std::move(bitmap)); last_key = key; --remaining_count; } diff --git a/src/iceberg/deletes/roaring_position_bitmap.h b/src/iceberg/deletes/roaring_position_bitmap.h index df197740b..650160c1a 100644 --- a/src/iceberg/deletes/roaring_position_bitmap.h +++ b/src/iceberg/deletes/roaring_position_bitmap.h @@ -46,7 +46,7 @@ namespace iceberg { /// this class. class ICEBERG_EXPORT RoaringPositionBitmap { public: - /// \brief Maximum supported position. + /// \brief Maximum supported position (aligned with the Java implementation). static constexpr int64_t kMaxPosition = 0x7FFFFFFE80000000LL; RoaringPositionBitmap(); @@ -55,8 +55,8 @@ class ICEBERG_EXPORT RoaringPositionBitmap { RoaringPositionBitmap(RoaringPositionBitmap&& other) noexcept; RoaringPositionBitmap& operator=(RoaringPositionBitmap&& other) noexcept; - RoaringPositionBitmap(const RoaringPositionBitmap&) = delete; - RoaringPositionBitmap& operator=(const RoaringPositionBitmap&) = delete; + RoaringPositionBitmap(const RoaringPositionBitmap& other); + RoaringPositionBitmap& operator=(const RoaringPositionBitmap& other); /// \brief Sets a position in the bitmap. /// \param pos the position (must be >= 0 and <= kMaxPosition) diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 995579b20..c802df8a5 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -115,6 +115,7 @@ add_iceberg_test(util_test endian_test.cc formatter_test.cc location_util_test.cc + roaring_position_bitmap_test.cc string_util_test.cc transform_util_test.cc truncate_util_test.cc @@ -124,8 +125,6 @@ add_iceberg_test(util_test add_iceberg_test(roaring_test SOURCES roaring_test.cc) -add_iceberg_test(roaring_position_bitmap_test SOURCES roaring_position_bitmap_test.cc) - if(ICEBERG_BUILD_BUNDLE) add_iceberg_test(avro_test USE_BUNDLE diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index df2d5db8e..1eafc5fe1 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -90,6 +90,7 @@ iceberg_tests = { 'endian_test.cc', 'formatter_test.cc', 'location_util_test.cc', + 'roaring_position_bitmap_test.cc', 'string_util_test.cc', 'transform_util_test.cc', 'truncate_util_test.cc', diff --git a/src/iceberg/test/roaring_position_bitmap_test.cc b/src/iceberg/test/roaring_position_bitmap_test.cc index e3428784e..8d7020b09 100644 --- a/src/iceberg/test/roaring_position_bitmap_test.cc +++ b/src/iceberg/test/roaring_position_bitmap_test.cc @@ -19,6 +19,7 @@ #include "iceberg/deletes/roaring_position_bitmap.h" +#include #include #include #include @@ -65,12 +66,11 @@ void AssertEqualContent(const RoaringPositionBitmap& bitmap, const std::set& positions) { ASSERT_EQ(bitmap.Cardinality(), positions.size()); for (int64_t pos : positions) { - auto result = bitmap.Contains(pos); - ASSERT_THAT(result, IsOk()) << "Error for pos: " << pos; - ASSERT_TRUE(result.value()) << "Missing position: " << pos; + ASSERT_THAT(bitmap.Contains(pos), HasValue(::testing::Eq(true))) + << "Position not found: " << pos; } bitmap.ForEach([&](int64_t pos) { - ASSERT_TRUE(positions.count(pos) > 0) << "Unexpected position: " << pos; + ASSERT_TRUE(positions.contains(pos)) << "Unexpected position: " << pos; }); } @@ -85,329 +85,368 @@ void AssertEqual(RoaringPositionBitmap& bitmap, const std::set& positio AssertEqualContent(copy2, positions); } -} // namespace - -TEST(RoaringPositionBitmapTest, TestAdd) { +RoaringPositionBitmap BuildBitmap(const std::set& positions) { RoaringPositionBitmap bitmap; - - EXPECT_THAT(bitmap.Add(10L), IsOk()); - EXPECT_TRUE(bitmap.Contains(10L).value()); - - EXPECT_THAT(bitmap.Add(0L), IsOk()); - EXPECT_TRUE(bitmap.Contains(0L).value()); - - EXPECT_THAT(bitmap.Add(10L), IsOk()); // duplicate - EXPECT_TRUE(bitmap.Contains(10L).value()); + for (int64_t pos : positions) { + EXPECT_THAT(bitmap.Add(pos), IsOk()); + } + return bitmap; } -TEST(RoaringPositionBitmapTest, TestAddPositionsRequiringMultipleBitmaps) { - RoaringPositionBitmap bitmap; +struct AddRangeParams { + const char* name; + int64_t start; + int64_t end; + std::vector absent_positions; +}; - int64_t pos1 = (static_cast(0) << 32) | 10L; - int64_t pos2 = (static_cast(1) << 32) | 20L; - int64_t pos3 = (static_cast(2) << 32) | 30L; - int64_t pos4 = (static_cast(100) << 32) | 40L; - - EXPECT_THAT(bitmap.Add(pos1), IsOk()); - EXPECT_THAT(bitmap.Add(pos2), IsOk()); - EXPECT_THAT(bitmap.Add(pos3), IsOk()); - EXPECT_THAT(bitmap.Add(pos4), IsOk()); - - EXPECT_TRUE(bitmap.Contains(pos1).value()); - EXPECT_TRUE(bitmap.Contains(pos2).value()); - EXPECT_TRUE(bitmap.Contains(pos3).value()); - EXPECT_TRUE(bitmap.Contains(pos4).value()); - EXPECT_EQ(bitmap.Cardinality(), 4); - EXPECT_GT(bitmap.SerializedSizeInBytes(), 4); -} +class RoaringPositionBitmapAddRangeTest + : public ::testing::TestWithParam {}; -TEST(RoaringPositionBitmapTest, TestAddRange) { +TEST_P(RoaringPositionBitmapAddRangeTest, AddsExpectedPositions) { + const auto& param = GetParam(); RoaringPositionBitmap bitmap; + ASSERT_THAT(bitmap.AddRange(param.start, param.end), IsOk()); - int64_t start = 10; - int64_t end = 20; - EXPECT_THAT(bitmap.AddRange(start, end), IsOk()); - - for (int64_t pos = start; pos < end; ++pos) { - EXPECT_TRUE(bitmap.Contains(pos).value()); + std::set expected_positions; + for (int64_t pos = param.start; pos < param.end; ++pos) { + expected_positions.insert(pos); + } + AssertEqualContent(bitmap, expected_positions); + for (int64_t pos : param.absent_positions) { + ASSERT_FALSE(bitmap.Contains(pos).value()); } - EXPECT_FALSE(bitmap.Contains(9).value()); - EXPECT_FALSE(bitmap.Contains(20).value()); - EXPECT_EQ(bitmap.Cardinality(), 10); } -TEST(RoaringPositionBitmapTest, TestAddRangeAcrossKeys) { - RoaringPositionBitmap bitmap; +INSTANTIATE_TEST_SUITE_P( + AddRangeScenarios, RoaringPositionBitmapAddRangeTest, + ::testing::Values(AddRangeParams{.name = "single_key", + .start = 10, + .end = 20, + .absent_positions = {9, 20}}, + AddRangeParams{ + .name = "across_keys", + .start = (int64_t{1} << 32) - 5, + .end = (int64_t{1} << 32) + 5, + .absent_positions = {0, (int64_t{1} << 32) + 5}, + }), + [](const ::testing::TestParamInfo& info) { return info.param.name; }); + +struct OrParams { + const char* name; + std::set lhs_input; + std::set rhs_input; + std::set lhs_expected; + std::set rhs_expected; +}; + +class RoaringPositionBitmapOrTest : public ::testing::TestWithParam {}; + +TEST_P(RoaringPositionBitmapOrTest, ProducesExpectedUnionAndKeepsRhsUnchanged) { + const auto& param = GetParam(); + auto lhs = BuildBitmap(param.lhs_input); + auto rhs = BuildBitmap(param.rhs_input); + + lhs.Or(rhs); + + AssertEqualContent(lhs, param.lhs_expected); + AssertEqualContent(rhs, param.rhs_expected); +} - int64_t start = (static_cast(1) << 32) - 5; - int64_t end = (static_cast(1) << 32) + 5; - EXPECT_THAT(bitmap.AddRange(start, end), IsOk()); +INSTANTIATE_TEST_SUITE_P( + OrScenarios, RoaringPositionBitmapOrTest, + ::testing::Values( + OrParams{ + .name = "disjoint", + .lhs_input = {10L, 20L}, + .rhs_input = {30L, 40L, int64_t{2} << 32}, + .lhs_expected = {10L, 20L, 30L, 40L, int64_t{2} << 32}, + .rhs_expected = {30L, 40L, int64_t{2} << 32}, + }, + OrParams{ + .name = "rhs_empty", + .lhs_input = {10L, 20L}, + .rhs_input = {}, + .lhs_expected = {10L, 20L}, + .rhs_expected = {}, + }, + OrParams{ + .name = "overlapping", + .lhs_input = {10L, 20L, 30L}, + .rhs_input = {20L, 40L}, + .lhs_expected = {10L, 20L, 30L, 40L}, + .rhs_expected = {20L, 40L}, + }, + OrParams{ + .name = "sparse_multi_key", + .lhs_input = {100L, (int64_t{1} << 32) | 200L}, + .rhs_input = {(int64_t{2} << 32) | 300L, (int64_t{3} << 32) | 400L}, + .lhs_expected = {100L, (int64_t{1} << 32) | 200L, (int64_t{2} << 32) | 300L, + (int64_t{3} << 32) | 400L}, + .rhs_expected = {(int64_t{2} << 32) | 300L, (int64_t{3} << 32) | 400L}, + }), + [](const ::testing::TestParamInfo& info) { return info.param.name; }); + +enum class InteropBitmapShape { + kEmpty, + kOnly32BitPositions, + kSpreadAcrossKeys, +}; + +struct InteropCase { + const char* file_name; + InteropBitmapShape expected_shape; +}; + +void AssertInteropBitmapShape(const RoaringPositionBitmap& bitmap, + InteropBitmapShape expected_shape) { + bool saw_pos_lt_32_bit = false; + bool saw_pos_ge_32_bit = false; - for (int64_t pos = start; pos < end; ++pos) { - EXPECT_TRUE(bitmap.Contains(pos).value()); - } - EXPECT_FALSE(bitmap.Contains(0).value()); - EXPECT_FALSE(bitmap.Contains(end).value()); - EXPECT_EQ(bitmap.Cardinality(), 10); -} + bitmap.ForEach([&](int64_t pos) { + if (pos < (int64_t{1} << 32)) { + saw_pos_lt_32_bit = true; + } else { + saw_pos_ge_32_bit = true; + } + }); -TEST(RoaringPositionBitmapTest, TestAddEmptyRange) { - RoaringPositionBitmap bitmap; - EXPECT_THAT(bitmap.AddRange(10, 10), IsOk()); - EXPECT_TRUE(bitmap.IsEmpty()); + switch (expected_shape) { + case InteropBitmapShape::kEmpty: + ASSERT_TRUE(bitmap.IsEmpty()); + ASSERT_EQ(bitmap.Cardinality(), 0u); + break; + case InteropBitmapShape::kOnly32BitPositions: + ASSERT_GT(bitmap.Cardinality(), 0u); + ASSERT_TRUE(saw_pos_lt_32_bit); + ASSERT_FALSE(saw_pos_ge_32_bit); + break; + case InteropBitmapShape::kSpreadAcrossKeys: + ASSERT_GT(bitmap.Cardinality(), 0u); + ASSERT_TRUE(saw_pos_lt_32_bit); + ASSERT_TRUE(saw_pos_ge_32_bit); + break; + } } -TEST(RoaringPositionBitmapTest, TestOr) { - RoaringPositionBitmap bitmap1; - EXPECT_THAT(bitmap1.Add(10L), IsOk()); - EXPECT_THAT(bitmap1.Add(20L), IsOk()); - - RoaringPositionBitmap bitmap2; - EXPECT_THAT(bitmap2.Add(30L), IsOk()); - EXPECT_THAT(bitmap2.Add(40L), IsOk()); - EXPECT_THAT(bitmap2.Add(static_cast(2) << 32), IsOk()); - - bitmap1.Or(bitmap2); - - EXPECT_TRUE(bitmap1.Contains(10L).value()); - EXPECT_TRUE(bitmap1.Contains(20L).value()); - EXPECT_TRUE(bitmap1.Contains(30L).value()); - EXPECT_TRUE(bitmap1.Contains(40L).value()); - EXPECT_TRUE(bitmap1.Contains(static_cast(2) << 32).value()); - EXPECT_EQ(bitmap1.Cardinality(), 5); - - // bitmap2 should be unchanged - EXPECT_FALSE(bitmap2.Contains(10L).value()); - EXPECT_FALSE(bitmap2.Contains(20L).value()); - EXPECT_EQ(bitmap2.Cardinality(), 3); -} +} // namespace -TEST(RoaringPositionBitmapTest, TestOrWithEmptyBitmap) { - RoaringPositionBitmap bitmap1; - EXPECT_THAT(bitmap1.Add(10L), IsOk()); - EXPECT_THAT(bitmap1.Add(20L), IsOk()); +TEST(RoaringPositionBitmapTest, TestAdd) { + RoaringPositionBitmap bitmap; - RoaringPositionBitmap empty_bitmap; - bitmap1.Or(empty_bitmap); + ASSERT_THAT(bitmap.Add(10L), IsOk()); + ASSERT_TRUE(bitmap.Contains(10L).value()); - EXPECT_TRUE(bitmap1.Contains(10L).value()); - EXPECT_TRUE(bitmap1.Contains(20L).value()); - EXPECT_EQ(bitmap1.Cardinality(), 2); + ASSERT_THAT(bitmap.Add(0L), IsOk()); + ASSERT_TRUE(bitmap.Contains(0L).value()); - EXPECT_FALSE(empty_bitmap.Contains(10L).value()); - EXPECT_FALSE(empty_bitmap.Contains(20L).value()); - EXPECT_EQ(empty_bitmap.Cardinality(), 0); - EXPECT_TRUE(empty_bitmap.IsEmpty()); + ASSERT_THAT(bitmap.Add(10L), IsOk()); // duplicate + ASSERT_TRUE(bitmap.Contains(10L).value()); } -TEST(RoaringPositionBitmapTest, TestOrWithOverlapping) { - RoaringPositionBitmap bitmap1; - EXPECT_THAT(bitmap1.Add(10L), IsOk()); - EXPECT_THAT(bitmap1.Add(20L), IsOk()); - EXPECT_THAT(bitmap1.Add(30L), IsOk()); - - RoaringPositionBitmap bitmap2; - EXPECT_THAT(bitmap2.Add(20L), IsOk()); - EXPECT_THAT(bitmap2.Add(40L), IsOk()); - - bitmap1.Or(bitmap2); - - EXPECT_TRUE(bitmap1.Contains(10L).value()); - EXPECT_TRUE(bitmap1.Contains(20L).value()); - EXPECT_TRUE(bitmap1.Contains(30L).value()); - EXPECT_TRUE(bitmap1.Contains(40L).value()); - EXPECT_EQ(bitmap1.Cardinality(), 4); - - EXPECT_FALSE(bitmap2.Contains(10L).value()); - EXPECT_TRUE(bitmap2.Contains(20L).value()); - EXPECT_FALSE(bitmap2.Contains(30L).value()); - EXPECT_TRUE(bitmap2.Contains(40L).value()); - EXPECT_EQ(bitmap2.Cardinality(), 2); -} +TEST(RoaringPositionBitmapTest, TestAddPositionsRequiringMultipleBitmaps) { + RoaringPositionBitmap bitmap; -TEST(RoaringPositionBitmapTest, TestOrSparseBitmaps) { - RoaringPositionBitmap bitmap1; - EXPECT_THAT(bitmap1.Add((static_cast(0) << 32) | 100L), IsOk()); - EXPECT_THAT(bitmap1.Add((static_cast(1) << 32) | 200L), IsOk()); + int64_t pos1 = 10L; + int64_t pos2 = (int64_t{1} << 32) | 20L; + int64_t pos3 = (int64_t{2} << 32) | 30L; + int64_t pos4 = (int64_t{100} << 32) | 40L; - RoaringPositionBitmap bitmap2; - EXPECT_THAT(bitmap2.Add((static_cast(2) << 32) | 300L), IsOk()); - EXPECT_THAT(bitmap2.Add((static_cast(3) << 32) | 400L), IsOk()); + ASSERT_THAT(bitmap.Add(pos1), IsOk()); + ASSERT_THAT(bitmap.Add(pos2), IsOk()); + ASSERT_THAT(bitmap.Add(pos3), IsOk()); + ASSERT_THAT(bitmap.Add(pos4), IsOk()); - bitmap1.Or(bitmap2); + AssertEqualContent(bitmap, {pos1, pos2, pos3, pos4}); + ASSERT_EQ(bitmap.SerializedSizeInBytes(), 1260); +} - EXPECT_TRUE(bitmap1.Contains((static_cast(0) << 32) | 100L).value()); - EXPECT_TRUE(bitmap1.Contains((static_cast(1) << 32) | 200L).value()); - EXPECT_TRUE(bitmap1.Contains((static_cast(2) << 32) | 300L).value()); - EXPECT_TRUE(bitmap1.Contains((static_cast(3) << 32) | 400L).value()); - EXPECT_EQ(bitmap1.Cardinality(), 4); +TEST(RoaringPositionBitmapTest, TestAddEmptyRange) { + RoaringPositionBitmap bitmap; + ASSERT_THAT(bitmap.AddRange(10, 10), IsOk()); + ASSERT_TRUE(bitmap.IsEmpty()); } TEST(RoaringPositionBitmapTest, TestCardinality) { RoaringPositionBitmap bitmap; - EXPECT_EQ(bitmap.Cardinality(), 0); + ASSERT_EQ(bitmap.Cardinality(), 0); - EXPECT_THAT(bitmap.Add(10L), IsOk()); - EXPECT_THAT(bitmap.Add(20L), IsOk()); - EXPECT_THAT(bitmap.Add(30L), IsOk()); - EXPECT_EQ(bitmap.Cardinality(), 3); + ASSERT_THAT(bitmap.Add(10L), IsOk()); + ASSERT_THAT(bitmap.Add(20L), IsOk()); + ASSERT_THAT(bitmap.Add(30L), IsOk()); + ASSERT_EQ(bitmap.Cardinality(), 3); - EXPECT_THAT(bitmap.Add(10L), IsOk()); // already exists - EXPECT_EQ(bitmap.Cardinality(), 3); + ASSERT_THAT(bitmap.Add(10L), IsOk()); // already exists + ASSERT_EQ(bitmap.Cardinality(), 3); } TEST(RoaringPositionBitmapTest, TestCardinalitySparseBitmaps) { RoaringPositionBitmap bitmap; - EXPECT_THAT(bitmap.Add((static_cast(0) << 32) | 100L), IsOk()); - EXPECT_THAT(bitmap.Add((static_cast(0) << 32) | 101L), IsOk()); - EXPECT_THAT(bitmap.Add((static_cast(0) << 32) | 105L), IsOk()); - EXPECT_THAT(bitmap.Add((static_cast(1) << 32) | 200L), IsOk()); - EXPECT_THAT(bitmap.Add((static_cast(100) << 32) | 300L), IsOk()); + ASSERT_THAT(bitmap.Add(100L), IsOk()); + ASSERT_THAT(bitmap.Add(101L), IsOk()); + ASSERT_THAT(bitmap.Add(105L), IsOk()); + ASSERT_THAT(bitmap.Add((int64_t{1} << 32) | 200L), IsOk()); + ASSERT_THAT(bitmap.Add((int64_t{100} << 32) | 300L), IsOk()); - EXPECT_EQ(bitmap.Cardinality(), 5); + ASSERT_EQ(bitmap.Cardinality(), 5); } TEST(RoaringPositionBitmapTest, TestSerializeDeserializeRoundTrip) { RoaringPositionBitmap bitmap; - EXPECT_THAT(bitmap.Add(10L), IsOk()); - EXPECT_THAT(bitmap.Add(20L), IsOk()); - EXPECT_THAT(bitmap.Add((static_cast(1) << 32) | 30L), IsOk()); + ASSERT_THAT(bitmap.Add(10L), IsOk()); + ASSERT_THAT(bitmap.Add(20L), IsOk()); + ASSERT_THAT(bitmap.Add((int64_t{1} << 32) | 30L), IsOk()); auto copy = RoundTripSerialize(bitmap); + AssertEqualContent(copy, {10L, 20L, (int64_t{1} << 32) | 30L}); +} + +TEST(RoaringPositionBitmapTest, TestCopyConstructor) { + RoaringPositionBitmap bitmap; + ASSERT_THAT(bitmap.Add(10L), IsOk()); + ASSERT_THAT(bitmap.Add((int64_t{2} << 32) | 30L), IsOk()); + + RoaringPositionBitmap copy(bitmap); - EXPECT_EQ(copy.Cardinality(), bitmap.Cardinality()); - EXPECT_TRUE(copy.Contains(10L).value()); - EXPECT_TRUE(copy.Contains(20L).value()); - EXPECT_TRUE(copy.Contains((static_cast(1) << 32) | 30L).value()); + AssertEqualContent(copy, {10L, (int64_t{2} << 32) | 30L}); + + // Copy is independent from source. + ASSERT_THAT(copy.Add(99L), IsOk()); + ASSERT_THAT(bitmap.Contains(99L), HasValue(::testing::Eq(false))); +} + +TEST(RoaringPositionBitmapTest, TestCopyAssignment) { + RoaringPositionBitmap bitmap; + ASSERT_THAT(bitmap.Add(10L), IsOk()); + ASSERT_THAT(bitmap.Add((int64_t{3} << 32) | 40L), IsOk()); + + RoaringPositionBitmap assigned; + ASSERT_THAT(assigned.Add(1L), IsOk()); + + assigned = bitmap; + AssertEqualContent(assigned, {10L, (int64_t{3} << 32) | 40L}); + + // Assignment result is independent from source. + ASSERT_THAT(bitmap.Add(200L), IsOk()); + ASSERT_THAT(assigned.Contains(200L), HasValue(::testing::Eq(false))); } TEST(RoaringPositionBitmapTest, TestSerializeDeserializeEmpty) { RoaringPositionBitmap bitmap; auto copy = RoundTripSerialize(bitmap); - EXPECT_TRUE(copy.IsEmpty()); - EXPECT_EQ(copy.Cardinality(), 0); + ASSERT_TRUE(copy.IsEmpty()); + ASSERT_EQ(copy.Cardinality(), 0); } TEST(RoaringPositionBitmapTest, TestSerializeDeserializeAllContainerBitmap) { RoaringPositionBitmap bitmap; // bitmap 0, container 0 (array - few elements) - EXPECT_THAT(bitmap.Add(Position(0, 0, 5)), IsOk()); - EXPECT_THAT(bitmap.Add(Position(0, 0, 7)), IsOk()); + ASSERT_THAT(bitmap.Add(Position(0, 0, 5)), IsOk()); + ASSERT_THAT(bitmap.Add(Position(0, 0, 7)), IsOk()); // bitmap 0, container 1 (array that can be compressed) - EXPECT_THAT(bitmap.AddRange(Position(0, 1, 1), Position(0, 1, 1000)), IsOk()); + ASSERT_THAT(bitmap.AddRange(Position(0, 1, 1), Position(0, 1, 1000)), IsOk()); // bitmap 0, container 2 (bitset - nearly full container) - EXPECT_THAT(bitmap.AddRange(Position(0, 2, 1), Position(0, 2, kContainerOffset - 1)), + ASSERT_THAT(bitmap.AddRange(Position(0, 2, 1), Position(0, 2, kContainerOffset - 1)), IsOk()); // bitmap 1, container 0 (array) - EXPECT_THAT(bitmap.Add(Position(1, 0, 10)), IsOk()); - EXPECT_THAT(bitmap.Add(Position(1, 0, 20)), IsOk()); + ASSERT_THAT(bitmap.Add(Position(1, 0, 10)), IsOk()); + ASSERT_THAT(bitmap.Add(Position(1, 0, 20)), IsOk()); // bitmap 1, container 1 (array that can be compressed) - EXPECT_THAT(bitmap.AddRange(Position(1, 1, 10), Position(1, 1, 500)), IsOk()); + ASSERT_THAT(bitmap.AddRange(Position(1, 1, 10), Position(1, 1, 500)), IsOk()); // bitmap 1, container 2 (bitset) - EXPECT_THAT(bitmap.AddRange(Position(1, 2, 1), Position(1, 2, kContainerOffset - 1)), + ASSERT_THAT(bitmap.AddRange(Position(1, 2, 1), Position(1, 2, kContainerOffset - 1)), IsOk()); - EXPECT_TRUE(bitmap.Optimize()); + ASSERT_TRUE(bitmap.Optimize()); auto copy = RoundTripSerialize(bitmap); + std::set expected_positions; + bitmap.ForEach([&](int64_t pos) { expected_positions.insert(pos); }); - EXPECT_EQ(copy.Cardinality(), bitmap.Cardinality()); - copy.ForEach([&](int64_t pos) { EXPECT_TRUE(bitmap.Contains(pos).value()); }); - bitmap.ForEach([&](int64_t pos) { EXPECT_TRUE(copy.Contains(pos).value()); }); + AssertEqualContent(copy, expected_positions); } TEST(RoaringPositionBitmapTest, TestForEach) { RoaringPositionBitmap bitmap; - EXPECT_THAT(bitmap.Add(30L), IsOk()); - EXPECT_THAT(bitmap.Add(10L), IsOk()); - EXPECT_THAT(bitmap.Add(20L), IsOk()); - EXPECT_THAT(bitmap.Add((static_cast(1) << 32) | 5L), IsOk()); + ASSERT_THAT(bitmap.Add(30L), IsOk()); + ASSERT_THAT(bitmap.Add(10L), IsOk()); + ASSERT_THAT(bitmap.Add(20L), IsOk()); + ASSERT_THAT(bitmap.Add((int64_t{1} << 32) | 5L), IsOk()); std::vector positions; bitmap.ForEach([&](int64_t pos) { positions.push_back(pos); }); ASSERT_EQ(positions.size(), 4u); - EXPECT_EQ(positions[0], 10L); - EXPECT_EQ(positions[1], 20L); - EXPECT_EQ(positions[2], 30L); - EXPECT_EQ(positions[3], (static_cast(1) << 32) | 5L); + ASSERT_EQ(positions[0], 10L); + ASSERT_EQ(positions[1], 20L); + ASSERT_EQ(positions[2], 30L); + ASSERT_EQ(positions[3], (int64_t{1} << 32) | 5L); } TEST(RoaringPositionBitmapTest, TestIsEmpty) { RoaringPositionBitmap bitmap; - EXPECT_TRUE(bitmap.IsEmpty()); + ASSERT_TRUE(bitmap.IsEmpty()); - EXPECT_THAT(bitmap.Add(10L), IsOk()); - EXPECT_FALSE(bitmap.IsEmpty()); + ASSERT_THAT(bitmap.Add(10L), IsOk()); + ASSERT_FALSE(bitmap.IsEmpty()); } TEST(RoaringPositionBitmapTest, TestOptimize) { RoaringPositionBitmap bitmap; - EXPECT_THAT(bitmap.AddRange(0, 10000), IsOk()); + ASSERT_THAT(bitmap.AddRange(0, 10000), IsOk()); + size_t size_before = bitmap.SerializedSizeInBytes(); bool changed = bitmap.Optimize(); - EXPECT_TRUE(changed); + ASSERT_TRUE(changed); + + // RLE optimization should reduce size for this dense range + size_t size_after = bitmap.SerializedSizeInBytes(); + ASSERT_GT(size_before, size_after); // Content should be unchanged after RLE optimization - EXPECT_EQ(bitmap.Cardinality(), 10000); + std::set expected_positions; for (int64_t i = 0; i < 10000; ++i) { - EXPECT_TRUE(bitmap.Contains(i).value()); + expected_positions.insert(i); } + AssertEqualContent(bitmap, expected_positions); // Round-trip should preserve content after RLE auto copy = RoundTripSerialize(bitmap); - EXPECT_EQ(copy.Cardinality(), 10000); + AssertEqualContent(copy, expected_positions); } TEST(RoaringPositionBitmapTest, TestUnsupportedPositions) { RoaringPositionBitmap bitmap; // Negative position - EXPECT_THAT(bitmap.Add(-1L), IsError(ErrorKind::kInvalidArgument)); + ASSERT_THAT(bitmap.Add(-1L), IsError(ErrorKind::kInvalidArgument)); // Contains with negative position - EXPECT_THAT(bitmap.Contains(-1L), IsError(ErrorKind::kInvalidArgument)); + ASSERT_THAT(bitmap.Contains(-1L), IsError(ErrorKind::kInvalidArgument)); // Position exceeding MAX_POSITION - EXPECT_THAT(bitmap.Add(RoaringPositionBitmap::kMaxPosition + 1L), + ASSERT_THAT(bitmap.Add(RoaringPositionBitmap::kMaxPosition + 1L), IsError(ErrorKind::kInvalidArgument)); // Contains with position exceeding MAX_POSITION - EXPECT_THAT(bitmap.Contains(RoaringPositionBitmap::kMaxPosition + 1L), + ASSERT_THAT(bitmap.Contains(RoaringPositionBitmap::kMaxPosition + 1L), IsError(ErrorKind::kInvalidArgument)); } -TEST(RoaringPositionBitmapTest, TestDeserializeSupportedRoaringExamples) { - for (const auto& file : - {"64map32bitvals.bin", "64mapempty.bin", "64mapspreadvals.bin"}) { - std::string data = ReadTestResource(file); - auto result = RoaringPositionBitmap::Deserialize(data); - EXPECT_THAT(RoaringPositionBitmap::Deserialize(data), IsOk()); - } -} - -TEST(RoaringPositionBitmapTest, TestDeserializeUnsupportedRoaringExample) { - // This file contains a value with key larger than max supported - std::string data = ReadTestResource("64maphighvals.bin"); - auto result = RoaringPositionBitmap::Deserialize(data); - EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); - EXPECT_THAT(result, HasErrorMessage("Invalid unsigned key")); -} - TEST(RoaringPositionBitmapTest, TestRandomSparseBitmap) { std::mt19937_64 rng(42); RoaringPositionBitmap bitmap; std::set positions; - std::uniform_int_distribution dist(0, static_cast(5) << 32); + std::uniform_int_distribution dist(0, int64_t{5} << 32); for (int i = 0; i < 100000; ++i) { int64_t pos = dist(rng); @@ -425,7 +464,7 @@ TEST(RoaringPositionBitmapTest, TestRandomSparseBitmap) { int64_t pos = lookup_dist(rng2); auto result = bitmap.Contains(pos); ASSERT_THAT(result, IsOk()); - EXPECT_EQ(result.value(), positions.count(pos) > 0); + ASSERT_EQ(result.value(), positions.contains(pos)); } } @@ -434,8 +473,7 @@ TEST(RoaringPositionBitmapTest, TestRandomDenseBitmap) { std::set positions; // Create dense ranges across multiple bitmap keys - for (int64_t offset : {static_cast(0), static_cast(2) << 32, - static_cast(5) << 32}) { + for (int64_t offset : {int64_t{0}, int64_t{2} << 32, int64_t{5} << 32}) { for (int64_t i = 0; i < 10000; ++i) { ASSERT_THAT(bitmap.Add(offset + i), IsOk()); positions.insert(offset + i); @@ -451,8 +489,7 @@ TEST(RoaringPositionBitmapTest, TestRandomMixedBitmap) { std::set positions; // Sparse positions in [3<<32, 5<<32) - std::uniform_int_distribution dist1(static_cast(3) << 32, - static_cast(5) << 32); + std::uniform_int_distribution dist1(int64_t{3} << 32, int64_t{5} << 32); for (int i = 0; i < 50000; ++i) { int64_t pos = dist1(rng); positions.insert(pos); @@ -466,7 +503,7 @@ TEST(RoaringPositionBitmapTest, TestRandomMixedBitmap) { } // More sparse positions in [0, 1<<32) - std::uniform_int_distribution dist2(0, static_cast(1) << 32); + std::uniform_int_distribution dist2(0, int64_t{1} << 32); for (int i = 0; i < 5000; ++i) { int64_t pos = dist2(rng); positions.insert(pos); @@ -479,12 +516,50 @@ TEST(RoaringPositionBitmapTest, TestRandomMixedBitmap) { TEST(RoaringPositionBitmapTest, TestDeserializeInvalidData) { // Buffer too small auto result = RoaringPositionBitmap::Deserialize(""); - EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument)); // Invalid bitmap count (very large) std::string buf(8, '\xFF'); result = RoaringPositionBitmap::Deserialize(buf); - EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument)); +} + +TEST(RoaringPositionBitmapInteropTest, TestDeserializeSupportedRoaringExamples) { + // These .bin fixtures are copied from the Apache Iceberg Java repository's + // roaring position bitmap interoperability test resources. + static const std::vector kCases = { + {.file_name = "64map32bitvals.bin", + .expected_shape = InteropBitmapShape::kOnly32BitPositions}, + {.file_name = "64mapempty.bin", .expected_shape = InteropBitmapShape::kEmpty}, + {.file_name = "64mapspreadvals.bin", + .expected_shape = InteropBitmapShape::kSpreadAcrossKeys}, + }; + + for (const auto& test_case : kCases) { + SCOPED_TRACE(test_case.file_name); + std::string data = ReadTestResource(test_case.file_name); + auto result = RoaringPositionBitmap::Deserialize(data); + ASSERT_THAT(result, IsOk()); + + const auto& bitmap = result.value(); + AssertInteropBitmapShape(bitmap, test_case.expected_shape); + + std::set positions; + bitmap.ForEach([&](int64_t pos) { positions.insert(pos); }); + AssertEqualContent(bitmap, positions); + + auto copy = RoundTripSerialize(bitmap); + AssertEqualContent(copy, positions); + } +} + +TEST(RoaringPositionBitmapInteropTest, TestDeserializeUnsupportedRoaringExample) { + // This file is copied from the Apache Iceberg Java repository and it contains + // a value with key larger than max supported + std::string data = ReadTestResource("64maphighvals.bin"); + auto result = RoaringPositionBitmap::Deserialize(data); + ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, HasErrorMessage("Invalid unsigned key")); } } // namespace iceberg From 40e01c029cff3d7d294ebe50bf603b0aada7d16c Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Tue, 24 Mar 2026 11:03:58 +0800 Subject: [PATCH 3/3] fix linter src/iceberg/deletes/roaring_position_bitmap.cc:196:25 [modernize-use-integer-sign-comparison] Check warning: src/iceberg/deletes/roaring_position_bitmap.cc:196:25 [modernize-use-integer-sign-comparison] comparison between 'signed' and 'unsigned' integers --- src/iceberg/deletes/roaring_position_bitmap.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/iceberg/deletes/roaring_position_bitmap.cc b/src/iceberg/deletes/roaring_position_bitmap.cc index e47c2551f..fec037887 100644 --- a/src/iceberg/deletes/roaring_position_bitmap.cc +++ b/src/iceberg/deletes/roaring_position_bitmap.cc @@ -193,7 +193,7 @@ Result RoaringPositionBitmap::Serialize() const { buf += kBitmapCountSizeBytes; // Write each bitmap with its key - for (int32_t key = 0; key < static_cast(impl_->bitmaps.size()); ++key) { + for (int32_t key = 0; std::cmp_less(key, impl_->bitmaps.size()); ++key) { WriteLE32(buf, key); buf += kBitmapKeySizeBytes; size_t written = impl_->bitmaps[key].write(buf, /*portable=*/true);