From 1e98a87b2f8daf09b36f98056992d4ca7aaf54cb Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Wed, 21 Jan 2026 12:04:40 +0000 Subject: [PATCH 1/7] feat: Implement CF.RESERVE command with bucket-based storage --- src/commands/cmd_cuckoo_filter.cc | 109 ++++++++++++++++ src/storage/redis_metadata.cc | 44 +++++++ src/storage/redis_metadata.h | 37 +++++- src/types/cuckoo_filter.h | 61 +++++++++ src/types/redis_cuckoo_chain.cc | 113 +++++++++++++++++ src/types/redis_cuckoo_chain.h | 53 ++++++++ tests/cppunit/types/cuckoo_filter_test.cc | 147 ++++++++++++++++++++++ 7 files changed, 563 insertions(+), 1 deletion(-) create mode 100644 src/commands/cmd_cuckoo_filter.cc create mode 100644 src/types/cuckoo_filter.h create mode 100644 src/types/redis_cuckoo_chain.cc create mode 100644 src/types/redis_cuckoo_chain.h create mode 100644 tests/cppunit/types/cuckoo_filter_test.cc diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc new file mode 100644 index 00000000000..eb5e6ce3c94 --- /dev/null +++ b/src/commands/cmd_cuckoo_filter.cc @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "command_parser.h" +#include "commander.h" +#include "error_constants.h" +#include "server/server.h" +#include "types/redis_cuckoo_chain.h" + +namespace redis { + +class CommandCFReserve : public Commander { + public: + Status Parse(const std::vector &args) override { + // CF.RESERVE key capacity [BUCKETSIZE bs] [MAXITERATIONS mi] [EXPANSION ex] + if (args.size() < 3) { + return {Status::RedisParseErr, "wrong number of arguments"}; + } + + // Parse capacity (required) + auto parse_capacity = ParseInt(args[2], 10); + if (!parse_capacity) { + return {Status::RedisParseErr, "invalid capacity"}; + } + capacity_ = *parse_capacity; + if (capacity_ <= 0) { + return {Status::RedisParseErr, "capacity must be larger than 0"}; + } + + // Parse optional parameters + CommandParser parser(args, 3); + while (parser.Good()) { + if (parser.EatEqICase("BUCKETSIZE")) { + auto parse_bucket_size = parser.TakeInt(); + if (!parse_bucket_size.IsOK()) { + return {Status::RedisParseErr, "invalid bucket size"}; + } + bucket_size_ = parse_bucket_size.GetValue(); + if (bucket_size_ == 0 || bucket_size_ > 255) { + return {Status::RedisParseErr, "bucket size must be between 1 and 255"}; + } + } else if (parser.EatEqICase("MAXITERATIONS")) { + auto parse_max_iterations = parser.TakeInt(); + if (!parse_max_iterations.IsOK()) { + return {Status::RedisParseErr, "invalid max iterations"}; + } + max_iterations_ = parse_max_iterations.GetValue(); + if (max_iterations_ == 0) { + return {Status::RedisParseErr, "max iterations must be larger than 0"}; + } + } else if (parser.EatEqICase("EXPANSION")) { + auto parse_expansion = parser.TakeInt(); + if (!parse_expansion.IsOK()) { + return {Status::RedisParseErr, "invalid expansion factor"}; + } + expansion_ = parse_expansion.GetValue(); + } else { + return {Status::RedisParseErr, errInvalidSyntax}; + } + } + + return Commander::Parse(args); + } + + Status Execute(engine::Context &ctx, Server *srv, Connection *conn, std::string *output) override { + redis::CuckooChain cuckoo_db(srv->storage, conn->GetNamespace()); + auto s = cuckoo_db.Reserve(ctx, args_[1], capacity_, bucket_size_, max_iterations_, expansion_); + + if (!s.ok()) { + if (s.IsInvalidArgument()) { + // Return error message to client + return {Status::RedisExecErr, s.ToString()}; + } + return {Status::RedisExecErr, "failed to create cuckoo filter"}; + } + + *output = redis::SimpleString("OK"); + return Status::OK(); + } + + private: + uint64_t capacity_ = kCFDefaultCapacity; + uint8_t bucket_size_ = kCFDefaultBucketSize; + uint16_t max_iterations_ = kCFDefaultMaxIterations; + uint8_t expansion_ = kCFDefaultExpansion; +}; + +// Register the CF.RESERVE command +REDIS_REGISTER_COMMANDS(CuckooFilter, + MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1)) + +} // namespace redis \ No newline at end of file diff --git a/src/storage/redis_metadata.cc b/src/storage/redis_metadata.cc index 16fb190fe26..a004917f329 100644 --- a/src/storage/redis_metadata.cc +++ b/src/storage/redis_metadata.cc @@ -567,3 +567,47 @@ rocksdb::Status TimeSeriesMetadata::Decode(Slice *input) { return rocksdb::Status::OK(); } + + +void CuckooChainMetadata::Encode(std::string *dst) const { + Metadata::Encode(dst); + + PutFixed16(dst, n_filters); + PutFixed16(dst, expansion); + PutFixed64(dst, base_capacity); + PutFixed8(dst, bucket_size); + PutFixed16(dst, max_iterations); + PutFixed64(dst, num_deleted_items); +} + +rocksdb::Status CuckooChainMetadata::Decode(Slice *input) { + if (auto s = Metadata::Decode(input); !s.ok()) { + return s; + } + + if (input->size() < 21) { + return rocksdb::Status::InvalidArgument(kErrMetadataTooShort); + } + + GetFixed16(input, &n_filters); + GetFixed16(input, &expansion); + GetFixed64(input, &base_capacity); + GetFixed8(input, &bucket_size); + GetFixed16(input, &max_iterations); + GetFixed64(input, &num_deleted_items); + + return rocksdb::Status::OK(); +} + +uint64_t CuckooChainMetadata::GetTotalCapacity() const { + if (expansion == 0 || n_filters == 1) { + return base_capacity; + } + + // Calculate total capacity across all filters + uint64_t total = 0; + for (uint16_t i = 0; i < n_filters; i++) { + total += base_capacity * std::pow(expansion, i); + } + return total; +} diff --git a/src/storage/redis_metadata.h b/src/storage/redis_metadata.h index 2db3e9d0cec..4bdd66b500b 100644 --- a/src/storage/redis_metadata.h +++ b/src/storage/redis_metadata.h @@ -54,12 +54,13 @@ enum RedisType : uint8_t { kRedisHyperLogLog = 11, kRedisTDigest = 12, kRedisTimeSeries = 13, + kRedisCuckooFilter = 14, kRedisTypeMax }; inline constexpr const std::array RedisTypeNames = { "none", "string", "hash", "list", "set", "zset", "bitmap", - "sortedint", "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries"}; + "sortedint", "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries", "cuckoofilter"}; struct RedisTypes { RedisTypes(std::initializer_list list) { @@ -306,6 +307,40 @@ class BloomChainMetadata : public Metadata { bool IsScaling() const { return expansion != 0; }; }; +class CuckooChainMetadata : public Metadata { + public: + /// The number of sub-filters in the chain + uint16_t n_filters; + + /// Expansion factor for new filters + /// When a filter is full, a new one is created with capacity = base_capacity * expansion^n + uint16_t expansion; + + /// The capacity of the first filter + uint64_t base_capacity; + + /// Number of fingerprints per bucket + uint8_t bucket_size; + + /// Maximum number of cuckoo kicks before considering filter full + uint16_t max_iterations; + + /// Track number of deleted items for maintenance + uint64_t num_deleted_items; + + explicit CuckooChainMetadata(bool generate_version = true) + : Metadata(kRedisCuckooFilter, generate_version), + n_filters(0), expansion(0), base_capacity(0), + bucket_size(0), max_iterations(0), num_deleted_items(0) {} + + void Encode(std::string *dst) const override; + using Metadata::Decode; + rocksdb::Status Decode(Slice *input) override; + + uint64_t GetTotalCapacity() const; + bool IsScaling() const { return expansion > 0; } +}; + enum class JsonStorageFormat : uint8_t { JSON = 0, CBOR = 1, diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h new file mode 100644 index 00000000000..f305f8c4480 --- /dev/null +++ b/src/types/cuckoo_filter.h @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#pragma once + +#include +#include +#include +#include + +namespace redis { + +// Cuckoo filter implementation from the paper: +// "Cuckoo Filter: Practically Better Than Bloom" by Fan et al. +// This is a bucket-based storage implementation where each bucket is stored +// as an independent key-value pair in RocksDB +class CuckooFilter { + public: + // Calculate the optimal number of buckets for the filter + static uint32_t OptimalNumBuckets(uint64_t capacity, uint8_t bucket_size) { + // A load factor of 95.5% is chosen for the cuckoo filter + uint32_t num_buckets = static_cast(capacity / bucket_size / 0.955); + // Round up to next power of 2 for better hash distribution + if (num_buckets == 0) num_buckets = 1; + uint32_t power = 1; + while (power < num_buckets) power <<= 1; + return power; + } + + // Generate fingerprint from hash (8-bit fingerprint, non-zero) + static uint8_t GenerateFingerprint(uint64_t hash) { + uint8_t fp = hash & 0xFF; + return fp == 0 ? 1 : fp; // Ensure non-zero fingerprint + } + + // Calculate alternate bucket index using XOR + static uint32_t GetAltBucketIndex(uint32_t bucket_idx, uint8_t fingerprint, uint32_t num_buckets) { + // Use a simple hash of the fingerprint for the XOR operation + uint32_t fp_hash = fingerprint * 0x5bd1e995; // MurmurHash2 constant + return (bucket_idx ^ fp_hash) % num_buckets; + } +}; + +} // namespace redis \ No newline at end of file diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc new file mode 100644 index 00000000000..5f75e57fdc8 --- /dev/null +++ b/src/types/redis_cuckoo_chain.cc @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "redis_cuckoo_chain.h" + +#include + +#include + +#include "cuckoo_filter.h" + +namespace redis { + +rocksdb::Status CuckooChain::getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, + CuckooChainMetadata *metadata) { + return Database::GetMetadata(ctx, {kRedisCuckooFilter}, ns_key, metadata); +} + +std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, + uint16_t filter_index, uint32_t bucket_index) { + // Create a sub-key that includes both filter index and bucket index + std::string sub_key; + PutFixed16(&sub_key, filter_index); + PutFixed32(&sub_key, bucket_index); + + // Create the internal key using the storage encoding + std::string bucket_key = InternalKey(ns_key, sub_key, metadata.version, storage_->IsSlotIdEncoded()).Encode(); + return bucket_key; +} + +rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, + uint8_t bucket_size, uint16_t max_iterations, uint8_t expansion) { + // Validate parameters + if (capacity == 0) { + return rocksdb::Status::InvalidArgument("capacity must be larger than 0"); + } + + if (bucket_size == 0 || bucket_size > 255) { + return rocksdb::Status::InvalidArgument("bucket_size must be between 1 and 255"); + } + + if (max_iterations == 0) { + return rocksdb::Status::InvalidArgument("max_iterations must be larger than 0"); + } + + std::string ns_key = AppendNamespacePrefix(user_key); + + // Check if the key already exists + CuckooChainMetadata metadata; + rocksdb::Status s = getCuckooChainMetadata(ctx, ns_key, &metadata); + if (s.ok()) { + return rocksdb::Status::InvalidArgument("the key already exists"); + } + if (!s.IsNotFound()) { + return s; // Return other errors + } + + // Initialize metadata for the new cuckoo filter + metadata.size = 0; + metadata.base_capacity = capacity; + metadata.bucket_size = bucket_size; + metadata.max_iterations = max_iterations; + metadata.expansion = expansion; + metadata.n_filters = 1; + metadata.num_deleted_items = 0; + + // Calculate the number of buckets needed for this filter + uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(capacity, bucket_size); + + LOG(INFO) << "Creating cuckoo filter with capacity=" << capacity + << ", bucket_size=" << bucket_size + << ", num_buckets=" << num_buckets + << ", max_iterations=" << max_iterations + << ", expansion=" << static_cast(expansion); + + // Create a write batch for atomic operation + auto batch = storage_->GetWriteBatchBase(); + WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.RESERVE", user_key.ToString()}); + batch->PutLogData(log_data.Encode()); + + // Store the metadata + std::string metadata_bytes; + metadata.Encode(&metadata_bytes); + batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + + // Note: With bucket-based storage, we don't pre-allocate all buckets + // Buckets will be created lazily on first write + // This saves memory for sparse filters + + // Optionally, we could create the first few buckets to ensure the filter is ready + // But for now, we'll keep it fully lazy for maximum memory efficiency + + return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); +} + +} // namespace redis \ No newline at end of file diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h new file mode 100644 index 00000000000..f606f5f8c39 --- /dev/null +++ b/src/types/redis_cuckoo_chain.h @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#pragma once + +#include "cuckoo_filter.h" +#include "storage/redis_db.h" +#include "storage/redis_metadata.h" + +namespace redis { + +// Default values for Cuckoo Filter +const uint32_t kCFDefaultCapacity = 1024; +const uint8_t kCFDefaultBucketSize = 4; // 4 fingerprints per bucket +const uint16_t kCFDefaultMaxIterations = 500; +const uint8_t kCFDefaultExpansion = 2; + +class CuckooChain : public Database { + public: + CuckooChain(engine::Storage *storage, const std::string &ns) : Database(storage, ns) {} + + // CF.RESERVE command - creates a new cuckoo filter with specified capacity + rocksdb::Status Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, uint8_t bucket_size, + uint16_t max_iterations, uint8_t expansion); + + private: + // Get metadata for the cuckoo filter + rocksdb::Status getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); + + // Generate key for a specific bucket in bucket-based storage + // Format: cf:{namespace}:{user_key}:{filter_index}:{bucket_index} + std::string getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, + uint16_t filter_index, uint32_t bucket_index); +}; + +} // namespace redis \ No newline at end of file diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc new file mode 100644 index 00000000000..8a4a43c0f8f --- /dev/null +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include +#include + +#include "storage/redis_db.h" +#include "storage/redis_metadata.h" +#include "test_base.h" +#include "types/redis_cuckoo_chain.h" + +class RedisCuckooFilterTest : public TestBase { + protected: + explicit RedisCuckooFilterTest() : TestBase() { + cuckoo_ = std::make_unique(storage_.get(), namespace_); + } + ~RedisCuckooFilterTest() override = default; + + void SetUp() override { + key_ = "test_cuckoo_filter_key"; + } + + void TearDown() override { + [[maybe_unused]] auto s = cuckoo_->Del(*ctx_, key_); + } + + std::unique_ptr cuckoo_; + std::string key_; +}; + +TEST_F(RedisCuckooFilterTest, ReserveBasic) { + // Test basic reserve operation + uint64_t capacity = 1000; + uint8_t bucket_size = 4; + uint16_t max_iterations = 500; + uint8_t expansion = 2; + + auto s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); + ASSERT_TRUE(s.ok()) << "Failed to reserve cuckoo filter: " << s.ToString(); +} + +TEST_F(RedisCuckooFilterTest, ReserveDuplicate) { + // First reserve should succeed + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Second reserve with same key should fail + s = cuckoo_->Reserve(*ctx_, key_, 2000, 4, 500, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); + ASSERT_NE(s.ToString().find("already exists"), std::string::npos); +} + +TEST_F(RedisCuckooFilterTest, ReserveInvalidParams) { + // Test with zero capacity + auto s = cuckoo_->Reserve(*ctx_, key_, 0, 4, 500, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); + + // Test with zero bucket size + s = cuckoo_->Reserve(*ctx_, "key2", 1000, 0, 500, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); + + // Test with zero max iterations + s = cuckoo_->Reserve(*ctx_, "key3", 1000, 4, 0, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); +} + +TEST_F(RedisCuckooFilterTest, ReserveVariousCapacities) { + // Test with different capacities + std::vector capacities = {100, 1000, 10000, 100000}; + + for (size_t i = 0; i < capacities.size(); ++i) { + std::string test_key = key_ + "_" + std::to_string(i); + auto s = cuckoo_->Reserve(*ctx_, test_key, capacities[i], 4, 500, 2); + ASSERT_TRUE(s.ok()) << "Failed for capacity " << capacities[i]; + } +} + +TEST_F(RedisCuckooFilterTest, ReserveWithDifferentBucketSizes) { + // Test with different valid bucket sizes + std::vector bucket_sizes = {1, 2, 4, 8, 16}; + + for (size_t i = 0; i < bucket_sizes.size(); ++i) { + std::string test_key = key_ + "_bucket_" + std::to_string(i); + auto s = cuckoo_->Reserve(*ctx_, test_key, 1000, bucket_sizes[i], 500, 2); + ASSERT_TRUE(s.ok()) << "Failed for bucket size " << static_cast(bucket_sizes[i]); + } +} + +TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { + // Test the static helper function + uint64_t capacity = 1000; + uint8_t bucket_size = 4; + + uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(capacity, bucket_size); + + // Should be a power of 2 + ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Number of buckets should be power of 2"; + + // Should be able to hold the capacity with 95.5% load factor + uint32_t expected_min = static_cast(capacity / bucket_size / 0.955); + ASSERT_GE(num_buckets, expected_min) << "Number of buckets too small for capacity"; +} + +TEST_F(RedisCuckooFilterTest, FingerprintGeneration) { + // Test fingerprint generation ensures non-zero values + for (uint64_t hash = 0; hash < 256; ++hash) { + uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash); + ASSERT_NE(fp, 0) << "Fingerprint should never be zero"; + } +} + +TEST_F(RedisCuckooFilterTest, AlternateBucketCalculation) { + uint32_t num_buckets = 1024; + + // Test that alternate bucket is different from original + for (uint32_t bucket = 0; bucket < 100; ++bucket) { + for (uint8_t fp = 1; fp < 10; ++fp) { + uint32_t alt_bucket = redis::CuckooFilter::GetAltBucketIndex(bucket, fp, num_buckets); + ASSERT_LT(alt_bucket, num_buckets) << "Alternate bucket out of range"; + + // Applying alternate twice should give back original + uint32_t double_alt = redis::CuckooFilter::GetAltBucketIndex(alt_bucket, fp, num_buckets); + ASSERT_EQ(double_alt, bucket) << "Double alternate should give original bucket"; + } + } +} \ No newline at end of file From 577ee71e710f18297b0affdcb8f22872bbba1d9b Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Thu, 22 Jan 2026 10:42:41 +0000 Subject: [PATCH 2/7] feat: Implement CF.RESERVE command with bucket-based storage --- src/commands/commander.h | 1 + src/types/redis_cuckoo_chain.cc | 11 ++++------- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/commands/commander.h b/src/commands/commander.h index 8b012ff27f1..d34129f58c4 100644 --- a/src/commands/commander.h +++ b/src/commands/commander.h @@ -92,6 +92,7 @@ enum class CommandCategory : uint8_t { Bit, BloomFilter, Cluster, + CuckooFilter, Function, Geo, Hash, diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 5f75e57fdc8..803fd5ffb7d 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -20,10 +20,10 @@ #include "redis_cuckoo_chain.h" -#include - #include +#include "logging.h" + #include "cuckoo_filter.h" namespace redis { @@ -84,11 +84,8 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key // Calculate the number of buckets needed for this filter uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(capacity, bucket_size); - LOG(INFO) << "Creating cuckoo filter with capacity=" << capacity - << ", bucket_size=" << bucket_size - << ", num_buckets=" << num_buckets - << ", max_iterations=" << max_iterations - << ", expansion=" << static_cast(expansion); + info("Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}", + capacity, bucket_size, num_buckets, max_iterations, static_cast(expansion)); // Create a write batch for atomic operation auto batch = storage_->GetWriteBatchBase(); From 77b4c4ad612d907da268dfc8947a1edb7507bde6 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Fri, 23 Jan 2026 10:16:45 +0000 Subject: [PATCH 3/7] feat: Implement CF.RESERVE command with bucket-based storage --- src/types/cuckoo_filter.h | 41 +++++++++++--- src/types/redis_cuckoo_chain.cc | 9 ++- tests/cppunit/types/cuckoo_filter_test.cc | 68 +++++++++++++++++++---- 3 files changed, 98 insertions(+), 20 deletions(-) diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index f305f8c4480..eb10eba4758 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -25,12 +25,20 @@ #include #include +#include "vendor/murmurhash2.h" + namespace redis { // Cuckoo filter implementation from the paper: // "Cuckoo Filter: Practically Better Than Bloom" by Fan et al. // This is a bucket-based storage implementation where each bucket is stored // as an independent key-value pair in RocksDB +// +// Hash calculation follows RedisBloom's design: +// - fp = hash % 255 + 1 (fingerprint, non-zero, range: 1-255) +// - h1 = hash (primary hash) +// - h2 = h1 ^ (fp * 0x5bd1e995) (alternate hash via XOR) +// - bucket_index = hash % num_buckets (only apply modulo when indexing) class CuckooFilter { public: // Calculate the optimal number of buckets for the filter @@ -44,18 +52,37 @@ class CuckooFilter { return power; } - // Generate fingerprint from hash (8-bit fingerprint, non-zero) + // Generate fingerprint from hash (8-bit fingerprint, non-zero, range: 1-255) + // Following RedisBloom: fp = hash % 255 + 1 static uint8_t GenerateFingerprint(uint64_t hash) { - uint8_t fp = hash & 0xFF; - return fp == 0 ? 1 : fp; // Ensure non-zero fingerprint + return static_cast(hash % 255 + 1); + } + + // Calculate alternate hash using XOR (following RedisBloom) + // h2 = h1 ^ (fp * 0x5bd1e995) + // This preserves symmetry: GetAltHash(fp, GetAltHash(fp, h)) == h + static uint64_t GetAltHash(uint8_t fingerprint, uint64_t hash) { + return hash ^ (static_cast(fingerprint) * 0x5bd1e995); } - // Calculate alternate bucket index using XOR + // Legacy function for backward compatibility with tests + // Converts bucket index to hash, applies GetAltHash, then converts back to bucket index static uint32_t GetAltBucketIndex(uint32_t bucket_idx, uint8_t fingerprint, uint32_t num_buckets) { - // Use a simple hash of the fingerprint for the XOR operation - uint32_t fp_hash = fingerprint * 0x5bd1e995; // MurmurHash2 constant - return (bucket_idx ^ fp_hash) % num_buckets; + // Treat bucket_idx as a hash value for the calculation + uint64_t hash = bucket_idx; + uint64_t alt_hash = GetAltHash(fingerprint, hash); + // Convert back to bucket index + return static_cast(alt_hash % num_buckets); } + + // Compute hash for a given item using MurmurHash2 (compatible with RedisBloom) + // This is the entry point for hashing items before they are inserted/checked in the filter + static uint64_t Hash(const char* data, size_t length) { + return HllMurMurHash64A(data, static_cast(length), 0); + } + + // Convenience overload for std::string + static uint64_t Hash(const std::string& item) { return Hash(item.data(), item.size()); } }; } // namespace redis \ No newline at end of file diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 803fd5ffb7d..952af9f2a9a 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -63,8 +63,10 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key std::string ns_key = AppendNamespacePrefix(user_key); // Check if the key already exists - CuckooChainMetadata metadata; - rocksdb::Status s = getCuckooChainMetadata(ctx, ns_key, &metadata); + // Read without snapshot to ensure we see any committed data + std::string raw_value; + rocksdb::ReadOptions read_options; // No snapshot + auto s = storage_->Get(ctx, read_options, metadata_cf_handle_, ns_key, &raw_value); if (s.ok()) { return rocksdb::Status::InvalidArgument("the key already exists"); } @@ -72,6 +74,9 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key return s; // Return other errors } + // Initialize metadata for the new cuckoo filter + CuckooChainMetadata metadata; + // Initialize metadata for the new cuckoo filter metadata.size = 0; metadata.base_capacity = capacity; diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 8a4a43c0f8f..69183713cd8 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -29,7 +29,7 @@ class RedisCuckooFilterTest : public TestBase { protected: explicit RedisCuckooFilterTest() : TestBase() { - cuckoo_ = std::make_unique(storage_.get(), namespace_); + cuckoo_ = std::make_unique(storage_.get(), "cuckoo_ns"); } ~RedisCuckooFilterTest() override = default; @@ -123,25 +123,71 @@ TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { } TEST_F(RedisCuckooFilterTest, FingerprintGeneration) { - // Test fingerprint generation ensures non-zero values - for (uint64_t hash = 0; hash < 256; ++hash) { + // Test fingerprint generation ensures non-zero values in range [1, 255] + // Following RedisBloom: fp = hash % 255 + 1 + for (uint64_t hash = 0; hash < 1000; ++hash) { uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash); - ASSERT_NE(fp, 0) << "Fingerprint should never be zero"; + ASSERT_GE(fp, 1) << "Fingerprint should be at least 1"; + ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; } + + // Verify the formula: fp = hash % 255 + 1 + ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(0), 1); + ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(254), 255); + ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(255), 1); + ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(256), 2); } TEST_F(RedisCuckooFilterTest, AlternateBucketCalculation) { uint32_t num_buckets = 1024; - // Test that alternate bucket is different from original - for (uint32_t bucket = 0; bucket < 100; ++bucket) { + // Test GetAltHash symmetry at hash level (following RedisBloom design) + // h2 = GetAltHash(fp, h1) + // h1 = GetAltHash(fp, h2) <- this is the symmetry property + for (uint64_t hash = 0; hash < 100; ++hash) { for (uint8_t fp = 1; fp < 10; ++fp) { - uint32_t alt_bucket = redis::CuckooFilter::GetAltBucketIndex(bucket, fp, num_buckets); - ASSERT_LT(alt_bucket, num_buckets) << "Alternate bucket out of range"; + uint64_t alt_hash = redis::CuckooFilter::GetAltHash(fp, hash); + + // Applying GetAltHash twice should return original hash + uint64_t double_alt_hash = redis::CuckooFilter::GetAltHash(fp, alt_hash); + ASSERT_EQ(double_alt_hash, hash) << "Double alternate hash should give original hash"; - // Applying alternate twice should give back original - uint32_t double_alt = redis::CuckooFilter::GetAltBucketIndex(alt_bucket, fp, num_buckets); - ASSERT_EQ(double_alt, bucket) << "Double alternate should give original bucket"; + // Both hashes should map to valid bucket indices + uint32_t bucket1 = hash % num_buckets; + uint32_t bucket2 = alt_hash % num_buckets; + ASSERT_LT(bucket1, num_buckets) << "Bucket 1 out of range"; + ASSERT_LT(bucket2, num_buckets) << "Bucket 2 out of range"; } } +} + +TEST_F(RedisCuckooFilterTest, HashFunction) { + // Test that Hash function produces consistent 64-bit values + std::string test_item = "hello"; + uint64_t hash1 = redis::CuckooFilter::Hash(test_item); + uint64_t hash2 = redis::CuckooFilter::Hash(test_item.data(), test_item.size()); + + // Both methods should produce the same result + ASSERT_EQ(hash1, hash2) << "Hash methods should be consistent"; + + // Hash should be deterministic + uint64_t hash3 = redis::CuckooFilter::Hash(test_item); + ASSERT_EQ(hash1, hash3) << "Hash should be deterministic"; + + // Different items should produce different hashes (with high probability) + uint64_t hash_world = redis::CuckooFilter::Hash("world"); + ASSERT_NE(hash1, hash_world) << "Different items should have different hashes"; + + // Empty string produces hash value 0 (this is expected with MurmurHash) + uint64_t hash_empty = redis::CuckooFilter::Hash(""); + ASSERT_EQ(hash_empty, 0) << "Empty string should produce hash value 0 with MurmurHash"; + + // Even with hash=0, fingerprint should be non-zero + uint8_t fp_empty = redis::CuckooFilter::GenerateFingerprint(hash_empty); + ASSERT_EQ(fp_empty, 1) << "Fingerprint of hash=0 should be 1 (0 % 255 + 1)"; + + // Test that hash can be used with fingerprint generation + uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash1); + ASSERT_GE(fp, 1) << "Fingerprint should be at least 1"; + ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; } \ No newline at end of file From 025ee864df24a0e3f7504e1f204b763a3d0e9683 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Fri, 23 Jan 2026 10:58:29 +0000 Subject: [PATCH 4/7] feat: Implement CF.RESERVE command with bucket-based storage --- src/types/redis_cuckoo_chain.cc | 6 + tests/cppunit/types/cuckoo_filter_test.cc | 172 +++++++++++++++++++++- 2 files changed, 172 insertions(+), 6 deletions(-) diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 952af9f2a9a..8ab69e38555 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -52,6 +52,12 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key return rocksdb::Status::InvalidArgument("capacity must be larger than 0"); } + // RedisBloom requires minimum capacity to ensure at least one bucket can be created + // With load factor 0.955, capacity=1 and bucket_size=4 results in 0 buckets + if (capacity < 2) { + return rocksdb::Status::InvalidArgument("capacity must be at least 2"); + } + if (bucket_size == 0 || bucket_size > 255) { return rocksdb::Status::InvalidArgument("bucket_size must be between 1 and 255"); } diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 69183713cd8..56000f930d6 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -31,14 +31,17 @@ class RedisCuckooFilterTest : public TestBase { explicit RedisCuckooFilterTest() : TestBase() { cuckoo_ = std::make_unique(storage_.get(), "cuckoo_ns"); } - ~RedisCuckooFilterTest() override = default; - - void SetUp() override { - key_ = "test_cuckoo_filter_key"; + ~RedisCuckooFilterTest() override { + // Ensure cuckoo_ is destroyed before storage_ + cuckoo_.reset(); } - void TearDown() override { - [[maybe_unused]] auto s = cuckoo_->Del(*ctx_, key_); + void SetUp() override { + // Use a unique key for each test to avoid conflicts + // Include test name to make debugging easier + const ::testing::TestInfo* const test_info = + ::testing::UnitTest::GetInstance()->current_test_info(); + key_ = std::string("cf_test_") + test_info->name(); } std::unique_ptr cuckoo_; @@ -190,4 +193,161 @@ TEST_F(RedisCuckooFilterTest, HashFunction) { uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash1); ASSERT_GE(fp, 1) << "Fingerprint should be at least 1"; ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; +} + + +TEST_F(RedisCuckooFilterTest, ReserveTooSmallCapacity) { + // Test capacity = 1 (too small, following RedisBloom behavior) + // With load factor 0.955 and bucket_size=4, this would result in 0 buckets + auto s = cuckoo_->Reserve(*ctx_, key_, 1, 4, 500, 2); + ASSERT_FALSE(s.ok()) << "Should reject capacity = 1"; + ASSERT_TRUE(s.IsInvalidArgument()); + ASSERT_NE(s.ToString().find("at least 2"), std::string::npos) << "Error message should mention minimum capacity"; +} + +TEST_F(RedisCuckooFilterTest, ReserveBucketSizeBoundary) { + // Test valid upper boundary (bucket_size is uint8_t, max = 255) + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 255, 500, 2); + ASSERT_TRUE(s.ok()) << "bucket_size=255 should be valid"; + + // Test bucket_size = 1 (minimum valid value) + s = cuckoo_->Reserve(*ctx_, "key_bs1", 1000, 1, 500, 2); + ASSERT_TRUE(s.ok()) << "bucket_size=1 should be valid"; + + // Test common power-of-2 bucket sizes + std::vector valid_sizes = {2, 4, 8, 16, 32, 64, 128}; + for (size_t i = 0; i < valid_sizes.size(); ++i) { + std::string test_key = "key_bs_" + std::to_string(valid_sizes[i]); + s = cuckoo_->Reserve(*ctx_, test_key, 1000, valid_sizes[i], 500, 2); + ASSERT_TRUE(s.ok()) << "bucket_size=" << static_cast(valid_sizes[i]) << " should be valid"; + } +} + +TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { + uint64_t capacity = 1000; + uint8_t bucket_size = 4; + uint16_t max_iterations = 500; + uint8_t expansion = 2; + + // Create the filter + auto s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); + ASSERT_TRUE(s.ok()) << "First reserve should succeed"; + + // Verify metadata was stored by trying to reserve again with same key + // This should fail with "already exists" error + s = cuckoo_->Reserve(*ctx_, key_, capacity * 2, bucket_size, max_iterations, expansion); + ASSERT_FALSE(s.ok()) << "Second reserve with same key should fail"; + ASSERT_TRUE(s.IsInvalidArgument()) << "Should return InvalidArgument error"; + ASSERT_NE(s.ToString().find("already exists"), std::string::npos) + << "Error message should mention 'already exists'"; + + // Verify we can still create filters with different keys + s = cuckoo_->Reserve(*ctx_, "different_key", capacity, bucket_size, max_iterations, expansion); + ASSERT_TRUE(s.ok()) << "Should be able to create filter with different key"; + + // Verify the original key still exists (can't create it again) + s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); + ASSERT_FALSE(s.ok()) << "Original key should still exist"; + ASSERT_NE(s.ToString().find("already exists"), std::string::npos); +} + + +TEST_F(RedisCuckooFilterTest, ReserveNoExpansion) { + // expansion=0 means no auto-expansion when filter is full + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 0); + ASSERT_TRUE(s.ok()) << "expansion=0 should be valid (no auto-growth)"; + + // Verify different expansion values + std::vector expansions = {0, 1, 2, 4, 8}; + for (size_t i = 0; i < expansions.size(); ++i) { + std::string test_key = "key_exp_" + std::to_string(expansions[i]); + s = cuckoo_->Reserve(*ctx_, test_key, 1000, 4, 500, expansions[i]); + ASSERT_TRUE(s.ok()) << "expansion=" << static_cast(expansions[i]) << " should be valid"; + } +} + +TEST_F(RedisCuckooFilterTest, ReserveLargeCapacity) { + // Test with very large capacity + uint64_t large_capacity = 10000000; // 10 million + auto s = cuckoo_->Reserve(*ctx_, key_, large_capacity, 4, 500, 2); + ASSERT_TRUE(s.ok()) << "Should handle large capacity"; + + // Verify num_buckets calculation doesn't overflow + uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(large_capacity, 4); + ASSERT_GT(num_buckets, 0) << "Should not overflow to 0"; + ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Should be power of 2"; + + // Test even larger capacity (100 million) + uint64_t huge_capacity = 100000000; + s = cuckoo_->Reserve(*ctx_, "huge_key", huge_capacity, 4, 500, 2); + ASSERT_TRUE(s.ok()) << "Should handle 100M capacity"; + + num_buckets = redis::CuckooFilter::OptimalNumBuckets(huge_capacity, 4); + ASSERT_GT(num_buckets, 0) << "Should not overflow with 100M capacity"; +} + +TEST_F(RedisCuckooFilterTest, ReserveMaxIterationsBoundary) { + // Test different max_iterations values + std::vector iterations = {1, 10, 100, 500, 1000, 5000, 65535}; + + for (size_t i = 0; i < iterations.size(); ++i) { + std::string test_key = "key_iter_" + std::to_string(iterations[i]); + auto s = cuckoo_->Reserve(*ctx_, test_key, 1000, 4, iterations[i], 2); + ASSERT_TRUE(s.ok()) << "max_iterations=" << iterations[i] << " should be valid"; + } + + // Test max_iterations = 0 (should fail) + auto s = cuckoo_->Reserve(*ctx_, "iter_zero", 1000, 4, 0, 2); + ASSERT_FALSE(s.ok()) << "max_iterations=0 should fail"; +} + +TEST_F(RedisCuckooFilterTest, ReserveEdgeCaseCapacities) { + // Test minimum valid capacity + auto s = cuckoo_->Reserve(*ctx_, "min_cap", 2, 4, 500, 2); + ASSERT_TRUE(s.ok()) << "capacity=2 should be valid (minimum)"; + + // Test small but valid capacities + std::vector small_capacities = {2, 3, 4, 5, 10, 50, 100}; + for (size_t i = 0; i < small_capacities.size(); ++i) { + std::string test_key = "small_" + std::to_string(small_capacities[i]); + s = cuckoo_->Reserve(*ctx_, test_key, small_capacities[i], 4, 500, 2); + ASSERT_TRUE(s.ok()) << "capacity=" << small_capacities[i] << " should be valid"; + + // Verify at least one bucket is created + uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(small_capacities[i], 4); + ASSERT_GE(num_buckets, 1) << "Should have at least 1 bucket for capacity=" << small_capacities[i]; + } +} + +TEST_F(RedisCuckooFilterTest, ReserveParameterCombinations) { + // Test various parameter combinations to ensure robustness + struct TestCase { + uint64_t capacity; + uint8_t bucket_size; + uint16_t max_iterations; + uint8_t expansion; + bool should_succeed; + std::string description; + }; + + std::vector test_cases = { + {1000, 4, 500, 2, true, "Standard parameters"}, + {2, 1, 1, 0, true, "Minimum all parameters"}, + {100000, 255, 65535, 255, true, "Maximum all parameters"}, + {1000, 2, 100, 1, true, "Small bucket, moderate iterations"}, + {50000, 8, 1000, 4, true, "Large capacity, large bucket"}, + {10, 16, 50, 0, true, "Small capacity, large bucket, no expansion"}, + }; + + for (size_t i = 0; i < test_cases.size(); ++i) { + const auto& tc = test_cases[i]; + std::string test_key = "combo_" + std::to_string(i); + auto s = cuckoo_->Reserve(*ctx_, test_key, tc.capacity, tc.bucket_size, tc.max_iterations, tc.expansion); + + if (tc.should_succeed) { + ASSERT_TRUE(s.ok()) << "Test case failed: " << tc.description; + } else { + ASSERT_FALSE(s.ok()) << "Test case should have failed: " << tc.description; + } + } } \ No newline at end of file From 4784725aeddab0ce761a3cd9b24e8b9c4a09b5d7 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Fri, 27 Feb 2026 11:05:56 +0000 Subject: [PATCH 5/7] 3122 cf fix log error --- src/types/redis_cuckoo_chain.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 8ab69e38555..72cc5b75cf4 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -95,7 +95,7 @@ rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key // Calculate the number of buckets needed for this filter uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(capacity, bucket_size); - info("Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}", + INFO("Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}", capacity, bucket_size, num_buckets, max_iterations, static_cast(expansion)); // Create a write batch for atomic operation From 56ef3de721b5f7110786889d50ea7acb0e6913d9 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Fri, 6 Mar 2026 11:52:18 +0000 Subject: [PATCH 6/7] 3122 cf fix type --- src/types/cuckoo_filter.h | 2 +- tests/cppunit/types/cuckoo_filter_test.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index eb10eba4758..153a4e9a176 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -44,7 +44,7 @@ class CuckooFilter { // Calculate the optimal number of buckets for the filter static uint32_t OptimalNumBuckets(uint64_t capacity, uint8_t bucket_size) { // A load factor of 95.5% is chosen for the cuckoo filter - uint32_t num_buckets = static_cast(capacity / bucket_size / 0.955); + auto num_buckets = static_cast(capacity / bucket_size / 0.955); // Round up to next power of 2 for better hash distribution if (num_buckets == 0) num_buckets = 1; uint32_t power = 1; diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 56000f930d6..58d96970ce0 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -121,7 +121,7 @@ TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Number of buckets should be power of 2"; // Should be able to hold the capacity with 95.5% load factor - uint32_t expected_min = static_cast(capacity / bucket_size / 0.955); + auto expected_min = static_cast(capacity / bucket_size / 0.955); ASSERT_GE(num_buckets, expected_min) << "Number of buckets too small for capacity"; } From ac1cb2a26ff40aa2673ca39cdcfa363735648b0a Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Fri, 6 Mar 2026 12:08:04 +0000 Subject: [PATCH 7/7] 3122 code format --- src/commands/cmd_cuckoo_filter.cc | 3 +-- src/storage/redis_metadata.cc | 1 - src/storage/redis_metadata.h | 12 ++++++++---- src/types/cuckoo_filter.h | 8 ++------ src/types/redis_cuckoo_chain.cc | 7 +++---- src/types/redis_cuckoo_chain.h | 4 ++-- tests/cppunit/types/cuckoo_filter_test.cc | 9 +++------ 7 files changed, 19 insertions(+), 25 deletions(-) diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc index eb5e6ce3c94..8be12386ecf 100644 --- a/src/commands/cmd_cuckoo_filter.cc +++ b/src/commands/cmd_cuckoo_filter.cc @@ -103,7 +103,6 @@ class CommandCFReserve : public Commander { }; // Register the CF.RESERVE command -REDIS_REGISTER_COMMANDS(CuckooFilter, - MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1)) +REDIS_REGISTER_COMMANDS(CuckooFilter, MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1)) } // namespace redis \ No newline at end of file diff --git a/src/storage/redis_metadata.cc b/src/storage/redis_metadata.cc index 5f44b151ab6..f385aa6fe24 100644 --- a/src/storage/redis_metadata.cc +++ b/src/storage/redis_metadata.cc @@ -570,7 +570,6 @@ rocksdb::Status TimeSeriesMetadata::Decode(Slice *input) { return rocksdb::Status::OK(); } - void CuckooChainMetadata::Encode(std::string *dst) const { Metadata::Encode(dst); diff --git a/src/storage/redis_metadata.h b/src/storage/redis_metadata.h index 8d8b5b21666..7d6e1c61575 100644 --- a/src/storage/redis_metadata.h +++ b/src/storage/redis_metadata.h @@ -59,8 +59,8 @@ enum RedisType : uint8_t { }; inline constexpr const std::array RedisTypeNames = { - "none", "string", "hash", "list", "set", "zset", "bitmap", - "sortedint", "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries", "cuckoofilter"}; + "none", "string", "hash", "list", "set", "zset", "bitmap", "sortedint", + "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries", "cuckoofilter"}; struct RedisTypes { RedisTypes(std::initializer_list list) { @@ -330,8 +330,12 @@ class CuckooChainMetadata : public Metadata { explicit CuckooChainMetadata(bool generate_version = true) : Metadata(kRedisCuckooFilter, generate_version), - n_filters(0), expansion(0), base_capacity(0), - bucket_size(0), max_iterations(0), num_deleted_items(0) {} + n_filters(0), + expansion(0), + base_capacity(0), + bucket_size(0), + max_iterations(0), + num_deleted_items(0) {} void Encode(std::string *dst) const override; using Metadata::Decode; diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h index 153a4e9a176..52b159d4469 100644 --- a/src/types/cuckoo_filter.h +++ b/src/types/cuckoo_filter.h @@ -54,9 +54,7 @@ class CuckooFilter { // Generate fingerprint from hash (8-bit fingerprint, non-zero, range: 1-255) // Following RedisBloom: fp = hash % 255 + 1 - static uint8_t GenerateFingerprint(uint64_t hash) { - return static_cast(hash % 255 + 1); - } + static uint8_t GenerateFingerprint(uint64_t hash) { return static_cast(hash % 255 + 1); } // Calculate alternate hash using XOR (following RedisBloom) // h2 = h1 ^ (fp * 0x5bd1e995) @@ -77,9 +75,7 @@ class CuckooFilter { // Compute hash for a given item using MurmurHash2 (compatible with RedisBloom) // This is the entry point for hashing items before they are inserted/checked in the filter - static uint64_t Hash(const char* data, size_t length) { - return HllMurMurHash64A(data, static_cast(length), 0); - } + static uint64_t Hash(const char* data, size_t length) { return HllMurMurHash64A(data, static_cast(length), 0); } // Convenience overload for std::string static uint64_t Hash(const std::string& item) { return Hash(item.data(), item.size()); } diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc index 72cc5b75cf4..edf2b64380d 100644 --- a/src/types/redis_cuckoo_chain.cc +++ b/src/types/redis_cuckoo_chain.cc @@ -22,9 +22,8 @@ #include -#include "logging.h" - #include "cuckoo_filter.h" +#include "logging.h" namespace redis { @@ -33,8 +32,8 @@ rocksdb::Status CuckooChain::getCuckooChainMetadata(engine::Context &ctx, const return Database::GetMetadata(ctx, {kRedisCuckooFilter}, ns_key, metadata); } -std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, - uint16_t filter_index, uint32_t bucket_index) { +std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, + uint32_t bucket_index) { // Create a sub-key that includes both filter index and bucket index std::string sub_key; PutFixed16(&sub_key, filter_index); diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h index f606f5f8c39..41de5559b9c 100644 --- a/src/types/redis_cuckoo_chain.h +++ b/src/types/redis_cuckoo_chain.h @@ -46,8 +46,8 @@ class CuckooChain : public Database { // Generate key for a specific bucket in bucket-based storage // Format: cf:{namespace}:{user_key}:{filter_index}:{bucket_index} - std::string getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, - uint16_t filter_index, uint32_t bucket_index); + std::string getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, + uint32_t bucket_index); }; } // namespace redis \ No newline at end of file diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc index 58d96970ce0..75f15ea092d 100644 --- a/tests/cppunit/types/cuckoo_filter_test.cc +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -19,6 +19,7 @@ */ #include + #include #include "storage/redis_db.h" @@ -39,8 +40,7 @@ class RedisCuckooFilterTest : public TestBase { void SetUp() override { // Use a unique key for each test to avoid conflicts // Include test name to make debugging easier - const ::testing::TestInfo* const test_info = - ::testing::UnitTest::GetInstance()->current_test_info(); + const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); key_ = std::string("cf_test_") + test_info->name(); } @@ -195,7 +195,6 @@ TEST_F(RedisCuckooFilterTest, HashFunction) { ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; } - TEST_F(RedisCuckooFilterTest, ReserveTooSmallCapacity) { // Test capacity = 1 (too small, following RedisBloom behavior) // With load factor 0.955 and bucket_size=4, this would result in 0 buckets @@ -238,8 +237,7 @@ TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { s = cuckoo_->Reserve(*ctx_, key_, capacity * 2, bucket_size, max_iterations, expansion); ASSERT_FALSE(s.ok()) << "Second reserve with same key should fail"; ASSERT_TRUE(s.IsInvalidArgument()) << "Should return InvalidArgument error"; - ASSERT_NE(s.ToString().find("already exists"), std::string::npos) - << "Error message should mention 'already exists'"; + ASSERT_NE(s.ToString().find("already exists"), std::string::npos) << "Error message should mention 'already exists'"; // Verify we can still create filters with different keys s = cuckoo_->Reserve(*ctx_, "different_key", capacity, bucket_size, max_iterations, expansion); @@ -251,7 +249,6 @@ TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { ASSERT_NE(s.ToString().find("already exists"), std::string::npos); } - TEST_F(RedisCuckooFilterTest, ReserveNoExpansion) { // expansion=0 means no auto-expansion when filter is full auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 0);