diff --git a/src/commands/cmd_cuckoo_filter.cc b/src/commands/cmd_cuckoo_filter.cc new file mode 100644 index 00000000000..8be12386ecf --- /dev/null +++ b/src/commands/cmd_cuckoo_filter.cc @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "command_parser.h" +#include "commander.h" +#include "error_constants.h" +#include "server/server.h" +#include "types/redis_cuckoo_chain.h" + +namespace redis { + +class CommandCFReserve : public Commander { + public: + Status Parse(const std::vector &args) override { + // CF.RESERVE key capacity [BUCKETSIZE bs] [MAXITERATIONS mi] [EXPANSION ex] + if (args.size() < 3) { + return {Status::RedisParseErr, "wrong number of arguments"}; + } + + // Parse capacity (required) + auto parse_capacity = ParseInt(args[2], 10); + if (!parse_capacity) { + return {Status::RedisParseErr, "invalid capacity"}; + } + capacity_ = *parse_capacity; + if (capacity_ <= 0) { + return {Status::RedisParseErr, "capacity must be larger than 0"}; + } + + // Parse optional parameters + CommandParser parser(args, 3); + while (parser.Good()) { + if (parser.EatEqICase("BUCKETSIZE")) { + auto parse_bucket_size = parser.TakeInt(); + if (!parse_bucket_size.IsOK()) { + return {Status::RedisParseErr, "invalid bucket size"}; + } + bucket_size_ = parse_bucket_size.GetValue(); + if (bucket_size_ == 0 || bucket_size_ > 255) { + return {Status::RedisParseErr, "bucket size must be between 1 and 255"}; + } + } else if (parser.EatEqICase("MAXITERATIONS")) { + auto parse_max_iterations = parser.TakeInt(); + if (!parse_max_iterations.IsOK()) { + return {Status::RedisParseErr, "invalid max iterations"}; + } + max_iterations_ = parse_max_iterations.GetValue(); + if (max_iterations_ == 0) { + return {Status::RedisParseErr, "max iterations must be larger than 0"}; + } + } else if (parser.EatEqICase("EXPANSION")) { + auto parse_expansion = parser.TakeInt(); + if (!parse_expansion.IsOK()) { + return {Status::RedisParseErr, "invalid expansion factor"}; + } + expansion_ = parse_expansion.GetValue(); + } else { + return {Status::RedisParseErr, errInvalidSyntax}; + } + } + + return Commander::Parse(args); + } + + Status Execute(engine::Context &ctx, Server *srv, Connection *conn, std::string *output) override { + redis::CuckooChain cuckoo_db(srv->storage, conn->GetNamespace()); + auto s = cuckoo_db.Reserve(ctx, args_[1], capacity_, bucket_size_, max_iterations_, expansion_); + + if (!s.ok()) { + if (s.IsInvalidArgument()) { + // Return error message to client + return {Status::RedisExecErr, s.ToString()}; + } + return {Status::RedisExecErr, "failed to create cuckoo filter"}; + } + + *output = redis::SimpleString("OK"); + return Status::OK(); + } + + private: + uint64_t capacity_ = kCFDefaultCapacity; + uint8_t bucket_size_ = kCFDefaultBucketSize; + uint16_t max_iterations_ = kCFDefaultMaxIterations; + uint8_t expansion_ = kCFDefaultExpansion; +}; + +// Register the CF.RESERVE command +REDIS_REGISTER_COMMANDS(CuckooFilter, MakeCmdAttr("cf.reserve", -3, "write", 1, 1, 1)) + +} // namespace redis \ No newline at end of file diff --git a/src/commands/commander.h b/src/commands/commander.h index 3f38db02580..cee52bbdba3 100644 --- a/src/commands/commander.h +++ b/src/commands/commander.h @@ -96,6 +96,7 @@ enum class CommandCategory : uint8_t { Bit, BloomFilter, Cluster, + CuckooFilter, Function, Geo, Hash, diff --git a/src/storage/redis_metadata.cc b/src/storage/redis_metadata.cc index 692f8804db1..f385aa6fe24 100644 --- a/src/storage/redis_metadata.cc +++ b/src/storage/redis_metadata.cc @@ -569,3 +569,46 @@ rocksdb::Status TimeSeriesMetadata::Decode(Slice *input) { return rocksdb::Status::OK(); } + +void CuckooChainMetadata::Encode(std::string *dst) const { + Metadata::Encode(dst); + + PutFixed16(dst, n_filters); + PutFixed16(dst, expansion); + PutFixed64(dst, base_capacity); + PutFixed8(dst, bucket_size); + PutFixed16(dst, max_iterations); + PutFixed64(dst, num_deleted_items); +} + +rocksdb::Status CuckooChainMetadata::Decode(Slice *input) { + if (auto s = Metadata::Decode(input); !s.ok()) { + return s; + } + + if (input->size() < 21) { + return rocksdb::Status::InvalidArgument(kErrMetadataTooShort); + } + + GetFixed16(input, &n_filters); + GetFixed16(input, &expansion); + GetFixed64(input, &base_capacity); + GetFixed8(input, &bucket_size); + GetFixed16(input, &max_iterations); + GetFixed64(input, &num_deleted_items); + + return rocksdb::Status::OK(); +} + +uint64_t CuckooChainMetadata::GetTotalCapacity() const { + if (expansion == 0 || n_filters == 1) { + return base_capacity; + } + + // Calculate total capacity across all filters + uint64_t total = 0; + for (uint16_t i = 0; i < n_filters; i++) { + total += base_capacity * std::pow(expansion, i); + } + return total; +} diff --git a/src/storage/redis_metadata.h b/src/storage/redis_metadata.h index fd80e5a5ba0..7d6e1c61575 100644 --- a/src/storage/redis_metadata.h +++ b/src/storage/redis_metadata.h @@ -54,12 +54,13 @@ enum RedisType : uint8_t { kRedisHyperLogLog = 11, kRedisTDigest = 12, kRedisTimeSeries = 13, + kRedisCuckooFilter = 14, kRedisTypeMax }; inline constexpr const std::array RedisTypeNames = { - "none", "string", "hash", "list", "set", "zset", "bitmap", - "sortedint", "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries"}; + "none", "string", "hash", "list", "set", "zset", "bitmap", "sortedint", + "stream", "MBbloom--", "ReJSON-RL", "hyperloglog", "TDIS-TYPE", "timeseries", "cuckoofilter"}; struct RedisTypes { RedisTypes(std::initializer_list list) { @@ -306,6 +307,44 @@ class BloomChainMetadata : public Metadata { bool IsScaling() const { return expansion != 0; }; }; +class CuckooChainMetadata : public Metadata { + public: + /// The number of sub-filters in the chain + uint16_t n_filters; + + /// Expansion factor for new filters + /// When a filter is full, a new one is created with capacity = base_capacity * expansion^n + uint16_t expansion; + + /// The capacity of the first filter + uint64_t base_capacity; + + /// Number of fingerprints per bucket + uint8_t bucket_size; + + /// Maximum number of cuckoo kicks before considering filter full + uint16_t max_iterations; + + /// Track number of deleted items for maintenance + uint64_t num_deleted_items; + + explicit CuckooChainMetadata(bool generate_version = true) + : Metadata(kRedisCuckooFilter, generate_version), + n_filters(0), + expansion(0), + base_capacity(0), + bucket_size(0), + max_iterations(0), + num_deleted_items(0) {} + + void Encode(std::string *dst) const override; + using Metadata::Decode; + rocksdb::Status Decode(Slice *input) override; + + uint64_t GetTotalCapacity() const; + bool IsScaling() const { return expansion > 0; } +}; + enum class JsonStorageFormat : uint8_t { JSON = 0, CBOR = 1, diff --git a/src/types/cuckoo_filter.h b/src/types/cuckoo_filter.h new file mode 100644 index 00000000000..52b159d4469 --- /dev/null +++ b/src/types/cuckoo_filter.h @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#pragma once + +#include +#include +#include +#include + +#include "vendor/murmurhash2.h" + +namespace redis { + +// Cuckoo filter implementation from the paper: +// "Cuckoo Filter: Practically Better Than Bloom" by Fan et al. +// This is a bucket-based storage implementation where each bucket is stored +// as an independent key-value pair in RocksDB +// +// Hash calculation follows RedisBloom's design: +// - fp = hash % 255 + 1 (fingerprint, non-zero, range: 1-255) +// - h1 = hash (primary hash) +// - h2 = h1 ^ (fp * 0x5bd1e995) (alternate hash via XOR) +// - bucket_index = hash % num_buckets (only apply modulo when indexing) +class CuckooFilter { + public: + // Calculate the optimal number of buckets for the filter + static uint32_t OptimalNumBuckets(uint64_t capacity, uint8_t bucket_size) { + // A load factor of 95.5% is chosen for the cuckoo filter + auto num_buckets = static_cast(capacity / bucket_size / 0.955); + // Round up to next power of 2 for better hash distribution + if (num_buckets == 0) num_buckets = 1; + uint32_t power = 1; + while (power < num_buckets) power <<= 1; + return power; + } + + // Generate fingerprint from hash (8-bit fingerprint, non-zero, range: 1-255) + // Following RedisBloom: fp = hash % 255 + 1 + static uint8_t GenerateFingerprint(uint64_t hash) { return static_cast(hash % 255 + 1); } + + // Calculate alternate hash using XOR (following RedisBloom) + // h2 = h1 ^ (fp * 0x5bd1e995) + // This preserves symmetry: GetAltHash(fp, GetAltHash(fp, h)) == h + static uint64_t GetAltHash(uint8_t fingerprint, uint64_t hash) { + return hash ^ (static_cast(fingerprint) * 0x5bd1e995); + } + + // Legacy function for backward compatibility with tests + // Converts bucket index to hash, applies GetAltHash, then converts back to bucket index + static uint32_t GetAltBucketIndex(uint32_t bucket_idx, uint8_t fingerprint, uint32_t num_buckets) { + // Treat bucket_idx as a hash value for the calculation + uint64_t hash = bucket_idx; + uint64_t alt_hash = GetAltHash(fingerprint, hash); + // Convert back to bucket index + return static_cast(alt_hash % num_buckets); + } + + // Compute hash for a given item using MurmurHash2 (compatible with RedisBloom) + // This is the entry point for hashing items before they are inserted/checked in the filter + static uint64_t Hash(const char* data, size_t length) { return HllMurMurHash64A(data, static_cast(length), 0); } + + // Convenience overload for std::string + static uint64_t Hash(const std::string& item) { return Hash(item.data(), item.size()); } +}; + +} // namespace redis \ No newline at end of file diff --git a/src/types/redis_cuckoo_chain.cc b/src/types/redis_cuckoo_chain.cc new file mode 100644 index 00000000000..edf2b64380d --- /dev/null +++ b/src/types/redis_cuckoo_chain.cc @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "redis_cuckoo_chain.h" + +#include + +#include "cuckoo_filter.h" +#include "logging.h" + +namespace redis { + +rocksdb::Status CuckooChain::getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, + CuckooChainMetadata *metadata) { + return Database::GetMetadata(ctx, {kRedisCuckooFilter}, ns_key, metadata); +} + +std::string CuckooChain::getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, + uint32_t bucket_index) { + // Create a sub-key that includes both filter index and bucket index + std::string sub_key; + PutFixed16(&sub_key, filter_index); + PutFixed32(&sub_key, bucket_index); + + // Create the internal key using the storage encoding + std::string bucket_key = InternalKey(ns_key, sub_key, metadata.version, storage_->IsSlotIdEncoded()).Encode(); + return bucket_key; +} + +rocksdb::Status CuckooChain::Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, + uint8_t bucket_size, uint16_t max_iterations, uint8_t expansion) { + // Validate parameters + if (capacity == 0) { + return rocksdb::Status::InvalidArgument("capacity must be larger than 0"); + } + + // RedisBloom requires minimum capacity to ensure at least one bucket can be created + // With load factor 0.955, capacity=1 and bucket_size=4 results in 0 buckets + if (capacity < 2) { + return rocksdb::Status::InvalidArgument("capacity must be at least 2"); + } + + if (bucket_size == 0 || bucket_size > 255) { + return rocksdb::Status::InvalidArgument("bucket_size must be between 1 and 255"); + } + + if (max_iterations == 0) { + return rocksdb::Status::InvalidArgument("max_iterations must be larger than 0"); + } + + std::string ns_key = AppendNamespacePrefix(user_key); + + // Check if the key already exists + // Read without snapshot to ensure we see any committed data + std::string raw_value; + rocksdb::ReadOptions read_options; // No snapshot + auto s = storage_->Get(ctx, read_options, metadata_cf_handle_, ns_key, &raw_value); + if (s.ok()) { + return rocksdb::Status::InvalidArgument("the key already exists"); + } + if (!s.IsNotFound()) { + return s; // Return other errors + } + + // Initialize metadata for the new cuckoo filter + CuckooChainMetadata metadata; + + // Initialize metadata for the new cuckoo filter + metadata.size = 0; + metadata.base_capacity = capacity; + metadata.bucket_size = bucket_size; + metadata.max_iterations = max_iterations; + metadata.expansion = expansion; + metadata.n_filters = 1; + metadata.num_deleted_items = 0; + + // Calculate the number of buckets needed for this filter + uint32_t num_buckets = CuckooFilter::OptimalNumBuckets(capacity, bucket_size); + + INFO("Creating cuckoo filter with capacity={}, bucket_size={}, num_buckets={}, max_iterations={}, expansion={}", + capacity, bucket_size, num_buckets, max_iterations, static_cast(expansion)); + + // Create a write batch for atomic operation + auto batch = storage_->GetWriteBatchBase(); + WriteBatchLogData log_data(kRedisCuckooFilter, {"CF.RESERVE", user_key.ToString()}); + batch->PutLogData(log_data.Encode()); + + // Store the metadata + std::string metadata_bytes; + metadata.Encode(&metadata_bytes); + batch->Put(metadata_cf_handle_, ns_key, metadata_bytes); + + // Note: With bucket-based storage, we don't pre-allocate all buckets + // Buckets will be created lazily on first write + // This saves memory for sparse filters + + // Optionally, we could create the first few buckets to ensure the filter is ready + // But for now, we'll keep it fully lazy for maximum memory efficiency + + return storage_->Write(ctx, storage_->DefaultWriteOptions(), batch->GetWriteBatch()); +} + +} // namespace redis \ No newline at end of file diff --git a/src/types/redis_cuckoo_chain.h b/src/types/redis_cuckoo_chain.h new file mode 100644 index 00000000000..41de5559b9c --- /dev/null +++ b/src/types/redis_cuckoo_chain.h @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#pragma once + +#include "cuckoo_filter.h" +#include "storage/redis_db.h" +#include "storage/redis_metadata.h" + +namespace redis { + +// Default values for Cuckoo Filter +const uint32_t kCFDefaultCapacity = 1024; +const uint8_t kCFDefaultBucketSize = 4; // 4 fingerprints per bucket +const uint16_t kCFDefaultMaxIterations = 500; +const uint8_t kCFDefaultExpansion = 2; + +class CuckooChain : public Database { + public: + CuckooChain(engine::Storage *storage, const std::string &ns) : Database(storage, ns) {} + + // CF.RESERVE command - creates a new cuckoo filter with specified capacity + rocksdb::Status Reserve(engine::Context &ctx, const Slice &user_key, uint64_t capacity, uint8_t bucket_size, + uint16_t max_iterations, uint8_t expansion); + + private: + // Get metadata for the cuckoo filter + rocksdb::Status getCuckooChainMetadata(engine::Context &ctx, const Slice &ns_key, CuckooChainMetadata *metadata); + + // Generate key for a specific bucket in bucket-based storage + // Format: cf:{namespace}:{user_key}:{filter_index}:{bucket_index} + std::string getBucketKey(const Slice &ns_key, const CuckooChainMetadata &metadata, uint16_t filter_index, + uint32_t bucket_index); +}; + +} // namespace redis \ No newline at end of file diff --git a/tests/cppunit/types/cuckoo_filter_test.cc b/tests/cppunit/types/cuckoo_filter_test.cc new file mode 100644 index 00000000000..75f15ea092d --- /dev/null +++ b/tests/cppunit/types/cuckoo_filter_test.cc @@ -0,0 +1,350 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include + +#include + +#include "storage/redis_db.h" +#include "storage/redis_metadata.h" +#include "test_base.h" +#include "types/redis_cuckoo_chain.h" + +class RedisCuckooFilterTest : public TestBase { + protected: + explicit RedisCuckooFilterTest() : TestBase() { + cuckoo_ = std::make_unique(storage_.get(), "cuckoo_ns"); + } + ~RedisCuckooFilterTest() override { + // Ensure cuckoo_ is destroyed before storage_ + cuckoo_.reset(); + } + + void SetUp() override { + // Use a unique key for each test to avoid conflicts + // Include test name to make debugging easier + const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); + key_ = std::string("cf_test_") + test_info->name(); + } + + std::unique_ptr cuckoo_; + std::string key_; +}; + +TEST_F(RedisCuckooFilterTest, ReserveBasic) { + // Test basic reserve operation + uint64_t capacity = 1000; + uint8_t bucket_size = 4; + uint16_t max_iterations = 500; + uint8_t expansion = 2; + + auto s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); + ASSERT_TRUE(s.ok()) << "Failed to reserve cuckoo filter: " << s.ToString(); +} + +TEST_F(RedisCuckooFilterTest, ReserveDuplicate) { + // First reserve should succeed + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 2); + ASSERT_TRUE(s.ok()); + + // Second reserve with same key should fail + s = cuckoo_->Reserve(*ctx_, key_, 2000, 4, 500, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); + ASSERT_NE(s.ToString().find("already exists"), std::string::npos); +} + +TEST_F(RedisCuckooFilterTest, ReserveInvalidParams) { + // Test with zero capacity + auto s = cuckoo_->Reserve(*ctx_, key_, 0, 4, 500, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); + + // Test with zero bucket size + s = cuckoo_->Reserve(*ctx_, "key2", 1000, 0, 500, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); + + // Test with zero max iterations + s = cuckoo_->Reserve(*ctx_, "key3", 1000, 4, 0, 2); + ASSERT_FALSE(s.ok()); + ASSERT_TRUE(s.IsInvalidArgument()); +} + +TEST_F(RedisCuckooFilterTest, ReserveVariousCapacities) { + // Test with different capacities + std::vector capacities = {100, 1000, 10000, 100000}; + + for (size_t i = 0; i < capacities.size(); ++i) { + std::string test_key = key_ + "_" + std::to_string(i); + auto s = cuckoo_->Reserve(*ctx_, test_key, capacities[i], 4, 500, 2); + ASSERT_TRUE(s.ok()) << "Failed for capacity " << capacities[i]; + } +} + +TEST_F(RedisCuckooFilterTest, ReserveWithDifferentBucketSizes) { + // Test with different valid bucket sizes + std::vector bucket_sizes = {1, 2, 4, 8, 16}; + + for (size_t i = 0; i < bucket_sizes.size(); ++i) { + std::string test_key = key_ + "_bucket_" + std::to_string(i); + auto s = cuckoo_->Reserve(*ctx_, test_key, 1000, bucket_sizes[i], 500, 2); + ASSERT_TRUE(s.ok()) << "Failed for bucket size " << static_cast(bucket_sizes[i]); + } +} + +TEST_F(RedisCuckooFilterTest, OptimalNumBucketsCalculation) { + // Test the static helper function + uint64_t capacity = 1000; + uint8_t bucket_size = 4; + + uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(capacity, bucket_size); + + // Should be a power of 2 + ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Number of buckets should be power of 2"; + + // Should be able to hold the capacity with 95.5% load factor + auto expected_min = static_cast(capacity / bucket_size / 0.955); + ASSERT_GE(num_buckets, expected_min) << "Number of buckets too small for capacity"; +} + +TEST_F(RedisCuckooFilterTest, FingerprintGeneration) { + // Test fingerprint generation ensures non-zero values in range [1, 255] + // Following RedisBloom: fp = hash % 255 + 1 + for (uint64_t hash = 0; hash < 1000; ++hash) { + uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash); + ASSERT_GE(fp, 1) << "Fingerprint should be at least 1"; + ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; + } + + // Verify the formula: fp = hash % 255 + 1 + ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(0), 1); + ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(254), 255); + ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(255), 1); + ASSERT_EQ(redis::CuckooFilter::GenerateFingerprint(256), 2); +} + +TEST_F(RedisCuckooFilterTest, AlternateBucketCalculation) { + uint32_t num_buckets = 1024; + + // Test GetAltHash symmetry at hash level (following RedisBloom design) + // h2 = GetAltHash(fp, h1) + // h1 = GetAltHash(fp, h2) <- this is the symmetry property + for (uint64_t hash = 0; hash < 100; ++hash) { + for (uint8_t fp = 1; fp < 10; ++fp) { + uint64_t alt_hash = redis::CuckooFilter::GetAltHash(fp, hash); + + // Applying GetAltHash twice should return original hash + uint64_t double_alt_hash = redis::CuckooFilter::GetAltHash(fp, alt_hash); + ASSERT_EQ(double_alt_hash, hash) << "Double alternate hash should give original hash"; + + // Both hashes should map to valid bucket indices + uint32_t bucket1 = hash % num_buckets; + uint32_t bucket2 = alt_hash % num_buckets; + ASSERT_LT(bucket1, num_buckets) << "Bucket 1 out of range"; + ASSERT_LT(bucket2, num_buckets) << "Bucket 2 out of range"; + } + } +} + +TEST_F(RedisCuckooFilterTest, HashFunction) { + // Test that Hash function produces consistent 64-bit values + std::string test_item = "hello"; + uint64_t hash1 = redis::CuckooFilter::Hash(test_item); + uint64_t hash2 = redis::CuckooFilter::Hash(test_item.data(), test_item.size()); + + // Both methods should produce the same result + ASSERT_EQ(hash1, hash2) << "Hash methods should be consistent"; + + // Hash should be deterministic + uint64_t hash3 = redis::CuckooFilter::Hash(test_item); + ASSERT_EQ(hash1, hash3) << "Hash should be deterministic"; + + // Different items should produce different hashes (with high probability) + uint64_t hash_world = redis::CuckooFilter::Hash("world"); + ASSERT_NE(hash1, hash_world) << "Different items should have different hashes"; + + // Empty string produces hash value 0 (this is expected with MurmurHash) + uint64_t hash_empty = redis::CuckooFilter::Hash(""); + ASSERT_EQ(hash_empty, 0) << "Empty string should produce hash value 0 with MurmurHash"; + + // Even with hash=0, fingerprint should be non-zero + uint8_t fp_empty = redis::CuckooFilter::GenerateFingerprint(hash_empty); + ASSERT_EQ(fp_empty, 1) << "Fingerprint of hash=0 should be 1 (0 % 255 + 1)"; + + // Test that hash can be used with fingerprint generation + uint8_t fp = redis::CuckooFilter::GenerateFingerprint(hash1); + ASSERT_GE(fp, 1) << "Fingerprint should be at least 1"; + ASSERT_LE(fp, 255) << "Fingerprint should be at most 255"; +} + +TEST_F(RedisCuckooFilterTest, ReserveTooSmallCapacity) { + // Test capacity = 1 (too small, following RedisBloom behavior) + // With load factor 0.955 and bucket_size=4, this would result in 0 buckets + auto s = cuckoo_->Reserve(*ctx_, key_, 1, 4, 500, 2); + ASSERT_FALSE(s.ok()) << "Should reject capacity = 1"; + ASSERT_TRUE(s.IsInvalidArgument()); + ASSERT_NE(s.ToString().find("at least 2"), std::string::npos) << "Error message should mention minimum capacity"; +} + +TEST_F(RedisCuckooFilterTest, ReserveBucketSizeBoundary) { + // Test valid upper boundary (bucket_size is uint8_t, max = 255) + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 255, 500, 2); + ASSERT_TRUE(s.ok()) << "bucket_size=255 should be valid"; + + // Test bucket_size = 1 (minimum valid value) + s = cuckoo_->Reserve(*ctx_, "key_bs1", 1000, 1, 500, 2); + ASSERT_TRUE(s.ok()) << "bucket_size=1 should be valid"; + + // Test common power-of-2 bucket sizes + std::vector valid_sizes = {2, 4, 8, 16, 32, 64, 128}; + for (size_t i = 0; i < valid_sizes.size(); ++i) { + std::string test_key = "key_bs_" + std::to_string(valid_sizes[i]); + s = cuckoo_->Reserve(*ctx_, test_key, 1000, valid_sizes[i], 500, 2); + ASSERT_TRUE(s.ok()) << "bucket_size=" << static_cast(valid_sizes[i]) << " should be valid"; + } +} + +TEST_F(RedisCuckooFilterTest, ReserveVerifyMetadata) { + uint64_t capacity = 1000; + uint8_t bucket_size = 4; + uint16_t max_iterations = 500; + uint8_t expansion = 2; + + // Create the filter + auto s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); + ASSERT_TRUE(s.ok()) << "First reserve should succeed"; + + // Verify metadata was stored by trying to reserve again with same key + // This should fail with "already exists" error + s = cuckoo_->Reserve(*ctx_, key_, capacity * 2, bucket_size, max_iterations, expansion); + ASSERT_FALSE(s.ok()) << "Second reserve with same key should fail"; + ASSERT_TRUE(s.IsInvalidArgument()) << "Should return InvalidArgument error"; + ASSERT_NE(s.ToString().find("already exists"), std::string::npos) << "Error message should mention 'already exists'"; + + // Verify we can still create filters with different keys + s = cuckoo_->Reserve(*ctx_, "different_key", capacity, bucket_size, max_iterations, expansion); + ASSERT_TRUE(s.ok()) << "Should be able to create filter with different key"; + + // Verify the original key still exists (can't create it again) + s = cuckoo_->Reserve(*ctx_, key_, capacity, bucket_size, max_iterations, expansion); + ASSERT_FALSE(s.ok()) << "Original key should still exist"; + ASSERT_NE(s.ToString().find("already exists"), std::string::npos); +} + +TEST_F(RedisCuckooFilterTest, ReserveNoExpansion) { + // expansion=0 means no auto-expansion when filter is full + auto s = cuckoo_->Reserve(*ctx_, key_, 1000, 4, 500, 0); + ASSERT_TRUE(s.ok()) << "expansion=0 should be valid (no auto-growth)"; + + // Verify different expansion values + std::vector expansions = {0, 1, 2, 4, 8}; + for (size_t i = 0; i < expansions.size(); ++i) { + std::string test_key = "key_exp_" + std::to_string(expansions[i]); + s = cuckoo_->Reserve(*ctx_, test_key, 1000, 4, 500, expansions[i]); + ASSERT_TRUE(s.ok()) << "expansion=" << static_cast(expansions[i]) << " should be valid"; + } +} + +TEST_F(RedisCuckooFilterTest, ReserveLargeCapacity) { + // Test with very large capacity + uint64_t large_capacity = 10000000; // 10 million + auto s = cuckoo_->Reserve(*ctx_, key_, large_capacity, 4, 500, 2); + ASSERT_TRUE(s.ok()) << "Should handle large capacity"; + + // Verify num_buckets calculation doesn't overflow + uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(large_capacity, 4); + ASSERT_GT(num_buckets, 0) << "Should not overflow to 0"; + ASSERT_EQ(num_buckets & (num_buckets - 1), 0) << "Should be power of 2"; + + // Test even larger capacity (100 million) + uint64_t huge_capacity = 100000000; + s = cuckoo_->Reserve(*ctx_, "huge_key", huge_capacity, 4, 500, 2); + ASSERT_TRUE(s.ok()) << "Should handle 100M capacity"; + + num_buckets = redis::CuckooFilter::OptimalNumBuckets(huge_capacity, 4); + ASSERT_GT(num_buckets, 0) << "Should not overflow with 100M capacity"; +} + +TEST_F(RedisCuckooFilterTest, ReserveMaxIterationsBoundary) { + // Test different max_iterations values + std::vector iterations = {1, 10, 100, 500, 1000, 5000, 65535}; + + for (size_t i = 0; i < iterations.size(); ++i) { + std::string test_key = "key_iter_" + std::to_string(iterations[i]); + auto s = cuckoo_->Reserve(*ctx_, test_key, 1000, 4, iterations[i], 2); + ASSERT_TRUE(s.ok()) << "max_iterations=" << iterations[i] << " should be valid"; + } + + // Test max_iterations = 0 (should fail) + auto s = cuckoo_->Reserve(*ctx_, "iter_zero", 1000, 4, 0, 2); + ASSERT_FALSE(s.ok()) << "max_iterations=0 should fail"; +} + +TEST_F(RedisCuckooFilterTest, ReserveEdgeCaseCapacities) { + // Test minimum valid capacity + auto s = cuckoo_->Reserve(*ctx_, "min_cap", 2, 4, 500, 2); + ASSERT_TRUE(s.ok()) << "capacity=2 should be valid (minimum)"; + + // Test small but valid capacities + std::vector small_capacities = {2, 3, 4, 5, 10, 50, 100}; + for (size_t i = 0; i < small_capacities.size(); ++i) { + std::string test_key = "small_" + std::to_string(small_capacities[i]); + s = cuckoo_->Reserve(*ctx_, test_key, small_capacities[i], 4, 500, 2); + ASSERT_TRUE(s.ok()) << "capacity=" << small_capacities[i] << " should be valid"; + + // Verify at least one bucket is created + uint32_t num_buckets = redis::CuckooFilter::OptimalNumBuckets(small_capacities[i], 4); + ASSERT_GE(num_buckets, 1) << "Should have at least 1 bucket for capacity=" << small_capacities[i]; + } +} + +TEST_F(RedisCuckooFilterTest, ReserveParameterCombinations) { + // Test various parameter combinations to ensure robustness + struct TestCase { + uint64_t capacity; + uint8_t bucket_size; + uint16_t max_iterations; + uint8_t expansion; + bool should_succeed; + std::string description; + }; + + std::vector test_cases = { + {1000, 4, 500, 2, true, "Standard parameters"}, + {2, 1, 1, 0, true, "Minimum all parameters"}, + {100000, 255, 65535, 255, true, "Maximum all parameters"}, + {1000, 2, 100, 1, true, "Small bucket, moderate iterations"}, + {50000, 8, 1000, 4, true, "Large capacity, large bucket"}, + {10, 16, 50, 0, true, "Small capacity, large bucket, no expansion"}, + }; + + for (size_t i = 0; i < test_cases.size(); ++i) { + const auto& tc = test_cases[i]; + std::string test_key = "combo_" + std::to_string(i); + auto s = cuckoo_->Reserve(*ctx_, test_key, tc.capacity, tc.bucket_size, tc.max_iterations, tc.expansion); + + if (tc.should_succeed) { + ASSERT_TRUE(s.ok()) << "Test case failed: " << tc.description; + } else { + ASSERT_FALSE(s.ok()) << "Test case should have failed: " << tc.description; + } + } +} \ No newline at end of file