diff --git a/crates/core/src/host/instance_env.rs b/crates/core/src/host/instance_env.rs index 96b876a0368..0d3d41632b1 100644 --- a/crates/core/src/host/instance_env.rs +++ b/crates/core/src/host/instance_env.rs @@ -23,7 +23,7 @@ use spacetimedb_lib::{http as st_http, ConnectionId, Identity, Timestamp}; use spacetimedb_primitives::{ColId, ColList, IndexId, TableId}; use spacetimedb_sats::{ bsatn::{self, ToBsatn}, - buffer::{CountWriter, TeeWriter}, + buffer::CountWriter, AlgebraicValue, ProductValue, }; use spacetimedb_schema::identifier::Identifier; @@ -343,16 +343,16 @@ impl InstanceEnv { fn project_cols_bsatn(buffer: &mut [u8], cols: ColList, row_ref: RowRef<'_>) -> usize { // We get back a col-list with the columns with generated values. // Write those back to `buffer` and then the encoded length to `row_len`. - let counter = CountWriter::default(); - let mut writer = TeeWriter::new(counter, buffer); - for col in cols.iter() { - // Read the column value to AV and then serialize. - let val = row_ref - .read_col::(col) - .expect("reading col as AV never panics"); - bsatn::to_writer(&mut writer, &val).unwrap(); - } - writer.w1.finish() + let (_, count) = CountWriter::run(buffer, |writer| { + for col in cols.iter() { + // Read the column value to AV and then serialize. + let val = row_ref + .read_col::(col) + .expect("reading col as AV never panics"); + bsatn::to_writer(writer, &val).unwrap(); + } + }); + count } pub fn insert(&self, table_id: TableId, buffer: &mut [u8]) -> Result { diff --git a/crates/datastore/src/locking_tx_datastore/committed_state.rs b/crates/datastore/src/locking_tx_datastore/committed_state.rs index 3472fae82d5..51948d914fd 100644 --- a/crates/datastore/src/locking_tx_datastore/committed_state.rs +++ b/crates/datastore/src/locking_tx_datastore/committed_state.rs @@ -888,7 +888,8 @@ impl CommittedState { let index = table.new_index(&algo, is_unique)?; // SAFETY: `index` was derived from `table`. 
- unsafe { table.insert_index(blob_store, index_id, index) }; + unsafe { table.insert_index(blob_store, index_id, index) } + .expect("rebuilding should not cause constraint violations"); index_id_map.insert(index_id, table_id); } Ok(()) diff --git a/crates/sats/src/buffer.rs b/crates/sats/src/buffer.rs index 56008fa77d4..f8b03fe3a15 100644 --- a/crates/sats/src/buffer.rs +++ b/crates/sats/src/buffer.rs @@ -329,6 +329,14 @@ pub struct CountWriter { } impl CountWriter { + /// Run `work` on `writer`, but also count the number of bytes written. + pub fn run(writer: W, work: impl FnOnce(&mut TeeWriter) -> R) -> (R, usize) { + let counter = Self::default(); + let mut writer = TeeWriter::new(writer, counter); + let ret = work(&mut writer); + (ret, writer.w2.finish()) + } + /// Consumes the counter and returns the final count. pub fn finish(self) -> usize { self.num_bytes diff --git a/crates/sats/src/product_value.rs b/crates/sats/src/product_value.rs index 634b4a935cc..2df0a3a8cfc 100644 --- a/crates/sats/src/product_value.rs +++ b/crates/sats/src/product_value.rs @@ -69,6 +69,14 @@ impl From for InvalidFieldError { } impl ProductValue { + /// Pushes a single value to he product. + pub fn push(self, val: impl Into) -> Self { + let mut vals: Vec<_> = self.elements.into(); + vals.reserve(1); + vals.push(val.into()); + Self::from(vals) + } + /// Borrow the value at field of `self` identified by `col_pos`. /// /// The `name` is non-functional and is only used for error-messages. diff --git a/crates/table/benches/page_manager.rs b/crates/table/benches/page_manager.rs index 70f5f5ddb7e..10fa708e8d2 100644 --- a/crates/table/benches/page_manager.rs +++ b/crates/table/benches/page_manager.rs @@ -763,7 +763,7 @@ fn make_table_with_index(unique: bool) -> (Table, IndexId) { let algo = BTreeAlgorithm { columns: cols }.into(); let idx = tbl.new_index(&algo, unique).unwrap(); // SAFETY: index was derived from the table. 
- unsafe { tbl.insert_index(&NullBlobStore, index_id, idx) }; + unsafe { tbl.insert_index(&NullBlobStore, index_id, idx) }.unwrap(); (tbl, index_id) } diff --git a/crates/table/proptest-regressions/table.txt b/crates/table/proptest-regressions/table.txt index fef12ce0d07..74159bc6d9e 100644 --- a/crates/table/proptest-regressions/table.txt +++ b/crates/table/proptest-regressions/table.txt @@ -12,3 +12,4 @@ cc 1f295db61a02ac3378f5ffcceb084637d2391bcc1758af6fb2df8355a713e998 # shrinks to cc 776d142680b35d7dad5b558fea7071b095f7e6a23c8549e9b32b452d5eebf92b # shrinks to (ty, val) = (ProductType { elements: [ProductTypeElement { name: None, algebraic_type: Builtin(String) }] }, ProductValue { elements: [String("\u{16af0}a®ਲ𒒀A 𑌅 ಎ꒐𑍇A A𐫫Aⷀ𑌵ૠ\u{b55} aㄱ \u{f99}a ")] }) cc 66d99531b8e513d0fd558f492f708d110e1e117dfc7f3f42188bcc57c23bb89e # shrinks to (ty, val) = (ProductType { elements: [ProductTypeElement { name: None, algebraic_type: Builtin(Map(MapType { key_ty: Builtin(U8), ty: Builtin(Map(MapType { key_ty: Builtin(I32), ty: Builtin(F32) })) })) }] }, ProductValue { elements: [Map({U8(0): Map({I32(-4): F32(Total(0.0)), I32(-3): F32(Total(0.0)), I32(-2): F32(Total(-0.0)), I32(-1): F32(Total(-0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0)), I32(3): F32(Total(0.0))}), U8(1): Map({I32(-5): F32(Total(0.0)), I32(-4): F32(Total(0.0)), I32(-3): F32(Total(0.0)), I32(-2): F32(Total(0.0)), I32(-1): F32(Total(-0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0)), I32(3): F32(Total(0.0)), I32(4): F32(Total(0.0)), I32(5): F32(Total(0.0)), I32(6): F32(Total(0.0)), I32(7): F32(Total(0.0))}), U8(2): Map({I32(-3): F32(Total(-0.0)), I32(-2): F32(Total(0.0)), I32(-1): F32(Total(0.0)), I32(0): F32(Total(-0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0)), I32(3): F32(Total(0.0))}), U8(3): Map({I32(-10): F32(Total(0.0)), I32(-9): F32(Total(0.0)), I32(-8): F32(Total(-0.0)), I32(-7): F32(Total(0.0)), I32(-6): F32(Total(0.0)), 
I32(-5): F32(Total(0.0)), I32(-4): F32(Total(0.0)), I32(-3): F32(Total(0.0)), I32(-2): F32(Total(0.0)), I32(-1): F32(Total(0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0))}), U8(4): Map({I32(-7): F32(Total(0.0)), I32(-6): F32(Total(0.0)), I32(-5): F32(Total(0.0)), I32(-4): F32(Total(0.0)), I32(-3): F32(Total(0.0)), I32(-2): F32(Total(0.0)), I32(-1): F32(Total(0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0)), I32(3): F32(Total(0.0))}), U8(5): Map({I32(-9): F32(Total(0.0)), I32(-8): F32(Total(0.0)), I32(-7): F32(Total(0.0)), I32(-6): F32(Total(0.0)), I32(-5): F32(Total(0.0)), I32(-4): F32(Total(0.0)), I32(-3): F32(Total(0.0)), I32(-2): F32(Total(0.0)), I32(-1): F32(Total(0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0)), I32(3): F32(Total(0.0)), I32(4): F32(Total(0.0)), I32(5): F32(Total(0.0))}), U8(6): Map({I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0))}), U8(7): Map({I32(-4): F32(Total(0.0)), I32(-3): F32(Total(0.0)), I32(-2): F32(Total(0.0)), I32(-1): F32(Total(0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(-0.0)), I32(3): F32(Total(0.0))}), U8(8): Map({I32(-7): F32(Total(0.0)), I32(-6): F32(Total(-0.0)), I32(-5): F32(Total(0.0)), I32(-4): F32(Total(0.0)), I32(-3): F32(Total(0.0)), I32(-2): F32(Total(0.0)), I32(-1): F32(Total(0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0)), I32(3): F32(Total(0.0)), I32(4): F32(Total(0.0)), I32(5): F32(Total(0.0)), I32(6): F32(Total(-0.0)), I32(7): F32(Total(0.0))}), U8(9): Map({I32(-1349171619): F32(Total(418648100.0)), I32(-665792478): F32(Total(-5.3081414e23)), I32(-1): F32(Total(0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0)), I32(3): F32(Total(0.0)), I32(5): F32(Total(-0.0)), I32(906732021): F32(Total(1.952517e16)), I32(1965197035): F32(Total(1020.84216))}), U8(11): Map({I32(-7): F32(Total(0.0)), I32(-6): 
F32(Total(0.0)), I32(-5): F32(Total(0.0)), I32(-4): F32(Total(0.0)), I32(-3): F32(Total(0.0)), I32(-2): F32(Total(0.0)), I32(-1): F32(Total(0.0)), I32(0): F32(Total(0.0)), I32(1): F32(Total(0.0)), I32(2): F32(Total(0.0)), I32(3): F32(Total(0.0)), I32(4): F32(Total(0.0)), I32(5): F32(Total(0.0)), I32(6): F32(Total(0.0))})})] }) cc 7f478c4dd0f24e715a74949c6d06af8ca2b4c8b82fae4f53c953a2b323cff851 # shrinks to (ty, val) = (ProductType { elements: [ProductTypeElement { name: None, algebraic_type: Builtin(Array(ArrayType { elem_ty: Builtin(Map(MapType { key_ty: Builtin(U64), ty: Builtin(Bool) })) })) }] }, ProductValue { elements: [Array([{U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false), U64(6): Bool(false), U64(7): Bool(false), U64(8): Bool(false), U64(9): Bool(false), U64(10): Bool(false), U64(11): Bool(false), U64(12): Bool(false), U64(13): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false), U64(6): Bool(false), U64(7): Bool(false), U64(8): Bool(false), U64(9): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false), U64(6): Bool(false), U64(7): Bool(false), U64(8): Bool(false), U64(9): Bool(false), U64(10): Bool(false), U64(11): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false), U64(6): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false), U64(6): Bool(false), U64(7): Bool(false), U64(8): Bool(false), U64(9): Bool(false), U64(10): Bool(false), U64(11): Bool(false), U64(12): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false), U64(6): Bool(false), U64(7): 
Bool(false), U64(8): Bool(false), U64(9): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false), U64(6): Bool(false), U64(7): Bool(false), U64(8): Bool(false), U64(9): Bool(false), U64(10): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false), U64(6): Bool(false), U64(7): Bool(false), U64(8): Bool(false), U64(9): Bool(false), U64(10): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false), U64(2): Bool(false), U64(3): Bool(false), U64(4): Bool(false), U64(5): Bool(false)}, {U64(0): Bool(false), U64(1): Bool(false)}])] }) +cc 01bfd4449bee7eaa0b61b60792baed8d52d3589f4a5bb313bf057194a6248a83 diff --git a/crates/table/src/table.rs b/crates/table/src/table.rs index 3540c5bdff6..bd724b8b851 100644 --- a/crates/table/src/table.rs +++ b/crates/table/src/table.rs @@ -1413,36 +1413,57 @@ impl Table { /// # Safety /// /// Caller must promise that `index` was constructed with the same row type/layout as this table. - pub unsafe fn insert_index(&mut self, blob_store: &dyn BlobStore, index_id: IndexId, mut index: TableIndex) { + pub unsafe fn insert_index( + &mut self, + blob_store: &dyn BlobStore, + index_id: IndexId, + mut index: TableIndex, + ) -> Result<(), String> { let rows = self.scan_rows(blob_store); // SAFETY: Caller promised that table's row type/layout // matches that which `index` was constructed with. // It follows that this applies to any `rows`, as required. 
let violation = unsafe { index.build_from_rows(rows) }; - violation.unwrap_or_else(|ptr| { - let index_schema = &self.schema.indexes.iter().find(|index_schema| index_schema.index_id == index_id).expect("Index should exist"); - let indexed_column = if let IndexAlgorithm::BTree(BTreeAlgorithm { columns }) = &index_schema.index_algorithm { - Some(columns) - } else { None }; - let indexed_column = indexed_column.and_then(|columns| columns.as_singleton()); - let indexed_column_info = indexed_column.and_then(|column| self.schema.get_column(column.idx())); + violation.map_err(|ptr| { // SAFETY: `ptr` just came out of `self.scan_rows`, so it is present. let row = unsafe { self.get_row_ref_unchecked(blob_store, ptr) }.to_product_value(); - panic!( - "Adding index `{}` {:?} to table `{}` {:?} on column `{}` {:?} should cause no unique constraint violations. - -Found violation at pointer {ptr:?} to row {:?}.", - index_schema.index_name, - index_schema.index_id, - self.schema.table_name, - self.schema.table_id, - indexed_column_info.map(|column| &column.col_name[..]).unwrap_or("unknown column"), - indexed_column, - row, - ); - }); + + if let Some(index_schema) = self.schema.indexes.iter().find(|index_schema| index_schema.index_id == index_id) { + let indexed_column = if let IndexAlgorithm::BTree(BTreeAlgorithm { columns }) = &index_schema.index_algorithm { + Some(columns) + } else { + None + }; + let indexed_column = indexed_column.and_then(|columns| columns.as_singleton()); + let indexed_column_info = indexed_column.and_then(|column| self.schema.get_column(column.idx())); + + format!( + "Adding index `{}` {:?} to table `{}` {:?} on column `{}` {:?} should cause no unique constraint violations.\ + Found violation at pointer {ptr:?} to row {:?}.", + index_schema.index_name, + index_schema.index_id, + self.schema.table_name, + self.schema.table_id, + indexed_column_info.map(|column| &column.col_name[..]).unwrap_or("unknown column"), + indexed_column, + row, + ) + } else { + 
format!( + "Adding index to table `{}` {:?} on columns `{:?}` with key type {:?} should cause no unique constraint violations.\ + Found violation at pointer {ptr:?} to row {:?}.", + self.schema.table_name, + self.schema.table_id, + index.indexed_columns, + index.key_type, + row, + ) + } + })?; + // SAFETY: Forward caller requirement. unsafe { self.add_index(index_id, index) }; + Ok(()) } /// Adds an index to the table without populating. @@ -2453,7 +2474,7 @@ pub(crate) mod test { let index = table.new_index(&algo, true).unwrap(); // SAFETY: Index was derived from `table`. - unsafe { table.insert_index(&NullBlobStore, index_schema.index_id, index) }; + unsafe { table.insert_index(&NullBlobStore, index_schema.index_id, index) }.unwrap(); // Reserve a page so that we can check the hash. let pi = table.inner.pages.reserve_empty_page(&pool, table.row_size()).unwrap(); @@ -2553,6 +2574,8 @@ pub(crate) mod test { ty: ProductType, vals: Vec, indexed_columns: ColList, + index_kind: IndexKind, + is_unique: bool, ) -> Result<(), TestCaseError> { let pool = PagePool::new_for_test(); let mut blob_store = HashMapBlobStore::default(); @@ -2565,13 +2588,13 @@ pub(crate) mod test { // We haven't added any indexes yet, so there should be 0 rows in indexes. prop_assert_eq!(table.num_rows_in_indexes(), 0); - let index_id = IndexId(0); + let index_id = IndexId::SENTINEL; - let index = TableIndex::new(&ty, indexed_columns.clone(), IndexKind::BTree, false).unwrap(); + let index = TableIndex::new(&ty, indexed_columns.clone(), index_kind, is_unique).unwrap(); // Add an index on column 0. // Safety: // We're using `ty` as the row type for both `table` and the new index. - unsafe { table.insert_index(&blob_store, index_id, index) }; + prop_assume!(unsafe { table.insert_index(&blob_store, index_id, index) }.is_ok()); // We have one index, which should be fully populated, // so in total we should have the same number of rows in indexes as we have rows. 
@@ -2595,14 +2618,15 @@ pub(crate) mod test { let key_size_in_pvs = vals .iter() .map(|row| crate::table_index::KeySize::key_size_in_bytes(&row.project(&indexed_columns).unwrap()) as u64) - .sum(); + .sum::(); prop_assert_eq!(index.num_key_bytes(), key_size_in_pvs); let index = TableIndex::new(&ty, indexed_columns, IndexKind::BTree, false).unwrap(); // Add a duplicate of the same index, so we can check that all above quantities double. // Safety: // As above, we're using `ty` as the row type for both `table` and the new index. - unsafe { table.insert_index(&blob_store, IndexId(1), index) }; + unsafe { table.insert_index(&blob_store, IndexId(1), index) } + .expect("already inserted this index, should not error"); prop_assert_eq!(table.num_rows_in_indexes(), table.num_rows() * 2); prop_assert_eq!(table.bytes_used_by_index_keys(), key_size_in_pvs * 2); @@ -2722,13 +2746,21 @@ pub(crate) mod test { } #[test] - fn index_size_reporting_matches_slow_implementations_single_column((ty, vals) in generate_typed_row_vec(1..SIZE, 128, 2048)) { - test_index_size_reporting(ty, vals, ColList::from(ColId(0)))?; + fn index_size_reporting_matches_slow_implementations_single_column( + (ty, vals) in generate_typed_row_vec(1..SIZE, 128, 2048), + index_kind: IndexKind, + is_unique: bool + ) { + test_index_size_reporting(ty, vals, [0].into(), index_kind, is_unique)?; } #[test] - fn index_size_reporting_matches_slow_implementations_two_column((ty, vals) in generate_typed_row_vec(2..SIZE, 128, 2048)) { - test_index_size_reporting(ty, vals, ColList::from([ColId(0), ColId(1)]))?; + fn index_size_reporting_matches_slow_implementations_two_column( + (ty, vals) in generate_typed_row_vec(2..SIZE, 128, 2048), + index_kind: IndexKind, + is_unique: bool + ) { + test_index_size_reporting(ty, vals, [0, 1].into(), index_kind, is_unique)? 
} } diff --git a/crates/table/src/table_index/bytes_key.rs b/crates/table/src/table_index/bytes_key.rs index 3dfc7c9cf0a..2ab7caf99e7 100644 --- a/crates/table/src/table_index/bytes_key.rs +++ b/crates/table/src/table_index/bytes_key.rs @@ -1,11 +1,16 @@ +use super::key_size::KeySize; use super::{DecodeResult, RowRef}; use crate::indexes::RowPointer; +use core::cmp::Ordering; +use core::hash::{Hash, Hasher}; use core::mem; +use core::ops::Deref; +use spacetimedb_lib::buffer::{CountWriter, TeeWriter}; use spacetimedb_memory_usage::MemoryUsage; use spacetimedb_primitives::ColList; use spacetimedb_sats::bsatn::{DecodeError, Deserializer, Serializer}; use spacetimedb_sats::de::{DeserializeSeed, Error as _}; -use spacetimedb_sats::{u256, AlgebraicType, AlgebraicValue, ProductTypeElement, Serialize as _, WithTypespace}; +use spacetimedb_sats::{i256, u256, AlgebraicType, AlgebraicValue, ProductTypeElement, Serialize as _, WithTypespace}; /// A key for an all-primitive multi-column index /// serialized to a byte array. @@ -16,15 +21,62 @@ use spacetimedb_sats::{u256, AlgebraicType, AlgebraicValue, ProductTypeElement, /// which is the same as little-endian encoding of the keys for primitive types. /// /// As we cannot have too many different `N`s, -/// we have a few `N`s, where each is a power of 2. +/// we have a few `N`s, where `N = 2^x - 1`. /// A key is then padded with zeroes to the nearest `N`. -/// For example, a key `(x: u8, y: u16, z: u32)` for a 3-column index -/// would have `N = 1 + 2 + 4 = 7` but would be padded to `N = 8`. -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)] -pub(super) struct BytesKey([u8; N]); +/// For example, a key `(y: u16, z: u32)` for a 2-column index +/// would have `N = 2 + 4 = 6` but would be padded to `N = 7`. +/// The reason for the `-1`, i.e., `N = 7` and not `N = 8` +/// is because `length` takes up one byte. +/// +/// The `length` stores the number of actual bytes used by the key. 
+#[derive(Debug, Eq, Clone, Copy)] +pub(super) struct BytesKey { + length: u8, + bytes: [u8; N], +} impl MemoryUsage for BytesKey {} +impl KeySize for BytesKey { + type MemoStorage = u64; + + fn key_size_in_bytes(&self) -> usize { + self.length as usize + } +} + +impl Deref for BytesKey { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.bytes[0..self.length as usize] + } +} + +impl PartialEq for BytesKey { + fn eq(&self, other: &Self) -> bool { + self.deref() == other.deref() + } +} + +impl PartialOrd for BytesKey { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for BytesKey { + fn cmp(&self, other: &Self) -> Ordering { + self.deref().cmp(other.deref()) + } +} + +impl Hash for BytesKey { + fn hash(&self, state: &mut H) { + self.deref().hash(state); + } +} + /// A difference between btree indices and hash indices /// is that the former btree indices store keys and values separately, /// i.e., as `([K], [RowPointer])` @@ -32,10 +84,23 @@ impl MemoryUsage for BytesKey {} /// i.e., as `([K, RowPointer])`. /// /// For hash indices, it's therefore profitable to ensure -/// that the key and the value together fit into an `N` that is a power of 2. -/// An `N` that is a power of 2 is well aligned around cache line sizes. -pub(super) const fn size_sub_row_pointer(n: usize) -> usize { - n - mem::size_of::() +/// that the key and the value together fit into an `N + 1` that is a power of 2. +/// An `N + 1` that is a power of 2 is well aligned around cache line sizes. +pub(super) const fn size_for_hash_bytes_key(n: usize) -> usize { + size_for_btree_bytes_key(n) - mem::size_of::() +} + +/// A difference between btree indices and hash indices +/// is that the former btree indices store keys and values separately, +/// i.e., as `([K], [RowPointer])` +/// whereas hash indices store them together, +/// i.e., as `([K, RowPointer])`. 
+/// +/// For btree indices, it's therefore sufficient to enure +/// that the key alone fits into an `N + 1` that is a power of 2. +/// An `N + 1` that is a power of 2 is well aligned around cache line sizes. +pub(super) const fn size_for_btree_bytes_key(n: usize) -> usize { + n - 1 } /// Returns the number of bytes required at most to store a key at `ty` @@ -43,8 +108,9 @@ pub(super) const fn size_sub_row_pointer(n: usize) -> usize { /// /// If keys at `ty` are incompatible with fixed byte keys, /// e.g., because they are of unbounded length, +/// or because `is_ranged_idx` and `ty` contains a float, /// then `None` is returned. -pub(super) fn required_bytes_key_size(ty: &AlgebraicType) -> Option { +pub(super) fn required_bytes_key_size(ty: &AlgebraicType, is_ranged_idx: bool) -> Option { use AlgebraicType::*; match ty { @@ -55,10 +121,18 @@ pub(super) fn required_bytes_key_size(ty: &AlgebraicType) -> Option { // For sum, we report the greatest possible fixed size. // A key may be of variable size, a long as it fits within an upper bound. + // + // It's valid to use `RangeCompatBytesKey`-ified sums in range index, + // i.e., when `is_range_idx`, + // as `Ord for AlgebraicValue` delegates to `Ord for SumValue` + // which compares the `tag` first and the payload (`value`) second, + // The `RangeCompatBytesKey` encoding of sums places the `tag` first and the payload second. + // When comparing two `[u8]` slices with encoded sums, + // this produces an ordering that also compares the `tag` first and the payload second. 
Sum(ty) => { let mut max_size = 0; for var in &ty.variants { - let variant_size = required_bytes_key_size(&var.algebraic_type)?; + let variant_size = required_bytes_key_size(&var.algebraic_type, is_ranged_idx)?; max_size = max_size.max(variant_size); } // The sum tag is represented as a u8 in BSATN, @@ -70,11 +144,15 @@ pub(super) fn required_bytes_key_size(ty: &AlgebraicType) -> Option { Product(ty) => { let mut total_size = 0; for elem in &ty.elements { - total_size += required_bytes_key_size(&elem.algebraic_type)?; + total_size += required_bytes_key_size(&elem.algebraic_type, is_ranged_idx)?; } Some(total_size) } + // Floats are stored in IEEE 754 format, + // so their byte representation is not order-preserving. + F32 | F64 if is_ranged_idx => None, + // Primitives: Bool | U8 | I8 => Some(mem::size_of::()), U16 | I16 => Some(mem::size_of::()), @@ -85,7 +163,42 @@ pub(super) fn required_bytes_key_size(ty: &AlgebraicType) -> Option { } } +/// Validates BSATN `byte` to conform to `seed`. +/// +/// The BSATN can originate from untrusted sources, e.g., from module code. +/// This also means that e.g., a `BytesKey` can be trusted to hold valid BSATN +/// for the key type, which we can rely on in e.g., `decode_algebraic_value`, +/// which isn't used in a context where it would be appropriate to fail. +/// +/// Another reason to validate is that we wish for `BytesKey` to be strictly +/// an optimization and not allow things that would be rejected by the non-optimized code. +/// +/// After validating, we also don't need to validate that `bytes` +/// will fit into e.g., a `BytesKey` +/// since if all parts that are encoded into it are valid according to a key type, +/// then `bytes` cannot be longer than `N`. 
+fn validate<'a, 'de, S: 'a + ?Sized>(seed: &'a S, mut bytes: &'de [u8]) -> DecodeResult<()> +where + WithTypespace<'a, S>: DeserializeSeed<'de>, +{ + WithTypespace::empty(seed).validate(Deserializer::new(&mut bytes))?; + + if !bytes.is_empty() { + return Err(DecodeError::custom(format_args!( + "after decoding, there are {} extra bytes", + bytes.len() + ))); + } + + Ok(()) +} + impl BytesKey { + fn new(length: usize, bytes: [u8; N]) -> Self { + let length = length as _; + Self { length, bytes } + } + /// Decodes `self` as an [`AlgebraicValue`] at `key_type`. /// /// An incorrect `key_type`, @@ -94,22 +207,167 @@ impl BytesKey { /// The method could also silently succeed /// if the passed `key_type` incidentally happens to be compatible the stored bytes in `self`. pub(super) fn decode_algebraic_value(&self, key_type: &AlgebraicType) -> AlgebraicValue { - AlgebraicValue::decode(key_type, &mut self.0.as_slice()) + AlgebraicValue::decode(key_type, &mut self.deref()) .expect("A `BytesKey` should by construction always deserialize to the right `key_type`") } - /// Ensure bytes of length `got` fit in `N` or return an error. - fn ensure_key_fits(got: usize) -> DecodeResult<()> { - if got > N { - return Err(DecodeError::custom(format_args!( - "key provided is too long, expected at most {N}, but got {got}" - ))); + /// Decodes `bytes` in BSATN to a [`BytesKey`] + /// by copying over the bytes if they fit into the key. + pub(super) fn from_bsatn(ty: &AlgebraicType, bytes: &[u8]) -> DecodeResult { + // Validate the BSATN. + validate(ty, bytes)?; + // Copy the bytes over. 
+ let got = bytes.len(); + let mut arr = [0; N]; + arr[..got].copy_from_slice(bytes); + Ok(Self::new(got, arr)) + } + + fn via_serializer(work: impl FnOnce(Serializer<'_, TeeWriter<&mut [u8], CountWriter>>)) -> Self { + let mut bytes = [0; N]; + let (_, length) = CountWriter::run(bytes.as_mut_slice(), |writer| { + let ser = Serializer::new(writer); + work(ser) + }); + Self::new(length, bytes) + } + + /// Serializes the columns `cols` in `row_ref` to a [`BytesKey`]. + /// + /// It's assumed that `row_ref` projected to `cols` + /// will fit into `N` bytes when serialized into BSATN. + /// The method panics otherwise. + /// + /// SAFETY: Any `col` in `cols` is in-bounds of `row_ref`'s layout. + pub(super) unsafe fn from_row_ref(cols: &ColList, row_ref: RowRef<'_>) -> Self { + Self::via_serializer(|ser| { + unsafe { row_ref.serialize_columns_unchecked(cols, ser) } + .expect("should've serialized a `row_ref` to BSATN successfully"); + }) + } + + /// Serializes `av` to a [`BytesKey`]. + /// + /// It's assumed that `av` + /// will fit into `N` bytes when serialized into BSATN. + /// The method panics otherwise. + pub(super) fn from_algebraic_value(av: &AlgebraicValue) -> Self { + Self::via_serializer(|ser| { + av.serialize_into_bsatn(ser) + .expect("should've serialized an `AlgebraicValue` to BSATN successfully") + }) + } +} + +/// A key for an all-primitive multi-column index +/// serialized to a byte array. +/// +/// These keys are derived from [`BytesKey`] +/// but are post-processed to work with ranges, +/// unlike the former type, +/// which only work with point indices (e.g., hash indices). +/// +/// The post-processing converts how some types are stored in the encoding: +/// - unsigned integer types `uN`, where `N > 8` from little-endian to big-endian. +/// - signed integers are shifted such that `iN::MIN` is stored as `0` +/// and `iN:MAX` is stored as `uN::MAX`. +/// +/// The `length` stores the number of actual bytes used by the key. 
+#[derive(Debug, Eq, Clone, Copy)] +pub(super) struct RangeCompatBytesKey { + length: u8, + bytes: [u8; N], +} + +impl MemoryUsage for RangeCompatBytesKey {} + +impl KeySize for RangeCompatBytesKey { + type MemoStorage = u64; + + fn key_size_in_bytes(&self) -> usize { + self.length as usize + } +} + +impl Deref for RangeCompatBytesKey { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.bytes[0..self.length as usize] + } +} + +impl PartialEq for RangeCompatBytesKey { + fn eq(&self, other: &Self) -> bool { + self.deref() == other.deref() + } +} + +impl PartialOrd for RangeCompatBytesKey { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for RangeCompatBytesKey { + fn cmp(&self, other: &Self) -> Ordering { + self.deref().cmp(other.deref()) + } +} + +impl Hash for RangeCompatBytesKey { + fn hash(&self, state: &mut H) { + self.deref().hash(state); + } +} + +/// Splits `slice` into the first `N` bytes converting the former via `map_bytes` +/// and returning the rest. +fn split_map_write_back(slice: &mut [u8], map_bytes: impl FnOnce([u8; N]) -> [u8; N]) -> &mut [u8] { + let (bytes, rest) = slice.split_first_chunk_mut().unwrap(); + *bytes = map_bytes(*bytes); + rest +} + +impl RangeCompatBytesKey { + fn new(length: usize, bytes: [u8; N]) -> Self { + let length = length as _; + Self { length, bytes } + } + + /// Decodes `self` as an [`AlgebraicValue`] at `key_type`. + /// + /// An incorrect `key_type`, + /// i.e., one other than what was used when the index was created, + /// may lead to a panic, but this is not guaranteed. + /// The method could also silently succeed + /// if the passed `key_type` incidentally happens to be compatible the stored bytes in `self`. 
+ pub(super) fn decode_algebraic_value(&self, key_type: &AlgebraicType) -> AlgebraicValue { + Self::to_bytes_key(*self, key_type).decode_algebraic_value(key_type) + } + + /// Decodes `prefix` in BSATN to a [`RangeCompatBytesKey`] + /// by copying over `prefix` and massaging if they fit into the key. + pub(super) fn from_bsatn_prefix(prefix: &[u8], prefix_types: &[ProductTypeElement]) -> DecodeResult { + // Validate the BSATN. + validate(prefix_types, prefix)?; + + // Copy the `prefix` over. + let mut bytes = [0; N]; + let got = prefix.len(); + bytes[..got].copy_from_slice(prefix); + + // Massage the `bytes`. + let mut slice = bytes.as_mut_slice(); + for ty in prefix_types { + slice = Self::process_from_bytes_key(slice, &ty.algebraic_type); } - Ok(()) + + Ok(Self::new(got, bytes)) } - /// Decodes `prefix` and `endpoint` in BSATN to a [`BytesKey`] - /// by copying over both if they fit into the key. + /// Decodes `prefix` and `endpoint` in BSATN to a [`RangeCompatBytesKey`] + /// by copying over both and massaging if they fit into the key. pub(super) fn from_bsatn_prefix_and_endpoint( prefix: &[u8], prefix_types: &[ProductTypeElement], @@ -117,39 +375,34 @@ impl BytesKey { range_type: &AlgebraicType, ) -> DecodeResult { // Validate the BSATN. - // - // The BSATN can originate from untrusted sources, e.g., from module code. - // This also means that a `BytesKey` can be trusted to hold valid BSATN - // for the key type, which we can rely on in e.g., `decode_algebraic_value`, - // which isn't used in a context where it would be appropriate to fail. - // - // Another reason to validate is that we wish for `BytesKey` to be strictly - // an optimization and not allow things that would be rejected by the non-optimized code. - WithTypespace::empty(prefix_types).validate(Deserializer::new(&mut { prefix }))?; - WithTypespace::empty(range_type).validate(Deserializer::new(&mut { endpoint }))?; - // Check that the `prefix` and the `endpoint` together fit into the key. 
+ validate(prefix_types, prefix)?; + validate(range_type, endpoint)?; + + // Sum up the lengths. let prefix_len = prefix.len(); let endpoint_len = endpoint.len(); - Self::ensure_key_fits(prefix_len + endpoint_len)?; + let total_len = prefix_len + endpoint_len; + // Copy the `prefix` and the `endpoint` over. - let mut arr = [0; N]; - arr[..prefix_len].copy_from_slice(prefix); - arr[prefix_len..prefix_len + endpoint_len].copy_from_slice(endpoint); - Ok(Self(arr)) + let mut bytes = [0; N]; + bytes[..prefix_len].copy_from_slice(prefix); + bytes[prefix_len..total_len].copy_from_slice(endpoint); + + // Massage the bytes. + let mut slice = bytes.as_mut_slice(); + for ty in prefix_types { + slice = Self::process_from_bytes_key(slice, &ty.algebraic_type); + } + Self::process_from_bytes_key(slice, range_type); + + Ok(Self::new(total_len, bytes)) } - /// Decodes `bytes` in BSATN to a [`BytesKey`] + /// Decodes `bytes` in BSATN to a [`RangeCompatBytesKey`] /// by copying over the bytes if they fit into the key. pub(super) fn from_bsatn(ty: &AlgebraicType, bytes: &[u8]) -> DecodeResult { - // Validate the BSATN. See `Self::from_bsatn_prefix_and_endpoint` for more details. - WithTypespace::empty(ty).validate(Deserializer::new(&mut { bytes }))?; - // Check that the `bytes` fit into the key. - let got = bytes.len(); - Self::ensure_key_fits(got)?; - // Copy the bytes over. - let mut arr = [0; N]; - arr[..got].copy_from_slice(bytes); - Ok(Self(arr)) + let key = BytesKey::from_bsatn(ty, bytes)?; + Ok(Self::from_bytes_key(key, ty)) } /// Serializes the columns `cols` in `row_ref` to a [`BytesKey`]. @@ -159,13 +412,10 @@ impl BytesKey { /// The method panics otherwise. /// /// SAFETY: Any `col` in `cols` is in-bounds of `row_ref`'s layout. 
- pub(super) unsafe fn from_row_ref(cols: &ColList, row_ref: RowRef<'_>) -> Self { - let mut arr = [0; N]; - let mut sink = arr.as_mut_slice(); - let ser = Serializer::new(&mut sink); - unsafe { row_ref.serialize_columns_unchecked(cols, ser) } - .expect("should've serialized a `row_ref` to BSATN successfully"); - Self(arr) + pub(super) unsafe fn from_row_ref(cols: &ColList, row_ref: RowRef<'_>, ty: &AlgebraicType) -> Self { + // SAFETY: same as caller requirements. + let key = unsafe { BytesKey::from_row_ref(cols, row_ref) }; + Self::from_bytes_key(key, ty) } /// Serializes `av` to a [`BytesKey`]. @@ -173,26 +423,121 @@ impl BytesKey { /// It's assumed that `av` /// will fit into `N` bytes when serialized into BSATN. /// The method panics otherwise. - pub(super) fn from_algebraic_value(av: &AlgebraicValue) -> Self { - let mut arr = [0; N]; - let mut sink = arr.as_mut_slice(); - let ser = Serializer::new(&mut sink); - av.serialize_into_bsatn(ser) - .expect("should've serialized an `AlgebraicValue` to BSATN successfully"); - Self(arr) + pub(super) fn from_algebraic_value(av: &AlgebraicValue, ty: &AlgebraicType) -> Self { + let key = BytesKey::from_algebraic_value(av); + Self::from_bytes_key(key, ty) + } + + fn from_bytes_key(key: BytesKey, ty: &AlgebraicType) -> Self { + let BytesKey { length, mut bytes } = key; + Self::process_from_bytes_key(bytes.as_mut_slice(), ty); + Self { length, bytes } + } + + fn process_from_bytes_key<'a>(mut slice: &'a mut [u8], ty: &AlgebraicType) -> &'a mut [u8] { + use AlgebraicType::*; + match ty { + // For sums, read the tag and process the active variant. + Sum(ty) => { + let (&mut tag, rest) = slice.split_first_mut().unwrap(); + let ty = &ty.variants[tag as usize].algebraic_type; + Self::process_from_bytes_key(rest, ty) + } + // For products, just process each field in sequence. 
+            Product(ty) => {
+                for ty in &ty.elements {
+                    slice = Self::process_from_bytes_key(slice, &ty.algebraic_type);
+                }
+                slice
+            }
+            // No need to do anything as these are only a single byte long.
+            Bool | U8 => &mut slice[1..],
+            // For unsigned integers, read them as LE and write back as BE.
+            U16 => split_map_write_back(slice, |b| u16::from_le_bytes(b).to_be_bytes()),
+            U32 => split_map_write_back(slice, |b| u32::from_le_bytes(b).to_be_bytes()),
+            U64 => split_map_write_back(slice, |b| u64::from_le_bytes(b).to_be_bytes()),
+            U128 => split_map_write_back(slice, |b| u128::from_le_bytes(b).to_be_bytes()),
+            U256 => split_map_write_back(slice, |b| u256::from_le_bytes(b).to_be_bytes()),
+            // For signed integers, read them as LE, make them unsigned, and write back as BE.
+            I8 => split_map_write_back(slice, |b| i8::from_le_bytes(b).wrapping_sub(i8::MIN).to_be_bytes()),
+            I16 => split_map_write_back(slice, |b| i16::from_le_bytes(b).wrapping_sub(i16::MIN).to_be_bytes()),
+            I32 => split_map_write_back(slice, |b| i32::from_le_bytes(b).wrapping_sub(i32::MIN).to_be_bytes()),
+            I64 => split_map_write_back(slice, |b| i64::from_le_bytes(b).wrapping_sub(i64::MIN).to_be_bytes()),
+            I128 => split_map_write_back(slice, |b| i128::from_le_bytes(b).wrapping_sub(i128::MIN).to_be_bytes()),
+            I256 => split_map_write_back(slice, |b| i256::from_le_bytes(b).wrapping_sub(i256::MIN).to_be_bytes()),
+            // Refs don't exist here and
+            // arrays and strings are of unbounded length.
+            // For floats, we haven't considered them yet.
+            Ref(_) | Array(_) | String | F32 | F64 => unreachable!(),
+        }
+    }
+
+    fn to_bytes_key(key: Self, ty: &AlgebraicType) -> BytesKey<N> {
+        fn process<'a>(mut slice: &'a mut [u8], ty: &AlgebraicType) -> &'a mut [u8] {
+            use AlgebraicType::*;
+            match ty {
+                // For sums, read the tag and process the active variant.
+                Sum(ty) => {
+                    let (&mut tag, rest) = slice.split_first_mut().unwrap();
+                    let ty = &ty.variants[tag as usize].algebraic_type;
+                    process(rest, ty)
+                }
+                // For products, just process each field in sequence.
+                Product(ty) => {
+                    for ty in &ty.elements {
+                        slice = process(slice, &ty.algebraic_type);
+                    }
+                    slice
+                }
+                // No need to do anything as these are only a single byte long.
+                Bool | U8 => &mut slice[1..],
+                // For unsigned integers, read them as BE and write back as LE.
+                U16 => split_map_write_back(slice, |b| u16::from_be_bytes(b).to_le_bytes()),
+                U32 => split_map_write_back(slice, |b| u32::from_be_bytes(b).to_le_bytes()),
+                U64 => split_map_write_back(slice, |b| u64::from_be_bytes(b).to_le_bytes()),
+                U128 => split_map_write_back(slice, |b| u128::from_be_bytes(b).to_le_bytes()),
+                U256 => split_map_write_back(slice, |b| u256::from_be_bytes(b).to_le_bytes()),
+                // For signed integers, read them as BE, make them signed again, and write back as LE.
+                I8 => split_map_write_back(slice, |b| i8::from_be_bytes(b).wrapping_add(i8::MIN).to_le_bytes()),
+                I16 => split_map_write_back(slice, |b| i16::from_be_bytes(b).wrapping_add(i16::MIN).to_le_bytes()),
+                I32 => split_map_write_back(slice, |b| i32::from_be_bytes(b).wrapping_add(i32::MIN).to_le_bytes()),
+                I64 => split_map_write_back(slice, |b| i64::from_be_bytes(b).wrapping_add(i64::MIN).to_le_bytes()),
+                I128 => split_map_write_back(slice, |b| i128::from_be_bytes(b).wrapping_add(i128::MIN).to_le_bytes()),
+                I256 => split_map_write_back(slice, |b| i256::from_be_bytes(b).wrapping_add(i256::MIN).to_le_bytes()),
+                // Refs don't exist here and
+                // arrays and strings are of unbounded length.
+                // For floats, we haven't considered them yet.
+                Ref(_) | Array(_) | String | F32 | F64 => unreachable!(),
+            }
+        }
+
+        let Self { length, mut bytes } = key;
+        process(bytes.as_mut_slice(), ty);
+        BytesKey { length, bytes }
+    }
+
+    /// Extend the length to `N` by filling with `u8::MAX`.
+ pub(super) fn add_max_suffix(mut self) -> Self { + let len = self.len(); + self.bytes[len..].fill(u8::MAX); + self.length = N as u8; + self } } #[cfg(test)] mod test { use super::*; + use proptest::array::uniform; use proptest::prelude::*; use spacetimedb_sats::bsatn::to_len; - use spacetimedb_sats::proptest::generate_typed_row; + use spacetimedb_sats::proptest::{gen_with, generate_product_value, generate_row_type, generate_typed_row, SIZE}; - const N: usize = 4096; + const N: usize = u8::MAX as usize; proptest! { + #![proptest_config(ProptestConfig { max_global_rejects: 65536, ..<_>::default() })] + #[test] fn test_bytes_key_round_trip((ty, av) in generate_typed_row()) { let len = to_len(&av).unwrap(); @@ -202,20 +547,14 @@ mod test { let av = AlgebraicValue::Product(av); let key = BytesKey::::from_algebraic_value(&av); let decoded_av = key.decode_algebraic_value(&ty); - assert_eq!(av, decoded_av); + prop_assert_eq!(av, decoded_av); } - /* - // This test turned out not to hold for integers larger than u8, - // as BSATN stores them little-endian, - // but `Ord for AlgebraicValue` compares them as big-endian. - // It's included here for posterity and in case we'd like to - // massage the BSATN before storing it in the `BytesKey` - // to make it order-preserving. - - use proptest::array::uniform; - use spacetimedb_sats::proptest::{gen_with, generate_product_value, generate_row_type, SIZE}; - + /// This test does not hold for `BytesKey` + /// as BSATN stores them little-endian, + /// but `Ord for AlgebraicValue` compares them as big-endian. + /// It does however hold for `RangeCompatBytesKey` which + /// massages the BSATN to make it order-preserving. 
#[test] fn order_in_bsatn_is_preserved((ty, [r1, r2]) in gen_with(generate_row_type(0..=SIZE), |ty| uniform(generate_product_value(ty)))) { let ty: AlgebraicType = ty.into(); @@ -223,17 +562,17 @@ mod test { let r2: AlgebraicValue = r2.into(); let Some(required) = required_bytes_key_size(&ty, true) else { - //dbg!(&ty); return Err(TestCaseError::reject("type is incompatible with fixed byte keys in range indices")); }; prop_assume!(required <= N); let k1 = BytesKey::::from_algebraic_value(&r1); + let kr1 = RangeCompatBytesKey::from_bytes_key(k1, &ty); let k2 = BytesKey::::from_algebraic_value(&r2); - let ord_k = k1.cmp(&k2); + let kr2 = RangeCompatBytesKey::from_bytes_key(k2, &ty); + let ord_kr = kr1.cmp(&kr2); let ord_r = r1.cmp(&r2); - prop_assert_eq!(ord_k, ord_r); + prop_assert_eq!(ord_kr, ord_r); } - */ } } diff --git a/crates/table/src/table_index/hash_index.rs b/crates/table/src/table_index/hash_index.rs index 804eea1e644..98ea3b599aa 100644 --- a/crates/table/src/table_index/hash_index.rs +++ b/crates/table/src/table_index/hash_index.rs @@ -98,6 +98,10 @@ impl Index for HashIndex { self.map.len() } + fn num_key_bytes(&self) -> u64 { + self.num_key_bytes + } + fn num_rows(&self) -> usize { self.num_rows } diff --git a/crates/table/src/table_index/key_size.rs b/crates/table/src/table_index/key_size.rs index be087adff70..a51e42eea3d 100644 --- a/crates/table/src/table_index/key_size.rs +++ b/crates/table/src/table_index/key_size.rs @@ -1,5 +1,3 @@ -use crate::table_index::BytesKey; - use super::Index; use core::mem; use spacetimedb_memory_usage::MemoryUsage; @@ -218,7 +216,3 @@ impl KeySize for ArrayValue { } } } - -impl KeySize for BytesKey { - type MemoStorage = (); -} diff --git a/crates/table/src/table_index/mod.rs b/crates/table/src/table_index/mod.rs index aa9bf6f94ac..839479b6010 100644 --- a/crates/table/src/table_index/mod.rs +++ b/crates/table/src/table_index/mod.rs @@ -26,21 +26,22 @@ //! 
we support direct unique indices, where key are indices into `Vec`s. use self::btree_index::{BTreeIndex, BTreeIndexRangeIter}; -use self::bytes_key::{size_sub_row_pointer, BytesKey}; +use self::bytes_key::{ + required_bytes_key_size, size_for_btree_bytes_key, size_for_hash_bytes_key, BytesKey, RangeCompatBytesKey, +}; use self::hash_index::HashIndex; +use self::index::Despecialize; use self::same_key_entry::SameKeyEntryIter; use self::unique_btree_index::{UniqueBTreeIndex, UniqueBTreeIndexRangeIter, UniquePointIter}; use self::unique_direct_fixed_cap_index::{UniqueDirectFixedCapIndex, UniqueDirectFixedCapIndexRangeIter}; -use self::unique_direct_index::{UniqueDirectIndex, UniqueDirectIndexRangeIter}; +use self::unique_direct_index::{ToFromUsize, UniqueDirectIndex, UniqueDirectIndexRangeIter}; use self::unique_hash_index::UniqueHashIndex; use super::indexes::RowPointer; use super::table::RowRef; -use crate::table_index::bytes_key::required_bytes_key_size; -use crate::table_index::index::Despecialize; -use crate::table_index::unique_direct_index::ToFromUsize; use crate::{read_column::ReadColumn, static_assert_size}; use core::ops::{Bound, RangeBounds}; use core::{fmt, iter}; +use enum_as_inner::EnumAsInner; use spacetimedb_primitives::{ColId, ColList}; use spacetimedb_sats::bsatn::{decode, from_reader}; use spacetimedb_sats::buffer::{DecodeError, DecodeResult}; @@ -72,6 +73,7 @@ macro_rules! table_iter { $($(#[$vattr:meta])* $var:ident($varty:ty),)* }) => { $(#[$wattr])* + #[derive(Clone)] pub struct $wrapper<'a> { iter: $base<'a>, } @@ -84,8 +86,15 @@ macro_rules! table_iter { } } + impl fmt::Debug for $wrapper<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let iter = self.clone(); + f.debug_list().entries(iter).finish() + } + } + $(#[$battr])* - #[derive(derive_more::From)] + #[derive(Clone, derive_more::From)] enum $base<'a> { $($(#[$vattr])* $var($varty)),* } @@ -139,6 +148,11 @@ table_iter! 
{ BTreeF64( as Index>::Iter<'a>), BTreeString(> as Index>::Iter<'a>), BTreeAV( as Index>::Iter<'a>), + BTreeBytesKey8(> as Index>::Iter<'a>), + BTreeBytesKey16(> as Index>::Iter<'a>), + BTreeBytesKey32(> as Index>::Iter<'a>), + BTreeBytesKey64(> as Index>::Iter<'a>), + BTreeBytesKey128(> as Index>::Iter<'a>), // All the unique btree index iterators. UniqueBTreeBool( as Index>::Iter<'a>), @@ -159,6 +173,11 @@ table_iter! { UniqueBTreeF64( as Index>::Iter<'a>), UniqueBTreeString(> as Index>::Iter<'a>), UniqueBTreeAV( as Index>::Iter<'a>), + UniqueBTreeBytesKey8(> as Index>::Iter<'a>), + UniqueBTreeBytesKey16(> as Index>::Iter<'a>), + UniqueBTreeBytesKey32(> as Index>::Iter<'a>), + UniqueBTreeBytesKey64(> as Index>::Iter<'a>), + UniqueBTreeBytesKey128(> as Index>::Iter<'a>), // All the non-unique hash index iterators. HashBool( as Index>::Iter<'a>), @@ -216,12 +235,10 @@ table_iter! { table_iter! { /// An iterator over rows matching a range of [`AlgebraicValue`]s on the [`TableIndex`]. - #[derive(Clone)] pub struct TableIndexRangeIter => /// A ranged iterator over a [`TypedIndex`], with a specialized key type. /// /// See module docs for info about specialization. - #[derive(Clone)] enum TypedIndexRangeIter { /// The range itself provided was empty. RangeEmpty(iter::Empty), @@ -245,6 +262,11 @@ table_iter! { BTreeF64(BTreeIndexRangeIter<'a, F64>), BTreeString(BTreeIndexRangeIter<'a, Box>), BTreeAV(BTreeIndexRangeIter<'a, AlgebraicValue>), + BTreeBytesKey8(BTreeIndexRangeIter<'a, RangeCompatBytesKey>), + BTreeBytesKey16(BTreeIndexRangeIter<'a, RangeCompatBytesKey>), + BTreeBytesKey32(BTreeIndexRangeIter<'a, RangeCompatBytesKey>), + BTreeBytesKey64(BTreeIndexRangeIter<'a, RangeCompatBytesKey>), + BTreeBytesKey128(BTreeIndexRangeIter<'a, RangeCompatBytesKey>), // All the unique btree index iterators. UniqueBTreeBool(UniqueBTreeIndexRangeIter<'a, bool>), @@ -265,19 +287,17 @@ table_iter! 
{ UniqueBTreeF64(UniqueBTreeIndexRangeIter<'a, F64>), UniqueBTreeString(UniqueBTreeIndexRangeIter<'a, Box>), UniqueBTreeAV(UniqueBTreeIndexRangeIter<'a, AlgebraicValue>), + UniqueBTreeBytesKey8(UniqueBTreeIndexRangeIter<'a, RangeCompatBytesKey>), + UniqueBTreeBytesKey16(UniqueBTreeIndexRangeIter<'a, RangeCompatBytesKey>), + UniqueBTreeBytesKey32(UniqueBTreeIndexRangeIter<'a, RangeCompatBytesKey>), + UniqueBTreeBytesKey64(UniqueBTreeIndexRangeIter<'a, RangeCompatBytesKey>), + UniqueBTreeBytesKey128(UniqueBTreeIndexRangeIter<'a, RangeCompatBytesKey>), UniqueDirect(UniqueDirectIndexRangeIter<'a>), UniqueDirectU8(UniqueDirectFixedCapIndexRangeIter<'a>), } } -impl fmt::Debug for TableIndexRangeIter<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let iter = self.clone(); - f.debug_list().entries(iter).finish() - } -} - #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, derive_more::From)] enum BowStr<'a> { Borrowed(&'a str), @@ -328,19 +348,19 @@ impl<'a> CowAV<'a> { } } -// The various sizes passed to `BytesKey`. +// The various sizes passed to `RangeCompatBytesKey` and `BytesKey`. // A `B` suffix is for a size used for a btree index // and an `H` suffix is for a size used for a hash index. 
-const BYTES_KEY_SIZE_8_B: usize = 8; -const BYTES_KEY_SIZE_8_H: usize = size_sub_row_pointer(16); +const BYTES_KEY_SIZE_8_B: usize = size_for_btree_bytes_key(8); +const BYTES_KEY_SIZE_8_H: usize = size_for_hash_bytes_key(16); const _: () = assert!(BYTES_KEY_SIZE_8_B == BYTES_KEY_SIZE_8_H); -//const BYTES_KEY_SIZE_16_B: usize = 16; -const BYTES_KEY_SIZE_24_H: usize = size_sub_row_pointer(32); -//const BYTES_KEY_SIZE_32_B: usize = 32; -const BYTES_KEY_SIZE_56_H: usize = size_sub_row_pointer(64); -//const BYTES_KEY_SIZE_64_B: usize = 64; -const BYTES_KEY_SIZE_120_H: usize = size_sub_row_pointer(128); -//const BYTES_KEY_SIZE_128_B: usize = 128; +const BYTES_KEY_SIZE_16_B: usize = size_for_btree_bytes_key(16); +const BYTES_KEY_SIZE_24_H: usize = size_for_hash_bytes_key(32); +const BYTES_KEY_SIZE_32_B: usize = size_for_btree_bytes_key(32); +const BYTES_KEY_SIZE_56_H: usize = size_for_hash_bytes_key(64); +const BYTES_KEY_SIZE_64_B: usize = size_for_btree_bytes_key(64); +const BYTES_KEY_SIZE_120_H: usize = size_for_hash_bytes_key(128); +const BYTES_KEY_SIZE_128_B: usize = size_for_btree_bytes_key(128); /// A key into a [`TypedIndex`]. #[derive(enum_as_inner::EnumAsInner, PartialEq, Eq, PartialOrd, Ord, Debug)] @@ -364,13 +384,18 @@ enum TypedIndexKey<'a> { String(BowStr<'a>), AV(CowAV<'a>), - BytesKey8(BytesKey), + BytesKey8B(RangeCompatBytesKey), + BytesKey8H(BytesKey), + BytesKey16(RangeCompatBytesKey), BytesKey24(BytesKey), + BytesKey32(RangeCompatBytesKey), BytesKey56(BytesKey), + BytesKey64(RangeCompatBytesKey), BytesKey120(BytesKey), + BytesKey128(RangeCompatBytesKey), } -static_assert_size!(TypedIndexKey<'_>, 128); +static_assert_size!(TypedIndexKey<'_>, 144); /// Transposes a `Bound>` to a `Result, E>`. fn transpose_bound(bound: Bound>) -> Result, E> { @@ -386,7 +411,7 @@ impl<'a> TypedIndexKey<'a> { /// Derives a [`TypedIndexKey`] from an [`AlgebraicValue`] /// driven by the kind of [`TypedIndex`] provided in `index`. 
#[inline] - fn from_algebraic_value(index: &TypedIndex, value: &'a AlgebraicValue) -> Self { + fn from_algebraic_value(key_type: &AlgebraicType, index: &TypedIndex, value: &'a AlgebraicValue) -> Self { use AlgebraicValue::*; use TypedIndex::*; match (value, index) { @@ -426,7 +451,23 @@ impl<'a> TypedIndexKey<'a> { (av, BTreeAV(_) | HashAV(_) | UniqueBTreeAV(_) | UniqueHashAV(_)) => Self::AV(CowAV::Borrowed(av)), - (av, HashBytesKey8(_) | UniqueHashBytesKey8(_)) => Self::BytesKey8(BytesKey::from_algebraic_value(av)), + (av, BTreeBytesKey8(_) | UniqueBTreeBytesKey8(_)) => { + Self::BytesKey8B(RangeCompatBytesKey::from_algebraic_value(av, key_type)) + } + (av, BTreeBytesKey16(_) | UniqueBTreeBytesKey16(_)) => { + Self::BytesKey16(RangeCompatBytesKey::from_algebraic_value(av, key_type)) + } + (av, BTreeBytesKey32(_) | UniqueBTreeBytesKey32(_)) => { + Self::BytesKey32(RangeCompatBytesKey::from_algebraic_value(av, key_type)) + } + (av, BTreeBytesKey64(_) | UniqueBTreeBytesKey64(_)) => { + Self::BytesKey64(RangeCompatBytesKey::from_algebraic_value(av, key_type)) + } + (av, BTreeBytesKey128(_) | UniqueBTreeBytesKey128(_)) => { + Self::BytesKey128(RangeCompatBytesKey::from_algebraic_value(av, key_type)) + } + + (av, HashBytesKey8(_) | UniqueHashBytesKey8(_)) => Self::BytesKey8H(BytesKey::from_algebraic_value(av)), (av, HashBytesKey24(_) | UniqueHashBytesKey24(_)) => Self::BytesKey24(BytesKey::from_algebraic_value(av)), (av, HashBytesKey56(_) | UniqueHashBytesKey56(_)) => Self::BytesKey56(BytesKey::from_algebraic_value(av)), (av, HashBytesKey120(_) | UniqueHashBytesKey120(_)) => { @@ -483,7 +524,22 @@ impl<'a> TypedIndexKey<'a> { AlgebraicValue::decode(ty, reader).map(CowAV::Owned).map(Self::AV) } - HashBytesKey8(_) | UniqueHashBytesKey8(_) => BytesKey::from_bsatn(ty, bytes).map(Self::BytesKey8), + BTreeBytesKey8(_) | UniqueBTreeBytesKey8(_) => { + RangeCompatBytesKey::from_bsatn(ty, bytes).map(Self::BytesKey8B) + } + BTreeBytesKey16(_) | UniqueBTreeBytesKey16(_) => { + 
RangeCompatBytesKey::from_bsatn(ty, bytes).map(Self::BytesKey16) + } + BTreeBytesKey32(_) | UniqueBTreeBytesKey32(_) => { + RangeCompatBytesKey::from_bsatn(ty, bytes).map(Self::BytesKey32) + } + BTreeBytesKey64(_) | UniqueBTreeBytesKey64(_) => { + RangeCompatBytesKey::from_bsatn(ty, bytes).map(Self::BytesKey64) + } + BTreeBytesKey128(_) | UniqueBTreeBytesKey128(_) => { + RangeCompatBytesKey::from_bsatn(ty, bytes).map(Self::BytesKey128) + } + HashBytesKey8(_) | UniqueHashBytesKey8(_) => BytesKey::from_bsatn(ty, bytes).map(Self::BytesKey8H), HashBytesKey24(_) | UniqueHashBytesKey24(_) => BytesKey::from_bsatn(ty, bytes).map(Self::BytesKey24), HashBytesKey56(_) | UniqueHashBytesKey56(_) => BytesKey::from_bsatn(ty, bytes).map(Self::BytesKey56), HashBytesKey120(_) | UniqueHashBytesKey120(_) => BytesKey::from_bsatn(ty, bytes).map(Self::BytesKey120), @@ -501,7 +557,7 @@ impl<'a> TypedIndexKey<'a> { /// 1. Caller promises that `cols` matches what was given at construction (`TableIndex::new`). /// 2. Caller promises that the projection of `row_ref`'s type's equals the index's key type. #[inline] - unsafe fn from_row_ref(index: &TypedIndex, cols: &ColList, row_ref: RowRef<'_>) -> Self { + unsafe fn from_row_ref(key_type: &AlgebraicType, index: &TypedIndex, cols: &ColList, row_ref: RowRef<'_>) -> Self { fn proj(cols: &ColList, row_ref: RowRef<'_>) -> T { // Extract the column. let col_pos = cols.as_singleton(); @@ -582,8 +638,23 @@ impl<'a> TypedIndexKey<'a> { // This entails that each `ColId` in `self.indexed_columns`, // and by 1. also `cols`, // must be in-bounds of `row_ref`'s layout. 
+ BTreeBytesKey8(_) | UniqueBTreeBytesKey8(_) => { + Self::BytesKey8B(unsafe { RangeCompatBytesKey::from_row_ref(cols, row_ref, key_type) }) + } + BTreeBytesKey16(_) | UniqueBTreeBytesKey16(_) => { + Self::BytesKey16(unsafe { RangeCompatBytesKey::from_row_ref(cols, row_ref, key_type) }) + } + BTreeBytesKey32(_) | UniqueBTreeBytesKey32(_) => { + Self::BytesKey32(unsafe { RangeCompatBytesKey::from_row_ref(cols, row_ref, key_type) }) + } + BTreeBytesKey64(_) | UniqueBTreeBytesKey64(_) => { + Self::BytesKey64(unsafe { RangeCompatBytesKey::from_row_ref(cols, row_ref, key_type) }) + } + BTreeBytesKey128(_) | UniqueBTreeBytesKey128(_) => { + Self::BytesKey128(unsafe { RangeCompatBytesKey::from_row_ref(cols, row_ref, key_type) }) + } HashBytesKey8(_) | UniqueHashBytesKey8(_) => { - Self::BytesKey8(unsafe { BytesKey::from_row_ref(cols, row_ref) }) + Self::BytesKey8H(unsafe { BytesKey::from_row_ref(cols, row_ref) }) } HashBytesKey24(_) | UniqueHashBytesKey24(_) => { Self::BytesKey24(unsafe { BytesKey::from_row_ref(cols, row_ref) }) @@ -619,10 +690,15 @@ impl<'a> TypedIndexKey<'a> { Self::F64(x) => TypedIndexKey::F64(*x), Self::String(x) => TypedIndexKey::String(x.borrow().into()), Self::AV(x) => TypedIndexKey::AV(x.borrow().into()), - Self::BytesKey8(x) => TypedIndexKey::BytesKey8(*x), + Self::BytesKey8B(x) => TypedIndexKey::BytesKey8B(*x), + Self::BytesKey8H(x) => TypedIndexKey::BytesKey8H(*x), + Self::BytesKey16(x) => TypedIndexKey::BytesKey16(*x), Self::BytesKey24(x) => TypedIndexKey::BytesKey24(*x), + Self::BytesKey32(x) => TypedIndexKey::BytesKey32(*x), Self::BytesKey56(x) => TypedIndexKey::BytesKey56(*x), + Self::BytesKey64(x) => TypedIndexKey::BytesKey64(*x), Self::BytesKey120(x) => TypedIndexKey::BytesKey120(*x), + Self::BytesKey128(x) => TypedIndexKey::BytesKey128(*x), } } @@ -647,10 +723,15 @@ impl<'a> TypedIndexKey<'a> { Self::F64(x) => x.into(), Self::String(x) => x.into_owned().into(), Self::AV(x) => x.into_owned(), - Self::BytesKey8(x) => 
x.decode_algebraic_value(key_type), + Self::BytesKey8B(x) => x.decode_algebraic_value(key_type), + Self::BytesKey8H(x) => x.decode_algebraic_value(key_type), + Self::BytesKey16(x) => x.decode_algebraic_value(key_type), Self::BytesKey24(x) => x.decode_algebraic_value(key_type), + Self::BytesKey32(x) => x.decode_algebraic_value(key_type), Self::BytesKey56(x) => x.decode_algebraic_value(key_type), + Self::BytesKey64(x) => x.decode_algebraic_value(key_type), Self::BytesKey120(x) => x.decode_algebraic_value(key_type), + Self::BytesKey128(x) => x.decode_algebraic_value(key_type), } } } @@ -682,6 +763,11 @@ enum TypedIndex { // TODO(perf, centril): consider `UmbraString` or some "German string". BTreeString(BTreeIndex>), BTreeAV(BTreeIndex), + BTreeBytesKey8(BTreeIndex>), + BTreeBytesKey16(BTreeIndex>), + BTreeBytesKey32(BTreeIndex>), + BTreeBytesKey64(BTreeIndex>), + BTreeBytesKey128(BTreeIndex>), // All the non-unique hash index types. HashBool(HashIndex), @@ -728,6 +814,11 @@ enum TypedIndex { // TODO(perf, centril): consider `UmbraString` or some "German string". UniqueBTreeString(UniqueBTreeIndex>), UniqueBTreeAV(UniqueBTreeIndex), + UniqueBTreeBytesKey8(UniqueBTreeIndex>), + UniqueBTreeBytesKey16(UniqueBTreeIndex>), + UniqueBTreeBytesKey32(UniqueBTreeIndex>), + UniqueBTreeBytesKey64(UniqueBTreeIndex>), + UniqueBTreeBytesKey128(UniqueBTreeIndex>), // All the unique hash index types. UniqueHashBool(UniqueHashIndex), @@ -785,6 +876,11 @@ macro_rules! same_for_all_types { Self::BTreeF64($this) => $body, Self::BTreeString($this) => $body, Self::BTreeAV($this) => $body, + Self::BTreeBytesKey8($this) => $body, + Self::BTreeBytesKey16($this) => $body, + Self::BTreeBytesKey32($this) => $body, + Self::BTreeBytesKey64($this) => $body, + Self::BTreeBytesKey128($this) => $body, Self::HashBool($this) => $body, Self::HashU8($this) => $body, @@ -827,6 +923,11 @@ macro_rules! 
same_for_all_types { Self::UniqueBTreeF64($this) => $body, Self::UniqueBTreeString($this) => $body, Self::UniqueBTreeAV($this) => $body, + Self::UniqueBTreeBytesKey8($this) => $body, + Self::UniqueBTreeBytesKey16($this) => $body, + Self::UniqueBTreeBytesKey32($this) => $body, + Self::UniqueBTreeBytesKey64($this) => $body, + Self::UniqueBTreeBytesKey128($this) => $body, Self::UniqueHashBool($this) => $body, Self::UniqueHashU8($this) => $body, @@ -948,10 +1049,16 @@ impl TypedIndex { // We use a direct index here AlgebraicType::Sum(sum) if sum.is_simple_enum() => UniqueBTreeSumTag(<_>::default()), - // The index is either multi-column, - // or we don't care to specialize on the key type, - // so use a map keyed on `AlgebraicValue`. - _ => UniqueBTreeAV(<_>::default()), + ty => match required_bytes_key_size(ty, true) { + Some(..=BYTES_KEY_SIZE_8_B) => UniqueBTreeBytesKey8(<_>::default()), + Some(..=BYTES_KEY_SIZE_16_B) => UniqueBTreeBytesKey16(<_>::default()), + Some(..=BYTES_KEY_SIZE_32_B) => UniqueBTreeBytesKey32(<_>::default()), + Some(..=BYTES_KEY_SIZE_64_B) => UniqueBTreeBytesKey64(<_>::default()), + Some(..=BYTES_KEY_SIZE_128_B) => UniqueBTreeBytesKey128(<_>::default()), + // The key type cannot use the fixed byte key optimization, + // so use a map keyed on `AlgebraicValue`. + Some(_) | None => UniqueBTreeAV(<_>::default()), + }, } } else { match key_type { @@ -975,10 +1082,16 @@ impl TypedIndex { // For a plain enum, use `u8` as the native type. AlgebraicType::Sum(sum) if sum.is_simple_enum() => BTreeSumTag(<_>::default()), - // The index is either multi-column, - // or we don't care to specialize on the key type, - // so use a map keyed on `AlgebraicValue`. 
- _ => BTreeAV(<_>::default()), + ty => match required_bytes_key_size(ty, true) { + Some(..=BYTES_KEY_SIZE_8_B) => BTreeBytesKey8(<_>::default()), + Some(..=BYTES_KEY_SIZE_16_B) => BTreeBytesKey16(<_>::default()), + Some(..=BYTES_KEY_SIZE_32_B) => BTreeBytesKey32(<_>::default()), + Some(..=BYTES_KEY_SIZE_64_B) => BTreeBytesKey64(<_>::default()), + Some(..=BYTES_KEY_SIZE_128_B) => BTreeBytesKey128(<_>::default()), + // The key type cannot use the fixed byte key optimization, + // so use a map keyed on `AlgebraicValue`. + Some(_) | None => BTreeAV(<_>::default()), + }, } } } @@ -1011,7 +1124,7 @@ impl TypedIndex { // We use a direct index here AlgebraicType::Sum(sum) if sum.is_simple_enum() => UniqueHashSumTag(<_>::default()), - ty => match required_bytes_key_size(ty) { + ty => match required_bytes_key_size(ty, false) { Some(..=BYTES_KEY_SIZE_8_H) => UniqueHashBytesKey8(<_>::default()), Some(..=BYTES_KEY_SIZE_24_H) => UniqueHashBytesKey24(<_>::default()), Some(..=BYTES_KEY_SIZE_56_H) => UniqueHashBytesKey56(<_>::default()), @@ -1043,7 +1156,7 @@ impl TypedIndex { // For a plain enum, use `u8` as the native type. 
AlgebraicType::Sum(sum) if sum.is_simple_enum() => HashSumTag(<_>::default()), - ty => match required_bytes_key_size(ty) { + ty => match required_bytes_key_size(ty, false) { Some(..=BYTES_KEY_SIZE_8_H) => HashBytesKey8(<_>::default()), Some(..=BYTES_KEY_SIZE_24_H) => HashBytesKey24(<_>::default()), Some(..=BYTES_KEY_SIZE_56_H) => HashBytesKey56(<_>::default()), @@ -1068,10 +1181,12 @@ impl TypedIndex { match self { BTreeBool(_) | BTreeU8(_) | BTreeSumTag(_) | BTreeI8(_) | BTreeU16(_) | BTreeI16(_) | BTreeU32(_) | BTreeI32(_) | BTreeU64(_) | BTreeI64(_) | BTreeU128(_) | BTreeI128(_) | BTreeU256(_) | BTreeI256(_) - | BTreeF32(_) | BTreeF64(_) | BTreeString(_) | BTreeAV(_) | HashBool(_) | HashU8(_) | HashSumTag(_) - | HashI8(_) | HashU16(_) | HashI16(_) | HashU32(_) | HashI32(_) | HashU64(_) | HashI64(_) | HashU128(_) - | HashI128(_) | HashU256(_) | HashI256(_) | HashF32(_) | HashF64(_) | HashString(_) | HashAV(_) - | HashBytesKey8(_) | HashBytesKey24(_) | HashBytesKey56(_) | HashBytesKey120(_) => false, + | BTreeF32(_) | BTreeF64(_) | BTreeString(_) | BTreeAV(_) | BTreeBytesKey8(_) | BTreeBytesKey16(_) + | BTreeBytesKey32(_) | BTreeBytesKey64(_) | BTreeBytesKey128(_) | HashBool(_) | HashU8(_) + | HashSumTag(_) | HashI8(_) | HashU16(_) | HashI16(_) | HashU32(_) | HashI32(_) | HashU64(_) + | HashI64(_) | HashU128(_) | HashI128(_) | HashU256(_) | HashI256(_) | HashF32(_) | HashF64(_) + | HashString(_) | HashAV(_) | HashBytesKey8(_) | HashBytesKey24(_) | HashBytesKey56(_) + | HashBytesKey120(_) => false, UniqueBTreeBool(_) | UniqueBTreeU8(_) | UniqueBTreeSumTag(_) @@ -1090,6 +1205,11 @@ impl TypedIndex { | UniqueBTreeF64(_) | UniqueBTreeString(_) | UniqueBTreeAV(_) + | UniqueBTreeBytesKey8(_) + | UniqueBTreeBytesKey16(_) + | UniqueBTreeBytesKey32(_) + | UniqueBTreeBytesKey64(_) + | UniqueBTreeBytesKey128(_) | UniqueHashBool(_) | UniqueHashU8(_) | UniqueHashSumTag(_) @@ -1172,6 +1292,11 @@ impl TypedIndex { (BTreeF64(i), F64(k)) => (i.insert(k, ptr), None), 
(BTreeString(i), String(k)) => (i.insert(k.into_owned(), ptr), None), (BTreeAV(i), AV(k)) => (i.insert(k.into_owned(), ptr), None), + (BTreeBytesKey8(i), BytesKey8B(k)) => (i.insert(k, ptr), None), + (BTreeBytesKey16(i), BytesKey16(k)) => (i.insert(k, ptr), None), + (BTreeBytesKey32(i), BytesKey32(k)) => (i.insert(k, ptr), None), + (BTreeBytesKey64(i), BytesKey64(k)) => (i.insert(k, ptr), None), + (BTreeBytesKey128(i), BytesKey128(k)) => (i.insert(k, ptr), None), (HashBool(i), Bool(k)) => (i.insert(k, ptr), None), (HashU8(i), U8(k)) => (i.insert(k, ptr), None), (HashSumTag(i), SumTag(k)) => (i.insert(k, ptr), None), @@ -1190,7 +1315,7 @@ impl TypedIndex { (HashF64(i), F64(k)) => (i.insert(k, ptr), None), (HashString(i), String(k)) => (i.insert(k.into_owned(), ptr), None), (HashAV(i), AV(k)) => (i.insert(k.into_owned(), ptr), None), - (HashBytesKey8(i), BytesKey8(k)) => (i.insert(k, ptr), None), + (HashBytesKey8(i), BytesKey8H(k)) => (i.insert(k, ptr), None), (HashBytesKey24(i), BytesKey24(k)) => (i.insert(k, ptr), None), (HashBytesKey56(i), BytesKey56(k)) => (i.insert(k, ptr), None), (HashBytesKey120(i), BytesKey120(k)) => (i.insert(k, ptr), None), @@ -1212,6 +1337,11 @@ impl TypedIndex { (UniqueBTreeF64(i), F64(k)) => (i.insert(k, ptr), None), (UniqueBTreeString(i), String(k)) => (i.insert(k.into_owned(), ptr), None), (UniqueBTreeAV(i), AV(k)) => (i.insert(k.into_owned(), ptr), None), + (UniqueBTreeBytesKey8(i), BytesKey8B(k)) => (i.insert(k, ptr), None), + (UniqueBTreeBytesKey16(i), BytesKey16(k)) => (i.insert(k, ptr), None), + (UniqueBTreeBytesKey32(i), BytesKey32(k)) => (i.insert(k, ptr), None), + (UniqueBTreeBytesKey64(i), BytesKey64(k)) => (i.insert(k, ptr), None), + (UniqueBTreeBytesKey128(i), BytesKey128(k)) => (i.insert(k, ptr), None), (UniqueHashBool(i), Bool(k)) => (i.insert(k, ptr), None), (UniqueHashU8(i), U8(k)) => (i.insert(k, ptr), None), (UniqueHashSumTag(i), SumTag(k)) => (i.insert(k, ptr), None), @@ -1230,7 +1360,7 @@ impl TypedIndex { 
(UniqueHashF64(i), F64(k)) => (i.insert(k, ptr), None), (UniqueHashString(i), String(k)) => (i.insert(k.into_owned(), ptr), None), (UniqueHashAV(i), AV(k)) => (i.insert(k.into_owned(), ptr), None), - (UniqueHashBytesKey8(i), BytesKey8(k)) => (i.insert(k, ptr), None), + (UniqueHashBytesKey8(i), BytesKey8H(k)) => (i.insert(k, ptr), None), (UniqueHashBytesKey24(i), BytesKey24(k)) => (i.insert(k, ptr), None), (UniqueHashBytesKey56(i), BytesKey56(k)) => (i.insert(k, ptr), None), (UniqueHashBytesKey120(i), BytesKey120(k)) => (i.insert(k, ptr), None), @@ -1279,6 +1409,11 @@ impl TypedIndex { (BTreeF64(i), F64(k)) => i.delete(k, ptr), (BTreeString(i), String(k)) => i.delete(k.borrow(), ptr), (BTreeAV(i), AV(k)) => i.delete(k.borrow(), ptr), + (BTreeBytesKey8(i), BytesKey8B(k)) => i.delete(k, ptr), + (BTreeBytesKey16(i), BytesKey16(k)) => i.delete(k, ptr), + (BTreeBytesKey32(i), BytesKey32(k)) => i.delete(k, ptr), + (BTreeBytesKey64(i), BytesKey64(k)) => i.delete(k, ptr), + (BTreeBytesKey128(i), BytesKey128(k)) => i.delete(k, ptr), (HashBool(i), Bool(k)) => i.delete(k, ptr), (HashU8(i), U8(k)) => i.delete(k, ptr), (HashSumTag(i), SumTag(k)) => i.delete(k, ptr), @@ -1297,7 +1432,7 @@ impl TypedIndex { (HashF64(i), F64(k)) => i.delete(k, ptr), (HashString(i), String(k)) => i.delete(k.borrow(), ptr), (HashAV(i), AV(k)) => i.delete(k.borrow(), ptr), - (HashBytesKey8(i), BytesKey8(k)) => i.delete(k, ptr), + (HashBytesKey8(i), BytesKey8H(k)) => i.delete(k, ptr), (HashBytesKey24(i), BytesKey24(k)) => i.delete(k, ptr), (HashBytesKey56(i), BytesKey56(k)) => i.delete(k, ptr), (HashBytesKey120(i), BytesKey120(k)) => i.delete(k, ptr), @@ -1319,6 +1454,11 @@ impl TypedIndex { (UniqueBTreeF64(i), F64(k)) => i.delete(k, ptr), (UniqueBTreeString(i), String(k)) => i.delete(k.borrow(), ptr), (UniqueBTreeAV(i), AV(k)) => i.delete(k.borrow(), ptr), + (UniqueBTreeBytesKey8(i), BytesKey8B(k)) => i.delete(k, ptr), + (UniqueBTreeBytesKey16(i), BytesKey16(k)) => i.delete(k, ptr), + 
(UniqueBTreeBytesKey32(i), BytesKey32(k)) => i.delete(k, ptr), + (UniqueBTreeBytesKey64(i), BytesKey64(k)) => i.delete(k, ptr), + (UniqueBTreeBytesKey128(i), BytesKey128(k)) => i.delete(k, ptr), (UniqueHashBool(i), Bool(k)) => i.delete(k, ptr), (UniqueHashU8(i), U8(k)) => i.delete(k, ptr), (UniqueHashSumTag(i), SumTag(k)) => i.delete(k, ptr), @@ -1337,7 +1477,7 @@ impl TypedIndex { (UniqueHashF64(i), F64(k)) => i.delete(k, ptr), (UniqueHashString(i), String(k)) => i.delete(k.borrow(), ptr), (UniqueHashAV(i), AV(k)) => i.delete(k.borrow(), ptr), - (UniqueHashBytesKey8(i), BytesKey8(k)) => i.delete(k, ptr), + (UniqueHashBytesKey8(i), BytesKey8H(k)) => i.delete(k, ptr), (UniqueHashBytesKey24(i), BytesKey24(k)) => i.delete(k, ptr), (UniqueHashBytesKey56(i), BytesKey56(k)) => i.delete(k, ptr), (UniqueHashBytesKey120(i), BytesKey120(k)) => i.delete(k, ptr), @@ -1373,6 +1513,11 @@ impl TypedIndex { (BTreeF64(this), F64(key)) => this.seek_point(key).into(), (BTreeString(this), String(key)) => this.seek_point(key.borrow()).into(), (BTreeAV(this), AV(key)) => this.seek_point(key.borrow()).into(), + (BTreeBytesKey8(this), BytesKey8B(key)) => this.seek_point(key).into(), + (BTreeBytesKey16(this), BytesKey16(key)) => this.seek_point(key).into(), + (BTreeBytesKey32(this), BytesKey32(key)) => this.seek_point(key).into(), + (BTreeBytesKey64(this), BytesKey64(key)) => this.seek_point(key).into(), + (BTreeBytesKey128(this), BytesKey128(key)) => this.seek_point(key).into(), (HashBool(this), Bool(key)) => this.seek_point(key).into(), (HashU8(this), U8(key)) => this.seek_point(key).into(), (HashSumTag(this), SumTag(key)) => this.seek_point(key).into(), @@ -1391,7 +1536,7 @@ impl TypedIndex { (HashF64(this), F64(key)) => this.seek_point(key).into(), (HashString(this), String(key)) => this.seek_point(key.borrow()).into(), (HashAV(this), AV(key)) => this.seek_point(key.borrow()).into(), - (HashBytesKey8(this), BytesKey8(key)) => this.seek_point(key).into(), + (HashBytesKey8(this), 
BytesKey8H(key)) => this.seek_point(key).into(), (HashBytesKey24(this), BytesKey24(key)) => this.seek_point(key).into(), (HashBytesKey56(this), BytesKey56(key)) => this.seek_point(key).into(), (HashBytesKey120(this), BytesKey120(key)) => this.seek_point(key).into(), @@ -1413,6 +1558,11 @@ impl TypedIndex { (UniqueBTreeF64(this), F64(key)) => this.seek_point(key).into(), (UniqueBTreeString(this), String(key)) => this.seek_point(key.borrow()).into(), (UniqueBTreeAV(this), AV(key)) => this.seek_point(key.borrow()).into(), + (UniqueBTreeBytesKey8(this), BytesKey8B(key)) => this.seek_point(key).into(), + (UniqueBTreeBytesKey16(this), BytesKey16(key)) => this.seek_point(key).into(), + (UniqueBTreeBytesKey32(this), BytesKey32(key)) => this.seek_point(key).into(), + (UniqueBTreeBytesKey64(this), BytesKey64(key)) => this.seek_point(key).into(), + (UniqueBTreeBytesKey128(this), BytesKey128(key)) => this.seek_point(key).into(), (UniqueHashBool(this), Bool(key)) => this.seek_point(key).into(), (UniqueHashU8(this), U8(key)) => this.seek_point(key).into(), (UniqueHashSumTag(this), SumTag(key)) => this.seek_point(key).into(), @@ -1431,7 +1581,7 @@ impl TypedIndex { (UniqueHashF64(this), F64(key)) => this.seek_point(key).into(), (UniqueHashString(this), String(key)) => this.seek_point(key.borrow()).into(), (UniqueHashAV(this), AV(key)) => this.seek_point(key.borrow()).into(), - (UniqueHashBytesKey8(this), BytesKey8(key)) => this.seek_point(key).into(), + (UniqueHashBytesKey8(this), BytesKey8H(key)) => this.seek_point(key).into(), (UniqueHashBytesKey24(this), BytesKey24(key)) => this.seek_point(key).into(), (UniqueHashBytesKey56(this), BytesKey56(key)) => this.seek_point(key).into(), (UniqueHashBytesKey120(this), BytesKey120(key)) => this.seek_point(key).into(), @@ -1548,7 +1698,11 @@ impl TypedIndex { this.seek_range(&range).into() } Self::BTreeAV(this) => this.seek_range(&map(range, |k| k.as_av().map(|s| s.borrow()))).into(), - + Self::BTreeBytesKey8(this) => 
this.seek_range(&map(range, TypedIndexKey::as_bytes_key8_b)).into(), + Self::BTreeBytesKey16(this) => this.seek_range(&map(range, TypedIndexKey::as_bytes_key16)).into(), + Self::BTreeBytesKey32(this) => this.seek_range(&map(range, TypedIndexKey::as_bytes_key32)).into(), + Self::BTreeBytesKey64(this) => this.seek_range(&map(range, TypedIndexKey::as_bytes_key64)).into(), + Self::BTreeBytesKey128(this) => this.seek_range(&map(range, TypedIndexKey::as_bytes_key128)).into(), Self::UniqueBTreeBool(this) => this.seek_range(&map(range, TypedIndexKey::as_bool)).into(), Self::UniqueBTreeU8(this) => this.seek_range(&map(range, TypedIndexKey::as_u8)).into(), Self::UniqueBTreeSumTag(this) => this.seek_range(&map(range, TypedIndexKey::as_sum_tag)).into(), @@ -1570,7 +1724,11 @@ impl TypedIndex { this.seek_range(&range).into() } Self::UniqueBTreeAV(this) => this.seek_range(&map(range, |k| k.as_av().map(|s| s.borrow()))).into(), - + Self::UniqueBTreeBytesKey8(this) => this.seek_range(&map(range, TypedIndexKey::as_bytes_key8_b)).into(), + Self::UniqueBTreeBytesKey16(this) => this.seek_range(&map(range, TypedIndexKey::as_bytes_key16)).into(), + Self::UniqueBTreeBytesKey32(this) => this.seek_range(&map(range, TypedIndexKey::as_bytes_key32)).into(), + Self::UniqueBTreeBytesKey64(this) => this.seek_range(&map(range, TypedIndexKey::as_bytes_key64)).into(), + Self::UniqueBTreeBytesKey128(this) => this.seek_range(&map(range, TypedIndexKey::as_bytes_key128)).into(), Self::UniqueDirectSumTag(this) => this.seek_range(&map(range, TypedIndexKey::as_sum_tag)).into(), Self::UniqueDirectU8(this) => this.seek_range(&map(range, TypedIndexKey::as_u8)).into(), Self::UniqueDirectU16(this) => this.seek_range(&map(range, TypedIndexKey::as_u16)).into(), @@ -1615,7 +1773,7 @@ impl TypedIndex { } /// A key into a [`TableIndex`]. 
-#[derive(derive_more::From)] +#[derive(Debug, derive_more::From)] pub struct IndexKey<'a> { key: TypedIndexKey<'a>, } @@ -1628,6 +1786,7 @@ impl IndexKey<'_> { } /// A decoded range scan bound, which may be a point or a range. +#[derive(Debug, EnumAsInner)] pub enum PointOrRange<'a> { /// A point scan. Point(IndexKey<'a>), @@ -1704,7 +1863,7 @@ impl TableIndex { /// Panics if `value` is not consistent with this index's key type. #[inline] pub fn key_from_algebraic_value<'a>(&self, value: &'a AlgebraicValue) -> IndexKey<'a> { - TypedIndexKey::from_algebraic_value(&self.idx, value).into() + TypedIndexKey::from_algebraic_value(&self.key_type, &self.idx, value).into() } /// Derives a key for this index from BSATN-encoded `bytes`. @@ -1753,7 +1912,31 @@ impl TableIndex { let range_type = &range_type.algebraic_type; let suffix_len = suffix_types.len(); + macro_rules! bounds_from_bsatn_bytes_key { + ($ctor:expr) => { + Self::bounds_from_bsatn_bytes_key( + prefix, + prefix_types, + start, + end, + range_type, + suffix_len, + $ctor, + ) + }; + } + match &self.idx { + BTreeBytesKey8(_) => bounds_from_bsatn_bytes_key!(TypedIndexKey::BytesKey8B), + BTreeBytesKey16(_) => bounds_from_bsatn_bytes_key!(TypedIndexKey::BytesKey16), + BTreeBytesKey32(_) => bounds_from_bsatn_bytes_key!(TypedIndexKey::BytesKey32), + BTreeBytesKey64(_) => bounds_from_bsatn_bytes_key!(TypedIndexKey::BytesKey64), + BTreeBytesKey128(_) => bounds_from_bsatn_bytes_key!(TypedIndexKey::BytesKey128), + UniqueBTreeBytesKey8(_) => bounds_from_bsatn_bytes_key!(TypedIndexKey::BytesKey8B), + UniqueBTreeBytesKey16(_) => bounds_from_bsatn_bytes_key!(TypedIndexKey::BytesKey16), + UniqueBTreeBytesKey32(_) => bounds_from_bsatn_bytes_key!(TypedIndexKey::BytesKey32), + UniqueBTreeBytesKey64(_) => bounds_from_bsatn_bytes_key!(TypedIndexKey::BytesKey64), + UniqueBTreeBytesKey128(_) => bounds_from_bsatn_bytes_key!(TypedIndexKey::BytesKey128), BTreeAV(_) | HashAV(_) | UniqueBTreeAV(_) | UniqueHashAV(_) => { // The index is 
not specialized. // We now have the types, @@ -1798,9 +1981,8 @@ impl TableIndex { } /// Decodes `prefix` ++ `start` and `prefix` ++ `end` - /// as BSATN-encoded bounds for a bytes key index. + /// as [`RangeCompatBytesKey`] bounds. /// The `suffix_len` is used to determine whether this is a point scan or a range scan. - #[allow(dead_code)] fn bounds_from_bsatn_bytes_key<'de, const N: usize>( prefix: &'de [u8], prefix_types: &[ProductTypeElement], @@ -1808,18 +1990,51 @@ impl TableIndex { end: Bound<&'de [u8]>, range_type: &AlgebraicType, suffix_len: usize, - ctor: impl Copy + FnOnce(BytesKey) -> TypedIndexKey<'de>, + ctor: impl Copy + FnOnce(RangeCompatBytesKey) -> TypedIndexKey<'de>, ) -> DecodeResult> { // Is this really a point scan? let from = |k| ctor(k).into(); + let decode_prefix = || RangeCompatBytesKey::from_bsatn_prefix(prefix, prefix_types); let decode = - |bytes| BytesKey::from_bsatn_prefix_and_endpoint(prefix, prefix_types, bytes, range_type).map(from); + |bytes| RangeCompatBytesKey::from_bsatn_prefix_and_endpoint(prefix, prefix_types, bytes, range_type); Ok(if let Some(point) = Self::as_point_scan(&start, &end, suffix_len) { - PointOrRange::Point(decode(point)?) + PointOrRange::Point(from(decode(point)?)) } else { // It's not a point scan. let decode_bound = |b: Bound<_>| transpose_bound(b.map(decode)); - PointOrRange::Range(decode_bound(start)?, decode_bound(end)?) + + // For the start endpoint, + // it's only necessary to consider the `prefix`. + // The suffix is implicitly taken care of as a shorter slice is lesser than a longer one. + // That is, we have e.g., `prefix ++ [] <= prefix ++ suffix`. + // + // The exception to this is `Excluded`, where we need to fill with `Max`. + let prefix_is_empty = prefix.is_empty(); + let start = match decode_bound(start)? 
{ + Bound::Included(r) => Bound::Included(r), + Bound::Excluded(r) => Bound::Excluded(r.add_max_suffix()), + Bound::Unbounded if prefix_is_empty => Bound::Unbounded, + // We have a prefix, so the start is actually the prefix. + Bound::Unbounded => Bound::Included(decode_prefix()?), + }; + + // The end endpoint needs "max" as the suffix-filling element, + // as it imposes the least and acts like `Unbounded`. + // + // The exception to this is `Excluded`, + // where e.g., `[0]..[1]` should not find [1, 2], + // which it would if "max" was used. + // Instead, "min" must be used, but it can be omitted, as it's implicit. + let end = match decode_bound(end)? { + Bound::Included(r) => Bound::Included(r.add_max_suffix()), + Bound::Excluded(r) => Bound::Excluded(r), + // Prefix is empty, and suffix will be `Max`, + // so simplify `(Max, Max, ...)` to `Unbounded`. + Bound::Unbounded if prefix_is_empty => Bound::Unbounded, + Bound::Unbounded => Bound::Included(decode_prefix()?.add_max_suffix()), + }; + + PointOrRange::Range(start.map(from), end.map(from)) }) } @@ -1851,6 +2066,8 @@ impl TableIndex { suffix_len: usize, ) -> (Bound, Bound) { let prefix_is_empty = prefix.elements.is_empty(); + // Conses value to prefix. + let cons = |prefix: ProductValue, val| prefix.push(val).into(); // Concatenate prefix, value, and the most permissive value for the suffix. let concat = |prefix: ProductValue, val, fill| { let mut vals: Vec<_> = prefix.elements.into(); @@ -1859,27 +2076,34 @@ impl TableIndex { vals.extend(iter::repeat_n(fill, suffix_len)); AlgebraicValue::product(vals) }; - // The start endpoint needs `Min` as the suffix-filling element, - // as it imposes the least and acts like `Unbounded`. - let concat_start = |val| concat(prefix.clone(), val, AlgebraicValue::Min); + + // For the start endpoint, + // the suffix is implicitly taken care of as a shorter slice is lesser than a longer one. + // That is, we have e.g., `(prefix : val) <= (prefix : val) ++ suffix`. 
+ // + // The exception to this is `Excluded`, where we need to fill with `Max`. let range_start = match start { - Bound::Included(r) => Bound::Included(concat_start(r)), - Bound::Excluded(r) => Bound::Excluded(concat_start(r)), + Bound::Included(r) => Bound::Included(cons(prefix.clone(), r)), + Bound::Excluded(r) => Bound::Excluded(concat(prefix.clone(), r, AlgebraicValue::Max)), // Prefix is empty, and suffix will be `Min`, // so simplify `(Min, Min, ...)` to `Unbounded`. Bound::Unbounded if prefix_is_empty => Bound::Unbounded, - Bound::Unbounded => Bound::Included(concat_start(AlgebraicValue::Min)), + Bound::Unbounded => Bound::Included(prefix.clone().into()), }; // The end endpoint needs `Max` as the suffix-filling element, // as it imposes the least and acts like `Unbounded`. - let concat_end = |val| concat(prefix, val, AlgebraicValue::Max); + // + // The exception to this is `Excluded`, + // where e.g., `[0]..[1]` should not find [1, 2], + // which it would if `Max` was used. + // Instead, `Min` must be used, but it can be omitted, as it's implicit. let range_end = match end { - Bound::Included(r) => Bound::Included(concat_end(r)), - Bound::Excluded(r) => Bound::Excluded(concat_end(r)), + Bound::Included(r) => Bound::Included(concat(prefix, r, AlgebraicValue::Max)), + Bound::Excluded(r) => Bound::Excluded(cons(prefix, r)), // Prefix is empty, and suffix will be `Max`, // so simplify `(Max, Max, ...)` to `Unbounded`. Bound::Unbounded if prefix_is_empty => Bound::Unbounded, - Bound::Unbounded => Bound::Included(concat_end(AlgebraicValue::Max)), + Bound::Unbounded => Bound::Included(concat(prefix, AlgebraicValue::Max, AlgebraicValue::Max)), }; (range_start, range_end) } @@ -1895,7 +2119,7 @@ impl TableIndex { // SAFETY: // 1. We're passing the same `ColList` that was provided during construction. // 2. Forward caller requirements. 
- unsafe { TypedIndexKey::from_row_ref(&self.idx, &self.indexed_columns, row_ref) }.into() + unsafe { TypedIndexKey::from_row_ref(&self.key_type, &self.idx, &self.indexed_columns, row_ref) }.into() } /// Inserts `ptr` with the value `row` to this index. @@ -2029,6 +2253,11 @@ impl TableIndex { | (BTreeF64(_), BTreeF64(_)) | (BTreeString(_), BTreeString(_)) | (BTreeAV(_), BTreeAV(_)) + | (BTreeBytesKey8(_), BTreeBytesKey8(_)) + | (BTreeBytesKey16(_), BTreeBytesKey16(_)) + | (BTreeBytesKey32(_), BTreeBytesKey32(_)) + | (BTreeBytesKey64(_), BTreeBytesKey64(_)) + | (BTreeBytesKey128(_), BTreeBytesKey128(_)) | (HashBool(_), HashBool(_)) | (HashU8(_), HashU8(_)) | (HashSumTag(_), HashSumTag(_)) @@ -2070,6 +2299,11 @@ impl TableIndex { (UniqueBTreeF64(idx), UniqueBTreeF64(other)) => idx.can_merge(other, ignore), (UniqueBTreeString(idx), UniqueBTreeString(other)) => idx.can_merge(other, ignore), (UniqueBTreeAV(idx), UniqueBTreeAV(other)) => idx.can_merge(other, ignore), + (UniqueBTreeBytesKey8(idx), UniqueBTreeBytesKey8(other)) => idx.can_merge(other, ignore), + (UniqueBTreeBytesKey16(idx), UniqueBTreeBytesKey16(other)) => idx.can_merge(other, ignore), + (UniqueBTreeBytesKey32(idx), UniqueBTreeBytesKey32(other)) => idx.can_merge(other, ignore), + (UniqueBTreeBytesKey64(idx), UniqueBTreeBytesKey64(other)) => idx.can_merge(other, ignore), + (UniqueBTreeBytesKey128(idx), UniqueBTreeBytesKey128(other)) => idx.can_merge(other, ignore), (UniqueHashBool(idx), UniqueHashBool(other)) => idx.can_merge(other, ignore), (UniqueHashU8(idx), UniqueHashU8(other)) => idx.can_merge(other, ignore), (UniqueHashSumTag(idx), UniqueHashSumTag(other)) => idx.can_merge(other, ignore), @@ -2142,19 +2376,26 @@ impl TableIndex { mod test { use super::*; use crate::page_pool::PagePool; + use crate::table::Table; use crate::{blob_store::HashMapBlobStore, table::test::table}; + use core::cmp::Ordering; use core::ops::Bound::*; use decorum::Total; + use proptest::array::uniform; use proptest::prelude::*; 
use proptest::{ collection::{hash_set, vec}, test_runner::TestCaseResult, }; use spacetimedb_data_structures::map::HashMap; + use spacetimedb_lib::bsatn::to_vec; use spacetimedb_lib::ProductTypeElement; - use spacetimedb_primitives::ColId; + use spacetimedb_primitives::{ColId, IndexId}; use spacetimedb_sats::algebraic_value::Packed; - use spacetimedb_sats::proptest::{generate_algebraic_value, generate_primitive_algebraic_type}; + use spacetimedb_sats::proptest::{ + gen_with, generate_algebraic_type, generate_algebraic_value, generate_primitive_algebraic_type, + generate_typed_value, + }; use spacetimedb_sats::{ product, proptest::{generate_product_value, generate_row_type}, @@ -2183,6 +2424,10 @@ mod test { } } + fn setup(ty: ProductType) -> (Table, PagePool, HashMapBlobStore) { + (table(ty), PagePool::new_for_test(), HashMapBlobStore::default()) + } + fn new_index(row_type: &ProductType, cols: &ColList, is_unique: bool, kind: IndexKind) -> TableIndex { TableIndex::new(row_type, cols.clone(), kind, is_unique).unwrap() } @@ -2246,6 +2491,22 @@ mod test { index.seek_range(&(start, end)) } + fn find_start_mid_end(a: T, b: T, c: T) -> (T, T, T) { + use Ordering::*; + match (a.cmp(&b), b.cmp(&c)) { + (Less | Equal, Less | Equal) => (a, b, c), + (Less, Greater) => match a.cmp(&c) { + Less | Equal => (a, c, b), + Greater => (c, a, b), + }, + (Greater, Less) => match a.cmp(&c) { + Less | Equal => (b, a, c), + Greater => (b, c, a), + }, + (Greater | Equal, Greater | Equal) => (c, b, a), + } + } + proptest! 
{ #![proptest_config(ProptestConfig { max_shrink_iters: 0x10000000, ..Default::default() })] @@ -2260,9 +2521,7 @@ mod test { #[test] fn remove_nonexistent_noop((ty, cols, pv) in gen_row_and_cols(), kind: IndexKind, is_unique: bool) { let mut index = new_index(&ty, &cols, is_unique, kind); - let mut table = table(ty); - let pool = PagePool::new_for_test(); - let mut blob_store = HashMapBlobStore::default(); + let (mut table, pool, mut blob_store) = setup(ty); let row_ref = table.insert(&pool, &mut blob_store, &pv).unwrap().1; prop_assert_eq!(unsafe { index.delete(row_ref) }, false); prop_assert!(index.idx.is_empty()); @@ -2274,9 +2533,7 @@ mod test { #[test] fn insert_delete_noop((ty, cols, pv) in gen_row_and_cols(), kind: IndexKind, is_unique: bool) { let mut index = new_index(&ty, &cols, is_unique, kind); - let mut table = table(ty); - let pool = PagePool::new_for_test(); - let mut blob_store = HashMapBlobStore::default(); + let (mut table, pool, mut blob_store) = setup(ty); let row_ref = table.insert(&pool, &mut blob_store, &pv).unwrap().1; let value = get_fields(&cols, &pv); @@ -2307,9 +2564,7 @@ mod test { let ty = ProductType::from(ty.into_boxed_slice()); let mut index = new_index(&ty, &cols, false, kind); - let mut table = table(ty); - let pool = PagePool::new_for_test(); - let mut blob_store = HashMapBlobStore::default(); + let (mut table, pool, mut blob_store) = setup(ty); let num_vals = vals.len(); for val in vals { @@ -2330,9 +2585,7 @@ mod test { #[test] fn insert_again_violates_unique_constraint((ty, cols, pv) in gen_row_and_cols(), kind: IndexKind) { let mut index = new_index(&ty, &cols, true, kind); - let mut table = table(ty); - let pool = PagePool::new_for_test(); - let mut blob_store = HashMapBlobStore::default(); + let (mut table, pool, mut blob_store) = setup(ty); let row_ref = table.insert(&pool, &mut blob_store, &pv).unwrap().1; let value = get_fields(&cols, &pv); @@ -2369,9 +2622,7 @@ mod test { let cols = 0.into(); let ty = 
ProductType::from_iter([AlgebraicType::U64]); let mut index = new_index(&ty, &cols, is_unique, kind); - let mut table = table(ty); - let pool = PagePool::new_for_test(); - let mut blob_store = HashMapBlobStore::default(); + let (mut table, pool, mut blob_store) = setup(ty); let prev = needle - 1; let next = needle + 1; @@ -2470,9 +2721,7 @@ mod test { let mut index = new_index(&row_ty, &[0].into(), is_unique, kind); // Construct the table and add `val` as a row. - let mut table = table(row_ty); - let pool = PagePool::new_for_test(); - let mut blob_store = HashMapBlobStore::default(); + let (mut table, pool, mut blob_store) = setup(row_ty); let pv = product![val.clone()]; let row_ref = table.insert(&pool, &mut blob_store, &pv).unwrap().1; @@ -2496,5 +2745,69 @@ mod test { let rows = seek_range(&index, &(Excluded(&val), Excluded(&val))).unwrap().collect::>(); assert_eq!(rows, []); } + + #[test] + fn btree_multi_col_range_scans_work( + is_unique: bool, + (prefix_ty, prefix_val) in generate_typed_value(), + (middle_ty, [start, middle, end]) in gen_with(generate_algebraic_type(), |ty| uniform(generate_algebraic_value(ty))), + (suffix_ty, suffix_val) in generate_typed_value(), + ) { + // Make the product type. + let ty = ProductType::from([prefix_ty, middle_ty, suffix_ty]); + let index = new_index(&ty, &[0, 1, 2].into(), is_unique, IndexKind::BTree); + + // Find the actual start, middle, and end. + let (start, middle, end) = find_start_mid_end(start, middle, end); + let row = product![prefix_val.clone(), middle.clone(), suffix_val.clone()]; + + // Make a table, add the index, and insert the row. + let (mut table, pool, mut blob_store) = setup(ty); + unsafe { table.add_index(IndexId::SENTINEL, index) }; + let (_, row_ref) = table.insert(&pool, &mut blob_store, &row).unwrap(); + let row_ptr = row_ref.pointer(); + let index = table.get_index_by_id(IndexId::SENTINEL).unwrap(); + + // Test sanity of various bounds. 
+ let seek = |start, end| { + let prefix = to_vec(&prefix_val).unwrap(); + let rstart = to_vec(&start).unwrap(); + let rend = to_vec(&end).unwrap(); + let range = index + .bounds_from_bsatn(&prefix, 1.into(), &rstart, &rend) + .unwrap() + .into_range() + .unwrap(); + index.seek_range(&range).unwrap().collect::>() + }; + use Bound::*; + // An unbounded range on both ends will find the row. + assert_eq!(seek(Unbounded, Unbounded), [row_ptr]); + // Including middle as the start/end should find the row. + assert_eq!(seek(Included(middle.clone()), Unbounded), [row_ptr]); + assert_eq!(seek(Unbounded, Included(middle.clone())), [row_ptr]); + assert_eq!(seek(Included(middle.clone()), Included(middle.clone())), [row_ptr]); + // Excluding middle as the start/end shouldn't find the row. + assert_eq!(seek(Excluded(middle.clone()), Unbounded), []); + assert_eq!(seek(Excluded(middle.clone()), Included(end.clone())), []); + assert_eq!(seek(Unbounded, Excluded(middle.clone())), []); + assert_eq!(seek(Included(start.clone()), Excluded(middle.clone())), []); + // Including start and end should find the row. + assert_eq!(seek(Included(start.clone()), Unbounded), [row_ptr]); + assert_eq!(seek(Unbounded, Included(end.clone())), [row_ptr]); + assert_eq!(seek(Included(start.clone()), Included(end.clone())), [row_ptr]); + // Excluding start/end should find the row when `start, end != middle`. 
+ if start < middle && middle < end { + assert_eq!(seek(Excluded(start.clone()), Excluded(end.clone())), [row_ptr]); + } + if start < middle { + assert_eq!(seek(Excluded(start.clone()), Included(end.clone())), [row_ptr]); + assert_eq!(seek(Excluded(start.clone()), Unbounded), [row_ptr]); + } + if middle < end { + assert_eq!(seek(Included(start.clone()), Excluded(end.clone())), [row_ptr]); + assert_eq!(seek(Unbounded, Excluded(end.clone())), [row_ptr]); + } + } } } diff --git a/crates/table/src/table_index/unique_btree_index.rs b/crates/table/src/table_index/unique_btree_index.rs index 3f539935740..40342068898 100644 --- a/crates/table/src/table_index/unique_btree_index.rs +++ b/crates/table/src/table_index/unique_btree_index.rs @@ -140,6 +140,7 @@ impl UniqueBTreeIndex { } /// An iterator over the potential value in a unique index for a given key. +#[derive(Clone)] pub struct UniquePointIter { /// The iterator seeking for matching keys in the range. pub(super) iter: IntoIter,