diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 057dcf4..1f28e1c 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -19,11 +19,11 @@ jobs: - name: Install cargo-hack run: cargo install cargo-hack - name: Build - run: cargo hack build --manifest-path cold-string/Cargo.toml --verbose --feature-powerset --version-range 1.60.. + run: cargo hack build --manifest-path cold-string/Cargo.toml --verbose --feature-powerset --version-range 1.60.. --exclude-features rkyv - name: Check - run: cargo hack check --manifest-path cold-string/Cargo.toml --verbose --feature-powerset --version-range 1.60.. + run: cargo hack check --manifest-path cold-string/Cargo.toml --verbose --feature-powerset --version-range 1.60.. --exclude-features rkyv - name: Test No Exposed Provenance - run: cargo +1.74 hack test --manifest-path cold-string/Cargo.toml --verbose --feature-powerset + run: cargo +1.74 hack test --manifest-path cold-string/Cargo.toml --verbose --feature-powerset --exclude-features rkyv - name: Tests run: cargo hack test --manifest-path cold-string/Cargo.toml --verbose --feature-powerset - name: Install nightly + Miri @@ -31,10 +31,10 @@ jobs: rustup toolchain install nightly rustup component add miri --toolchain nightly - name: Miri 64 bit LE - run: cargo +nightly miri test --manifest-path cold-string/Cargo.toml + run: cargo +nightly miri test --all-features --manifest-path cold-string/Cargo.toml - name: Miri 64 bit BE - run: cargo +nightly miri test --manifest-path cold-string/Cargo.toml --target powerpc64-unknown-linux-gnu + run: cargo +nightly miri test --all-features --manifest-path cold-string/Cargo.toml --target powerpc64-unknown-linux-gnu - name: Miri 32 bit LE - run: cargo +nightly miri test --target i686-unknown-linux-gnu + run: cargo +nightly miri test --all-features --target i686-unknown-linux-gnu - name: Miri 32 bit BE - run: cargo +nightly miri test --manifest-path cold-string/Cargo.toml --target 
mips-unknown-linux-gnu + run: cargo +nightly miri test --all-features --manifest-path cold-string/Cargo.toml --target mips-unknown-linux-gnu diff --git a/bench/benches/bench.rs b/bench/benches/bench.rs index 0dc3e8d..254a9c5 100644 --- a/bench/benches/bench.rs +++ b/bench/benches/bench.rs @@ -69,10 +69,11 @@ fn bench_as_str_inner>( indices: &[usize], // Pass pre-shuffled indices ) { // Pre-convert to the target type - let strings: Vec<_> = strings.iter() + let strings: Vec<_> = strings + .iter() .map(|s| T::from_str(s).map_err(|_| ()).unwrap()) .collect(); - + let strings = black_box(strings); let label = format!("{}-len={}-{}", name, min, max); diff --git a/bench/tests/memory.rs b/bench/tests/memory.rs index 94d8ecb..7962075 100644 --- a/bench/tests/memory.rs +++ b/bench/tests/memory.rs @@ -155,9 +155,9 @@ fn system_memory(name: &str, workload: impl Fn(usize, usize)) { /// cargo test test_system_memory --release -- --no-capture --include-ignored /// ``` #[test] +#[rustfmt::skip] #[ignore] fn test_system_memory() { - // Print table header print!("{:CELL_WIDTH$}", format!("{}..={}", 0, size)); @@ -170,13 +170,9 @@ fn test_system_memory() { } println!(); - system_memory("cold-string", hash_map_workload::); system_memory("compact_str", hash_map_workload::); - system_memory( - "compact_string", - hash_map_workload::, - ); + system_memory("compact_string", hash_map_workload::); system_memory("smallstr", hash_map_workload::>); system_memory("smartstring", hash_map_workload::); system_memory("smol_str", hash_map_workload::); diff --git a/cold-string/Cargo.lock b/cold-string/Cargo.lock index 6c993dd..e4e1d7d 100644 --- a/cold-string/Cargo.lock +++ b/cold-string/Cargo.lock @@ -40,6 +40,29 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +[[package]] +name = "bytecheck" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "0caa33a2c0edca0419d15ac723dff03f1956f7978329b1e3b5fdaaaed9d3ca8b" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "rancor", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89385e82b5d1821d2219e0b095efa2cc1f246cbf99080f3be46a1a85c0d392d9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "cfg-if" version = "1.0.4" @@ -50,8 +73,9 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" name = "cold-string" version = "0.1.0" dependencies = [ - "hashbrown", + "hashbrown 0.12.3", "proptest", + "rkyv", "rustversion", "serde", "serde_test", @@ -90,6 +114,12 @@ dependencies = [ "ahash", ] +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + [[package]] name = "lazy_static" version = "1.5.0" @@ -102,6 +132,26 @@ version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" +[[package]] +name = "munge" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e17401f259eba956ca16491461b6e8f72913a0a114e39736ce404410f915a0c" +dependencies = [ + "munge_macro", +] + +[[package]] +name = "munge_macro" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -153,6 +203,26 @@ dependencies = [ "unarray", ] +[[package]] +name = "ptr_meta" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b9a0cf95a1196af61d4f1cbdab967179516d9a4a4312af1f31948f8f6224a79" 
+dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "quote" version = "1.0.44" @@ -168,6 +238,15 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "rancor" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a063ea72381527c2a0561da9c80000ef822bdd7c3241b1cc1b12100e3df081ee" +dependencies = [ + "ptr_meta", +] + [[package]] name = "rand" version = "0.9.2" @@ -212,6 +291,42 @@ version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" +[[package]] +name = "rend" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cadadef317c2f20755a64d7fdc48f9e7178ee6b0e1f7fce33fa60f1d68a276e6" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "rkyv" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a30e631b7f4a03dee9056b8ef6982e8ba371dd5bedb74d3ec86df4499132c70" +dependencies = [ + "bytecheck", + "hashbrown 0.16.1", + "munge", + "ptr_meta", + "rancor", + "rend", + "rkyv_derive", + "tinyvec", +] + +[[package]] +name = "rkyv_derive" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8100bb34c0a1d0f907143db3149e6b4eea3c33b9ee8b189720168e818303986f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "rustversion" version = "1.0.22" @@ -256,6 +371,12 @@ dependencies = [ "serde", ] +[[package]] +name = "simdutf8" +version = "0.1.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "sptr" version = "0.3.2" @@ -273,6 +394,21 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tinyvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "unarray" version = "0.1.4" diff --git a/cold-string/Cargo.toml b/cold-string/Cargo.toml index f270347..a60aba7 100644 --- a/cold-string/Cargo.toml +++ b/cold-string/Cargo.toml @@ -18,9 +18,11 @@ maintenance = { status = "actively-developed" } [features] default = [] serde = ["dep:serde", "serde/alloc"] +rkyv = ["dep:rkyv", "rkyv/alloc", "rkyv/bytecheck"] [dependencies] serde = { version = "1.0.228", optional = true, default-features = false } +rkyv = { version = "0.8.15", optional = true, default-features = false } sptr = { version = "0.3.2", default-features = false } rustversion = "1.0.22" diff --git a/cold-string/README.md b/cold-string/README.md index 5713d7e..4badad7 100644 --- a/cold-string/README.md +++ b/cold-string/README.md @@ -5,7 +5,33 @@ ![MSRV](https://img.shields.io/crates/msrv/cold-string?style=for-the-badge) ![Downloads](https://img.shields.io/crates/d/cold-string?style=for-the-badge) -A 1-word sized representation of immutable UTF-8 strings. In-lines up to 1 word bytes. Optimized for memory usage and struct packing. +A 1-word (8-byte) sized representation of immutable UTF-8 strings that in-lines up to 8 bytes. Optimized for memory usage and struct packing. 
+ +# Overview + +`ColdString` is optimized for memory efficiency for both **large** and **short** strings: +- 0..=8 bytes: always 8 bytes total (fully inlined). +- 9..=127 bytes: 8-byte pointer + 1-byte length encoding. +- 128..=16383 bytes: 8-byte pointer + 2-byte length encoding. +- Continues logarithmically up to 18 bytes overhead for sizes up to `isize::MAX`. + +Compared to `String`, which stores capacity and length inline (3 machine words), `ColdString` avoids storing length inline for heap strings and compresses metadata into tagged pointer space. This leads to substantial memory savings in benchmarks (see [Memory Comparison (System RSS)](#memory-comparison-system-rss)): +- **36% – 68%** smaller than `String` in `HashMap` +- **28% – 65%** smaller than other short-string crates in `HashMap` +- **30% – 75%** smaller than `String` in `BTreeSet` +- **13% – 63%** smaller than other short-string crates in `BTreeSet` + +`ColdString` has an MSRV of 1.60, is `no_std` compatible, and is a drop-in replacement for immutable `String`s. + +### Safety +`ColdString` is written using [Rust's strict provenance API](https://doc.rust-lang.org/beta/std/ptr/index.html#strict-provenance), carefully handles unaligned access internally, and is validated with property testing and Miri. + +### Why "Cold"? + +The heap representation stores the length on the heap, not inline in the struct. This saves memory in the struct itself but *slightly* increases the cost of `len()` since it requires a heap read. In practice, `len()` is only marginally slower than it would be with inline storage, and the difference is typically negligible compared to: +- Memory savings +- Cache density improvements +- Faster collection operations due to reduced footprint # Usage @@ -45,13 +71,13 @@ pub struct ColdString { ``` `encoded` acts as either a pointer to the heap for strings longer than 8 bytes or is the inlined data itself. 
The first/"tag" byte indicates one of 3 encodings: -## Inline Mode (0 to 7 Bytes) +### Inline Mode (0 to 7 Bytes) The tag byte has bits 11111xxx, where xxx is the length. `self.0[1]` to `self.0[7]` store the bytes of string. -## Inline Mode (8 Bytes) +### Inline Mode (8 Bytes) The tag byte is any valid UTF-8 byte. `self.0` stores the bytes of string. Since the string is UTF-8, the tag byte is guaranteed to not be 10xxxxx or 11111xxx. -## Heap Mode +### Heap Mode `self.0` encodes the pointer to heap, where tag byte is 10xxxxxx. 10xxxxxx is chosen because it's a UTF-8 continuation byte and therefore an impossible tag byte for inline mode. Since a heap-alignment of 4 is chosen, the pointer's least significant 2 bits are guaranteed to be 0 ([See more](https://doc.rust-lang.org/beta/std/alloc/struct.Layout.html#method.from_size_align)). These bits are swapped with the 10 "tag" bits when de/coding between `self.0` and the address value. On the heap, the data starts with a variable length integer encoding of the length, followed by the bytes. 
@@ -59,11 +85,13 @@ On the heap, the data starts with a variable length integer encoding of the leng ptr --> ``` -# Memory Comparisons +# Memory Comparisons (Allocator) + +Memory usage per string, measured by tracking the memory requested by the allocator: -![string_memory](https://github.com/user-attachments/assets/6644ae40-1da7-42e2-9ae6-0596e77e953e) +![string_memory](https://github.com/user-attachments/assets/adf09756-9910-4618-a97f-b5ab91a2515a) -## Memory Usage Comparison +## Memory Comparison (System RSS) RSS per insertion of various collections containing strings of random lengths 0..=N: diff --git a/cold-string/src/lib.rs b/cold-string/src/lib.rs index 0695997..3a494a2 100644 --- a/cold-string/src/lib.rs +++ b/cold-string/src/lib.rs @@ -10,22 +10,27 @@ use sptr::Strict; use alloc::{ alloc::{alloc, dealloc, Layout}, + borrow::{Cow, ToOwned}, + boxed::Box, str::Utf8Error, string::String, }; use core::{ + cmp::Ordering, fmt, hash::{Hash, Hasher}, iter::FromIterator, mem, ops::Deref, ptr, slice, str, - cmp::Ordering, }; mod vint; use crate::vint::VarInt; +#[cfg(feature = "rkyv")] +mod rkyv; + const HEAP_ALIGN: usize = 4; const WIDTH: usize = mem::size_of::(); @@ -52,7 +57,7 @@ pub struct ColdString { /// with the LSB bits of the tag byte. The address is always a multiple of 4 (`HEAP_ALIGN`). /// - 11111xxx: xxx is the length in range 0..=7, followed by length UTF-8 bytes. /// - xxxxxxxx (valid UTF-8): 8 UTF-8 bytes. 
- encoded: *mut u8, + encoded: *const u8, } impl ColdString { @@ -90,8 +95,8 @@ impl ColdString { /// /// assert!(result.is_err()); /// ``` - pub fn from_utf8(v: &[u8]) -> Result { - Ok(Self::new(str::from_utf8(v)?)) + pub fn from_utf8>(v: B) -> Result { + Ok(Self::new(str::from_utf8(v.as_ref())?)) } /// Converts a vector of bytes to a [`ColdString`] without checking that the string contains @@ -114,8 +119,8 @@ impl ColdString { /// /// assert_eq!("💖", sparkle_heart); /// ``` - pub unsafe fn from_utf8_unchecked(v: &[u8]) -> Self { - Self::new(str::from_utf8_unchecked(v)) + pub unsafe fn from_utf8_unchecked>(v: B) -> Self { + Self::new(str::from_utf8_unchecked(v.as_ref())) } /// Creates a new [`ColdString`] from any type that implements `AsRef`. @@ -197,7 +202,7 @@ impl ColdString { #[rustversion::attr(since(1.71), const)] #[inline] - unsafe fn ptr(&self) -> *mut u8 { + unsafe fn ptr(&self) -> *const u8 { ptr::read_unaligned(ptr::addr_of!(self.encoded)) } @@ -220,8 +225,7 @@ impl ColdString { #[inline] fn new_heap(s: &str) -> Self { let len = s.len(); - let mut len_buf = [0u8; 10]; - let vint_len = VarInt::write(len as u64, &mut len_buf); + let (vint_len, len_buf) = VarInt::write(len as u64); let total = vint_len + len; let layout = Layout::from_size_align(total, HEAP_ALIGN).unwrap(); @@ -245,7 +249,7 @@ impl ColdString { } #[inline] - fn heap_ptr(&self) -> *mut u8 { + fn heap_ptr(&self) -> *const u8 { debug_assert!(!self.is_inline()); unsafe { self.ptr().map_addr(|mut addr| { @@ -311,7 +315,7 @@ impl ColdString { let ptr = self.heap_ptr(); let (len, header) = VarInt::read(ptr); let data = ptr.add(header); - slice::from_raw_parts(data, len as usize) + slice::from_raw_parts(data, len) } /// Returns a byte slice of this `ColdString`'s contents. 
@@ -381,9 +385,9 @@ impl Drop for ColdString { unsafe { let ptr = self.heap_ptr(); let (len, header) = VarInt::read(ptr); - let total = header + len as usize; + let total = header + len; let layout = Layout::from_size_align(total, HEAP_ALIGN).unwrap(); - dealloc(ptr, layout); + dealloc(ptr as *mut u8, layout); } } } @@ -444,6 +448,43 @@ impl From for ColdString { } } +impl From for String { + fn from(s: ColdString) -> Self { + s.as_str().to_owned() + } +} + +impl From for Cow<'_, str> { + #[inline] + fn from(s: ColdString) -> Self { + Self::Owned(s.into()) + } +} + +impl<'a> From<&'a ColdString> for Cow<'a, str> { + #[inline] + fn from(s: &'a ColdString) -> Self { + Self::Borrowed(s) + } +} + +impl<'a> From> for ColdString { + fn from(cow: Cow<'a, str>) -> Self { + match cow { + Cow::Borrowed(s) => s.into(), + Cow::Owned(s) => s.into(), + } + } +} + +impl From> for ColdString { + #[inline] + #[track_caller] + fn from(b: Box) -> Self { + Self::new(&b) + } +} + impl FromIterator for ColdString { fn from_iter>(iter: I) -> Self { let s: String = iter.into_iter().collect(); @@ -635,10 +676,10 @@ mod tests { "AaAa0 ® ", str::from_utf8(&[240, 158, 186, 128, 240, 145, 143, 151]).unwrap(), ] { - assert_correct(s); + assert_correct(s); } } - + fn char_from_leading_byte(b: u8) -> Option { match b { 0x00..=0x7F => Some(b as char), @@ -646,9 +687,18 @@ mod tests { 0xE0 => str::from_utf8(&[b, 0xA0, 0x91]).unwrap().chars().next(), 0xE1..=0xEC | 0xEE..=0xEF => str::from_utf8(&[b, 0x91, 0xA5]).unwrap().chars().next(), 0xED => str::from_utf8(&[b, 0x80, 0x91]).unwrap().chars().next(), - 0xF0 => str::from_utf8(&[b, 0x90, 0x91, 0xA5]).unwrap().chars().next(), - 0xF1..=0xF3 => str::from_utf8(&[b, 0x91, 0xA5, 0x82]).unwrap().chars().next(), - 0xF4 => str::from_utf8(&[b, 0x80, 0x91, 0x82]).unwrap().chars().next(), + 0xF0 => str::from_utf8(&[b, 0x90, 0x91, 0xA5]) + .unwrap() + .chars() + .next(), + 0xF1..=0xF3 => str::from_utf8(&[b, 0x91, 0xA5, 0x82]) + .unwrap() + .chars() + .next(), + 
0xF4 => str::from_utf8(&[b, 0x80, 0x91, 0x82]) + .unwrap() + .chars() + .next(), _ => None, } } @@ -670,7 +720,7 @@ mod tests { let c = core::char::from_digit((len - s.len()) as u32, 10).unwrap(); s.push(c); } - + assert_correct(&s); } } diff --git a/cold-string/src/rkyv.rs b/cold-string/src/rkyv.rs new file mode 100644 index 0000000..5a67a14 --- /dev/null +++ b/cold-string/src/rkyv.rs @@ -0,0 +1,91 @@ +#![cfg_attr(docsrs, doc(cfg(feature = "rkyv")))] + +use crate::ColdString; + +use rkyv::{ + rancor::{Fallible, Source}, + ser::{Allocator, Writer}, + string::{ArchivedString, StringResolver}, + Archive, Deserialize, Place, Serialize, +}; + +impl Archive for ColdString { + type Archived = ArchivedString; + type Resolver = StringResolver; + + #[inline] + fn resolve(&self, resolver: Self::Resolver, out: Place) { + ArchivedString::resolve_from_str(self, resolver, out); + } +} + +impl Serialize for ColdString +where + S: Fallible + Allocator + Writer + ?Sized, + S::Error: Source, +{ + #[inline] + fn serialize(&self, serializer: &mut S) -> Result { + ArchivedString::serialize_from_str(self, serializer) + } +} + +impl Deserialize for ArchivedString { + #[inline] + fn deserialize(&self, _deserializer: &mut D) -> Result { + Ok(ColdString::new(self.as_str())) + } +} + +impl PartialEq for ArchivedString { + #[inline] + fn eq(&self, other: &ColdString) -> bool { + other.as_str() == self.as_str() + } +} + +impl PartialEq for ColdString { + #[inline] + fn eq(&self, other: &ArchivedString) -> bool { + other.as_str() == self.as_str() + } +} + +impl PartialOrd for ArchivedString { + #[inline] + fn partial_cmp(&self, other: &ColdString) -> Option<::core::cmp::Ordering> { + Some(self.as_str().cmp(other.as_str())) + } +} + +impl PartialOrd for ColdString { + #[inline] + fn partial_cmp(&self, other: &ArchivedString) -> Option<::core::cmp::Ordering> { + Some(self.as_str().cmp(other.as_str())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rkyv::rancor::Error; + + 
#[cfg_attr(miri, ignore)] // https://github.com/rust-lang/unsafe-code-guidelines/issues/134 + #[test] + fn roundtrip_cold_string() { + for s in ["", "hello", "this is a longer cold string"] { + let data = ColdString::from(s); + let bytes = rkyv::to_bytes::(&data).unwrap(); + let archived = + rkyv::access::, rkyv::rancor::Error>(&bytes).unwrap(); + assert_eq!(&data, archived); + let deserialized: ColdString = + rkyv::deserialize::(archived).unwrap(); + assert_eq!(data, deserialized); + + let bytes = rkyv::to_bytes::(&data).unwrap(); + let deserialized = rkyv::from_bytes::(&bytes).unwrap(); + assert_eq!(data, deserialized); + } + } +} diff --git a/cold-string/src/vint.rs b/cold-string/src/vint.rs index df3595c..ac35a39 100644 --- a/cold-string/src/vint.rs +++ b/cold-string/src/vint.rs @@ -1,7 +1,8 @@ pub struct VarInt; impl VarInt { - pub fn write(mut value: u64, buf: &mut [u8; 10]) -> usize { + pub const fn write(mut value: u64) -> (usize, [u8; 10]) { + let mut buf = [0u8; 10]; let mut i = 0; loop { let mut byte = (value & 0x7F) as u8; @@ -15,17 +16,17 @@ impl VarInt { break; } } - i + (i, buf) } #[allow(unsafe_op_in_unsafe_fn)] - pub unsafe fn read(ptr: *const u8) -> (u64, usize) { - let mut result = 0u64; + pub unsafe fn read(ptr: *const u8) -> (usize, usize) { + let mut result = 0usize; let mut shift = 0; let mut i = 0; loop { let byte = *ptr.add(i); - result |= ((byte & 0x7F) as u64) << shift; + result |= ((byte & 0x7F) as usize) << shift; shift += 7; i += 1; @@ -41,31 +42,27 @@ impl VarInt { mod tests { use super::*; + fn assert_correct(x: u64) { + let (wrote, b) = VarInt::write(x); + assert!(wrote >= 1 && wrote <= 10); + let ptr = b.as_ptr(); + let (y, read) = unsafe { VarInt::read(ptr) }; + assert_eq!(wrote, read); + assert_eq!(x, y as u64); + } + #[test] fn vint_round_trip() { - for x in [ - 0, - 1, - 42, - 59243, - 5, - 8, - 7, - 63, - 64, - 5892389523, - (1 << 56) - 1, - 5892389523582389523, - 1 << 56, - u64::MAX, - ] { - let mut b = [0u8; 10]; - let wrote 
= VarInt::write(x, &mut b); - assert!(wrote >= 1 && wrote <= 10); - let ptr = b.as_ptr(); - let (y, read) = unsafe { VarInt::read(ptr) }; - assert_eq!(wrote, read); - assert_eq!(x, y); + for x in [0, 1, 42, 59243, 5, 8, 7, 63, 64] { + assert_correct(x); + } + + for x in 0..=u16::MAX { + assert_correct(x as u64); + } + + for x in 0..=100 { + assert_correct(usize::MAX as u64 - x); } } }