From fff2b1c27478e428e6f181a834bf3da2f6d36107 Mon Sep 17 00:00:00 2001 From: Thomas Pendock Date: Mon, 23 Feb 2026 17:02:49 -0800 Subject: [PATCH 1/3] Support MSRV 1.60 and re-organize into workspace --- .github/workflows/rust.yml | 25 +- Cargo.lock | 453 +----------------- Cargo.toml | 49 +- README.md | 85 +--- bench/Cargo.toml | 25 + {benches => bench/benches}/bench.rs | 0 {benches => bench/benches}/eq.rs | 0 memory.py => bench/memory.py | 0 {tests => bench/tests}/memory.rs | 0 cold-string/Cargo.lock | 276 +++++++++++ cold-string/Cargo.toml | 29 ++ LICENSE-APACHE => cold-string/LICENSE-APACHE | 0 LICENSE-MIT => cold-string/LICENSE-MIT | 0 cold-string/README.md | 85 ++++ {src => cold-string/src}/lib.rs | 83 +++- {src => cold-string/src}/vint.rs | 6 +- .../tests}/property.proptest-regressions | 0 {tests => cold-string/tests}/property.rs | 4 +- tests/common.rs | 1 - 19 files changed, 511 insertions(+), 610 deletions(-) mode change 100644 => 120000 README.md create mode 100644 bench/Cargo.toml rename {benches => bench/benches}/bench.rs (100%) rename {benches => bench/benches}/eq.rs (100%) rename memory.py => bench/memory.py (100%) rename {tests => bench/tests}/memory.rs (100%) create mode 100644 cold-string/Cargo.lock create mode 100644 cold-string/Cargo.toml rename LICENSE-APACHE => cold-string/LICENSE-APACHE (100%) rename LICENSE-MIT => cold-string/LICENSE-MIT (100%) create mode 100644 cold-string/README.md rename {src => cold-string/src}/lib.rs (86%) rename {src => cold-string/src}/vint.rs (87%) rename {tests => cold-string/tests}/property.proptest-regressions (100%) rename {tests => cold-string/tests}/property.rs (95%) delete mode 100644 tests/common.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index b309048..316f7e2 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -19,29 +19,20 @@ jobs: - name: Install cargo-hack run: cargo install cargo-hack - name: Build - run: cargo hack build --verbose --release 
--feature-powerset + run: cargo hack build --manifest-path cold-string/Cargo.toml --verbose --release --feature-powerset --version-range 1.60.. + - name: Check + run: cargo hack check --manifest-path cold-string/Cargo.toml --verbose --release --feature-powerset --version-range 1.60.. + - name: Test No Exposed Provenance + run: cargo +1.74 hack test --manifest-path cold-string/Cargo.toml --verbose --release --feature-powerset - name: Tests - run: cargo hack test --verbose --release --feature-powerset + run: cargo hack test --manifest-path cold-string/Cargo.toml --verbose --release --feature-powerset - name: Install nightly + Miri run: | rustup toolchain install nightly rustup component add miri --toolchain nightly - name: Run Miri run: | - cargo +nightly miri test --test property + cargo +nightly miri test --manifest-path cold-string/Cargo.toml --test property - name: Run Miri Unknown run: | - cargo +nightly miri test --test property --target mips-unknown-linux-gnu - msrv: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions-rs/toolchain@v1 - with: - toolchain: "1.84" - override: true - - name: Install cargo-hack - run: cargo +1.84 install cargo-hack --version 0.6.37 --force --locked - - name: Build - run: cargo +1.84 hack build --verbose --release --feature-powerset - + cargo +nightly miri test --manifest-path cold-string/Cargo.toml --test property --target mips-unknown-linux-gnu diff --git a/Cargo.lock b/Cargo.lock index 89bc6ce..f2852a5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,12 +23,6 @@ version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" -[[package]] -name = "anyhow" -version = "1.0.102" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" - [[package]] name = "autocfg" version = "1.5.0" @@ -36,20 +30,19 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] -name = "bit-set" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +name = "benches" +version = "0.1.0" dependencies = [ - "bit-vec", + "cold-string", + "compact_str", + "compact_string", + "criterion", + "fastrand", + "rand", + "smol_str", + "sysinfo", ] -[[package]] -name = "bit-vec" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" - [[package]] name = "bitflags" version = "2.11.0" @@ -154,16 +147,7 @@ checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" name = "cold-string" version = "0.1.0" dependencies = [ - "compact_str", - "compact_string", - "criterion", - "fastrand", - "proptest", - "rand 0.8.5", - "serde", - "serde_test", - "smol_str", - "sysinfo", + "rustversion", ] [[package]] @@ -263,40 +247,12 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - -[[package]] -name = "errno" -version = "0.3.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" -dependencies = [ - "libc", - "windows-sys", -] - [[package]] name = "fastrand" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "fnv" -version = "1.0.7" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "foldhash" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" - [[package]] name = "getrandom" version = "0.2.17" @@ -308,31 +264,6 @@ dependencies = [ "wasi", ] -[[package]] -name = "getrandom" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasip2", -] - -[[package]] -name = "getrandom" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasip2", - "wasip3", -] - [[package]] name = "half" version = "2.7.1" @@ -344,51 +275,12 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "hashbrown" -version = "0.15.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" -dependencies = [ - "foldhash", -] - -[[package]] -name = "hashbrown" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - [[package]] name = "hermit-abi" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" -[[package]] -name = "id-arena" -version = "2.3.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" - -[[package]] -name = "indexmap" -version = "2.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" -dependencies = [ - "equivalent", - "hashbrown 0.16.1", - "serde", - "serde_core", -] - [[package]] name = "is-terminal" version = "0.4.17" @@ -425,30 +317,12 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "leb128fmt" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" - [[package]] name = "libc" version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" -[[package]] -name = "linux-raw-sys" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" - -[[package]] -name = "log" -version = "0.4.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" - [[package]] name = "memchr" version = "2.8.0" @@ -541,16 +415,6 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "prettyplease" -version = "0.2.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" -dependencies = [ - "proc-macro2", - "syn", -] - [[package]] name = "proc-macro2" version = "1.0.106" @@ -560,31 +424,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "proptest" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37566cb3fdacef14c0737f9546df7cfeadbfbc9fef10991038bf5015d0c80532" -dependencies = [ - 
"bit-set", - "bit-vec", - "bitflags", - "num-traits", - "rand 0.9.2", - "rand_chacha 0.9.0", - "rand_xorshift", - "regex-syntax", - "rusty-fork", - "tempfile", - "unarray", -] - -[[package]] -name = "quick-error" -version = "1.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" - [[package]] name = "quote" version = "1.0.44" @@ -594,12 +433,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "r-efi" -version = "5.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" - [[package]] name = "rand" version = "0.8.5" @@ -607,18 +440,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.4", -] - -[[package]] -name = "rand" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" -dependencies = [ - "rand_chacha 0.9.0", - "rand_core 0.9.5", + "rand_chacha", + "rand_core", ] [[package]] @@ -628,17 +451,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core 0.6.4", -] - -[[package]] -name = "rand_chacha" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" -dependencies = [ - "ppv-lite86", - "rand_core 0.9.5", + "rand_core", ] [[package]] @@ -647,25 +460,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.17", -] - -[[package]] 
-name = "rand_core" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" -dependencies = [ - "getrandom 0.3.4", -] - -[[package]] -name = "rand_xorshift" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" -dependencies = [ - "rand_core 0.9.5", + "getrandom", ] [[package]] @@ -717,37 +512,12 @@ version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" -[[package]] -name = "rustix" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" -dependencies = [ - "bitflags", - "errno", - "libc", - "linux-raw-sys", - "windows-sys", -] - [[package]] name = "rustversion" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" -[[package]] -name = "rusty-fork" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" -dependencies = [ - "fnv", - "quick-error", - "tempfile", - "wait-timeout", -] - [[package]] name = "ryu" version = "1.0.23" @@ -763,12 +533,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "semver" -version = "1.0.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" - [[package]] name = "serde" version = "1.0.228" @@ -812,15 +576,6 @@ dependencies = [ "zmij", ] -[[package]] -name = "serde_test" -version = "1.0.177" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7f901ee573cab6b3060453d2d5f0bae4e6d628c23c0a962ff9b5f1d7c8d4f1ed" -dependencies = [ - "serde", -] - [[package]] name = "smol_str" version = "0.3.5" @@ -862,19 +617,6 @@ dependencies = [ "windows", ] -[[package]] -name = "tempfile" -version = "3.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" -dependencies = [ - "fastrand", - "getrandom 0.4.1", - "once_cell", - "rustix", - "windows-sys", -] - [[package]] name = "thiserror" version = "1.0.69" @@ -905,33 +647,12 @@ dependencies = [ "serde_json", ] -[[package]] -name = "unarray" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" - [[package]] name = "unicode-ident" version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" -[[package]] -name = "unicode-xid" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" - -[[package]] -name = "wait-timeout" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" -dependencies = [ - "libc", -] - [[package]] name = "walkdir" version = "2.5.0" @@ -948,24 +669,6 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasip2" -version = "1.0.2+wasi-0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" -dependencies = [ - "wit-bindgen", -] - -[[package]] -name = "wasip3" -version = 
"0.4.0+wasi-0.3.0-rc-2026-01-06" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" -dependencies = [ - "wit-bindgen", -] - [[package]] name = "wasm-bindgen" version = "0.2.111" @@ -1011,45 +714,11 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "wasm-encoder" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" -dependencies = [ - "leb128fmt", - "wasmparser", -] - -[[package]] -name = "wasm-metadata" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" -dependencies = [ - "anyhow", - "indexmap", - "wasm-encoder", - "wasmparser", -] - -[[package]] -name = "wasmparser" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" -dependencies = [ - "bitflags", - "hashbrown 0.15.5", - "indexmap", - "semver", -] - [[package]] name = "web-sys" -version = "0.3.70" +version = "0.3.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" +checksum = "9d6bb20ed2d9572df8584f6dc81d68a41a625cadc6f15999d649a70ce7e3597a" dependencies = [ "js-sys", "wasm-bindgen", @@ -1205,94 +874,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "wit-bindgen" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" -dependencies = [ - "wit-bindgen-rust-macro", -] - -[[package]] -name = "wit-bindgen-core" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" -dependencies = [ - "anyhow", - "heck", - "wit-parser", -] - -[[package]] -name = "wit-bindgen-rust" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" -dependencies = [ - "anyhow", - "heck", - "indexmap", - "prettyplease", - "syn", - "wasm-metadata", - "wit-bindgen-core", - "wit-component", -] - -[[package]] -name = "wit-bindgen-rust-macro" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" -dependencies = [ - "anyhow", - "prettyplease", - "proc-macro2", - "quote", - "syn", - "wit-bindgen-core", - "wit-bindgen-rust", -] - -[[package]] -name = "wit-component" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" -dependencies = [ - "anyhow", - "bitflags", - "indexmap", - "log", - "serde", - "serde_derive", - "serde_json", - "wasm-encoder", - "wasm-metadata", - "wasmparser", - "wit-parser", -] - -[[package]] -name = "wit-parser" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" -dependencies = [ - "anyhow", - "id-arena", - "indexmap", - "log", - "semver", - "serde", - "serde_derive", - "serde_json", - "unicode-xid", - "wasmparser", -] - [[package]] name = "zerocopy" version = "0.8.39" diff --git a/Cargo.toml b/Cargo.toml index 3de6715..377cb7e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,44 +1,7 @@ -[package] -name = "cold-string" -version = "0.1.0" -edition = "2021" -rust-version = "1.84.0" -authors = ["tomtomwombat"] -description = "A 1-word SSO string that saves up to 23 bytes over String." 
-license = "MIT OR Apache-2.0" -homepage = "https://github.com/tomtomwombat/cold-string/" -repository = "https://github.com/tomtomwombat/cold-string/" -keywords = ["string", "compact", "small", "memory"] -categories = ["encoding", "parsing", "memory-management", "text-processing"] -readme = "README.md" +[workspace] +members = [ + "bench", +] +exclude = ["cold-string"] -[badges] -maintenance = { status = "actively-developed" } - -[features] -default = ["std"] -std = [] -serde = ["dep:serde"] - -[dependencies] -serde = { version = "1.0.228", optional = true } - -[dev-dependencies] -serde_test = "1.0.177" -criterion = "0.5" -rand = "0.8" -proptest = "1.9.0" -sysinfo = "0.38.2" -fastrand = "2.3.0" - -compact_string = "0.1.0" -smol_str = "0.3.5" -compact_str = "0.9.0" - -[[bench]] -name = "bench" -harness = false - -[[bench]] -name = "eq" -harness = false \ No newline at end of file +resolver = "2" \ No newline at end of file diff --git a/README.md b/README.md deleted file mode 100644 index 710c142..0000000 --- a/README.md +++ /dev/null @@ -1,84 +0,0 @@ -# cold-string -[![Github](https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github)](https://github.com/tomtomwombat/cold-string) -[![Crates.io](https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust)](https://crates.io/crates/cold-string) -[![docs.rs](https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs)](https://docs.rs/cold-string) -![Downloads](https://img.shields.io/crates/d/cold-string?style=for-the-badge) - -Compact representation of immutable UTF-8 strings. Optimized for memory usage and struct packing. 
- -# Usage - -Use it like a `String`: -```rust -use cold_string::ColdString; - -let s = ColdString::new("qwerty"); -assert_eq!(s.as_str(), "qwerty"); -``` - -Packs well with other types: -```rust -use std::mem; -use cold_string::ColdString; - -assert_eq!(mem::size_of::<ColdString>(), 8); -assert_eq!(mem::align_of::<ColdString>(), 1); - -assert_eq!(mem::size_of::<(ColdString, u8)>(), 9); -assert_eq!(mem::align_of::<(ColdString, u8)>(), 1); -``` - -# How It Works - -ColdString is an 8 byte array (4 bytes on 32-bit machines): -```rust,ignore -pub struct ColdString([u8; 8]); -``` -The array acts as either a pointer to heap data for strings longer than 8 bytes or is the inlined data itself. The first byte indicates one of 3 encodings: - -## Inline Mode (0 to 7 Bytes) -The first byte has bits 11111xxx, where xxx is the length. `self.0[1]` to `self.0[7]` store the bytes of string. - -## Inline Mode (8 Bytes) -`self.0` stores the bytes of string. Since the string is UTF-8, the first byte is guaranteed to not be 10xxxxx or 11111xxx. - -## Heap Mode -`self.0` is an encoded pointer to heap, where first byte is 10xxxxxx. 10xxxxxx is chosen because it's a UTF-8 continuation byte and therefore an impossible first byte for inline mode. Since a heap-alignment of 4 is chosen, the pointer's least significant 2 bits are guaranteed to be 0 ([See more](https://doc.rust-lang.org/beta/std/alloc/struct.Layout.html#method.from_size_align)). These bits are swapped with the 10 "tag" bits when de/coding between `self.0` and the address value. - -On the heap, the data starts with a variable length integer encoding of the length, followed by the bytes. 
-```text,ignore -ptr --> -``` - -# Memory Comparisons - -![string_memory](https://github.com/user-attachments/assets/6644ae40-1da7-42e2-9ae6-0596e77e953e) - -## Memory Usage Comparison (RSS per String) - -| Crate | 0–4 chars | 0–8 chars | 0–16 chars | 0–32 chars | 0–64 chars | -| :--- | :---: | :---: | :---: | :---: | :---: | -| `std` | 36.9 B | 38.4 B | 46.8 B | 55.3 B | 71.4 B | -| `smol_str` | 24.0 B | 24.0 B | 24.0 B | 41.1 B | 72.2 B | -| `compact_str` | 24.0 B | 24.0 B | 24.0 B | 35.4 B | 61.0 B | -| `compact_string` | 24.1 B | 25.8 B | 32.6 B | 40.5 B | 56.5 B | -| **`cold-string`** | **8.0 B** | **8.0 B** | **23.2 B** | **35.7 B** | **53.0 B** | - -**Note:** Columns represent string length (bytes/chars). Values represent average Resident Set Size (RSS) in bytes per string instance. Measurements taken with 10M iterations. - -## License - -Licensed under either of - - * Apache License, Version 2.0 - ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) - * MIT license - ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) - -at your option. - -## Contribution - -Unless you explicitly state otherwise, any contribution intentionally submitted -for inclusion in the work by you, as defined in the Apache-2.0 license, shall be -dual licensed as above, without any additional terms or conditions. 
diff --git a/README.md b/README.md new file mode 120000 index 0000000..371cf54 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +cold-string/README.md \ No newline at end of file diff --git a/bench/Cargo.toml b/bench/Cargo.toml new file mode 100644 index 0000000..27b311f --- /dev/null +++ b/bench/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "benches" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +cold-string = { path = "../cold-string" } +criterion = "0.5" +rand = "0.8" +sysinfo = "0.38.2" +fastrand = "2.3.0" + +compact_string = "0.1.0" +smol_str = "0.3.5" +compact_str = "0.9.0" + +[[bench]] +name = "bench" +harness = false + +[[bench]] +name = "eq" +harness = false diff --git a/benches/bench.rs b/bench/benches/bench.rs similarity index 100% rename from benches/bench.rs rename to bench/benches/bench.rs diff --git a/benches/eq.rs b/bench/benches/eq.rs similarity index 100% rename from benches/eq.rs rename to bench/benches/eq.rs diff --git a/memory.py b/bench/memory.py similarity index 100% rename from memory.py rename to bench/memory.py diff --git a/tests/memory.rs b/bench/tests/memory.rs similarity index 100% rename from tests/memory.rs rename to bench/tests/memory.rs diff --git a/cold-string/Cargo.lock b/cold-string/Cargo.lock new file mode 100644 index 0000000..e31ebeb --- /dev/null +++ b/cold-string/Cargo.lock @@ -0,0 +1,276 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cold-string" +version = "0.1.0" +dependencies = [ + "proptest", + "rustversion", + "serde", + "serde_test", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.182" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "proptest" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bb0be07becd10686a0bb407298fb425360a5c44a663774406340c59a22de4ce" +dependencies = [ + "bit-set", + "bit-vec", + "bitflags", + "lazy_static", + "num-traits", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "unarray", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core", +] + +[[package]] +name = "regex-syntax" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_test" +version = "1.0.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f901ee573cab6b3060453d2d5f0bae4e6d628c23c0a962ff9b5f1d7c8d4f1ed" +dependencies = [ + "serde", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + 
+[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" + +[[package]] +name = "zerocopy" +version = "0.8.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/cold-string/Cargo.toml b/cold-string/Cargo.toml new file mode 100644 index 0000000..4d9942a --- /dev/null +++ b/cold-string/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "cold-string" +version = "0.1.0" +edition = "2021" +rust-version = "1.60.0" +authors = ["tomtomwombat"] +description = "A 1-word SSO string that saves up to 24 bytes over String." 
+license = "MIT OR Apache-2.0" +homepage = "https://github.com/tomtomwombat/cold-string/" +repository = "https://github.com/tomtomwombat/cold-string/" +keywords = ["string", "compact", "small", "memory"] +categories = ["encoding", "parsing", "memory-management", "text-processing"] +readme = "README.md" + +[badges] +maintenance = { status = "actively-developed" } + +[features] +default = ["std"] +std = [] +serde = ["dep:serde"] + +[dependencies] +serde = { version = "1.0.228", optional = true } +rustversion = "1.0.22" + +[dev-dependencies] +serde_test = "1.0.177" +proptest = {version = "=1.8.0", default-features = false, features = ["std", "bit-set"] } diff --git a/LICENSE-APACHE b/cold-string/LICENSE-APACHE similarity index 100% rename from LICENSE-APACHE rename to cold-string/LICENSE-APACHE diff --git a/LICENSE-MIT b/cold-string/LICENSE-MIT similarity index 100% rename from LICENSE-MIT rename to cold-string/LICENSE-MIT diff --git a/cold-string/README.md b/cold-string/README.md new file mode 100644 index 0000000..65e166d --- /dev/null +++ b/cold-string/README.md @@ -0,0 +1,85 @@ +# cold-string +[![Github](https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github)](https://github.com/tomtomwombat/cold-string) +[![Crates.io](https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust)](https://crates.io/crates/cold-string) +[![docs.rs](https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs)](https://docs.rs/cold-string) +![MSRV](https://img.shields.io/crates/msrv/cold-string?style=for-the-badge) +![Downloads](https://img.shields.io/crates/d/cold-string?style=for-the-badge) + +Compact representation of immutable UTF-8 strings. Optimized for memory usage and struct packing. 
+ +# Usage + +Use it like a `String`: +```rust +use cold_string::ColdString; + +let s = ColdString::new("qwerty"); +assert_eq!(s.as_str(), "qwerty"); +``` + +Packs well with other types: +```rust +use std::mem; +use cold_string::ColdString; + +assert_eq!(mem::size_of::(), 8); +assert_eq!(mem::align_of::(), 1); + +assert_eq!(mem::size_of::<(ColdString, u8)>(), 9); +assert_eq!(mem::align_of::<(ColdString, u8)>(), 1); +``` + +# How It Works + +ColdString is an 8 byte array (4 bytes on 32-bit machines): +```rust,ignore +pub struct ColdString([u8; 8]); +``` +The array acts as either a pointer to heap data for strings longer than 8 bytes or is the inlined data itself. The first byte indicates one of 3 encodings: + +## Inline Mode (0 to 7 Bytes) +The first byte has bits 11111xxx, where xxx is the length. `self.0[1]` to `self.0[7]` store the bytes of string. + +## Inline Mode (8 Bytes) +`self.0` stores the bytes of string. Since the string is UTF-8, the first byte is guaranteed to not be 10xxxxx or 11111xxx. + +## Heap Mode +`self.0` encodes the pointer to heap, where first byte is 10xxxxxx. 10xxxxxx is chosen because it's a UTF-8 continuation byte and therefore an impossible first byte for inline mode. Since a heap-alignment of 4 is chosen, the pointer's least significant 2 bits are guaranteed to be 0 ([See more](https://doc.rust-lang.org/beta/std/alloc/struct.Layout.html#method.from_size_align)). These bits are swapped with the 10 "tag" bits when de/coding between `self.0` and the address value. + +On the heap, the data starts with a variable length integer encoding of the length, followed by the bytes. 
+```text,ignore +ptr --> +``` + +# Memory Comparisons + +![string_memory](https://github.com/user-attachments/assets/6644ae40-1da7-42e2-9ae6-0596e77e953e) + +## Memory Usage Comparison (RSS per String) + +| Crate | 0–4 chars | 0–8 chars | 0–16 chars | 0–32 chars | 0–64 chars | +| :--- | :---: | :---: | :---: | :---: | :---: | +| `std` | 36.9 B | 38.4 B | 46.8 B | 55.3 B | 71.4 B | +| `smol_str` | 24.0 B | 24.0 B | 24.0 B | 41.1 B | 72.2 B | +| `compact_str` | 24.0 B | 24.0 B | 24.0 B | 35.4 B | 61.0 B | +| `compact_string` | 24.1 B | 25.8 B | 32.6 B | 40.5 B | 56.5 B | +| **`cold-string`** | **8.0 B** | **8.0 B** | **23.2 B** | **35.7 B** | **53.0 B** | + +**Note:** Columns represent string length (bytes/chars). Values represent average Resident Set Size (RSS) in bytes per string instance. Measurements taken with 10M iterations. + +## License + +Licensed under either of + + * Apache License, Version 2.0 + ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) + * MIT license + ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) + +at your option. + +## Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in the work by you, as defined in the Apache-2.0 license, shall be +dual licensed as above, without any additional terms or conditions. diff --git a/src/lib.rs b/cold-string/src/lib.rs similarity index 86% rename from src/lib.rs rename to cold-string/src/lib.rs index 72c1431..af70a95 100644 --- a/src/lib.rs +++ b/cold-string/src/lib.rs @@ -12,10 +12,10 @@ use alloc::{ use core::{ fmt, hash::{Hash, Hasher}, + iter::FromIterator, mem, ops::Deref, - ptr::{self, with_exposed_provenance_mut}, - slice, str, + ptr, slice, str, }; mod vint; @@ -44,6 +44,9 @@ const WIDTH: usize = mem::size_of::(); pub struct ColdString([u8; WIDTH]); impl ColdString { + const INLINE_MASK: u8 = 0b11111000; + const PTR_TAG: u8 = 0b10000000; + /// Convert a slice of bytes into a [`ColdString`]. 
/// /// A [`ColdString`] is a contiguous collection of bytes (`u8`s) that is valid [`UTF-8`](https://en.wikipedia.org/wiki/UTF-8). @@ -107,26 +110,50 @@ impl ColdString { } } - /// Returns `true` if the string bytes are inlined. #[inline] - pub const fn is_inline(&self) -> bool { - (self.0[0] & 0b11000000) != 0b10000000 - } - - #[inline] - const fn new_inline(s: &str) -> Self { + fn new_inline(s: &str) -> Self { debug_assert!(s.len() <= WIDTH); let mut buf = [0u8; WIDTH]; - unsafe { - let dest_ptr = buf.as_mut_ptr().add((s.len() < WIDTH) as usize); - ptr::copy_nonoverlapping(s.as_ptr(), dest_ptr, s.len()); - } + let start = (s.len() < WIDTH) as usize; + buf[start..s.len() + start].copy_from_slice(s.as_bytes()); if s.len() < WIDTH { - buf[0] = 0b11111000 | (s.len() as u8); + buf[0] = Self::INLINE_MASK | (s.len() as u8); } Self(buf) } + /// Returns `true` if the string bytes are inlined. + #[inline] + pub const fn is_inline(&self) -> bool { + (self.0[0] & 0b11000000) != Self::PTR_TAG + } + + #[rustversion::since(1.84)] + #[inline] + fn ptr_to_addr(ptr: *mut T) -> usize { + ptr.expose_provenance() + } + + #[rustversion::before(1.84)] + #[inline] + fn ptr_to_addr(ptr: *mut T) -> usize { + ptr as usize + } + + #[rustversion::attr(since(1.91), const)] + #[rustversion::since(1.84)] + #[inline] + fn addr_to_ptr(addr: usize) -> *mut T { + ptr::with_exposed_provenance_mut::(addr) + } + + #[rustversion::attr(since(1.83), const)] + #[rustversion::before(1.84)] + #[inline] + fn addr_to_ptr(addr: usize) -> *mut T { + addr as *mut T + } + #[inline] fn new_heap(s: &str) -> Self { let len = s.len(); @@ -145,28 +172,29 @@ impl ColdString { ptr::copy_nonoverlapping(len_buf.as_ptr(), ptr, vint_len); ptr::copy_nonoverlapping(s.as_ptr(), ptr.add(vint_len), len); - let addr = ptr.expose_provenance(); - debug_assert!(addr % 2 == 0); + let addr = Self::ptr_to_addr(ptr); + debug_assert!(addr % HEAP_ALIGN == 0); let mut addr = addr.rotate_left(6); - addr |= 0b10000000; + addr |= 
Self::PTR_TAG as usize; Self(addr.to_le_bytes()) } } + #[rustversion::attr(since(1.91), const)] #[inline] fn heap_ptr(&self) -> *mut u8 { debug_assert!(!self.is_inline()); let mut addr = usize::from_le_bytes(self.0); - addr ^= 0b10000000; + addr ^= Self::PTR_TAG as usize; let addr = addr.rotate_right(6); - debug_assert!(addr % 2 == 0); - with_exposed_provenance_mut::(addr) // const in 1.91 + debug_assert!(addr % HEAP_ALIGN == 0); + Self::addr_to_ptr(addr) } #[inline] const fn inline_len(&self) -> usize { - match self.0[0] & 0b11111000 { - 0b11111000 => (self.0[0] & 0b00000111) as usize, + match self.0[0] & Self::INLINE_MASK { + Self::INLINE_MASK => (self.0[0] & !Self::INLINE_MASK) as usize, _ => 8, } } @@ -187,6 +215,7 @@ impl ColdString { /// assert_eq!(fancy_f.len(), 4); /// assert_eq!(fancy_f.chars().count(), 3); /// ``` + #[rustversion::attr(since(1.91), const)] #[inline] pub fn len(&self) -> usize { if self.is_inline() { @@ -450,9 +479,13 @@ mod tests { assert_eq!(cs.clone(), cs); #[cfg(feature = "std")] { - use std::hash::{BuildHasher, RandomState}; - let bh = RandomState::new(); - assert_eq!(bh.hash_one(&cs), bh.hash_one(&cs.clone())); + use std::hash::BuildHasher; + let bh = std::collections::hash_map::RandomState::new(); + let mut hasher1 = bh.build_hasher(); + cs.hash(&mut hasher1); + let mut hasher2 = bh.build_hasher(); + cs.clone().hash(&mut hasher2); + assert_eq!(hasher1.finish(), hasher2.finish()); } assert_eq!(cs, s); assert_eq!(s, cs); diff --git a/src/vint.rs b/cold-string/src/vint.rs similarity index 87% rename from src/vint.rs rename to cold-string/src/vint.rs index 9ac842a..ab43af4 100644 --- a/src/vint.rs +++ b/cold-string/src/vint.rs @@ -1,7 +1,8 @@ pub struct VarInt; impl VarInt { - pub const fn write(mut value: u64, buf: &mut [u8; 10]) -> usize { + #[rustversion::attr(since(1.83), const)] + pub fn write(mut value: u64, buf: &mut [u8; 10]) -> usize { let mut i = 0; loop { let mut byte = (value & 0x7F) as u8; @@ -18,8 +19,9 @@ impl VarInt { i 
} + #[rustversion::attr(since(1.83), const)] #[allow(unsafe_op_in_unsafe_fn)] - pub const unsafe fn read(ptr: *const u8) -> (u64, usize) { + pub unsafe fn read(ptr: *const u8) -> (u64, usize) { let mut result = 0u64; let mut shift = 0; let mut i = 0; diff --git a/tests/property.proptest-regressions b/cold-string/tests/property.proptest-regressions similarity index 100% rename from tests/property.proptest-regressions rename to cold-string/tests/property.proptest-regressions diff --git a/tests/property.rs b/cold-string/tests/property.rs similarity index 95% rename from tests/property.rs rename to cold-string/tests/property.rs index aa550eb..ac3d879 100644 --- a/tests/property.rs +++ b/cold-string/tests/property.rs @@ -5,14 +5,14 @@ use proptest::prelude::*; fn proptest_config() -> ProptestConfig { ProptestConfig { failure_persistence: None, - cases: 4, + cases: 8, ..Default::default() } } #[cfg(not(miri))] fn proptest_config() -> ProptestConfig { - ProptestConfig::with_cases(65536) + ProptestConfig::with_cases(131072) } proptest! 
{ diff --git a/tests/common.rs b/tests/common.rs deleted file mode 100644 index 8b13789..0000000 --- a/tests/common.rs +++ /dev/null @@ -1 +0,0 @@ - From 3c48534eedb8c57db7759383f5f4bc9a00fede5d Mon Sep 17 00:00:00 2001 From: Thomas Pendock Date: Mon, 23 Feb 2026 17:02:49 -0800 Subject: [PATCH 2/3] Support MSRV 1.60 and re-organize into workspace --- bench/Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bench/Cargo.toml b/bench/Cargo.toml index 27b311f..f15e2d9 100644 --- a/bench/Cargo.toml +++ b/bench/Cargo.toml @@ -1,10 +1,9 @@ [package] name = "benches" +publish = false version = "0.1.0" edition = "2021" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] cold-string = { path = "../cold-string" } criterion = "0.5" From f25b3e6356194a5e2aa863e1c6dd05ae1612e012 Mon Sep 17 00:00:00 2001 From: Thomas Pendock Date: Tue, 24 Feb 2026 22:58:29 -0800 Subject: [PATCH 3/3] use strict provenance --- .github/workflows/rust.yml | 8 +- Cargo.lock | 7 + bench/benches/bench.rs | 25 ---- bench/benches/eq.rs | 2 +- cold-string/Cargo.lock | 59 +++++++- cold-string/Cargo.toml | 11 +- cold-string/README.md | 28 ++-- cold-string/src/lib.rs | 259 ++++++++++++++++++++++++---------- cold-string/src/vint.rs | 2 - cold-string/tests/property.rs | 10 +- 10 files changed, 283 insertions(+), 128 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 316f7e2..9a1fb77 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -19,9 +19,9 @@ jobs: - name: Install cargo-hack run: cargo install cargo-hack - name: Build - run: cargo hack build --manifest-path cold-string/Cargo.toml --verbose --release --feature-powerset --version-range 1.60.. + run: cargo hack build --manifest-path cold-string/Cargo.toml --verbose --feature-powerset --version-range 1.60.. 
- name: Check - run: cargo hack check --manifest-path cold-string/Cargo.toml --verbose --release --feature-powerset --version-range 1.60.. + run: cargo hack check --manifest-path cold-string/Cargo.toml --verbose --feature-powerset --version-range 1.60.. - name: Test No Exposed Provenance run: cargo +1.74 hack test --manifest-path cold-string/Cargo.toml --verbose --release --feature-powerset - name: Tests @@ -32,7 +32,7 @@ jobs: rustup component add miri --toolchain nightly - name: Run Miri run: | - cargo +nightly miri test --manifest-path cold-string/Cargo.toml --test property + cargo +nightly miri test --manifest-path cold-string/Cargo.toml - name: Run Miri Unknown run: | - cargo +nightly miri test --manifest-path cold-string/Cargo.toml --test property --target mips-unknown-linux-gnu + cargo +nightly miri test --manifest-path cold-string/Cargo.toml --target mips-unknown-linux-gnu diff --git a/Cargo.lock b/Cargo.lock index f2852a5..35b844a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -148,6 +148,7 @@ name = "cold-string" version = "0.1.0" dependencies = [ "rustversion", + "sptr", ] [[package]] @@ -586,6 +587,12 @@ dependencies = [ "serde_core", ] +[[package]] +name = "sptr" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b9b39299b249ad65f3b7e96443bad61c02ca5cd3589f46cb6d610a0fd6c0d6a" + [[package]] name = "static_assertions" version = "1.1.0" diff --git a/bench/benches/bench.rs b/bench/benches/bench.rs index 1956f55..e3cb162 100644 --- a/bench/benches/bench.rs +++ b/bench/benches/bench.rs @@ -98,36 +98,12 @@ fn bench_hash(c: &mut Criterion) { group.finish(); } -/* -fn bench_eq(c: &mut Criterion) { - let cold1 = ColdString::from(LONG); - let cold2 = ColdString::from(LONG); - - let string1 = String::from(LONG); - let string2 = String::from(LONG); - - let mut group = c.benchmark_group("eq"); - - group.bench_function("ColdString eq", |b| { - b.iter(|| black_box(&cold1 == &cold2)) - }); - - group.bench_function("String 
eq", |b| { - b.iter(|| black_box(&string1 == &string2)) - }); - - group.finish(); -} -*/ - fn bench_clone(c: &mut Criterion) { let cold = ColdString::from(LONG); let string = String::from(LONG); let mut group = c.benchmark_group("clone"); - group.bench_function("ColdString clone", |b| b.iter(|| black_box(cold.clone()))); - group.bench_function("String clone", |b| b.iter(|| black_box(string.clone()))); group.finish(); @@ -139,7 +115,6 @@ criterion_group!( bench_len, bench_as_str, bench_hash, - // bench_eq, bench_clone ); criterion_main!(benches); diff --git a/bench/benches/eq.rs b/bench/benches/eq.rs index b0dd683..ff78deb 100644 --- a/bench/benches/eq.rs +++ b/bench/benches/eq.rs @@ -71,7 +71,7 @@ where fn bench_eq(c: &mut Criterion) { bench_eq_type::(c, "ColdString_eq"); - //bench_eq_type::(c, "String_eq"); + bench_eq_type::(c, "String_eq"); } criterion_group!(benches, bench_eq); diff --git a/cold-string/Cargo.lock b/cold-string/Cargo.lock index e31ebeb..6c993dd 100644 --- a/cold-string/Cargo.lock +++ b/cold-string/Cargo.lock @@ -2,6 +2,17 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom 0.2.17", + "once_cell", + "version_check", +] + [[package]] name = "autocfg" version = "1.5.0" @@ -39,10 +50,23 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" name = "cold-string" version = "0.1.0" dependencies = [ + "hashbrown", "proptest", "rustversion", "serde", "serde_test", + "sptr", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", ] [[package]] @@ -57,6 +81,15 @@ dependencies = [ "wasip2", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -78,6 +111,12 @@ dependencies = [ "autocfg", ] +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -155,7 +194,7 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ - "getrandom", + "getrandom 0.3.4", ] [[package]] @@ -217,6 +256,12 @@ dependencies = [ "serde", ] +[[package]] +name = "sptr" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b9b39299b249ad65f3b7e96443bad61c02ca5cd3589f46cb6d610a0fd6c0d6a" + [[package]] name = "syn" version = "2.0.117" @@ 
-240,6 +285,18 @@ version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + [[package]] name = "wasip2" version = "1.0.2+wasi-0.2.9" diff --git a/cold-string/Cargo.toml b/cold-string/Cargo.toml index 4d9942a..f270347 100644 --- a/cold-string/Cargo.toml +++ b/cold-string/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cold-string" version = "0.1.0" -edition = "2021" +edition = "2018" rust-version = "1.60.0" authors = ["tomtomwombat"] description = "A 1-word SSO string that saves up to 24 bytes over String." @@ -16,14 +16,15 @@ readme = "README.md" maintenance = { status = "actively-developed" } [features] -default = ["std"] -std = [] -serde = ["dep:serde"] +default = [] +serde = ["dep:serde", "serde/alloc"] [dependencies] -serde = { version = "1.0.228", optional = true } +serde = { version = "1.0.228", optional = true, default-features = false } +sptr = { version = "0.3.2", default-features = false } rustversion = "1.0.22" [dev-dependencies] +hashbrown = "0.12.3" serde_test = "1.0.177" proptest = {version = "=1.8.0", default-features = false, features = ["std", "bit-set"] } diff --git a/cold-string/README.md b/cold-string/README.md index 65e166d..e2d69da 100644 --- a/cold-string/README.md +++ b/cold-string/README.md @@ -5,7 +5,7 @@ ![MSRV](https://img.shields.io/crates/msrv/cold-string?style=for-the-badge) ![Downloads](https://img.shields.io/crates/d/cold-string?style=for-the-badge) -Compact representation of immutable UTF-8 strings. 
Optimized for memory usage and struct packing. +A 1-word sized representation of immutable UTF-8 strings. Strings of up to 1 word in length are stored inline. Optimized for memory usage and struct packing. # Usage @@ -22,29 +22,37 @@ Packs well with other types: use std::mem; use cold_string::ColdString; -assert_eq!(mem::size_of::(), 8); +assert_eq!(mem::size_of::(), mem::size_of::()); assert_eq!(mem::align_of::(), 1); -assert_eq!(mem::size_of::<(ColdString, u8)>(), 9); +assert_eq!(mem::size_of::<(ColdString, u8)>(), mem::size_of::() + 1); assert_eq!(mem::align_of::<(ColdString, u8)>(), 1); ``` # How It Works -ColdString is an 8 byte array (4 bytes on 32-bit machines): -```rust,ignore -pub struct ColdString([u8; 8]); +ColdString is an 8-byte tagged pointer (4 bytes on 32-bit machines): +```rust +#[repr(packed)] +pub struct ColdString { + /// The first byte of `encoded` is the "tag" and it determines the type: + /// - 10xxxxxx: an encoded address for the heap. To decode, 10 is set to 00 and swapped + /// with the LSB bits of the tag byte. The address is always a multiple of 4 (`HEAP_ALIGN`). + /// - 11111xxx: xxx is the length in range 0..=7, followed by length UTF-8 bytes. + /// - xxxxxxxx (valid UTF-8): 8 UTF-8 bytes. + encoded: *mut u8, +} ``` -The array acts as either a pointer to heap data for strings longer than 8 bytes or is the inlined data itself. The first byte indicates one of 3 encodings: +`encoded` acts as either a pointer to the heap for strings longer than 8 bytes or is the inlined data itself. The first/"tag" byte indicates one of 3 encodings: ## Inline Mode (0 to 7 Bytes) -The first byte has bits 11111xxx, where xxx is the length. `self.0[1]` to `self.0[7]` store the bytes of string. +The tag byte has bits 11111xxx, where xxx is the length. `self.0[1]` to `self.0[7]` store the bytes of the string. ## Inline Mode (8 Bytes) -`self.0` stores the bytes of string. Since the string is UTF-8, the first byte is guaranteed to not be 10xxxxx or 11111xxx. 
+The tag byte is any valid UTF-8 byte. `self.0` stores the bytes of the string. Since the string is UTF-8, the tag byte is guaranteed to not be 10xxxxxx or 11111xxx. ## Heap Mode -`self.0` encodes the pointer to heap, where first byte is 10xxxxxx. 10xxxxxx is chosen because it's a UTF-8 continuation byte and therefore an impossible first byte for inline mode. Since a heap-alignment of 4 is chosen, the pointer's least significant 2 bits are guaranteed to be 0 ([See more](https://doc.rust-lang.org/beta/std/alloc/struct.Layout.html#method.from_size_align)). These bits are swapped with the 10 "tag" bits when de/coding between `self.0` and the address value. +`self.0` encodes the pointer to the heap, where the tag byte is 10xxxxxx. 10xxxxxx is chosen because it's a UTF-8 continuation byte and therefore an impossible tag byte for inline mode. Since a heap-alignment of 4 is chosen, the pointer's least significant 2 bits are guaranteed to be 0 ([See more](https://doc.rust-lang.org/beta/std/alloc/struct.Layout.html#method.from_size_align)). These bits are swapped with the 10 "tag" bits when de/coding between `self.0` and the address value. On the heap, the data starts with a variable length integer encoding of the length, followed by the bytes. 
```text,ignore diff --git a/cold-string/src/lib.rs b/cold-string/src/lib.rs index af70a95..1f356a5 100644 --- a/cold-string/src/lib.rs +++ b/cold-string/src/lib.rs @@ -1,9 +1,13 @@ #![allow(rustdoc::bare_urls)] #![doc = include_str!("../README.md")] -#![cfg_attr(not(feature = "std"), no_std)] +#![allow(unstable_name_collisions)] +#![no_std] extern crate alloc; +#[rustversion::before(1.84)] +use sptr::Strict; + use alloc::{ alloc::{alloc, dealloc, Layout}, str::Utf8Error, @@ -32,20 +36,34 @@ const WIDTH: usize = mem::size_of::(); /// assert_eq!(s.as_str(), "qwerty"); /// ``` /// ``` -/// use std::mem; +/// use core::mem; /// use cold_string::ColdString; /// -/// assert_eq!(mem::size_of::(), 8); +/// assert_eq!(mem::size_of::(), mem::size_of::()); /// assert_eq!(mem::align_of::(), 1); -/// assert_eq!(mem::size_of::<(ColdString, u8)>(), 9); +/// assert_eq!(mem::size_of::<(ColdString, u8)>(), mem::size_of::() + 1); /// assert_eq!(mem::align_of::<(ColdString, u8)>(), 1); /// ``` -#[repr(transparent)] -pub struct ColdString([u8; WIDTH]); +#[repr(packed)] +pub struct ColdString { + /// The first byte of `encoded` is the "tag" and it determines the type: + /// - 10xxxxxx: an encoded address for the heap. To decode, 10 is set to 00 and swapped + /// with the LSB bits of the tag byte. The address is always a multiple of 4 (`HEAP_ALIGN`). + /// - 11111xxx: xxx is the length in range 0..=7, followed by length UTF-8 bytes. + /// - xxxxxxxx (valid UTF-8): 8 UTF-8 bytes. 
+ encoded: *mut u8, +} impl ColdString { - const INLINE_MASK: u8 = 0b11111000; - const PTR_TAG: u8 = 0b10000000; + const TAG_MASK: usize = usize::from_ne_bytes(0b11000000usize.to_le_bytes()); + const INLINE_TAG: usize = usize::from_ne_bytes(0b11111000usize.to_le_bytes()); + const PTR_TAG: usize = usize::from_ne_bytes(0b10000000usize.to_le_bytes()); + const LEN_MASK: usize = usize::from_ne_bytes(0b111usize.to_le_bytes()); + const ROT: u32 = if cfg!(target_endian = "little") { + 0 + } else { + 8 * (WIDTH - 1) as u32 + }; /// Convert a slice of bytes into a [`ColdString`]. /// @@ -100,7 +118,8 @@ impl ColdString { } /// Creates a new [`ColdString`] from any type that implements `AsRef`. - /// If the string is short enough, then it will be inlined on the stack. + /// If the string is shorter than `core::mem::size_of::()`, then it + /// will be inlined on the stack. pub fn new>(x: T) -> Self { let s = x.as_ref(); if s.len() <= WIDTH { @@ -111,47 +130,90 @@ impl ColdString { } #[inline] - fn new_inline(s: &str) -> Self { + const fn inline_buf(s: &str) -> [u8; WIDTH] { debug_assert!(s.len() <= WIDTH); let mut buf = [0u8; WIDTH]; - let start = (s.len() < WIDTH) as usize; - buf[start..s.len() + start].copy_from_slice(s.as_bytes()); if s.len() < WIDTH { - buf[0] = Self::INLINE_MASK | (s.len() as u8); + let tag = + (Self::INLINE_TAG | s.len().rotate_left(Self::ROT)).rotate_right(Self::ROT) as u8; + buf[0] = tag; } - Self(buf) + buf } - /// Returns `true` if the string bytes are inlined. 
+ #[rustversion::attr(since(1.61), const)] #[inline] - pub const fn is_inline(&self) -> bool { - (self.0[0] & 0b11000000) != Self::PTR_TAG + fn from_inline_buf(b: [u8; WIDTH]) -> Self { + let encoded = ptr::null_mut::().wrapping_add(usize::from_ne_bytes(b)); + Self { encoded } } - #[rustversion::since(1.84)] #[inline] - fn ptr_to_addr(ptr: *mut T) -> usize { - ptr.expose_provenance() + const fn utf8_start(l: usize) -> usize { + (l < WIDTH) as usize } - #[rustversion::before(1.84)] #[inline] - fn ptr_to_addr(ptr: *mut T) -> usize { - ptr as usize + fn new_inline(s: &str) -> Self { + let mut buf = Self::inline_buf(s); + let start = Self::utf8_start(s.len()); + buf[start..s.len() + start].copy_from_slice(s.as_bytes()); + Self::from_inline_buf(buf) } - #[rustversion::attr(since(1.91), const)] - #[rustversion::since(1.84)] + /// Creates a new inline [`ColdString`] from `&'static str` at compile time. + /// + /// In a dynamic context you can use the method [`ColdString::new()`]. + /// + /// # Panics + /// The string must be less than `core::mem::size_of::()`. Creating + /// a [`ColdString`] larger than that is not supported. + /// + /// + /// # Examples + /// ``` + /// use cold_string::ColdString; + /// + /// const DEFAULT_NAME: ColdString = ColdString::new_inline_const("cold"); + /// ``` + #[rustversion::since(1.61)] #[inline] - fn addr_to_ptr(addr: usize) -> *mut T { - ptr::with_exposed_provenance_mut::(addr) + pub const fn new_inline_const(s: &str) -> Self { + if s.len() > WIDTH { + panic!( + "Length for `new_inline_const` must be less than `core::mem::size_of::()`." 
+ ); + } + let mut buf = Self::inline_buf(s); + let start = Self::utf8_start(s.len()); + let mut i = 0; + while i < s.len() { + buf[i + start] = s.as_bytes()[i]; + i += 1; + } + Self::from_inline_buf(buf) } - #[rustversion::attr(since(1.83), const)] - #[rustversion::before(1.84)] + #[rustversion::attr(since(1.71), const)] #[inline] - fn addr_to_ptr(addr: usize) -> *mut T { - addr as *mut T + unsafe fn ptr(&self) -> *mut u8 { + ptr::read_unaligned(ptr::addr_of!(self.encoded)) + } + + #[inline] + fn addr(&self) -> usize { + unsafe { self.ptr().addr() } + } + + #[inline] + fn tag(&self) -> usize { + self.addr() & Self::TAG_MASK + } + + /// Returns `true` if the string bytes are inlined. + #[inline] + pub fn is_inline(&self) -> bool { + self.tag() != Self::PTR_TAG } #[inline] @@ -171,31 +233,35 @@ impl ColdString { // TODO: can optimize this ptr::copy_nonoverlapping(len_buf.as_ptr(), ptr, vint_len); ptr::copy_nonoverlapping(s.as_ptr(), ptr.add(vint_len), len); - - let addr = Self::ptr_to_addr(ptr); - debug_assert!(addr % HEAP_ALIGN == 0); - let mut addr = addr.rotate_left(6); - addr |= Self::PTR_TAG as usize; - Self(addr.to_le_bytes()) + let encoded = ptr.map_addr(|addr| { + debug_assert!(addr % HEAP_ALIGN == 0); + let mut addr = addr.rotate_left(6 + Self::ROT); + addr |= Self::PTR_TAG; + addr + }); + Self { encoded } } } - #[rustversion::attr(since(1.91), const)] #[inline] fn heap_ptr(&self) -> *mut u8 { debug_assert!(!self.is_inline()); - let mut addr = usize::from_le_bytes(self.0); - addr ^= Self::PTR_TAG as usize; - let addr = addr.rotate_right(6); - debug_assert!(addr % HEAP_ALIGN == 0); - Self::addr_to_ptr(addr) + unsafe { + self.ptr().map_addr(|mut addr| { + addr ^= Self::PTR_TAG; + let addr = addr.rotate_right(6 + Self::ROT); + debug_assert!(addr % HEAP_ALIGN == 0); + addr + }) + } } #[inline] - const fn inline_len(&self) -> usize { - match self.0[0] & Self::INLINE_MASK { - Self::INLINE_MASK => (self.0[0] & !Self::INLINE_MASK) as usize, - _ => 8, + fn 
inline_len(&self) -> usize { + let addr = self.addr(); + match addr & Self::INLINE_TAG { + Self::INLINE_TAG => (addr & Self::LEN_MASK).rotate_right(Self::ROT), + _ => WIDTH, } } @@ -215,7 +281,6 @@ impl ColdString { /// assert_eq!(fancy_f.len(), 4); /// assert_eq!(fancy_f.chars().count(), 3); /// ``` - #[rustversion::attr(since(1.91), const)] #[inline] pub fn len(&self) -> usize { if self.is_inline() { @@ -233,8 +298,10 @@ impl ColdString { #[inline] unsafe fn decode_inline(&self) -> &[u8] { let len = self.inline_len(); - let ptr = self.0.as_ptr().add((len < WIDTH) as usize); - slice::from_raw_parts(ptr, len) + // SAFETY: addr_of! avoids &self.ptr (which is UB due to alignment) + let self_bytes_ptr = ptr::addr_of!(self.encoded) as *const u8; + let start = Self::utf8_start(len); + slice::from_raw_parts(self_bytes_ptr.add(start), len) } #[allow(unsafe_op_in_unsafe_fn)] @@ -305,7 +372,11 @@ impl Drop for ColdString { impl Clone for ColdString { fn clone(&self) -> Self { match self.is_inline() { - true => Self(self.0), + true => unsafe { + Self { + encoded: self.ptr(), + } + }, false => Self::new_heap(self.as_str()), } } @@ -314,7 +385,7 @@ impl Clone for ColdString { impl PartialEq for ColdString { fn eq(&self, other: &Self) -> bool { match (self.is_inline(), other.is_inline()) { - (true, true) => self.0 == other.0, + (true, true) => unsafe { self.ptr() == other.ptr() }, (false, false) => unsafe { self.decode_heap() == other.decode_heap() }, _ => false, } @@ -331,13 +402,13 @@ impl Hash for ColdString { impl fmt::Debug for ColdString { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.as_str().fmt(f) + fmt::Debug::fmt(self.as_str(), f) } } impl fmt::Display for ColdString { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.as_str().fmt(f) + fmt::Display::fmt(self.as_str(), f) } } @@ -455,38 +526,68 @@ mod serde_tests { #[cfg(test)] mod tests { use super::*; + use core::hash::BuildHasher; + use hashbrown::hash_map::DefaultHashBuilder; 
#[test] fn test_layout() { - assert_eq!(mem::size_of::(), 8); + assert_eq!(mem::size_of::(), mem::size_of::()); assert_eq!(mem::align_of::(), 1); struct Foo { _s: ColdString, _b: u8, } - assert_eq!(mem::size_of::(), 9); + assert_eq!(mem::size_of::(), mem::size_of::() + 1); assert_eq!(mem::align_of::(), 1); } #[test] fn it_works() { - for s in ["test", "", "1234567", "12345678", "longer test"] { + for s in [ + "1", + "12", + "123", + "1234", + "12345", + "123456", + "1234567", + "12345678", + "123456789", + str::from_utf8(&[240, 159, 146, 150]).unwrap(), + "✅", + "❤️", + "🦀💯", + "🦀", + "💯", + "abcd", + "test", + "", + "\0", + "\0\0", + "\0\0\0", + "\0\0\0\0", + "\0\0\0\0\0\0\0", + "\0\0\0\0\0\0\0\0", + "1234567", + "12345678", + "longer test", + str::from_utf8(&[103, 39, 240, 145, 167, 156, 194, 165]).unwrap(), + "AaAa0 ® ", + str::from_utf8(&[240, 158, 186, 128, 240, 145, 143, 151]).unwrap(), + ] { let cs = ColdString::new(s); - assert_eq!(s.len() <= 8, cs.is_inline()); + assert_eq!(s.len() <= mem::size_of::(), cs.is_inline()); assert_eq!(cs.len(), s.len()); + assert_eq!(cs.as_bytes(), s.as_bytes()); assert_eq!(cs.as_str(), s); assert_eq!(cs.clone(), cs); - #[cfg(feature = "std")] - { - use std::hash::BuildHasher; - let bh = std::collections::hash_map::RandomState::new(); - let mut hasher1 = bh.build_hasher(); - cs.hash(&mut hasher1); - let mut hasher2 = bh.build_hasher(); - cs.clone().hash(&mut hasher2); - assert_eq!(hasher1.finish(), hasher2.finish()); - } + let bh = DefaultHashBuilder::new(); + let mut hasher1 = bh.build_hasher(); + cs.hash(&mut hasher1); + let mut hasher2 = bh.build_hasher(); + cs.clone().hash(&mut hasher2); + assert_eq!(hasher1.finish(), hasher2.finish()); assert_eq!(cs, s); assert_eq!(s, cs); assert_eq!(cs, *s); @@ -495,16 +596,18 @@ mod tests { } #[test] - fn test_regression() { - for s in [ - str::from_utf8(&[103, 39, 240, 145, 167, 156, 194, 165]).unwrap(), - "AaAa0 ® ", - str::from_utf8(&[240, 158, 186, 128, 240, 145, 143, 151]).unwrap(), 
- ] { - let cs = ColdString::new(s); - assert_eq!(s.len() <= 8, cs.is_inline()); - assert_eq!(s.len(), cs.len()); - assert_eq!(cs.as_str(), s); + fn test_unaligned_placement() { + for s_content in ["torture", "tor", "tortures", "tort", "torture torture"] { + let mut buffer = [0u8; 32]; + for offset in 0..8 { + unsafe { + let dst = buffer.as_mut_ptr().add(offset) as *mut ColdString; + let s = ColdString::new(s_content); + ptr::write_unaligned(dst, s); + let recovered = ptr::read_unaligned(dst); + assert_eq!(recovered.as_str(), s_content); + } + } } } } diff --git a/cold-string/src/vint.rs b/cold-string/src/vint.rs index ab43af4..df3595c 100644 --- a/cold-string/src/vint.rs +++ b/cold-string/src/vint.rs @@ -1,7 +1,6 @@ pub struct VarInt; impl VarInt { - #[rustversion::attr(since(1.83), const)] pub fn write(mut value: u64, buf: &mut [u8; 10]) -> usize { let mut i = 0; loop { @@ -19,7 +18,6 @@ impl VarInt { i } - #[rustversion::attr(since(1.83), const)] #[allow(unsafe_op_in_unsafe_fn)] pub unsafe fn read(ptr: *const u8) -> (u64, usize) { let mut result = 0u64; diff --git a/cold-string/tests/property.rs b/cold-string/tests/property.rs index ac3d879..ec64c2a 100644 --- a/cold-string/tests/property.rs +++ b/cold-string/tests/property.rs @@ -5,7 +5,7 @@ use proptest::prelude::*; fn proptest_config() -> ProptestConfig { ProptestConfig { failure_persistence: None, - cases: 8, + cases: 16, ..Default::default() } } @@ -32,11 +32,17 @@ proptest! 
{ #[test] fn arb_string(s in any::()) { let cold = ColdString::new(s.as_str()); - assert_eq!(s.len() <= 8, cold.is_inline()); + assert_eq!(s.len() <= core::mem::size_of::(), cold.is_inline()); assert_eq!(cold.len(), s.len()); assert_eq!(cold.as_str(), s.as_str()); assert_eq!(cold, ColdString::from(s.as_str())); assert_eq!(cold, cold.clone()); + assert_eq!(cold, s.as_str()); + assert_eq!(s.as_str(), cold); + assert_eq!(unsafe { ColdString::from_utf8_unchecked(s.as_bytes()).as_bytes() }, s.as_bytes()); + if s.len() <= core::mem::size_of::() { + assert_eq!(ColdString::new_inline_const(&s), cold); + } } }