Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions bench/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,7 @@ harness = false
[[bench]]
name = "eq"
harness = false

[[bench]]
name = "collection"
harness = false
69 changes: 69 additions & 0 deletions bench/benches/collection.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
use ahash::AHashSet;
use bench::*;
use criterion::{
black_box, criterion_group, criterion_main, measurement::WallTime, BenchmarkGroup, Criterion,
};
use std::hash::Hash;
use std::str::FromStr;

const LENGTHS: &[usize] = &[64];

/// Benchmarks membership lookups in an `AHashSet<T>` built from `string_array`.
///
/// Every input string is parsed into `T` twice: once into a `Vec` that supplies
/// the lookup keys and once into the set that is probed. Each measured
/// iteration looks up the key at every position listed in `indices`. The
/// benchmark is registered in `g` under the label `"{name}-len={min}-{max}"`.
///
/// # Panics
///
/// Panics if a string fails to parse as `T`, or if an entry of `indices` is
/// out of bounds for `string_array`.
fn bench_hashset_inner<T: FromStr + Eq + Hash>(
    g: &mut BenchmarkGroup<'_, WallTime>,
    name: &'static str,
    min: usize,
    max: usize,
    string_array: &[String],
    indices: &[usize],
) {
    // The error is mapped to `()` so that `unwrap` does not need `T::Err: Debug`.
    let parse = |raw: &String| -> T { T::from_str(raw).map_err(|_| ()).unwrap() };

    let keys: Vec<T> = string_array.iter().map(parse).collect();
    let set: AHashSet<T> = string_array.iter().map(parse).collect();
    let set = black_box(set);

    let label = format!("{}-len={}-{}", name, min, max);
    g.bench_function(&label, |b| {
        b.iter(|| {
            for key in indices.iter().map(|&i| &keys[i]) {
                let _ = black_box(set.contains(key));
            }
        })
    });
}

/// Registers the `"hashset"` benchmark group, running `bench_hashset_inner`
/// once per string type under comparison.
///
/// For every `len` in `LENGTHS`, two corpora of `count` strings are generated
/// with `random_string(min, len)`: one with `min = 0` and one with `min = len`.
/// All benchmarks share the same 1000 lookup positions, taken from the front
/// of a shuffled `0..count` index list.
#[rustfmt::skip]
fn bench_hashset(c: &mut Criterion) {
    let mut group = c.benchmark_group("hashset");
    let count = 1_000_000;

    // Shuffle every position, then keep the first 1000 as the lookup sample.
    let mut order: Vec<usize> = (0..count).collect();
    fastrand::shuffle(&mut order);
    let lookups = &order[..1000];

    for &len in LENGTHS {
        for min in [0, len] {
            let corpus: Vec<String> = (0..count).map(|_| random_string(min, len)).collect();

            bench_hashset_inner::<String>(&mut group, "std", min, len, &corpus, lookups);
            bench_hashset_inner::<smol_str::SmolStr>(&mut group, "smol_str", min, len, &corpus, lookups);
            bench_hashset_inner::<compact_str::CompactString>(&mut group, "compact_str", min, len, &corpus, lookups);
            bench_hashset_inner::<smartstring::alias::String>(&mut group, "smartstring", min, len, &corpus, lookups);
            bench_hashset_inner::<smallstr::SmallString<[u8; 8]>>(&mut group, "smallstr", min, len, &corpus, lookups);
            bench_hashset_inner::<compact_string::CompactString>(&mut group, "compact_string", min, len, &corpus, lookups);
            bench_hashset_inner::<cold_string::ColdString>(&mut group, "cold-string", min, len, &corpus, lookups);
        }
    }
    group.finish();
}

criterion_group!(benches, bench_hashset);
criterion_main!(benches);
2 changes: 1 addition & 1 deletion bench/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def main():
for file in sorted(csv_files):
xs, ys = read_csv(file)
label = os.path.splitext(os.path.basename(file))[0]
plt.plot(xs, ys, label=label, linewidth=3.5, alpha = 0.75)
plt.plot(xs, ys, label=label, linewidth=3.5, alpha = 1.0)

plt.xlabel("String Length")
plt.ylabel("Memory Usage (bytes)")
Expand Down
72 changes: 50 additions & 22 deletions bench/tests/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

use bench::*;

use ahash::{HashMap, HashMapExt};
use ahash::{HashSet, HashSetExt};
use std::alloc::{GlobalAlloc, Layout, System};
use std::cmp::Ord;
use std::collections::BTreeMap;
Expand Down Expand Up @@ -78,10 +78,10 @@ fn test_allocator_memory() {
allocator_memory::<cold_string::ColdString>("cold-string");
}

fn hash_map_workload<T: FromStr + Hash + Eq>(min: usize, max: usize) {
let mut strings: HashMap<T, T> = HashMap::with_capacity(TRIALS);
fn hash_set_workload<T: FromStr + Hash + Eq>(min: usize, max: usize) {
let mut strings: HashSet<T> = HashSet::with_capacity(TRIALS);
for _ in 0..TRIALS {
strings.insert(random_string(min, max), random_string(min, max));
strings.insert(random_string(min, max));
}
let strings = std::hint::black_box(strings);
std::mem::forget(strings);
Expand Down Expand Up @@ -148,17 +148,8 @@ fn system_memory(name: &str, workload: impl Fn(usize, usize)) {
print!("\n");
}

/// Not run automatically.
/// Run with `cargo test test_system_memory --release -- --no-capture --include-ignored`
/// Or specify min,max:
/// ```
/// cargo test test_system_memory --release -- --no-capture --include-ignored
/// ```
#[test]
#[rustfmt::skip]
#[ignore]
fn test_system_memory() {
print!("{:<NAME_WIDTH$} ", "Crate");
fn print_table_header(title: &str) {
print!("{:<NAME_WIDTH$} ", title);
for &size in SIZES {
print!(" | {:>CELL_WIDTH$}", format!("{}..={}", 0, size));
}
Expand All @@ -169,12 +160,49 @@ fn test_system_memory() {
print!(" {: ^CELL_WIDTH$} |", ":---:");
}
println!();
}

system_memory("cold-string", hash_map_workload::<cold_string::ColdString>);
system_memory("compact_str", hash_map_workload::<compact_str::CompactString>);
system_memory("compact_string", hash_map_workload::<compact_string::CompactString>);
system_memory("smallstr", hash_map_workload::<smallstr::SmallString<[u8; 8]>>);
system_memory("smartstring", hash_map_workload::<smartstring::alias::String>);
system_memory("smol_str", hash_map_workload::<smol_str::SmolStr>);
system_memory("std", hash_map_workload::<String>);
/// Prints the `Vec` table: a header followed by one `system_memory` row per
/// string crate, each driven by `vec_workload`.
///
/// Ignored by default; run it explicitly with
/// `cargo test test_system_memory_vec --release -- --no-capture --include-ignored`
#[test]
#[rustfmt::skip]
#[ignore]
fn test_system_memory_vec() {
    // Rows in the order they are printed.
    let rows: [(&str, fn(usize, usize)); 7] = [
        ("cold-string",    vec_workload::<cold_string::ColdString>),
        ("compact_str",    vec_workload::<compact_str::CompactString>),
        ("compact_string", vec_workload::<compact_string::CompactString>),
        ("smallstr",       vec_workload::<smallstr::SmallString<[u8; 8]>>),
        ("smartstring",    vec_workload::<smartstring::alias::String>),
        ("smol_str",       vec_workload::<smol_str::SmolStr>),
        ("std",            vec_workload::<String>),
    ];

    print_table_header("Vec");
    for (name, workload) in rows {
        system_memory(name, workload);
    }
}

/// Prints the `HashSet` table: a header followed by one `system_memory` row
/// per string crate, each driven by `hash_set_workload`.
///
/// Ignored by default; run it explicitly with
/// `cargo test test_system_memory_hashset --release -- --no-capture --include-ignored`
#[test]
#[rustfmt::skip]
#[ignore]
fn test_system_memory_hashset() {
    // Rows in the order they are printed.
    let rows: [(&str, fn(usize, usize)); 7] = [
        ("cold-string",    hash_set_workload::<cold_string::ColdString>),
        ("compact_str",    hash_set_workload::<compact_str::CompactString>),
        ("compact_string", hash_set_workload::<compact_string::CompactString>),
        ("smallstr",       hash_set_workload::<smallstr::SmallString<[u8; 8]>>),
        ("smartstring",    hash_set_workload::<smartstring::alias::String>),
        ("smol_str",       hash_set_workload::<smol_str::SmolStr>),
        ("std",            hash_set_workload::<String>),
    ];

    print_table_header("HashSet");
    for (name, workload) in rows {
        system_memory(name, workload);
    }
}

/// Prints the `BTreeSet` table: a header followed by one `system_memory` row
/// per string crate, each driven by `btree_workload`.
///
/// Ignored by default; run it explicitly with
/// `cargo test test_system_memory_btreeset --release -- --no-capture --include-ignored`
#[test]
#[rustfmt::skip]
#[ignore]
fn test_system_memory_btreeset() {
    // Rows in the order they are printed.
    let rows: [(&str, fn(usize, usize)); 7] = [
        ("cold-string",    btree_workload::<cold_string::ColdString>),
        ("compact_str",    btree_workload::<compact_str::CompactString>),
        ("compact_string", btree_workload::<compact_string::CompactString>),
        ("smallstr",       btree_workload::<smallstr::SmallString<[u8; 8]>>),
        ("smartstring",    btree_workload::<smartstring::alias::String>),
        ("smol_str",       btree_workload::<smol_str::SmolStr>),
        ("std",            btree_workload::<String>),
    ];

    print_table_header("BTreeSet");
    for (name, workload) in rows {
        system_memory(name, workload);
    }
}
111 changes: 48 additions & 63 deletions cold-string/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,27 @@
[![Crates.io](https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust)](https://crates.io/crates/cold-string)
[![docs.rs](https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs)](https://docs.rs/cold-string)
![MSRV](https://img.shields.io/crates/msrv/cold-string?style=for-the-badge)
![Downloads](https://img.shields.io/crates/d/cold-string?style=for-the-badge)

A 1-word (8-byte) sized representation of immutable UTF-8 strings that inlines up to 8 bytes. Optimized for memory usage and struct packing.

# Overview
## Overview

`ColdString` is optimized for memory efficiency for **large** and **short** strings:
- 0..=8 bytes: always 8 bytes total (fully inlined).
- 9..=128 bytes: 8-byte pointer + 1-byte length encoding
- 129..=16384 bytes: 8-byte pointer + 2-byte length encoding
- Continues logarithmically up to 18 bytes overhead for sizes up to `isize::MAX`.
`ColdString` minimizes per-string overhead for both **short and large** strings.
- Strings ≤ 8 bytes: **8 bytes total**
- Larger strings: **~9–10 bytes overhead** (other string libraries have 24 bytes per value)

Compared to `String`, which stores capacity and length inline (3 machine words), `ColdString` avoids storing length inline for heap strings and compresses metadata into tagged pointer space. This leads to substantial memory savings in benchmarks (see [Memory Comparison (System RSS)](#memory-comparison-system-rss)):
- **36% – 68%** smaller than `String` in `HashMap`
- **28% – 65%** smaller than other short-string crates in `HashMap`
This leads to substantial memory savings over both `String` and other short-string crates (see [Memory Comparison (System RSS)](#memory-comparison-system-rss)):
- **35% – 67%** smaller than `String` in `HashSet`
- **35% – 64%** smaller than other short-string crates in `HashSet`
- **30% – 75%** smaller than `String` in `BTreeSet`
- **13% – 63%** smaller than other short-string crates in `BTreeSet`

`ColdString`'s MSRV is 1.60, is `no_std` compatible, and is a drop in replacement for immutable Strings.

### Safety
`ColdString` is written using [Rust's strict provenance API](https://doc.rust-lang.org/beta/std/ptr/index.html#strict-provenance), carefully handles unaligned access internally, and is validated with property testing and MIRI.
---

### Why "Cold"?

The heap representation stores the length on the heap, not inline in the struct. This saves memory in the struct itself but *slightly* increases the cost of `len()` since it requires a heap read. In practice, the `len()` cost is only marginally slower than inline storage and is typically negligible compared to:
- Memory savings
- Cache density improvements
- Faster collection operations due to reduced footprint
### Portability
`ColdString` has an MSRV of 1.60, is `no_std` compatible, and is a drop-in replacement for immutable `String`s.

# Usage
## Usage

Use it like a `String`:
```rust
Expand All @@ -45,57 +35,54 @@ assert_eq!(s.as_str(), "qwerty");

Packs well with other types:
```rust
use std::mem;
use cold_string::ColdString;
use std::mem::{align_of, size_of};

assert_eq!(mem::size_of::<ColdString>(), mem::size_of::<usize>());
assert_eq!(mem::align_of::<ColdString>(), 1);
assert_eq!(size_of::<ColdString>(), size_of::<usize>());
assert_eq!(align_of::<ColdString>(), 1);

assert_eq!(mem::size_of::<(ColdString, u8)>(), mem::size_of::<usize>() + 1);
assert_eq!(mem::align_of::<(ColdString, u8)>(), 1);
assert_eq!(size_of::<(ColdString, u8)>(), size_of::<usize>() + 1);
assert_eq!(size_of::<Option<ColdString>>(), size_of::<usize>() + 1);
```

# How It Works
## How It Works

ColdString is 8-byte tagged pointer (4 bytes on 32-bit machines):
ColdString is an 8-byte tagged pointer (4 bytes on 32-bit machines):
```rust
#[repr(packed)]
pub struct ColdString {
/// The first byte of `encoded` is the "tag" and it determines the type:
/// - 10xxxxxx: an encoded address for the heap. To decode, 10 is set to 00 and swapped
/// with the LSB bits of the tag byte. The address is always a multiple of 4 (`HEAP_ALIGN`).
/// - 11111xxx: xxx is the length in range 0..=7, followed by length UTF-8 bytes.
/// - xxxxxxxx (valid UTF-8): 8 UTF-8 bytes.
encoded: *mut u8,
}
```
`encoded` acts as either a pointer to the heap for strings longer than 8 bytes or is the inlined data itself. The first/"tag" byte indicates one of 3 encodings:
The 8 bytes encode one of three representations indicated by the 1st byte:
- `10xxxxxx`: `encoded` contains a tagged heap pointer. To decode the address, clear the tag bits (`10 → 00`) and rotate so the `00` bits become the least-significant bits. The heap allocation uses [4-byte alignment](https://doc.rust-lang.org/beta/std/alloc/struct.Layout.html#method.from_size_align), guaranteeing the
least-significant 2 bits of the address are `00`. On the heap, the UTF-8 characters are preceded by the variable-length encoding of the size. The size uses 1 byte for 0 - 127, 2 bytes for 128 - 16383, etc.
- `11111xxx`: xxx is the length and the remaining 0-7 bytes are UTF-8 characters.
- `xxxxxxxx`: All 8 bytes are UTF-8.

### Inline Mode (0 to 7 Bytes)
The tag byte has bits 11111xxx, where xxx is the length. `self.0[1]` to `self.0[7]` store the bytes of string.
`10xxxxxx` and `11111xxx` are chosen because they cannot be valid first bytes of UTF-8.

### Inline Mode (8 Bytes)
The tag byte is any valid UTF-8 byte. `self.0` stores the bytes of string. Since the string is UTF-8, the tag byte is guaranteed to not be 10xxxxx or 11111xxx.
### Why "Cold"?

### Heap Mode
`self.0` encodes the pointer to heap, where tag byte is 10xxxxxx. 10xxxxxx is chosen because it's a UTF-8 continuation byte and therefore an impossible tag byte for inline mode. Since a heap-alignment of 4 is chosen, the pointer's least significant 2 bits are guaranteed to be 0 ([See more](https://doc.rust-lang.org/beta/std/alloc/struct.Layout.html#method.from_size_align)). These bits are swapped with the 10 "tag" bits when de/coding between `self.0` and the address value.
The heap representation stores the length on the heap, not inline in the struct. This saves memory in the struct itself but *slightly* increases the cost of `len()` since it requires a heap read. In practice, the `len()` cost is only marginally slower than inline storage and is typically negligible compared to memory savings, cache density improvements, and 3x faster operations on inlined strings.

On the heap, the data starts with a variable length integer encoding of the length, followed by the bytes.
```text,ignore
ptr --> <var int length> <data>
```
### Safety

# Memory Comparisons (Allocator)
`ColdString` uses `unsafe` to implement its packed representation and pointer tagging. Usage of `unsafe` is narrowly scoped to where layout control is required, and each instance is documented with `// SAFETY: <invariant>`. To further ensure soundness, `ColdString` is written using [Rust's strict provenance API](https://doc.rust-lang.org/beta/std/ptr/index.html#strict-provenance), handles unaligned access internally, maintains explicit heap alignment guarantees, and is validated with property testing and MIRI.

## Benchmarks

### Memory Comparisons (Allocator)

Memory usage per string, measured by tracking the memory requested by the allocator:

![string_memory](https://github.com/user-attachments/assets/adf09756-9910-4618-a97f-b5ab91a2515a)

## Memory Comparison (System RSS)
### Memory Comparison (System RSS)

RSS per insertion of various collections containing strings of random lengths 0..=N:
Resident set size in bytes per insertion of various collections. Insertions are strings with random length 0..=N:

Vec | 0..=4 | 0..=8 | 0..=16 | 0..=32 | 0..=64
Vec | 0..=4 | 0..=8 | 0..=16 | 0..=32 | 0..=64
:--- | :---: | :---: | :---: | :---: | :---: |
cold-string | 8.0 | 8.0 | 23.2 | 33.7 | 53.4
compact_str | 24.0 | 24.0 | 24.0 | 34.6 | 60.6
Expand All @@ -105,17 +92,17 @@ smartstring | 24.0 | 24.0 | 24.0 | 40.4 | 65.4
smol_str | 24.0 | 24.0 | 24.0 | 39.9 | 71.2
std | 35.8 | 37.4 | 45.8 | 54.2 | 70.5

HashMap | 0..=4 | 0..=8 | 0..=16 | 0..=32 | 0..=64
HashSet | 0..=4 | 0..=8 | 0..=16 | 0..=32 | 0..=64
:--- | :---: | :---: | :---: | :---: | :---: |
cold-string | 35.7 | 35.7 | 63.3 | 88.2 | 125.1
compact_str | 102.8 | 102.8 | 102.8 | 123.7 | 175.5
compact_string | 45.4 | 59.6 | 78.2 | 97.1 | 130.1
smallstr | 102.8 | 102.8 | 129.7 | 155.0 | 191.6
smartstring | 102.8 | 102.8 | 102.8 | 135.9 | 185.8
smol_str | 102.8 | 102.8 | 102.8 | 134.8 | 196.6
std | 112.8 | 123.9 | 143.2 | 161.8 | 195.3

B-Tree Set | 0..=4 | 0..=8 | 0..=16 | 0..=32 | 0..=64
cold-string | 18.9 | 18.9 | 34.5 | 45.5 | 64.0
compact_str | 52.4 | 52.4 | 52.4 | 62.2 | 88.9
compact_string | 23.2 | 30.0 | 39.6 | 49.1 | 65.9
smallstr | 52.4 | 52.4 | 66.5 | 78.6 | 96.9
smartstring | 52.4 | 52.4 | 52.4 | 68.2 | 94.0
smol_str | 52.4 | 52.4 | 52.4 | 68.3 | 99.4
std | 56.8 | 61.9 | 72.2 | 81.7 | 98.5

BTreeSet | 0..=4 | 0..=8 | 0..=16 | 0..=32 | 0..=64
:--- | :---: | :---: | :---: | :---: | :---: |
cold-string | 10.1 | 18.9 | 49.3 | 79.1 | 117.2
compact_str | 24.8 | 48.4 | 61.5 | 90.5 | 145.7
Expand All @@ -125,10 +112,8 @@ smartstring | 24.5 | 48.6 | 61.1 | 102.3 | 155.8
smol_str | 25.0 | 48.3 | 61.6 | 100.7 | 166.7
std | 35.8 | 70.4 | 102.9 | 128.9 | 165.5

**Note:** Columns represent string length (bytes/chars). Values represent average Resident Set Size (RSS) in bytes per string instance. Measurements taken with 10M iterations.

## Speed
### Construction: Variable Length (0..=N) [ns/op]
### Speed
#### Construction: Variable Length (0..=N) [ns/op]
Crate | 0..=4 | 0..=8 | 0..=16 | 0..=32 | 0..=64
:--- | :---: | :---: | :---: | :---: | :---:
cold-string | 10.0 | 9.2 | 25.3 | 30.0 | 37.2
Expand All @@ -139,7 +124,7 @@ smartstring | 14.8 | 15.1 | 15.0 | 26.9 | 4
smol_str | 19.2 | 19.8 | 20.1 | 23.4 | 33.7
std | 28.6 | 31.4 | 34.9 | 32.0 | 33.1

### Construction: Fixed Length (N..=N) [ns/op]
#### Construction: Fixed Length (N..=N) [ns/op]
Crate | 4..=4 | 8..=8 | 16..=16 | 32..=32 | 64..=64
:--- | :---: | :---: | :---: | :---: | :---:
cold-string | 6.5 | 4.2 | 34.2 | 34.3 | 36.2
Expand Down