From db77a39f0135488ddfdfb3ba5b7156d815fbcc10 Mon Sep 17 00:00:00 2001
From: Chris Fallin
Date: Thu, 2 Apr 2026 12:02:23 -0700
Subject: [PATCH 1/2] GC: implement a grow-vs-collect heuristic.

This implements the heuristic discussed in #12860: it replaces the existing
behavior where Wasmtime's GC, when allocating, will continue growing the GC
heap up to its size limit before initiating a collection. That behavior
optimizes for allocation performance but at the cost of resident memory
size -- it is at one extreme end of that tradeoff spectrum.

There are a number of use-cases where there may be heavy allocation traffic
but a relatively small live-heap size compared to the total volume of
allocations. For example, lots of temporary "garbage" may be allocated by
many workloads. Or, more pertinently to #12860, a C/C++ workload that uses
the underlying GC heap only for exceptions, and uses those exceptions in a
way that only one `exnref` is live at a time (no `exn` objects are stashed
away and used later), will also generate a lot of "garbage" during normal
execution. These kinds of workloads benefit significantly from more frequent
collection to keep the resident-set size small. This also may benefit
performance, even accounting for the cost of the collection itself, because
it keeps the footprint of touched memory within higher cache-hierarchy
levels.

In order to accommodate that kind of workload while also presenting
reasonable behavior to large-working-set-size benchmarks, it is desirable to
implement an *adaptive* policy. To that end, this PR implements a scheme
similar to our OwnedRooted allocation/collection algorithm (and specified
explicitly [here] by fitzgen): we use the last live-heap size
(post-collection) compared to current capacity to decide whether to grow or
collect. When the current capacity is more than twice the last live-heap
size, we collect first; if we still can't allocate, then we grow. Otherwise,
we just grow.
The idea is that (when combined with an exponential heap-growth rule) the continuous-allocation case will collect once at each power-of-two, then grow; this is "amortized constant time" overhead. A case with a stable working-set size but with some ups and downs will never hit a "threshold-thrashing" problem: the heap capacity will tend toward twice the live-heap size, in the steady state (see proof [here](proof) for the analogous algorithm for `OwnedRooted`). Thus we have a nice, deterministic bound no matter what, with no bad (quadratic or worse) cases. This PR adds a test that creates a bunch of almost-immediately-dead garbage (allocates a GC struct that is only live for one iteration of a loop) and checks the heap size at each iteration. To allow this check, it also adds a method to `Store` to get the current GC heap capacity, which seems like a generally useful kind of observability as well. [here]: https://github.com/bytecodealliance/wasmtime/pull/12860#issuecomment-4158147842 [proof]: https://github.com/bytecodealliance/wasmtime/blob/e5b127ccd71dbd7d447a32722b2c699abc46fe61/crates/wasmtime/src/runtime/gc/enabled/rooting.rs#L617-L678 --- crates/wasmtime/src/runtime/func.rs | 8 +++ crates/wasmtime/src/runtime/store.rs | 17 ++++++ crates/wasmtime/src/runtime/store/gc.rs | 60 ++++++++++++++++--- crates/wasmtime/src/runtime/vm/gc.rs | 12 ++++ .../wasmtime/src/runtime/vm/gc/enabled/drc.rs | 12 ++++ .../src/runtime/vm/gc/enabled/null.rs | 7 +++ .../wasmtime/src/runtime/vm/gc/gc_runtime.rs | 6 ++ tests/all/gc.rs | 54 +++++++++++++++++ 8 files changed, 167 insertions(+), 9 deletions(-) diff --git a/crates/wasmtime/src/runtime/func.rs b/crates/wasmtime/src/runtime/func.rs index d9d1079d03f9..865615b1baf2 100644 --- a/crates/wasmtime/src/runtime/func.rs +++ b/crates/wasmtime/src/runtime/func.rs @@ -2124,6 +2124,14 @@ impl Caller<'_, T> { self.store.gc(why) } + /// Returns the current capacity of the GC heap in bytes. 
+ /// + /// Same as [`Store::gc_heap_capacity`](crate::Store::gc_heap_capacity). + #[cfg(feature = "gc")] + pub fn gc_heap_capacity(&self) -> usize { + self.store.0.gc_heap_capacity() + } + /// Perform garbage collection asynchronously. /// /// Same as [`Store::gc_async`](crate::Store::gc_async). diff --git a/crates/wasmtime/src/runtime/store.rs b/crates/wasmtime/src/runtime/store.rs index 557428e1e76d..4a16aaeca56b 100644 --- a/crates/wasmtime/src/runtime/store.rs +++ b/crates/wasmtime/src/runtime/store.rs @@ -1016,6 +1016,13 @@ impl Store { StoreContextMut(&mut self.inner).gc(why) } + /// Returns the current capacity of the GC heap in bytes, or 0 if the GC + /// heap has not been initialized yet. + #[cfg(feature = "gc")] + pub fn gc_heap_capacity(&self) -> usize { + self.inner.gc_heap_capacity() + } + /// Returns the amount fuel in this [`Store`]. When fuel is enabled, it must /// be configured via [`Store::set_fuel`]. /// @@ -2032,6 +2039,16 @@ impl StoreOpaque { } } + /// Returns the current capacity of the GC heap in bytes, or 0 if the GC + /// heap has not been initialized yet. + #[cfg(feature = "gc")] + pub(crate) fn gc_heap_capacity(&self) -> usize { + match self.gc_store.as_ref() { + Some(gc_store) => gc_store.gc_heap_capacity(), + None => 0, + } + } + /// Helper to assert that a GC store was previously allocated and is /// present. /// diff --git a/crates/wasmtime/src/runtime/store/gc.rs b/crates/wasmtime/src/runtime/store/gc.rs index 10e1805d08ad..7927373f7e11 100644 --- a/crates/wasmtime/src/runtime/store/gc.rs +++ b/crates/wasmtime/src/runtime/store/gc.rs @@ -56,12 +56,12 @@ impl StoreOpaque { bytes_needed: Option, asyncness: Asyncness, ) { + // When explicitly called (e.g., from Store::gc), always collect. + // If bytes_needed is specified, also try to grow if needed. 
+ self.do_gc(asyncness).await; if let Some(n) = bytes_needed { - if self.grow_gc_heap(limiter, n).await.is_ok() { - return; - } + let _ = self.grow_gc_heap(limiter, n).await; } - self.do_gc(asyncness).await; } /// Attempt to grow the GC heap by `bytes_needed` bytes. @@ -165,8 +165,14 @@ impl StoreOpaque { } } - /// Attempt an allocation, if it fails due to GC OOM, then do a GC and - /// retry. + /// Attempt an allocation, if it fails due to GC OOM, apply the + /// grow-or-collect heuristic and retry. + /// + /// The heuristic is: + /// - If the last post-collection heap usage is less than half the current + /// capacity, collect first, then retry. If that still fails, grow and + /// retry one final time. + /// - Otherwise, grow first and retry. pub(crate) async fn retry_after_gc_async( &mut self, mut limiter: Option<&mut StoreResourceLimiter<'_>>, @@ -183,9 +189,45 @@ impl StoreOpaque { Err(e) => match e.downcast::>() { Ok(oom) => { let (value, oom) = oom.take_inner(); - self.gc(limiter, None, Some(oom.bytes_needed()), asyncness) - .await; - alloc_func(self, value) + let bytes_needed = oom.bytes_needed(); + + // Determine whether to collect or grow first. + let should_collect_first = self.gc_store.as_ref().map_or(false, |gc_store| { + let capacity = gc_store.gc_heap_capacity(); + let last_usage = gc_store.last_post_gc_allocated_bytes.unwrap_or(0); + last_usage < capacity / 2 + }); + + if should_collect_first { + // Collect first, then retry. + self.gc(limiter.as_deref_mut(), None, None, asyncness).await; + + match alloc_func(self, value) { + Ok(x) => Ok(x), + Err(e) => match e.downcast::>() { + Ok(oom2) => { + // Collection wasn't enough; grow and try + // one final time. + let (value, _) = oom2.take_inner(); + // Ignore error; we'll get one + // from `alloc_func` below if + // growth failed and failure to + // grow was fatal. 
+ let _ = self.grow_gc_heap(limiter, bytes_needed).await; + alloc_func(self, value) + } + Err(e) => Err(e), + }, + } + } else { + // Grow first and retry. + // + // Ignore error; we'll get one from + // `alloc_func` below if growth failed and + // failure to grow was fatal. + let _ = self.grow_gc_heap(limiter, bytes_needed).await; + alloc_func(self, value) + } } Err(e) => Err(e), }, diff --git a/crates/wasmtime/src/runtime/vm/gc.rs b/crates/wasmtime/src/runtime/vm/gc.rs index 6067b565d577..3096f2dec0d2 100644 --- a/crates/wasmtime/src/runtime/vm/gc.rs +++ b/crates/wasmtime/src/runtime/vm/gc.rs @@ -48,6 +48,11 @@ pub struct GcStore { /// The function-references table for this GC heap. pub func_ref_table: FuncRefTable, + + /// The total allocated bytes recorded after the last GC collection. + /// `None` if no collection has been performed yet. Used by the + /// grow-or-collect heuristic. + pub last_post_gc_allocated_bytes: Option, } impl GcStore { @@ -60,6 +65,7 @@ impl GcStore { gc_heap, host_data_table, func_ref_table, + last_post_gc_allocated_bytes: None, } } @@ -68,10 +74,16 @@ impl GcStore { self.gc_heap.vmmemory() } + /// Get the current capacity (in bytes) of this GC heap. + pub fn gc_heap_capacity(&self) -> usize { + self.gc_heap.heap_slice().len() + } + /// Asynchronously perform garbage collection within this heap. pub async fn gc(&mut self, asyncness: Asyncness, roots: GcRootsIter<'_>) { let collection = self.gc_heap.gc(roots, &mut self.host_data_table); collect_async(collection, asyncness).await; + self.last_post_gc_allocated_bytes = Some(self.gc_heap.allocated_bytes()); } /// Get the kind of the given GC reference. 
diff --git a/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs b/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs index 94bc99abb9f1..aedeb57f9660 100644 --- a/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs +++ b/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs @@ -151,6 +151,9 @@ struct DrcHeap { /// behind an empty vec instead of `None`) but we keep it because it will /// help us catch unexpected re-entry, similar to how a `RefCell` would. dec_ref_stack: Option>, + + /// Running total of bytes currently allocated (live objects) in this heap. + allocated_bytes: usize, } impl DrcHeap { @@ -166,6 +169,7 @@ impl DrcHeap { vmmemory: None, free_list: None, dec_ref_stack: Some(Vec::with_capacity(1)), + allocated_bytes: 0, }) } @@ -176,6 +180,7 @@ impl DrcHeap { fn dealloc(&mut self, gc_ref: VMGcRef) { let drc_ref = drc_ref(&gc_ref); let size = self.index(drc_ref).object_size(); + self.allocated_bytes -= size; let layout = FreeList::layout(size); self.free_list .as_mut() @@ -698,6 +703,7 @@ unsafe impl GcHeap for DrcHeap { dec_ref_stack, memory, vmmemory, + allocated_bytes, // NB: we will only ever be reused with the same engine, so no need // to clear out our tracing info just to fill it back in with the @@ -709,6 +715,7 @@ unsafe impl GcHeap for DrcHeap { **over_approximated_stack_roots = None; *free_list = None; *vmmemory = None; + *allocated_bytes = 0; debug_assert!(dec_ref_stack.as_ref().is_some_and(|s| s.is_empty())); memory.take().unwrap() @@ -834,6 +841,7 @@ unsafe impl GcHeap for DrcHeap { next_over_approximated_stack_root: None, object_size, }; + self.allocated_bytes += layout.size(); log::trace!("new object: increment {gc_ref:#p} ref count -> 1"); Ok(Ok(gc_ref)) } @@ -891,6 +899,10 @@ unsafe impl GcHeap for DrcHeap { .length } + fn allocated_bytes(&self) -> usize { + self.allocated_bytes + } + fn gc<'a>( &'a mut self, roots: GcRootsIter<'a>, diff --git a/crates/wasmtime/src/runtime/vm/gc/enabled/null.rs b/crates/wasmtime/src/runtime/vm/gc/enabled/null.rs 
index b71d56979fdd..a02207887618 100644 --- a/crates/wasmtime/src/runtime/vm/gc/enabled/null.rs +++ b/crates/wasmtime/src/runtime/vm/gc/enabled/null.rs @@ -327,6 +327,13 @@ unsafe impl GcHeap for NullHeap { self.index(arrayref).length } + fn allocated_bytes(&self) -> usize { + // The null collector never frees, so everything from the start of + // the heap up to the bump pointer is allocated. + let next = unsafe { *self.next.get() }; + usize::try_from(next.get()).unwrap() + } + fn gc<'a>( &'a mut self, _roots: GcRootsIter<'a>, diff --git a/crates/wasmtime/src/runtime/vm/gc/gc_runtime.rs b/crates/wasmtime/src/runtime/vm/gc/gc_runtime.rs index 2808b46fc89c..669ba6b199b7 100644 --- a/crates/wasmtime/src/runtime/vm/gc/gc_runtime.rs +++ b/crates/wasmtime/src/runtime/vm/gc/gc_runtime.rs @@ -342,6 +342,12 @@ pub unsafe trait GcHeap: 'static + Send + Sync { //////////////////////////////////////////////////////////////////////////// // Garbage Collection Methods + /// Get the total number of bytes currently allocated (live) in this heap. + /// + /// This is the sum of all object sizes that have been allocated but not yet + /// freed. This is distinct from the heap capacity. + fn allocated_bytes(&self) -> usize; + /// Start a new garbage collection process. 
/// /// The given `roots` are GC roots and should not be collected (nor anything diff --git a/tests/all/gc.rs b/tests/all/gc.rs index a49ada513380..4d7c2e57af96 100644 --- a/tests/all/gc.rs +++ b/tests/all/gc.rs @@ -1877,3 +1877,57 @@ fn select_gc_ref_stack_map() -> Result<()> { Ok(()) } + +#[test] +#[cfg_attr(miri, ignore)] +fn gc_heap_does_not_grow_unboundedly() -> Result<()> { + let _ = env_logger::try_init(); + + let mut config = Config::new(); + config.wasm_function_references(true); + config.wasm_gc(true); + config.collector(Collector::DeferredReferenceCounting); + + let engine = Engine::new(&config)?; + + let module = Module::new( + &engine, + r#" + (module + (type $small (struct (field i32))) + (import "" "check" (func $check)) + + (func (export "run") (param i32) + (local $i i32) + (local $tmp (ref null $small)) + (loop $loop + (local.set $tmp (struct.new $small (i32.const 42))) + + ;; Call the host to check heap size. + (call $check) + + ;; Loop counter. + (local.set $i (i32.add (local.get $i) (i32.const 1))) + (br_if $loop (i32.lt_u (local.get $i) (local.get 0))) + ) + ) + ) + "#, + )?; + + let mut store = Store::new(&engine, ()); + + let check = Func::wrap(&mut store, |caller: Caller<'_, _>| { + let heap_size = caller.gc_heap_capacity(); + assert!( + heap_size <= 65536, + "GC heap grew too large: {heap_size} bytes (limit: 64KiB)" + ); + }); + + let instance = Instance::new(&mut store, &module, &[check.into()])?; + let run = instance.get_typed_func::<(i32,), ()>(&mut store, "run")?; + run.call(&mut store, (100_000,))?; + + Ok(()) +} From 02d471e3d44fc82e85a645c4a4ac6b69a451b375 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 3 Apr 2026 15:19:37 -0700 Subject: [PATCH 2/2] Review feedback. 
--- crates/wasmtime/src/runtime/store/gc.rs | 22 +++++++++++++------ .../wasmtime/src/runtime/vm/gc/enabled/drc.rs | 2 +- .../wasmtime/src/runtime/vm/gc/gc_runtime.rs | 6 ++--- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/crates/wasmtime/src/runtime/store/gc.rs b/crates/wasmtime/src/runtime/store/gc.rs index 7927373f7e11..67f064f965c8 100644 --- a/crates/wasmtime/src/runtime/store/gc.rs +++ b/crates/wasmtime/src/runtime/store/gc.rs @@ -4,8 +4,7 @@ use super::*; use crate::runtime::vm::VMGcRef; impl StoreOpaque { - /// Attempt to grow the GC heap by `bytes_needed` or, if that fails, perform - /// a garbage collection. + /// Perform any growth or GC needed to allocate `bytes_needed` bytes. /// /// Note that even when this function returns it is not guaranteed /// that a GC allocation of size `bytes_needed` will succeed. Growing the GC @@ -28,7 +27,7 @@ impl StoreOpaque { let root = root.map(|r| scope.gc_roots_mut().push_lifo_root(store_id, r)); scope - .grow_or_collect_gc_heap(limiter, bytes_needed, asyncness) + .collect_and_maybe_grow_gc_heap(limiter, bytes_needed, asyncness) .await; root.map(|r| { @@ -50,16 +49,25 @@ impl StoreOpaque { } } - async fn grow_or_collect_gc_heap( + /// Helper invoked as part of `gc`, whose purpose is to GC and + /// maybe grow for a pending allocation of a given size. + async fn collect_and_maybe_grow_gc_heap( &mut self, limiter: Option<&mut StoreResourceLimiter<'_>>, bytes_needed: Option, asyncness: Asyncness, ) { - // When explicitly called (e.g., from Store::gc), always collect. - // If bytes_needed is specified, also try to grow if needed. + // First, always collect. Then, if bytes_needed is specified, + // also try to grow if that size is greater than GC heap + // capacity minus sum of allocated layout sizes. 
self.do_gc(asyncness).await; - if let Some(n) = bytes_needed { + if let Some(n) = bytes_needed + && n > u64::try_from(self.gc_heap_capacity()) + .unwrap() + .saturating_sub(self.gc_store.as_ref().map_or(0, |gc| { + u64::try_from(gc.last_post_gc_allocated_bytes.unwrap_or(0)).unwrap() + })) + { let _ = self.grow_gc_heap(limiter, n).await; } } diff --git a/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs b/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs index aedeb57f9660..14239ed2f6a9 100644 --- a/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs +++ b/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs @@ -180,8 +180,8 @@ impl DrcHeap { fn dealloc(&mut self, gc_ref: VMGcRef) { let drc_ref = drc_ref(&gc_ref); let size = self.index(drc_ref).object_size(); - self.allocated_bytes -= size; let layout = FreeList::layout(size); + self.allocated_bytes -= layout.size(); self.free_list .as_mut() .unwrap() diff --git a/crates/wasmtime/src/runtime/vm/gc/gc_runtime.rs b/crates/wasmtime/src/runtime/vm/gc/gc_runtime.rs index 669ba6b199b7..d386ba437829 100644 --- a/crates/wasmtime/src/runtime/vm/gc/gc_runtime.rs +++ b/crates/wasmtime/src/runtime/vm/gc/gc_runtime.rs @@ -342,10 +342,10 @@ pub unsafe trait GcHeap: 'static + Send + Sync { //////////////////////////////////////////////////////////////////////////// // Garbage Collection Methods - /// Get the total number of bytes currently allocated (live) in this heap. + /// Get the total number of bytes currently allocated (live or + /// dead-but-not-collected) in this heap. /// - /// This is the sum of all object sizes that have been allocated but not yet - /// freed. This is distinct from the heap capacity. + /// This is distinct from the heap capacity. fn allocated_bytes(&self) -> usize; /// Start a new garbage collection process.