From 24e86eba8e531df6b73793d5ae9f406a9e3ddd43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sosth=C3=A8ne=20Gu=C3=A9don?= Date: Thu, 2 Apr 2026 09:15:53 +0200 Subject: [PATCH] Remove the modulo operations in spsc These modulo operations used to be well optimized when N was a power of 2 However, the consumer and producer now use view-types that make N runtime dependant, preventing the compiler from optimizing these modulo operations even when N is always a power of 2. This patch leverages the fact that `head` and `tail` are always kept lower than N to replace the modulo operations with a simple if, which gets optimized pretty well by the compiler and no branch is left. Closes https://github.com/rust-embedded/heapless/issues/650 --- CHANGELOG.md | 1 + src/spsc.rs | 102 +++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 88 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5440935b37..2aba11e865 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ of panicking drop implementations. - Added `from_bytes_truncating_at_nul` to `CString` - Added `CString::{into_bytes, into_bytes_with_nul, into_string}` - Added `pop_front_if` and `pop_back_if` to `Deque` +- Fixed long division being instroduced by the const-erasure in spsc ## [v0.9.2] 2025-11-12 diff --git a/src/spsc.rs b/src/spsc.rs index 6eb6fde4d7..0c1439a9c5 100644 --- a/src/spsc.rs +++ b/src/spsc.rs @@ -129,12 +129,6 @@ pub struct QueueInner { /// A statically allocated single-producer, single-consumer queue with a capacity of `N - 1` /// elements. /// -///
-/// -/// To get better performance, use a value for `N` that is a power of 2. -/// -///
-/// /// You will likely want to use [`split`](QueueInner::split) to create a producer-consumer pair. pub type Queue = QueueInner>; @@ -182,7 +176,12 @@ impl QueueInner { #[inline] fn increment(&self, val: usize) -> usize { - (val + 1) % self.n() + let val = val + 1; + if val >= self.n() { + val - self.n() + } else { + val + } } #[inline] @@ -202,10 +201,13 @@ impl QueueInner { let current_head = self.head.load(Ordering::Relaxed); let current_tail = self.tail.load(Ordering::Relaxed); - current_tail - .wrapping_sub(current_head) - .wrapping_add(self.n()) - % self.n() + if current_tail >= current_head { + current_tail - current_head + } else { + current_tail + .wrapping_sub(current_head) + .wrapping_add(self.n()) + } } /// Returns whether the queue is empty. @@ -626,7 +628,8 @@ impl<'a, T> Iterator for Iter<'a, T> { if self.index < self.len { let head = self.rb.head.load(Ordering::Relaxed); - let i = (head + self.index) % self.rb.n(); + let i = head + self.index; + let i = if i >= self.rb.n() { i - self.rb.n() } else { i }; self.index += 1; Some(unsafe { &*(self.rb.buffer.borrow().get_unchecked(i).get() as *const T) }) @@ -643,7 +646,8 @@ impl<'a, T> Iterator for IterMut<'a, T> { if self.index < self.len { let head = self.rb.head.load(Ordering::Relaxed); - let i = (head + self.index) % self.rb.n(); + let i = head + self.index; + let i = if i >= self.rb.n() { i - self.rb.n() } else { i }; self.index += 1; Some(unsafe { &mut *self.rb.buffer.borrow().get_unchecked(i).get().cast::() }) @@ -659,7 +663,8 @@ impl DoubleEndedIterator for Iter<'_, T> { let head = self.rb.head.load(Ordering::Relaxed); // self.len > 0, since it's larger than self.index > 0 - let i = (head + self.len - 1) % self.rb.n(); + let i = head + self.len - 1; + let i = if i >= self.rb.n() { i - self.rb.n() } else { i }; self.len -= 1; Some(unsafe { &*(self.rb.buffer.borrow().get_unchecked(i).get() as *const T) }) } else { @@ -674,7 +679,8 @@ impl DoubleEndedIterator for IterMut<'_, T> { let head = self.rb.head.load(Ordering::Relaxed); // self.len > 0, since it's larger than self.index > 0 - let i = (head + self.len - 1) % self.rb.n(); + let i = head + self.len - 1; + let i = if i >= self.rb.n() { i - self.rb.n() } else { i }; self.len -= 1; Some(unsafe { &mut *self.rb.buffer.borrow().get_unchecked(i).get().cast::() }) } else { @@ -1076,6 +1082,28 @@ mod tests { assert_eq!(items.next(), None); } + /// Exercise the modulo self.n() operation in next() + #[test] + fn iter_modulo() { + let mut rb: Queue = Queue::new(); + + for _ in 0..2 { + rb.enqueue(0).unwrap(); + rb.dequeue().unwrap(); + } + rb.enqueue(1).unwrap(); + rb.enqueue(2).unwrap(); + rb.enqueue(3).unwrap(); + + let mut items = rb.iter(); + + // assert_eq!(items.next(), Some(&0)); + assert_eq!(items.next(), Some(&1)); + assert_eq!(items.next(), Some(&2)); + assert_eq!(items.next(), Some(&3)); + assert_eq!(items.next(), None); + } + #[test] fn iter_double_ended() { let mut rb: Queue = Queue::new(); @@ -1093,6 +1121,28 @@ mod tests { assert_eq!(items.next_back(), None); } + /// Test that the modulo in next_back works as expected + #[test] + fn iter_double_ended_modulo() { + let mut rb: Queue = Queue::new(); + + for _ in 0..2 { + rb.enqueue(0).unwrap(); + rb.dequeue().unwrap(); + } + rb.enqueue(0).unwrap(); + rb.enqueue(1).unwrap(); + rb.enqueue(2).unwrap(); + + let mut items = rb.iter(); + + assert_eq!(items.next(), Some(&0)); + assert_eq!(items.next_back(), Some(&2)); + assert_eq!(items.next(), Some(&1)); + assert_eq!(items.next(), None); + assert_eq!(items.next_back(), None); + } + #[test] fn iter_mut() { let mut rb: Queue = Queue::new(); @@ -1126,6 +1176,28 @@ mod tests { assert_eq!(items.next_back(), None); } + /// Test that the modulo in next_back works as expected + #[test] + fn iter_mut_double_ended_modulo() { + let mut rb: Queue = Queue::new(); + + for _ in 0..2 { + rb.enqueue(0).unwrap(); + rb.dequeue().unwrap(); + } + rb.enqueue(0).unwrap(); + rb.enqueue(1).unwrap(); + rb.enqueue(2).unwrap(); + + let mut items = rb.iter_mut(); + + assert_eq!(items.next(), Some(&mut 0)); + assert_eq!(items.next_back(), Some(&mut 2)); + assert_eq!(items.next(), Some(&mut 1)); + assert_eq!(items.next(), None); + assert_eq!(items.next_back(), None); + } + #[test] fn wrap_around() { let mut rb: Queue = Queue::new();