From 77246e39dd36bf02ddfaab3aa18ca5ef55b8f523 Mon Sep 17 00:00:00 2001 From: Snehal Reddy Date: Sun, 8 Feb 2026 15:49:02 +0000 Subject: [PATCH 1/2] fix(cuda_std): use correct PTX scope suffix in atomic load/store --- crates/cuda_std/src/atomic/intrinsics.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/cuda_std/src/atomic/intrinsics.rs b/crates/cuda_std/src/atomic/intrinsics.rs index 7a48d5f4..1da0c68b 100644 --- a/crates/cuda_std/src/atomic/intrinsics.rs +++ b/crates/cuda_std/src/atomic/intrinsics.rs @@ -52,11 +52,11 @@ pub unsafe fn fence_acqrel_system() { #[allow(unused_macros)] macro_rules! load_scope { - (volatile, $scope:ident) => { + (volatile, $scope_asm:ident) => { "" }; - ($ordering:ident, $scope:ident) => { - concat!(".", stringify!($scope)) + ($ordering:ident, $scope_asm:ident) => { + concat!(".", stringify!($scope_asm)) }; } @@ -70,7 +70,7 @@ macro_rules! load { pub unsafe fn [](ptr: *const []) -> [] { let mut out; asm!( - concat!("ld.", stringify!($ordering), load_scope!($ordering, $scope), ".", stringify!([]), " {}, [{}];"), + concat!("ld.", stringify!($ordering), load_scope!($ordering, $scope_asm), ".", stringify!([]), " {}, [{}];"), out([]) out, in(reg64) ptr ); @@ -116,7 +116,7 @@ macro_rules! store { #[doc = concat!("Performs a ", stringify!($ordering), " atomic store at the ", stringify!($scope), " level with a width of ", stringify!($width), " bits")] pub unsafe fn [](ptr: *mut [], val: []) { asm!( - concat!("st.", stringify!($ordering), load_scope!($ordering, $scope), ".", stringify!([]), " [{}], {};"), + concat!("st.", stringify!($ordering), load_scope!($ordering, $scope_asm), ".", stringify!([]), " [{}], {};"), in(reg64) ptr, in([]) val, ); From 51526f7c62a342e1e0d4abe9f6c42d23594ed34f Mon Sep 17 00:00:00 2001 From: Snehal Reddy Date: Mon, 9 Feb 2026 07:29:44 +0000 Subject: [PATCH 2/2] fix(cuda_std): Fix inverted logic in warp_vote_any and warp_vote_all intrinsics --- crates/cuda_std/src/warp.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/cuda_std/src/warp.rs b/crates/cuda_std/src/warp.rs index 94adec0a..8ce9cc01 100644 --- a/crates/cuda_std/src/warp.rs +++ b/crates/cuda_std/src/warp.rs @@ -351,7 +351,7 @@ pub unsafe fn warp_vote_all(mask: u32, predicate: bool) -> bool { ".reg .pred %p<3>;", "setp.eq.u32 %p1, {}, 1;", "vote.sync.all.pred %p2, %p1, {};", - "selp.u32 {}, 0, 1, %p2;", + "selp.u32 {}, 1, 0, %p2;", "}}", in(reg32) predicate as u32, in(reg32) mask, @@ -383,7 +383,7 @@ pub unsafe fn warp_vote_any(mask: u32, predicate: bool) -> bool { ".reg .pred %p<3>;", "setp.eq.u32 %p1, {}, 1;", "vote.sync.any.pred %p2, %p1, {};", - "selp.u32 {}, 0, 1, %p2;", + "selp.u32 {}, 1, 0, %p2;", "}}", in(reg32) predicate as u32, in(reg32) mask,