diff --git a/crates/cuda_std/src/atomic/intrinsics.rs b/crates/cuda_std/src/atomic/intrinsics.rs index 7a48d5f4..1da0c68b 100644 --- a/crates/cuda_std/src/atomic/intrinsics.rs +++ b/crates/cuda_std/src/atomic/intrinsics.rs @@ -52,11 +52,11 @@ pub unsafe fn fence_acqrel_system() { #[allow(unused_macros)] macro_rules! load_scope { - (volatile, $scope:ident) => { + (volatile, $scope_asm:ident) => { "" }; - ($ordering:ident, $scope:ident) => { - concat!(".", stringify!($scope)) + ($ordering:ident, $scope_asm:ident) => { + concat!(".", stringify!($scope_asm)) }; } @@ -70,7 +70,7 @@ macro_rules! load { pub unsafe fn [](ptr: *const []) -> [] { let mut out; asm!( - concat!("ld.", stringify!($ordering), load_scope!($ordering, $scope), ".", stringify!([]), " {}, [{}];"), + concat!("ld.", stringify!($ordering), load_scope!($ordering, $scope_asm), ".", stringify!([]), " {}, [{}];"), out([]) out, in(reg64) ptr ); @@ -116,7 +116,7 @@ macro_rules! store { #[doc = concat!("Performs a ", stringify!($ordering), " atomic store at the ", stringify!($scope), " level with a width of ", stringify!($width), " bits")] pub unsafe fn [](ptr: *mut [], val: []) { asm!( - concat!("st.", stringify!($ordering), load_scope!($ordering, $scope), ".", stringify!([]), " [{}], {};"), + concat!("st.", stringify!($ordering), load_scope!($ordering, $scope_asm), ".", stringify!([]), " [{}], {};"), in(reg64) ptr, in([]) val, ); diff --git a/crates/cuda_std/src/warp.rs b/crates/cuda_std/src/warp.rs index 94adec0a..8ce9cc01 100644 --- a/crates/cuda_std/src/warp.rs +++ b/crates/cuda_std/src/warp.rs @@ -351,7 +351,7 @@ pub unsafe fn warp_vote_all(mask: u32, predicate: bool) -> bool { ".reg .pred %p<3>;", "setp.eq.u32 %p1, {}, 1;", "vote.sync.all.pred %p2, %p1, {};", - "selp.u32 {}, 0, 1, %p2;", + "selp.u32 {}, 1, 0, %p2;", "}}", in(reg32) predicate as u32, in(reg32) mask, @@ -383,7 +383,7 @@ pub unsafe fn warp_vote_any(mask: u32, predicate: bool) -> bool { ".reg .pred %p<3>;", "setp.eq.u32 %p1, {}, 1;", "vote.sync.any.pred %p2, %p1, {};", - "selp.u32 {}, 0, 1, %p2;", + "selp.u32 {}, 1, 0, %p2;", "}}", in(reg32) predicate as u32, in(reg32) mask,