Skip to content
Open
35 changes: 35 additions & 0 deletions skyscraper/bn254-multiplier/gen_multiples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Generate Rust const lookup tables for multiples of the BN254 scalar field prime."""

p = 0x30644E72E131A029B85045B68181585D2833E84879B9709143E1F593F0000001


def int_to_limbs(size, n, count):
mask = 2**size - 1
limbs = []
for _ in range(count):
limbs.append(n & mask)
n >>= size
return limbs


def main():
multiples_64 = [int_to_limbs(64, k * p, 4) for k in range(0, 6)]
multiples_51 = [int_to_limbs(51, k * p, 5) for k in range(0, 6)]

# Print 64-bit table (for constants.rs)
print("pub const U64_P_MULTIPLES: [[u64; 4]; 6] = [")
for k, limbs in enumerate(multiples_64):
fmt = ", ".join(f"0x{l:016x}" for l in limbs)
print(f" [{fmt}], // {k}P")
print("];")

# Print 51-bit table (for rne/constants.rs)
print("\npub const U51_P_MULTIPLES: [[u64; 5]; 6] = [")
for k, limbs in enumerate(multiples_51):
fmt = ", ".join(f"0x{l:013x}" for l in limbs)
print(f" [{fmt}], // {k}P")
print("];")


if __name__ == "__main__":
main()
54 changes: 42 additions & 12 deletions skyscraper/bn254-multiplier/src/constants.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,47 @@
pub const U64_NP0: u64 = 0xc2e1f593efffffff;
pub const U64_P: [u64; 4] = U64_P_MULTIPLES[1];

pub const U64_P: [u64; 4] = [
0x43e1f593f0000001,
0x2833e84879b97091,
0xb85045b68181585d,
0x30644e72e131a029,
];
pub const U64_2P: [u64; 4] = U64_P_MULTIPLES[2];

pub const U64_2P: [u64; 4] = [
0x87c3eb27e0000002,
0x5067d090f372e122,
0x70a08b6d0302b0ba,
0x60c89ce5c2634053,
/// Lookup table: `U64_P_MULTIPLES[k]` = `k * P` for k in 0..=5.
/// Index 0 is all-zeros; use as `x - U64_P_MULTIPLES[k]` to subtract k copies
/// of P.
pub const U64_P_MULTIPLES: [[u64; 4]; 6] = [
[
0x0000000000000000,
0x0000000000000000,
0x0000000000000000,
0x0000000000000000,
], // 0P
[
0x43e1f593f0000001,
0x2833e84879b97091,
0xb85045b68181585d,
0x30644e72e131a029,
], // 1P
[
0x87c3eb27e0000002,
0x5067d090f372e122,
0x70a08b6d0302b0ba,
0x60c89ce5c2634053,
], // 2P
[
0xcba5e0bbd0000003,
0x789bb8d96d2c51b3,
0x28f0d12384840917,
0x912ceb58a394e07d,
], // 3P
[
0x0f87d64fc0000004,
0xa0cfa121e6e5c245,
0xe14116da06056174,
0xc19139cb84c680a6,
], // 4P
[
0x5369cbe3b0000005,
0xc903896a609f32d6,
0x99915c908786b9d1,
0xf1f5883e65f820d0,
], // 5P
];

// R mod P
Expand Down
2 changes: 1 addition & 1 deletion skyscraper/bn254-multiplier/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ pub mod rtz;
pub mod constants;
pub mod rne;
mod scalar;
mod utils;
pub mod utils;

#[cfg(not(target_arch = "wasm32"))] // Proptest not supported on WASI
mod test_utils;
Expand Down
53 changes: 47 additions & 6 deletions skyscraper/bn254-multiplier/src/rne/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,53 @@ use crate::pow_2;
pub const U51_NP0: u64 = 0x1f593efffffff;

/// BN254 scalar field prime
pub const U51_P: [u64; 5] = [
0x1f593f0000001,
0x10f372e12287c,
0x6056174a0cfa1,
0x014dc2822db40,
0x30644e72e131a,
pub const U51_P: [u64; 5] = U51_P_MULTIPLES[1];

/// Lookup table: `U51_P_MULTIPLES[k]` = `k * P` for k in 0..=5, in 51-bit
/// limbs.
pub const U51_P_MULTIPLES: [[u64; 5]; 6] = [
[
0x0000000000000,
0x0000000000000,
0x0000000000000,
0x0000000000000,
0x0000000000000,
], // 0P
[
0x1f593f0000001,
0x10f372e12287c,
0x6056174a0cfa1,
0x014dc2822db40,
0x30644e72e131a,
], // 1P
[
0x3eb27e0000002,
0x21e6e5c2450f8,
0x40ac2e9419f42,
0x029b85045b681,
0x60c89ce5c2634,
], // 2P
[
0x5e0bbd0000003,
0x32da58a367974,
0x210245de26ee3,
0x03e94786891c2,
0x112ceb58a394e,
], // 3P
[
0x7d64fc0000004,
0x43cdcb848a1f0,
0x01585d2833e84,
0x05370a08b6d03,
0x419139cb84c68,
], // 4P
[
0x1cbe3b0000005,
0x54c13e65aca6d,
0x61ae747240e25,
0x0684cc8ae4843,
0x71f5883e65f82,
], // 5P
];

/// Bit mask for 51-bit limbs.
Expand Down
135 changes: 134 additions & 1 deletion skyscraper/bn254-multiplier/src/utils.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::constants::U64_2P;
use crate::constants::{self, U64_2P};

/// Macro to extract a subarray from an array.
///
Expand Down Expand Up @@ -97,3 +97,136 @@ pub const fn carrying_mul_add(a: u64, b: u64, add: u64, carry: u64) -> (u64, u64
let c: u128 = widening_mul(a, b) + carry as u128 + add as u128;
(c as u64, (c >> 64) as u64)
}

/// Precomputed magic constants for fast approximate floor division by the
/// BN254 prime P, following Warren's "Hacker's Delight" (integer division by
/// constants). Guarantees `div_p(val) ≤ ⌊val / P⌋` for all inputs within the
/// declared bit range.
pub struct MulShift {
mul: u64,
shift: u32,
}

impl MulShift {
/// Computes magic multiplication and shift constants for approximate floor
/// division by P, such that `div_p(val) ≤ ⌊val / P⌋` for all `val` within
/// `max_bit_size` bits. `precision` controls how many of the top bits of
/// `val` are used — higher precision yields a tighter approximation.
pub const fn new(max_bit_size: u32, precision: u32) -> Self {
// Generate magic numbers for division by bn254's prime for a range of top-bit
// widths.

// Based on Warren's "Hacker's Delight" (integer
// division by constants) to find a magic multiplier for each bit-width.

use crate::constants::U64_P;

let d = (U64_P[3] >> (max_bit_size - 192 - precision)) + 1; // d = divisor = ceil(p / 2^(max_bit_size-w))
let nc = 2_u64.pow(precision) - 1 - (2_u64.pow(precision) % d); // nc = largest value s.t. nc mod d == d-1
let mut s = precision; // start at precision; s < precision values are skipped
let m;
loop {
// s = shift exponent
if 2_u64.pow(s) > nc * (d - 1 - (2_u64.pow(s) - 1) % d) {
m = (2_u64.pow(s) + d - 1 - (2_u64.pow(s) - 1) % d) / d; // m = magic multiplier
break;
}
s += 1;
assert!(s < 64, "no magic multiplier found");
}

MulShift { mul: m, shift: s }
}

#[inline(always)]
/// Returns an under-approximation of ⌊val / P⌋ (result ≤ true quotient).
/// `val` must fit within the `max_bit_size` passed to [`MulShift::new`].
pub const fn div_p(&self, val: u64) -> u64 {
// assumes systems can handle multiplication by 64 bits without performance
// penalty.
(val * self.mul) >> self.shift
}
}

/// Approximate floor division by P using the upper 6 bits of `x`.
/// `x` must be the upper limb of a u256 in 64-bit radix.
///
/// Returns a value ≤ ⌊x / P⌋. This is the most precise
/// approximation achievable without multiplication on ARM64 and x86.
///
/// Tradeoff: due the limited range of this division \[0,4\] (instead of \[0,5\]
/// for u256) will lead to a larger value after subtraction reduction.
/// subtraction reduction output: [0, 1+ε] with ε < 0.3.
#[inline(always)]
pub fn div_p_6b(x: u64) -> u64 {
let upper_bits = x >> (64 - 6);
// const to force compile time evaluation
const MULSHIFT: MulShift = MulShift::new(256, 6);
MULSHIFT.div_p(upper_bits)
}

/// Approximate floor division by P using the upper 32 bits of `x`.
/// `x` must be the upper limb of a u256 in 64-bit radix.
///
/// Returns a value ≤ ⌊x / P⌋. This is the most precise
/// approximation achievable with a 32bx32b->64b multiplier.
#[inline(always)]
pub fn div_p_32b(x: u64) -> u64 {
let upper_bits = x >> (64 - 32);
// const to force compile time evaluation
const MULSHIFT: MulShift = MulShift::new(256, 32);
MULSHIFT.div_p(upper_bits)
}

/// Subtracts an approximate multiple of P from `x` using `div_p` on the high
/// limb.
///
/// The result is not fully reduced; the output range depends on the precision
/// of the supplied `div_p` — see [`div_p_6b`] and [`div_p_32b`].
#[inline(always)]
pub fn subtraction_reduce<F: Fn(u64) -> u64>(div_p: F, x: [u64; 4]) -> [u64; 4] {
// No clamping as the max value of x can't go past 5. Which is the maximum of
// the table.
let q = div_p(x[3]) as usize;
sub(x, constants::U64_P_MULTIPLES[q])
}

#[cfg(kani)]
mod proofs {
use {
super::{constants::U64_2P, div_p_32b, div_p_6b},
crate::constants::U64_P_MULTIPLES,
};

/// Lexicographic ≤ on little-endian 256-bit integers.
fn le256(a: [u64; 4], b: [u64; 4]) -> bool {
for i in (0..4).rev() {
if a[i] != b[i] {
return a[i] < b[i];
}
}
true
}

/// TODO: tighter bounds
#[kani::proof]
fn div_p_32b_underapprox() {
let x: u64 = kani::any();
let q = div_p_32b(x);

let r = U64_P_MULTIPLES[q as usize][3];
assert!(x >= r);
assert!(le256([0, 0, 0, x - r], U64_2P));
}

#[kani::proof]
// TODO tighter bounds
fn div_p_6b_underapprox() {
let x: u64 = kani::any();
let q = div_p_6b(x);

let r = U64_P_MULTIPLES[q as usize][3];
assert!(x >= r);
assert!(le256([0, 0, 0, x - r], U64_2P));
}
}
Loading
Loading