Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
build*/*
node_modules/*
dist/*
37 changes: 30 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,19 @@ if(ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv
endif()
endif()

# WASM SIMD
set_source_files_properties(${randomx_sources} COMPILE_FLAGS -msimd128)
# Build-variant switch: when RANDOMX_NO_SIMD is ON the library is compiled
# without WASM SIMD128 so that instrumentation tools lacking SIMD support
# (e.g. Wasabi) can process the resulting binary.
option(RANDOMX_NO_SIMD "Build without WASM SIMD128 instructions" OFF)

if(RANDOMX_NO_SIMD)
  # Expose the choice to the sources, which guard their scalar code paths
  # with this preprocessor symbol.
  add_definitions(-DRANDOMX_NO_SIMD)
  # Disable post-MVP features unsupported by Wasabi
  string(APPEND CMAKE_C_FLAGS " -mno-bulk-memory -mno-sign-ext -mno-nontrapping-fptoint")
  string(APPEND CMAKE_CXX_FLAGS " -mno-bulk-memory -mno-sign-ext -mno-nontrapping-fptoint")
else()
  # WASM SIMD
  set_source_files_properties(${randomx_sources} COMPILE_FLAGS -msimd128)
endif()

set(RANDOMX_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/src" CACHE STRING "WebRandomX Include path")

Expand Down Expand Up @@ -167,25 +178,37 @@ target_link_libraries(web-randomx
PRIVATE randomx)
set_property(TARGET web-randomx PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET web-randomx PROPERTY CXX_STANDARD 11)
set_target_properties(web-randomx PROPERTIES LINK_FLAGS "-os -s WASM=1 -s MODULARIZE=1 -s WASM_BIGINT -s TOTAL_MEMORY=272MB -msimd128 -s EXPORTED_FUNCTIONS=\"['_free', '_malloc']\"")
# Variant-specific link flags. COMPAT_LINK_FLAGS is also appended to the link
# lines of the test and benchmark executables further down.
if(RANDOMX_NO_SIMD)
  set(COMPAT_LINK_FLAGS "-mno-bulk-memory -mno-sign-ext -mno-nontrapping-fptoint")
else()
  set(COMPAT_LINK_FLAGS "-msimd128")
endif()

# Full link line for the web-randomx module.
# NOTE(review): "-os" is lowercase here; if size optimisation was intended the
# usual spelling is "-Os" -- confirm before changing.
set(COMMON_LINK_FLAGS "-os -s WASM=1 -s MODULARIZE=1 -s WASM_BIGINT -s TOTAL_MEMORY=272MB ${COMPAT_LINK_FLAGS} -s EXPORTED_FUNCTIONS=\"['_free', '_malloc']\" -s EXPORTED_RUNTIME_METHODS=\"['HEAPU8']\"")

set_property(TARGET web-randomx PROPERTY LINK_FLAGS "${COMMON_LINK_FLAGS}")

# Tests
if(TESTS AND TESTS STREQUAL "true")
set_source_files_properties(src/cpp/tests/tests.cpp COMPILE_FLAGS -msimd128)
if(NOT RANDOMX_NO_SIMD)
set_source_files_properties(src/cpp/tests/tests.cpp COMPILE_FLAGS -msimd128)
endif()
add_executable(web-randomx-tests
src/cpp/tests/tests.cpp)
target_link_libraries(web-randomx-tests
PRIVATE randomx)
set_property(TARGET web-randomx-tests PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET web-randomx-tests PROPERTY CXX_STANDARD 11)
set_target_properties(web-randomx-tests PROPERTIES LINK_FLAGS "-g3 -s WASM=1 -s ALLOW_MEMORY_GROWTH=1 -msimd128")
set_target_properties(web-randomx-tests PROPERTIES LINK_FLAGS "-g3 -s WASM=1 -s ALLOW_MEMORY_GROWTH=1 ${COMPAT_LINK_FLAGS}")

set_source_files_properties(src/cpp/tests/benchmark.cpp COMPILE_FLAGS -msimd128)
if(NOT RANDOMX_NO_SIMD)
set_source_files_properties(src/cpp/tests/benchmark.cpp COMPILE_FLAGS -msimd128)
endif()
add_executable(web-randomx-benchmark
src/cpp/tests/benchmark.cpp)
target_link_libraries(web-randomx-benchmark
PRIVATE randomx)
set_property(TARGET web-randomx-benchmark PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET web-randomx-benchmark PROPERTY CXX_STANDARD 11)
set_target_properties(web-randomx-benchmark PROPERTIES LINK_FLAGS "-os -s WASM=1 -s MAXIMUM_MEMORY=4GB -s ALLOW_MEMORY_GROWTH=1 -msimd128")
set_target_properties(web-randomx-benchmark PROPERTIES LINK_FLAGS "-os -s WASM=1 -s MAXIMUM_MEMORY=4GB -s ALLOW_MEMORY_GROWTH=1 ${COMPAT_LINK_FLAGS}")
endif()
6 changes: 6 additions & 0 deletions Makefile.noscimd
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Convenience wrapper around the CMake no-SIMD build.
#   all   - configure with RANDOMX_NO_SIMD=ON in build-noscimd/ and build there
#   clean - remove the build-noscimd/ directory
# Both targets are declared phony so that a stray file named "all" or "clean"
# cannot make them appear up to date.
.PHONY: all clean

all:
	mkdir -p build-noscimd && cd build-noscimd && \
	emcmake cmake -DARCH=native -DRANDOMX_NO_SIMD=ON .. && $(MAKE)

clean:
	rm -rf build-noscimd
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ emcmake cmake -DARCH=native ..
make
```

To generate `web-randomx-tests` and `web-randomx-benchmark` executables for testing, just set the `TESTS` option to true and run the generated scripts with Node.js:
To generate `web-randomx-tests` and `web-randomx-benchmark` executables for testing, just set the `TESTS` option to true and run the generated scripts with Node.js:

```shell
emcmake cmake -DARCH=native -DTESTS=true ..
Expand Down Expand Up @@ -54,3 +54,5 @@ npm run build
Webpack will generate the files and put them in the WebRandomX/dist folder. They can be deployed with nginx or Apache.

**Note**: The proxy server address should be configured in `src/js/job.js`.

> Looking for a build without SIMD? See [README_NOSIMD.md](README_NOSIMD.md).
85 changes: 85 additions & 0 deletions README_NOSIMD.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# WebRandomX — PoC no-SIMD build

## Quick start

Prerequisites: `emcc`, `cmake`, `make`

```shell
make -f Makefile.noscimd
```

Or manually:

```shell
mkdir build-noscimd && cd build-noscimd
emcmake cmake -DARCH=native -DRANDOMX_NO_SIMD=ON ..
make
```

This produces a WebAssembly binary without SIMD128, bulk-memory, sign-extension, or non-trapping float-to-int instructions. The output is functionally equivalent (bit-identical hashes) to the standard SIMD build.

## Test: SIMD vs no-SIMD comparison

To verify functional equivalence and measure performance:

```shell
./test_simd_comparison.sh [--nonces N] # default: 100
```

The script builds both variants (with tests and benchmarks enabled) and runs:

1. **WASM feature audit** — counts WASM extension instructions via `wasm-objdump`
2. **Functional correctness** — runs the test suite (known hash vectors) on both builds
3. **Hash equivalence** — compares test outputs between SIMD and no-SIMD
4. **Benchmark** — measures ms/hash on both builds and computes the slowdown factor

---

## Why a no-SIMD build?

### The problem: WASM extensions limit portability

Modern WebAssembly toolchains (Emscripten >= 3.x) can emit binaries that use **non-baseline WASM extensions**. SIMD128 is opt-in via `-msimd128`, while the other three extensions listed below are enabled by default:

| Extension | Emscripten flag | Instructions emitted |
| ------------------------- | --------------- | --------------------------------------- |
| SIMD128 | `-msimd128` | `v128.*`, `i32x4.*`, `f64x2.*`, etc. |
| Bulk memory | (default ON) | `memory.copy`, `memory.fill` |
| Sign extension | (default ON) | `i32.extend8_s`, `i64.extend32_s`, etc. |
| Non-trapping float-to-int | (default ON) | `i32.trunc_sat_*`, `i64.trunc_sat_*` |

WebRandomX explicitly uses SIMD128 for its Argon2 memory-hard function (`argon2_simd.c`, `blamka-round-simd.h`) and AES emulation (`intrin_wasm.h`). The other three extensions are injected implicitly by the compiler backend.

These extensions are not supported in several environments:

- **Dynamic analysis frameworks**
- **Lightweight/IoT runtimes**
- **Older browsers and embedded WebView**

This PoC explores whether WebRandomX can be compiled using only baseline WASM instructions, making it portable to the widest possible range of environments.

### Functional equivalence guarantee

The no-SIMD build replaces SIMD intrinsics with **semantically identical scalar operations**. The 128-bit `v128_t` type is replaced by a union of scalar fields (`uint64_t u64[2]`, `uint32_t u32[4]`, `double f64[2]`), and each SIMD intrinsic is replaced by equivalent element-wise operations.

This is verified empirically: given identical inputs, both builds produce **bit-identical hash outputs** across the entire RandomX test vector suite.

---

## Implementation details

### Build system (`CMakeLists.txt`)

A CMake option `RANDOMX_NO_SIMD` (default OFF) controls the build variant:

```cmake
option(RANDOMX_NO_SIMD "Build without WASM SIMD128 instructions" OFF)

if(NOT RANDOMX_NO_SIMD)
set_source_files_properties(${randomx_sources} COMPILE_FLAGS -msimd128)
else()
add_definitions(-DRANDOMX_NO_SIMD)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mno-bulk-memory -mno-sign-ext -mno-nontrapping-fptoint")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-bulk-memory -mno-sign-ext -mno-nontrapping-fptoint")
endif()
```
52 changes: 51 additions & 1 deletion src/cpp/argon2_simd.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ randomx_argon2_impl* randomx_argon2_impl_simd() {
return &randomx_argon2_fill_segment_simd;
}

#ifndef RANDOMX_NO_SIMD

static void fill_block(v128_t* state, const block* ref_block,
block* next_block, int with_xor) {
v128_t block_XY[ARGON2_OWORDS_IN_BLOCK];
Expand Down Expand Up @@ -54,6 +56,50 @@ static void fill_block(v128_t* state, const block* ref_block,
}
}

#else /* RANDOMX_NO_SIMD — scalar emulation */

#include "rx_vec_i128.h"

/*
 * Scalar (no-SIMD) block-fill step.
 *
 * Each 128-bit word of ref_block is XORed into state. A copy of the result
 * (additionally XORed with the current contents of next_block when with_xor
 * is non-zero) is kept aside, state is run through BLAKE2_ROUND over two
 * index patterns (8 * i + k, then 8 * k + i), and finally the saved copy is
 * XORed back into state and the result is stored to next_block.
 */
static void fill_block(rx_vec_i128* state, const block* ref_block,
                       block* next_block, int with_xor) {
    rx_vec_i128 block_XY[ARGON2_OWORDS_IN_BLOCK];
    const rx_vec_i128* ref_words = (const rx_vec_i128*)ref_block->v;
    rx_vec_i128* next_words = (rx_vec_i128*)next_block->v;
    unsigned int idx;

    /* Absorb the reference block and remember the pre-permutation value. */
    for (idx = 0; idx < ARGON2_OWORDS_IN_BLOCK; idx++) {
        state[idx] = rx_xor_vec_i128(state[idx], rx_load_vec_i128(ref_words + idx));
        block_XY[idx] = state[idx];
        if (with_xor) {
            block_XY[idx] = rx_xor_vec_i128(block_XY[idx],
                                            rx_load_vec_i128(next_words + idx));
        }
    }

    /* First pass: eight consecutive words per round. */
    for (idx = 0; idx < 8; ++idx) {
        BLAKE2_ROUND(state[8 * idx + 0], state[8 * idx + 1], state[8 * idx + 2],
                     state[8 * idx + 3], state[8 * idx + 4], state[8 * idx + 5],
                     state[8 * idx + 6], state[8 * idx + 7]);
    }

    /* Second pass: words taken with a stride of eight. */
    for (idx = 0; idx < 8; ++idx) {
        BLAKE2_ROUND(state[8 * 0 + idx], state[8 * 1 + idx], state[8 * 2 + idx],
                     state[8 * 3 + idx], state[8 * 4 + idx], state[8 * 5 + idx],
                     state[8 * 6 + idx], state[8 * 7 + idx]);
    }

    /* Feed the saved value forward and write the new block out. */
    for (idx = 0; idx < ARGON2_OWORDS_IN_BLOCK; idx++) {
        state[idx] = rx_xor_vec_i128(state[idx], block_XY[idx]);
        rx_store_vec_i128(next_words + idx, state[idx]);
    }
}

#endif /* RANDOMX_NO_SIMD */


void randomx_argon2_fill_segment_simd(const argon2_instance_t* instance,
argon2_position_t position) {
Expand All @@ -62,7 +108,11 @@ void randomx_argon2_fill_segment_simd(const argon2_instance_t* instance,
uint64_t pseudo_rand, ref_index, ref_lane;
uint32_t prev_offset, curr_offset;
uint32_t starting_index, i;
#ifndef RANDOMX_NO_SIMD
v128_t state[ARGON2_OWORDS_IN_BLOCK];
#else
rx_vec_i128 state[ARGON2_OWORDS_IN_BLOCK];
#endif

if (instance == NULL) {
return;
Expand Down Expand Up @@ -132,4 +182,4 @@ void randomx_argon2_fill_segment_simd(const argon2_instance_t* instance,
}
}
}
}
}
111 changes: 110 additions & 1 deletion src/cpp/blamka-round-simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include "blake2-impl.h"
#include "intrin_wasm_simd.hpp"

#ifndef RANDOMX_NO_SIMD

#define rotr32(x) \
wasm_i32x4_shuffle_imm(x, _WASM_SHUFFLE(2, 3, 0, 1))
#define rotr24(x) \
Expand Down Expand Up @@ -89,6 +91,113 @@ static inline v128_t fBlaMka(v128_t x, v128_t y) {
D1 = wasm_unpackhi_i64x2(D1, wasm_unpacklo_i64x2(t1, t1)); \
} while ((void)0, 0)

#else /* RANDOMX_NO_SIMD — scalar emulation */

/*
 * Rotate the 64-bit value x right by n bit positions.
 *
 * The count is reduced modulo 64, so n == 0 (or any multiple of 64) returns x
 * unchanged. Without the masking, n == 0 would evaluate x << 64, which is
 * undefined behaviour in C because the shift count equals the operand width.
 * Results for n in 1..63 are unchanged.
 */
static inline uint64_t scalar_rotr64(uint64_t x, unsigned int n) {
    n &= 63u;
    return (x >> n) | (x << ((64u - n) & 63u));
}

#define rotr32(x) wasm_i32x4_shuffle_imm(x, _WASM_SHUFFLE(2, 3, 0, 1))

/*
 * Rotate both 64-bit lanes of x right by n bits and return the result.
 * Each lane is rotated independently via scalar_rotr64().
 */
static inline rx_vec_i128 scalar_rotr_vec(rx_vec_i128 x, unsigned int n) {
    rx_vec_i128 rotated;
    unsigned int lane;

    for (lane = 0; lane < 2; ++lane) {
        rotated.u64[lane] = scalar_rotr64(x.u64[lane], n);
    }
    return rotated;
}

#define rotr24(x) scalar_rotr_vec(x, 24)
#define rotr16(x) scalar_rotr_vec(x, 16)
#define rotr63(x) scalar_rotr_vec(x, 63)

/*
 * Scalar form of the fBlaMka mixing function: for each 64-bit lane returns
 * x + y + 2 * m, where m is the matching lane of wasm_u64x2_mulu(x, y).
 * NOTE(review): wasm_u64x2_mulu is defined outside this view; presumably it
 * multiplies the low 32-bit halves of each lane -- confirm.
 */
static inline rx_vec_i128 fBlaMka(rx_vec_i128 x, rx_vec_i128 y) {
    const rx_vec_i128 prod = wasm_u64x2_mulu(x, y);
    rx_vec_i128 mixed;
    unsigned int lane;

    for (lane = 0; lane < 2; ++lane) {
        mixed.u64[lane] = x.u64[lane] + y.u64[lane] + 2 * prod.u64[lane];
    }
    return mixed;
}

/*
 * Lane-wise XOR of two 128-bit values, computed on their two 64-bit halves.
 */
static inline rx_vec_i128 scalar_xor_vec(rx_vec_i128 a, rx_vec_i128 b) {
    rx_vec_i128 out;
    unsigned int lane;

    for (lane = 0; lane < 2; ++lane) {
        out.u64[lane] = a.u64[lane] ^ b.u64[lane];
    }
    return out;
}

/*
 * G1: first mixing half-step (scalar build).
 *
 * Works on two operand sets in lockstep (suffix 0 and suffix 1). For each set:
 *   A = fBlaMka(A, B);  D = rotr32(D ^ A);
 *   C = fBlaMka(C, D);  B = rotr24(B ^ C);
 * Every argument is assigned to, so each must be a modifiable lvalue.
 * G2 has the same shape with rotation amounts 16 and 63.
 */
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = fBlaMka(A0, B0); \
A1 = fBlaMka(A1, B1); \
\
D0 = scalar_xor_vec(D0, A0); \
D1 = scalar_xor_vec(D1, A1); \
\
D0 = rotr32(D0); \
D1 = rotr32(D1); \
\
C0 = fBlaMka(C0, D0); \
C1 = fBlaMka(C1, D1); \
\
B0 = scalar_xor_vec(B0, C0); \
B1 = scalar_xor_vec(B1, C1); \
\
B0 = rotr24(B0); \
B1 = rotr24(B1); \
} while ((void)0, 0)

/*
 * G2: second mixing half-step (scalar build).
 *
 * Same sequence as G1 but with different rotation amounts. For each of the
 * two operand sets (suffix 0 and suffix 1):
 *   A = fBlaMka(A, B);  D = rotr16(D ^ A);
 *   C = fBlaMka(C, D);  B = rotr63(B ^ C);
 * Every argument is assigned to, so each must be a modifiable lvalue.
 */
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = fBlaMka(A0, B0); \
A1 = fBlaMka(A1, B1); \
\
D0 = scalar_xor_vec(D0, A0); \
D1 = scalar_xor_vec(D1, A1); \
\
D0 = rotr16(D0); \
D1 = rotr16(D1); \
\
C0 = fBlaMka(C0, D0); \
C1 = fBlaMka(C1, D1); \
\
B0 = scalar_xor_vec(B0, C0); \
B1 = scalar_xor_vec(B1, C1); \
\
B0 = rotr63(B0); \
B1 = rotr63(B1); \
} while ((void)0, 0)

/*
 * DIAGONALIZE: in-place permutation of the B, C and D operands (scalar build).
 *
 * - A0 and A1 are accepted for signature symmetry but are not referenced.
 * - C0 and C1 are swapped; D0 serves as the scratch slot, which is safe
 *   because its original value was saved in t0 first.
 * - B0, B1, D0 and D1 are then rebuilt from wasm_unpackhi_i64x2 /
 *   wasm_unpacklo_i64x2 combinations of the saved (t0, t1) and current values.
 *   NOTE(review): the exact lane selection depends on those two helpers,
 *   which are defined outside this view -- confirm there.
 * The statement order matters: B0 is overwritten before B1 is computed, which
 * is why the original B0 is kept in t1.
 */
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
rx_vec_i128 t0 = D0; \
rx_vec_i128 t1 = B0; \
\
D0 = C0; \
C0 = C1; \
C1 = D0; \
\
D0 = wasm_unpackhi_i64x2(D1, wasm_unpacklo_i64x2(t0, t0)); \
D1 = wasm_unpackhi_i64x2(t0, wasm_unpacklo_i64x2(D1, D1)); \
B0 = wasm_unpackhi_i64x2(B0, wasm_unpacklo_i64x2(B1, B1)); \
B1 = wasm_unpackhi_i64x2(B1, wasm_unpacklo_i64x2(t1, t1)); \
} while ((void)0, 0)

/*
 * UNDIAGONALIZE: in-place permutation of the B, C and D operands, the
 * companion of DIAGONALIZE (scalar build).
 *
 * - A0 and A1 are accepted for signature symmetry but are not referenced.
 * - C0 and C1 are swapped through t0; t0 is then reused to hold the original
 *   B0, and t1 holds the original D0.
 * - B0, B1, D0 and D1 are rebuilt from wasm_unpackhi_i64x2 /
 *   wasm_unpacklo_i64x2 combinations of the saved and current values.
 *   NOTE(review): the exact lane selection depends on those two helpers,
 *   which are defined outside this view -- confirm there.
 * t1 is declared after executable statements, so this macro needs a compiler
 * mode that allows mixed declarations and code (C99 or later).
 */
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
rx_vec_i128 t0 = C0; \
C0 = C1; \
C1 = t0; \
t0 = B0; \
rx_vec_i128 t1 = D0; \
\
B0 = wasm_unpackhi_i64x2(B1, wasm_unpacklo_i64x2(B0, B0)); \
B1 = wasm_unpackhi_i64x2(t0, wasm_unpacklo_i64x2(B1, B1)); \
D0 = wasm_unpackhi_i64x2(D0, wasm_unpacklo_i64x2(D1, D1)); \
D1 = wasm_unpackhi_i64x2(D1, wasm_unpacklo_i64x2(t1, t1)); \
} while ((void)0, 0)

#endif /* RANDOMX_NO_SIMD */

#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
Expand All @@ -102,4 +211,4 @@ static inline v128_t fBlaMka(v128_t x, v128_t y) {
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
} while ((void)0, 0)

#endif /* BLAKE_ROUND_MKA_OPT_H */
#endif /* BLAKE_ROUND_MKA_OPT_H */
Loading