diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b052dc8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+build*/*
+node_modules/*
+dist/*
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5d94015..3d7a8fa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -130,8 +130,19 @@ if(ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv
   endif()
 endif()
 
-# WASM SIMD
-set_source_files_properties(${randomx_sources} COMPILE_FLAGS -msimd128)
+# RANDOMX_NO_SIMD option: build without WASM SIMD128 for compatibility with
+# instrumentation tools (e.g. Wasabi) that do not support SIMD.
+option(RANDOMX_NO_SIMD "Build without WASM SIMD128 instructions" OFF)
+
+if(NOT RANDOMX_NO_SIMD)
+  # WASM SIMD
+  set_source_files_properties(${randomx_sources} COMPILE_FLAGS -msimd128)
+else()
+  add_definitions(-DRANDOMX_NO_SIMD)
+  # Disable post-MVP features unsupported by Wasabi
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mno-bulk-memory -mno-sign-ext -mno-nontrapping-fptoint")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-bulk-memory -mno-sign-ext -mno-nontrapping-fptoint")
+endif()
 
 set(RANDOMX_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/src" CACHE STRING "WebRandomX Include path")
 
@@ -167,25 +178,37 @@ target_link_libraries(web-randomx
   PRIVATE randomx)
 set_property(TARGET web-randomx PROPERTY POSITION_INDEPENDENT_CODE ON)
 set_property(TARGET web-randomx PROPERTY CXX_STANDARD 11)
-set_target_properties(web-randomx PROPERTIES LINK_FLAGS "-os -s WASM=1 -s MODULARIZE=1 -s WASM_BIGINT -s TOTAL_MEMORY=272MB -msimd128 -s EXPORTED_FUNCTIONS=\"['_free', '_malloc']\"")
+if(NOT RANDOMX_NO_SIMD)
+  set(COMPAT_LINK_FLAGS "-msimd128")
+else()
+  set(COMPAT_LINK_FLAGS "-mno-bulk-memory -mno-sign-ext -mno-nontrapping-fptoint")
+endif()
+
+set(COMMON_LINK_FLAGS "-os -s WASM=1 -s MODULARIZE=1 -s WASM_BIGINT -s TOTAL_MEMORY=272MB ${COMPAT_LINK_FLAGS} -s EXPORTED_FUNCTIONS=\"['_free', '_malloc']\" -s EXPORTED_RUNTIME_METHODS=\"['HEAPU8']\"")
+
+set_target_properties(web-randomx PROPERTIES LINK_FLAGS "${COMMON_LINK_FLAGS}")
 
 # Tests
 if(TESTS AND TESTS STREQUAL "true")
-  set_source_files_properties(src/cpp/tests/tests.cpp COMPILE_FLAGS -msimd128)
+  if(NOT RANDOMX_NO_SIMD)
+    set_source_files_properties(src/cpp/tests/tests.cpp COMPILE_FLAGS -msimd128)
+  endif()
   add_executable(web-randomx-tests
     src/cpp/tests/tests.cpp)
   target_link_libraries(web-randomx-tests
     PRIVATE randomx)
   set_property(TARGET web-randomx-tests PROPERTY POSITION_INDEPENDENT_CODE ON)
   set_property(TARGET web-randomx-tests PROPERTY CXX_STANDARD 11)
-  set_target_properties(web-randomx-tests PROPERTIES LINK_FLAGS "-g3 -s WASM=1 -s ALLOW_MEMORY_GROWTH=1 -msimd128")
+  set_target_properties(web-randomx-tests PROPERTIES LINK_FLAGS "-g3 -s WASM=1 -s ALLOW_MEMORY_GROWTH=1 ${COMPAT_LINK_FLAGS}")
 
-  set_source_files_properties(src/cpp/tests/benchmark.cpp COMPILE_FLAGS -msimd128)
+  if(NOT RANDOMX_NO_SIMD)
+    set_source_files_properties(src/cpp/tests/benchmark.cpp COMPILE_FLAGS -msimd128)
+  endif()
   add_executable(web-randomx-benchmark
     src/cpp/tests/benchmark.cpp)
   target_link_libraries(web-randomx-benchmark
     PRIVATE randomx)
   set_property(TARGET web-randomx-benchmark PROPERTY POSITION_INDEPENDENT_CODE ON)
   set_property(TARGET web-randomx-benchmark PROPERTY CXX_STANDARD 11)
-  set_target_properties(web-randomx-benchmark PROPERTIES LINK_FLAGS "-os -s WASM=1 -s MAXIMUM_MEMORY=4GB -s ALLOW_MEMORY_GROWTH=1 -msimd128")
+  set_target_properties(web-randomx-benchmark PROPERTIES LINK_FLAGS "-os -s WASM=1 -s MAXIMUM_MEMORY=4GB -s ALLOW_MEMORY_GROWTH=1 ${COMPAT_LINK_FLAGS}")
 endif()
diff --git a/Makefile.noscimd b/Makefile.noscimd
new file mode 100644
index 0000000..4995035
--- /dev/null
+++ b/Makefile.noscimd
@@ -0,0 +1,13 @@
+# Convenience wrapper around the CMake no-SIMD build (RANDOMX_NO_SIMD=ON).
+# Both targets are phony so a file or directory named "all" or "clean" cannot mask them.
+.PHONY: all clean
+
+# Configure into build-noscimd/, then build there.
+# $(MAKE) (instead of a bare "make") forwards -j and other flags to the nested build.
+all:
+	mkdir -p build-noscimd && cd build-noscimd && \
+	emcmake cmake -DARCH=native -DRANDOMX_NO_SIMD=ON ..
+	$(MAKE) -C build-noscimd
+
+# Remove the no-SIMD build directory.
+clean:
+	rm -rf build-noscimd
diff --git a/README.md b/README.md
index 99aa0a4..edbf0c4 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ emcmake cmake -DARCH=native ..
 make
 ```
 
-To generate `web-randomx-tests` and  `web-randomx-benchmark` executables for testing, just set the `TESTS` option to true and run the generated scripts with Node.js:
+To generate `web-randomx-tests` and `web-randomx-benchmark` executables for testing, just set the `TESTS` option to true and run the generated scripts with Node.js:
 
 ```shell
 emcmake cmake -DARCH=native -DTESTS=true ..
@@ -54,3 +54,5 @@ npm run build
 Webpack will generate the files and put them in the WebRandomX/dist folder. They can be deployed with nginx or Apache.
 
 **Note**: The proxy server address should be configured in `src/js/job.js`.
+
+> Looking for a build without SIMD? See [README_NOSIMD.md](README_NOSIMD.md).
diff --git a/README_NOSIMD.md b/README_NOSIMD.md
new file mode 100644
index 0000000..88d9dc5
--- /dev/null
+++ b/README_NOSIMD.md
@@ -0,0 +1,85 @@
+# WebRandomX — PoC no-SIMD build
+
+## Quick start
+
+Prerequisites: Emscripten (`emcc`, `emcmake`), `cmake`, and `make`.
+
+```shell
+make -f Makefile.noscimd
+```
+
+Or manually:
+
+```shell
+mkdir build-noscimd && cd build-noscimd
+emcmake cmake -DARCH=native -DRANDOMX_NO_SIMD=ON ..
+make
+```
+
+This produces a WebAssembly binary without SIMD128, bulk-memory, sign-extension, or non-trapping float-to-int instructions. The output is functionally equivalent (bit-identical hashes) to the standard SIMD build.
+
+## Test: SIMD vs no-SIMD comparison
+
+To verify functional equivalence and measure performance:
+
+```shell
+./test_simd_comparison.sh [--nonces N]   # default: 100
+```
+
+The script builds both variants (with tests and benchmarks enabled) and runs:
+
+1. **WASM feature audit** — counts WASM extension instructions via `wasm-objdump`
+2. **Functional correctness** — runs the test suite (known hash vectors) on both builds
+3. **Hash equivalence** — compares test outputs between SIMD and no-SIMD
+4. **Benchmark** — measures ms/hash on both builds and computes the slowdown factor
+
+---
+
+## Why a no-SIMD build?
+
+### The problem: WASM extensions limit portability
+
+Modern WebAssembly toolchains (Emscripten >= 3.x) emit binaries that use **non-baseline WASM extensions**. SIMD128 is opt-in via a compiler flag; the other three are enabled by default:
+
+| Extension                 | Emscripten flag | Instructions emitted                    |
+| ------------------------- | --------------- | --------------------------------------- |
+| SIMD128                   | `-msimd128`     | `v128.*`, `i32x4.*`, `f64x2.*`, etc.    |
+| Bulk memory               | (default ON)    | `memory.copy`, `memory.fill`            |
+| Sign extension            | (default ON)    | `i32.extend8_s`, `i64.extend32_s`, etc. |
+| Non-trapping float-to-int | (default ON)    | `i32.trunc_sat_*`, `i64.trunc_sat_*`    |
+
+WebRandomX explicitly uses SIMD128 for its Argon2 memory-hard function (`argon2_simd.c`, `blamka-round-simd.h`) and AES emulation (`intrin_wasm.h`). The other three extensions are injected implicitly by the compiler backend.
+
+These extensions are not supported in several environments:
+
+- **Dynamic analysis frameworks**
+- **Lightweight/IoT runtimes**
+- **Older browsers and embedded WebViews**
+
+This PoC explores whether WebRandomX can be compiled using only baseline WASM instructions, making it portable to the widest possible range of environments.
+
+### Functional equivalence guarantee
+
+The no-SIMD build replaces SIMD intrinsics with **semantically identical scalar operations**. The 128-bit `v128_t` type is replaced by a union of scalar arrays that all alias the same 16 bytes (`uint8_t u8[16]`, `uint32_t u32[4]`, `int32_t i32[4]`, `uint64_t u64[2]`, `double f64[2]`), and each SIMD intrinsic is replaced by equivalent element-wise operations.
+
+This is verified empirically: given identical inputs, both builds produce **bit-identical hash outputs** across the entire RandomX test vector suite.
+
+---
+
+## Implementation details
+
+### Build system (`CMakeLists.txt`)
+
+A CMake option `RANDOMX_NO_SIMD` (default OFF) controls the build variant:
+
+```cmake
+option(RANDOMX_NO_SIMD "Build without WASM SIMD128 instructions" OFF)
+
+if(NOT RANDOMX_NO_SIMD)
+  set_source_files_properties(${randomx_sources} COMPILE_FLAGS -msimd128)
+else()
+  add_definitions(-DRANDOMX_NO_SIMD)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mno-bulk-memory -mno-sign-ext -mno-nontrapping-fptoint")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-bulk-memory -mno-sign-ext -mno-nontrapping-fptoint")
+endif()
+```
diff --git a/src/cpp/argon2_simd.c b/src/cpp/argon2_simd.c
index d708d2c..d45db76 100644
--- a/src/cpp/argon2_simd.c
+++ b/src/cpp/argon2_simd.c
@@ -16,6 +16,8 @@ randomx_argon2_impl* randomx_argon2_impl_simd() {
 	return &randomx_argon2_fill_segment_simd;
 }
 
+#ifndef RANDOMX_NO_SIMD
+
 static void fill_block(v128_t* state, const block* ref_block,
 	block* next_block, int with_xor) {
 	v128_t block_XY[ARGON2_OWORDS_IN_BLOCK];
@@ -54,6 +56,50 @@ static void fill_block(v128_t* state, const block* ref_block,
 	}
 }
 
+#else /* RANDOMX_NO_SIMD — scalar emulation */
+
+#include "rx_vec_i128.h"
+
+static void fill_block(rx_vec_i128* state, const block* ref_block,
+	block* next_block, int with_xor) { /* scalar (no-SIMD) variant: mixes ref_block into state and writes the result to next_block */
+	rx_vec_i128 block_XY[ARGON2_OWORDS_IN_BLOCK]; /* copy of the pre-round state, XORed back in after the rounds */
+	unsigned int i;
+
+	if (with_xor) { /* also fold the current contents of next_block into the saved copy */
+		for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+			rx_vec_i128 ref = rx_load_vec_i128((const rx_vec_i128*)ref_block->v + i);
+			rx_vec_i128 next = rx_load_vec_i128((const rx_vec_i128*)next_block->v + i);
+			state[i] = rx_xor_vec_i128(state[i], ref); /* state ^= ref_block, in place */
+			block_XY[i] = rx_xor_vec_i128(state[i], next); /* saved copy = state ^ old next_block */
+		}
+	}
+	else {
+		for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+			rx_vec_i128 ref = rx_load_vec_i128((const rx_vec_i128*)ref_block->v + i);
+			block_XY[i] = state[i] = rx_xor_vec_i128(state[i], ref); /* state ^= ref_block; saved copy = new state */
+		}
+	}
+
+	for (i = 0; i < 8; ++i) { /* first pass: each group of 8 consecutive vectors, state[8*i .. 8*i+7] */
+		BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
+			state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
+			state[8 * i + 6], state[8 * i + 7]);
+	}
+
+	for (i = 0; i < 8; ++i) { /* second pass: vectors taken with stride 8, state[i], state[8+i], ..., state[56+i] */
+		BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
+			state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
+			state[8 * 6 + i], state[8 * 7 + i]);
+	}
+
+	for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { /* NOTE(review): rounds index up to state[63], so this presumably assumes ARGON2_OWORDS_IN_BLOCK == 64 -- confirm */
+		state[i] = rx_xor_vec_i128(state[i], block_XY[i]); /* XOR the saved copy back into the state */
+		rx_store_vec_i128((rx_vec_i128*)next_block->v + i, state[i]); /* write the final state out to next_block */
+	}
+}
+
+#endif /* RANDOMX_NO_SIMD */
+
 
 void randomx_argon2_fill_segment_simd(const argon2_instance_t* instance,
 	argon2_position_t position) {
@@ -62,7 +108,11 @@ void randomx_argon2_fill_segment_simd(const argon2_instance_t* instance,
 	uint64_t pseudo_rand, ref_index, ref_lane;
 	uint32_t prev_offset, curr_offset;
 	uint32_t starting_index, i;
+#ifndef RANDOMX_NO_SIMD
 	v128_t state[ARGON2_OWORDS_IN_BLOCK];
+#else
+	rx_vec_i128 state[ARGON2_OWORDS_IN_BLOCK];
+#endif
 
 	if (instance == NULL) {
 		return;
@@ -132,4 +182,4 @@ void randomx_argon2_fill_segment_simd(const argon2_instance_t* instance,
 			}
 		}
 	}
-}
\ No newline at end of file
+}
diff --git a/src/cpp/blamka-round-simd.h b/src/cpp/blamka-round-simd.h
index ad2c247..b276af4 100644
--- a/src/cpp/blamka-round-simd.h
+++ b/src/cpp/blamka-round-simd.h
@@ -4,6 +4,8 @@
 #include "blake2-impl.h"
 #include "intrin_wasm_simd.hpp"
 
+#ifndef RANDOMX_NO_SIMD
+
 #define rotr32(x)                                                              \
     wasm_i32x4_shuffle_imm(x, _WASM_SHUFFLE(2, 3, 0, 1))
 #define rotr24(x)                                                              \
@@ -89,6 +91,113 @@ static inline v128_t fBlaMka(v128_t x, v128_t y) {
         D1 = wasm_unpackhi_i64x2(D1, wasm_unpacklo_i64x2(t1, t1));             \
     } while ((void)0, 0)
 
+#else /* RANDOMX_NO_SIMD — scalar emulation */
+
+static inline uint64_t scalar_rotr64(uint64_t x, unsigned int n) { /* rotate x right by n bits; n is taken modulo 64 */
+    return (x >> (n & 63)) | (x << ((64 - n) & 63)); /* masked counts: no undefined 64-bit shift by 64 when n == 0 */
+}
+
+#define rotr32(x) wasm_i32x4_shuffle_imm(x, _WASM_SHUFFLE(2, 3, 0, 1))
+
+static inline rx_vec_i128 scalar_rotr_vec(rx_vec_i128 x, unsigned int n) { /* rotate each 64-bit lane of x right by n bits */
+    rx_vec_i128 r;
+    r.u64[0] = scalar_rotr64(x.u64[0], n); /* lane 0 */
+    r.u64[1] = scalar_rotr64(x.u64[1], n); /* lane 1 */
+    return r;
+}
+
+#define rotr24(x) scalar_rotr_vec(x, 24)
+#define rotr16(x) scalar_rotr_vec(x, 16)
+#define rotr63(x) scalar_rotr_vec(x, 63)
+
+static inline rx_vec_i128 fBlaMka(rx_vec_i128 x, rx_vec_i128 y) { /* per 64-bit lane: x + y + 2 * z, where z comes from wasm_u64x2_mulu */
+    rx_vec_i128 z = wasm_u64x2_mulu(x, y); /* scalar wasm_u64x2_mulu multiplies the low 32 bits of each lane */
+    rx_vec_i128 r;
+    r.u64[0] = x.u64[0] + y.u64[0] + 2 * z.u64[0]; /* unsigned 64-bit arithmetic, so the sum wraps modulo 2^64 */
+    r.u64[1] = x.u64[1] + y.u64[1] + 2 * z.u64[1];
+    return r;
+}
+
+static inline rx_vec_i128 scalar_xor_vec(rx_vec_i128 a, rx_vec_i128 b) { /* bitwise XOR of all 128 bits, done as two 64-bit lanes */
+    rx_vec_i128 r;
+    r.u64[0] = a.u64[0] ^ b.u64[0];
+    r.u64[1] = a.u64[1] ^ b.u64[1];
+    return r;
+}
+
+#define G1(A0, B0, C0, D0, A1, B1, C1, D1)                                     \
+    do {                                                                       \
+        A0 = fBlaMka(A0, B0);                                                  \
+        A1 = fBlaMka(A1, B1);                                                  \
+                                                                               \
+        D0 = scalar_xor_vec(D0, A0);                                           \
+        D1 = scalar_xor_vec(D1, A1);                                           \
+                                                                               \
+        D0 = rotr32(D0);                                                       \
+        D1 = rotr32(D1);                                                       \
+                                                                               \
+        C0 = fBlaMka(C0, D0);                                                  \
+        C1 = fBlaMka(C1, D1);                                                  \
+                                                                               \
+        B0 = scalar_xor_vec(B0, C0);                                           \
+        B1 = scalar_xor_vec(B1, C1);                                           \
+                                                                               \
+        B0 = rotr24(B0);                                                       \
+        B1 = rotr24(B1);                                                       \
+    } while ((void)0, 0)
+
+#define G2(A0, B0, C0, D0, A1, B1, C1, D1)                                     \
+    do {                                                                       \
+        A0 = fBlaMka(A0, B0);                                                  \
+        A1 = fBlaMka(A1, B1);                                                  \
+                                                                               \
+        D0 = scalar_xor_vec(D0, A0);                                           \
+        D1 = scalar_xor_vec(D1, A1);                                           \
+                                                                               \
+        D0 = rotr16(D0);                                                       \
+        D1 = rotr16(D1);                                                       \
+                                                                               \
+        C0 = fBlaMka(C0, D0);                                                  \
+        C1 = fBlaMka(C1, D1);                                                  \
+                                                                               \
+        B0 = scalar_xor_vec(B0, C0);                                           \
+        B1 = scalar_xor_vec(B1, C1);                                           \
+                                                                               \
+        B0 = rotr63(B0);                                                       \
+        B1 = rotr63(B1);                                                       \
+    } while ((void)0, 0)
+
+#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1)                            \
+    do {                                                                       \
+        rx_vec_i128 t0 = D0;                                                   \
+        rx_vec_i128 t1 = B0;                                                   \
+                                                                               \
+        D0 = C0;                                                               \
+        C0 = C1;                                                               \
+        C1 = D0;                                                               \
+                                                                               \
+        D0 = wasm_unpackhi_i64x2(D1, wasm_unpacklo_i64x2(t0, t0));             \
+        D1 = wasm_unpackhi_i64x2(t0, wasm_unpacklo_i64x2(D1, D1));             \
+        B0 = wasm_unpackhi_i64x2(B0, wasm_unpacklo_i64x2(B1, B1));             \
+        B1 = wasm_unpackhi_i64x2(B1, wasm_unpacklo_i64x2(t1, t1));             \
+    } while ((void)0, 0)
+
+#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1)                          \
+    do {                                                                       \
+        rx_vec_i128 t0 = C0;                                                   \
+        C0 = C1;                                                               \
+        C1 = t0;                                                               \
+        t0 = B0;                                                               \
+        rx_vec_i128 t1 = D0;                                                   \
+                                                                               \
+        B0 = wasm_unpackhi_i64x2(B1, wasm_unpacklo_i64x2(B0, B0));             \
+        B1 = wasm_unpackhi_i64x2(t0, wasm_unpacklo_i64x2(B1, B1));             \
+        D0 = wasm_unpackhi_i64x2(D0, wasm_unpacklo_i64x2(D1, D1));             \
+        D1 = wasm_unpackhi_i64x2(D1, wasm_unpacklo_i64x2(t1, t1));             \
+    } while ((void)0, 0)
+
+#endif /* RANDOMX_NO_SIMD */
+
 #define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1)                           \
     do {                                                                       \
         G1(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
@@ -102,4 +211,4 @@ static inline v128_t fBlaMka(v128_t x, v128_t y) {
         UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1);                         \
     } while ((void)0, 0)
 
-#endif /* BLAKE_ROUND_MKA_OPT_H */
\ No newline at end of file
+#endif /* BLAKE_ROUND_MKA_OPT_H */
diff --git a/src/cpp/intrin_wasm.h b/src/cpp/intrin_wasm.h
index a777af3..50c9ced 100644
--- a/src/cpp/intrin_wasm.h
+++ b/src/cpp/intrin_wasm.h
@@ -4,7 +4,7 @@
 #include <cstdint>
 #include <cstdlib>
 #include <stdexcept>
-#include <wasm_simd128.h>
+#include <cstring>
 
 #include "endian.h"
 #include "softfloat.hpp"
@@ -34,6 +34,10 @@ constexpr int RoundToZero = 3;
 
 #define rx_sqrt sqrt
 
+#ifndef RANDOMX_NO_SIMD
+
+#include <wasm_simd128.h>
+
 typedef v128_t rx_vec_i128;
 typedef v128_t rx_vec_f128;
 
@@ -139,6 +143,179 @@ FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
   return wasm_f64x2_make(lo, hi);
 }
 
+#else /* RANDOMX_NO_SIMD — scalar emulation */
+
+#include "rx_vec_i128.h"
+
+#define rx_aligned_alloc(a, b) aligned_alloc(b, a)
+#define rx_aligned_free(a) free(a)
+#define rx_prefetch_nta(x)
+#define rx_prefetch_t0(x)
+
+FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const void* addr) {
+  rx_vec_f128 r;
+  memcpy(&r, addr, 16);
+  return r;
+}
+
+FORCE_INLINE void rx_store_vec_f128(void* addr, rx_vec_f128 a) {
+  memcpy(addr, &a, 16);
+}
+
+FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {  // build a vector from two raw 64-bit patterns; arguments are given high lane first
+  rx_vec_f128 r;
+  r.u64[0] = x0;  // second argument -> lane 0
+  r.u64[1] = x1;  // first argument -> lane 1
+  return r;
+}
+
+FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
+  rx_vec_f128 r;
+  r.u64[0] = x;
+  r.u64[1] = x;
+  return r;
+}
+
+FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
+  rx_vec_f128 r;
+  r.u64[0] = a.u64[1];
+  r.u64[1] = a.u64[0];
+  return r;
+}
+
+FORCE_INLINE double rx_vec_f128_lo(rx_vec_f128 a) {
+  return a.f64[0];
+}
+
+FORCE_INLINE double rx_vec_f128_hi(rx_vec_f128 a) {
+  return a.f64[1];
+}
+
+FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {  // lane-wise a + b on the two double lanes
+  rx_vec_f128 r;
+  if (globalRoundingMode == round_near_even) {  // fast path: plain double addition
+    r.f64[0] = a.f64[0] + b.f64[0];
+    r.f64[1] = a.f64[1] + b.f64[1];
+    return r;
+  }
+  softdouble rlo = softdouble(a.f64[0]) + softdouble(b.f64[0]);  // every other rounding mode goes through softdouble; presumably it honours globalRoundingMode -- TODO confirm
+  softdouble rhi = softdouble(a.f64[1]) + softdouble(b.f64[1]);
+  r.f64[0] = double(rlo);
+  r.f64[1] = double(rhi);
+  return r;
+}
+
+FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+  rx_vec_f128 r;
+  if (globalRoundingMode == round_near_even) {
+    r.f64[0] = a.f64[0] - b.f64[0];
+    r.f64[1] = a.f64[1] - b.f64[1];
+    return r;
+  }
+  softdouble rlo = softdouble(a.f64[0]) - softdouble(b.f64[0]);
+  softdouble rhi = softdouble(a.f64[1]) - softdouble(b.f64[1]);
+  r.f64[0] = double(rlo);
+  r.f64[1] = double(rhi);
+  return r;
+}
+
+FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+  rx_vec_f128 r;
+  if (globalRoundingMode == round_near_even) {
+    r.f64[0] = a.f64[0] * b.f64[0];
+    r.f64[1] = a.f64[1] * b.f64[1];
+    return r;
+  }
+  softdouble rlo = softdouble(a.f64[0]) * softdouble(b.f64[0]);
+  softdouble rhi = softdouble(a.f64[1]) * softdouble(b.f64[1]);
+  r.f64[0] = double(rlo);
+  r.f64[1] = double(rhi);
+  return r;
+}
+
+FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+  rx_vec_f128 r;
+  if (globalRoundingMode == round_near_even) {
+    r.f64[0] = a.f64[0] / b.f64[0];
+    r.f64[1] = a.f64[1] / b.f64[1];
+    return r;
+  }
+  softdouble rlo = softdouble(a.f64[0]) / softdouble(b.f64[0]);
+  softdouble rhi = softdouble(a.f64[1]) / softdouble(b.f64[1]);
+  r.f64[0] = double(rlo);
+  r.f64[1] = double(rhi);
+  return r;
+}
+
+FORCE_INLINE rx_vec_f128 rx_sqrt_vec_f128(rx_vec_f128 a) {
+  rx_vec_f128 r;
+  if (globalRoundingMode == round_near_even) {
+    r.f64[0] = sqrt(a.f64[0]);
+    r.f64[1] = sqrt(a.f64[1]);
+    return r;
+  }
+  softdouble rlo = sqrt(softdouble(a.f64[0]));
+  softdouble rhi = sqrt(softdouble(a.f64[1]));
+  r.f64[0] = double(rlo);
+  r.f64[1] = double(rhi);
+  return r;
+}
+
+FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+  rx_vec_f128 r;
+  r.u64[0] = a.u64[0] ^ b.u64[0];
+  r.u64[1] = a.u64[1] ^ b.u64[1];
+  return r;
+}
+
+FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+  rx_vec_f128 r;
+  r.u64[0] = a.u64[0] & b.u64[0];
+  r.u64[1] = a.u64[1] & b.u64[1];
+  return r;
+}
+
+FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+  rx_vec_f128 r;
+  r.u64[0] = a.u64[0] | b.u64[0];
+  r.u64[1] = a.u64[1] | b.u64[1];
+  return r;
+}
+
+FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
+  return a.i32[0];
+}
+
+FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
+  return a.i32[1];
+}
+
+FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
+  return a.i32[2];
+}
+
+FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
+  return a.i32[3];
+}
+
+FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) {  // build a vector from four 32-bit ints; arguments are given highest lane first
+  rx_vec_i128 r;
+  r.i32[0] = _I0;  // last argument -> lane 0
+  r.i32[1] = _I1;
+  r.i32[2] = _I2;
+  r.i32[3] = _I3;  // first argument -> lane 3
+  return r;
+}
+
+FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {  // read two 32-bit values at addr and addr + 4 and convert each to a double lane
+  rx_vec_f128 r;
+  r.f64[0] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));  // presumably load32 reads an unsigned 32-bit word and the helper reinterprets it as signed -- verify against endian.h
+  r.f64[1] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
+  return r;
+}
+
+#endif /* RANDOMX_NO_SIMD */
+
 #define RANDOMX_DEFAULT_FENV
 
 #ifdef RANDOMX_DEFAULT_FENV
diff --git a/src/cpp/intrin_wasm_simd.hpp b/src/cpp/intrin_wasm_simd.hpp
index 6ad3adb..c054adc 100644
--- a/src/cpp/intrin_wasm_simd.hpp
+++ b/src/cpp/intrin_wasm_simd.hpp
@@ -1,5 +1,7 @@
 #pragma once
 
+#ifndef RANDOMX_NO_SIMD
+
 #include <wasm_simd128.h>
 
 // shuffle
@@ -54,75 +56,51 @@ static inline v128_t wasm_unpackhi_i64x2(v128_t a, v128_t b) {
                             28, 29, 30, 31);
 }
 
-// convert
-// inline v128_t v128_cvtu8x16_i16x8(const v128_t a) {
-//   const v128_t z = wasm_i8x16_splat(0);
-//   return wasm_unpacklo_i8x16(a, z);
-// }
-
-// inline v128_t v128_cvti8x16_i16x8(const v128_t a) {
-//   return wasm_i16x8_shr(wasm_unpacklo_i8x16(a, a), 8);
-// }
-
-// inline v128_t v128_cvtu8x16_i32x4(const v128_t a) {
-//   const v128_t z = wasm_i8x16_splat(0);
-//   return wasm_unpacklo_i16x8(wasm_unpacklo_i8x16(a, z), z);
-// }
-
-// inline v128_t v128_cvti8x16_i32x4(const v128_t a) {
-//   v128_t r = wasm_unpacklo_i8x16(a, a);
-//   r = wasm_unpacklo_i8x16(r, r);
-//   return wasm_i32x4_shr(r, 24);
-// }
-
-// inline v128_t v128_cvtu16x8_i32x4(const v128_t a) {
-//   const v128_t z = wasm_i8x16_splat(0);
-//   return wasm_unpacklo_i16x8(a, z);
-// }
-
-// inline v128_t v128_cvti16x8_i32x4(const v128_t a) {
-//   return wasm_i32x4_shr(wasm_unpacklo_i16x8(a, a), 16);
-// }
-
-// inline v128_t v128_cvtu32x4_i64x2(const v128_t a) {
-//   const v128_t z = wasm_i8x16_splat(0);
-//   return wasm_unpacklo_i32x4(a, z);
-// }
-
-// inline v128_t v128_cvti32x4_i64x2(const v128_t a) {
-//   return wasm_unpacklo_i32x4(a, wasm_i32x4_shr(a, 31));
-// }
-
-// inline v128_t v128_cvtu8x16_i16x8_high(const v128_t a) {
-//   const v128_t z = wasm_i8x16_splat(0);
-//   return wasm_unpackhi_i8x16(a, z);
-// }
-
-// inline v128_t v128_cvti8x16_i16x8_high(const v128_t a) {
-//   return wasm_i16x8_shr(wasm_unpackhi_i8x16(a, a), 8);
-// }
-
-// inline v128_t v128_cvtu16x8_i32x4_high(const v128_t a) {
-//   const v128_t z = wasm_i8x16_splat(0);
-//   return wasm_unpackhi_i16x8(a, z);
-// }
-
-// inline v128_t v128_cvti16x8_i32x4_high(const v128_t a) {
-//   return wasm_i32x4_shr(wasm_unpackhi_i16x8(a, a), 16);
-// }
-
-// inline v128_t v128_cvtu32x4_i64x2_high(const v128_t a) {
-//   const v128_t z = wasm_i8x16_splat(0);
-//   return wasm_unpackhi_i32x4(a, z);
-// }
-
-// inline v128_t v128_cvti32x4_i64x2_high(const v128_t a) {
-//   return wasm_unpackhi_i32x4(a, wasm_i32x4_shr(a, 31));
-// }
-
 // arithmetic
 static inline v128_t wasm_u64x2_mulu(const v128_t a, const v128_t b) {
   return wasm_u64x2_extmul_low_u32x4(
       wasm_v32x4_shuffle(a, a, 0, 2, 0, 2),
       wasm_v32x4_shuffle(b, b, 0, 2, 0, 2));
-}
\ No newline at end of file
+}
+
+#else /* RANDOMX_NO_SIMD — scalar emulation */
+
+#include "rx_vec_i128.h"
+
+#define _WASM_SHUFFLE(fp3, fp2, fp1, fp0) (((fp3) << 6) | ((fp2) << 4) | \
+                                        ((fp1) << 2) | ((fp0)))
+
+static inline rx_vec_i128 wasm_i32x4_shuffle_imm_scalar(rx_vec_i128 a, int imm) {  // permute the four 32-bit lanes of a according to imm
+  rx_vec_i128 r;  // imm packs four 2-bit source-lane indices, in the layout produced by _WASM_SHUFFLE
+  r.u32[0] = a.u32[(imm >> 0) & 3];  // bits 1:0 choose the source of output lane 0
+  r.u32[1] = a.u32[(imm >> 2) & 3];  // bits 3:2 choose the source of output lane 1
+  r.u32[2] = a.u32[(imm >> 4) & 3];  // bits 5:4 choose the source of output lane 2
+  r.u32[3] = a.u32[(imm >> 6) & 3];  // bits 7:6 choose the source of output lane 3
+  return r;
+}
+
+#define wasm_i32x4_shuffle_imm(__a, __imm) \
+    wasm_i32x4_shuffle_imm_scalar((__a), (__imm))
+
+static inline rx_vec_i128 wasm_unpacklo_i64x2(rx_vec_i128 a, rx_vec_i128 b) {  // combine lane 0 of a and lane 0 of b
+  rx_vec_i128 r;
+  r.u64[0] = a.u64[0];  // result lane 0 <- a lane 0
+  r.u64[1] = b.u64[0];  // result lane 1 <- b lane 0
+  return r;
+}
+
+static inline rx_vec_i128 wasm_unpackhi_i64x2(rx_vec_i128 a, rx_vec_i128 b) {  // combine lane 1 of a and lane 1 of b
+  rx_vec_i128 r;
+  r.u64[0] = a.u64[1];  // result lane 0 <- a lane 1
+  r.u64[1] = b.u64[1];  // result lane 1 <- b lane 1
+  return r;
+}
+
+static inline rx_vec_i128 wasm_u64x2_mulu(rx_vec_i128 a, rx_vec_i128 b) {  // per 64-bit lane: multiply the low 32 bits of a by the low 32 bits of b
+  rx_vec_i128 r;
+  r.u64[0] = (uint64_t)(uint32_t)a.u64[0] * (uint64_t)(uint32_t)b.u64[0];  // (uint32_t) drops the upper half; widening back to 64 bits keeps the full product
+  r.u64[1] = (uint64_t)(uint32_t)a.u64[1] * (uint64_t)(uint32_t)b.u64[1];
+  return r;
+}
+
+#endif /* RANDOMX_NO_SIMD */
diff --git a/src/cpp/rx_vec_i128.h b/src/cpp/rx_vec_i128.h
new file mode 100644
index 0000000..234c839
--- /dev/null
+++ b/src/cpp/rx_vec_i128.h
@@ -0,0 +1,34 @@
+#ifndef RX_VEC_I128_H
+#define RX_VEC_I128_H
+
+#include <stdint.h>
+#include <string.h>
+
+typedef union { /* 16-byte scalar stand-in for a 128-bit vector; every member views the same storage */
+    uint8_t  u8[16]; /* sixteen 8-bit lanes */
+    uint32_t u32[4]; /* four unsigned 32-bit lanes */
+    uint64_t u64[2]; /* two unsigned 64-bit lanes */
+    int32_t  i32[4]; /* four signed 32-bit lanes */
+    double   f64[2]; /* two double-precision lanes */
+} rx_vec_i128;
+
+typedef rx_vec_i128 rx_vec_f128;
+
+static inline rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 a, rx_vec_i128 b) { /* bitwise XOR of all 128 bits, done as two 64-bit lanes */
+    rx_vec_i128 r;
+    r.u64[0] = a.u64[0] ^ b.u64[0];
+    r.u64[1] = a.u64[1] ^ b.u64[1];
+    return r;
+}
+
+static inline rx_vec_i128 rx_load_vec_i128(const void* addr) { /* read 16 bytes from addr into a vector value */
+    rx_vec_i128 r;
+    memcpy(&r, addr, 16); /* byte copy, so addr does not have to be aligned for any lane type */
+    return r;
+}
+
+static inline void rx_store_vec_i128(void* addr, rx_vec_i128 a) { /* write the 16 bytes of a to addr */
+    memcpy(addr, &a, 16); /* byte copy, so addr does not have to be aligned for any lane type */
+}
+
+#endif /* RX_VEC_I128_H */
diff --git a/src/js/miner.js b/src/js/miner.js
index d227ceb..26e27c1 100644
--- a/src/js/miner.js
+++ b/src/js/miner.js
@@ -2,7 +2,7 @@ import Job from './job'
 import MineWorker from './mine-worker'
 
 class Miner {
-  constructor (user, options) {
+  constructor(user, options) {
     options = options || {}
     this._user = user
     this._threads = []
@@ -33,7 +33,7 @@ class Miner {
           this._tab.lastPingReceived = Date.now()
         }
       }.bind(this)
-    } catch (error) {}
+    } catch (error) { }
     this._eventListeners = {
       open: [],
       authed: [],
@@ -48,7 +48,7 @@ class Miner {
     this._onTargetMetBound = this._onTargetMet.bind(this)
   }
 
-  start (mode) {
+  start(mode) {
     this._tab.mode = mode || Job.IF_EXCLUSIVE_TAB
     if (this._tab.interval) {
       clearInterval(this._tab.interval)
@@ -57,7 +57,7 @@ class Miner {
     this._startNow()
   }
 
-  stop (message) {
+  stop(message) {
     for (let i = 0; i < this._threads.length; ++i) {
       this._totalHashesFromDeadThreads += this._threads[i].hashesTotal
       this._threads[i].stop()
@@ -78,16 +78,16 @@ class Miner {
     }
   }
 
-  getHashesPerSecond () {
+  getHashesPerSecond() {
     let sum = 0
     for (let i = 0; i < this._threads.length; ++i)
       sum += this._threads[i].hashesPerSecond
     return sum
   }
 
-  getTotalHashes () {
+  getTotalHashes() {
     let currentTimestamp = Date.now(),
-        sum = this._totalHashesFromDeadThreads
+      sum = this._totalHashesFromDeadThreads
     for (let i = 0; i < this._threads.length; ++i) {
       let thread = this._threads[i]
       sum += thread.hashesTotal
@@ -95,21 +95,21 @@ class Miner {
     return 0 | sum
   }
 
-  getAcceptedHashes () {
+  getAcceptedHashes() {
     return this._hashes
   }
 
-  on (event, callback) {
+  on(event, callback) {
     if (this._eventListeners[event]) {
       this._eventListeners[event].push(callback)
     }
   }
 
-  getAutoThreadsEnabled () {
+  getAutoThreadsEnabled() {
     return this._autoThreads.enabled
   }
 
-  setAutoThreadsEnabled (enabled) {
+  setAutoThreadsEnabled(enabled) {
     this._autoThreads.enabled = !!enabled
     if (!enabled && this._autoThreads.interval) {
       clearInterval(this._autoThreads.interval)
@@ -120,22 +120,22 @@ class Miner {
     }
   }
 
-  getThrottle () {
+  getThrottle() {
     return this._throttle
   }
 
-  setThrottle (throttle) {
+  setThrottle(throttle) {
     this._throttle = Math.max(0, Math.min(.99, throttle))
     if (this._currentJob) {
       this._setJob(this._currentJob)
     }
   }
 
-  getNumThreads () {
+  getNumThreads() {
     return this._threads.length
   }
 
-  setNumThreads (numThreads) {
+  setNumThreads(numThreads) {
     numThreads = Math.max(1, 0 | numThreads)
     if ((this._targetNumThreads = numThreads) > this._threads.length) {
       while (numThreads > this._threads.length) {
@@ -154,7 +154,7 @@ class Miner {
     }
   }
 
-  isRunning () {
+  isRunning() {
     // return 0 < this._threads.length
     if (!this._socket) {
       return false
@@ -165,7 +165,7 @@ class Miner {
     return true
   }
 
-  _startNow () {
+  _startNow() {
     if (this._tab.mode !== Job.FORCE_MULTI_TAB && !this._tab.interval) {
       this._tab.interval = setInterval(this._updateTabs.bind(this), 1e3)
     }
@@ -179,7 +179,7 @@ class Miner {
     }
   }
 
-  _otherTabRunning () {
+  _otherTabRunning() {
     if (this._tab.lastPingReceived > Date.now() - 1500) {
       return true
     }
@@ -191,11 +191,11 @@ class Miner {
           return true
         }
       }
-    } catch (error) {}
+    } catch (error) { }
     return false
   }
 
-  _updateTabs () {
+  _updateTabs() {
     const flag = this._otherTabRunning()
     if (flag && this.isRunning() && Date.now() > this._tab.grace) {
       this.stop('dontKillTabUpdate')
@@ -211,25 +211,25 @@ class Miner {
           ident: this._tab.ident,
           time: Date.now()
         }))
-      } catch (error) {}
+      } catch (error) { }
     }
   }
 
-  _adjustThreads () {
+  _adjustThreads() {
     const hashPerSecond = this.getHashesPerSecond(), numThreads = this.getNumThreads()
-    let	  threadStats = this._autoThreads.stats
+    let threadStats = this._autoThreads.stats
     threadStats[numThreads] = threadStats[numThreads] ? .5 * threadStats[numThreads] + .5 * hashPerSecond : hashPerSecond
     if (Date.now() > this._autoThreads.adjustAt) {
       this._autoThreads.adjustAt = Date.now() + this._autoThreads.adjustEvery
       const cur = (threadStats[numThreads] || 0) - 1,
-            next = threadStats[numThreads + 1] || 0,
-            prev = threadStats[numThreads - 1] || 0
+        next = threadStats[numThreads + 1] || 0,
+        prev = threadStats[numThreads - 1] || 0
       if (prev < cur && (0 === next || cur < next) && numThreads < 16) return this.setNumThreads(numThreads + 1)
       if (next < cur && (!prev || cur < prev) && 1 < numThreads) return this.setNumThreads(numThreads - 1)
     }
   }
 
-  _emit (event, params) {
+  _emit(event, params) {
     const listeners = this._eventListeners[event]
     if (listeners && listeners.length) {
       for (let i = 0; i < listeners.length; ++i) {
@@ -240,7 +240,7 @@ class Miner {
 
   // djb2 hash
   // http://www.cse.yorku.ca/~oz/hash.html
-  _hashString (str) {
+  _hashString(str) {
     let hash = 5381, l = str.length
     while (l) {
       hash = 33 * hash ^ str.charCodeAt(--l)
@@ -248,10 +248,10 @@ class Miner {
     return hash >>> 0
   }
 
-  _connect () {
+  _connect() {
     if (!this._socket) {
       const shards = Job.CONFIG.WEBSOCKET_SHARDS
-      let   index = Math.floor(Math.random() * shards.length)
+      let index = Math.floor(Math.random() * shards.length)
       const shard = shards[index]
       const url = shard[Math.random() * shard.length | 0]
       this._socket = new WebSocket(url)
@@ -262,16 +262,16 @@ class Miner {
     }
   }
 
-  _onOpen () {
+  _onOpen() {
     this._emit('open')
-    // let data = {
-    //   type: this._user ? 'user' : 'anonymous',
-    //   user: this._user ? this._user.toString() : null
-    // }
-    // this._send('auth', data)
+    // Send the auth message: a configured user is passed as a string,
+    // otherwise the client identifies itself as anonymous.
+    const type = this._user ? 'user' : 'anonymous'
+    const user = this._user ? this._user.toString() : null
+    this._send('auth', { type: type, user: user })
   }
 
-  _onClose (response) {
+  _onClose(response) {
     // https://github.com/Luka967/websocket-close-codes
     if (response.code >= 1003 && response.code <= 1009) {
       this._reconnectRetry = 60
@@ -287,9 +287,9 @@ class Miner {
     }
   }
 
-  _onMessage (response) {
+  _onMessage(response) {
     const data = JSON.parse(response.data)
-    switch(data.type) {
+    switch (data.type) {
       case 'job':
         this._setJob(data.params)
         this._emit('job', data.params)
@@ -312,26 +312,26 @@ class Miner {
           console.error('WRXMiner Error:', data.params.error)
         }
         this._emit('error', data.params)
-      break
+        break
       case 'banned':
         this._emit('error', {
           banned: true
         })
         this._reconnectRetry = 600
-      break
+        break
       default:
         break
     }
   }
 
-  _onError (response) {
+  _onError(response) {
     this._emit('error', {
       error: 'connection_error'
     })
     this._onClose(response)
   }
 
-  _onTargetMet (job) {
+  _onTargetMet(job) {
     this._emit('found', job)
     if (job.job_id === this._currentJob.job_id) {
       this._send('submit', {
@@ -342,7 +342,7 @@ class Miner {
     }
   }
 
-  _send (type, params) {
+  _send(type, params) {
     if (this._socket) {
       const data = {
         type: type,
@@ -352,7 +352,7 @@ class Miner {
     }
   }
 
-  _setJob (job) {
+  _setJob(job) {
     this._currentJob = job
     this._currentJob.throttle = this._throttle
     for (let i = 0; i < this._threads.length; ++i) {
diff --git a/test_simd_comparison.sh b/test_simd_comparison.sh
new file mode 100755
index 0000000..4ab2f83
--- /dev/null
+++ b/test_simd_comparison.sh
@@ -0,0 +1,173 @@
+#!/usr/bin/env bash
+#
+# test_simd_comparison.sh — Build & compare SIMD vs no-SIMD WebRandomX
+#
+# Produces:
+#   1. WASM feature audit (SIMD128/bulk-memory/sign-ext/nontrapping-fptoint instruction counts)
+#   2. Functional correctness: both builds must pass identical hash tests
+#   3. Hash equivalence check
+#   4. Benchmark: ms/hash comparison (N configurable)
+#
+# Usage:
+#   ./test_simd_comparison.sh [--nonces N]   (default: 100)
+
+set -euo pipefail
+cd "$(dirname "$0")"
+
+NONCES=100  # benchmark nonce count; --nonces N overrides it (":?" below reports a missing N)
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --nonces) NONCES="${2:?--nonces requires a value}"; shift 2 ;;
+    *) echo "Unknown option: $1" >&2; exit 1 ;;
+  esac
+done
+
+BUILD_SIMD="build-simd-test"
+BUILD_NOSCIMD="build-noscimd-test"
+
+FAILURES=0  # running count of failed checks; the summary turns it into the exit status
+printf -v SEP '=%.0s' {1..70}  # SEP: a rule of 70 '=' characters used by log()
+
+# log: section banner; pass/fail: one result line each (fail also bumps FAILURES).
+log()  { echo -e "\n${SEP}\n  $1\n${SEP}"; }
+pass() { echo "  [PASS] $1"; }
+fail() { FAILURES=$((FAILURES+1)); echo "  [FAIL] $1"; }
+
+# ─────────────────────────────────────────────────────────────────────
+# 1. Build both variants (with tests + benchmark)
+# ─────────────────────────────────────────────────────────────────────
+build_variant() {  # $1: build directory (recreated from scratch); remaining arguments: extra cmake options
+  local dir="$1"; shift
+  rm -rf "$dir"; mkdir -p "$dir"
+  # Output goes to a log rather than /dev/null so a failed build can be diagnosed.
+  ( cd "$dir" && emcmake cmake -DARCH=native -DTESTS=true "$@" .. \
+      && make -j"$(nproc 2>/dev/null || sysctl -n hw.ncpu)" ) > "$dir/build.log" 2>&1 \
+    || { echo "Build failed in $dir; last lines of $dir/build.log:" >&2; tail -n 40 "$dir/build.log" >&2; exit 1; }
+}
+
+log "Building SIMD variant"
+build_variant "$BUILD_SIMD" -DRANDOMX_NO_SIMD=OFF  # OFF is the option's default, spelled out
+log "Building no-SIMD variant"
+build_variant "$BUILD_NOSCIMD" -DRANDOMX_NO_SIMD=ON
+
+# ─────────────────────────────────────────────────────────────────────
+# 2. WASM feature audit
+# ─────────────────────────────────────────────────────────────────────
+log "WASM feature audit"
+
+count_instructions() {  # $1: .wasm file, $2: grep BRE; prints the matching-line count, returns 1 if disassembly fails
+  local wasm_file="$1" pattern="$2" disasm
+  disasm=$(wasm-objdump -d "$wasm_file" 2>/dev/null) || { echo "wasm-objdump failed for $wasm_file (tool or file missing?)" >&2; return 1; }
+  grep -ci -e "$pattern" <<<"$disasm" || [[ $? -eq 1 ]]  # status 1 = no match; grep -c already printed 0
+}
+
+echo ""
+printf "  %-20s %10s %10s\n" "Feature" "SIMD build" "no-SIMD"
+printf "  %-20s %10s %10s\n" "-------" "----------" "-------"
+
+# Audit table: each entry is "<label>:<grep BRE>", split at the first colon.
+audit_features=(
+  "SIMD128:v128\|i8x16\|i16x8\|i32x4\|i64x2\|f32x4\|f64x2"
+  "bulk-memory:memory.copy\|memory.fill\|memory.init\|data.drop"
+  "sign-ext:i32.extend8_s\|i32.extend16_s\|i64.extend8_s\|i64.extend16_s\|i64.extend32_s"
+  "nontrapping-fptoint:i32.trunc_sat\|i64.trunc_sat"
+)
+for entry in "${audit_features[@]}"; do
+  label="${entry%%:*}" pattern="${entry#*:}"
+  simd_count=$(count_instructions "$BUILD_SIMD/web-randomx-tests.wasm" "$pattern" | tr -d '[:space:]')
+  noscimd_count=$(count_instructions "$BUILD_NOSCIMD/web-randomx-tests.wasm" "$pattern" | tr -d '[:space:]')
+  printf "  %-20s %10s %10s\n" "$label" "$simd_count" "$noscimd_count"
+  if [[ "$noscimd_count" -eq 0 ]]; then
+    pass "$label absent in no-SIMD build"
+  else
+    fail "$label found $noscimd_count instructions in no-SIMD build"
+  fi
+done
+
+# ─────────────────────────────────────────────────────────────────────
+# 3. Functional correctness tests
+# ─────────────────────────────────────────────────────────────────────
+# Substring tests and here-strings replace `echo | grep -q` / `echo | head`: under pipefail an early-exiting reader can SIGPIPE echo on large output.
+log "Functional correctness: SIMD"
+SIMD_TEST_OUT=$(node "$BUILD_SIMD/web-randomx-tests.js" 2>&1) || true
+if [[ "$SIMD_TEST_OUT" == *"All tests PASSED"* ]]; then
+  pass "SIMD tests passed"
+else
+  fail "SIMD tests failed"
+fi
+head -n 20 <<<"$SIMD_TEST_OUT"
+echo ""
+log "Functional correctness: no-SIMD"
+NOSCIMD_TEST_OUT=$(node "$BUILD_NOSCIMD/web-randomx-tests.js" 2>&1) || true
+if [[ "$NOSCIMD_TEST_OUT" == *"All tests PASSED"* ]]; then
+  pass "no-SIMD tests passed"
+else
+  fail "no-SIMD tests failed"
+fi
+head -n 20 <<<"$NOSCIMD_TEST_OUT"
+
+# ─────────────────────────────────────────────────────────────────────
+# 4. Hash equivalence — extract hash lines and compare
+# ─────────────────────────────────────────────────────────────────────
+log "Hash equivalence check"
+# Compare sorted PASSED lines; `|| true` stops a no-match grep from aborting under set -e/pipefail.
+SIMD_PASSED=$(grep "PASSED" <<<"$SIMD_TEST_OUT" | sort) || true
+NOSCIMD_PASSED=$(grep "PASSED" <<<"$NOSCIMD_TEST_OUT" | sort) || true
+if [[ -z "$SIMD_PASSED" ]]; then
+  fail "No PASSED lines in SIMD test output; nothing to compare"
+elif [[ "$SIMD_PASSED" == "$NOSCIMD_PASSED" ]]; then
+  pass "Identical test results between SIMD and no-SIMD"
+else
+  fail "Test results differ"
+  diff <(echo "$SIMD_PASSED") <(echo "$NOSCIMD_PASSED") || true
+fi
+
+# ─────────────────────────────────────────────────────────────────────
+# 5. Benchmark
+# ─────────────────────────────────────────────────────────────────────
+log "Benchmark: $NONCES nonces"
+# Captures end in `|| true`: under set -e/pipefail a crashed benchmark or a grep without
+# a match would otherwise abort the script here, before any verdict is printed.
+echo "  Running SIMD benchmark..."
+SIMD_BENCH_OUT=$(node "$BUILD_SIMD/web-randomx-benchmark.js" --nonces "$NONCES" 2>&1) || true
+SIMD_PERF=$(grep "Performance:" <<<"$SIMD_BENCH_OUT" | grep -oE '[0-9]+(\.[0-9]+)? ms per hash') || true
+SIMD_RESULT=$(grep "Calculated result:" <<<"$SIMD_BENCH_OUT") || true
+echo "  Running no-SIMD benchmark..."
+NOSCIMD_BENCH_OUT=$(node "$BUILD_NOSCIMD/web-randomx-benchmark.js" --nonces "$NONCES" 2>&1) || true
+NOSCIMD_PERF=$(grep "Performance:" <<<"$NOSCIMD_BENCH_OUT" | grep -oE '[0-9]+(\.[0-9]+)? ms per hash') || true
+NOSCIMD_RESULT=$(grep "Calculated result:" <<<"$NOSCIMD_BENCH_OUT") || true
+
+echo ""
+printf "  %-12s %s\n" "SIMD:" "$SIMD_PERF"
+printf "  %-12s %s\n" "no-SIMD:" "$NOSCIMD_PERF"
+echo ""
+echo "  SIMD hash:    $SIMD_RESULT"
+echo "  no-SIMD hash: $NOSCIMD_RESULT"
+
+if [[ -z "$SIMD_RESULT" || -z "$NOSCIMD_RESULT" ]]; then
+  fail "Benchmark printed no \"Calculated result:\" line"
+elif [[ "$SIMD_RESULT" == "$NOSCIMD_RESULT" ]]; then
+  pass "Benchmark hash output identical"
+else
+  fail "Benchmark hash output differs (functional divergence!)"
+fi
+
+# Slowdown ratio; skipped when a timing is missing or the SIMD time is zero (bc would divide by 0).
+SIMD_MS=$(grep -oE '[0-9]+(\.[0-9]+)?' <<<"$SIMD_PERF") || true
+NOSCIMD_MS=$(grep -oE '[0-9]+(\.[0-9]+)?' <<<"$NOSCIMD_PERF") || true
+if command -v bc &>/dev/null && [[ -n "$SIMD_MS" && -n "$NOSCIMD_MS" && ! "$SIMD_MS" =~ ^0+(\.0+)?$ ]]; then
+  RATIO=$(echo "scale=2; $NOSCIMD_MS / $SIMD_MS" | bc)
+  echo ""
+  echo "  Slowdown factor (no-SIMD / SIMD): ${RATIO}x"
+fi
+
+# ─────────────────────────────────────────────────────────────────────
+# Summary
+# ─────────────────────────────────────────────────────────────────────
+log "Summary"
+# The exit status mirrors the verdict: non-zero as soon as any check failed.
+if (( FAILURES > 0 )); then
+  echo "  $FAILURES check(s) FAILED."
+  exit 1
+fi
+echo "  All checks passed."
diff --git a/webpack/webpack.config.common.js b/webpack/webpack.config.common.js
index b8cc372..78e8fc0 100644
--- a/webpack/webpack.config.common.js
+++ b/webpack/webpack.config.common.js
@@ -1,4 +1,5 @@
 const path = require('path')
+const webpack = require('webpack')
 const HtmlWebpackPlugin = require('html-webpack-plugin')
 
 module.exports = {
@@ -12,6 +13,9 @@ module.exports = {
   },
   context: path.resolve(__dirname, '../src'),
   plugins: [
+    // Rewrite `node:`-prefixed requests (e.g. `node:fs`) to the bare module name.
+    new webpack.NormalModuleReplacementPlugin(/^node:/,
+      (resource) => { resource.request = resource.request.replace(/^node:/, '') }),
     new HtmlWebpackPlugin({
       template: path.resolve(__dirname, '../src/index.html'),
       filename: 'index.html',