From 0eae3f72f2ac729b17c7c97874569ef06384c702 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 11 Feb 2026 11:48:16 +0100 Subject: [PATCH 01/37] fix: correct frame data extraction --- .../object_detection/ObjectDetection.cpp | 123 ++++++++++++++++-- 1 file changed, 111 insertions(+), 12 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index 8b5bc022f..f17a4f074 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -2,14 +2,17 @@ #include #include +#include #include +#include +#include namespace rnexecutorch::models::object_detection { ObjectDetection::ObjectDetection( const std::string &modelSource, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { auto inputTensors = getAllInputShapes(); if (inputTensors.size() == 0) { throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, @@ -29,14 +32,49 @@ ObjectDetection::ObjectDetection( modelInputShape[modelInputShape.size() - 2]); } +cv::Mat ObjectDetection::preprocessFrame(const cv::Mat &frame) const { + // Get target size from model input shape + const std::vector tensorDims = getAllInputShapes()[0]; + cv::Size tensorSize = cv::Size(tensorDims[tensorDims.size() - 1], + tensorDims[tensorDims.size() - 2]); + + cv::Mat rgb; + + // Convert RGBA/BGRA to RGB if needed (for VisionCamera frames) + if (frame.channels() == 4) { +// Platform-specific color conversion: +// iOS uses BGRA format, Android uses RGBA format +#ifdef __APPLE__ + // iOS: BGRA β†’ RGB + cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); +#else + // Android: RGBA β†’ RGB + cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); +#endif + } else if 
(frame.channels() == 3) { + // Already RGB + rgb = frame; + } else { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported frame format: %d channels", frame.channels()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + // Only resize if dimensions don't match + if (rgb.size() != tensorSize) { + cv::Mat resized; + cv::resize(rgb, resized, tensorSize); + return resized; + } + + return rgb; +} + std::vector ObjectDetection::postprocess(const std::vector &tensors, cv::Size originalSize, double detectionThreshold) { - if (detectionThreshold <= 0 || detectionThreshold > 1) { - throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig, - "Detection threshold must be greater than 0 " - "and less than or equal to 1."); - } float widthRatio = static_cast(originalSize.width) / modelImageSize.width; float heightRatio = @@ -70,14 +108,23 @@ ObjectDetection::postprocess(const std::vector &tensors, scores[i]); } - std::vector output = utils::nonMaxSuppression(detections); - return output; + return utils::nonMaxSuppression(detections); } std::vector -ObjectDetection::generate(std::string imageSource, double detectionThreshold) { - auto [inputTensor, originalSize] = - image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]); +ObjectDetection::runInference(cv::Mat image, double detectionThreshold) { + std::lock_guard lock(inference_mutex_); + + // Store original size for postprocessing + cv::Size originalSize = image.size(); + + // Preprocess the image using model-specific preprocessing + cv::Mat preprocessed = preprocessFrame(image); + + // Create tensor and run inference + const std::vector tensorDims = getAllInputShapes()[0]; + auto inputTensor = + image_processing::getTensorFromMatrix(tensorDims, preprocessed); auto forwardResult = BaseModel::forward(inputTensor); if (!forwardResult.ok()) { @@ -88,4 +135,56 @@ ObjectDetection::generate(std::string imageSource, double 
detectionThreshold) { return postprocess(forwardResult.get(), originalSize, detectionThreshold); } -} // namespace rnexecutorch::models::object_detection + +std::vector +ObjectDetection::generateFromString(std::string imageSource, + double detectionThreshold) { + // Read image using OpenCV (BGR format) + cv::Mat image = image_processing::readImage(imageSource); + + // Convert BGR to RGB (OpenCV imread returns BGR) + cv::Mat imageRGB; + cv::cvtColor(image, imageRGB, cv::COLOR_BGR2RGB); + + // Use the internal helper - it handles locking, preprocessing, and inference + return runInference(imageRGB, detectionThreshold); +} + +std::vector +ObjectDetection::generateFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData, + double detectionThreshold) { + // Try-lock: skip frame if model is busy (non-blocking for camera) + if (!inference_mutex_.try_lock()) { + return {}; // Return empty vector, don't block camera thread + } + + // Extract frame (under lock to ensure thread safety) + cv::Mat frame; + { + std::lock_guard lock(inference_mutex_, std::adopt_lock); + auto frameObj = frameData.asObject(runtime); + frame = + rnexecutorch::utils::FrameProcessor::extractFrame(runtime, frameObj); + } + // Lock is automatically released here when going out of scope + + // Use the internal helper - it handles locking, preprocessing, and inference + return runInference(frame, detectionThreshold); +} + +std::vector +ObjectDetection::generateFromPixels(jsi::Runtime &runtime, + const jsi::Value &pixelData, + double detectionThreshold) { + // Convert JSI value to JSTensorViewIn + auto tensorView = + jsi_conversion::getValue(pixelData, runtime); + + // Extract raw pixel data to cv::Mat + cv::Mat image = extractFromPixels(tensorView); + + // Use the internal helper - it handles locking, preprocessing, and inference + return runInference(image, detectionThreshold); +} +} // namespace rnexecutorch::models::object_detection \ No newline at end of file From 
65667ad331dd55635bb97c345b27b97eff8b0789 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Thu, 12 Feb 2026 14:24:02 +0100 Subject: [PATCH 02/37] feat: frame extractor for zero-copy approach --- .../rnexecutorch/utils/FrameExtractor.cpp | 151 ++++++++++++++++++ .../rnexecutorch/utils/FrameExtractor.h | 60 +++++++ .../src/types/common.ts | 33 ++++ yarn.lock | 11 +- 4 files changed, 245 insertions(+), 10 deletions(-) create mode 100644 packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp create mode 100644 packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp new file mode 100644 index 000000000..f64855131 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp @@ -0,0 +1,151 @@ +#include "FrameExtractor.h" +#include + +#ifdef __APPLE__ +#import +#endif + +#ifdef __ANDROID__ +#if __ANDROID_API__ >= 26 +#include +#endif +#endif + +namespace rnexecutorch { +namespace utils { + +cv::Mat FrameExtractor::extractFromNativeBuffer(uint64_t bufferPtr) { +#ifdef __APPLE__ + return extractFromCVPixelBuffer(reinterpret_cast(bufferPtr)); +#elif defined(__ANDROID__) + return extractFromAHardwareBuffer(reinterpret_cast(bufferPtr)); +#else + throw std::runtime_error("NativeBuffer not supported on this platform"); +#endif +} + +#ifdef __APPLE__ +cv::Mat FrameExtractor::extractFromCVPixelBuffer(void *pixelBuffer) { + CVPixelBufferRef buffer = static_cast(pixelBuffer); + + // Get buffer properties + size_t width = CVPixelBufferGetWidth(buffer); + size_t height = CVPixelBufferGetHeight(buffer); + size_t bytesPerRow = CVPixelBufferGetBytesPerRow(buffer); + OSType pixelFormat = CVPixelBufferGetPixelFormatType(buffer); + + // Lock the buffer (Vision Camera should have already locked it, but ensure) + 
CVPixelBufferLockBaseAddress(buffer, kCVPixelBufferLock_ReadOnly); + void *baseAddress = CVPixelBufferGetBaseAddress(buffer); + + cv::Mat mat; + + // Log pixel format once for debugging + static bool loggedPixelFormat = false; + if (!loggedPixelFormat) { + log(LOG_LEVEL::Debug, "CVPixelBuffer format code: ", pixelFormat); + loggedPixelFormat = true; + } + + if (pixelFormat == kCVPixelFormatType_32BGRA) { + // BGRA format (most common on iOS when using pixelFormat: 'rgb') + if (!loggedPixelFormat) { + log(LOG_LEVEL::Debug, "Extracting from CVPixelBuffer: BGRA format, ", + width, "x", height, ", stride: ", bytesPerRow); + } + mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC4, + baseAddress, bytesPerRow); + } else if (pixelFormat == kCVPixelFormatType_32RGBA) { + // RGBA format + if (!loggedPixelFormat) { + log(LOG_LEVEL::Debug, "Extracting from CVPixelBuffer: RGBA format, ", + width, "x", height, ", stride: ", bytesPerRow); + } + mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC4, + baseAddress, bytesPerRow); + } else if (pixelFormat == kCVPixelFormatType_24RGB) { + // RGB format + if (!loggedPixelFormat) { + log(LOG_LEVEL::Debug, "Extracting from CVPixelBuffer: RGB format, ", + width, "x", height, ", stride: ", bytesPerRow); + } + mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC3, + baseAddress, bytesPerRow); + } else { + CVPixelBufferUnlockBaseAddress(buffer, kCVPixelBufferLock_ReadOnly); + throw std::runtime_error("Unsupported CVPixelBuffer format: " + + std::to_string(pixelFormat)); + } + + // Note: We don't unlock here - Vision Camera manages the lifecycle + // When frame.dispose() is called, Vision Camera will unlock and release + + return mat; +} +#endif + +#ifdef __ANDROID__ +cv::Mat FrameExtractor::extractFromAHardwareBuffer(void *hardwareBuffer) { +#if __ANDROID_API__ >= 26 + AHardwareBuffer *buffer = static_cast(hardwareBuffer); + + // Get buffer description + AHardwareBuffer_Desc desc; + 
AHardwareBuffer_describe(buffer, &desc); + + // Lock the buffer for CPU read access + void *data = nullptr; + int lockResult = AHardwareBuffer_lock( + buffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, nullptr, &data); + + if (lockResult != 0) { + throw std::runtime_error("Failed to lock AHardwareBuffer"); + } + + cv::Mat mat; + + // Log format once for debugging + static bool loggedFormat = false; + if (!loggedFormat) { + log(LOG_LEVEL::Debug, "AHardwareBuffer format code: ", desc.format); + loggedFormat = true; + } + + if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM) { + // RGBA format (expected when using pixelFormat: 'rgb' on Android) + if (!loggedFormat) { + log(LOG_LEVEL::Debug, "Extracting from AHardwareBuffer: RGBA format, ", + desc.width, "x", desc.height, ", stride: ", desc.stride * 4); + } + mat = cv::Mat(desc.height, desc.width, CV_8UC4, data, desc.stride * 4); + } else if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM) { + // RGBX format (treated as RGBA) + if (!loggedFormat) { + log(LOG_LEVEL::Debug, "Extracting from AHardwareBuffer: RGBX format, ", + desc.width, "x", desc.height, ", stride: ", desc.stride * 4); + } + mat = cv::Mat(desc.height, desc.width, CV_8UC4, data, desc.stride * 4); + } else if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM) { + // RGB format (less common) + if (!loggedFormat) { + log(LOG_LEVEL::Debug, "Extracting from AHardwareBuffer: RGB format, ", + desc.width, "x", desc.height, ", stride: ", desc.stride * 3); + } + mat = cv::Mat(desc.height, desc.width, CV_8UC3, data, desc.stride * 3); + } else { + AHardwareBuffer_unlock(buffer, nullptr); + throw std::runtime_error("Unsupported AHardwareBuffer format: " + + std::to_string(desc.format)); + } + + // Note: We don't unlock here - Vision Camera manages the lifecycle + + return mat; +#else + throw std::runtime_error("AHardwareBuffer requires Android API 26+"); +#endif // __ANDROID_API__ >= 26 +} +#endif // __ANDROID__ + +} // namespace utils +} // namespace 
rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h new file mode 100644 index 000000000..a90e6ad23 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h @@ -0,0 +1,60 @@ +#pragma once + +#include +#include + +namespace rnexecutorch { +namespace utils { + +/** + * @brief Utility class for extracting cv::Mat from native platform buffers + * + * Provides zero-copy extraction of frames from: + * - iOS: CVPixelBufferRef + * - Android: AHardwareBuffer + */ +class FrameExtractor { +public: + /** + * @brief Extract cv::Mat from a native buffer pointer + * + * @param bufferPtr Platform-specific buffer pointer (uint64_t) + * - iOS: CVPixelBufferRef + * - Android: AHardwareBuffer* + * @return cv::Mat wrapping the buffer data (zero-copy) + * + * @note The returned cv::Mat does not own the data. + * The caller must ensure the buffer remains valid. + * @note The buffer must be locked before calling and unlocked after use. 
+ */ + static cv::Mat extractFromNativeBuffer(uint64_t bufferPtr); + +#ifdef __APPLE__ + /** + * @brief Extract cv::Mat from CVPixelBuffer (iOS) + * + * @param pixelBuffer CVPixelBufferRef pointer + * @return cv::Mat wrapping the pixel buffer data + * + * @note Assumes buffer is already locked by Vision Camera + * @note Supports kCVPixelFormatType_32BGRA and kCVPixelFormatType_24RGB + */ + static cv::Mat extractFromCVPixelBuffer(void *pixelBuffer); +#endif + +#ifdef __ANDROID__ + /** + * @brief Extract cv::Mat from AHardwareBuffer (Android) + * + * @param hardwareBuffer AHardwareBuffer* pointer + * @return cv::Mat wrapping the hardware buffer data + * + * @note Assumes buffer is already locked by Vision Camera + * @note Supports AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM and R8G8B8_UNORM + */ + static cv::Mat extractFromAHardwareBuffer(void *hardwareBuffer); +#endif +}; + +} // namespace utils +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/src/types/common.ts b/packages/react-native-executorch/src/types/common.ts index 384caa861..439e18597 100644 --- a/packages/react-native-executorch/src/types/common.ts +++ b/packages/react-native-executorch/src/types/common.ts @@ -151,3 +151,36 @@ export type LabelEnum = Readonly>; * @category Types */ export type Triple = readonly [T, T, T]; + +/** + * Frame data for vision model processing. + * Supports two modes: + * 1. ArrayBuffer mode (with memory copy) - Compatible with all platforms + * 2. NativeBuffer mode (zero-copy) - Better performance with Vision Camera v5 + */ +export interface FrameData { + /** + * Raw pixel data as ArrayBuffer (requires memory copy). + * Use this for compatibility or when getNativeBuffer is not available. + */ + data?: ArrayBuffer | ArrayBufferLike; + + /** + * Pointer to native platform buffer (zero-copy, best performance). 
+ * - On iOS: CVPixelBufferRef pointer + * - On Android: AHardwareBuffer* pointer + * + * Obtain from Vision Camera v5: `frame.getNativeBuffer().pointer` + */ + nativeBuffer?: bigint; + + /** + * Frame width in pixels + */ + width: number; + + /** + * Frame height in pixels + */ + height: number; +} diff --git a/yarn.lock b/yarn.lock index 436005c8d..90ac56b11 100644 --- a/yarn.lock +++ b/yarn.lock @@ -13799,16 +13799,7 @@ __metadata: languageName: node linkType: hard -"semver@npm:^7.1.3, semver@npm:^7.3.5, semver@npm:^7.3.7, semver@npm:^7.5.2, semver@npm:^7.5.3, semver@npm:^7.5.4, semver@npm:^7.6.0, semver@npm:^7.7.1": - version: 7.7.3 - resolution: "semver@npm:7.7.3" - bin: - semver: bin/semver.js - checksum: 10/8dbc3168e057a38fc322af909c7f5617483c50caddba135439ff09a754b20bdd6482a5123ff543dad4affa488ecf46ec5fb56d61312ad20bb140199b88dfaea9 - languageName: node - linkType: hard - -"semver@npm:^7.7.3": +"semver@npm:^7.1.3, semver@npm:^7.3.5, semver@npm:^7.3.7, semver@npm:^7.5.2, semver@npm:^7.5.3, semver@npm:^7.5.4, semver@npm:^7.6.0, semver@npm:^7.7.1, semver@npm:^7.7.3": version: 7.7.4 resolution: "semver@npm:7.7.4" bin: From daed38a3bb216c6b4fa40354f029650c13265969 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Fri, 13 Feb 2026 11:29:23 +0100 Subject: [PATCH 03/37] chore: num minSdkVersion to 26 --- packages/react-native-executorch/android/gradle.properties | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/react-native-executorch/android/gradle.properties b/packages/react-native-executorch/android/gradle.properties index b30a8b11d..97cdd1854 100644 --- a/packages/react-native-executorch/android/gradle.properties +++ b/packages/react-native-executorch/android/gradle.properties @@ -1,5 +1,5 @@ RnExecutorch_kotlinVersion=1.7.0 -RnExecutorch_minSdkVersion=21 +RnExecutorch_minSdkVersion=26 RnExecutorch_targetSdkVersion=31 RnExecutorch_compileSdkVersion=31 -RnExecutorch_ndkversion=21.4.7075529 +RnExecutorch_ndkversion=21.4.7075529 
\ No newline at end of file From 3d534dea4875ac316bcf0d93a13ef924ec13d34d Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 16 Feb 2026 10:37:11 +0100 Subject: [PATCH 04/37] feat: unify frame extraction and preprocessing --- .../rnexecutorch/models/VisionModel.cpp | 20 +++ .../common/rnexecutorch/models/VisionModel.h | 139 +++++++++++++++++ .../models/classification/Classification.cpp | 2 +- .../rnexecutorch/utils/FrameProcessor.cpp | 142 ++++++++++++++++++ .../rnexecutorch/utils/FrameProcessor.h | 109 ++++++++++++++ .../src/modules/BaseModule.ts | 79 +++++++++- yarn.lock | 11 +- 7 files changed, 498 insertions(+), 4 deletions(-) create mode 100644 packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp create mode 100644 packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h create mode 100644 packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp create mode 100644 packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp new file mode 100644 index 000000000..671ed03c8 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -0,0 +1,20 @@ +#include "VisionModel.h" +#include + +namespace rnexecutorch { +namespace models { + +using namespace facebook; + +cv::Mat VisionModel::extractAndPreprocess(jsi::Runtime &runtime, + const jsi::Value &frameData) const { + // Extract frame using FrameProcessor utility + auto frameObj = frameData.asObject(runtime); + cv::Mat frame = utils::FrameProcessor::extractFrame(runtime, frameObj); + + // Apply model-specific preprocessing + return preprocessFrame(frame); +} + +} // namespace models +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h 
b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h new file mode 100644 index 000000000..11da49547 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h @@ -0,0 +1,139 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace rnexecutorch { +namespace models { + +/** + * @brief Base class for computer vision models that support real-time camera + * input + * + * VisionModel extends BaseModel with thread-safe inference and automatic frame + * extraction from VisionCamera. This class is designed for models that need to + * process camera frames in real-time (e.g., at 30fps). + * + * Thread Safety: + * - All inference operations are protected by a mutex + * - generateFromFrame() uses try_lock() to skip frames when the model is busy + * - This prevents blocking the camera thread and maintains smooth frame rates + * + * Usage: + * Subclasses should: + * 1. Inherit from VisionModel instead of BaseModel + * 2. Implement preprocessFrame() with model-specific preprocessing + * 3. Use inference_mutex_ when calling forward() in custom generate methods + * 4. Use lock_guard for blocking operations (JS API) + * 5. Use try_lock() for non-blocking operations (camera API) + * + * Example: + * @code + * class Classification : public VisionModel { + * public: + * std::unordered_map + * generateFromFrame(jsi::Runtime& runtime, const jsi::Value& frameValue) { + * // try_lock is handled automatically + * auto frameObject = frameValue.asObject(runtime); + * cv::Mat frame = FrameExtractor::extractFrame(runtime, frameObject); + * + * // Lock before inference + * if (!inference_mutex_.try_lock()) { + * return {}; // Skip frame if busy + * } + * std::lock_guard lock(inference_mutex_, std::adopt_lock); + * + * auto preprocessed = preprocessFrame(frame); + * // ... 
run inference + * } + * }; + * @endcode + */ +class VisionModel : public BaseModel { +public: + /** + * @brief Construct a VisionModel with the same parameters as BaseModel + * + * VisionModel uses the same construction pattern as BaseModel, just adding + * thread-safety on top. + */ + VisionModel(const std::string &modelSource, + std::shared_ptr callInvoker) + : BaseModel(modelSource, callInvoker) {} + + /** + * @brief Virtual destructor for proper cleanup in derived classes + */ + virtual ~VisionModel() = default; + +protected: + /** + * @brief Mutex to ensure thread-safe inference + * + * This mutex protects against race conditions when: + * - generateFromFrame() is called from VisionCamera worklet thread (30fps) + * - generate() is called from JavaScript thread simultaneously + * + * Usage guidelines: + * - Use std::lock_guard for blocking operations (JS API can wait) + * - Use try_lock() for non-blocking operations (camera should skip frames) + * + * @note Marked mutable to allow locking in const methods if needed + */ + mutable std::mutex inference_mutex_; + + /** + * @brief Preprocess a camera frame for model input + * + * This method should implement model-specific preprocessing such as: + * - Resizing to the model's expected input size + * - Color space conversion (e.g., BGR to RGB) + * - Normalization + * - Any other model-specific transformations + * + * @param frame Input frame from camera (already extracted and rotated by + * FrameExtractor) + * @return Preprocessed cv::Mat ready for tensor conversion + * + * @note The input frame is already in RGB format and rotated 90Β° clockwise + * @note This method is called under mutex protection in generateFromFrame() + */ + virtual cv::Mat preprocessFrame(const cv::Mat &frame) const = 0; + + /** + * @brief Extract and preprocess frame from VisionCamera in one call + * + * This is a convenience method that combines frame extraction and + * preprocessing. 
It handles both nativeBuffer (zero-copy) and ArrayBuffer + * paths automatically. + * + * @param runtime JSI runtime + * @param frameData JSI value containing frame data from VisionCamera + * + * @return Preprocessed cv::Mat ready for tensor conversion + * + * @throws std::runtime_error if frame extraction fails + * + * @note This method does NOT acquire the inference mutex - caller is + * responsible + * @note Typical usage: + * @code + * cv::Mat preprocessed = extractAndPreprocess(runtime, frameData); + * auto tensor = image_processing::getTensorFromMatrix(dims, preprocessed); + * @endcode + */ + cv::Mat extractAndPreprocess(jsi::Runtime &runtime, + const jsi::Value &frameData) const; +}; + +} // namespace models +// Register VisionModel constructor traits +// Even though VisionModel is abstract, the metaprogramming system needs to know +// its constructor signature for derived classes +REGISTER_CONSTRUCTOR(models::VisionModel, std::string, + std::shared_ptr); + +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp index 0fba07108..b9fad1b88 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp @@ -73,4 +73,4 @@ Classification::postprocess(const Tensor &tensor) { return probs; } -} // namespace rnexecutorch::models::classification +} // namespace rnexecutorch::models::classification \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp new file mode 100644 index 000000000..02faa072d --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp @@ -0,0 +1,142 @@ 
+#include "FrameProcessor.h" +#include "FrameExtractor.h" +#include +#include + +namespace rnexecutorch { +namespace utils { + +cv::Mat FrameProcessor::extractFrame(jsi::Runtime &runtime, + const jsi::Object &frameData) { + // Get frame dimensions + int width = + static_cast(frameData.getProperty(runtime, "width").asNumber()); + int height = + static_cast(frameData.getProperty(runtime, "height").asNumber()); + + // Try zero-copy path first (nativeBuffer) + if (hasNativeBuffer(runtime, frameData)) { + static bool loggedPath = false; + if (!loggedPath) { + log(LOG_LEVEL::Debug, "FrameProcessor: Using zero-copy nativeBuffer"); + loggedPath = true; + } + + try { + return extractFromNativeBuffer(runtime, frameData, width, height); + } catch (const std::exception &e) { + log(LOG_LEVEL::Debug, + "FrameProcessor: nativeBuffer extraction failed: ", e.what()); + log(LOG_LEVEL::Debug, "FrameProcessor: Falling back to ArrayBuffer"); + } + } + + // Fallback to ArrayBuffer path (with copy) + if (frameData.hasProperty(runtime, "data")) { + static bool loggedPath = false; + if (!loggedPath) { + log(LOG_LEVEL::Debug, "FrameProcessor: Using ArrayBuffer (with copy)"); + loggedPath = true; + } + + return extractFromArrayBuffer(runtime, frameData, width, height); + } + + // No valid frame data source + throw std::runtime_error( + "FrameProcessor: No valid frame data (neither nativeBuffer nor data " + "property found)"); +} + +cv::Size FrameProcessor::getFrameSize(jsi::Runtime &runtime, + const jsi::Object &frameData) { + if (!frameData.hasProperty(runtime, "width") || + !frameData.hasProperty(runtime, "height")) { + throw std::runtime_error("FrameProcessor: Frame data missing width or " + "height property"); + } + + int width = + static_cast(frameData.getProperty(runtime, "width").asNumber()); + int height = + static_cast(frameData.getProperty(runtime, "height").asNumber()); + + return cv::Size(width, height); +} + +bool FrameProcessor::hasNativeBuffer(jsi::Runtime &runtime, + const 
jsi::Object &frameData) { + return frameData.hasProperty(runtime, "nativeBuffer"); +} + +cv::Mat FrameProcessor::extractFromNativeBuffer(jsi::Runtime &runtime, + const jsi::Object &frameData, + int width, int height) { + auto nativeBufferValue = frameData.getProperty(runtime, "nativeBuffer"); + + // Handle bigint pointer value from JavaScript + uint64_t bufferPtr = static_cast( + nativeBufferValue.asBigInt(runtime).asUint64(runtime)); + + // Use FrameExtractor to get cv::Mat from platform-specific buffer + cv::Mat frame = FrameExtractor::extractFromNativeBuffer(bufferPtr); + + // Validate extracted frame dimensions match expected + if (frame.cols != width || frame.rows != height) { + log(LOG_LEVEL::Debug, "FrameProcessor: Dimension mismatch - expected ", + width, "x", height, " but got ", frame.cols, "x", frame.rows); + } + + return frame; +} + +cv::Mat FrameProcessor::extractFromArrayBuffer(jsi::Runtime &runtime, + const jsi::Object &frameData, + int width, int height) { + auto pixelData = frameData.getProperty(runtime, "data"); + auto arrayBuffer = pixelData.asObject(runtime).getArrayBuffer(runtime); + uint8_t *data = arrayBuffer.data(runtime); + size_t bufferSize = arrayBuffer.size(runtime); + + // Determine format based on buffer size + size_t stride = bufferSize / height; + size_t expectedRGBAStride = width * 4; + size_t expectedRGBStride = width * 3; + + cv::Mat frame; + + if (stride == expectedRGBAStride || bufferSize >= width * height * 4) { + // RGBA format with potential padding + frame = cv::Mat(height, width, CV_8UC4, data, stride); + + static bool loggedFormat = false; + if (!loggedFormat) { + log(LOG_LEVEL::Debug, + "FrameProcessor: ArrayBuffer format is RGBA, " + "stride: ", + stride); + loggedFormat = true; + } + } else if (stride >= expectedRGBStride) { + // RGB format + frame = cv::Mat(height, width, CV_8UC3, data, stride); + + static bool loggedFormat = false; + if (!loggedFormat) { + log(LOG_LEVEL::Debug, + "FrameProcessor: ArrayBuffer format is 
RGB, stride: ", stride); + loggedFormat = true; + } + } else { + throw std::runtime_error( + "FrameProcessor: Unexpected buffer size - expected " + + std::to_string(expectedRGBStride) + " or " + + std::to_string(expectedRGBAStride) + " bytes per row, got " + + std::to_string(stride)); + } + + return frame; +} + +} // namespace utils +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h new file mode 100644 index 000000000..e37b5bfd6 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h @@ -0,0 +1,109 @@ +#pragma once + +#include +#include + +namespace rnexecutorch { +namespace utils { + +using namespace facebook; + +/** + * @brief Utility class for processing camera frames from VisionCamera + * + * Provides high-level helpers for extracting and working with frames from + * react-native-vision-camera in a consistent way across all vision models. + * + * This class abstracts away the complexity of: + * - Handling both nativeBuffer (zero-copy) and ArrayBuffer (with copy) paths + * - Platform-specific buffer formats (CVPixelBuffer on iOS, AHardwareBuffer + * on Android) + * - JSI object property access and type conversions + * + * Usage: + * @code + * auto frameObj = frameData.asObject(runtime); + * cv::Mat frame = FrameProcessor::extractFrame(runtime, frameObj); + * cv::Size size = FrameProcessor::getFrameSize(runtime, frameObj); + * @endcode + */ +class FrameProcessor { +public: + /** + * @brief Extract cv::Mat from VisionCamera frame data + * + * Handles both zero-copy (nativeBuffer) and copy-based (ArrayBuffer) paths + * automatically. Prefers nativeBuffer when available for best performance. 
+ * + * @param runtime JSI runtime + * @param frameData JSI object containing frame data from VisionCamera + * Expected properties: + * - nativeBuffer (optional): BigInt pointer to native buffer + * - data (optional): ArrayBuffer with pixel data + * - width: number + * - height: number + * + * @return cv::Mat wrapping or containing the frame data + * + * @throws std::runtime_error if neither nativeBuffer nor data is available + * @throws std::runtime_error if nativeBuffer extraction fails + * + * @note The returned cv::Mat may not own the data (zero-copy path). + * Caller must ensure the source frame remains valid during use. + */ + static cv::Mat extractFrame(jsi::Runtime &runtime, + const jsi::Object &frameData); + + /** + * @brief Get frame dimensions from VisionCamera frame data + * + * @param runtime JSI runtime + * @param frameData JSI object containing frame data + * + * @return cv::Size with frame width and height + * + * @throws std::runtime_error if width or height properties are missing + */ + static cv::Size getFrameSize(jsi::Runtime &runtime, + const jsi::Object &frameData); + + /** + * @brief Check if frame data has nativeBuffer (zero-copy path available) + * + * @param runtime JSI runtime + * @param frameData JSI object containing frame data + * @return true if nativeBuffer is available, false otherwise + */ + static bool hasNativeBuffer(jsi::Runtime &runtime, + const jsi::Object &frameData); + +private: + /** + * @brief Extract frame from nativeBuffer pointer (zero-copy) + * + * @param runtime JSI runtime + * @param frameData JSI object with nativeBuffer property + * @param width Frame width + * @param height Frame height + * @return cv::Mat wrapping the native buffer data + */ + static cv::Mat extractFromNativeBuffer(jsi::Runtime &runtime, + const jsi::Object &frameData, + int width, int height); + + /** + * @brief Extract frame from ArrayBuffer (with copy) + * + * @param runtime JSI runtime + * @param frameData JSI object with data property + * 
@param width Frame width + * @param height Frame height + * @return cv::Mat containing or wrapping the array buffer data + */ + static cv::Mat extractFromArrayBuffer(jsi::Runtime &runtime, + const jsi::Object &frameData, int width, + int height); +}; + +} // namespace utils +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/src/modules/BaseModule.ts b/packages/react-native-executorch/src/modules/BaseModule.ts index 6aefc8b2a..315b82249 100644 --- a/packages/react-native-executorch/src/modules/BaseModule.ts +++ b/packages/react-native-executorch/src/modules/BaseModule.ts @@ -1,12 +1,68 @@ import { ResourceSource } from '../types/common'; import { TensorPtr } from '../types/common'; +/** + * Base class for all React Native Executorch modules. + * + * Provides core functionality for loading models, running inference, + * and managing native resources. + * + * @category Base Classes + */ export abstract class BaseModule { /** - * Native module instance + * Native module instance (JSI Host Object) + * @internal */ nativeModule: any = null; + /** + * Process a camera frame directly for real-time inference. + * + * This method is bound to a native JSI function after calling `load()`, + * making it worklet-compatible and safe to call from VisionCamera's + * frame processor thread. + * + * **Performance characteristics:** + * - **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + * frame data is accessed directly without copying (fastest, recommended). + * - **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + * from native to JS, then accessed from native code (slower, fallback). 
+ * + * **Usage with VisionCamera:** + * ```typescript + * const frameOutput = useFrameOutput({ + * pixelFormat: 'rgb', + * onFrame(frame) { + * 'worklet'; + * // Zero-copy approach (recommended) + * const nativeBuffer = frame.getNativeBuffer(); + * const result = model.generateFromFrame( + * { nativeBuffer: nativeBuffer.pointer, width: frame.width, height: frame.height }, + * ...args + * ); + * nativeBuffer.release(); + * frame.dispose(); + * } + * }); + * ``` + * + * @param frameData Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + * @param args Additional model-specific arguments (e.g., threshold, options) + * @returns Model-specific output (e.g., detections, classifications, embeddings) + * + * @see {@link FrameData} for frame data format details + */ + public generateFromFrame!: (frameData: FrameData, ...args: any[]) => any; + + /** + * Load the model and prepare it for inference. + * + * @param modelSource - Resource location of the model binary + * @param onDownloadProgressCallback - Optional callback to monitor download progress (0-1) + * @param args - Additional model-specific loading arguments + */ + abstract load( modelSource: ResourceSource, onDownloadProgressCallback: (_: number) => void, @@ -19,6 +75,7 @@ export abstract class BaseModule { * * @param inputTensor - Array of input tensors. * @returns Array of output tensors. + * @internal */ protected async forwardET(inputTensor: TensorPtr[]): Promise { return await this.nativeModule.forward(inputTensor); @@ -36,11 +93,29 @@ export abstract class BaseModule { } /** - * Unloads the model from memory. + * Unloads the model from memory and releases native resources. + * + * Always call this method when you're done with a model to prevent memory leaks. */ delete() { if (this.nativeModule !== null) { this.nativeModule.unload(); } } + + /** + * Bind JSI methods to this instance for worklet compatibility. 
+ * + * This makes native JSI functions accessible from worklet threads, + * which is essential for VisionCamera frame processing. + * + * @internal + */ + protected bindJSIMethods() { + if (this.nativeModule && this.nativeModule.generateFromFrame) { + // Bind the native JSI method directly to this instance + // This makes it worklet-compatible since JSI functions work across threads + this.generateFromFrame = this.nativeModule.generateFromFrame; + } + } } diff --git a/yarn.lock b/yarn.lock index 90ac56b11..436005c8d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -13799,7 +13799,16 @@ __metadata: languageName: node linkType: hard -"semver@npm:^7.1.3, semver@npm:^7.3.5, semver@npm:^7.3.7, semver@npm:^7.5.2, semver@npm:^7.5.3, semver@npm:^7.5.4, semver@npm:^7.6.0, semver@npm:^7.7.1, semver@npm:^7.7.3": +"semver@npm:^7.1.3, semver@npm:^7.3.5, semver@npm:^7.3.7, semver@npm:^7.5.2, semver@npm:^7.5.3, semver@npm:^7.5.4, semver@npm:^7.6.0, semver@npm:^7.7.1": + version: 7.7.3 + resolution: "semver@npm:7.7.3" + bin: + semver: bin/semver.js + checksum: 10/8dbc3168e057a38fc322af909c7f5617483c50caddba135439ff09a754b20bdd6482a5123ff543dad4affa488ecf46ec5fb56d61312ad20bb140199b88dfaea9 + languageName: node + linkType: hard + +"semver@npm:^7.7.3": version: 7.7.4 resolution: "semver@npm:7.7.4" bin: From 9ce35daccb16772b2803d2d817d193966bde85a5 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 16 Feb 2026 11:22:16 +0100 Subject: [PATCH 05/37] feat: remove unused bindJSIMethods --- .../src/modules/BaseModule.ts | 16 ---------------- .../computer_vision/ObjectDetectionModule.ts | 1 - 2 files changed, 17 deletions(-) diff --git a/packages/react-native-executorch/src/modules/BaseModule.ts b/packages/react-native-executorch/src/modules/BaseModule.ts index 315b82249..0870a30b6 100644 --- a/packages/react-native-executorch/src/modules/BaseModule.ts +++ b/packages/react-native-executorch/src/modules/BaseModule.ts @@ -102,20 +102,4 @@ export abstract class BaseModule { 
this.nativeModule.unload(); } } - - /** - * Bind JSI methods to this instance for worklet compatibility. - * - * This makes native JSI functions accessible from worklet threads, - * which is essential for VisionCamera frame processing. - * - * @internal - */ - protected bindJSIMethods() { - if (this.nativeModule && this.nativeModule.generateFromFrame) { - // Bind the native JSI method directly to this instance - // This makes it worklet-compatible since JSI functions work across threads - this.generateFromFrame = this.nativeModule.generateFromFrame; - } - } } diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index 95b9e436b..78dfed4f6 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -28,7 +28,6 @@ export class ObjectDetectionModule extends BaseModule { onDownloadProgressCallback, model.modelSource ); - if (!paths?.[0]) { throw new RnExecutorchError( RnExecutorchErrorCode.DownloadInterrupted, From 66af65c844d9f59301ac73b21dea3c9aafe21784 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Tue, 17 Feb 2026 13:05:14 +0100 Subject: [PATCH 06/37] feat: initial version of vision model API --- .cspell-wordlist.txt | 1 + .../app/object_detection/index.tsx | 167 ++++++++++++++++- .../rnexecutorch/RnExecutorchInstaller.h | 12 +- .../host_objects/ModelHostObject.h | 21 ++- .../metaprogramming/TypeConcepts.h | 9 +- .../rnexecutorch/models/VisionModel.cpp | 47 ++++- .../common/rnexecutorch/models/VisionModel.h | 42 ++++- .../models/embeddings/image/ImageEmbeddings.h | 2 +- .../BaseImageSegmentation.h | 2 +- .../image_segmentation/ImageSegmentation.cpp | 170 ++++++++++++++++++ .../models/object_detection/ObjectDetection.h | 18 +- .../models/style_transfer/StyleTransfer.h | 2 +- 
.../tests/integration/ObjectDetectionTest.cpp | 25 +-- .../computer_vision/useObjectDetection.ts | 7 +- .../src/hooks/useModule.ts | 38 ++++ .../computer_vision/ObjectDetectionModule.ts | 24 +-- .../modules/computer_vision/VisionModule.ts | 154 ++++++++++++++++ .../src/types/objectDetection.ts | 73 +++++++- 18 files changed, 745 insertions(+), 69 deletions(-) create mode 100644 packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp create mode 100644 packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt index 419872562..a2e8ecbab 100644 --- a/.cspell-wordlist.txt +++ b/.cspell-wordlist.txt @@ -116,3 +116,4 @@ antonov rfdetr basemodule IMAGENET +worklet \ No newline at end of file diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index 6a43dd920..9e60589fb 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -1,16 +1,66 @@ import Spinner from '../../components/Spinner'; -import { BottomBar } from '../../components/BottomBar'; import { getImage } from '../../utils'; import { Detection, useObjectDetection, SSDLITE_320_MOBILENET_V3_LARGE, } from 'react-native-executorch'; -import { View, StyleSheet, Image } from 'react-native'; +import { View, StyleSheet, Image, TouchableOpacity, Text } from 'react-native'; import ImageWithBboxes from '../../components/ImageWithBboxes'; import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from '../../context'; import ScreenWrapper from '../../ScreenWrapper'; +import ColorPalette from '../../colors'; +import { Images } from 'react-native-nitro-image'; + +// Helper function to convert image URI to raw pixel data using NitroImage +async function imageUriToPixelData( + uri: string, + targetWidth: number, + targetHeight: number +): 
Promise<{ + data: ArrayBuffer; + width: number; + height: number; + channels: number; +}> { + try { + // Load image and resize to target dimensions + const image = await Images.loadFromFileAsync(uri); + const resized = image.resize(targetWidth, targetHeight); + + // Get pixel data as ArrayBuffer (RGBA format) + const pixelData = resized.toRawPixelData(); + const buffer = + pixelData instanceof ArrayBuffer ? pixelData : pixelData.buffer; + + // Calculate actual buffer dimensions (accounts for device pixel ratio) + const bufferSize = buffer?.byteLength || 0; + const totalPixels = bufferSize / 4; // RGBA = 4 bytes per pixel + const aspectRatio = targetWidth / targetHeight; + const actualHeight = Math.sqrt(totalPixels / aspectRatio); + const actualWidth = totalPixels / actualHeight; + + console.log('Requested:', targetWidth, 'x', targetHeight); + console.log('Buffer size:', bufferSize); + console.log( + 'Actual dimensions:', + Math.round(actualWidth), + 'x', + Math.round(actualHeight) + ); + + return { + data: buffer, + width: Math.round(actualWidth), + height: Math.round(actualHeight), + channels: 4, // RGBA + }; + } catch (error) { + console.error('Error loading image with NitroImage:', error); + throw error; + } +} export default function ObjectDetectionScreen() { const [imageUri, setImageUri] = useState(''); @@ -42,10 +92,41 @@ export default function ObjectDetectionScreen() { const runForward = async () => { if (imageUri) { try { - const output = await ssdLite.forward(imageUri); + console.log('Running forward with string URI...'); + const output = await ssdLite.forward(imageUri, 0.5); + console.log('String URI result:', output.length, 'detections'); setResults(output); } catch (e) { - console.error(e); + console.error('Error in runForward:', e); + } + } + }; + + const runForwardPixels = async () => { + if (imageUri && imageDimensions) { + try { + console.log('Converting image to pixel data...'); + // Resize to 640x640 to avoid memory issues + const 
intermediateSize = 640; + const pixelData = await imageUriToPixelData( + imageUri, + intermediateSize, + intermediateSize + ); + + console.log('Running forward with pixel data...', { + width: pixelData.width, + height: pixelData.height, + channels: pixelData.channels, + dataSize: pixelData.data.byteLength, + }); + + // Run inference using unified forward() API + const output = await ssdLite.forward(pixelData, 0.5); + console.log('Pixel data result:', output.length, 'detections'); + setResults(output); + } catch (e) { + console.error('Error in runForwardPixels:', e); } } }; @@ -81,10 +162,41 @@ export default function ObjectDetectionScreen() { )} - + + {/* Custom bottom bar with two buttons */} + + + handleCameraPress(false)}> + πŸ“· Gallery + + + + + + Run (String) + + + + Run (Pixels) + + + ); } @@ -129,4 +241,43 @@ const styles = StyleSheet.create({ width: '100%', height: '100%', }, + bottomContainer: { + width: '100%', + gap: 15, + alignItems: 'center', + padding: 16, + flex: 1, + }, + bottomIconsContainer: { + flexDirection: 'row', + justifyContent: 'center', + width: '100%', + }, + iconText: { + fontSize: 16, + color: ColorPalette.primary, + }, + buttonsRow: { + flexDirection: 'row', + width: '100%', + gap: 10, + }, + button: { + height: 50, + justifyContent: 'center', + alignItems: 'center', + backgroundColor: ColorPalette.primary, + color: '#fff', + borderRadius: 8, + }, + halfButton: { + flex: 1, + }, + buttonDisabled: { + opacity: 0.5, + }, + buttonText: { + color: '#fff', + fontSize: 16, + }, }); diff --git a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h index d5c98763d..80b7d18b3 100644 --- a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h +++ b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h @@ -54,8 +54,16 @@ class RnExecutorchInstaller { meta::createConstructorArgsWithCallInvoker( args, 
runtime, jsCallInvoker); - auto modelImplementationPtr = std::make_shared( - std::make_from_tuple(constructorArgs)); + // This unpacks the tuple and calls the constructor directly inside + // make_shared. It avoids creating a temporary object, so no + // move/copy is required. + auto modelImplementationPtr = std::apply( + [](auto &&...unpackedArgs) { + return std::make_shared( + std::forward(unpackedArgs)...); + }, + std::move(constructorArgs)); + auto modelHostObject = std::make_shared>( modelImplementationPtr, jsCallInvoker); diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h index 7712b2b9d..9a2e6776e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h @@ -45,10 +45,11 @@ template class ModelHostObject : public JsiHostObject { "getInputShape")); } - if constexpr (meta::HasGenerate) { - addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject, - promiseHostFunction<&Model::generate>, - "generate")); + if constexpr (meta::HasGenerateFromString) { + addFunctions( + JSI_EXPORT_FUNCTION(ModelHostObject, + promiseHostFunction<&Model::generateFromString>, + "generateFromString")); } if constexpr (meta::HasEncode) { @@ -168,10 +169,22 @@ template class ModelHostObject : public JsiHostObject { addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject, promiseHostFunction<&Model::stream>, "stream")); + } + + // Register generateFromFrame for all VisionModel subclasses + if constexpr (meta::DerivedFromOrSameAs) { addFunctions(JSI_EXPORT_FUNCTION( ModelHostObject, synchronousHostFunction<&Model::streamStop>, "streamStop")); } + + // Register generateFromPixels for models that support it + if constexpr (meta::HasGenerateFromPixels) { + addFunctions( + JSI_EXPORT_FUNCTION(ModelHostObject, + 
visionHostFunction<&Model::generateFromPixels>, + "generateFromPixels")); + } } // A generic host function that runs synchronously, works analogously to the diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h index 85a3db449..8100a471b 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h @@ -12,8 +12,13 @@ template concept SameAs = std::is_same_v; template -concept HasGenerate = requires(T t) { - { &T::generate }; +concept HasGenerateFromString = requires(T t) { + { &T::generateFromString }; +}; + +template +concept HasGenerateFromPixels = requires(T t) { + { &T::generateFromPixels }; }; template diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index 671ed03c8..54c0adfd2 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -6,8 +6,8 @@ namespace models { using namespace facebook; -cv::Mat VisionModel::extractAndPreprocess(jsi::Runtime &runtime, - const jsi::Value &frameData) const { +cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) const { // Extract frame using FrameProcessor utility auto frameObj = frameData.asObject(runtime); cv::Mat frame = utils::FrameProcessor::extractFrame(runtime, frameObj); @@ -16,5 +16,48 @@ cv::Mat VisionModel::extractAndPreprocess(jsi::Runtime &runtime, return preprocessFrame(frame); } +cv::Mat VisionModel::extractFromPixels(jsi::Runtime &runtime, + const jsi::Object &pixelData) const { + // Extract width, height, and channels + if (!pixelData.hasProperty(runtime, "width") || + 
!pixelData.hasProperty(runtime, "height") || + !pixelData.hasProperty(runtime, "channels") || + !pixelData.hasProperty(runtime, "data")) { + throw std::runtime_error( + "Invalid pixel data: must contain width, height, channels, and data"); + } + + int width = pixelData.getProperty(runtime, "width").asNumber(); + int height = pixelData.getProperty(runtime, "height").asNumber(); + int channels = pixelData.getProperty(runtime, "channels").asNumber(); + + // Get the ArrayBuffer + auto dataValue = pixelData.getProperty(runtime, "data"); + if (!dataValue.isObject() || + !dataValue.asObject(runtime).isArrayBuffer(runtime)) { + throw std::runtime_error( + "pixel data 'data' property must be an ArrayBuffer"); + } + + auto arrayBuffer = dataValue.asObject(runtime).getArrayBuffer(runtime); + size_t expectedSize = width * height * channels; + + if (arrayBuffer.size(runtime) != expectedSize) { + throw std::runtime_error( + "ArrayBuffer size does not match width * height * channels"); + } + + // Create cv::Mat and copy the data + // OpenCV uses BGR/BGRA format internally, but we'll create as-is and let + // preprocessFrame handle conversion + int cvType = (channels == 3) ? 
CV_8UC3 : CV_8UC4; + cv::Mat image(height, width, cvType); + + // Copy data from ArrayBuffer to cv::Mat + std::memcpy(image.data, arrayBuffer.data(runtime), expectedSize); + + return image; +} + } // namespace models } // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h index 11da49547..9ba5cf7e4 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h @@ -121,12 +121,48 @@ class VisionModel : public BaseModel { * responsible * @note Typical usage: * @code - * cv::Mat preprocessed = extractAndPreprocess(runtime, frameData); + * cv::Mat preprocessed = extractFromFrame(runtime, frameData); * auto tensor = image_processing::getTensorFromMatrix(dims, preprocessed); * @endcode */ - cv::Mat extractAndPreprocess(jsi::Runtime &runtime, - const jsi::Value &frameData) const; + cv::Mat extractFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) const; + + /** + * @brief Extract cv::Mat from raw pixel data (ArrayBuffer) sent from + * JavaScript + * + * This method enables users to run inference on raw pixel data without file + * I/O. Useful for processing images already in memory (e.g., from canvas, + * image library). 
+ * + * @param runtime JSI runtime + * @param pixelData JSI object containing: + * - data: ArrayBuffer with raw pixel values + * - width: number - image width + * - height: number - image height + * - channels: number - number of channels (3 for RGB, 4 for + * RGBA) + * + * @return cv::Mat containing the pixel data + * + * @throws std::runtime_error if pixelData format is invalid + * + * @note The returned cv::Mat owns a copy of the data + * @note Expected pixel format: RGB or RGBA, row-major order + * @note Typical usage from JS: + * @code + * const pixels = new Uint8Array([...]); // Raw pixel data + * const result = model.generateFromPixels({ + * data: pixels.buffer, + * width: 640, + * height: 480, + * channels: 3 + * }, 0.5); + * @endcode + */ + cv::Mat extractFromPixels(jsi::Runtime &runtime, + const jsi::Object &pixelData) const; }; } // namespace models diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h index 7e114e939..9a1d6429b 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h @@ -27,4 +27,4 @@ class ImageEmbeddings final : public BaseEmbeddings { REGISTER_CONSTRUCTOR(models::embeddings::ImageEmbeddings, std::string, std::shared_ptr); -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h index f46f41d69..34ad8dffd 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h +++ 
b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h @@ -58,4 +58,4 @@ class BaseImageSegmentation : public BaseModel { REGISTER_CONSTRUCTOR(models::image_segmentation::BaseImageSegmentation, std::string, std::vector, std::vector, std::shared_ptr); -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp new file mode 100644 index 000000000..08f2a4683 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp @@ -0,0 +1,170 @@ +#include "ImageSegmentation.h" + +#include + +#include +#include +#include +#include +#include +#include + +namespace rnexecutorch::models::image_segmentation { + +ImageSegmentation::ImageSegmentation( + const std::string &modelSource, + std::shared_ptr callInvoker) + : BaseModel(modelSource, callInvoker) { + auto inputShapes = getAllInputShapes(); + if (inputShapes.size() == 0) { + throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, + "Model seems to not take any input tensors."); + } + std::vector modelInputShape = inputShapes[0]; + if (modelInputShape.size() < 2) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unexpected model input size, expected at least 2 dimentions " + "but got: %zu.", + modelInputShape.size()); + throw RnExecutorchError(RnExecutorchErrorCode::WrongDimensions, + errorMessage); + } + modelImageSize = cv::Size(modelInputShape[modelInputShape.size() - 1], + modelInputShape[modelInputShape.size() - 2]); + numModelPixels = modelImageSize.area(); +} + +std::shared_ptr ImageSegmentation::generate( + std::string imageSource, + std::set> classesOfInterest, bool resize) { + auto [inputTensor, originalSize] = + 
image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]); + + auto forwardResult = BaseModel::forward(inputTensor); + if (!forwardResult.ok()) { + throw RnExecutorchError(forwardResult.error(), + "The model's forward function did not succeed. " + "Ensure the model input is correct."); + } + + return postprocess(forwardResult->at(0).toTensor(), originalSize, + classesOfInterest, resize); +} + +std::shared_ptr ImageSegmentation::postprocess( + const Tensor &tensor, cv::Size originalSize, + std::set> classesOfInterest, bool resize) { + + auto dataPtr = static_cast(tensor.const_data_ptr()); + auto resultData = std::span(dataPtr, tensor.numel()); + + // We copy the ET-owned data to jsi array buffers that can be directly + // returned to JS + std::vector> resultClasses; + resultClasses.reserve(numClasses); + for (std::size_t cl = 0; cl < numClasses; ++cl) { + auto classBuffer = std::make_shared( + &resultData[cl * numModelPixels], numModelPixels * sizeof(float)); + resultClasses.push_back(classBuffer); + } + + // Apply softmax per each pixel across all classes + for (std::size_t pixel = 0; pixel < numModelPixels; ++pixel) { + std::vector classValues(numClasses); + for (std::size_t cl = 0; cl < numClasses; ++cl) { + classValues[cl] = + reinterpret_cast(resultClasses[cl]->data())[pixel]; + } + numerical::softmax(classValues); + for (std::size_t cl = 0; cl < numClasses; ++cl) { + reinterpret_cast(resultClasses[cl]->data())[pixel] = + classValues[cl]; + } + } + + // Calculate the maximum class for each pixel + auto argmax = + std::make_shared(numModelPixels * sizeof(int32_t)); + for (std::size_t pixel = 0; pixel < numModelPixels; ++pixel) { + float max = reinterpret_cast(resultClasses[0]->data())[pixel]; + int maxInd = 0; + for (int cl = 1; cl < numClasses; ++cl) { + if (reinterpret_cast(resultClasses[cl]->data())[pixel] > max) { + maxInd = cl; + max = reinterpret_cast(resultClasses[cl]->data())[pixel]; + } + } + reinterpret_cast(argmax->data())[pixel] = 
maxInd; + } + + auto buffersToReturn = std::make_shared>>(); + for (std::size_t cl = 0; cl < numClasses; ++cl) { + if (classesOfInterest.contains(constants::kDeeplabV3Resnet50Labels[cl])) { + (*buffersToReturn)[constants::kDeeplabV3Resnet50Labels[cl]] = + resultClasses[cl]; + } + } + + // Resize selected classes and argmax + if (resize) { + cv::Mat argmaxMat(modelImageSize, CV_32SC1, argmax->data()); + cv::resize(argmaxMat, argmaxMat, originalSize, 0, 0, + cv::InterpolationFlags::INTER_NEAREST); + argmax = std::make_shared( + argmaxMat.data, originalSize.area() * sizeof(int32_t)); + + for (auto &[label, arrayBuffer] : *buffersToReturn) { + cv::Mat classMat(modelImageSize, CV_32FC1, arrayBuffer->data()); + cv::resize(classMat, classMat, originalSize); + arrayBuffer = std::make_shared( + classMat.data, originalSize.area() * sizeof(float)); + } + } + return populateDictionary(argmax, buffersToReturn); +} + +std::shared_ptr ImageSegmentation::populateDictionary( + std::shared_ptr argmax, + std::shared_ptr>> + classesToOutput) { + // Synchronize the invoked thread to return when the dict is constructed + auto promisePtr = std::make_shared>(); + std::future doneFuture = promisePtr->get_future(); + + std::shared_ptr dictPtr = nullptr; + callInvoker->invokeAsync( + [argmax, classesToOutput, &dictPtr, promisePtr](jsi::Runtime &runtime) { + dictPtr = std::make_shared(runtime); + auto argmaxArrayBuffer = jsi::ArrayBuffer(runtime, argmax); + + auto int32ArrayCtor = + runtime.global().getPropertyAsFunction(runtime, "Int32Array"); + auto int32Array = + int32ArrayCtor.callAsConstructor(runtime, argmaxArrayBuffer) + .getObject(runtime); + dictPtr->setProperty(runtime, "ARGMAX", int32Array); + + for (auto &[classLabel, owningBuffer] : *classesToOutput) { + auto classArrayBuffer = jsi::ArrayBuffer(runtime, owningBuffer); + + auto float32ArrayCtor = + runtime.global().getPropertyAsFunction(runtime, "Float32Array"); + auto float32Array = + float32ArrayCtor.callAsConstructor(runtime, 
classArrayBuffer) + .getObject(runtime); + + dictPtr->setProperty( + runtime, jsi::String::createFromAscii(runtime, classLabel.data()), + float32Array); + } + promisePtr->set_value(); + }); + + doneFuture.wait(); + return dictPtr; +} + +} // namespace rnexecutorch::models::image_segmentation \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h index bba09a6d8..fc554003b 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h @@ -8,7 +8,7 @@ #include "Types.h" #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" -#include +#include #include namespace rnexecutorch { @@ -16,12 +16,24 @@ namespace models::object_detection { using executorch::extension::TensorPtr; using executorch::runtime::EValue; -class ObjectDetection : public BaseModel { +class ObjectDetection : public VisionModel { public: ObjectDetection(const std::string &modelSource, std::shared_ptr callInvoker); [[nodiscard("Registered non-void function")]] std::vector - generate(std::string imageSource, double detectionThreshold); + generateFromString(std::string imageSource, double detectionThreshold); + [[nodiscard("Registered non-void function")]] std::vector + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData, + double detectionThreshold); + [[nodiscard("Registered non-void function")]] std::vector + generateFromPixels(jsi::Runtime &runtime, const jsi::Value &pixelData, + double detectionThreshold); + +protected: + // Internal helper for shared preprocessing and inference logic + std::vector runInference(cv::Mat image, + double detectionThreshold); + cv::Mat preprocessFrame(const cv::Mat &frame) const override; private: std::vector postprocess(const 
std::vector &tensors, diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h index 73744c4d8..8eed3c888 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h @@ -33,4 +33,4 @@ class StyleTransfer : public BaseModel { REGISTER_CONSTRUCTOR(models::style_transfer::StyleTransfer, std::string, std::shared_ptr); -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp index ae80208a6..074ee0751 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp @@ -29,7 +29,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generate(kValidTestImagePath, 0.5); + (void)model.generateFromString(kValidTestImagePath, 0.5); } }; } // namespace model_tests @@ -43,49 +43,50 @@ INSTANTIATE_TYPED_TEST_SUITE_P(ObjectDetection, CommonModelTest, // ============================================================================ TEST(ObjectDetectionGenerateTests, InvalidImagePathThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg", 0.5), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg", 0.5), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, EmptyImagePathThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - EXPECT_THROW((void)model.generate("", 0.5), 
RnExecutorchError); + EXPECT_THROW((void)model.generateFromString("", 0.5), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, MalformedURIThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad", 0.5), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad", 0.5), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, NegativeThresholdThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - EXPECT_THROW((void)model.generate(kValidTestImagePath, -0.1), + EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, -0.1), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, ThresholdAboveOneThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - EXPECT_THROW((void)model.generate(kValidTestImagePath, 1.1), + EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, 1.1), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, ValidImageReturnsResults) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - auto results = model.generate(kValidTestImagePath, 0.3); + auto results = model.generateFromString(kValidTestImagePath, 0.3); EXPECT_GE(results.size(), 0u); } TEST(ObjectDetectionGenerateTests, HighThresholdReturnsFewerResults) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - auto lowThresholdResults = model.generate(kValidTestImagePath, 0.1); - auto highThresholdResults = model.generate(kValidTestImagePath, 0.9); + auto lowThresholdResults = model.generateFromString(kValidTestImagePath, 0.1); + auto highThresholdResults = + model.generateFromString(kValidTestImagePath, 0.9); EXPECT_GE(lowThresholdResults.size(), highThresholdResults.size()); } TEST(ObjectDetectionGenerateTests, DetectionsHaveValidBoundingBoxes) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - auto results = model.generate(kValidTestImagePath, 0.3); + auto results = 
model.generateFromString(kValidTestImagePath, 0.3); for (const auto &detection : results) { EXPECT_LE(detection.x1, detection.x2); @@ -97,7 +98,7 @@ TEST(ObjectDetectionGenerateTests, DetectionsHaveValidBoundingBoxes) { TEST(ObjectDetectionGenerateTests, DetectionsHaveValidScores) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - auto results = model.generate(kValidTestImagePath, 0.3); + auto results = model.generateFromString(kValidTestImagePath, 0.3); for (const auto &detection : results) { EXPECT_GE(detection.score, 0.0f); @@ -107,7 +108,7 @@ TEST(ObjectDetectionGenerateTests, DetectionsHaveValidScores) { TEST(ObjectDetectionGenerateTests, DetectionsHaveValidLabels) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - auto results = model.generate(kValidTestImagePath, 0.3); + auto results = model.generateFromString(kValidTestImagePath, 0.3); for (const auto &detection : results) { EXPECT_GE(detection.label, 0); diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useObjectDetection.ts b/packages/react-native-executorch/src/hooks/computer_vision/useObjectDetection.ts index 2d52eb706..845f1aa23 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useObjectDetection.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useObjectDetection.ts @@ -15,9 +15,10 @@ import { export const useObjectDetection = ({ model, preventLoad = false, -}: ObjectDetectionProps): ObjectDetectionType => - useModule({ +}: ObjectDetectionProps): ObjectDetectionType => { + return useModule({ module: ObjectDetectionModule, model, preventLoad: preventLoad, - }); + }) as ObjectDetectionType; +}; diff --git a/packages/react-native-executorch/src/hooks/useModule.ts b/packages/react-native-executorch/src/hooks/useModule.ts index 1a35885d5..624094afb 100644 --- a/packages/react-native-executorch/src/hooks/useModule.ts +++ b/packages/react-native-executorch/src/hooks/useModule.ts @@ -6,6 +6,7 @@ interface Module { 
load: (...args: any[]) => Promise; forward: (...args: any[]) => Promise; delete: () => void; + nativeModule?: any; // JSI host object with native methods } interface ModuleConstructor { @@ -31,6 +32,7 @@ export const useModule = < const [isGenerating, setIsGenerating] = useState(false); const [downloadProgress, setDownloadProgress] = useState(0); const [moduleInstance] = useState(() => new module()); + const [runOnFrame, setRunOnFrame] = useState(null); useEffect(() => { if (preventLoad) return; @@ -46,6 +48,15 @@ export const useModule = < if (isMounted) setDownloadProgress(progress); }); if (isMounted) setIsReady(true); + + // Extract runOnFrame worklet from VisionModule if available + // Use "state trick" to make the worklet serializable for VisionCamera + if ('runOnFrame' in moduleInstance) { + const worklet = moduleInstance.runOnFrame; + if (worklet) { + setRunOnFrame(() => worklet); + } + } } catch (err) { if (isMounted) setError(parseUnknownError(err)); } @@ -99,5 +110,32 @@ export const useModule = < */ downloadProgress, forward, + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * Only available for Computer Vision modules that support real-time frame processing + * (e.g., ObjectDetection, Classification, ImageSegmentation). + * Returns `null` if the module doesn't implement frame processing. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. 
+ * + * @example + * ```typescript + * const { runOnFrame } = useObjectDetection({ model: MODEL }); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!runOnFrame) return; + * const detections = runOnFrame(frame, 0.5); + * frame.dispose(); + * } + * }); + * ``` + */ + runOnFrame, }; }; diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index 78dfed4f6..0818d9682 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -3,15 +3,15 @@ import { ResourceSource } from '../../types/common'; import { Detection } from '../../types/objectDetection'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; -import { BaseModule } from '../BaseModule'; import { Logger } from '../../common/Logger'; +import { VisionModule } from './VisionModule'; /** * Module for object detection tasks. * * @category Typescript API */ -export class ObjectDetectionModule extends BaseModule { +export class ObjectDetectionModule extends VisionModule { /** * Loads the model, where `modelSource` is a string that specifies the location of the model binary. * To track the download progress, supply a callback function `onDownloadProgressCallback`. @@ -41,24 +41,4 @@ export class ObjectDetectionModule extends BaseModule { throw parseUnknownError(error); } } - - /** - * Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. - * `detectionThreshold` can be supplied to alter the sensitivity of the detection. - * - * @param imageSource - The image source to be processed. - * @param detectionThreshold - The threshold for detection sensitivity. Default is 0.7. 
- * @returns An array of Detection objects representing detected items in the image. - */ - async forward( - imageSource: string, - detectionThreshold: number = 0.7 - ): Promise { - if (this.nativeModule == null) - throw new RnExecutorchError( - RnExecutorchErrorCode.ModuleNotLoaded, - 'The model is currently not loaded. Please load the model before calling forward().' - ); - return await this.nativeModule.generate(imageSource, detectionThreshold); - } } diff --git a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts new file mode 100644 index 000000000..06acf6654 --- /dev/null +++ b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts @@ -0,0 +1,154 @@ +import { BaseModule } from '../BaseModule'; +import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; +import { RnExecutorchError } from '../../errors/errorUtils'; + +/** + * Raw pixel data for vision model inference. + */ +export type PixelData = { + data: ArrayBuffer; + width: number; + height: number; + channels: number; +}; + +/** + * VisionCamera Frame object for real-time processing. + */ +export type Frame = { + getNativeBuffer(): { pointer: number; release(): void }; + width: number; + height: number; +}; + +/** + * Base class for computer vision models that support multiple input types. + * + * VisionModule extends BaseModule with: + * - Unified `forward()` API accepting string paths or raw pixel data + * - `runOnFrame` getter for real-time VisionCamera frame processing + * - Shared frame processor creation logic + * + * Subclasses should only implement model-specific loading logic. + * + * @category Typescript API + */ +export abstract class VisionModule extends BaseModule { + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * + * Only available after the model is loaded. Returns null if not loaded. 
+ * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * @example + * ```typescript + * const model = new ClassificationModule(); + * await model.load({ modelSource: MODEL }); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!model.runOnFrame) return; + * const result = model.runOnFrame(frame); + * frame.dispose(); + * } + * }); + * ``` + */ + get runOnFrame(): ((frame: Frame, ...args: any[]) => TOutput) | null { + if (!this.nativeModule?.generateFromFrame) { + return null; + } + + // Extract pure JSI function reference (runs on JS thread) + const nativeGenerateFromFrame = this.nativeModule.generateFromFrame; + + // Return worklet that captures ONLY the JSI function + return (frame: any, ...args: any[]): TOutput => { + 'worklet'; + + let nativeBuffer: any = null; + try { + nativeBuffer = frame.getNativeBuffer(); + const frameData = { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }; + return nativeGenerateFromFrame(frameData, ...args); + } finally { + if (nativeBuffer?.release) { + nativeBuffer.release(); + } + } + }; + } + + /** + * Executes the model's forward pass with automatic input type detection. + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `forwardSync` instead. + * This method is async and cannot be called in worklet context. + * + * @param input - Image source (string path or PixelData object) + * @param args - Additional model-specific arguments + * @returns A Promise that resolves to the model output. 
+ * + * @example + * ```typescript + * // String path (async) + * const result1 = await model.forward('file:///path/to/image.jpg'); + * + * // Pixel data (async) + * const result2 = await model.forward({ + * data: pixelBuffer, + * width: 640, + * height: 480, + * channels: 3 + * }); + * + * // For VisionCamera frames, use runOnFrame in worklet: + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!model.runOnFrame) return; + * const result = model.runOnFrame(frame); + * } + * }); + * ``` + */ + async forward(input: string | PixelData, ...args: any[]): Promise { + if (this.nativeModule == null) + throw new RnExecutorchError( + RnExecutorchErrorCode.ModuleNotLoaded, + 'The model is currently not loaded. Please load the model before calling forward().' + ); + + // Type detection and routing + if (typeof input === 'string') { + // String path β†’ generateFromString() + return await this.nativeModule.generateFromString(input, ...args); + } else if ( + typeof input === 'object' && + 'data' in input && + input.data instanceof ArrayBuffer && + typeof input.width === 'number' && + typeof input.height === 'number' && + typeof input.channels === 'number' + ) { + // Pixel data β†’ generateFromPixels() + return await this.nativeModule.generateFromPixels(input, ...args); + } else { + throw new RnExecutorchError( + RnExecutorchErrorCode.InvalidArgument, + 'Invalid input: expected string path or PixelData object. For VisionCamera frames, use runOnFrame instead.' 
+ ); + } + } +} diff --git a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index 94f7cf5c0..2dddaad64 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ b/packages/react-native-executorch/src/types/objectDetection.ts @@ -170,14 +170,77 @@ export interface ObjectDetectionType { downloadProgress: number; /** - * Executes the model's forward pass to detect objects within the provided image. - * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be processed. - * @param detectionThreshold - An optional number between 0 and 1 representing the minimum confidence score required for an object to be included in the results. Default is 0.7. - * @returns A Promise that resolves to an array of `Detection` objects, where each object typically contains bounding box coordinates, a class label, and a confidence score. + * Executes the model's forward pass with automatic input type detection. + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `processFrame` instead. + * + * @param input - Image source (string or PixelData object) + * @param detectionThreshold - An optional number between 0 and 1 representing the minimum confidence score. Default is 0.7. + * @returns A Promise that resolves to an array of `Detection` objects. * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. 
+ * + * @example + * ```typescript + * // String path + * const detections1 = await model.forward('file:///path/to/image.jpg'); + * + * // Pixel data + * const detections2 = await model.forward({ + * data: pixelBuffer, + * width: 640, + * height: 480, + * channels: 3 + * }); + * ``` */ forward: ( - imageSource: string, + input: + | string + | { + data: ArrayBuffer; + width: number; + height: number; + channels: number; + }, detectionThreshold?: number ) => Promise; + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded (`isReady: true`). + * + * @example + * ```typescript + * const { runOnFrame, isReady } = useObjectDetection({ model: MODEL }); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!runOnFrame) return; + * const detections = runOnFrame(frame, 0.5); + * frame.dispose(); + * } + * }); + * ``` + * + * @param frame - VisionCamera Frame object + * @param detectionThreshold - The threshold for detection sensitivity. Default is 0.7. + * @returns Array of Detection objects representing detected items in the frame. + */ + runOnFrame: ((frame: any, detectionThreshold?: number) => Detection[]) | null; + + /** + * Direct reference to the module instance for advanced use cases. + * Most users should use `forward()` for async processing or `runOnFrame` for real-time frame processing. 
+ */ + moduleInstance: any; } From 6e413ac5c36d39f6f0489487060c09a46b29dbb1 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Tue, 17 Feb 2026 17:51:10 +0100 Subject: [PATCH 07/37] refactor: errors, logs, unnecessary comments, use existing TensorPtr --- .../app/object_detection/index.tsx | 61 +++++++------- apps/computer-vision/package.json | 9 ++- .../host_objects/JsiConversions.h | 19 +++++ .../host_objects/ModelHostObject.h | 62 +++++++++++++- .../rnexecutorch/models/VisionModel.cpp | 69 +++++++++------- .../rnexecutorch/utils/FrameExtractor.cpp | 63 +++++---------- .../rnexecutorch/utils/FrameProcessor.cpp | 80 +++++-------------- .../rnexecutorch/utils/FrameProcessor.h | 14 ++-- .../src/hooks/useModule.ts | 1 - .../modules/computer_vision/VisionModule.ts | 41 +++------- .../src/types/common.ts | 59 +++++++++----- .../src/types/objectDetection.ts | 6 -- 12 files changed, 255 insertions(+), 229 deletions(-) diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index 9e60589fb..54c0eb18f 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -13,7 +13,26 @@ import ScreenWrapper from '../../ScreenWrapper'; import ColorPalette from '../../colors'; import { Images } from 'react-native-nitro-image'; -// Helper function to convert image URI to raw pixel data using NitroImage +// Helper function to convert BGRA to RGB +function convertBGRAtoRGB( + buffer: ArrayBuffer, + width: number, + height: number +): ArrayBuffer { + const source = new Uint8Array(buffer); + const rgb = new Uint8Array(width * height * 3); + + for (let i = 0; i < width * height; i++) { + // BGRA format: [B, G, R, A] β†’ RGB: [R, G, B] + rgb[i * 3 + 0] = source[i * 4 + 2]; // R + rgb[i * 3 + 1] = source[i * 4 + 1]; // G + rgb[i * 3 + 2] = source[i * 4 + 0]; // B + } + + return rgb.buffer; +} + +// Helper function to convert image URI to raw RGB pixel data async 
function imageUriToPixelData( uri: string, targetWidth: number, @@ -29,32 +48,19 @@ async function imageUriToPixelData( const image = await Images.loadFromFileAsync(uri); const resized = image.resize(targetWidth, targetHeight); - // Get pixel data as ArrayBuffer (RGBA format) - const pixelData = resized.toRawPixelData(); + // Get pixel data as ArrayBuffer (BGRA format from NitroImage) + const rawPixelData = resized.toRawPixelData(); const buffer = - pixelData instanceof ArrayBuffer ? pixelData : pixelData.buffer; - - // Calculate actual buffer dimensions (accounts for device pixel ratio) - const bufferSize = buffer?.byteLength || 0; - const totalPixels = bufferSize / 4; // RGBA = 4 bytes per pixel - const aspectRatio = targetWidth / targetHeight; - const actualHeight = Math.sqrt(totalPixels / aspectRatio); - const actualWidth = totalPixels / actualHeight; + rawPixelData instanceof ArrayBuffer ? rawPixelData : rawPixelData.buffer; - console.log('Requested:', targetWidth, 'x', targetHeight); - console.log('Buffer size:', bufferSize); - console.log( - 'Actual dimensions:', - Math.round(actualWidth), - 'x', - Math.round(actualHeight) - ); + // Convert BGRA to RGB as required by the native API + const rgbBuffer = convertBGRAtoRGB(buffer, targetWidth, targetHeight); return { - data: buffer, - width: Math.round(actualWidth), - height: Math.round(actualHeight), - channels: 4, // RGBA + data: rgbBuffer, + width: targetWidth, + height: targetHeight, + channels: 3, // RGB }; } catch (error) { console.error('Error loading image with NitroImage:', error); @@ -106,12 +112,11 @@ export default function ObjectDetectionScreen() { if (imageUri && imageDimensions) { try { console.log('Converting image to pixel data...'); - // Resize to 640x640 to avoid memory issues - const intermediateSize = 640; + // Use original dimensions - let the model resize internally const pixelData = await imageUriToPixelData( imageUri, - intermediateSize, - intermediateSize + imageDimensions.width, + 
imageDimensions.height ); console.log('Running forward with pixel data...', { @@ -122,7 +127,7 @@ export default function ObjectDetectionScreen() { }); // Run inference using unified forward() API - const output = await ssdLite.forward(pixelData, 0.5); + const output = await ssdLite.forward(pixelData, 0.3); console.log('Pixel data result:', output.length, 'detections'); setResults(output); } catch (e) { diff --git a/apps/computer-vision/package.json b/apps/computer-vision/package.json index cce918197..3f47c357c 100644 --- a/apps/computer-vision/package.json +++ b/apps/computer-vision/package.json @@ -17,6 +17,7 @@ "@react-navigation/native": "^7.1.6", "@shopify/react-native-skia": "2.2.12", "expo": "^54.0.27", + "expo-build-properties": "~1.0.10", "expo-constants": "~18.0.11", "expo-font": "~14.0.10", "expo-linking": "~8.0.10", @@ -30,17 +31,19 @@ "react-native-gesture-handler": "~2.28.0", "react-native-image-picker": "^7.2.2", "react-native-loading-spinner-overlay": "^3.0.1", - "react-native-reanimated": "~4.1.1", + "react-native-nitro-image": "0.10.2", + "react-native-nitro-modules": "0.33.4", + "react-native-reanimated": "~4.2.1", "react-native-safe-area-context": "~5.6.0", "react-native-screens": "~4.16.0", "react-native-svg": "15.12.1", "react-native-svg-transformer": "^1.5.0", - "react-native-worklets": "0.5.1" + "react-native-worklets": "^0.7.2" }, "devDependencies": { "@babel/core": "^7.25.2", "@types/pngjs": "^6.0.5", - "@types/react": "~19.1.10" + "@types/react": "~19.2.0" }, "private": true } diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index df9abbdef..b4409b0f2 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -360,6 +360,25 @@ inline jsi::Value getJsiValue(uint64_t val, jsi::Runtime 
&runtime) { return {runtime, bigInt}; } +inline jsi::Value getJsiValue(const std::vector &vec, + jsi::Runtime &runtime) { + jsi::Array array(runtime, vec.size()); + for (size_t i = 0; i < vec.size(); i++) { + // JS numbers are doubles. Large uint64s > 2^53 will lose precision. + array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); + } + return {runtime, array}; +} + +inline jsi::Value getJsiValue(const std::vector &vec, + jsi::Runtime &runtime) { + jsi::Array array(runtime, vec.size()); + for (size_t i = 0; i < vec.size(); i++) { + array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); + } + return {runtime, array}; +} + inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) { return {runtime, val}; } diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h index 9a2e6776e..9554f1888 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h @@ -171,14 +171,12 @@ template class ModelHostObject : public JsiHostObject { "stream")); } - // Register generateFromFrame for all VisionModel subclasses if constexpr (meta::DerivedFromOrSameAs) { addFunctions(JSI_EXPORT_FUNCTION( ModelHostObject, synchronousHostFunction<&Model::streamStop>, "streamStop")); } - // Register generateFromPixels for models that support it if constexpr (meta::HasGenerateFromPixels) { addFunctions( JSI_EXPORT_FUNCTION(ModelHostObject, @@ -234,6 +232,66 @@ template class ModelHostObject : public JsiHostObject { } } + template JSI_HOST_FUNCTION(visionHostFunction) { + // 1. 
Check Argument Count + // (We rely on our new FunctionTraits) + constexpr std::size_t cppArgCount = + meta::FunctionTraits::arity; + + // We expect JS args = (Total C++ Args) - (2 injected args: Runtime + Value) + constexpr std::size_t expectedJsArgs = cppArgCount - 1; + log(LOG_LEVEL::Debug, cppArgCount, count); + if (count != expectedJsArgs) { + throw jsi::JSError(runtime, "Argument count mismatch in vision function"); + } + + try { + // 2. The Magic Trick + // We get a pointer to a dummy function: void dummy(Rest...) {} + // This function has exactly the signature of the arguments we want to + // parse. + auto dummyFuncPtr = &meta::TailSignature::dummy; + + // 3. Let existing helpers do the work + // We pass the dummy pointer. The helper inspects its arguments (Rest...) + // and converts args[0]...args[N] accordingly. + // Note: We pass (args + 1) because JS args[0] is the PixelData, which we + // handle manually. Note: We use expectedJsArgs - 1 because we skipped one + // JS arg. + auto tailArgsTuple = + meta::createArgsTupleFromJsi(dummyFuncPtr, args + 1, runtime); + + // 4. Invoke + using ReturnType = + typename meta::FunctionTraits::return_type; + + if constexpr (std::is_void_v) { + std::apply( + [&](auto &&...tailArgs) { + (model.get()->*FnPtr)( + runtime, + args[0], // 1. PixelData (Manually passed) + std::forward( + tailArgs)...); // 2. The rest (Auto parsed) + }, + std::move(tailArgsTuple)); + return jsi::Value::undefined(); + } else { + auto result = std::apply( + [&](auto &&...tailArgs) { + return (model.get()->*FnPtr)( + runtime, args[0], + std::forward(tailArgs)...); + }, + std::move(tailArgsTuple)); + + return jsi_conversion::getJsiValue(std::move(result), runtime); + } + } catch (const std::exception &e) { + throw jsi::JSError(runtime, e.what()); + } + } + // A generic host function that resolves a promise with a result of a // function. 
JSI arguments are converted to the types provided in the function // signature, and the return value is converted back to JSI before resolving. diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index 54c0adfd2..fd2c40ee8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -1,4 +1,9 @@ #include "VisionModel.h" +#include +#include +#include +#include +#include #include namespace rnexecutorch { @@ -18,45 +23,47 @@ cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime, cv::Mat VisionModel::extractFromPixels(jsi::Runtime &runtime, const jsi::Object &pixelData) const { - // Extract width, height, and channels - if (!pixelData.hasProperty(runtime, "width") || - !pixelData.hasProperty(runtime, "height") || - !pixelData.hasProperty(runtime, "channels") || - !pixelData.hasProperty(runtime, "data")) { - throw std::runtime_error( - "Invalid pixel data: must contain width, height, channels, and data"); - } - - int width = pixelData.getProperty(runtime, "width").asNumber(); - int height = pixelData.getProperty(runtime, "height").asNumber(); - int channels = pixelData.getProperty(runtime, "channels").asNumber(); + // PixelData follows TensorPtr structure (dataPtr, sizes, scalarType) + // Use JSI conversion helper to extract the data + auto tensorView = jsi::fromHostObject(runtime, pixelData); - // Get the ArrayBuffer - auto dataValue = pixelData.getProperty(runtime, "data"); - if (!dataValue.isObject() || - !dataValue.asObject(runtime).isArrayBuffer(runtime)) { - throw std::runtime_error( - "pixel data 'data' property must be an ArrayBuffer"); + // Validate dimensions: sizes must be [height, width, channels] + if (tensorView.sizes.size() != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel 
data: sizes must have 3 elements " + "[height, width, channels], got %zu", + tensorView.sizes.size()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); } - auto arrayBuffer = dataValue.asObject(runtime).getArrayBuffer(runtime); - size_t expectedSize = width * height * channels; + int height = tensorView.sizes[0]; + int width = tensorView.sizes[1]; + int channels = tensorView.sizes[2]; - if (arrayBuffer.size(runtime) != expectedSize) { - throw std::runtime_error( - "ArrayBuffer size does not match width * height * channels"); + // Pixel data must be RGB (3 channels) and BYTE type + if (channels != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel data: expected 3 channels (RGB), got %d", + channels); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); } - // Create cv::Mat and copy the data - // OpenCV uses BGR/BGRA format internally, but we'll create as-is and let - // preprocessFrame handle conversion - int cvType = (channels == 3) ? 
CV_8UC3 : CV_8UC4; - cv::Mat image(height, width, cvType); + if (tensorView.scalarType != ScalarType::Byte) { + throw RnExecutorchError( + RnExecutorchErrorCode::InvalidUserInput, + "Invalid pixel data: scalarType must be BYTE (Uint8Array)"); + } - // Copy data from ArrayBuffer to cv::Mat - std::memcpy(image.data, arrayBuffer.data(runtime), expectedSize); + // Create cv::Mat directly from dataPtr (zero-copy view) + uint8_t *dataPtr = static_cast(tensorView.dataPtr); + cv::Mat image(height, width, CV_8UC3, dataPtr); - return image; + // Clone to own the data, since JS memory may be GC'd + return image.clone(); } } // namespace models diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp index f64855131..9fbbaeb74 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp @@ -1,4 +1,6 @@ #include "FrameExtractor.h" +#include +#include #include #ifdef __APPLE__ @@ -20,7 +22,8 @@ cv::Mat FrameExtractor::extractFromNativeBuffer(uint64_t bufferPtr) { #elif defined(__ANDROID__) return extractFromAHardwareBuffer(reinterpret_cast(bufferPtr)); #else - throw std::runtime_error("NativeBuffer not supported on this platform"); + throw RnExecutorchError(RnExecutorchErrorCode::NotSupported, + "NativeBuffer not supported on this platform"); #endif } @@ -40,41 +43,25 @@ cv::Mat FrameExtractor::extractFromCVPixelBuffer(void *pixelBuffer) { cv::Mat mat; - // Log pixel format once for debugging - static bool loggedPixelFormat = false; - if (!loggedPixelFormat) { - log(LOG_LEVEL::Debug, "CVPixelBuffer format code: ", pixelFormat); - loggedPixelFormat = true; - } - if (pixelFormat == kCVPixelFormatType_32BGRA) { // BGRA format (most common on iOS when using pixelFormat: 'rgb') - if (!loggedPixelFormat) { - log(LOG_LEVEL::Debug, "Extracting from CVPixelBuffer: 
BGRA format, ", - width, "x", height, ", stride: ", bytesPerRow); - } mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC4, baseAddress, bytesPerRow); } else if (pixelFormat == kCVPixelFormatType_32RGBA) { // RGBA format - if (!loggedPixelFormat) { - log(LOG_LEVEL::Debug, "Extracting from CVPixelBuffer: RGBA format, ", - width, "x", height, ", stride: ", bytesPerRow); - } mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC4, baseAddress, bytesPerRow); } else if (pixelFormat == kCVPixelFormatType_24RGB) { // RGB format - if (!loggedPixelFormat) { - log(LOG_LEVEL::Debug, "Extracting from CVPixelBuffer: RGB format, ", - width, "x", height, ", stride: ", bytesPerRow); - } mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC3, baseAddress, bytesPerRow); } else { CVPixelBufferUnlockBaseAddress(buffer, kCVPixelBufferLock_ReadOnly); - throw std::runtime_error("Unsupported CVPixelBuffer format: " + - std::to_string(pixelFormat)); + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported CVPixelBuffer format: %u", pixelFormat); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); } // Note: We don't unlock here - Vision Camera manages the lifecycle @@ -99,50 +86,36 @@ cv::Mat FrameExtractor::extractFromAHardwareBuffer(void *hardwareBuffer) { buffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, nullptr, &data); if (lockResult != 0) { - throw std::runtime_error("Failed to lock AHardwareBuffer"); + throw RnExecutorchError(RnExecutorchErrorCode::AccessFailed, + "Failed to lock AHardwareBuffer"); } cv::Mat mat; - // Log format once for debugging - static bool loggedFormat = false; - if (!loggedFormat) { - log(LOG_LEVEL::Debug, "AHardwareBuffer format code: ", desc.format); - loggedFormat = true; - } - if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM) { // RGBA format (expected when using pixelFormat: 'rgb' on Android) - if (!loggedFormat) { - log(LOG_LEVEL::Debug, "Extracting from 
AHardwareBuffer: RGBA format, ", - desc.width, "x", desc.height, ", stride: ", desc.stride * 4); - } mat = cv::Mat(desc.height, desc.width, CV_8UC4, data, desc.stride * 4); } else if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM) { // RGBX format (treated as RGBA) - if (!loggedFormat) { - log(LOG_LEVEL::Debug, "Extracting from AHardwareBuffer: RGBX format, ", - desc.width, "x", desc.height, ", stride: ", desc.stride * 4); - } mat = cv::Mat(desc.height, desc.width, CV_8UC4, data, desc.stride * 4); } else if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM) { // RGB format (less common) - if (!loggedFormat) { - log(LOG_LEVEL::Debug, "Extracting from AHardwareBuffer: RGB format, ", - desc.width, "x", desc.height, ", stride: ", desc.stride * 3); - } mat = cv::Mat(desc.height, desc.width, CV_8UC3, data, desc.stride * 3); } else { AHardwareBuffer_unlock(buffer, nullptr); - throw std::runtime_error("Unsupported AHardwareBuffer format: " + - std::to_string(desc.format)); + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported AHardwareBuffer format: %u", desc.format); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); } // Note: We don't unlock here - Vision Camera manages the lifecycle return mat; #else - throw std::runtime_error("AHardwareBuffer requires Android API 26+"); + throw RnExecutorchError(RnExecutorchErrorCode::NotSupported, + "AHardwareBuffer requires Android API 26+"); #endif // __ANDROID_API__ >= 26 } #endif // __ANDROID__ diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp index 02faa072d..087aec816 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp @@ -1,7 +1,8 @@ #include "FrameProcessor.h" #include "FrameExtractor.h" +#include +#include 
#include -#include namespace rnexecutorch { namespace utils { @@ -16,34 +17,21 @@ cv::Mat FrameProcessor::extractFrame(jsi::Runtime &runtime, // Try zero-copy path first (nativeBuffer) if (hasNativeBuffer(runtime, frameData)) { - static bool loggedPath = false; - if (!loggedPath) { - log(LOG_LEVEL::Debug, "FrameProcessor: Using zero-copy nativeBuffer"); - loggedPath = true; - } - try { - return extractFromNativeBuffer(runtime, frameData, width, height); + return extractFromNativeBuffer(runtime, frameData); } catch (const std::exception &e) { - log(LOG_LEVEL::Debug, - "FrameProcessor: nativeBuffer extraction failed: ", e.what()); - log(LOG_LEVEL::Debug, "FrameProcessor: Falling back to ArrayBuffer"); + // Fallback to ArrayBuffer on failure } } // Fallback to ArrayBuffer path (with copy) if (frameData.hasProperty(runtime, "data")) { - static bool loggedPath = false; - if (!loggedPath) { - log(LOG_LEVEL::Debug, "FrameProcessor: Using ArrayBuffer (with copy)"); - loggedPath = true; - } - return extractFromArrayBuffer(runtime, frameData, width, height); } // No valid frame data source - throw std::runtime_error( + throw RnExecutorchError( + RnExecutorchErrorCode::InvalidUserInput, "FrameProcessor: No valid frame data (neither nativeBuffer nor data " "property found)"); } @@ -52,8 +40,9 @@ cv::Size FrameProcessor::getFrameSize(jsi::Runtime &runtime, const jsi::Object &frameData) { if (!frameData.hasProperty(runtime, "width") || !frameData.hasProperty(runtime, "height")) { - throw std::runtime_error("FrameProcessor: Frame data missing width or " - "height property"); + throw RnExecutorchError( + RnExecutorchErrorCode::InvalidUserInput, + "FrameProcessor: Frame data missing width or height property"); } int width = @@ -70,8 +59,7 @@ bool FrameProcessor::hasNativeBuffer(jsi::Runtime &runtime, } cv::Mat FrameProcessor::extractFromNativeBuffer(jsi::Runtime &runtime, - const jsi::Object &frameData, - int width, int height) { + const jsi::Object &frameData) { auto 
nativeBufferValue = frameData.getProperty(runtime, "nativeBuffer"); // Handle bigint pointer value from JavaScript @@ -79,15 +67,8 @@ cv::Mat FrameProcessor::extractFromNativeBuffer(jsi::Runtime &runtime, nativeBufferValue.asBigInt(runtime).asUint64(runtime)); // Use FrameExtractor to get cv::Mat from platform-specific buffer - cv::Mat frame = FrameExtractor::extractFromNativeBuffer(bufferPtr); - - // Validate extracted frame dimensions match expected - if (frame.cols != width || frame.rows != height) { - log(LOG_LEVEL::Debug, "FrameProcessor: Dimension mismatch - expected ", - width, "x", height, " but got ", frame.cols, "x", frame.rows); - } - - return frame; + // Native buffer contains all metadata (width, height, format) + return FrameExtractor::extractFromNativeBuffer(bufferPtr); } cv::Mat FrameProcessor::extractFromArrayBuffer(jsi::Runtime &runtime, @@ -103,39 +84,22 @@ cv::Mat FrameProcessor::extractFromArrayBuffer(jsi::Runtime &runtime, size_t expectedRGBAStride = width * 4; size_t expectedRGBStride = width * 3; - cv::Mat frame; - if (stride == expectedRGBAStride || bufferSize >= width * height * 4) { // RGBA format with potential padding - frame = cv::Mat(height, width, CV_8UC4, data, stride); - - static bool loggedFormat = false; - if (!loggedFormat) { - log(LOG_LEVEL::Debug, - "FrameProcessor: ArrayBuffer format is RGBA, " - "stride: ", - stride); - loggedFormat = true; - } + return cv::Mat(height, width, CV_8UC4, data, stride); } else if (stride >= expectedRGBStride) { // RGB format - frame = cv::Mat(height, width, CV_8UC3, data, stride); - - static bool loggedFormat = false; - if (!loggedFormat) { - log(LOG_LEVEL::Debug, - "FrameProcessor: ArrayBuffer format is RGB, stride: ", stride); - loggedFormat = true; - } + return cv::Mat(height, width, CV_8UC3, data, stride); } else { - throw std::runtime_error( - "FrameProcessor: Unexpected buffer size - expected " + - std::to_string(expectedRGBStride) + " or " + - std::to_string(expectedRGBAStride) + " bytes 
per row, got " + - std::to_string(stride)); + char errorMessage[200]; + std::snprintf( + errorMessage, sizeof(errorMessage), + "FrameProcessor: Unexpected buffer size - expected %zu or %zu bytes " + "per row, got %zu", + expectedRGBStride, expectedRGBAStride, stride); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); } - - return frame; } } // namespace utils diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h index e37b5bfd6..0838b6594 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h @@ -45,8 +45,8 @@ class FrameProcessor { * * @return cv::Mat wrapping or containing the frame data * - * @throws std::runtime_error if neither nativeBuffer nor data is available - * @throws std::runtime_error if nativeBuffer extraction fails + * @throws RnExecutorchError if neither nativeBuffer nor data is available + * @throws RnExecutorchError if nativeBuffer extraction fails * * @note The returned cv::Mat may not own the data (zero-copy path). * Caller must ensure the source frame remains valid during use. @@ -62,7 +62,7 @@ class FrameProcessor { * * @return cv::Size with frame width and height * - * @throws std::runtime_error if width or height properties are missing + * @throws RnExecutorchError if width or height properties are missing */ static cv::Size getFrameSize(jsi::Runtime &runtime, const jsi::Object &frameData); @@ -81,15 +81,15 @@ class FrameProcessor { /** * @brief Extract frame from nativeBuffer pointer (zero-copy) * + * Native buffer contains all metadata (width, height, format), so no need to + * pass dimensions separately. 
+ * * @param runtime JSI runtime * @param frameData JSI object with nativeBuffer property - * @param width Frame width - * @param height Frame height * @return cv::Mat wrapping the native buffer data */ static cv::Mat extractFromNativeBuffer(jsi::Runtime &runtime, - const jsi::Object &frameData, - int width, int height); + const jsi::Object &frameData); /** * @brief Extract frame from ArrayBuffer (with copy) diff --git a/packages/react-native-executorch/src/hooks/useModule.ts b/packages/react-native-executorch/src/hooks/useModule.ts index 624094afb..f5f260787 100644 --- a/packages/react-native-executorch/src/hooks/useModule.ts +++ b/packages/react-native-executorch/src/hooks/useModule.ts @@ -6,7 +6,6 @@ interface Module { load: (...args: any[]) => Promise; forward: (...args: any[]) => Promise; delete: () => void; - nativeModule?: any; // JSI host object with native methods } interface ModuleConstructor { diff --git a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts index 06acf6654..72e797437 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts @@ -1,25 +1,7 @@ import { BaseModule } from '../BaseModule'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { RnExecutorchError } from '../../errors/errorUtils'; - -/** - * Raw pixel data for vision model inference. - */ -export type PixelData = { - data: ArrayBuffer; - width: number; - height: number; - channels: number; -}; - -/** - * VisionCamera Frame object for real-time processing. - */ -export type Frame = { - getNativeBuffer(): { pointer: number; release(): void }; - width: number; - height: number; -}; +import { Frame, PixelData, ScalarType } from '../../types/common'; /** * Base class for computer vision models that support multiple input types. 
@@ -74,8 +56,6 @@ export abstract class VisionModule extends BaseModule { nativeBuffer = frame.getNativeBuffer(); const frameData = { nativeBuffer: nativeBuffer.pointer, - width: frame.width, - height: frame.height, }; return nativeGenerateFromFrame(frameData, ...args); } finally { @@ -107,10 +87,9 @@ export abstract class VisionModule extends BaseModule { * * // Pixel data (async) * const result2 = await model.forward({ - * data: pixelBuffer, - * width: 640, - * height: 480, - * channels: 3 + * dataPtr: new Uint8Array(pixelBuffer), + * sizes: [480, 640, 3], + * scalarType: ScalarType.BYTE * }); * * // For VisionCamera frames, use runOnFrame in worklet: @@ -136,11 +115,13 @@ export abstract class VisionModule extends BaseModule { return await this.nativeModule.generateFromString(input, ...args); } else if ( typeof input === 'object' && - 'data' in input && - input.data instanceof ArrayBuffer && - typeof input.width === 'number' && - typeof input.height === 'number' && - typeof input.channels === 'number' + 'dataPtr' in input && + input.dataPtr instanceof Uint8Array && + 'sizes' in input && + Array.isArray(input.sizes) && + input.sizes.length === 3 && + 'scalarType' in input && + input.scalarType === ScalarType.BYTE ) { // Pixel data β†’ generateFromPixels() return await this.nativeModule.generateFromPixels(input, ...args); diff --git a/packages/react-native-executorch/src/types/common.ts b/packages/react-native-executorch/src/types/common.ts index 439e18597..7a3ca8afc 100644 --- a/packages/react-native-executorch/src/types/common.ts +++ b/packages/react-native-executorch/src/types/common.ts @@ -151,6 +151,45 @@ export type LabelEnum = Readonly>; * @category Types */ export type Triple = readonly [T, T, T]; +/** + * Represents raw pixel data in RGB format for vision models. 
+ * + * This type extends TensorPtr with constraints specific to image data: + * - dataPtr must be Uint8Array (8-bit unsigned integers) + * - scalarType is always BYTE (ScalarType.BYTE) + * - sizes represents [height, width, channels] where channels must be 3 (RGB) + * + * @category Types + * @example + * ```typescript + * const pixelData: PixelData = { + * dataPtr: new Uint8Array(width * height * 3), // RGB pixel data + * sizes: [height, width, 3], // [height, width, channels] + * scalarType: ScalarType.BYTE + * }; + * ``` + */ +export interface PixelData extends Omit { + /** + * RGB pixel data as Uint8Array. + * Expected format: RGB (3 channels), not RGBA or BGRA. + * Size must equal: width * height * 3 + */ + dataPtr: Uint8Array; + + /** + * Dimensions of the pixel data: [height, width, channels]. + * - sizes[0]: height (number of rows) + * - sizes[1]: width (number of columns) + * - sizes[2]: channels (must be 3 for RGB) + */ + sizes: [number, number, 3]; + + /** + * Scalar type is always BYTE for pixel data. + */ + scalarType: ScalarType.BYTE; +} /** * Frame data for vision model processing. @@ -158,13 +197,7 @@ export type Triple = readonly [T, T, T]; * 1. ArrayBuffer mode (with memory copy) - Compatible with all platforms * 2. NativeBuffer mode (zero-copy) - Better performance with Vision Camera v5 */ -export interface FrameData { - /** - * Raw pixel data as ArrayBuffer (requires memory copy). - * Use this for compatibility or when getNativeBuffer is not available. - */ - data?: ArrayBuffer | ArrayBufferLike; - +export interface Frame { /** * Pointer to native platform buffer (zero-copy, best performance). 
* - On iOS: CVPixelBufferRef pointer @@ -172,15 +205,5 @@ export interface FrameData { * * Obtain from Vision Camera v5: `frame.getNativeBuffer().pointer` */ - nativeBuffer?: bigint; - - /** - * Frame width in pixels - */ - width: number; - - /** - * Frame height in pixels - */ - height: number; + getNativeBuffer(): { pointer: number; release(): void }; } diff --git a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index 2dddaad64..abb0142a7 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ b/packages/react-native-executorch/src/types/objectDetection.ts @@ -237,10 +237,4 @@ export interface ObjectDetectionType { * @returns Array of Detection objects representing detected items in the frame. */ runOnFrame: ((frame: any, detectionThreshold?: number) => Detection[]) | null; - - /** - * Direct reference to the module instance for advanced use cases. - * Most users should use `forward()` for async processing or `runOnFrame` for real-time frame processing. 
- */ - moduleInstance: any; } From 53bcd96ae50f327859a6ae96a6c04609aa2bde05 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Tue, 17 Feb 2026 17:51:37 +0100 Subject: [PATCH 08/37] fix: change Frame import in BaseModule --- packages/react-native-executorch/src/modules/BaseModule.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/react-native-executorch/src/modules/BaseModule.ts b/packages/react-native-executorch/src/modules/BaseModule.ts index 0870a30b6..41a2da6cf 100644 --- a/packages/react-native-executorch/src/modules/BaseModule.ts +++ b/packages/react-native-executorch/src/modules/BaseModule.ts @@ -1,4 +1,4 @@ -import { ResourceSource } from '../types/common'; +import { Frame, ResourceSource } from '../types/common'; import { TensorPtr } from '../types/common'; /** @@ -51,9 +51,9 @@ export abstract class BaseModule { * @param args Additional model-specific arguments (e.g., threshold, options) * @returns Model-specific output (e.g., detections, classifications, embeddings) * - * @see {@link FrameData} for frame data format details + * @see {@link Frame} for frame data format details */ - public generateFromFrame!: (frameData: FrameData, ...args: any[]) => any; + public generateFromFrame!: (frameData: Frame, ...args: any[]) => any; /** * Load the model and prepare it for inference. 
From cd0b123c76f95785d3ad860c187e4e6f9aae15db Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 18 Feb 2026 12:49:15 +0100 Subject: [PATCH 09/37] feat: use TensorPtrish type for Pixel data input --- .../app/object_detection/index.tsx | 119 ++++++------------ .../rnexecutorch/models/VisionModel.cpp | 13 +- .../common/rnexecutorch/models/VisionModel.h | 29 ++--- .../rnexecutorch/utils/FrameExtractor.cpp | 2 +- .../rnexecutorch/utils/FrameProcessor.cpp | 12 +- .../modules/computer_vision/VisionModule.ts | 2 +- .../src/types/common.ts | 2 +- .../src/types/objectDetection.ts | 22 ++-- 8 files changed, 72 insertions(+), 129 deletions(-) diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index 54c0eb18f..d843682eb 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -4,6 +4,8 @@ import { Detection, useObjectDetection, SSDLITE_320_MOBILENET_V3_LARGE, + ScalarType, + PixelData, } from 'react-native-executorch'; import { View, StyleSheet, Image, TouchableOpacity, Text } from 'react-native'; import ImageWithBboxes from '../../components/ImageWithBboxes'; @@ -11,62 +13,6 @@ import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from '../../context'; import ScreenWrapper from '../../ScreenWrapper'; import ColorPalette from '../../colors'; -import { Images } from 'react-native-nitro-image'; - -// Helper function to convert BGRA to RGB -function convertBGRAtoRGB( - buffer: ArrayBuffer, - width: number, - height: number -): ArrayBuffer { - const source = new Uint8Array(buffer); - const rgb = new Uint8Array(width * height * 3); - - for (let i = 0; i < width * height; i++) { - // BGRA format: [B, G, R, A] β†’ RGB: [R, G, B] - rgb[i * 3 + 0] = source[i * 4 + 2]; // R - rgb[i * 3 + 1] = source[i * 4 + 1]; // G - rgb[i * 3 + 2] = source[i * 4 + 0]; // B - } - - return rgb.buffer; -} - -// Helper 
function to convert image URI to raw RGB pixel data -async function imageUriToPixelData( - uri: string, - targetWidth: number, - targetHeight: number -): Promise<{ - data: ArrayBuffer; - width: number; - height: number; - channels: number; -}> { - try { - // Load image and resize to target dimensions - const image = await Images.loadFromFileAsync(uri); - const resized = image.resize(targetWidth, targetHeight); - - // Get pixel data as ArrayBuffer (BGRA format from NitroImage) - const rawPixelData = resized.toRawPixelData(); - const buffer = - rawPixelData instanceof ArrayBuffer ? rawPixelData : rawPixelData.buffer; - - // Convert BGRA to RGB as required by the native API - const rgbBuffer = convertBGRAtoRGB(buffer, targetWidth, targetHeight); - - return { - data: rgbBuffer, - width: targetWidth, - height: targetHeight, - channels: 3, // RGB - }; - } catch (error) { - console.error('Error loading image with NitroImage:', error); - throw error; - } -} export default function ObjectDetectionScreen() { const [imageUri, setImageUri] = useState(''); @@ -109,30 +55,45 @@ export default function ObjectDetectionScreen() { }; const runForwardPixels = async () => { - if (imageUri && imageDimensions) { - try { - console.log('Converting image to pixel data...'); - // Use original dimensions - let the model resize internally - const pixelData = await imageUriToPixelData( - imageUri, - imageDimensions.width, - imageDimensions.height - ); - - console.log('Running forward with pixel data...', { - width: pixelData.width, - height: pixelData.height, - channels: pixelData.channels, - dataSize: pixelData.data.byteLength, - }); - - // Run inference using unified forward() API - const output = await ssdLite.forward(pixelData, 0.3); - console.log('Pixel data result:', output.length, 'detections'); - setResults(output); - } catch (e) { - console.error('Error in runForwardPixels:', e); + try { + console.log('Testing with hardcoded pixel data...'); + + // Create a simple 320x320 test image 
(all zeros - black image) + // In a real scenario, you would load actual image pixel data here + const width = 320; + const height = 320; + const channels = 3; // RGB + + // Create a black image (you can replace this with actual pixel data) + const rgbData = new Uint8Array(width * height * channels); + + // Optionally, add some test pattern (e.g., white square in center) + for (let y = 100; y < 220; y++) { + for (let x = 100; x < 220; x++) { + const idx = (y * width + x) * 3; + rgbData[idx + 0] = 255; // R + rgbData[idx + 1] = 255; // G + rgbData[idx + 2] = 255; // B + } } + + const pixelData: PixelData = { + dataPtr: rgbData, + sizes: [height, width, channels], + scalarType: ScalarType.BYTE, + }; + + console.log('Running forward with hardcoded pixel data...', { + sizes: pixelData.sizes, + dataSize: pixelData.dataPtr.byteLength, + }); + + // Run inference using unified forward() API + const output = await ssdLite.forward(pixelData, 0.3); + console.log('Pixel data result:', output.length, 'detections'); + setResults(output); + } catch (e) { + console.error('Error in runForwardPixels:', e); } }; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index fd2c40ee8..8155b8819 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -2,8 +2,6 @@ #include #include #include -#include -#include #include namespace rnexecutorch { @@ -21,12 +19,7 @@ cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime, return preprocessFrame(frame); } -cv::Mat VisionModel::extractFromPixels(jsi::Runtime &runtime, - const jsi::Object &pixelData) const { - // PixelData follows TensorPtr structure (dataPtr, sizes, scalarType) - // Use JSI conversion helper to extract the data - auto tensorView = jsi::fromHostObject(runtime, pixelData); - +cv::Mat 
VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { // Validate dimensions: sizes must be [height, width, channels] if (tensorView.sizes.size() != 3) { char errorMessage[100]; @@ -59,11 +52,11 @@ cv::Mat VisionModel::extractFromPixels(jsi::Runtime &runtime, } // Create cv::Mat directly from dataPtr (zero-copy view) + // Data is valid for the duration of this synchronous call uint8_t *dataPtr = static_cast(tensorView.dataPtr); cv::Mat image(height, width, CV_8UC3, dataPtr); - // Clone to own the data, since JS memory may be GC'd - return image.clone(); + return image; } } // namespace models diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h index 9ba5cf7e4..c362d745f 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h @@ -129,40 +129,35 @@ class VisionModel : public BaseModel { const jsi::Value &frameData) const; /** - * @brief Extract cv::Mat from raw pixel data (ArrayBuffer) sent from + * @brief Extract cv::Mat from raw pixel data (TensorPtr) sent from * JavaScript * * This method enables users to run inference on raw pixel data without file * I/O. Useful for processing images already in memory (e.g., from canvas, * image library). 
* - * @param runtime JSI runtime - * @param pixelData JSI object containing: - * - data: ArrayBuffer with raw pixel values - * - width: number - image width - * - height: number - image height - * - channels: number - number of channels (3 for RGB, 4 for - * RGBA) + * @param tensorView JSTensorViewIn containing: + * - dataPtr: Pointer to raw pixel values (RGB format) + * - sizes: [height, width, channels] - must be 3D + * - scalarType: Must be ScalarType::Byte (Uint8Array) * * @return cv::Mat containing the pixel data * - * @throws std::runtime_error if pixelData format is invalid + * @throws RnExecutorchError if tensorView format is invalid * * @note The returned cv::Mat owns a copy of the data - * @note Expected pixel format: RGB or RGBA, row-major order + * @note Expected pixel format: RGB (3 channels), row-major order * @note Typical usage from JS: * @code - * const pixels = new Uint8Array([...]); // Raw pixel data + * const pixels = new Uint8Array([...]); // Raw RGB pixel data * const result = model.generateFromPixels({ - * data: pixels.buffer, - * width: 640, - * height: 480, - * channels: 3 + * dataPtr: pixels, + * sizes: [480, 640, 3], + * scalarType: ScalarType.BYTE * }, 0.5); * @endcode */ - cv::Mat extractFromPixels(jsi::Runtime &runtime, - const jsi::Object &pixelData) const; + cv::Mat extractFromPixels(const JSTensorViewIn &tensorView) const; }; } // namespace models diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp index 9fbbaeb74..900eae297 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp @@ -86,7 +86,7 @@ cv::Mat FrameExtractor::extractFromAHardwareBuffer(void *hardwareBuffer) { buffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, nullptr, &data); if (lockResult != 0) { - throw 
RnExecutorchError(RnExecutorchErrorCode::AccessFailed, + throw RnExecutorchError(RnExecutorchErrorCode::UnknownError, "Failed to lock AHardwareBuffer"); } diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp index 087aec816..5e593dfd0 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp @@ -9,13 +9,8 @@ namespace utils { cv::Mat FrameProcessor::extractFrame(jsi::Runtime &runtime, const jsi::Object &frameData) { - // Get frame dimensions - int width = - static_cast(frameData.getProperty(runtime, "width").asNumber()); - int height = - static_cast(frameData.getProperty(runtime, "height").asNumber()); - // Try zero-copy path first (nativeBuffer) + // Native buffer contains dimensions, so we don't need width/height properties if (hasNativeBuffer(runtime, frameData)) { try { return extractFromNativeBuffer(runtime, frameData); @@ -25,7 +20,12 @@ cv::Mat FrameProcessor::extractFrame(jsi::Runtime &runtime, } // Fallback to ArrayBuffer path (with copy) + // Get frame dimensions for ArrayBuffer path if (frameData.hasProperty(runtime, "data")) { + int width = + static_cast(frameData.getProperty(runtime, "width").asNumber()); + int height = + static_cast(frameData.getProperty(runtime, "height").asNumber()); return extractFromArrayBuffer(runtime, frameData, width, height); } diff --git a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts index 72e797437..d6a0038ee 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts @@ -73,7 +73,7 @@ export abstract class VisionModule extends BaseModule { * 1. 
**String path/URI**: File path, URL, or Base64-encoded string * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) * - * **Note**: For VisionCamera frame processing, use `forwardSync` instead. + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. * This method is async and cannot be called in worklet context. * * @param input - Image source (string path or PixelData object) diff --git a/packages/react-native-executorch/src/types/common.ts b/packages/react-native-executorch/src/types/common.ts index 7a3ca8afc..1ebfb3534 100644 --- a/packages/react-native-executorch/src/types/common.ts +++ b/packages/react-native-executorch/src/types/common.ts @@ -205,5 +205,5 @@ export interface Frame { * * Obtain from Vision Camera v5: `frame.getNativeBuffer().pointer` */ - getNativeBuffer(): { pointer: number; release(): void }; + getNativeBuffer(): { pointer: bigint; release(): void }; } diff --git a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index abb0142a7..c2281598a 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ b/packages/react-native-executorch/src/types/objectDetection.ts @@ -1,5 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { ResourceSource } from './common'; +import { ResourceSource, PixelData, Frame } from './common'; /** * Represents a bounding box for a detected object in an image. 
@@ -190,22 +190,14 @@ export interface ObjectDetectionType { * * // Pixel data * const detections2 = await model.forward({ - * data: pixelBuffer, - * width: 640, - * height: 480, - * channels: 3 + * dataPtr: new Uint8Array(rgbPixels), + * sizes: [480, 640, 3], + * scalarType: ScalarType.BYTE * }); * ``` */ forward: ( - input: - | string - | { - data: ArrayBuffer; - width: number; - height: number; - channels: number; - }, + input: string | PixelData, detectionThreshold?: number ) => Promise; @@ -236,5 +228,7 @@ export interface ObjectDetectionType { * @param detectionThreshold - The threshold for detection sensitivity. Default is 0.7. * @returns Array of Detection objects representing detected items in the frame. */ - runOnFrame: ((frame: any, detectionThreshold?: number) => Detection[]) | null; + runOnFrame: + | ((frame: Frame, detectionThreshold?: number) => Detection[]) + | null; } From e001142fee1b3fa946efbc2ae387f31c11d670fe Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 18 Feb 2026 13:03:22 +0100 Subject: [PATCH 10/37] refactor: add or remove empty lines --- .../rnexecutorch/models/classification/Classification.cpp | 2 +- .../rnexecutorch/models/embeddings/image/ImageEmbeddings.h | 2 +- .../models/image_segmentation/BaseImageSegmentation.h | 2 +- .../models/image_segmentation/ImageSegmentation.cpp | 2 +- .../common/rnexecutorch/models/style_transfer/StyleTransfer.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp index b9fad1b88..0fba07108 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp @@ -73,4 +73,4 @@ Classification::postprocess(const Tensor &tensor) { return probs; } -} // 
namespace rnexecutorch::models::classification \ No newline at end of file +} // namespace rnexecutorch::models::classification diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h index 9a1d6429b..7e114e939 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h @@ -27,4 +27,4 @@ class ImageEmbeddings final : public BaseEmbeddings { REGISTER_CONSTRUCTOR(models::embeddings::ImageEmbeddings, std::string, std::shared_ptr); -} // namespace rnexecutorch \ No newline at end of file +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h index 34ad8dffd..f46f41d69 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h @@ -58,4 +58,4 @@ class BaseImageSegmentation : public BaseModel { REGISTER_CONSTRUCTOR(models::image_segmentation::BaseImageSegmentation, std::string, std::vector, std::vector, std::shared_ptr); -} // namespace rnexecutorch \ No newline at end of file +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp index 08f2a4683..a2c1ae865 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +++ 
b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp @@ -167,4 +167,4 @@ std::shared_ptr ImageSegmentation::populateDictionary( return dictPtr; } -} // namespace rnexecutorch::models::image_segmentation \ No newline at end of file +} // namespace rnexecutorch::models::image_segmentation diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h index 8eed3c888..73744c4d8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h @@ -33,4 +33,4 @@ class StyleTransfer : public BaseModel { REGISTER_CONSTRUCTOR(models::style_transfer::StyleTransfer, std::string, std::shared_ptr); -} // namespace rnexecutorch \ No newline at end of file +} // namespace rnexecutorch From ca60d88c18305b19a1a1eec33fe514fc6d16067b Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Thu, 19 Feb 2026 22:34:20 +0100 Subject: [PATCH 11/37] fix: errors after rebase --- .../host_objects/JsiConversions.h | 10 - .../host_objects/ModelHostObject.h | 7 +- .../metaprogramming/FunctionHelpers.h | 67 +++- .../metaprogramming/TypeConcepts.h | 5 + yarn.lock | 311 ++++++++++++++++-- 5 files changed, 364 insertions(+), 36 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index b4409b0f2..5fc8615ea 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -360,16 +360,6 @@ inline jsi::Value getJsiValue(uint64_t val, jsi::Runtime &runtime) { return {runtime, bigInt}; } -inline jsi::Value getJsiValue(const 
std::vector &vec, - jsi::Runtime &runtime) { - jsi::Array array(runtime, vec.size()); - for (size_t i = 0; i < vec.size(); i++) { - // JS numbers are doubles. Large uint64s > 2^53 will lose precision. - array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); - } - return {runtime, array}; -} - inline jsi::Value getJsiValue(const std::vector &vec, jsi::Runtime &runtime) { jsi::Array array(runtime, vec.size()); diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h index 9554f1888..3190bc6f4 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -171,10 +172,10 @@ template class ModelHostObject : public JsiHostObject { "stream")); } - if constexpr (meta::DerivedFromOrSameAs) { + if constexpr (meta::HasGenerateFromFrame) { addFunctions(JSI_EXPORT_FUNCTION( - ModelHostObject, synchronousHostFunction<&Model::streamStop>, - "streamStop")); + ModelHostObject, visionHostFunction<&Model::generateFromFrame>, + "generateFromFrame")); } if constexpr (meta::HasGenerateFromPixels) { diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h index 8290a810b..a48aa0119 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h @@ -3,12 +3,39 @@ #include #include #include +#include #include namespace rnexecutorch::meta { using namespace facebook; +// ========================================================================= +// 1. 
Function Traits (Extracts Arity, Return Type, Args) +// ========================================================================= + +template struct FunctionTraits; + +// Specialization for Member Functions +template +struct FunctionTraits { + static constexpr std::size_t arity = sizeof...(Args); + using return_type = R; + using args_tuple = std::tuple; +}; + +// Specialization for const Member Functions +template +struct FunctionTraits { + static constexpr std::size_t arity = sizeof...(Args); + using return_type = R; + using args_tuple = std::tuple; +}; + +// ========================================================================= +// 2. Argument Counting Helpers +// ========================================================================= + template constexpr std::size_t getArgumentCount(R (Model::*f)(Types...)) { return sizeof...(Types); @@ -19,6 +46,10 @@ constexpr std::size_t getArgumentCount(R (Model::*f)(Types...) const) { return sizeof...(Types); } +// ========================================================================= +// 3. JSI -> Tuple Conversion Logic +// ========================================================================= + template std::tuple fillTupleFromArgs(std::index_sequence, const jsi::Value *args, @@ -31,7 +62,6 @@ std::tuple fillTupleFromArgs(std::index_sequence, * arguments for method supplied with a pointer. The types in the tuple are * inferred from the method pointer. */ - template std::tuple createArgsTupleFromJsi(R (Model::*f)(Types...), const jsi::Value *args, @@ -47,4 +77,37 @@ std::tuple createArgsTupleFromJsi(R (Model::*f)(Types...) 
const, return fillTupleFromArgs(std::index_sequence_for{}, args, runtime); } -} // namespace rnexecutorch::meta \ No newline at end of file + +// Overload for free functions (used by TailSignature dummy) +template +std::tuple createArgsTupleFromJsi(void (*f)(Types...), + const jsi::Value *args, + jsi::Runtime &runtime) { + return fillTupleFromArgs(std::index_sequence_for{}, args, + runtime); +} + +// ========================================================================= +// 4. Tail Signature Helper (Crucial for Vision Functions) +// ========================================================================= + +// Extracts the "Tail" arguments of a function signature, skipping the first +// two arguments (Runtime and FrameValue). +template struct TailSignature; + +// Non-const member function specialization +template +struct TailSignature { + // A dummy function that has the signature of just the "Rest" arguments. + static void dummy(Rest...) {} +}; + +// Const member function specialization +template +struct TailSignature { + static void dummy(Rest...) 
{} +}; + +} // namespace rnexecutorch::meta diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h index 8100a471b..f625bf6e7 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h @@ -21,6 +21,11 @@ concept HasGenerateFromPixels = requires(T t) { { &T::generateFromPixels }; }; +template +concept HasGenerateFromFrame = requires(T t) { + { &T::generateFromFrame }; +}; + template concept HasEncode = requires(T t) { { &T::encode }; diff --git a/yarn.lock b/yarn.lock index 436005c8d..3d2d9f7ee 100644 --- a/yarn.lock +++ b/yarn.lock @@ -53,6 +53,17 @@ __metadata: languageName: node linkType: hard +"@babel/code-frame@npm:^7.28.6, @babel/code-frame@npm:^7.29.0": + version: 7.29.0 + resolution: "@babel/code-frame@npm:7.29.0" + dependencies: + "@babel/helper-validator-identifier": "npm:^7.28.5" + js-tokens: "npm:^4.0.0" + picocolors: "npm:^1.1.1" + checksum: 10/199e15ff89007dd30675655eec52481cb245c9fdf4f81e4dc1f866603b0217b57aff25f5ffa0a95bbc8e31eb861695330cd7869ad52cc211aa63016320ef72c5 + languageName: node + linkType: hard + "@babel/compat-data@npm:^7.20.5, @babel/compat-data@npm:^7.27.2, @babel/compat-data@npm:^7.27.7, @babel/compat-data@npm:^7.28.5": version: 7.28.5 resolution: "@babel/compat-data@npm:7.28.5" @@ -110,6 +121,19 @@ __metadata: languageName: node linkType: hard +"@babel/generator@npm:^7.29.0": + version: 7.29.1 + resolution: "@babel/generator@npm:7.29.1" + dependencies: + "@babel/parser": "npm:^7.29.0" + "@babel/types": "npm:^7.29.0" + "@jridgewell/gen-mapping": "npm:^0.3.12" + "@jridgewell/trace-mapping": "npm:^0.3.28" + jsesc: "npm:^3.0.2" + checksum: 10/61fe4ddd6e817aa312a14963ccdbb5c9a8c57e8b97b98d19a8a99ccab2215fda1a5f52bc8dd8d2e3c064497ddeb3ab8ceb55c76fa0f58f8169c34679d2256fe0 + languageName: 
node + linkType: hard + "@babel/helper-annotate-as-pure@npm:^7.27.1, @babel/helper-annotate-as-pure@npm:^7.27.3": version: 7.27.3 resolution: "@babel/helper-annotate-as-pure@npm:7.27.3" @@ -149,6 +173,23 @@ __metadata: languageName: node linkType: hard +"@babel/helper-create-class-features-plugin@npm:^7.28.6": + version: 7.28.6 + resolution: "@babel/helper-create-class-features-plugin@npm:7.28.6" + dependencies: + "@babel/helper-annotate-as-pure": "npm:^7.27.3" + "@babel/helper-member-expression-to-functions": "npm:^7.28.5" + "@babel/helper-optimise-call-expression": "npm:^7.27.1" + "@babel/helper-replace-supers": "npm:^7.28.6" + "@babel/helper-skip-transparent-expression-wrappers": "npm:^7.27.1" + "@babel/traverse": "npm:^7.28.6" + semver: "npm:^6.3.1" + peerDependencies: + "@babel/core": ^7.0.0 + checksum: 10/11f55607fcf66827ade745c0616aa3c6086aa655c0fab665dd3c4961829752e4c94c942262db30c4831ef9bce37ad444722e85ef1b7136587e28c6b1ef8ad43c + languageName: node + linkType: hard + "@babel/helper-create-regexp-features-plugin@npm:^7.18.6, @babel/helper-create-regexp-features-plugin@npm:^7.27.1": version: 7.28.5 resolution: "@babel/helper-create-regexp-features-plugin@npm:7.28.5" @@ -242,6 +283,13 @@ __metadata: languageName: node linkType: hard +"@babel/helper-plugin-utils@npm:^7.28.6": + version: 7.28.6 + resolution: "@babel/helper-plugin-utils@npm:7.28.6" + checksum: 10/21c853bbc13dbdddf03309c9a0477270124ad48989e1ad6524b83e83a77524b333f92edd2caae645c5a7ecf264ec6d04a9ebe15aeb54c7f33c037b71ec521e4a + languageName: node + linkType: hard + "@babel/helper-remap-async-to-generator@npm:^7.18.9, @babel/helper-remap-async-to-generator@npm:^7.27.1": version: 7.27.1 resolution: "@babel/helper-remap-async-to-generator@npm:7.27.1" @@ -268,6 +316,19 @@ __metadata: languageName: node linkType: hard +"@babel/helper-replace-supers@npm:^7.28.6": + version: 7.28.6 + resolution: "@babel/helper-replace-supers@npm:7.28.6" + dependencies: + "@babel/helper-member-expression-to-functions": 
"npm:^7.28.5" + "@babel/helper-optimise-call-expression": "npm:^7.27.1" + "@babel/traverse": "npm:^7.28.6" + peerDependencies: + "@babel/core": ^7.0.0 + checksum: 10/ad2724713a4d983208f509e9607e8f950855f11bd97518a700057eb8bec69d687a8f90dc2da0c3c47281d2e3b79cf1d14ecf1fe3e1ee0a8e90b61aee6759c9a7 + languageName: node + linkType: hard + "@babel/helper-skip-transparent-expression-wrappers@npm:^7.20.0, @babel/helper-skip-transparent-expression-wrappers@npm:^7.27.1": version: 7.27.1 resolution: "@babel/helper-skip-transparent-expression-wrappers@npm:7.27.1" @@ -343,6 +404,17 @@ __metadata: languageName: node linkType: hard +"@babel/parser@npm:^7.28.6, @babel/parser@npm:^7.29.0": + version: 7.29.0 + resolution: "@babel/parser@npm:7.29.0" + dependencies: + "@babel/types": "npm:^7.29.0" + bin: + parser: ./bin/babel-parser.js + checksum: 10/b1576dca41074997a33ee740d87b330ae2e647f4b7da9e8d2abd3772b18385d303b0cee962b9b88425e0f30d58358dbb8d63792c1a2d005c823d335f6a029747 + languageName: node + linkType: hard + "@babel/plugin-bugfix-firefox-class-in-computed-class-key@npm:^7.28.5": version: 7.28.5 resolution: "@babel/plugin-bugfix-firefox-class-in-computed-class-key@npm:7.28.5" @@ -767,6 +839,17 @@ __metadata: languageName: node linkType: hard +"@babel/plugin-syntax-typescript@npm:^7.28.6": + version: 7.28.6 + resolution: "@babel/plugin-syntax-typescript@npm:7.28.6" + dependencies: + "@babel/helper-plugin-utils": "npm:^7.28.6" + peerDependencies: + "@babel/core": ^7.0.0-0 + checksum: 10/5c55f9c63bd36cf3d7e8db892294c8f85000f9c1526c3a1cc310d47d1e174f5c6f6605e5cc902c4636d885faba7a9f3d5e5edc6b35e4f3b1fd4c2d58d0304fa5 + languageName: node + linkType: hard + "@babel/plugin-syntax-unicode-sets-regex@npm:^7.18.6": version: 7.18.6 resolution: "@babel/plugin-syntax-unicode-sets-regex@npm:7.18.6" @@ -779,7 +862,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-arrow-functions@npm:^7.0.0, @babel/plugin-transform-arrow-functions@npm:^7.0.0-0, 
@babel/plugin-transform-arrow-functions@npm:^7.24.7, @babel/plugin-transform-arrow-functions@npm:^7.27.1": +"@babel/plugin-transform-arrow-functions@npm:7.27.1, @babel/plugin-transform-arrow-functions@npm:^7.0.0, @babel/plugin-transform-arrow-functions@npm:^7.0.0-0, @babel/plugin-transform-arrow-functions@npm:^7.24.7, @babel/plugin-transform-arrow-functions@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-arrow-functions@npm:7.27.1" dependencies: @@ -838,7 +921,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-class-properties@npm:^7.0.0-0, @babel/plugin-transform-class-properties@npm:^7.25.4, @babel/plugin-transform-class-properties@npm:^7.27.1": +"@babel/plugin-transform-class-properties@npm:7.27.1, @babel/plugin-transform-class-properties@npm:^7.0.0-0, @babel/plugin-transform-class-properties@npm:^7.25.4, @babel/plugin-transform-class-properties@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-class-properties@npm:7.27.1" dependencies: @@ -862,7 +945,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-classes@npm:^7.0.0, @babel/plugin-transform-classes@npm:^7.0.0-0, @babel/plugin-transform-classes@npm:^7.25.4, @babel/plugin-transform-classes@npm:^7.28.4": +"@babel/plugin-transform-classes@npm:7.28.4, @babel/plugin-transform-classes@npm:^7.0.0, @babel/plugin-transform-classes@npm:^7.0.0-0, @babel/plugin-transform-classes@npm:^7.25.4, @babel/plugin-transform-classes@npm:^7.28.4": version: 7.28.4 resolution: "@babel/plugin-transform-classes@npm:7.28.4" dependencies: @@ -1136,7 +1219,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-nullish-coalescing-operator@npm:^7.0.0-0, @babel/plugin-transform-nullish-coalescing-operator@npm:^7.24.7, @babel/plugin-transform-nullish-coalescing-operator@npm:^7.27.1": +"@babel/plugin-transform-nullish-coalescing-operator@npm:7.27.1, @babel/plugin-transform-nullish-coalescing-operator@npm:^7.0.0-0, 
@babel/plugin-transform-nullish-coalescing-operator@npm:^7.24.7, @babel/plugin-transform-nullish-coalescing-operator@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-nullish-coalescing-operator@npm:7.27.1" dependencies: @@ -1196,6 +1279,18 @@ __metadata: languageName: node linkType: hard +"@babel/plugin-transform-optional-chaining@npm:7.27.1": + version: 7.27.1 + resolution: "@babel/plugin-transform-optional-chaining@npm:7.27.1" + dependencies: + "@babel/helper-plugin-utils": "npm:^7.27.1" + "@babel/helper-skip-transparent-expression-wrappers": "npm:^7.27.1" + peerDependencies: + "@babel/core": ^7.0.0-0 + checksum: 10/34b0f96400c259a2722740d17a001fe45f78d8ff052c40e29db2e79173be72c1cfe8d9681067e3f5da3989e4a557402df5c982c024c18257587a41e022f95640 + languageName: node + linkType: hard + "@babel/plugin-transform-optional-chaining@npm:^7.0.0-0, @babel/plugin-transform-optional-chaining@npm:^7.24.8, @babel/plugin-transform-optional-chaining@npm:^7.27.1, @babel/plugin-transform-optional-chaining@npm:^7.28.5": version: 7.28.5 resolution: "@babel/plugin-transform-optional-chaining@npm:7.28.5" @@ -1376,7 +1471,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-shorthand-properties@npm:^7.0.0, @babel/plugin-transform-shorthand-properties@npm:^7.0.0-0, @babel/plugin-transform-shorthand-properties@npm:^7.24.7, @babel/plugin-transform-shorthand-properties@npm:^7.27.1": +"@babel/plugin-transform-shorthand-properties@npm:7.27.1, @babel/plugin-transform-shorthand-properties@npm:^7.0.0, @babel/plugin-transform-shorthand-properties@npm:^7.0.0-0, @babel/plugin-transform-shorthand-properties@npm:^7.24.7, @babel/plugin-transform-shorthand-properties@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-shorthand-properties@npm:7.27.1" dependencies: @@ -1421,7 +1516,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-template-literals@npm:^7.0.0-0, @babel/plugin-transform-template-literals@npm:^7.27.1": 
+"@babel/plugin-transform-template-literals@npm:7.27.1, @babel/plugin-transform-template-literals@npm:^7.0.0-0, @babel/plugin-transform-template-literals@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-template-literals@npm:7.27.1" dependencies: @@ -1458,6 +1553,21 @@ __metadata: languageName: node linkType: hard +"@babel/plugin-transform-typescript@npm:^7.27.1": + version: 7.28.6 + resolution: "@babel/plugin-transform-typescript@npm:7.28.6" + dependencies: + "@babel/helper-annotate-as-pure": "npm:^7.27.3" + "@babel/helper-create-class-features-plugin": "npm:^7.28.6" + "@babel/helper-plugin-utils": "npm:^7.28.6" + "@babel/helper-skip-transparent-expression-wrappers": "npm:^7.27.1" + "@babel/plugin-syntax-typescript": "npm:^7.28.6" + peerDependencies: + "@babel/core": ^7.0.0-0 + checksum: 10/a0bccc531fa8710a45b0b593140273741e0e4a0721b1ef6ef9dfefae0bbe61528440d65aab7936929551fd76793272257d74f60cf66891352f793294930a4b67 + languageName: node + linkType: hard + "@babel/plugin-transform-unicode-escapes@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-unicode-escapes@npm:7.27.1" @@ -1481,7 +1591,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-unicode-regex@npm:^7.0.0, @babel/plugin-transform-unicode-regex@npm:^7.0.0-0, @babel/plugin-transform-unicode-regex@npm:^7.24.7, @babel/plugin-transform-unicode-regex@npm:^7.27.1": +"@babel/plugin-transform-unicode-regex@npm:7.27.1, @babel/plugin-transform-unicode-regex@npm:^7.0.0, @babel/plugin-transform-unicode-regex@npm:^7.0.0-0, @babel/plugin-transform-unicode-regex@npm:^7.24.7, @babel/plugin-transform-unicode-regex@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-unicode-regex@npm:7.27.1" dependencies: @@ -1614,6 +1724,21 @@ __metadata: languageName: node linkType: hard +"@babel/preset-typescript@npm:7.27.1": + version: 7.27.1 + resolution: "@babel/preset-typescript@npm:7.27.1" + dependencies: + "@babel/helper-plugin-utils": "npm:^7.27.1" + 
"@babel/helper-validator-option": "npm:^7.27.1" + "@babel/plugin-syntax-jsx": "npm:^7.27.1" + "@babel/plugin-transform-modules-commonjs": "npm:^7.27.1" + "@babel/plugin-transform-typescript": "npm:^7.27.1" + peerDependencies: + "@babel/core": ^7.0.0-0 + checksum: 10/9d8e75326b3c93fa016ba7aada652800fc77bc05fcc181888700a049935e8cf1284b549de18a5d62ef3591d02f097ea6de1111f7d71a991aaf36ba74657bd145 + languageName: node + linkType: hard + "@babel/preset-typescript@npm:^7.16.7, @babel/preset-typescript@npm:^7.23.0, @babel/preset-typescript@npm:^7.24.7": version: 7.28.5 resolution: "@babel/preset-typescript@npm:7.28.5" @@ -1647,6 +1772,17 @@ __metadata: languageName: node linkType: hard +"@babel/template@npm:^7.28.6": + version: 7.28.6 + resolution: "@babel/template@npm:7.28.6" + dependencies: + "@babel/code-frame": "npm:^7.28.6" + "@babel/parser": "npm:^7.28.6" + "@babel/types": "npm:^7.28.6" + checksum: 10/0ad6e32bf1e7e31bf6b52c20d15391f541ddd645cbd488a77fe537a15b280ee91acd3a777062c52e03eedbc2e1f41548791f6a3697c02476ec5daf49faa38533 + languageName: node + linkType: hard + "@babel/traverse--for-generate-function-map@npm:@babel/traverse@^7.25.3, @babel/traverse@npm:^7.25.3, @babel/traverse@npm:^7.27.1, @babel/traverse@npm:^7.28.0, @babel/traverse@npm:^7.28.3, @babel/traverse@npm:^7.28.4, @babel/traverse@npm:^7.28.5": version: 7.28.5 resolution: "@babel/traverse@npm:7.28.5" @@ -1662,6 +1798,21 @@ __metadata: languageName: node linkType: hard +"@babel/traverse@npm:^7.28.6": + version: 7.29.0 + resolution: "@babel/traverse@npm:7.29.0" + dependencies: + "@babel/code-frame": "npm:^7.29.0" + "@babel/generator": "npm:^7.29.0" + "@babel/helper-globals": "npm:^7.28.0" + "@babel/parser": "npm:^7.29.0" + "@babel/template": "npm:^7.28.6" + "@babel/types": "npm:^7.29.0" + debug: "npm:^4.3.1" + checksum: 10/3a0d0438f1ba9fed4fbe1706ea598a865f9af655a16ca9517ab57bda526e224569ca1b980b473fb68feea5e08deafbbf2cf9febb941f92f2d2533310c3fc4abc + languageName: node + linkType: hard + 
"@babel/types@npm:^7.0.0, @babel/types@npm:^7.20.7, @babel/types@npm:^7.21.3, @babel/types@npm:^7.24.7, @babel/types@npm:^7.25.2, @babel/types@npm:^7.26.0, @babel/types@npm:^7.27.1, @babel/types@npm:^7.27.3, @babel/types@npm:^7.28.2, @babel/types@npm:^7.28.4, @babel/types@npm:^7.28.5, @babel/types@npm:^7.3.3, @babel/types@npm:^7.4.4": version: 7.28.5 resolution: "@babel/types@npm:7.28.5" @@ -1672,6 +1823,16 @@ __metadata: languageName: node linkType: hard +"@babel/types@npm:^7.28.6, @babel/types@npm:^7.29.0": + version: 7.29.0 + resolution: "@babel/types@npm:7.29.0" + dependencies: + "@babel/helper-string-parser": "npm:^7.27.1" + "@babel/helper-validator-identifier": "npm:^7.28.5" + checksum: 10/bfc2b211210f3894dcd7e6a33b2d1c32c93495dc1e36b547376aa33441abe551ab4bc1640d4154ee2acd8e46d3bbc925c7224caae02fcaf0e6a771e97fccc661 + languageName: node + linkType: hard + "@bcoe/v8-coverage@npm:^0.2.3": version: 0.2.3 resolution: "@bcoe/v8-coverage@npm:0.2.3" @@ -4799,6 +4960,15 @@ __metadata: languageName: node linkType: hard +"@types/react@npm:~19.2.0": + version: 19.2.14 + resolution: "@types/react@npm:19.2.14" + dependencies: + csstype: "npm:^3.2.2" + checksum: 10/fbff239089ee64b6bd9b00543594db498278b06de527ef1b0f71bb0eb09cc4445a71b5dd3c0d3d0257255c4eed94406be40a74ad4a987ade8a8d5dd65c82bc5f + languageName: node + linkType: hard + "@types/semver@npm:^7.3.12": version: 7.7.1 resolution: "@types/semver@npm:7.7.1" @@ -5147,6 +5317,18 @@ __metadata: languageName: node linkType: hard +"ajv@npm:^8.11.0": + version: 8.18.0 + resolution: "ajv@npm:8.18.0" + dependencies: + fast-deep-equal: "npm:^3.1.3" + fast-uri: "npm:^3.0.1" + json-schema-traverse: "npm:^1.0.0" + require-from-string: "npm:^2.0.2" + checksum: 10/bfed9de827a2b27c6d4084324eda76a4e32bdde27410b3e9b81d06e6f8f5c78370fc6b93fe1d869f1939ff1d7c4ae8896960995acb8425e3e9288c8884247c48 + languageName: node + linkType: hard + "anser@npm:^1.4.9": version: 1.4.10 resolution: "anser@npm:1.4.10" @@ -6387,8 +6569,9 @@ __metadata: 
"@react-navigation/native": "npm:^7.1.6" "@shopify/react-native-skia": "npm:2.2.12" "@types/pngjs": "npm:^6.0.5" - "@types/react": "npm:~19.1.10" + "@types/react": "npm:~19.2.0" expo: "npm:^54.0.27" + expo-build-properties: "npm:~1.0.10" expo-constants: "npm:~18.0.11" expo-font: "npm:~14.0.10" expo-linking: "npm:~8.0.10" @@ -6402,12 +6585,14 @@ __metadata: react-native-gesture-handler: "npm:~2.28.0" react-native-image-picker: "npm:^7.2.2" react-native-loading-spinner-overlay: "npm:^3.0.1" - react-native-reanimated: "npm:~4.1.1" + react-native-nitro-image: "npm:0.10.2" + react-native-nitro-modules: "npm:0.33.4" + react-native-reanimated: "npm:~4.2.1" react-native-safe-area-context: "npm:~5.6.0" react-native-screens: "npm:~4.16.0" react-native-svg: "npm:15.12.1" react-native-svg-transformer: "npm:^1.5.0" - react-native-worklets: "npm:0.5.1" + react-native-worklets: "npm:^0.7.2" languageName: unknown linkType: soft @@ -6437,7 +6622,7 @@ __metadata: languageName: node linkType: hard -"convert-source-map@npm:^2.0.0": +"convert-source-map@npm:2.0.0, convert-source-map@npm:^2.0.0": version: 2.0.0 resolution: "convert-source-map@npm:2.0.0" checksum: 10/c987be3ec061348cdb3c2bfb924bec86dea1eacad10550a85ca23edb0fe3556c3a61c7399114f3331ccb3499d7fd0285ab24566e5745929412983494c3926e15 @@ -6757,7 +6942,7 @@ __metadata: languageName: node linkType: hard -"csstype@npm:^3.0.2": +"csstype@npm:^3.0.2, csstype@npm:^3.2.2": version: 3.2.3 resolution: "csstype@npm:3.2.3" checksum: 10/ad41baf7e2ffac65ab544d79107bf7cd1a4bb9bab9ac3302f59ab4ba655d5e30942a8ae46e10ba160c6f4ecea464cc95b975ca2fefbdeeacd6ac63f12f99fe1f @@ -7849,6 +8034,18 @@ __metadata: languageName: node linkType: hard +"expo-build-properties@npm:~1.0.10": + version: 1.0.10 + resolution: "expo-build-properties@npm:1.0.10" + dependencies: + ajv: "npm:^8.11.0" + semver: "npm:^7.6.0" + peerDependencies: + expo: "*" + checksum: 
10/0dde41d659d243268ceae49bba3e4c07b72c245df8124f86fb720bc0556a2c4d03dd75e59e068a07438ef5ba3188b67a7a6516d2a37d3d91429070745b2506a2 + languageName: node + linkType: hard + "expo-calendar@npm:~15.0.8": version: 15.0.8 resolution: "expo-calendar@npm:15.0.8" @@ -8236,6 +8433,13 @@ __metadata: languageName: node linkType: hard +"fast-uri@npm:^3.0.1": + version: 3.1.0 + resolution: "fast-uri@npm:3.1.0" + checksum: 10/818b2c96dc913bcf8511d844c3d2420e2c70b325c0653633f51821e4e29013c2015387944435cd0ef5322c36c9beecc31e44f71b257aeb8e0b333c1d62bb17c2 + languageName: node + linkType: hard + "fast-xml-parser@npm:^4.4.1": version: 4.5.3 resolution: "fast-xml-parser@npm:4.5.3" @@ -10192,6 +10396,13 @@ __metadata: languageName: node linkType: hard +"json-schema-traverse@npm:^1.0.0": + version: 1.0.0 + resolution: "json-schema-traverse@npm:1.0.0" + checksum: 10/02f2f466cdb0362558b2f1fd5e15cce82ef55d60cd7f8fa828cf35ba74330f8d767fcae5c5c2adb7851fa811766c694b9405810879bc4e1ddd78a7c0e03658ad + languageName: node + linkType: hard + "json-stable-stringify-without-jsonify@npm:^1.0.1": version: 1.0.1 resolution: "json-stable-stringify-without-jsonify@npm:1.0.1" @@ -13138,7 +13349,7 @@ __metadata: languageName: node linkType: hard -"react-native-is-edge-to-edge@npm:^1.1.6, react-native-is-edge-to-edge@npm:^1.2.1": +"react-native-is-edge-to-edge@npm:1.2.1, react-native-is-edge-to-edge@npm:^1.1.6, react-native-is-edge-to-edge@npm:^1.2.1": version: 1.2.1 resolution: "react-native-is-edge-to-edge@npm:1.2.1" peerDependencies: @@ -13183,6 +13394,27 @@ __metadata: languageName: node linkType: hard +"react-native-nitro-image@npm:0.10.2": + version: 0.10.2 + resolution: "react-native-nitro-image@npm:0.10.2" + peerDependencies: + react: "*" + react-native: "*" + react-native-nitro-modules: "*" + checksum: 10/3be75e93da369adfe00441dae78171572dec38d3d7e75e5d4cb302b81479be9686c8d8dc0ea4b331514b8725099bf3eb069ab9933f7029627d12a72d71766cb4 + languageName: node + linkType: hard + 
+"react-native-nitro-modules@npm:0.33.4": + version: 0.33.4 + resolution: "react-native-nitro-modules@npm:0.33.4" + peerDependencies: + react: "*" + react-native: "*" + checksum: 10/a737ff6b142c55821688612305245fd10a7cff36f0ee66cad0956c6815a60cdd4ba64cdfba6137a6dbfe815645763ce5d406cf488876edd47dab7f8d0031e01a + languageName: node + linkType: hard + "react-native-reanimated@npm:~4.1.1": version: 4.1.6 resolution: "react-native-reanimated@npm:4.1.6" @@ -13198,6 +13430,20 @@ __metadata: languageName: node linkType: hard +"react-native-reanimated@npm:~4.2.1": + version: 4.2.2 + resolution: "react-native-reanimated@npm:4.2.2" + dependencies: + react-native-is-edge-to-edge: "npm:1.2.1" + semver: "npm:7.7.3" + peerDependencies: + react: "*" + react-native: "*" + react-native-worklets: ">=0.7.0" + checksum: 10/2ad24cc827aaabb54c18d75a4ab98b92a25dd57c05bfabb886341c0e62d8efc5d5973f415cb1da2ecab9ebe077bec1179b91c681de90e124dbf1160a418ee29d + languageName: node + linkType: hard + "react-native-safe-area-context@npm:~5.6.0": version: 5.6.2 resolution: "react-native-safe-area-context@npm:5.6.2" @@ -13274,6 +13520,29 @@ __metadata: languageName: node linkType: hard +"react-native-worklets@npm:^0.7.2": + version: 0.7.4 + resolution: "react-native-worklets@npm:0.7.4" + dependencies: + "@babel/plugin-transform-arrow-functions": "npm:7.27.1" + "@babel/plugin-transform-class-properties": "npm:7.27.1" + "@babel/plugin-transform-classes": "npm:7.28.4" + "@babel/plugin-transform-nullish-coalescing-operator": "npm:7.27.1" + "@babel/plugin-transform-optional-chaining": "npm:7.27.1" + "@babel/plugin-transform-shorthand-properties": "npm:7.27.1" + "@babel/plugin-transform-template-literals": "npm:7.27.1" + "@babel/plugin-transform-unicode-regex": "npm:7.27.1" + "@babel/preset-typescript": "npm:7.27.1" + convert-source-map: "npm:2.0.0" + semver: "npm:7.7.3" + peerDependencies: + "@babel/core": "*" + react: "*" + react-native: "*" + checksum: 
10/922b209940e298d21313d22f8a6eb87ad603442850c7ff8bc9cfef694cb211d7ec9903e24ee20b6bcf6164f8e7c165b65307dcca3d67465fdffda1c45fe05d1d + languageName: node + linkType: hard + "react-native@npm:0.81.5": version: 0.81.5 resolution: "react-native@npm:0.81.5" @@ -13790,21 +14059,21 @@ __metadata: languageName: node linkType: hard -"semver@npm:^6.3.0, semver@npm:^6.3.1": - version: 6.3.1 - resolution: "semver@npm:6.3.1" +"semver@npm:7.7.3, semver@npm:^7.1.3, semver@npm:^7.3.5, semver@npm:^7.3.7, semver@npm:^7.5.2, semver@npm:^7.5.3, semver@npm:^7.5.4, semver@npm:^7.6.0, semver@npm:^7.7.1": + version: 7.7.3 + resolution: "semver@npm:7.7.3" bin: semver: bin/semver.js - checksum: 10/1ef3a85bd02a760c6ef76a45b8c1ce18226de40831e02a00bad78485390b98b6ccaa31046245fc63bba4a47a6a592b6c7eedc65cc47126e60489f9cc1ce3ed7e + checksum: 10/8dbc3168e057a38fc322af909c7f5617483c50caddba135439ff09a754b20bdd6482a5123ff543dad4affa488ecf46ec5fb56d61312ad20bb140199b88dfaea9 languageName: node linkType: hard -"semver@npm:^7.1.3, semver@npm:^7.3.5, semver@npm:^7.3.7, semver@npm:^7.5.2, semver@npm:^7.5.3, semver@npm:^7.5.4, semver@npm:^7.6.0, semver@npm:^7.7.1": - version: 7.7.3 - resolution: "semver@npm:7.7.3" +"semver@npm:^6.3.0, semver@npm:^6.3.1": + version: 6.3.1 + resolution: "semver@npm:6.3.1" bin: semver: bin/semver.js - checksum: 10/8dbc3168e057a38fc322af909c7f5617483c50caddba135439ff09a754b20bdd6482a5123ff543dad4affa488ecf46ec5fb56d61312ad20bb140199b88dfaea9 + checksum: 10/1ef3a85bd02a760c6ef76a45b8c1ce18226de40831e02a00bad78485390b98b6ccaa31046245fc63bba4a47a6a592b6c7eedc65cc47126e60489f9cc1ce3ed7e languageName: node linkType: hard From 62df7ceb464e9bcb50dfcea537f967a3d102aabf Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Fri, 20 Feb 2026 09:27:37 +0100 Subject: [PATCH 12/37] fix: remove redundant preprocessing step --- .../common/rnexecutorch/models/VisionModel.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git 
a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index 8155b8819..a81518921 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -11,12 +11,8 @@ using namespace facebook; cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData) const { - // Extract frame using FrameProcessor utility auto frameObj = frameData.asObject(runtime); - cv::Mat frame = utils::FrameProcessor::extractFrame(runtime, frameObj); - - // Apply model-specific preprocessing - return preprocessFrame(frame); + return ::rnexecutorch::utils::FrameProcessor::extractFrame(runtime, frameObj); } cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { From 962f1c385cbcfb56497b14a5b6cfc10c93b561a8 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 23 Feb 2026 11:46:18 +0100 Subject: [PATCH 13/37] refactor: changes suggested in review --- .cspell-wordlist.txt | 4 +- .../rnexecutorch/RnExecutorchInstaller.h | 3 - .../rnexecutorch/models/VisionModel.cpp | 18 +-- .../common/rnexecutorch/models/VisionModel.h | 24 +--- .../object_detection/ObjectDetection.cpp | 41 +------ .../models/object_detection/ObjectDetection.h | 1 - .../rnexecutorch/utils/FrameExtractor.cpp | 50 ++++---- .../rnexecutorch/utils/FrameExtractor.h | 61 +++------- .../rnexecutorch/utils/FrameProcessor.cpp | 102 ++--------------- .../rnexecutorch/utils/FrameProcessor.h | 108 +++--------------- .../src/hooks/useModule.ts | 1 - .../modules/computer_vision/VisionModule.ts | 27 +++-- 12 files changed, 91 insertions(+), 349 deletions(-) diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt index a2e8ecbab..cb92b44bd 100644 --- a/.cspell-wordlist.txt +++ b/.cspell-wordlist.txt @@ -116,4 +116,6 @@ antonov rfdetr basemodule IMAGENET -worklet \ No newline at 
end of file +worklet +worklets +BGRA \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h index 80b7d18b3..54e8c1cbb 100644 --- a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h +++ b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h @@ -54,9 +54,6 @@ class RnExecutorchInstaller { meta::createConstructorArgsWithCallInvoker( args, runtime, jsCallInvoker); - // This unpacks the tuple and calls the constructor directly inside - // make_shared. It avoids creating a temporary object, so no - // move/copy is required. auto modelImplementationPtr = std::apply( [](auto &&...unpackedArgs) { return std::make_shared( diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index a81518921..b88310e12 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -4,19 +4,17 @@ #include #include -namespace rnexecutorch { -namespace models { +namespace rnexecutorch::models { using namespace facebook; cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData) const { auto frameObj = frameData.asObject(runtime); - return ::rnexecutorch::utils::FrameProcessor::extractFrame(runtime, frameObj); + return ::rnexecutorch::utils::extractFrame(runtime, frameObj); } cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { - // Validate dimensions: sizes must be [height, width, channels] if (tensorView.sizes.size() != 3) { char errorMessage[100]; std::snprintf(errorMessage, sizeof(errorMessage), @@ -27,11 +25,10 @@ cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { errorMessage); } - int height 
= tensorView.sizes[0]; - int width = tensorView.sizes[1]; - int channels = tensorView.sizes[2]; + int32_t height = tensorView.sizes[0]; + int32_t width = tensorView.sizes[1]; + int32_t channels = tensorView.sizes[2]; - // Pixel data must be RGB (3 channels) and BYTE type if (channels != 3) { char errorMessage[100]; std::snprintf(errorMessage, sizeof(errorMessage), @@ -47,13 +44,10 @@ cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { "Invalid pixel data: scalarType must be BYTE (Uint8Array)"); } - // Create cv::Mat directly from dataPtr (zero-copy view) - // Data is valid for the duration of this synchronous call uint8_t *dataPtr = static_cast(tensorView.dataPtr); cv::Mat image(height, width, CV_8UC3, dataPtr); return image; } -} // namespace models -} // namespace rnexecutorch +} // namespace rnexecutorch::models diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h index c362d745f..82d544db3 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h @@ -18,17 +18,13 @@ namespace models { * process camera frames in real-time (e.g., at 30fps). * * Thread Safety: - * - All inference operations are protected by a mutex - * - generateFromFrame() uses try_lock() to skip frames when the model is busy - * - This prevents blocking the camera thread and maintains smooth frame rates + * - All inference operations are protected by a mutex via scoped_lock * * Usage: * Subclasses should: * 1. Inherit from VisionModel instead of BaseModel * 2. Implement preprocessFrame() with model-specific preprocessing - * 3. Use inference_mutex_ when calling forward() in custom generate methods - * 4. Use lock_guard for blocking operations (JS API) - * 5. Use try_lock() for non-blocking operations (camera API) + * 3. 
Delegate to runInference() which handles locking internally * * Example: * @code @@ -36,18 +32,9 @@ namespace models { * public: * std::unordered_map * generateFromFrame(jsi::Runtime& runtime, const jsi::Value& frameValue) { - * // try_lock is handled automatically * auto frameObject = frameValue.asObject(runtime); - * cv::Mat frame = FrameExtractor::extractFrame(runtime, frameObject); - * - * // Lock before inference - * if (!inference_mutex_.try_lock()) { - * return {}; // Skip frame if busy - * } - * std::lock_guard lock(inference_mutex_, std::adopt_lock); - * - * auto preprocessed = preprocessFrame(frame); - * // ... run inference + * cv::Mat frame = utils::extractFrame(runtime, frameObject); + * return runInference(frame); * } * }; * @endcode @@ -64,9 +51,6 @@ class VisionModel : public BaseModel { std::shared_ptr callInvoker) : BaseModel(modelSource, callInvoker) {} - /** - * @brief Virtual destructor for proper cleanup in derived classes - */ virtual ~VisionModel() = default; protected: diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index f17a4f074..1ae2460c3 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -33,26 +33,19 @@ ObjectDetection::ObjectDetection( } cv::Mat ObjectDetection::preprocessFrame(const cv::Mat &frame) const { - // Get target size from model input shape const std::vector tensorDims = getAllInputShapes()[0]; cv::Size tensorSize = cv::Size(tensorDims[tensorDims.size() - 1], tensorDims[tensorDims.size() - 2]); cv::Mat rgb; - // Convert RGBA/BGRA to RGB if needed (for VisionCamera frames) if (frame.channels() == 4) { -// Platform-specific color conversion: -// iOS uses BGRA format, Android uses RGBA format #ifdef 
__APPLE__ - // iOS: BGRA β†’ RGB cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); #else - // Android: RGBA β†’ RGB cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); #endif } else if (frame.channels() == 3) { - // Already RGB rgb = frame; } else { char errorMessage[100]; @@ -113,15 +106,11 @@ ObjectDetection::postprocess(const std::vector &tensors, std::vector ObjectDetection::runInference(cv::Mat image, double detectionThreshold) { - std::lock_guard lock(inference_mutex_); + std::scoped_lock lock(inference_mutex_); - // Store original size for postprocessing cv::Size originalSize = image.size(); - - // Preprocess the image using model-specific preprocessing cv::Mat preprocessed = preprocessFrame(image); - // Create tensor and run inference const std::vector tensorDims = getAllInputShapes()[0]; auto inputTensor = image_processing::getTensorFromMatrix(tensorDims, preprocessed); @@ -139,14 +128,11 @@ ObjectDetection::runInference(cv::Mat image, double detectionThreshold) { std::vector ObjectDetection::generateFromString(std::string imageSource, double detectionThreshold) { - // Read image using OpenCV (BGR format) - cv::Mat image = image_processing::readImage(imageSource); + cv::Mat imageBGR = image_processing::readImage(imageSource); - // Convert BGR to RGB (OpenCV imread returns BGR) cv::Mat imageRGB; - cv::cvtColor(image, imageRGB, cv::COLOR_BGR2RGB); + cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); - // Use the internal helper - it handles locking, preprocessing, and inference return runInference(imageRGB, detectionThreshold); } @@ -154,22 +140,9 @@ std::vector ObjectDetection::generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData, double detectionThreshold) { - // Try-lock: skip frame if model is busy (non-blocking for camera) - if (!inference_mutex_.try_lock()) { - return {}; // Return empty vector, don't block camera thread - } + auto frameObj = frameData.asObject(runtime); + cv::Mat frame = rnexecutorch::utils::extractFrame(runtime, frameObj); - 
// Extract frame (under lock to ensure thread safety) - cv::Mat frame; - { - std::lock_guard lock(inference_mutex_, std::adopt_lock); - auto frameObj = frameData.asObject(runtime); - frame = - rnexecutorch::utils::FrameProcessor::extractFrame(runtime, frameObj); - } - // Lock is automatically released here when going out of scope - - // Use the internal helper - it handles locking, preprocessing, and inference return runInference(frame, detectionThreshold); } @@ -177,14 +150,10 @@ std::vector ObjectDetection::generateFromPixels(jsi::Runtime &runtime, const jsi::Value &pixelData, double detectionThreshold) { - // Convert JSI value to JSTensorViewIn auto tensorView = jsi_conversion::getValue(pixelData, runtime); - - // Extract raw pixel data to cv::Mat cv::Mat image = extractFromPixels(tensorView); - // Use the internal helper - it handles locking, preprocessing, and inference return runInference(image, detectionThreshold); } } // namespace rnexecutorch::models::object_detection \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h index fc554003b..bf231ff0b 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h @@ -30,7 +30,6 @@ class ObjectDetection : public VisionModel { double detectionThreshold); protected: - // Internal helper for shared preprocessing and inference logic std::vector runInference(cv::Mat image, double detectionThreshold); cv::Mat preprocessFrame(const cv::Mat &frame) const override; diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp index 900eae297..3b31bc10f 100644 --- 
a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp @@ -7,52 +7,35 @@ #import #endif -#ifdef __ANDROID__ -#if __ANDROID_API__ >= 26 +#if defined(__ANDROID__) && __ANDROID_API__ >= 26 #include #endif -#endif -namespace rnexecutorch { -namespace utils { +namespace rnexecutorch::utils { -cv::Mat FrameExtractor::extractFromNativeBuffer(uint64_t bufferPtr) { -#ifdef __APPLE__ - return extractFromCVPixelBuffer(reinterpret_cast(bufferPtr)); -#elif defined(__ANDROID__) - return extractFromAHardwareBuffer(reinterpret_cast(bufferPtr)); -#else - throw RnExecutorchError(RnExecutorchErrorCode::NotSupported, - "NativeBuffer not supported on this platform"); -#endif -} +namespace { #ifdef __APPLE__ -cv::Mat FrameExtractor::extractFromCVPixelBuffer(void *pixelBuffer) { +cv::Mat extractFromCVPixelBuffer(void *pixelBuffer) { CVPixelBufferRef buffer = static_cast(pixelBuffer); - // Get buffer properties size_t width = CVPixelBufferGetWidth(buffer); size_t height = CVPixelBufferGetHeight(buffer); size_t bytesPerRow = CVPixelBufferGetBytesPerRow(buffer); OSType pixelFormat = CVPixelBufferGetPixelFormatType(buffer); - // Lock the buffer (Vision Camera should have already locked it, but ensure) CVPixelBufferLockBaseAddress(buffer, kCVPixelBufferLock_ReadOnly); void *baseAddress = CVPixelBufferGetBaseAddress(buffer); cv::Mat mat; if (pixelFormat == kCVPixelFormatType_32BGRA) { - // BGRA format (most common on iOS when using pixelFormat: 'rgb') mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC4, baseAddress, bytesPerRow); } else if (pixelFormat == kCVPixelFormatType_32RGBA) { - // RGBA format mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC4, baseAddress, bytesPerRow); } else if (pixelFormat == kCVPixelFormatType_24RGB) { - // RGB format mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC3, baseAddress, bytesPerRow); } else { @@ -72,15 +55,13 @@ 
cv::Mat FrameExtractor::extractFromCVPixelBuffer(void *pixelBuffer) { #endif #ifdef __ANDROID__ -cv::Mat FrameExtractor::extractFromAHardwareBuffer(void *hardwareBuffer) { +cv::Mat extractFromAHardwareBuffer(void *hardwareBuffer) { #if __ANDROID_API__ >= 26 AHardwareBuffer *buffer = static_cast(hardwareBuffer); - // Get buffer description AHardwareBuffer_Desc desc; AHardwareBuffer_describe(buffer, &desc); - // Lock the buffer for CPU read access void *data = nullptr; int lockResult = AHardwareBuffer_lock( buffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, nullptr, &data); @@ -93,13 +74,10 @@ cv::Mat FrameExtractor::extractFromAHardwareBuffer(void *hardwareBuffer) { cv::Mat mat; if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM) { - // RGBA format (expected when using pixelFormat: 'rgb' on Android) mat = cv::Mat(desc.height, desc.width, CV_8UC4, data, desc.stride * 4); } else if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM) { - // RGBX format (treated as RGBA) mat = cv::Mat(desc.height, desc.width, CV_8UC4, data, desc.stride * 4); } else if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM) { - // RGB format (less common) mat = cv::Mat(desc.height, desc.width, CV_8UC3, data, desc.stride * 3); } else { AHardwareBuffer_unlock(buffer, nullptr); @@ -118,7 +96,19 @@ cv::Mat FrameExtractor::extractFromAHardwareBuffer(void *hardwareBuffer) { "AHardwareBuffer requires Android API 26+"); #endif // __ANDROID_API__ >= 26 } -#endif // __ANDROID__ +#endif + +} // namespace + +cv::Mat extractFromNativeBuffer(uint64_t bufferPtr) { +#ifdef __APPLE__ + return extractFromCVPixelBuffer(reinterpret_cast(bufferPtr)); +#elif defined(__ANDROID__) + return extractFromAHardwareBuffer(reinterpret_cast(bufferPtr)); +#else + throw RnExecutorchError(RnExecutorchErrorCode::NotSupported, + "NativeBuffer not supported on this platform"); +#endif +} -} // namespace utils -} // namespace rnexecutorch +} // namespace rnexecutorch::utils diff --git 
a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h index a90e6ad23..f5d7c2094 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h @@ -3,58 +3,23 @@ #include #include -namespace rnexecutorch { -namespace utils { +namespace rnexecutorch::utils { /** - * @brief Utility class for extracting cv::Mat from native platform buffers + * @brief Extract cv::Mat from a native platform buffer pointer (zero-copy) * - * Provides zero-copy extraction of frames from: + * Dispatches to the platform-specific implementation: * - iOS: CVPixelBufferRef * - Android: AHardwareBuffer + * + * @param bufferPtr Platform-specific buffer pointer (uint64_t) + * @return cv::Mat wrapping the buffer data (zero-copy) + * + * @throws RnExecutorchError if the platform is unsupported or extraction fails + * + * @note The returned cv::Mat does not own the data. + * Caller must ensure the buffer remains valid during use. */ -class FrameExtractor { -public: - /** - * @brief Extract cv::Mat from a native buffer pointer - * - * @param bufferPtr Platform-specific buffer pointer (uint64_t) - * - iOS: CVPixelBufferRef - * - Android: AHardwareBuffer* - * @return cv::Mat wrapping the buffer data (zero-copy) - * - * @note The returned cv::Mat does not own the data. - * The caller must ensure the buffer remains valid. - * @note The buffer must be locked before calling and unlocked after use. 
- */ - static cv::Mat extractFromNativeBuffer(uint64_t bufferPtr); - -#ifdef __APPLE__ - /** - * @brief Extract cv::Mat from CVPixelBuffer (iOS) - * - * @param pixelBuffer CVPixelBufferRef pointer - * @return cv::Mat wrapping the pixel buffer data - * - * @note Assumes buffer is already locked by Vision Camera - * @note Supports kCVPixelFormatType_32BGRA and kCVPixelFormatType_24RGB - */ - static cv::Mat extractFromCVPixelBuffer(void *pixelBuffer); -#endif - -#ifdef __ANDROID__ - /** - * @brief Extract cv::Mat from AHardwareBuffer (Android) - * - * @param hardwareBuffer AHardwareBuffer* pointer - * @return cv::Mat wrapping the hardware buffer data - * - * @note Assumes buffer is already locked by Vision Camera - * @note Supports AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM and R8G8B8_UNORM - */ - static cv::Mat extractFromAHardwareBuffer(void *hardwareBuffer); -#endif -}; +cv::Mat extractFromNativeBuffer(uint64_t bufferPtr); -} // namespace utils -} // namespace rnexecutorch +} // namespace rnexecutorch::utils diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp index 5e593dfd0..30238ad5c 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp @@ -2,105 +2,27 @@ #include "FrameExtractor.h" #include #include -#include -namespace rnexecutorch { -namespace utils { +namespace rnexecutorch::utils { -cv::Mat FrameProcessor::extractFrame(jsi::Runtime &runtime, - const jsi::Object &frameData) { - // Try zero-copy path first (nativeBuffer) - // Native buffer contains dimensions, so we don't need width/height properties - if (hasNativeBuffer(runtime, frameData)) { - try { - return extractFromNativeBuffer(runtime, frameData); - } catch (const std::exception &e) { - // Fallback to ArrayBuffer on failure - } - } - - // Fallback to ArrayBuffer path 
(with copy) - // Get frame dimensions for ArrayBuffer path - if (frameData.hasProperty(runtime, "data")) { - int width = - static_cast(frameData.getProperty(runtime, "width").asNumber()); - int height = - static_cast(frameData.getProperty(runtime, "height").asNumber()); - return extractFromArrayBuffer(runtime, frameData, width, height); - } +namespace { - // No valid frame data source - throw RnExecutorchError( - RnExecutorchErrorCode::InvalidUserInput, - "FrameProcessor: No valid frame data (neither nativeBuffer nor data " - "property found)"); +bool hasNativeBuffer(jsi::Runtime &runtime, const jsi::Object &frameData) { + return frameData.hasProperty(runtime, "nativeBuffer"); } -cv::Size FrameProcessor::getFrameSize(jsi::Runtime &runtime, - const jsi::Object &frameData) { - if (!frameData.hasProperty(runtime, "width") || - !frameData.hasProperty(runtime, "height")) { - throw RnExecutorchError( - RnExecutorchErrorCode::InvalidUserInput, - "FrameProcessor: Frame data missing width or height property"); - } - - int width = - static_cast(frameData.getProperty(runtime, "width").asNumber()); - int height = - static_cast(frameData.getProperty(runtime, "height").asNumber()); - - return cv::Size(width, height); -} +} // namespace -bool FrameProcessor::hasNativeBuffer(jsi::Runtime &runtime, - const jsi::Object &frameData) { - return frameData.hasProperty(runtime, "nativeBuffer"); -} +cv::Mat extractFrame(jsi::Runtime &runtime, const jsi::Object &frameData) { + if (!hasNativeBuffer(runtime, frameData)) { + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + "FrameProcessor: No nativeBuffer found in frame"); + } -cv::Mat FrameProcessor::extractFromNativeBuffer(jsi::Runtime &runtime, - const jsi::Object &frameData) { auto nativeBufferValue = frameData.getProperty(runtime, "nativeBuffer"); - - // Handle bigint pointer value from JavaScript uint64_t bufferPtr = static_cast( nativeBufferValue.asBigInt(runtime).asUint64(runtime)); - // Use FrameExtractor to get 
cv::Mat from platform-specific buffer - // Native buffer contains all metadata (width, height, format) - return FrameExtractor::extractFromNativeBuffer(bufferPtr); -} - -cv::Mat FrameProcessor::extractFromArrayBuffer(jsi::Runtime &runtime, - const jsi::Object &frameData, - int width, int height) { - auto pixelData = frameData.getProperty(runtime, "data"); - auto arrayBuffer = pixelData.asObject(runtime).getArrayBuffer(runtime); - uint8_t *data = arrayBuffer.data(runtime); - size_t bufferSize = arrayBuffer.size(runtime); - - // Determine format based on buffer size - size_t stride = bufferSize / height; - size_t expectedRGBAStride = width * 4; - size_t expectedRGBStride = width * 3; - - if (stride == expectedRGBAStride || bufferSize >= width * height * 4) { - // RGBA format with potential padding - return cv::Mat(height, width, CV_8UC4, data, stride); - } else if (stride >= expectedRGBStride) { - // RGB format - return cv::Mat(height, width, CV_8UC3, data, stride); - } else { - char errorMessage[200]; - std::snprintf( - errorMessage, sizeof(errorMessage), - "FrameProcessor: Unexpected buffer size - expected %zu or %zu bytes " - "per row, got %zu", - expectedRGBStride, expectedRGBAStride, stride); - throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, - errorMessage); - } + return extractFromNativeBuffer(bufferPtr); } - -} // namespace utils -} // namespace rnexecutorch +} // namespace rnexecutorch::utils diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h index 0838b6594..403f4bde9 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h @@ -3,107 +3,25 @@ #include #include -namespace rnexecutorch { -namespace utils { +namespace rnexecutorch::utils { using namespace facebook; /** - * @brief Utility class for processing camera 
frames from VisionCamera + * @brief Extract cv::Mat from VisionCamera frame data via nativeBuffer * - * Provides high-level helpers for extracting and working with frames from - * react-native-vision-camera in a consistent way across all vision models. + * @param runtime JSI runtime + * @param frameData JSI object containing frame data from VisionCamera + * Expected properties: + * - nativeBuffer: BigInt pointer to native buffer * - * This class abstracts away the complexity of: - * - Handling both nativeBuffer (zero-copy) and ArrayBuffer (with copy) paths - * - Platform-specific buffer formats (CVPixelBuffer on iOS, AHardwareBuffer - * on Android) - * - JSI object property access and type conversions + * @return cv::Mat wrapping the frame data (zero-copy) * - * Usage: - * @code - * auto frameObj = frameData.asObject(runtime); - * cv::Mat frame = FrameProcessor::extractFrame(runtime, frameObj); - * cv::Size size = FrameProcessor::getFrameSize(runtime, frameObj); - * @endcode + * @throws RnExecutorchError if nativeBuffer is not present or extraction fails + * + * @note The returned cv::Mat does not own the data. + * Caller must ensure the source frame remains valid during use. */ -class FrameProcessor { -public: - /** - * @brief Extract cv::Mat from VisionCamera frame data - * - * Handles both zero-copy (nativeBuffer) and copy-based (ArrayBuffer) paths - * automatically. Prefers nativeBuffer when available for best performance. 
- * - * @param runtime JSI runtime - * @param frameData JSI object containing frame data from VisionCamera - * Expected properties: - * - nativeBuffer (optional): BigInt pointer to native buffer - * - data (optional): ArrayBuffer with pixel data - * - width: number - * - height: number - * - * @return cv::Mat wrapping or containing the frame data - * - * @throws RnExecutorchError if neither nativeBuffer nor data is available - * @throws RnExecutorchError if nativeBuffer extraction fails - * - * @note The returned cv::Mat may not own the data (zero-copy path). - * Caller must ensure the source frame remains valid during use. - */ - static cv::Mat extractFrame(jsi::Runtime &runtime, - const jsi::Object &frameData); - - /** - * @brief Get frame dimensions from VisionCamera frame data - * - * @param runtime JSI runtime - * @param frameData JSI object containing frame data - * - * @return cv::Size with frame width and height - * - * @throws RnExecutorchError if width or height properties are missing - */ - static cv::Size getFrameSize(jsi::Runtime &runtime, - const jsi::Object &frameData); - - /** - * @brief Check if frame data has nativeBuffer (zero-copy path available) - * - * @param runtime JSI runtime - * @param frameData JSI object containing frame data - * @return true if nativeBuffer is available, false otherwise - */ - static bool hasNativeBuffer(jsi::Runtime &runtime, - const jsi::Object &frameData); - -private: - /** - * @brief Extract frame from nativeBuffer pointer (zero-copy) - * - * Native buffer contains all metadata (width, height, format), so no need to - * pass dimensions separately. 
- * - * @param runtime JSI runtime - * @param frameData JSI object with nativeBuffer property - * @return cv::Mat wrapping the native buffer data - */ - static cv::Mat extractFromNativeBuffer(jsi::Runtime &runtime, - const jsi::Object &frameData); - - /** - * @brief Extract frame from ArrayBuffer (with copy) - * - * @param runtime JSI runtime - * @param frameData JSI object with data property - * @param width Frame width - * @param height Frame height - * @return cv::Mat containing or wrapping the array buffer data - */ - static cv::Mat extractFromArrayBuffer(jsi::Runtime &runtime, - const jsi::Object &frameData, int width, - int height); -}; +cv::Mat extractFrame(jsi::Runtime &runtime, const jsi::Object &frameData); -} // namespace utils -} // namespace rnexecutorch +} // namespace rnexecutorch::utils diff --git a/packages/react-native-executorch/src/hooks/useModule.ts b/packages/react-native-executorch/src/hooks/useModule.ts index f5f260787..82a0bb72f 100644 --- a/packages/react-native-executorch/src/hooks/useModule.ts +++ b/packages/react-native-executorch/src/hooks/useModule.ts @@ -48,7 +48,6 @@ export const useModule = < }); if (isMounted) setIsReady(true); - // Extract runOnFrame worklet from VisionModule if available // Use "state trick" to make the worklet serializable for VisionCamera if ('runOnFrame' in moduleInstance) { const worklet = moduleInstance.runOnFrame; diff --git a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts index d6a0038ee..eabe50ab0 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts @@ -15,6 +15,20 @@ import { Frame, PixelData, ScalarType } from '../../types/common'; * * @category Typescript API */ +function isPixelData(input: unknown): input is PixelData { + return ( + typeof input === 'object' && + input !== 
null && + 'dataPtr' in input && + input.dataPtr instanceof Uint8Array && + 'sizes' in input && + Array.isArray(input.sizes) && + input.sizes.length === 3 && + 'scalarType' in input && + input.scalarType === ScalarType.BYTE + ); +} + export abstract class VisionModule extends BaseModule { /** * Synchronous worklet function for real-time VisionCamera frame processing. @@ -111,19 +125,8 @@ export abstract class VisionModule extends BaseModule { // Type detection and routing if (typeof input === 'string') { - // String path β†’ generateFromString() return await this.nativeModule.generateFromString(input, ...args); - } else if ( - typeof input === 'object' && - 'dataPtr' in input && - input.dataPtr instanceof Uint8Array && - 'sizes' in input && - Array.isArray(input.sizes) && - input.sizes.length === 3 && - 'scalarType' in input && - input.scalarType === ScalarType.BYTE - ) { - // Pixel data β†’ generateFromPixels() + } else if (isPixelData(input)) { return await this.nativeModule.generateFromPixels(input, ...args); } else { throw new RnExecutorchError( From 7753bd130880b78530821c58959e100c5bb90ea3 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 23 Feb 2026 13:00:39 +0100 Subject: [PATCH 14/37] fix: not existing error type, add comments to JSI code --- .../host_objects/ModelHostObject.h | 60 +++++++++-------- .../metaprogramming/FunctionHelpers.h | 66 ++++++++----------- .../rnexecutorch/utils/FrameExtractor.cpp | 7 +- 3 files changed, 60 insertions(+), 73 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h index 3190bc6f4..abf920223 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h @@ -46,13 +46,6 @@ template class ModelHostObject : public JsiHostObject { "getInputShape")); } - 
if constexpr (meta::HasGenerateFromString) { - addFunctions( - JSI_EXPORT_FUNCTION(ModelHostObject, - promiseHostFunction<&Model::generateFromString>, - "generateFromString")); - } - if constexpr (meta::HasEncode) { addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject, promiseHostFunction<&Model::encode>, @@ -172,6 +165,13 @@ template class ModelHostObject : public JsiHostObject { "stream")); } + if constexpr (meta::HasGenerateFromString) { + addFunctions( + JSI_EXPORT_FUNCTION(ModelHostObject, + promiseHostFunction<&Model::generateFromString>, + "generateFromString")); + } + if constexpr (meta::HasGenerateFromFrame) { addFunctions(JSI_EXPORT_FUNCTION( ModelHostObject, visionHostFunction<&Model::generateFromFrame>, @@ -181,7 +181,7 @@ template class ModelHostObject : public JsiHostObject { if constexpr (meta::HasGenerateFromPixels) { addFunctions( JSI_EXPORT_FUNCTION(ModelHostObject, - visionHostFunction<&Model::generateFromPixels>, + promiseHostFunction<&Model::generateFromPixels>, "generateFromPixels")); } } @@ -233,36 +233,40 @@ template class ModelHostObject : public JsiHostObject { } } + /** + * Unlike promiseHostFunction, this runs synchronously on the JS thread, + * which is required for VisionCamera worklet frame processors. + * + * The key challenge is argument mapping: the C++ function takes + * (Runtime, frameData, Rest...) but from the JS side, Runtime is injected + * automatically and frameData is JS args[0]. The remaining args (Rest...) + * map to JS args[1..N]. + * + * This is achieved via TailSignature: it extracts the Rest... parameter pack + * from the function pointer type, creates a dummy free function with only + * those types, then uses createArgsTupleFromJsi on that dummy to convert + * args[1..N] β€” bypassing the manually-handled frameData at args[0]. + * + * Argument mapping: + * C++ params: (Runtime&, frameData, Rest[0], Rest[1], ...) + * JS args: ( args[0], args[1], args[2], ...) 
+ * JS arg count = C++ arity - 1 (Runtime is injected, not counted) + * + */ template JSI_HOST_FUNCTION(visionHostFunction) { - // 1. Check Argument Count - // (We rely on our new FunctionTraits) constexpr std::size_t cppArgCount = meta::FunctionTraits::arity; - - // We expect JS args = (Total C++ Args) - (2 injected args: Runtime + Value) constexpr std::size_t expectedJsArgs = cppArgCount - 1; - log(LOG_LEVEL::Debug, cppArgCount, count); + if (count != expectedJsArgs) { throw jsi::JSError(runtime, "Argument count mismatch in vision function"); } try { - // 2. The Magic Trick - // We get a pointer to a dummy function: void dummy(Rest...) {} - // This function has exactly the signature of the arguments we want to - // parse. auto dummyFuncPtr = &meta::TailSignature::dummy; - - // 3. Let existing helpers do the work - // We pass the dummy pointer. The helper inspects its arguments (Rest...) - // and converts args[0]...args[N] accordingly. - // Note: We pass (args + 1) because JS args[0] is the PixelData, which we - // handle manually. Note: We use expectedJsArgs - 1 because we skipped one - // JS arg. auto tailArgsTuple = meta::createArgsTupleFromJsi(dummyFuncPtr, args + 1, runtime); - // 4. Invoke using ReturnType = typename meta::FunctionTraits::return_type; @@ -270,10 +274,8 @@ template class ModelHostObject : public JsiHostObject { std::apply( [&](auto &&...tailArgs) { (model.get()->*FnPtr)( - runtime, - args[0], // 1. PixelData (Manually passed) - std::forward( - tailArgs)...); // 2. 
The rest (Auto parsed) + runtime, args[0], + std::forward(tailArgs)...); }, std::move(tailArgsTuple)); return jsi::Value::undefined(); diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h index a48aa0119..88bf04bf0 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h @@ -10,32 +10,6 @@ namespace rnexecutorch::meta { using namespace facebook; -// ========================================================================= -// 1. Function Traits (Extracts Arity, Return Type, Args) -// ========================================================================= - -template struct FunctionTraits; - -// Specialization for Member Functions -template -struct FunctionTraits { - static constexpr std::size_t arity = sizeof...(Args); - using return_type = R; - using args_tuple = std::tuple; -}; - -// Specialization for const Member Functions -template -struct FunctionTraits { - static constexpr std::size_t arity = sizeof...(Args); - using return_type = R; - using args_tuple = std::tuple; -}; - -// ========================================================================= -// 2. Argument Counting Helpers -// ========================================================================= - template constexpr std::size_t getArgumentCount(R (Model::*f)(Types...)) { return sizeof...(Types); @@ -46,10 +20,6 @@ constexpr std::size_t getArgumentCount(R (Model::*f)(Types...) const) { return sizeof...(Types); } -// ========================================================================= -// 3. 
JSI -> Tuple Conversion Logic -// ========================================================================= - template std::tuple fillTupleFromArgs(std::index_sequence, const jsi::Value *args, @@ -62,6 +32,7 @@ std::tuple fillTupleFromArgs(std::index_sequence, * arguments for method supplied with a pointer. The types in the tuple are * inferred from the method pointer. */ + template std::tuple createArgsTupleFromJsi(R (Model::*f)(Types...), const jsi::Value *args, @@ -78,7 +49,9 @@ std::tuple createArgsTupleFromJsi(R (Model::*f)(Types...) const, runtime); } -// Overload for free functions (used by TailSignature dummy) +// Free function overload used by visionHostFunction: accepts a dummy free +// function pointer whose parameter types (Rest...) are extracted by +// TailSignature and converted from JSI args. template std::tuple createArgsTupleFromJsi(void (*f)(Types...), const jsi::Value *args, @@ -87,27 +60,40 @@ std::tuple createArgsTupleFromJsi(void (*f)(Types...), runtime); } -// ========================================================================= -// 4. Tail Signature Helper (Crucial for Vision Functions) -// ========================================================================= +// Extracts arity, return type, and argument types from a member function +// pointer at compile time. Used by visionHostFunction to determine the expected +// JS argument count and invoke the correct return path. +template struct FunctionTraits; + +template +struct FunctionTraits { + static constexpr std::size_t arity = sizeof...(Args); + using return_type = R; + using args_tuple = std::tuple; +}; + +template +struct FunctionTraits { + static constexpr std::size_t arity = sizeof...(Args); + using return_type = R; + using args_tuple = std::tuple; +}; -// Extracts the "Tail" arguments of a function signature, skipping the first -// two arguments (Runtime and FrameValue). 
+// Strips the first two parameters (Runtime& and jsi::Value&) from a member +// function pointer and exposes the remaining types as a dummy free function. +// Used by visionHostFunction to parse only the tail JS args via +// createArgsTupleFromJsi, while frameData at args[0] is passed manually. template struct TailSignature; -// Non-const member function specialization template struct TailSignature { - // A dummy function that has the signature of just the "Rest" arguments. static void dummy(Rest...) {} }; -// Const member function specialization template struct TailSignature { static void dummy(Rest...) {} }; - } // namespace rnexecutorch::meta diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp index 3b31bc10f..51d206cdd 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp @@ -84,15 +84,14 @@ cv::Mat extractFromAHardwareBuffer(void *hardwareBuffer) { char errorMessage[100]; std::snprintf(errorMessage, sizeof(errorMessage), "Unsupported AHardwareBuffer format: %u", desc.format); - throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, - errorMessage); + throw RnExecutorchError(RnExecutorchErrorCode::UnknownError, errorMessage); } // Note: We don't unlock here - Vision Camera manages the lifecycle return mat; #else - throw RnExecutorchError(RnExecutorchErrorCode::NotSupported, + throw RnExecutorchError(RnExecutorchErrorCode::UnknownError, "AHardwareBuffer requires Android API 26+"); #endif // __ANDROID_API__ >= 26 } @@ -106,7 +105,7 @@ cv::Mat extractFromNativeBuffer(uint64_t bufferPtr) { #elif defined(__ANDROID__) return extractFromAHardwareBuffer(reinterpret_cast(bufferPtr)); #else - throw RnExecutorchError(RnExecutorchErrorCode::NotSupported, + throw RnExecutorchError(RnExecutorchErrorCode::UnknownError, 
"NativeBuffer not supported on this platform"); #endif } From a9c01a9a7a05f41f38d98c7d9b59e847db5731e9 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 23 Feb 2026 13:06:50 +0100 Subject: [PATCH 15/37] feat: add new PlatformNotSupported error --- .../common/rnexecutorch/ErrorCodes.h | 5 +++++ .../common/rnexecutorch/utils/FrameExtractor.cpp | 9 +++++---- .../react-native-executorch/src/errors/ErrorCodes.ts | 4 ++++ scripts/errors.config.ts | 4 ++++ 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/ErrorCodes.h b/packages/react-native-executorch/common/rnexecutorch/ErrorCodes.h index f4fd2e7f0..d49f3a175 100644 --- a/packages/react-native-executorch/common/rnexecutorch/ErrorCodes.h +++ b/packages/react-native-executorch/common/rnexecutorch/ErrorCodes.h @@ -75,6 +75,11 @@ enum class RnExecutorchErrorCode : int32_t { * interruptions. */ DownloadInterrupted = 118, + /** + * Thrown when a feature or platform is not supported in the current + * environment. + */ + PlatformNotSupported = 119, /** * Thrown when an error occurs with the tokenizer or tokenization process. 
*/ diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp index 51d206cdd..baae35dc3 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp @@ -43,7 +43,7 @@ cv::Mat extractFromCVPixelBuffer(void *pixelBuffer) { char errorMessage[100]; std::snprintf(errorMessage, sizeof(errorMessage), "Unsupported CVPixelBuffer format: %u", pixelFormat); - throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + throw RnExecutorchError(RnExecutorchErrorCode::PlatformNotSupported, errorMessage); } @@ -84,14 +84,15 @@ cv::Mat extractFromAHardwareBuffer(void *hardwareBuffer) { char errorMessage[100]; std::snprintf(errorMessage, sizeof(errorMessage), "Unsupported AHardwareBuffer format: %u", desc.format); - throw RnExecutorchError(RnExecutorchErrorCode::UnknownError, errorMessage); + throw RnExecutorchError(RnExecutorchErrorCode::PlatformNotSupported, + errorMessage); } // Note: We don't unlock here - Vision Camera manages the lifecycle return mat; #else - throw RnExecutorchError(RnExecutorchErrorCode::UnknownError, + throw RnExecutorchError(RnExecutorchErrorCode::PlatformNotSupported, "AHardwareBuffer requires Android API 26+"); #endif // __ANDROID_API__ >= 26 } @@ -105,7 +106,7 @@ cv::Mat extractFromNativeBuffer(uint64_t bufferPtr) { #elif defined(__ANDROID__) return extractFromAHardwareBuffer(reinterpret_cast(bufferPtr)); #else - throw RnExecutorchError(RnExecutorchErrorCode::UnknownError, + throw RnExecutorchError(RnExecutorchErrorCode::PlatformNotSupported, "NativeBuffer not supported on this platform"); #endif } diff --git a/packages/react-native-executorch/src/errors/ErrorCodes.ts b/packages/react-native-executorch/src/errors/ErrorCodes.ts index 3e4e557a1..4ccb1f07f 100644 --- a/packages/react-native-executorch/src/errors/ErrorCodes.ts 
+++ b/packages/react-native-executorch/src/errors/ErrorCodes.ts @@ -58,6 +58,10 @@ export enum RnExecutorchErrorCode { * Thrown when the number of downloaded files is unexpected, due to download interruptions. */ DownloadInterrupted = 118, + /** + * Thrown when a feature or platform is not supported in the current environment. + */ + PlatformNotSupported = 119, /** * Thrown when an error occurs with the tokenizer or tokenization process. */ diff --git a/scripts/errors.config.ts b/scripts/errors.config.ts index 3e6cf1090..6953eec2e 100644 --- a/scripts/errors.config.ts +++ b/scripts/errors.config.ts @@ -59,6 +59,10 @@ export const errorDefinitions = { * Thrown when the number of downloaded files is unexpected, due to download interruptions. */ DownloadInterrupted: 0x76, + /* + * Thrown when a feature or platform is not supported in the current environment. + */ + PlatformNotSupported: 0x77, /* * Thrown when an error occurs with the tokenizer or tokenization process. From 98395af46cf6634fee2ef6b1dc1dcb09da7cdb88 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 23 Feb 2026 13:38:56 +0100 Subject: [PATCH 16/37] fix: compilation JSI error --- .../common/rnexecutorch/metaprogramming/FunctionHelpers.h | 3 ++- .../models/object_detection/ObjectDetection.cpp | 7 ++----- .../rnexecutorch/models/object_detection/ObjectDetection.h | 3 +-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h index 88bf04bf0..fde81e046 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h @@ -24,7 +24,8 @@ template std::tuple fillTupleFromArgs(std::index_sequence, const jsi::Value *args, jsi::Runtime &runtime) { - return 
std::make_tuple(jsi_conversion::getValue(args[I], runtime)...); + return std::tuple{ + jsi_conversion::getValue(args[I], runtime)...}; } /** diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index 1ae2460c3..26e85da9c 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -147,12 +147,9 @@ ObjectDetection::generateFromFrame(jsi::Runtime &runtime, } std::vector -ObjectDetection::generateFromPixels(jsi::Runtime &runtime, - const jsi::Value &pixelData, +ObjectDetection::generateFromPixels(JSTensorViewIn pixelData, double detectionThreshold) { - auto tensorView = - jsi_conversion::getValue(pixelData, runtime); - cv::Mat image = extractFromPixels(tensorView); + cv::Mat image = extractFromPixels(pixelData); return runInference(image, detectionThreshold); } diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h index bf231ff0b..d32eea95e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h @@ -26,8 +26,7 @@ class ObjectDetection : public VisionModel { generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData, double detectionThreshold); [[nodiscard("Registered non-void function")]] std::vector - generateFromPixels(jsi::Runtime &runtime, const jsi::Value &pixelData, - double detectionThreshold); + generateFromPixels(JSTensorViewIn pixelData, double detectionThreshold); protected: std::vector runInference(cv::Mat image, From 
ffcf72f6aa5e0fc46b4638c06b437983f26f8ac8 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 23 Feb 2026 18:50:24 +0100 Subject: [PATCH 17/37] feat: add tests for generateFromPixels method --- .../app/object_detection/index.tsx | 133 ++---------------- .../object_detection/ObjectDetection.cpp | 4 + .../common/rnexecutorch/tests/CMakeLists.txt | 5 +- .../tests/integration/ObjectDetectionTest.cpp | 69 +++++++++ .../tests/integration/stubs/jsi_stubs.cpp | 8 ++ .../computer_vision/ObjectDetectionModule.ts | 9 +- .../src/types/objectDetection.ts | 4 +- 7 files changed, 103 insertions(+), 129 deletions(-) diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index d843682eb..6a43dd920 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -1,18 +1,16 @@ import Spinner from '../../components/Spinner'; +import { BottomBar } from '../../components/BottomBar'; import { getImage } from '../../utils'; import { Detection, useObjectDetection, SSDLITE_320_MOBILENET_V3_LARGE, - ScalarType, - PixelData, } from 'react-native-executorch'; -import { View, StyleSheet, Image, TouchableOpacity, Text } from 'react-native'; +import { View, StyleSheet, Image } from 'react-native'; import ImageWithBboxes from '../../components/ImageWithBboxes'; import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from '../../context'; import ScreenWrapper from '../../ScreenWrapper'; -import ColorPalette from '../../colors'; export default function ObjectDetectionScreen() { const [imageUri, setImageUri] = useState(''); @@ -44,59 +42,14 @@ export default function ObjectDetectionScreen() { const runForward = async () => { if (imageUri) { try { - console.log('Running forward with string URI...'); - const output = await ssdLite.forward(imageUri, 0.5); - console.log('String URI result:', output.length, 'detections'); + const output 
= await ssdLite.forward(imageUri); setResults(output); } catch (e) { - console.error('Error in runForward:', e); + console.error(e); } } }; - const runForwardPixels = async () => { - try { - console.log('Testing with hardcoded pixel data...'); - - // Create a simple 320x320 test image (all zeros - black image) - // In a real scenario, you would load actual image pixel data here - const width = 320; - const height = 320; - const channels = 3; // RGB - - // Create a black image (you can replace this with actual pixel data) - const rgbData = new Uint8Array(width * height * channels); - - // Optionally, add some test pattern (e.g., white square in center) - for (let y = 100; y < 220; y++) { - for (let x = 100; x < 220; x++) { - const idx = (y * width + x) * 3; - rgbData[idx + 0] = 255; // R - rgbData[idx + 1] = 255; // G - rgbData[idx + 2] = 255; // B - } - } - - const pixelData: PixelData = { - dataPtr: rgbData, - sizes: [height, width, channels], - scalarType: ScalarType.BYTE, - }; - - console.log('Running forward with hardcoded pixel data...', { - sizes: pixelData.sizes, - dataSize: pixelData.dataPtr.byteLength, - }); - - // Run inference using unified forward() API - const output = await ssdLite.forward(pixelData, 0.3); - console.log('Pixel data result:', output.length, 'detections'); - setResults(output); - } catch (e) { - console.error('Error in runForwardPixels:', e); - } - }; - if (!ssdLite.isReady) { return ( - - {/* Custom bottom bar with two buttons */} - - - handleCameraPress(false)}> - πŸ“· Gallery - - - - - - Run (String) - - - - Run (Pixels) - - - + ); } @@ -207,43 +129,4 @@ const styles = StyleSheet.create({ width: '100%', height: '100%', }, - bottomContainer: { - width: '100%', - gap: 15, - alignItems: 'center', - padding: 16, - flex: 1, - }, - bottomIconsContainer: { - flexDirection: 'row', - justifyContent: 'center', - width: '100%', - }, - iconText: { - fontSize: 16, - color: ColorPalette.primary, - }, - buttonsRow: { - flexDirection: 'row', - 
width: '100%', - gap: 10, - }, - button: { - height: 50, - justifyContent: 'center', - alignItems: 'center', - backgroundColor: ColorPalette.primary, - color: '#fff', - borderRadius: 8, - }, - halfButton: { - flex: 1, - }, - buttonDisabled: { - opacity: 0.5, - }, - buttonText: { - color: '#fff', - fontSize: 16, - }, }); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index 26e85da9c..2670cf9dd 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -106,6 +106,10 @@ ObjectDetection::postprocess(const std::vector &tensors, std::vector ObjectDetection::runInference(cv::Mat image, double detectionThreshold) { + if (detectionThreshold < 0.0 || detectionThreshold > 1.0) { + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + "detectionThreshold must be in range [0, 1]"); + } std::scoped_lock lock(inference_mutex_); cv::Size originalSize = image.size(); diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt index e2a8c16bf..c45ab9107 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt +++ b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt @@ -156,8 +156,11 @@ add_rn_test(ObjectDetectionTests integration/ObjectDetectionTest.cpp SOURCES ${RNEXECUTORCH_DIR}/models/object_detection/ObjectDetection.cpp ${RNEXECUTORCH_DIR}/models/object_detection/Utils.cpp + ${RNEXECUTORCH_DIR}/models/VisionModel.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) 
add_rn_test(ImageEmbeddingsTests integration/ImageEmbeddingsTest.cpp diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp index 074ee0751..93cdbf07c 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp @@ -1,6 +1,8 @@ #include "BaseModelTests.h" +#include #include #include +#include #include #include @@ -115,6 +117,73 @@ TEST(ObjectDetectionGenerateTests, DetectionsHaveValidLabels) { } } +// ============================================================================ +// generateFromPixels tests +// ============================================================================ +TEST(ObjectDetectionPixelTests, ValidPixelDataReturnsResults) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + constexpr int width = 4, height = 4, channels = 3; + std::vector pixelData(width * height * channels, 128); + JSTensorViewIn tensorView{pixelData.data(), + {height, width, channels}, + executorch::aten::ScalarType::Byte}; + auto results = model.generateFromPixels(tensorView, 0.3); + EXPECT_GE(results.size(), 0u); +} + +TEST(ObjectDetectionPixelTests, WrongSizesLengthThrows) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + std::vector pixelData(16, 0); + JSTensorViewIn tensorView{ + pixelData.data(), {4, 4}, executorch::aten::ScalarType::Byte}; + EXPECT_THROW((void)model.generateFromPixels(tensorView, 0.5), + RnExecutorchError); +} + +TEST(ObjectDetectionPixelTests, WrongChannelCountThrows) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + constexpr int width = 4, height = 4, channels = 4; + std::vector pixelData(width * height * channels, 0); + JSTensorViewIn tensorView{pixelData.data(), + {height, width, channels}, + 
executorch::aten::ScalarType::Byte}; + EXPECT_THROW((void)model.generateFromPixels(tensorView, 0.5), + RnExecutorchError); +} + +TEST(ObjectDetectionPixelTests, WrongScalarTypeThrows) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + constexpr int width = 4, height = 4, channels = 3; + std::vector pixelData(width * height * channels, 0); + JSTensorViewIn tensorView{pixelData.data(), + {height, width, channels}, + executorch::aten::ScalarType::Float}; + EXPECT_THROW((void)model.generateFromPixels(tensorView, 0.5), + RnExecutorchError); +} + +TEST(ObjectDetectionPixelTests, NegativeThresholdThrows) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + constexpr int width = 4, height = 4, channels = 3; + std::vector pixelData(width * height * channels, 128); + JSTensorViewIn tensorView{pixelData.data(), + {height, width, channels}, + executorch::aten::ScalarType::Byte}; + EXPECT_THROW((void)model.generateFromPixels(tensorView, -0.1), + RnExecutorchError); +} + +TEST(ObjectDetectionPixelTests, ThresholdAboveOneThrows) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + constexpr int width = 4, height = 4, channels = 3; + std::vector pixelData(width * height * channels, 128); + JSTensorViewIn tensorView{pixelData.data(), + {height, width, channels}, + executorch::aten::ScalarType::Byte}; + EXPECT_THROW((void)model.generateFromPixels(tensorView, 1.1), + RnExecutorchError); +} + TEST(ObjectDetectionInheritedTests, GetInputShapeWorks) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); auto shape = model.getInputShape("forward", 0); diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/stubs/jsi_stubs.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/stubs/jsi_stubs.cpp index 39b8ae09c..897a2778e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/stubs/jsi_stubs.cpp +++ 
b/packages/react-native-executorch/common/rnexecutorch/tests/integration/stubs/jsi_stubs.cpp @@ -14,6 +14,14 @@ namespace facebook::jsi { MutableBuffer::~MutableBuffer() {} Value::~Value() {} Value::Value(Value &&other) noexcept {} + +// Needed to link ObjectDetectionTests: generateFromFrame and FrameProcessor +// pull in these JSI symbols, but they are never called in tests. +Object Value::asObject(Runtime &) const & { __builtin_unreachable(); } +BigInt Value::asBigInt(Runtime &) const & { __builtin_unreachable(); } + +uint64_t BigInt::asUint64(Runtime &) const { return 0; } + } // namespace facebook::jsi namespace facebook::react { diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index 0818d9682..e62c7221c 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -1,5 +1,5 @@ import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource } from '../../types/common'; +import { ResourceSource, PixelData } from '../../types/common'; import { Detection } from '../../types/objectDetection'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; @@ -41,4 +41,11 @@ export class ObjectDetectionModule extends VisionModule { throw parseUnknownError(error); } } + + async forward( + input: string | PixelData, + detectionThreshold: number = 0.5 + ): Promise { + return super.forward(input, detectionThreshold); + } } diff --git a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index c2281598a..11953c954 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ 
b/packages/react-native-executorch/src/types/objectDetection.ts @@ -179,7 +179,7 @@ export interface ObjectDetectionType { * **Note**: For VisionCamera frame processing, use `processFrame` instead. * * @param input - Image source (string or PixelData object) - * @param detectionThreshold - An optional number between 0 and 1 representing the minimum confidence score. Default is 0.7. + * @param detectionThreshold - An optional number between 0 and 1 representing the minimum confidence score. Default is 0.5. * @returns A Promise that resolves to an array of `Detection` objects. * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. * @@ -225,7 +225,7 @@ export interface ObjectDetectionType { * ``` * * @param frame - VisionCamera Frame object - * @param detectionThreshold - The threshold for detection sensitivity. Default is 0.7. + * @param detectionThreshold - The threshold for detection sensitivity. Default is 0.5. * @returns Array of Detection objects representing detected items in the frame. 
*/ runOnFrame: From 44676fc65a080c917117438530c5c081946bb1f7 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 23 Feb 2026 19:32:05 +0100 Subject: [PATCH 18/37] feat: add example screen with vision camera to computer vision app --- ...ative-vision-camera@npm-5.0.0-beta.1.patch | 713 ++++++++++++++++++ apps/computer-vision/app.json | 3 +- apps/computer-vision/app/_layout.tsx | 8 + apps/computer-vision/app/index.tsx | 6 + .../app/object_detection_live/index.tsx | 224 ++++++ apps/computer-vision/package.json | 5 +- .../src/types/objectDetection.ts | 4 +- yarn.lock | 47 +- 8 files changed, 995 insertions(+), 15 deletions(-) create mode 100644 .yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch create mode 100644 apps/computer-vision/app/object_detection_live/index.tsx diff --git a/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch b/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch new file mode 100644 index 000000000..73f999e9a --- /dev/null +++ b/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch @@ -0,0 +1,713 @@ +diff --git a/lib/expo-plugin/withVisionCamera.js b/lib/expo-plugin/withVisionCamera.js +index 32418a9..f7a8c5c 100644 +--- a/lib/expo-plugin/withVisionCamera.js ++++ b/lib/expo-plugin/withVisionCamera.js +@@ -1,4 +1,4 @@ +-import { AndroidConfig, withPlugins, } from '@expo/config-plugins'; ++const { AndroidConfig, withPlugins } = require('@expo/config-plugins'); + const CAMERA_USAGE = 'Allow $(PRODUCT_NAME) to access your camera'; + const MICROPHONE_USAGE = 'Allow $(PRODUCT_NAME) to access your microphone'; + const withVisionCamera = (config, props = {}) => { +@@ -30,4 +30,4 @@ const withVisionCamera = (config, props = {}) => { + [AndroidConfig.Permissions.withPermissions, androidPermissions], + ]); + }; +-export default withVisionCamera; ++module.exports = withVisionCamera; +diff --git a/cpp/Frame Processors/HybridWorkletQueueFactory.cpp b/cpp/Frame Processors/HybridWorkletQueueFactory.cpp 
+new file mode 100644 +index 0000000..5da4ef9 +--- /dev/null ++++ b/cpp/Frame Processors/HybridWorkletQueueFactory.cpp +@@ -0,0 +1,50 @@ ++/// ++/// HybridWorkletQueueFactory.cpp ++/// VisionCamera ++/// Copyright Β© 2025 Marc Rousavy @ Margelo ++/// ++ ++#include "HybridWorkletQueueFactory.hpp" ++ ++#include "JSIConverter+AsyncQueue.hpp" ++#include "NativeThreadAsyncQueue.hpp" ++#include "NativeThreadDispatcher.hpp" ++#include ++#include ++ ++namespace margelo::nitro::camera { ++ ++HybridWorkletQueueFactory::HybridWorkletQueueFactory() : HybridObject(TAG) {} ++ ++void HybridWorkletQueueFactory::loadHybridMethods() { ++ HybridWorkletQueueFactorySpec::loadHybridMethods(); ++ registerHybrids(this, [](Prototype& prototype) { ++ prototype.registerRawHybridMethod("installDispatcher", 1, &HybridWorkletQueueFactory::installDispatcher); ++ }); ++} ++ ++std::shared_ptr HybridWorkletQueueFactory::wrapThreadInQueue(const std::shared_ptr& thread) { ++ return std::make_shared(thread); ++} ++ ++double HybridWorkletQueueFactory::getCurrentThreadMarker() { ++ static std::atomic_size_t threadCounter{1}; ++ static thread_local size_t thisThreadId{0}; ++ if (thisThreadId == 0) { ++ thisThreadId = threadCounter.fetch_add(1); ++ } ++ return static_cast(thisThreadId); ++} ++ ++jsi::Value HybridWorkletQueueFactory::installDispatcher(jsi::Runtime& runtime, const jsi::Value&, const jsi::Value* args, size_t count) { ++ if (count != 1) ++ throw std::runtime_error("installDispatcher(..) 
must be called with exactly 1 argument!"); ++ auto thread = JSIConverter>::fromJSI(runtime, args[0]); ++ ++ auto dispatcher = std::make_shared(thread); ++ Dispatcher::installRuntimeGlobalDispatcher(runtime, dispatcher); ++ ++ return jsi::Value::undefined(); ++} ++ ++} // namespace margelo::nitro::camera +diff --git a/android/CMakeLists.txt b/android/CMakeLists.txt +index 0000000..1111111 100644 +--- a/android/CMakeLists.txt ++++ b/android/CMakeLists.txt +@@ -20,6 +20,7 @@ + "src/main/cpp" + "../cpp" + "../cpp/Frame Processors" ++ "../nitrogen/generated/shared/c++" + ) + + find_library(LOG_LIB log) +diff --git a/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.cpp b/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.cpp +new file mode 100644 +index 0000000..5da4ef9 +--- /dev/null ++++ b/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.cpp +@@ -0,0 +1,50 @@ ++/// ++/// HybridWorkletQueueFactory.cpp ++/// VisionCamera ++/// Copyright Β© 2025 Marc Rousavy @ Margelo ++/// ++ ++#include "HybridWorkletQueueFactory.hpp" ++ ++#include "JSIConverter+AsyncQueue.hpp" ++#include "NativeThreadAsyncQueue.hpp" ++#include "NativeThreadDispatcher.hpp" ++#include ++#include ++ ++namespace margelo::nitro::camera { ++ ++HybridWorkletQueueFactory::HybridWorkletQueueFactory() : HybridObject(TAG) {} ++ ++void HybridWorkletQueueFactory::loadHybridMethods() { ++ HybridWorkletQueueFactorySpec::loadHybridMethods(); ++ registerHybrids(this, [](Prototype& prototype) { ++ prototype.registerRawHybridMethod("installDispatcher", 1, &HybridWorkletQueueFactory::installDispatcher); ++ }); ++} ++ ++std::shared_ptr HybridWorkletQueueFactory::wrapThreadInQueue(const std::shared_ptr& thread) { ++ return std::make_shared(thread); ++} ++ ++double HybridWorkletQueueFactory::getCurrentThreadMarker() { ++ static std::atomic_size_t threadCounter{1}; ++ static thread_local size_t thisThreadId{0}; ++ if (thisThreadId == 0) { ++ thisThreadId = threadCounter.fetch_add(1); ++ } ++ return 
static_cast(thisThreadId); ++} ++ ++jsi::Value HybridWorkletQueueFactory::installDispatcher(jsi::Runtime& runtime, const jsi::Value&, const jsi::Value* args, size_t count) { ++ if (count != 1) ++ throw std::runtime_error("installDispatcher(..) must be called with exactly 1 argument!"); ++ auto thread = JSIConverter>::fromJSI(runtime, args[0]); ++ ++ auto dispatcher = std::make_shared(thread); ++ Dispatcher::installRuntimeGlobalDispatcher(runtime, dispatcher); ++ ++ return jsi::Value::undefined(); ++} ++ ++} // namespace margelo::nitro::camera +diff --git a/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.hpp b/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.hpp +new file mode 100644 +index 0000000..daa16d2 +--- /dev/null ++++ b/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.hpp +@@ -0,0 +1,29 @@ ++/// ++/// HybridWorkletQueueFactory.hpp ++/// VisionCamera ++/// Copyright Β© 2025 Marc Rousavy @ Margelo ++/// ++ ++#pragma once ++ ++#include "HybridWorkletQueueFactorySpec.hpp" ++#include "JSIConverter+AsyncQueue.hpp" ++#include ++#include ++ ++namespace margelo::nitro::camera { ++ ++class HybridWorkletQueueFactory : public HybridWorkletQueueFactorySpec { ++public: ++ HybridWorkletQueueFactory(); ++ ++public: ++ std::shared_ptr wrapThreadInQueue(const std::shared_ptr& thread) override; ++ double getCurrentThreadMarker() override; ++ ++ jsi::Value installDispatcher(jsi::Runtime& runtime, const jsi::Value&, const jsi::Value* args, size_t count); ++ ++ void loadHybridMethods() override; ++}; ++ ++} // namespace margelo::nitro::camera +diff --git a/nitrogen/generated/shared/c++/JSIConverter+AsyncQueue.hpp b/nitrogen/generated/shared/c++/JSIConverter+AsyncQueue.hpp +new file mode 100644 +index 0000000..5b93f2d +--- /dev/null ++++ b/nitrogen/generated/shared/c++/JSIConverter+AsyncQueue.hpp +@@ -0,0 +1,24 @@ ++/// ++/// JSIConverter+AsyncQueue.hpp ++/// VisionCamera ++/// Copyright Β© 2025 Marc Rousavy @ Margelo ++/// ++ ++#pragma once ++ ++#include 
++#include ++#if __has_include() ++#include ++#elif __has_include() ++#include ++#else ++#error react-native-worklets Prefab not found! ++#endif ++ ++namespace margelo::nitro { ++ ++// JSIConverter> is implemented ++// in JSIConverter> ++ ++} +diff --git a/nitrogen/generated/shared/c++/NativeThreadAsyncQueue.hpp b/nitrogen/generated/shared/c++/NativeThreadAsyncQueue.hpp +new file mode 100644 +index 0000000..d5a0958 +--- /dev/null ++++ b/nitrogen/generated/shared/c++/NativeThreadAsyncQueue.hpp +@@ -0,0 +1,34 @@ ++/// ++/// NativeThreadAsyncQueue.hpp ++/// VisionCamera ++/// Copyright Β© 2025 Marc Rousavy @ Margelo ++/// ++ ++#pragma once ++ ++#include "HybridNativeThreadSpec.hpp" ++#include "JSIConverter+AsyncQueue.hpp" ++#include ++ ++namespace margelo::nitro::camera { ++ ++/** ++ * An implementation of `worklets::AsyncQueue` that uses a `NativeThread` to run its jobs. ++ * ++ * The `NativeThread` (`HybridNativeThreadSpec`) is a platform-implemented object, ++ * e.g. using `DispatchQueue` on iOS. 
++ */ ++class NativeThreadAsyncQueue : public worklets::AsyncQueue { ++public: ++ NativeThreadAsyncQueue(std::shared_ptr thread) : _thread(std::move(thread)) {} ++ ++ void push(std::function&& job) override { ++ auto jobCopy = job; ++ _thread->runOnThread(jobCopy); ++ } ++ ++private: ++ std::shared_ptr _thread; ++}; ++ ++} // namespace margelo::nitro::camera +diff --git a/nitrogen/generated/shared/c++/NativeThreadDispatcher.hpp b/nitrogen/generated/shared/c++/NativeThreadDispatcher.hpp +new file mode 100644 +index 0000000..758d2f2 +--- /dev/null ++++ b/nitrogen/generated/shared/c++/NativeThreadDispatcher.hpp +@@ -0,0 +1,36 @@ ++/// ++/// NativeThreadDispatcher.hpp ++/// VisionCamera ++/// Copyright Β© 2025 Marc Rousavy @ Margelo ++/// ++ ++#pragma once ++ ++#include "HybridNativeThreadSpec.hpp" ++#include "JSIConverter+AsyncQueue.hpp" ++#include ++ ++namespace margelo::nitro::camera { ++ ++/** ++ * An implementation of `nitro::Dispatcher` that uses a `NativeThread` to run its jobs. ++ * ++ * The `NativeThread` (`HybridNativeThreadSpec`) is a platform-implemented object, ++ * e.g. using `DispatchQueue` on iOS. ++ */ ++class NativeThreadDispatcher : public nitro::Dispatcher { ++public: ++ NativeThreadDispatcher(std::shared_ptr thread) : _thread(std::move(thread)) {} ++ ++ void runSync(std::function&&) override { ++ throw std::runtime_error("runSync(...) 
is not implemented for NativeThreadDispatcher!"); ++ } ++ void runAsync(std::function&& function) override { ++ _thread->runOnThread(function); ++ } ++ ++private: ++ std::shared_ptr _thread; ++}; ++ ++} // namespace margelo::nitro::camera +diff --git a/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/BoundingBox.kt b/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/BoundingBox.kt +new file mode 100644 +index 0000000..aaaaaaa +--- /dev/null ++++ b/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/BoundingBox.kt +@@ -0,0 +1,47 @@ ++/// ++/// BoundingBox.kt ++/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. ++/// https://github.com/mrousavy/nitro ++/// Copyright Β© Marc Rousavy @ Margelo ++/// ++ ++package com.margelo.nitro.camera ++ ++import androidx.annotation.Keep ++import com.facebook.proguard.annotations.DoNotStrip ++ ++ ++/** ++ * Represents the JavaScript object/struct "BoundingBox". ++ */ ++@DoNotStrip ++@Keep ++data class BoundingBox( ++ @DoNotStrip ++ @Keep ++ val x: Double, ++ @DoNotStrip ++ @Keep ++ val y: Double, ++ @DoNotStrip ++ @Keep ++ val width: Double, ++ @DoNotStrip ++ @Keep ++ val height: Double ++) { ++ /* primary constructor */ ++ ++ companion object { ++ /** ++ * Constructor called from C++ ++ */ ++ @DoNotStrip ++ @Keep ++ @Suppress("unused") ++ @JvmStatic ++ private fun fromCpp(x: Double, y: Double, width: Double, height: Double): BoundingBox { ++ return BoundingBox(x, y, width, height) ++ } ++ } ++} +diff --git a/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/HybridScannedObjectSpec.kt b/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/HybridScannedObjectSpec.kt +new file mode 100644 +index 0000000..bbbbbbb +--- /dev/null ++++ b/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/HybridScannedObjectSpec.kt +@@ -0,0 +1,60 @@ ++/// ++/// HybridScannedObjectSpec.kt ++/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. 
++/// https://github.com/mrousavy/nitro ++/// Copyright Β© Marc Rousavy @ Margelo ++/// ++ ++package com.margelo.nitro.camera ++ ++import androidx.annotation.Keep ++import com.facebook.jni.HybridData ++import com.facebook.proguard.annotations.DoNotStrip ++import com.margelo.nitro.core.HybridObject ++ ++/** ++ * A Kotlin class representing the ScannedObject HybridObject. ++ * Implement this abstract class to create Kotlin-based instances of ScannedObject. ++ */ ++@DoNotStrip ++@Keep ++@Suppress( ++ "KotlinJniMissingFunction", "unused", ++ "RedundantSuppression", "RedundantUnitReturnType", "SimpleRedundantLet", ++ "LocalVariableName", "PropertyName", "PrivatePropertyName", "FunctionName" ++) ++abstract class HybridScannedObjectSpec: HybridObject() { ++ @DoNotStrip ++ private var mHybridData: HybridData = initHybrid() ++ ++ init { ++ super.updateNative(mHybridData) ++ } ++ ++ override fun updateNative(hybridData: HybridData) { ++ mHybridData = hybridData ++ super.updateNative(hybridData) ++ } ++ ++ // Default implementation of `HybridObject.toString()` ++ override fun toString(): String { ++ return "[HybridObject ScannedObject]" ++ } ++ ++ // Properties ++ @get:DoNotStrip ++ @get:Keep ++ abstract val type: ScannedObjectType ++ ++ @get:DoNotStrip ++ @get:Keep ++ abstract val boundingBox: BoundingBox ++ ++ // Methods ++ ++ private external fun initHybrid(): HybridData ++ ++ companion object { ++ protected const val TAG = "HybridScannedObjectSpec" ++ } ++} +diff --git a/nitrogen/generated/android/c++/JBoundingBox.hpp b/nitrogen/generated/android/c++/JBoundingBox.hpp +new file mode 100644 +index 0000000..ccccccc +--- /dev/null ++++ b/nitrogen/generated/android/c++/JBoundingBox.hpp +@@ -0,0 +1,69 @@ ++/// ++/// JBoundingBox.hpp ++/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. 
++/// https://github.com/mrousavy/nitro ++/// Copyright Β© Marc Rousavy @ Margelo ++/// ++ ++#pragma once ++ ++#include ++#include "BoundingBox.hpp" ++ ++ ++ ++namespace margelo::nitro::camera { ++ ++ using namespace facebook; ++ ++ /** ++ * The C++ JNI bridge between the C++ struct "BoundingBox" and the Kotlin data class "BoundingBox". ++ */ ++ struct JBoundingBox final: public jni::JavaClass { ++ public: ++ static auto constexpr kJavaDescriptor = "Lcom/margelo/nitro/camera/BoundingBox;"; ++ ++ public: ++ /** ++ * Convert this Java/Kotlin-based struct to the C++ struct BoundingBox by copying all values to C++. ++ */ ++ [[maybe_unused]] ++ [[nodiscard]] ++ BoundingBox toCpp() const { ++ static const auto clazz = javaClassStatic(); ++ static const auto fieldX = clazz->getField("x"); ++ double x = this->getFieldValue(fieldX); ++ static const auto fieldY = clazz->getField("y"); ++ double y = this->getFieldValue(fieldY); ++ static const auto fieldWidth = clazz->getField("width"); ++ double width = this->getFieldValue(fieldWidth); ++ static const auto fieldHeight = clazz->getField("height"); ++ double height = this->getFieldValue(fieldHeight); ++ return BoundingBox( ++ x, ++ y, ++ width, ++ height ++ ); ++ } ++ ++ public: ++ /** ++ * Create a Java/Kotlin-based struct by copying all values from the given C++ struct to Java. 
++ */ ++ [[maybe_unused]] ++ static jni::local_ref fromCpp(const BoundingBox& value) { ++ using JSignature = JBoundingBox(double, double, double, double); ++ static const auto clazz = javaClassStatic(); ++ static const auto create = clazz->getStaticMethod("fromCpp"); ++ return create( ++ clazz, ++ value.x, ++ value.y, ++ value.width, ++ value.height ++ ); ++ } ++ }; ++ ++} // namespace margelo::nitro::camera +diff --git a/nitrogen/generated/android/c++/JHybridScannedObjectSpec.hpp b/nitrogen/generated/android/c++/JHybridScannedObjectSpec.hpp +new file mode 100644 +index 0000000..ddddddd +--- /dev/null ++++ b/nitrogen/generated/android/c++/JHybridScannedObjectSpec.hpp +@@ -0,0 +1,63 @@ ++/// ++/// JHybridScannedObjectSpec.hpp ++/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. ++/// https://github.com/mrousavy/nitro ++/// Copyright Β© Marc Rousavy @ Margelo ++/// ++ ++#pragma once ++ ++#include ++#include ++#include "HybridScannedObjectSpec.hpp" ++ ++ ++ ++ ++namespace margelo::nitro::camera { ++ ++ using namespace facebook; ++ ++ class JHybridScannedObjectSpec: public jni::HybridClass, ++ public virtual HybridScannedObjectSpec { ++ public: ++ static auto constexpr kJavaDescriptor = "Lcom/margelo/nitro/camera/HybridScannedObjectSpec;"; ++ static jni::local_ref initHybrid(jni::alias_ref jThis); ++ static void registerNatives(); ++ ++ protected: ++ // C++ constructor (called from Java via `initHybrid()`) ++ explicit JHybridScannedObjectSpec(jni::alias_ref jThis) : ++ HybridObject(HybridScannedObjectSpec::TAG), ++ HybridBase(jThis), ++ _javaPart(jni::make_global(jThis)) {} ++ ++ public: ++ ~JHybridScannedObjectSpec() override { ++ // Hermes GC can destroy JS objects on a non-JNI Thread. 
++ jni::ThreadScope::WithClassLoader([&] { _javaPart.reset(); }); ++ } ++ ++ public: ++ size_t getExternalMemorySize() noexcept override; ++ bool equals(const std::shared_ptr& other) override; ++ void dispose() noexcept override; ++ std::string toString() override; ++ ++ public: ++ inline const jni::global_ref& getJavaPart() const noexcept { ++ return _javaPart; ++ } ++ ++ public: ++ // Properties ++ ScannedObjectType getType() override; ++ BoundingBox getBoundingBox() override; ++ ++ private: ++ friend HybridBase; ++ using HybridBase::HybridBase; ++ jni::global_ref _javaPart; ++ }; ++ ++} // namespace margelo::nitro::camera +diff --git a/nitrogen/generated/android/VisionCamera+autolinking.cmake b/nitrogen/generated/android/VisionCamera+autolinking.cmake +index 0000000..1111111 100644 +--- a/nitrogen/generated/android/VisionCamera+autolinking.cmake ++++ b/nitrogen/generated/android/VisionCamera+autolinking.cmake +@@ -112,3 +112,4 @@ + ../nitrogen/generated/android/c++/JHybridPreviewViewSpec.cpp + ../nitrogen/generated/android/c++/views/JHybridPreviewViewStateUpdater.cpp ++ ../nitrogen/generated/android/c++/JHybridScannedObjectSpec.cpp + ) +diff --git a/nitrogen/generated/android/c++/JHybridScannedObjectSpec.cpp b/nitrogen/generated/android/c++/JHybridScannedObjectSpec.cpp +new file mode 100644 +index 0000000..eeeeeee +--- /dev/null ++++ b/nitrogen/generated/android/c++/JHybridScannedObjectSpec.cpp +@@ -0,0 +1,69 @@ ++/// ++/// JHybridScannedObjectSpec.cpp ++/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. ++/// https://github.com/mrousavy/nitro ++/// Copyright Β© Marc Rousavy @ Margelo ++/// ++ ++#include "JHybridScannedObjectSpec.hpp" ++ ++// Forward declaration of `ScannedObjectType` to properly resolve imports. ++namespace margelo::nitro::camera { enum class ScannedObjectType; } ++// Forward declaration of `BoundingBox` to properly resolve imports. 
++namespace margelo::nitro::camera { struct BoundingBox; } ++ ++#include "ScannedObjectType.hpp" ++#include "JScannedObjectType.hpp" ++#include "BoundingBox.hpp" ++#include "JBoundingBox.hpp" ++ ++namespace margelo::nitro::camera { ++ ++ jni::local_ref JHybridScannedObjectSpec::initHybrid(jni::alias_ref jThis) { ++ return makeCxxInstance(jThis); ++ } ++ ++ void JHybridScannedObjectSpec::registerNatives() { ++ registerHybrid({ ++ makeNativeMethod("initHybrid", JHybridScannedObjectSpec::initHybrid), ++ }); ++ } ++ ++ size_t JHybridScannedObjectSpec::getExternalMemorySize() noexcept { ++ static const auto method = javaClassStatic()->getMethod("getMemorySize"); ++ return method(_javaPart); ++ } ++ ++ bool JHybridScannedObjectSpec::equals(const std::shared_ptr& other) { ++ if (auto otherCast = std::dynamic_pointer_cast(other)) { ++ return _javaPart == otherCast->_javaPart; ++ } ++ return false; ++ } ++ ++ void JHybridScannedObjectSpec::dispose() noexcept { ++ static const auto method = javaClassStatic()->getMethod("dispose"); ++ method(_javaPart); ++ } ++ ++ std::string JHybridScannedObjectSpec::toString() { ++ static const auto method = javaClassStatic()->getMethod("toString"); ++ auto javaString = method(_javaPart); ++ return javaString->toStdString(); ++ } ++ ++ // Properties ++ ScannedObjectType JHybridScannedObjectSpec::getType() { ++ static const auto method = javaClassStatic()->getMethod()>("getType"); ++ auto __result = method(_javaPart); ++ return __result->toCpp(); ++ } ++ BoundingBox JHybridScannedObjectSpec::getBoundingBox() { ++ static const auto method = javaClassStatic()->getMethod()>("getBoundingBox"); ++ auto __result = method(_javaPart); ++ return __result->toCpp(); ++ } ++ ++ // Methods ++ ++} // namespace margelo::nitro::camera +diff --git a/android/src/main/java/com/margelo/nitro/camera/hybrids/recording/HybridVideoRecorder.kt b/android/src/main/java/com/margelo/nitro/camera/hybrids/recording/HybridVideoRecorder.kt +index aaaaaaa..bbbbbbb 100644 
+--- a/android/src/main/java/com/margelo/nitro/camera/hybrids/recording/HybridVideoRecorder.kt ++++ b/android/src/main/java/com/margelo/nitro/camera/hybrids/recording/HybridVideoRecorder.kt +@@ -55,6 +55,6 @@ + when (event) { + is VideoRecordEvent.Start -> { +- promise.resolve() ++ promise.resolve(Unit) + didResolve = true + } + +@@ -98,27 +98,48 @@ + override fun stopRecording(): Promise { +- return Promise.parallel(executor) { +- val recording = recording ?: throw Error("Not currently recording!") +- recording.stop() +- this.isPaused = false +- this.recording = null +- this.recordedDuration = 0.0 +- this.recordedFileSize = 0.0 +- } ++ val promise = Promise() ++ executor.execute { ++ try { ++ val recording = recording ?: throw Error("Not currently recording!") ++ recording.stop() ++ this.isPaused = false ++ this.recording = null ++ this.recordedDuration = 0.0 ++ this.recordedFileSize = 0.0 ++ promise.resolve(Unit) ++ } catch (e: Throwable) { ++ promise.reject(e) ++ } ++ } ++ return promise + } + + override fun pauseRecording(): Promise { +- return Promise.parallel(executor) { +- val recording = recording ?: throw Error("Not currently recording!") +- recording.pause() +- this.isPaused = true +- } ++ val promise = Promise() ++ executor.execute { ++ try { ++ val recording = recording ?: throw Error("Not currently recording!") ++ recording.pause() ++ this.isPaused = true ++ promise.resolve(Unit) ++ } catch (e: Throwable) { ++ promise.reject(e) ++ } ++ } ++ return promise + } + + override fun resumeRecording(): Promise { +- return Promise.parallel(executor) { +- val recording = recording ?: throw Error("Not currently recording!") +- recording.resume() +- this.isPaused = false +- } ++ val promise = Promise() ++ executor.execute { ++ try { ++ val recording = recording ?: throw Error("Not currently recording!") ++ recording.resume() ++ this.isPaused = false ++ promise.resolve(Unit) ++ } catch (e: Throwable) { ++ promise.reject(e) ++ } ++ } ++ return promise + } + } diff 
--git a/apps/computer-vision/app.json b/apps/computer-vision/app.json index 4d68c039b..5db8c1390 100644 --- a/apps/computer-vision/app.json +++ b/apps/computer-vision/app.json @@ -25,7 +25,8 @@ "foregroundImage": "./assets/icons/adaptive-icon.png", "backgroundColor": "#ffffff" }, - "package": "com.anonymous.computervision" + "package": "com.anonymous.computervision", + "permissions": ["android.permission.CAMERA"] }, "web": { "favicon": "./assets/icons/favicon.png" diff --git a/apps/computer-vision/app/_layout.tsx b/apps/computer-vision/app/_layout.tsx index 35fba7fb1..3970ac316 100644 --- a/apps/computer-vision/app/_layout.tsx +++ b/apps/computer-vision/app/_layout.tsx @@ -83,6 +83,14 @@ export default function _layout() { headerTitleStyle: { color: ColorPalette.primary }, }} /> + Object Detection + router.navigate('object_detection_live/')} + > + Object Detection Live + router.navigate('ocr/')} diff --git a/apps/computer-vision/app/object_detection_live/index.tsx b/apps/computer-vision/app/object_detection_live/index.tsx new file mode 100644 index 000000000..68fdd4fed --- /dev/null +++ b/apps/computer-vision/app/object_detection_live/index.tsx @@ -0,0 +1,224 @@ +import React, { + useCallback, + useContext, + useEffect, + useMemo, + useRef, + useState, +} from 'react'; +import { + StatusBar, + StyleSheet, + Text, + TouchableOpacity, + View, +} from 'react-native'; +import { useSafeAreaInsets } from 'react-native-safe-area-context'; + +import { + Camera, + getCameraFormat, + Templates, + useCameraDevices, + useCameraPermission, + useFrameOutput, +} from 'react-native-vision-camera'; +import { scheduleOnRN } from 'react-native-worklets'; +import { + Detection, + SSDLITE_320_MOBILENET_V3_LARGE, + useObjectDetection, +} from 'react-native-executorch'; +import { GeneratingContext } from '../../context'; +import Spinner from '../../components/Spinner'; +import ColorPalette from '../../colors'; + +export default function ObjectDetectionLiveScreen() { + const insets = 
useSafeAreaInsets(); + + const model = useObjectDetection({ model: SSDLITE_320_MOBILENET_V3_LARGE }); + const { setGlobalGenerating } = useContext(GeneratingContext); + + useEffect(() => { + setGlobalGenerating(model.isGenerating); + }, [model.isGenerating, setGlobalGenerating]); + + const [detectionCount, setDetectionCount] = useState(0); + const [fps, setFps] = useState(0); + const lastFrameTimeRef = useRef(Date.now()); + + const cameraPermission = useCameraPermission(); + const devices = useCameraDevices(); + const device = devices.find((d) => d.position === 'back') ?? devices[0]; + + const format = useMemo(() => { + if (device == null) return undefined; + try { + return getCameraFormat(device, Templates.Video); + } catch { + return undefined; + } + }, [device]); + + const updateStats = useCallback((results: Detection[]) => { + setDetectionCount(results.length); + const now = Date.now(); + const timeDiff = now - lastFrameTimeRef.current; + if (timeDiff > 0) { + setFps(Math.round(1000 / timeDiff)); + } + lastFrameTimeRef.current = now; + }, []); + + const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + if (!model.runOnFrame) { + frame.dispose(); + return; + } + try { + const result = model.runOnFrame(frame, 0.5); + if (result) { + scheduleOnRN(updateStats, result); + } + } catch { + // ignore frame errors + } finally { + frame.dispose(); + } + }, + }); + + if (!model.isReady) { + return ( + + ); + } + + if (!cameraPermission.hasPermission) { + return ( + + Camera access needed + cameraPermission.requestPermission()} + style={styles.button} + > + Grant Permission + + + ); + } + + if (device == null) { + return ( + + No camera device found + + ); + } + + return ( + + + + + + + + + {detectionCount} + objects + + + + {fps} + fps + + + + + ); +} + +const styles = StyleSheet.create({ + container: { + flex: 1, + backgroundColor: 'black', + }, + centered: { + flex: 1, + backgroundColor: 'black', + justifyContent: 'center', + 
alignItems: 'center', + gap: 16, + }, + message: { + color: 'white', + fontSize: 18, + }, + button: { + paddingHorizontal: 24, + paddingVertical: 12, + backgroundColor: ColorPalette.primary, + borderRadius: 24, + }, + buttonText: { + color: 'white', + fontSize: 15, + fontWeight: '600', + letterSpacing: 0.3, + }, + + // Bottom stats bar + bottomBarWrapper: { + position: 'absolute', + bottom: 0, + left: 0, + right: 0, + alignItems: 'center', + }, + bottomBar: { + flexDirection: 'row', + alignItems: 'center', + backgroundColor: 'rgba(0, 0, 0, 0.55)', + borderRadius: 24, + paddingHorizontal: 28, + paddingVertical: 10, + gap: 24, + }, + statItem: { + alignItems: 'center', + }, + statValue: { + color: 'white', + fontSize: 22, + fontWeight: '700', + letterSpacing: -0.5, + }, + statLabel: { + color: 'rgba(255,255,255,0.55)', + fontSize: 11, + fontWeight: '500', + textTransform: 'uppercase', + letterSpacing: 0.8, + }, + statDivider: { + width: 1, + height: 32, + backgroundColor: 'rgba(255,255,255,0.2)', + }, +}); diff --git a/apps/computer-vision/package.json b/apps/computer-vision/package.json index 3f47c357c..ee879c6af 100644 --- a/apps/computer-vision/package.json +++ b/apps/computer-vision/package.json @@ -31,13 +31,14 @@ "react-native-gesture-handler": "~2.28.0", "react-native-image-picker": "^7.2.2", "react-native-loading-spinner-overlay": "^3.0.1", - "react-native-nitro-image": "0.10.2", - "react-native-nitro-modules": "0.33.4", + "react-native-nitro-image": "^0.12.0", + "react-native-nitro-modules": "^0.33.9", "react-native-reanimated": "~4.2.1", "react-native-safe-area-context": "~5.6.0", "react-native-screens": "~4.16.0", "react-native-svg": "15.12.1", "react-native-svg-transformer": "^1.5.0", + "react-native-vision-camera": "patch:react-native-vision-camera@npm%3A5.0.0-beta.1#~/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch", "react-native-worklets": "^0.7.2" }, "devDependencies": { diff --git 
a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index 11953c954..9532c508f 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ b/packages/react-native-executorch/src/types/objectDetection.ts @@ -225,10 +225,10 @@ export interface ObjectDetectionType { * ``` * * @param frame - VisionCamera Frame object - * @param detectionThreshold - The threshold for detection sensitivity. Default is 0.5. + * @param detectionThreshold - The threshold for detection sensitivity. * @returns Array of Detection objects representing detected items in the frame. */ runOnFrame: - | ((frame: Frame, detectionThreshold?: number) => Detection[]) + | ((frame: Frame, detectionThreshold: number) => Detection[]) | null; } diff --git a/yarn.lock b/yarn.lock index 3d2d9f7ee..d4316e786 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6585,13 +6585,14 @@ __metadata: react-native-gesture-handler: "npm:~2.28.0" react-native-image-picker: "npm:^7.2.2" react-native-loading-spinner-overlay: "npm:^3.0.1" - react-native-nitro-image: "npm:0.10.2" - react-native-nitro-modules: "npm:0.33.4" + react-native-nitro-image: "npm:^0.12.0" + react-native-nitro-modules: "npm:^0.33.9" react-native-reanimated: "npm:~4.2.1" react-native-safe-area-context: "npm:~5.6.0" react-native-screens: "npm:~4.16.0" react-native-svg: "npm:15.12.1" react-native-svg-transformer: "npm:^1.5.0" + react-native-vision-camera: "patch:react-native-vision-camera@npm%3A5.0.0-beta.1#~/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch" react-native-worklets: "npm:^0.7.2" languageName: unknown linkType: soft @@ -13394,24 +13395,24 @@ __metadata: languageName: node linkType: hard -"react-native-nitro-image@npm:0.10.2": - version: 0.10.2 - resolution: "react-native-nitro-image@npm:0.10.2" +"react-native-nitro-image@npm:^0.12.0": + version: 0.12.0 + resolution: "react-native-nitro-image@npm:0.12.0" peerDependencies: react: "*" 
react-native: "*" react-native-nitro-modules: "*" - checksum: 10/3be75e93da369adfe00441dae78171572dec38d3d7e75e5d4cb302b81479be9686c8d8dc0ea4b331514b8725099bf3eb069ab9933f7029627d12a72d71766cb4 + checksum: 10/03f165381c35e060d4d05eae3ce029b32a4009482f327e9526840f306181ca87a862b335e12667c55d4ee9f2069542ca93dd112feb7f1822bf7d2ddc38fe58f0 languageName: node linkType: hard -"react-native-nitro-modules@npm:0.33.4": - version: 0.33.4 - resolution: "react-native-nitro-modules@npm:0.33.4" +"react-native-nitro-modules@npm:^0.33.9": + version: 0.33.9 + resolution: "react-native-nitro-modules@npm:0.33.9" peerDependencies: react: "*" react-native: "*" - checksum: 10/a737ff6b142c55821688612305245fd10a7cff36f0ee66cad0956c6815a60cdd4ba64cdfba6137a6dbfe815645763ce5d406cf488876edd47dab7f8d0031e01a + checksum: 10/4ebf4db46d1e4987a0e52054724081aa9712bcd1d505a6dbdd47aebc6afe72a7abaa0e947651d9f3cc594e4eb3dba47fc6f59db27c5a5ed383946e40d96543a0 languageName: node linkType: hard @@ -13497,6 +13498,32 @@ __metadata: languageName: node linkType: hard +"react-native-vision-camera@npm:5.0.0-beta.1": + version: 5.0.0-beta.1 + resolution: "react-native-vision-camera@npm:5.0.0-beta.1" + peerDependencies: + react: "*" + react-native: "*" + react-native-nitro-image: "*" + react-native-nitro-modules: "*" + react-native-worklets: "*" + checksum: 10/873410a33e33d68b162b6524997480133ef9b6469dce3f87253c371bba1643d326e835891b0c9f75018d376faf4aec23daba5ab729f431c718ecf901601a8d12 + languageName: node + linkType: hard + +"react-native-vision-camera@patch:react-native-vision-camera@npm%3A5.0.0-beta.1#~/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch": + version: 5.0.0-beta.1 + resolution: "react-native-vision-camera@patch:react-native-vision-camera@npm%3A5.0.0-beta.1#~/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch::version=5.0.0-beta.1&hash=b52326" + peerDependencies: + react: "*" + react-native: "*" + react-native-nitro-image: "*" + react-native-nitro-modules: "*" + 
react-native-worklets: "*" + checksum: 10/4ddf9325752243c92c5104b2fe8520d91072d4c359c52708872909b2bb85d136db59215bac1c6f902f04eee683a9d3d8ff11f7729e0468b00dee5aa3bb8f1944 + languageName: node + linkType: hard + "react-native-worklets@npm:0.5.1": version: 0.5.1 resolution: "react-native-worklets@npm:0.5.1" From 983242eb74dcea5e49a2759d105f0f02e4bda378 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Tue, 24 Feb 2026 09:02:14 +0100 Subject: [PATCH 19/37] feat: suggested changes / improve comments --- .../app/object_detection_live/index.tsx | 2 -- .../rnexecutorch/host_objects/JsiConversions.h | 18 +++++++++--------- .../host_objects/ModelHostObject.h | 6 ++++++ .../metaprogramming/TypeConcepts.h | 5 +++++ .../tests/integration/ObjectDetectionTest.cpp | 10 +++++----- .../computer_vision/ObjectDetectionModule.ts | 1 + .../modules/computer_vision/VisionModule.ts | 9 +++++++-- .../src/types/common.ts | 3 --- .../src/types/objectDetection.ts | 2 +- 9 files changed, 34 insertions(+), 22 deletions(-) diff --git a/apps/computer-vision/app/object_detection_live/index.tsx b/apps/computer-vision/app/object_detection_live/index.tsx index 68fdd4fed..5f8bb2120 100644 --- a/apps/computer-vision/app/object_detection_live/index.tsx +++ b/apps/computer-vision/app/object_detection_live/index.tsx @@ -182,8 +182,6 @@ const styles = StyleSheet.create({ fontWeight: '600', letterSpacing: 0.3, }, - - // Bottom stats bar bottomBarWrapper: { position: 'absolute', bottom: 0, diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index 5fc8615ea..7b97108b9 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -346,6 +346,15 @@ inline jsi::Value getJsiValue(const std::vector &vec, return {runtime, array}; } +inline jsi::Value 
getJsiValue(const std::vector &vec, + jsi::Runtime &runtime) { + jsi::Array array(runtime, vec.size()); + for (size_t i = 0; i < vec.size(); i++) { + array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); + } + return {runtime, array}; +} + // Conditional as on android, size_t and uint64_t reduce to the same type, // introducing ambiguity template &vec, - jsi::Runtime &runtime) { - jsi::Array array(runtime, vec.size()); - for (size_t i = 0; i < vec.size(); i++) { - array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); - } - return {runtime, array}; -} - inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) { return {runtime, val}; } diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h index abf920223..d6489c9be 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h @@ -46,6 +46,12 @@ template class ModelHostObject : public JsiHostObject { "getInputShape")); } + if constexpr (meta::HasGenerate) { + addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject, + promiseHostFunction<&Model::generate>, + "generate")); + } + if constexpr (meta::HasEncode) { addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject, promiseHostFunction<&Model::encode>, diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h index f625bf6e7..2d7612f25 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h @@ -11,6 +11,11 @@ concept DerivedFromOrSameAs = std::is_base_of_v; template concept SameAs = std::is_same_v; +template +concept HasGenerate = requires(T t) { + 
{ &T::generate }; +}; + template concept HasGenerateFromString = requires(T t) { { &T::generateFromString }; diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp index 93cdbf07c..76c838ca1 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp @@ -122,7 +122,7 @@ TEST(ObjectDetectionGenerateTests, DetectionsHaveValidLabels) { // ============================================================================ TEST(ObjectDetectionPixelTests, ValidPixelDataReturnsResults) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - constexpr int width = 4, height = 4, channels = 3; + constexpr int32_t width = 4, height = 4, channels = 3; std::vector pixelData(width * height * channels, 128); JSTensorViewIn tensorView{pixelData.data(), {height, width, channels}, @@ -142,7 +142,7 @@ TEST(ObjectDetectionPixelTests, WrongSizesLengthThrows) { TEST(ObjectDetectionPixelTests, WrongChannelCountThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - constexpr int width = 4, height = 4, channels = 4; + constexpr int32_t width = 4, height = 4, channels = 4; std::vector pixelData(width * height * channels, 0); JSTensorViewIn tensorView{pixelData.data(), {height, width, channels}, @@ -153,7 +153,7 @@ TEST(ObjectDetectionPixelTests, WrongChannelCountThrows) { TEST(ObjectDetectionPixelTests, WrongScalarTypeThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - constexpr int width = 4, height = 4, channels = 3; + constexpr int32_t width = 4, height = 4, channels = 3; std::vector pixelData(width * height * channels, 0); JSTensorViewIn tensorView{pixelData.data(), {height, width, channels}, @@ -164,7 +164,7 @@ TEST(ObjectDetectionPixelTests, 
WrongScalarTypeThrows) { TEST(ObjectDetectionPixelTests, NegativeThresholdThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - constexpr int width = 4, height = 4, channels = 3; + constexpr int32_t width = 4, height = 4, channels = 3; std::vector pixelData(width * height * channels, 128); JSTensorViewIn tensorView{pixelData.data(), {height, width, channels}, @@ -175,7 +175,7 @@ TEST(ObjectDetectionPixelTests, NegativeThresholdThrows) { TEST(ObjectDetectionPixelTests, ThresholdAboveOneThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - constexpr int width = 4, height = 4, channels = 3; + constexpr int32_t width = 4, height = 4, channels = 3; std::vector pixelData(width * height * channels, 128); JSTensorViewIn tensorView{pixelData.data(), {height, width, channels}, diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index e62c7221c..f056cff62 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -28,6 +28,7 @@ export class ObjectDetectionModule extends VisionModule { onDownloadProgressCallback, model.modelSource ); + if (!paths?.[0]) { throw new RnExecutorchError( RnExecutorchErrorCode.DownloadInterrupted, diff --git a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts index eabe50ab0..762d09987 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts @@ -43,11 +43,16 @@ export abstract class VisionModule extends BaseModule { * const model = new ClassificationModule(); * await model.load({ modelSource: MODEL }); * + * // Use the 
functional form of setState to store the worklet β€” passing it + * // directly would cause React to invoke it immediately as an updater fn. + * const [runOnFrame, setRunOnFrame] = useState(null); + * setRunOnFrame(() => model.runOnFrame); + * * const frameOutput = useFrameOutput({ * onFrame(frame) { * 'worklet'; - * if (!model.runOnFrame) return; - * const result = model.runOnFrame(frame); + * if (!runOnFrame) return; + * const result = runOnFrame(frame); * frame.dispose(); * } * }); diff --git a/packages/react-native-executorch/src/types/common.ts b/packages/react-native-executorch/src/types/common.ts index 1ebfb3534..d992214dd 100644 --- a/packages/react-native-executorch/src/types/common.ts +++ b/packages/react-native-executorch/src/types/common.ts @@ -193,9 +193,6 @@ export interface PixelData extends Omit { /** * Frame data for vision model processing. - * Supports two modes: - * 1. ArrayBuffer mode (with memory copy) - Compatible with all platforms - * 2. NativeBuffer mode (zero-copy) - Better performance with Vision Camera v5 */ export interface Frame { /** diff --git a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index 9532c508f..5aaf81833 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ b/packages/react-native-executorch/src/types/objectDetection.ts @@ -176,7 +176,7 @@ export interface ObjectDetectionType { * 1. **String path/URI**: File path, URL, or Base64-encoded string * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) * - * **Note**: For VisionCamera frame processing, use `processFrame` instead. + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. * * @param input - Image source (string or PixelData object) * @param detectionThreshold - An optional number between 0 and 1 representing the minimum confidence score. Default is 0.5. 
From e0e8bcafce494738f5fe0f762179b0e129d3911e Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 25 Feb 2026 14:16:42 +0100 Subject: [PATCH 20/37] fix(android): object detection not working on android --- ...ative-vision-camera@npm-5.0.0-beta.1.patch | 713 ------------------ apps/computer-vision/app.json | 13 +- .../app/object_detection_live/index.tsx | 3 +- apps/computer-vision/package.json | 2 +- yarn.lock | 23 +- 5 files changed, 20 insertions(+), 734 deletions(-) delete mode 100644 .yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch diff --git a/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch b/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch deleted file mode 100644 index 73f999e9a..000000000 --- a/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch +++ /dev/null @@ -1,713 +0,0 @@ -diff --git a/lib/expo-plugin/withVisionCamera.js b/lib/expo-plugin/withVisionCamera.js -index 32418a9..f7a8c5c 100644 ---- a/lib/expo-plugin/withVisionCamera.js -+++ b/lib/expo-plugin/withVisionCamera.js -@@ -1,4 +1,4 @@ --import { AndroidConfig, withPlugins, } from '@expo/config-plugins'; -+const { AndroidConfig, withPlugins } = require('@expo/config-plugins'); - const CAMERA_USAGE = 'Allow $(PRODUCT_NAME) to access your camera'; - const MICROPHONE_USAGE = 'Allow $(PRODUCT_NAME) to access your microphone'; - const withVisionCamera = (config, props = {}) => { -@@ -30,4 +30,4 @@ const withVisionCamera = (config, props = {}) => { - [AndroidConfig.Permissions.withPermissions, androidPermissions], - ]); - }; --export default withVisionCamera; -+module.exports = withVisionCamera; -diff --git a/cpp/Frame Processors/HybridWorkletQueueFactory.cpp b/cpp/Frame Processors/HybridWorkletQueueFactory.cpp -new file mode 100644 -index 0000000..5da4ef9 ---- /dev/null -+++ b/cpp/Frame Processors/HybridWorkletQueueFactory.cpp -@@ -0,0 +1,50 @@ -+/// -+/// HybridWorkletQueueFactory.cpp -+/// VisionCamera -+/// Copyright Β© 2025 Marc 
Rousavy @ Margelo -+/// -+ -+#include "HybridWorkletQueueFactory.hpp" -+ -+#include "JSIConverter+AsyncQueue.hpp" -+#include "NativeThreadAsyncQueue.hpp" -+#include "NativeThreadDispatcher.hpp" -+#include -+#include -+ -+namespace margelo::nitro::camera { -+ -+HybridWorkletQueueFactory::HybridWorkletQueueFactory() : HybridObject(TAG) {} -+ -+void HybridWorkletQueueFactory::loadHybridMethods() { -+ HybridWorkletQueueFactorySpec::loadHybridMethods(); -+ registerHybrids(this, [](Prototype& prototype) { -+ prototype.registerRawHybridMethod("installDispatcher", 1, &HybridWorkletQueueFactory::installDispatcher); -+ }); -+} -+ -+std::shared_ptr HybridWorkletQueueFactory::wrapThreadInQueue(const std::shared_ptr& thread) { -+ return std::make_shared(thread); -+} -+ -+double HybridWorkletQueueFactory::getCurrentThreadMarker() { -+ static std::atomic_size_t threadCounter{1}; -+ static thread_local size_t thisThreadId{0}; -+ if (thisThreadId == 0) { -+ thisThreadId = threadCounter.fetch_add(1); -+ } -+ return static_cast(thisThreadId); -+} -+ -+jsi::Value HybridWorkletQueueFactory::installDispatcher(jsi::Runtime& runtime, const jsi::Value&, const jsi::Value* args, size_t count) { -+ if (count != 1) -+ throw std::runtime_error("installDispatcher(..) 
must be called with exactly 1 argument!"); -+ auto thread = JSIConverter>::fromJSI(runtime, args[0]); -+ -+ auto dispatcher = std::make_shared(thread); -+ Dispatcher::installRuntimeGlobalDispatcher(runtime, dispatcher); -+ -+ return jsi::Value::undefined(); -+} -+ -+} // namespace margelo::nitro::camera -diff --git a/android/CMakeLists.txt b/android/CMakeLists.txt -index 0000000..1111111 100644 ---- a/android/CMakeLists.txt -+++ b/android/CMakeLists.txt -@@ -20,6 +20,7 @@ - "src/main/cpp" - "../cpp" - "../cpp/Frame Processors" -+ "../nitrogen/generated/shared/c++" - ) - - find_library(LOG_LIB log) -diff --git a/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.cpp b/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.cpp -new file mode 100644 -index 0000000..5da4ef9 ---- /dev/null -+++ b/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.cpp -@@ -0,0 +1,50 @@ -+/// -+/// HybridWorkletQueueFactory.cpp -+/// VisionCamera -+/// Copyright Β© 2025 Marc Rousavy @ Margelo -+/// -+ -+#include "HybridWorkletQueueFactory.hpp" -+ -+#include "JSIConverter+AsyncQueue.hpp" -+#include "NativeThreadAsyncQueue.hpp" -+#include "NativeThreadDispatcher.hpp" -+#include -+#include -+ -+namespace margelo::nitro::camera { -+ -+HybridWorkletQueueFactory::HybridWorkletQueueFactory() : HybridObject(TAG) {} -+ -+void HybridWorkletQueueFactory::loadHybridMethods() { -+ HybridWorkletQueueFactorySpec::loadHybridMethods(); -+ registerHybrids(this, [](Prototype& prototype) { -+ prototype.registerRawHybridMethod("installDispatcher", 1, &HybridWorkletQueueFactory::installDispatcher); -+ }); -+} -+ -+std::shared_ptr HybridWorkletQueueFactory::wrapThreadInQueue(const std::shared_ptr& thread) { -+ return std::make_shared(thread); -+} -+ -+double HybridWorkletQueueFactory::getCurrentThreadMarker() { -+ static std::atomic_size_t threadCounter{1}; -+ static thread_local size_t thisThreadId{0}; -+ if (thisThreadId == 0) { -+ thisThreadId = threadCounter.fetch_add(1); -+ } -+ return 
static_cast(thisThreadId); -+} -+ -+jsi::Value HybridWorkletQueueFactory::installDispatcher(jsi::Runtime& runtime, const jsi::Value&, const jsi::Value* args, size_t count) { -+ if (count != 1) -+ throw std::runtime_error("installDispatcher(..) must be called with exactly 1 argument!"); -+ auto thread = JSIConverter>::fromJSI(runtime, args[0]); -+ -+ auto dispatcher = std::make_shared(thread); -+ Dispatcher::installRuntimeGlobalDispatcher(runtime, dispatcher); -+ -+ return jsi::Value::undefined(); -+} -+ -+} // namespace margelo::nitro::camera -diff --git a/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.hpp b/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.hpp -new file mode 100644 -index 0000000..daa16d2 ---- /dev/null -+++ b/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.hpp -@@ -0,0 +1,29 @@ -+/// -+/// HybridWorkletQueueFactory.hpp -+/// VisionCamera -+/// Copyright Β© 2025 Marc Rousavy @ Margelo -+/// -+ -+#pragma once -+ -+#include "HybridWorkletQueueFactorySpec.hpp" -+#include "JSIConverter+AsyncQueue.hpp" -+#include -+#include -+ -+namespace margelo::nitro::camera { -+ -+class HybridWorkletQueueFactory : public HybridWorkletQueueFactorySpec { -+public: -+ HybridWorkletQueueFactory(); -+ -+public: -+ std::shared_ptr wrapThreadInQueue(const std::shared_ptr& thread) override; -+ double getCurrentThreadMarker() override; -+ -+ jsi::Value installDispatcher(jsi::Runtime& runtime, const jsi::Value&, const jsi::Value* args, size_t count); -+ -+ void loadHybridMethods() override; -+}; -+ -+} // namespace margelo::nitro::camera -diff --git a/nitrogen/generated/shared/c++/JSIConverter+AsyncQueue.hpp b/nitrogen/generated/shared/c++/JSIConverter+AsyncQueue.hpp -new file mode 100644 -index 0000000..5b93f2d ---- /dev/null -+++ b/nitrogen/generated/shared/c++/JSIConverter+AsyncQueue.hpp -@@ -0,0 +1,24 @@ -+/// -+/// JSIConverter+AsyncQueue.swift -+/// VisionCamera -+/// Copyright Β© 2025 Marc Rousavy @ Margelo -+/// -+ -+#pragma once -+ -+#include 
-+#include -+#if __has_include() -+#include -+#elif __has_include() -+#include -+#else -+#error react-native-worklets Prefab not found! -+#endif -+ -+namespace margelo::nitro { -+ -+// JSIConverter> is implemented -+// in JSIConverter> -+ -+} -diff --git a/nitrogen/generated/shared/c++/NativeThreadAsyncQueue.hpp b/nitrogen/generated/shared/c++/NativeThreadAsyncQueue.hpp -new file mode 100644 -index 0000000..d5a0958 ---- /dev/null -+++ b/nitrogen/generated/shared/c++/NativeThreadAsyncQueue.hpp -@@ -0,0 +1,34 @@ -+/// -+/// NativeThreadAsyncQueue.hpp -+/// VisionCamera -+/// Copyright Β© 2025 Marc Rousavy @ Margelo -+/// -+ -+#pragma once -+ -+#include "HybridNativeThreadSpec.hpp" -+#include "JSIConverter+AsyncQueue.hpp" -+#include -+ -+namespace margelo::nitro::camera { -+ -+/** -+ * An implementation of `worklets::AsyncQueue` that uses a `NativeThread` to run its jobs. -+ * -+ * The `NativeThread` (`HybridNativeThreadSpec`) is a platform-implemented object, -+ * e.g. using `DispatchQueue` on iOS. 
-+ */ -+class NativeThreadAsyncQueue : public worklets::AsyncQueue { -+public: -+ NativeThreadAsyncQueue(std::shared_ptr thread) : _thread(std::move(thread)) {} -+ -+ void push(std::function&& job) override { -+ auto jobCopy = job; -+ _thread->runOnThread(jobCopy); -+ } -+ -+private: -+ std::shared_ptr _thread; -+}; -+ -+} // namespace margelo::nitro::camera -diff --git a/nitrogen/generated/shared/c++/NativeThreadDispatcher.hpp b/nitrogen/generated/shared/c++/NativeThreadDispatcher.hpp -new file mode 100644 -index 0000000..758d2f2 ---- /dev/null -+++ b/nitrogen/generated/shared/c++/NativeThreadDispatcher.hpp -@@ -0,0 +1,36 @@ -+/// -+/// NativeThreadDispatcher.hpp -+/// VisionCamera -+/// Copyright Β© 2025 Marc Rousavy @ Margelo -+/// -+ -+#pragma once -+ -+#include "HybridNativeThreadSpec.hpp" -+#include "JSIConverter+AsyncQueue.hpp" -+#include -+ -+namespace margelo::nitro::camera { -+ -+/** -+ * An implementation of `nitro::Dispatcher` that uses a `NativeThread` to run its jobs. -+ * -+ * The `NativeThread` (`HybridNativeThreadSpec`) is a platform-implemented object, -+ * e.g. using `DispatchQueue` on iOS. -+ */ -+class NativeThreadDispatcher : public nitro::Dispatcher { -+public: -+ NativeThreadDispatcher(std::shared_ptr thread) : _thread(std::move(thread)) {} -+ -+ void runSync(std::function&&) override { -+ throw std::runtime_error("runSync(...) 
is not implemented for NativeThreadDispatcher!"); -+ } -+ void runAsync(std::function&& function) override { -+ _thread->runOnThread(function); -+ } -+ -+private: -+ std::shared_ptr _thread; -+}; -+ -+} // namespace margelo::nitro::camera -diff --git a/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/BoundingBox.kt b/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/BoundingBox.kt -new file mode 100644 -index 0000000..aaaaaaa ---- /dev/null -+++ b/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/BoundingBox.kt -@@ -0,0 +1,47 @@ -+/// -+/// BoundingBox.kt -+/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. -+/// https://github.com/mrousavy/nitro -+/// Copyright Β© Marc Rousavy @ Margelo -+/// -+ -+package com.margelo.nitro.camera -+ -+import androidx.annotation.Keep -+import com.facebook.proguard.annotations.DoNotStrip -+ -+ -+/** -+ * Represents the JavaScript object/struct "BoundingBox". -+ */ -+@DoNotStrip -+@Keep -+data class BoundingBox( -+ @DoNotStrip -+ @Keep -+ val x: Double, -+ @DoNotStrip -+ @Keep -+ val y: Double, -+ @DoNotStrip -+ @Keep -+ val width: Double, -+ @DoNotStrip -+ @Keep -+ val height: Double -+) { -+ /* primary constructor */ -+ -+ companion object { -+ /** -+ * Constructor called from C++ -+ */ -+ @DoNotStrip -+ @Keep -+ @Suppress("unused") -+ @JvmStatic -+ private fun fromCpp(x: Double, y: Double, width: Double, height: Double): BoundingBox { -+ return BoundingBox(x, y, width, height) -+ } -+ } -+} -diff --git a/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/HybridScannedObjectSpec.kt b/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/HybridScannedObjectSpec.kt -new file mode 100644 -index 0000000..bbbbbbb ---- /dev/null -+++ b/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/HybridScannedObjectSpec.kt -@@ -0,0 +1,60 @@ -+/// -+/// HybridScannedObjectSpec.kt -+/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. 
-+/// https://github.com/mrousavy/nitro -+/// Copyright Β© Marc Rousavy @ Margelo -+/// -+ -+package com.margelo.nitro.camera -+ -+import androidx.annotation.Keep -+import com.facebook.jni.HybridData -+import com.facebook.proguard.annotations.DoNotStrip -+import com.margelo.nitro.core.HybridObject -+ -+/** -+ * A Kotlin class representing the ScannedObject HybridObject. -+ * Implement this abstract class to create Kotlin-based instances of ScannedObject. -+ */ -+@DoNotStrip -+@Keep -+@Suppress( -+ "KotlinJniMissingFunction", "unused", -+ "RedundantSuppression", "RedundantUnitReturnType", "SimpleRedundantLet", -+ "LocalVariableName", "PropertyName", "PrivatePropertyName", "FunctionName" -+) -+abstract class HybridScannedObjectSpec: HybridObject() { -+ @DoNotStrip -+ private var mHybridData: HybridData = initHybrid() -+ -+ init { -+ super.updateNative(mHybridData) -+ } -+ -+ override fun updateNative(hybridData: HybridData) { -+ mHybridData = hybridData -+ super.updateNative(hybridData) -+ } -+ -+ // Default implementation of `HybridObject.toString()` -+ override fun toString(): String { -+ return "[HybridObject ScannedObject]" -+ } -+ -+ // Properties -+ @get:DoNotStrip -+ @get:Keep -+ abstract val type: ScannedObjectType -+ -+ @get:DoNotStrip -+ @get:Keep -+ abstract val boundingBox: BoundingBox -+ -+ // Methods -+ -+ private external fun initHybrid(): HybridData -+ -+ companion object { -+ protected const val TAG = "HybridScannedObjectSpec" -+ } -+} -diff --git a/nitrogen/generated/android/c++/JBoundingBox.hpp b/nitrogen/generated/android/c++/JBoundingBox.hpp -new file mode 100644 -index 0000000..ccccccc ---- /dev/null -+++ b/nitrogen/generated/android/c++/JBoundingBox.hpp -@@ -0,0 +1,69 @@ -+/// -+/// JBoundingBox.hpp -+/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. 
-+/// https://github.com/mrousavy/nitro -+/// Copyright Β© Marc Rousavy @ Margelo -+/// -+ -+#pragma once -+ -+#include -+#include "BoundingBox.hpp" -+ -+ -+ -+namespace margelo::nitro::camera { -+ -+ using namespace facebook; -+ -+ /** -+ * The C++ JNI bridge between the C++ struct "BoundingBox" and the the Kotlin data class "BoundingBox". -+ */ -+ struct JBoundingBox final: public jni::JavaClass { -+ public: -+ static auto constexpr kJavaDescriptor = "Lcom/margelo/nitro/camera/BoundingBox;"; -+ -+ public: -+ /** -+ * Convert this Java/Kotlin-based struct to the C++ struct BoundingBox by copying all values to C++. -+ */ -+ [[maybe_unused]] -+ [[nodiscard]] -+ BoundingBox toCpp() const { -+ static const auto clazz = javaClassStatic(); -+ static const auto fieldX = clazz->getField("x"); -+ double x = this->getFieldValue(fieldX); -+ static const auto fieldY = clazz->getField("y"); -+ double y = this->getFieldValue(fieldY); -+ static const auto fieldWidth = clazz->getField("width"); -+ double width = this->getFieldValue(fieldWidth); -+ static const auto fieldHeight = clazz->getField("height"); -+ double height = this->getFieldValue(fieldHeight); -+ return BoundingBox( -+ x, -+ y, -+ width, -+ height -+ ); -+ } -+ -+ public: -+ /** -+ * Create a Java/Kotlin-based struct by copying all values from the given C++ struct to Java. 
-+ */ -+ [[maybe_unused]] -+ static jni::local_ref fromCpp(const BoundingBox& value) { -+ using JSignature = JBoundingBox(double, double, double, double); -+ static const auto clazz = javaClassStatic(); -+ static const auto create = clazz->getStaticMethod("fromCpp"); -+ return create( -+ clazz, -+ value.x, -+ value.y, -+ value.width, -+ value.height -+ ); -+ } -+ }; -+ -+} // namespace margelo::nitro::camera -diff --git a/nitrogen/generated/android/c++/JHybridScannedObjectSpec.hpp b/nitrogen/generated/android/c++/JHybridScannedObjectSpec.hpp -new file mode 100644 -index 0000000..ddddddd ---- /dev/null -+++ b/nitrogen/generated/android/c++/JHybridScannedObjectSpec.hpp -@@ -0,0 +1,63 @@ -+/// -+/// JHybridScannedObjectSpec.hpp -+/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. -+/// https://github.com/mrousavy/nitro -+/// Copyright Β© Marc Rousavy @ Margelo -+/// -+ -+#pragma once -+ -+#include -+#include -+#include "HybridScannedObjectSpec.hpp" -+ -+ -+ -+ -+namespace margelo::nitro::camera { -+ -+ using namespace facebook; -+ -+ class JHybridScannedObjectSpec: public jni::HybridClass, -+ public virtual HybridScannedObjectSpec { -+ public: -+ static auto constexpr kJavaDescriptor = "Lcom/margelo/nitro/camera/HybridScannedObjectSpec;"; -+ static jni::local_ref initHybrid(jni::alias_ref jThis); -+ static void registerNatives(); -+ -+ protected: -+ // C++ constructor (called from Java via `initHybrid()`) -+ explicit JHybridScannedObjectSpec(jni::alias_ref jThis) : -+ HybridObject(HybridScannedObjectSpec::TAG), -+ HybridBase(jThis), -+ _javaPart(jni::make_global(jThis)) {} -+ -+ public: -+ ~JHybridScannedObjectSpec() override { -+ // Hermes GC can destroy JS objects on a non-JNI Thread. 
-+ jni::ThreadScope::WithClassLoader([&] { _javaPart.reset(); }); -+ } -+ -+ public: -+ size_t getExternalMemorySize() noexcept override; -+ bool equals(const std::shared_ptr& other) override; -+ void dispose() noexcept override; -+ std::string toString() override; -+ -+ public: -+ inline const jni::global_ref& getJavaPart() const noexcept { -+ return _javaPart; -+ } -+ -+ public: -+ // Properties -+ ScannedObjectType getType() override; -+ BoundingBox getBoundingBox() override; -+ -+ private: -+ friend HybridBase; -+ using HybridBase::HybridBase; -+ jni::global_ref _javaPart; -+ }; -+ -+} // namespace margelo::nitro::camera -diff --git a/nitrogen/generated/android/VisionCamera+autolinking.cmake b/nitrogen/generated/android/VisionCamera+autolinking.cmake -index 0000000..1111111 100644 ---- a/nitrogen/generated/android/VisionCamera+autolinking.cmake -+++ b/nitrogen/generated/android/VisionCamera+autolinking.cmake -@@ -112,3 +112,4 @@ - ../nitrogen/generated/android/c++/JHybridPreviewViewSpec.cpp - ../nitrogen/generated/android/c++/views/JHybridPreviewViewStateUpdater.cpp -+ ../nitrogen/generated/android/c++/JHybridScannedObjectSpec.cpp - ) -diff --git a/nitrogen/generated/android/c++/JHybridScannedObjectSpec.cpp b/nitrogen/generated/android/c++/JHybridScannedObjectSpec.cpp -new file mode 100644 -index 0000000..eeeeeee ---- /dev/null -+++ b/nitrogen/generated/android/c++/JHybridScannedObjectSpec.cpp -@@ -0,0 +1,69 @@ -+/// -+/// JHybridScannedObjectSpec.cpp -+/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. -+/// https://github.com/mrousavy/nitro -+/// Copyright Β© Marc Rousavy @ Margelo -+/// -+ -+#include "JHybridScannedObjectSpec.hpp" -+ -+// Forward declaration of `ScannedObjectType` to properly resolve imports. -+namespace margelo::nitro::camera { enum class ScannedObjectType; } -+// Forward declaration of `BoundingBox` to properly resolve imports. 
-+namespace margelo::nitro::camera { struct BoundingBox; } -+ -+#include "ScannedObjectType.hpp" -+#include "JScannedObjectType.hpp" -+#include "BoundingBox.hpp" -+#include "JBoundingBox.hpp" -+ -+namespace margelo::nitro::camera { -+ -+ jni::local_ref JHybridScannedObjectSpec::initHybrid(jni::alias_ref jThis) { -+ return makeCxxInstance(jThis); -+ } -+ -+ void JHybridScannedObjectSpec::registerNatives() { -+ registerHybrid({ -+ makeNativeMethod("initHybrid", JHybridScannedObjectSpec::initHybrid), -+ }); -+ } -+ -+ size_t JHybridScannedObjectSpec::getExternalMemorySize() noexcept { -+ static const auto method = javaClassStatic()->getMethod("getMemorySize"); -+ return method(_javaPart); -+ } -+ -+ bool JHybridScannedObjectSpec::equals(const std::shared_ptr& other) { -+ if (auto otherCast = std::dynamic_pointer_cast(other)) { -+ return _javaPart == otherCast->_javaPart; -+ } -+ return false; -+ } -+ -+ void JHybridScannedObjectSpec::dispose() noexcept { -+ static const auto method = javaClassStatic()->getMethod("dispose"); -+ method(_javaPart); -+ } -+ -+ std::string JHybridScannedObjectSpec::toString() { -+ static const auto method = javaClassStatic()->getMethod("toString"); -+ auto javaString = method(_javaPart); -+ return javaString->toStdString(); -+ } -+ -+ // Properties -+ ScannedObjectType JHybridScannedObjectSpec::getType() { -+ static const auto method = javaClassStatic()->getMethod()>("getType"); -+ auto __result = method(_javaPart); -+ return __result->toCpp(); -+ } -+ BoundingBox JHybridScannedObjectSpec::getBoundingBox() { -+ static const auto method = javaClassStatic()->getMethod()>("getBoundingBox"); -+ auto __result = method(_javaPart); -+ return __result->toCpp(); -+ } -+ -+ // Methods -+ -+} // namespace margelo::nitro::camera -diff --git a/android/src/main/java/com/margelo/nitro/camera/hybrids/recording/HybridVideoRecorder.kt b/android/src/main/java/com/margelo/nitro/camera/hybrids/recording/HybridVideoRecorder.kt -index aaaaaaa..bbbbbbb 100644 
---- a/android/src/main/java/com/margelo/nitro/camera/hybrids/recording/HybridVideoRecorder.kt -+++ b/android/src/main/java/com/margelo/nitro/camera/hybrids/recording/HybridVideoRecorder.kt -@@ -55,6 +55,6 @@ - when (event) { - is VideoRecordEvent.Start -> { -- promise.resolve() -+ promise.resolve(Unit) - didResolve = true - } - -@@ -98,27 +98,48 @@ - override fun stopRecording(): Promise { -- return Promise.parallel(executor) { -- val recording = recording ?: throw Error("Not currently recording!") -- recording.stop() -- this.isPaused = false -- this.recording = null -- this.recordedDuration = 0.0 -- this.recordedFileSize = 0.0 -- } -+ val promise = Promise() -+ executor.execute { -+ try { -+ val recording = recording ?: throw Error("Not currently recording!") -+ recording.stop() -+ this.isPaused = false -+ this.recording = null -+ this.recordedDuration = 0.0 -+ this.recordedFileSize = 0.0 -+ promise.resolve(Unit) -+ } catch (e: Throwable) { -+ promise.reject(e) -+ } -+ } -+ return promise - } - - override fun pauseRecording(): Promise { -- return Promise.parallel(executor) { -- val recording = recording ?: throw Error("Not currently recording!") -- recording.pause() -- this.isPaused = true -- } -+ val promise = Promise() -+ executor.execute { -+ try { -+ val recording = recording ?: throw Error("Not currently recording!") -+ recording.pause() -+ this.isPaused = true -+ promise.resolve(Unit) -+ } catch (e: Throwable) { -+ promise.reject(e) -+ } -+ } -+ return promise - } - - override fun resumeRecording(): Promise { -- return Promise.parallel(executor) { -- val recording = recording ?: throw Error("Not currently recording!") -- recording.resume() -- this.isPaused = false -- } -+ val promise = Promise() -+ executor.execute { -+ try { -+ val recording = recording ?: throw Error("Not currently recording!") -+ recording.resume() -+ this.isPaused = false -+ promise.resolve(Unit) -+ } catch (e: Throwable) { -+ promise.reject(e) -+ } -+ } -+ return promise - } - } diff 
--git a/apps/computer-vision/app.json b/apps/computer-vision/app.json index 5db8c1390..4fcbca2ce 100644 --- a/apps/computer-vision/app.json +++ b/apps/computer-vision/app.json @@ -31,6 +31,17 @@ "web": { "favicon": "./assets/icons/favicon.png" }, - "plugins": ["expo-font", "expo-router"] + "plugins": [ + "expo-font", + "expo-router", + [ + "expo-build-properties", + { + "android": { + "minSdkVersion": 26 + } + } + ] + ] } } diff --git a/apps/computer-vision/app/object_detection_live/index.tsx b/apps/computer-vision/app/object_detection_live/index.tsx index 5f8bb2120..cd1e9cca8 100644 --- a/apps/computer-vision/app/object_detection_live/index.tsx +++ b/apps/computer-vision/app/object_detection_live/index.tsx @@ -54,7 +54,7 @@ export default function ObjectDetectionLiveScreen() { const format = useMemo(() => { if (device == null) return undefined; try { - return getCameraFormat(device, Templates.Video); + return getCameraFormat(device, Templates.FrameProcessing); } catch { return undefined; } @@ -72,6 +72,7 @@ export default function ObjectDetectionLiveScreen() { const frameOutput = useFrameOutput({ pixelFormat: 'rgb', + dropFramesWhileBusy: true, onFrame(frame) { 'worklet'; if (!model.runOnFrame) { diff --git a/apps/computer-vision/package.json b/apps/computer-vision/package.json index ee879c6af..328e9bc4f 100644 --- a/apps/computer-vision/package.json +++ b/apps/computer-vision/package.json @@ -38,7 +38,7 @@ "react-native-screens": "~4.16.0", "react-native-svg": "15.12.1", "react-native-svg-transformer": "^1.5.0", - "react-native-vision-camera": "patch:react-native-vision-camera@npm%3A5.0.0-beta.1#~/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch", + "react-native-vision-camera": "5.0.0-beta.2", "react-native-worklets": "^0.7.2" }, "devDependencies": { diff --git a/yarn.lock b/yarn.lock index d4316e786..ac9b276b6 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6592,7 +6592,7 @@ __metadata: react-native-screens: "npm:~4.16.0" react-native-svg: 
"npm:15.12.1" react-native-svg-transformer: "npm:^1.5.0" - react-native-vision-camera: "patch:react-native-vision-camera@npm%3A5.0.0-beta.1#~/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch" + react-native-vision-camera: "npm:5.0.0-beta.2" react-native-worklets: "npm:^0.7.2" languageName: unknown linkType: soft @@ -13498,29 +13498,16 @@ __metadata: languageName: node linkType: hard -"react-native-vision-camera@npm:5.0.0-beta.1": - version: 5.0.0-beta.1 - resolution: "react-native-vision-camera@npm:5.0.0-beta.1" +"react-native-vision-camera@npm:5.0.0-beta.2": + version: 5.0.0-beta.2 + resolution: "react-native-vision-camera@npm:5.0.0-beta.2" peerDependencies: react: "*" react-native: "*" react-native-nitro-image: "*" react-native-nitro-modules: "*" react-native-worklets: "*" - checksum: 10/873410a33e33d68b162b6524997480133ef9b6469dce3f87253c371bba1643d326e835891b0c9f75018d376faf4aec23daba5ab729f431c718ecf901601a8d12 - languageName: node - linkType: hard - -"react-native-vision-camera@patch:react-native-vision-camera@npm%3A5.0.0-beta.1#~/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch": - version: 5.0.0-beta.1 - resolution: "react-native-vision-camera@patch:react-native-vision-camera@npm%3A5.0.0-beta.1#~/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch::version=5.0.0-beta.1&hash=b52326" - peerDependencies: - react: "*" - react-native: "*" - react-native-nitro-image: "*" - react-native-nitro-modules: "*" - react-native-worklets: "*" - checksum: 10/4ddf9325752243c92c5104b2fe8520d91072d4c359c52708872909b2bb85d136db59215bac1c6f902f04eee683a9d3d8ff11f7729e0468b00dee5aa3bb8f1944 + checksum: 10/1f38d097d001c10b8544d0b931a9387a91c5df1e0677ae53e639962a90589586af02ca658ca5e99a5ca179af8d86bc8365227cf70750f2df4bfb775f4a26fc6d languageName: node linkType: hard From 3aa0f899fe663718c31689214b409b86eacffb93 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 25 Feb 2026 14:19:49 +0100 Subject: [PATCH 21/37] chore: remove 
unused ImageSegmentation.cpp --- .../image_segmentation/ImageSegmentation.cpp | 170 ------------------ 1 file changed, 170 deletions(-) delete mode 100644 packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp deleted file mode 100644 index a2c1ae865..000000000 --- a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +++ /dev/null @@ -1,170 +0,0 @@ -#include "ImageSegmentation.h" - -#include - -#include -#include -#include -#include -#include -#include - -namespace rnexecutorch::models::image_segmentation { - -ImageSegmentation::ImageSegmentation( - const std::string &modelSource, - std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { - auto inputShapes = getAllInputShapes(); - if (inputShapes.size() == 0) { - throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, - "Model seems to not take any input tensors."); - } - std::vector modelInputShape = inputShapes[0]; - if (modelInputShape.size() < 2) { - char errorMessage[100]; - std::snprintf(errorMessage, sizeof(errorMessage), - "Unexpected model input size, expected at least 2 dimentions " - "but got: %zu.", - modelInputShape.size()); - throw RnExecutorchError(RnExecutorchErrorCode::WrongDimensions, - errorMessage); - } - modelImageSize = cv::Size(modelInputShape[modelInputShape.size() - 1], - modelInputShape[modelInputShape.size() - 2]); - numModelPixels = modelImageSize.area(); -} - -std::shared_ptr ImageSegmentation::generate( - std::string imageSource, - std::set> classesOfInterest, bool resize) { - auto [inputTensor, originalSize] = - image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]); - - auto forwardResult = BaseModel::forward(inputTensor); - if 
(!forwardResult.ok()) { - throw RnExecutorchError(forwardResult.error(), - "The model's forward function did not succeed. " - "Ensure the model input is correct."); - } - - return postprocess(forwardResult->at(0).toTensor(), originalSize, - classesOfInterest, resize); -} - -std::shared_ptr ImageSegmentation::postprocess( - const Tensor &tensor, cv::Size originalSize, - std::set> classesOfInterest, bool resize) { - - auto dataPtr = static_cast(tensor.const_data_ptr()); - auto resultData = std::span(dataPtr, tensor.numel()); - - // We copy the ET-owned data to jsi array buffers that can be directly - // returned to JS - std::vector> resultClasses; - resultClasses.reserve(numClasses); - for (std::size_t cl = 0; cl < numClasses; ++cl) { - auto classBuffer = std::make_shared( - &resultData[cl * numModelPixels], numModelPixels * sizeof(float)); - resultClasses.push_back(classBuffer); - } - - // Apply softmax per each pixel across all classes - for (std::size_t pixel = 0; pixel < numModelPixels; ++pixel) { - std::vector classValues(numClasses); - for (std::size_t cl = 0; cl < numClasses; ++cl) { - classValues[cl] = - reinterpret_cast(resultClasses[cl]->data())[pixel]; - } - numerical::softmax(classValues); - for (std::size_t cl = 0; cl < numClasses; ++cl) { - reinterpret_cast(resultClasses[cl]->data())[pixel] = - classValues[cl]; - } - } - - // Calculate the maximum class for each pixel - auto argmax = - std::make_shared(numModelPixels * sizeof(int32_t)); - for (std::size_t pixel = 0; pixel < numModelPixels; ++pixel) { - float max = reinterpret_cast(resultClasses[0]->data())[pixel]; - int maxInd = 0; - for (int cl = 1; cl < numClasses; ++cl) { - if (reinterpret_cast(resultClasses[cl]->data())[pixel] > max) { - maxInd = cl; - max = reinterpret_cast(resultClasses[cl]->data())[pixel]; - } - } - reinterpret_cast(argmax->data())[pixel] = maxInd; - } - - auto buffersToReturn = std::make_shared>>(); - for (std::size_t cl = 0; cl < numClasses; ++cl) { - if 
(classesOfInterest.contains(constants::kDeeplabV3Resnet50Labels[cl])) { - (*buffersToReturn)[constants::kDeeplabV3Resnet50Labels[cl]] = - resultClasses[cl]; - } - } - - // Resize selected classes and argmax - if (resize) { - cv::Mat argmaxMat(modelImageSize, CV_32SC1, argmax->data()); - cv::resize(argmaxMat, argmaxMat, originalSize, 0, 0, - cv::InterpolationFlags::INTER_NEAREST); - argmax = std::make_shared( - argmaxMat.data, originalSize.area() * sizeof(int32_t)); - - for (auto &[label, arrayBuffer] : *buffersToReturn) { - cv::Mat classMat(modelImageSize, CV_32FC1, arrayBuffer->data()); - cv::resize(classMat, classMat, originalSize); - arrayBuffer = std::make_shared( - classMat.data, originalSize.area() * sizeof(float)); - } - } - return populateDictionary(argmax, buffersToReturn); -} - -std::shared_ptr ImageSegmentation::populateDictionary( - std::shared_ptr argmax, - std::shared_ptr>> - classesToOutput) { - // Synchronize the invoked thread to return when the dict is constructed - auto promisePtr = std::make_shared>(); - std::future doneFuture = promisePtr->get_future(); - - std::shared_ptr dictPtr = nullptr; - callInvoker->invokeAsync( - [argmax, classesToOutput, &dictPtr, promisePtr](jsi::Runtime &runtime) { - dictPtr = std::make_shared(runtime); - auto argmaxArrayBuffer = jsi::ArrayBuffer(runtime, argmax); - - auto int32ArrayCtor = - runtime.global().getPropertyAsFunction(runtime, "Int32Array"); - auto int32Array = - int32ArrayCtor.callAsConstructor(runtime, argmaxArrayBuffer) - .getObject(runtime); - dictPtr->setProperty(runtime, "ARGMAX", int32Array); - - for (auto &[classLabel, owningBuffer] : *classesToOutput) { - auto classArrayBuffer = jsi::ArrayBuffer(runtime, owningBuffer); - - auto float32ArrayCtor = - runtime.global().getPropertyAsFunction(runtime, "Float32Array"); - auto float32Array = - float32ArrayCtor.callAsConstructor(runtime, classArrayBuffer) - .getObject(runtime); - - dictPtr->setProperty( - runtime, jsi::String::createFromAscii(runtime, 
classLabel.data()), - float32Array); - } - promisePtr->set_value(); - }); - - doneFuture.wait(); - return dictPtr; -} - -} // namespace rnexecutorch::models::image_segmentation From fd5aca74ea23a6699e3c9e9ca8eff543bc5f3e4d Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 25 Feb 2026 14:38:39 +0100 Subject: [PATCH 22/37] docs: add correct api references --- .../classes/ClassificationModule.md | 90 +++++++- .../classes/ExecutorchModule.md | 90 +++++++- .../classes/ImageEmbeddingsModule.md | 90 +++++++- .../classes/ImageSegmentationModule.md | 92 +++++++- .../classes/ObjectDetectionModule.md | 204 +++++++++++++++--- .../classes/StyleTransferModule.md | 90 +++++++- .../classes/TextEmbeddingsModule.md | 90 +++++++- .../classes/TextToImageModule.md | 90 +++++++- .../06-api-reference/classes/VADModule.md | 90 +++++++- .../enumerations/RnExecutorchErrorCode.md | 70 +++--- docs/docs/06-api-reference/index.md | 2 + .../docs/06-api-reference/interfaces/Frame.md | 36 ++++ .../interfaces/ObjectDetectionType.md | 80 ++++++- .../06-api-reference/interfaces/PixelData.md | 65 ++++++ .../docs/06-api-reference/typedoc-sidebar.cjs | 2 +- 15 files changed, 1070 insertions(+), 111 deletions(-) create mode 100644 docs/docs/06-api-reference/interfaces/Frame.md create mode 100644 docs/docs/06-api-reference/interfaces/PixelData.md diff --git a/docs/docs/06-api-reference/classes/ClassificationModule.md b/docs/docs/06-api-reference/classes/ClassificationModule.md index f39a1ae9e..066dd9a45 100644 --- a/docs/docs/06-api-reference/classes/ClassificationModule.md +++ b/docs/docs/06-api-reference/classes/ClassificationModule.md @@ -24,13 +24,87 @@ Module for image classification tasks. 
## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). + +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) 
+Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. + +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -84,7 +160,9 @@ The classification result. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -113,7 +191,7 @@ Array of output tensors. 
> **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. diff --git a/docs/docs/06-api-reference/classes/ExecutorchModule.md b/docs/docs/06-api-reference/classes/ExecutorchModule.md index 992deeaee..7935e39cf 100644 --- a/docs/docs/06-api-reference/classes/ExecutorchModule.md +++ b/docs/docs/06-api-reference/classes/ExecutorchModule.md @@ -24,13 +24,87 @@ General module for executing custom Executorch models. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). 
+ +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. 
+ +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -85,7 +161,9 @@ An array of output tensor pointers. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -114,7 +192,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. diff --git a/docs/docs/06-api-reference/classes/ImageEmbeddingsModule.md b/docs/docs/06-api-reference/classes/ImageEmbeddingsModule.md index 68595c61c..ba6016f47 100644 --- a/docs/docs/06-api-reference/classes/ImageEmbeddingsModule.md +++ b/docs/docs/06-api-reference/classes/ImageEmbeddingsModule.md @@ -24,13 +24,87 @@ Module for generating image embeddings from input images. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. 
+ +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). + +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): 
`void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. + +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -84,7 +160,9 @@ A Float32Array containing the image embeddings. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -113,7 +191,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. 
diff --git a/docs/docs/06-api-reference/classes/ImageSegmentationModule.md b/docs/docs/06-api-reference/classes/ImageSegmentationModule.md index b395640ac..6b4128906 100644 --- a/docs/docs/06-api-reference/classes/ImageSegmentationModule.md +++ b/docs/docs/06-api-reference/classes/ImageSegmentationModule.md @@ -21,13 +21,87 @@ or a custom [LabelEnum](../type-aliases/LabelEnum.md) label map. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). 
+ +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -39,9 +113,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. 
+ +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -103,7 +179,9 @@ If the model is not loaded. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -132,7 +210,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. @@ -168,6 +246,8 @@ The input shape as an array of numbers. Defined in: [modules/computer_vision/ImageSegmentationModule.ts:76](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L76) +Load the model and prepare it for inference. + #### Returns `Promise`\<`void`\> diff --git a/docs/docs/06-api-reference/classes/ObjectDetectionModule.md b/docs/docs/06-api-reference/classes/ObjectDetectionModule.md index 38fd14f56..f0c61d6a6 100644 --- a/docs/docs/06-api-reference/classes/ObjectDetectionModule.md +++ b/docs/docs/06-api-reference/classes/ObjectDetectionModule.md @@ -6,7 +6,7 @@ Module for object detection tasks. 
## Extends -- `BaseModule` +- `VisionModule`\<[`Detection`](../interfaces/Detection.md)[]\> ## Constructors @@ -20,21 +20,141 @@ Module for object detection tasks. #### Inherited from -`BaseModule.constructor` +`VisionModule.constructor` ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). 
+ +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`VisionModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** + +Native module instance (JSI Host Object) -Native module instance +#### Inherited from + +`VisionModule.nativeModule` + +## Accessors + +### runOnFrame + +#### Get Signature + +> **get** **runOnFrame**(): (`frame`, ...`args`) => `TOutput` \| `null` + +Defined in: [modules/computer_vision/VisionModule.ts:61](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts#L61) + +Synchronous worklet function for real-time VisionCamera frame processing. + +Only available after the model is loaded. Returns null if not loaded. 
+ +**Use this for VisionCamera frame processing in worklets.** +For async processing, use `forward()` instead. + +##### Example + +```typescript +const model = new ClassificationModule(); +await model.load({ modelSource: MODEL }); + +// Use the functional form of setState to store the worklet β€” passing it +// directly would cause React to invoke it immediately as an updater fn. +const [runOnFrame, setRunOnFrame] = useState(null); +setRunOnFrame(() => model.runOnFrame); + +const frameOutput = useFrameOutput({ + onFrame(frame) { + 'worklet'; + if (!runOnFrame) return; + const result = runOnFrame(frame); + frame.dispose(); + }, +}); +``` + +##### Returns + +(`frame`, ...`args`) => `TOutput` \| `null` #### Inherited from -`BaseModule.nativeModule` +`VisionModule.runOnFrame` ## Methods @@ -42,9 +162,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) + +Unloads the model from memory and releases native resources. -Unloads the model from memory. +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -52,38 +174,70 @@ Unloads the model from memory. 
#### Inherited from -`BaseModule.delete` +`VisionModule.delete` --- ### forward() -> **forward**(`imageSource`, `detectionThreshold`): `Promise`\<[`Detection`](../interfaces/Detection.md)[]\> +> **forward**(`input`, `detectionThreshold`): `Promise`\<[`Detection`](../interfaces/Detection.md)[]\> + +Defined in: [modules/computer_vision/ObjectDetectionModule.ts:46](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts#L46) + +Executes the model's forward pass with automatic input type detection. -Defined in: [modules/computer_vision/ObjectDetectionModule.ts:54](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts#L54) +Supports two input types: -Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. -`detectionThreshold` can be supplied to alter the sensitivity of the detection. +1. **String path/URI**: File path, URL, or Base64-encoded string +2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + +**Note**: For VisionCamera frame processing, use `runOnFrame` instead. +This method is async and cannot be called in worklet context. #### Parameters -##### imageSource +##### input -`string` +Image source (string path or PixelData object) -The image source to be processed. +`string` | [`PixelData`](../interfaces/PixelData.md) ##### detectionThreshold -`number` = `0.7` - -The threshold for detection sensitivity. Default is 0.7. +`number` = `0.5` #### Returns `Promise`\<[`Detection`](../interfaces/Detection.md)[]\> -An array of Detection objects representing detected items in the image. +A Promise that resolves to the model output. 
+ +#### Example + +```typescript +// String path (async) +const result1 = await model.forward('file:///path/to/image.jpg'); + +// Pixel data (async) +const result2 = await model.forward({ + dataPtr: new Uint8Array(pixelBuffer), + sizes: [480, 640, 3], + scalarType: ScalarType.BYTE, +}); + +// For VisionCamera frames, use runOnFrame in worklet: +const frameOutput = useFrameOutput({ + onFrame(frame) { + 'worklet'; + if (!model.runOnFrame) return; + const result = model.runOnFrame(frame); + }, +}); +``` + +#### Overrides + +`VisionModule.forward` --- @@ -91,7 +245,9 @@ An array of Detection objects representing detected items in the image. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -112,7 +268,7 @@ Array of output tensors. #### Inherited from -`BaseModule.forwardET` +`VisionModule.forwardET` --- @@ -120,7 +276,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. @@ -146,7 +302,7 @@ The input shape as an array of numbers. 
#### Inherited from -`BaseModule.getInputShape` +`VisionModule.getInputShape` --- @@ -181,4 +337,4 @@ Optional callback to monitor download progress. #### Overrides -`BaseModule.load` +`VisionModule.load` diff --git a/docs/docs/06-api-reference/classes/StyleTransferModule.md b/docs/docs/06-api-reference/classes/StyleTransferModule.md index 1efc27c02..c6923ddf6 100644 --- a/docs/docs/06-api-reference/classes/StyleTransferModule.md +++ b/docs/docs/06-api-reference/classes/StyleTransferModule.md @@ -24,13 +24,87 @@ Module for style transfer tasks. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). 
+ +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. 
+ +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -84,7 +160,9 @@ The stylized image as a Base64-encoded string. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -113,7 +191,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. diff --git a/docs/docs/06-api-reference/classes/TextEmbeddingsModule.md b/docs/docs/06-api-reference/classes/TextEmbeddingsModule.md index 72053896b..9c7dece38 100644 --- a/docs/docs/06-api-reference/classes/TextEmbeddingsModule.md +++ b/docs/docs/06-api-reference/classes/TextEmbeddingsModule.md @@ -24,13 +24,87 @@ Module for generating text embeddings from input text. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. 
+ +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). + +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): 
`void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. + +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -84,7 +160,9 @@ A Float32Array containing the vector embeddings. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -113,7 +191,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. 
diff --git a/docs/docs/06-api-reference/classes/TextToImageModule.md b/docs/docs/06-api-reference/classes/TextToImageModule.md index 2450c09c3..63bc34ae9 100644 --- a/docs/docs/06-api-reference/classes/TextToImageModule.md +++ b/docs/docs/06-api-reference/classes/TextToImageModule.md @@ -36,13 +36,87 @@ Optional callback function that receives the current step index during inference ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). 
+ +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -54,9 +128,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. 
+ +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -115,7 +191,9 @@ A Base64-encoded string representing the generated PNG image. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -144,7 +222,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. diff --git a/docs/docs/06-api-reference/classes/VADModule.md b/docs/docs/06-api-reference/classes/VADModule.md index f37c5239e..996d69832 100644 --- a/docs/docs/06-api-reference/classes/VADModule.md +++ b/docs/docs/06-api-reference/classes/VADModule.md @@ -24,13 +24,87 @@ Module for Voice Activity Detection (VAD) functionalities. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. 
+ +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). + +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): 
`void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. + +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -84,7 +160,9 @@ A promise resolving to an array of detected speech segments. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -113,7 +191,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. 
diff --git a/docs/docs/06-api-reference/enumerations/RnExecutorchErrorCode.md b/docs/docs/06-api-reference/enumerations/RnExecutorchErrorCode.md index c5cdde479..8af6a41a0 100644 --- a/docs/docs/06-api-reference/enumerations/RnExecutorchErrorCode.md +++ b/docs/docs/06-api-reference/enumerations/RnExecutorchErrorCode.md @@ -8,7 +8,7 @@ Defined in: [errors/ErrorCodes.ts:4](https://github.com/software-mansion/react-n > **AccessFailed**: `34` -Defined in: [errors/ErrorCodes.ts:156](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L156) +Defined in: [errors/ErrorCodes.ts:160](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L160) Could not access a resource. @@ -18,7 +18,7 @@ Could not access a resource. > **DelegateInvalidCompatibility**: `48` -Defined in: [errors/ErrorCodes.ts:172](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L172) +Defined in: [errors/ErrorCodes.ts:176](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L176) Init stage: Backend receives an incompatible delegate version. @@ -28,7 +28,7 @@ Init stage: Backend receives an incompatible delegate version. > **DelegateInvalidHandle**: `50` -Defined in: [errors/ErrorCodes.ts:180](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L180) +Defined in: [errors/ErrorCodes.ts:184](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L184) Execute stage: The handle is invalid. @@ -38,7 +38,7 @@ Execute stage: The handle is invalid. 
> **DelegateMemoryAllocationFailed**: `49` -Defined in: [errors/ErrorCodes.ts:176](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L176) +Defined in: [errors/ErrorCodes.ts:180](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L180) Init stage: Backend fails to allocate memory. @@ -58,7 +58,7 @@ Thrown when the number of downloaded files is unexpected, due to download interr ### EndOfMethod > **EndOfMethod**: `3` -Defined in: [errors/ErrorCodes.ts:124](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L124) +Defined in: [errors/ErrorCodes.ts:128](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L128) Status indicating there are no more steps of execution to run @@ -88,7 +88,7 @@ An error occurred when saving a file. This could be, for instance, a result image ### Internal > **Internal**: `1` -Defined in: [errors/ErrorCodes.ts:116](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L116) +Defined in: [errors/ErrorCodes.ts:120](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L120) An internal error occurred. @@ -98,7 +98,7 @@ An internal error occurred. ### InvalidArgument > **InvalidArgument**: `18` -Defined in: [errors/ErrorCodes.ts:136](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L136) +Defined in: [errors/ErrorCodes.ts:140](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L140) User provided an invalid argument. 
@@ -118,7 +118,7 @@ Thrown when config parameters passed to a model are invalid. For example, when L > **InvalidExternalData**: `36` -Defined in: [errors/ErrorCodes.ts:164](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L164) +Defined in: [errors/ErrorCodes.ts:168](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L168) Error caused by the contents of external data. @@ -148,7 +148,7 @@ Thrown when the type of model source passed by the user is invalid. > **InvalidProgram**: `35` -Defined in: [errors/ErrorCodes.ts:160](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L160) +Defined in: [errors/ErrorCodes.ts:164](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L164) Error caused by the contents of a program. @@ -158,7 +158,7 @@ Error caused by the contents of a program. > **InvalidState**: `2` -Defined in: [errors/ErrorCodes.ts:120](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L120) +Defined in: [errors/ErrorCodes.ts:124](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L124) Status indicating the executor is in an invalid state for a targeted operation. @@ -168,7 +168,7 @@ Status indicating the executor is in an invalid state for a targeted operation. 
> **InvalidType**: `19` -Defined in: [errors/ErrorCodes.ts:140](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L140) +Defined in: [errors/ErrorCodes.ts:144](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L144) Object is an invalid type for the operation. @@ -198,7 +198,7 @@ Thrown when a language is passed to a multi-language model that is not supported > **MemoryAllocationFailed**: `33` -Defined in: [errors/ErrorCodes.ts:152](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L152) +Defined in: [errors/ErrorCodes.ts:156](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L156) Could not allocate the requested memory. @@ -208,7 +208,7 @@ Could not allocate the requested memory. > **MissingDataChunk**: `161` -Defined in: [errors/ErrorCodes.ts:72](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L72) +Defined in: [errors/ErrorCodes.ts:76](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L76) Thrown when streaming transcription is attempted but audio data chunk is missing. 
@@ -238,7 +238,7 @@ Thrown when a user tries to run a model that is not yet downloaded or loaded int > **MultilingualConfiguration**: `160` -Defined in: [errors/ErrorCodes.ts:68](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L68) +Defined in: [errors/ErrorCodes.ts:72](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L72) Thrown when there's a configuration mismatch between multilingual and language settings in Speech-to-Text models. @@ -248,7 +248,7 @@ Thrown when there's a configuration mismatch between multilingual and language s > **NotFound**: `32` -Defined in: [errors/ErrorCodes.ts:148](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L148) +Defined in: [errors/ErrorCodes.ts:152](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L152) Requested resource could not be found. @@ -258,7 +258,7 @@ Requested resource could not be found. > **NotImplemented**: `17` -Defined in: [errors/ErrorCodes.ts:132](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L132) +Defined in: [errors/ErrorCodes.ts:136](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L136) Operation is not yet implemented. @@ -268,7 +268,7 @@ Operation is not yet implemented. 
> **NotSupported**: `16` -Defined in: [errors/ErrorCodes.ts:128](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L128) +Defined in: [errors/ErrorCodes.ts:132](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L132) Operation is not supported in the current context. @@ -278,7 +278,7 @@ Operation is not supported in the current context. > **Ok**: `0` -Defined in: [errors/ErrorCodes.ts:112](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L112) +Defined in: [errors/ErrorCodes.ts:116](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L116) Status indicating a successful operation. @@ -288,7 +288,7 @@ Status indicating a successful operation. > **OperatorMissing**: `20` -Defined in: [errors/ErrorCodes.ts:144](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L144) +Defined in: [errors/ErrorCodes.ts:148](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L148) Operator(s) missing in the operator registry. @@ -298,17 +298,27 @@ Operator(s) missing in the operator registry. > **OutOfResources**: `37` -Defined in: [errors/ErrorCodes.ts:168](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L168) +Defined in: [errors/ErrorCodes.ts:172](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L172) Does not have enough resources to perform the requested operation. 
--- +### PlatformNotSupported + +> **PlatformNotSupported**: `119` + +Defined in: [errors/ErrorCodes.ts:64](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L64) + +Thrown when a feature or platform is not supported in the current environment. + +--- + ### ResourceFetcherAdapterNotInitialized > **ResourceFetcherAdapterNotInitialized**: `186` -Defined in: [errors/ErrorCodes.ts:108](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L108) +Defined in: [errors/ErrorCodes.ts:112](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L112) Thrown when trying to load resources without fetcher initialization. @@ -318,7 +328,7 @@ Thrown when trying to load resources without fetcher initialization. > **ResourceFetcherAlreadyOngoing**: `183` -Defined in: [errors/ErrorCodes.ts:96](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L96) +Defined in: [errors/ErrorCodes.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L100) Thrown when trying to resume a download that is already ongoing. @@ -328,7 +338,7 @@ Thrown when trying to resume a download that is already ongoing. > **ResourceFetcherAlreadyPaused**: `182` -Defined in: [errors/ErrorCodes.ts:92](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L92) +Defined in: [errors/ErrorCodes.ts:96](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L96) Thrown when trying to pause a download that is already paused. @@ -338,7 +348,7 @@ Thrown when trying to pause a download that is already paused. 
> **ResourceFetcherDownloadFailed**: `180` -Defined in: [errors/ErrorCodes.ts:84](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L84) +Defined in: [errors/ErrorCodes.ts:88](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L88) Thrown when a resource fails to download. This could be due to invalid URL, or for example a network problem. @@ -348,7 +358,7 @@ Thrown when a resource fails to download. This could be due to invalid URL, or f > **ResourceFetcherDownloadInProgress**: `181` -Defined in: [errors/ErrorCodes.ts:88](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L88) +Defined in: [errors/ErrorCodes.ts:92](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L92) Thrown when a user tries to trigger a download that's already in progress. @@ -358,7 +368,7 @@ Thrown when a user tries to trigger a download that's already in progress. > **ResourceFetcherMissingUri**: `185` -Defined in: [errors/ErrorCodes.ts:104](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L104) +Defined in: [errors/ErrorCodes.ts:108](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L108) Thrown when required URI information is missing for a download operation. @@ -368,7 +378,7 @@ Thrown when required URI information is missing for a download operation. 
> **ResourceFetcherNotActive**: `184` -Defined in: [errors/ErrorCodes.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L100) +Defined in: [errors/ErrorCodes.ts:104](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L104) Thrown when trying to pause, resume, or cancel a download that is not active. @@ -378,7 +388,7 @@ Thrown when trying to pause, resume, or cancel a download that is not active. > **StreamingInProgress**: `163` -Defined in: [errors/ErrorCodes.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L80) +Defined in: [errors/ErrorCodes.ts:84](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L84) Thrown when trying to start a new streaming session while another is already in progress. @@ -388,7 +398,7 @@ Thrown when trying to start a new streaming session while another is already in > **StreamingNotStarted**: `162` -Defined in: [errors/ErrorCodes.ts:76](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L76) +Defined in: [errors/ErrorCodes.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L80) Thrown when trying to stop or insert data into a stream that hasn't been started. @@ -408,7 +418,7 @@ Thrown when React Native ExecuTorch threadpool problem occurs. 
> **TokenizerError**: `167` -Defined in: [errors/ErrorCodes.ts:64](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L64) +Defined in: [errors/ErrorCodes.ts:68](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L68) Thrown when an error occurs with the tokenizer or tokenization process. diff --git a/docs/docs/06-api-reference/index.md b/docs/docs/06-api-reference/index.md index 125046b67..f49c25e9d 100644 --- a/docs/docs/06-api-reference/index.md +++ b/docs/docs/06-api-reference/index.md @@ -186,6 +186,7 @@ - [RnExecutorchErrorCode](enumerations/RnExecutorchErrorCode.md) - [Logger](classes/Logger.md) - [RnExecutorchError](classes/RnExecutorchError.md) +- [Frame](interfaces/Frame.md) ## TTS Supported Voices @@ -232,6 +233,7 @@ - [OCRDetection](interfaces/OCRDetection.md) - [OCRProps](interfaces/OCRProps.md) - [OCRType](interfaces/OCRType.md) +- [PixelData](interfaces/PixelData.md) - [Point](interfaces/Point.md) - [Segment](interfaces/Segment.md) - [SpeechToTextModelConfig](interfaces/SpeechToTextModelConfig.md) diff --git a/docs/docs/06-api-reference/interfaces/Frame.md b/docs/docs/06-api-reference/interfaces/Frame.md new file mode 100644 index 000000000..149a3837f --- /dev/null +++ b/docs/docs/06-api-reference/interfaces/Frame.md @@ -0,0 +1,36 @@ +# Interface: Frame + +Defined in: [types/common.ts:197](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L197) + +Frame data for vision model processing. + +## Methods + +### getNativeBuffer() + +> **getNativeBuffer**(): `object` + +Defined in: [types/common.ts:205](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L205) + +Pointer to native platform buffer (zero-copy, best performance). 
+ +- On iOS: CVPixelBufferRef pointer +- On Android: AHardwareBuffer\* pointer + +Obtain from Vision Camera v5: `frame.getNativeBuffer().pointer` + +#### Returns + +`object` + +##### pointer + +> **pointer**: `bigint` + +##### release() + +> **release**(): `void` + +###### Returns + +`void` diff --git a/docs/docs/06-api-reference/interfaces/ObjectDetectionType.md b/docs/docs/06-api-reference/interfaces/ObjectDetectionType.md index a9f28e5cf..4bd5dba98 100644 --- a/docs/docs/06-api-reference/interfaces/ObjectDetectionType.md +++ b/docs/docs/06-api-reference/interfaces/ObjectDetectionType.md @@ -29,36 +29,57 @@ Contains the error object if the model failed to load, download, or encountered ### forward() -> **forward**: (`imageSource`, `detectionThreshold?`) => `Promise`\<[`Detection`](Detection.md)[]\> +> **forward**: (`input`, `detectionThreshold?`) => `Promise`\<[`Detection`](Detection.md)[]\> -Defined in: [types/objectDetection.ts:179](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/objectDetection.ts#L179) +Defined in: [types/objectDetection.ts:199](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/objectDetection.ts#L199) -Executes the model's forward pass to detect objects within the provided image. +Executes the model's forward pass with automatic input type detection. + +Supports two input types: + +1. **String path/URI**: File path, URL, or Base64-encoded string +2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + +**Note**: For VisionCamera frame processing, use `runOnFrame` instead. #### Parameters -##### imageSource +##### input -`string` +Image source (string or PixelData object) -A string representing the image source (e.g., a file path, URI, or base64 string) to be processed. +`string` | [`PixelData`](PixelData.md) ##### detectionThreshold? 
`number` -An optional number between 0 and 1 representing the minimum confidence score required for an object to be included in the results. Default is 0.7. +An optional number between 0 and 1 representing the minimum confidence score. Default is 0.5. #### Returns `Promise`\<[`Detection`](Detection.md)[]\> -A Promise that resolves to an array of `Detection` objects, where each object typically contains bounding box coordinates, a class label, and a confidence score. +A Promise that resolves to an array of `Detection` objects. #### Throws If the model is not loaded or is currently processing another image. +#### Example + +```typescript +// String path +const detections1 = await model.forward('file:///path/to/image.jpg'); + +// Pixel data +const detections2 = await model.forward({ + dataPtr: new Uint8Array(rgbPixels), + sizes: [480, 640, 3], + scalarType: ScalarType.BYTE, +}); +``` + --- ### isGenerating @@ -78,3 +99,46 @@ Indicates whether the model is currently processing an image. Defined in: [types/objectDetection.ts:160](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/objectDetection.ts#L160) Indicates whether the object detection model is loaded and ready to process images. + +--- + +### runOnFrame + +> **runOnFrame**: (`frame`, `detectionThreshold`) => [`Detection`](Detection.md)[] \| `null` + +Defined in: [types/objectDetection.ts:231](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/objectDetection.ts#L231) + +Synchronous worklet function for real-time VisionCamera frame processing. +Automatically handles native buffer extraction and cleanup. + +**Use this for VisionCamera frame processing in worklets.** +For async processing, use `forward()` instead. + +Available after model is loaded (`isReady: true`). 
+ +#### Example + +```typescript +const { runOnFrame, isReady } = useObjectDetection({ model: MODEL }); + +const frameOutput = useFrameOutput({ + onFrame(frame) { + 'worklet'; + if (!runOnFrame) return; + const detections = runOnFrame(frame, 0.5); + frame.dispose(); + }, +}); +``` + +#### Param + +VisionCamera Frame object + +#### Param + +The threshold for detection sensitivity. + +#### Returns + +Array of Detection objects representing detected items in the frame. diff --git a/docs/docs/06-api-reference/interfaces/PixelData.md b/docs/docs/06-api-reference/interfaces/PixelData.md new file mode 100644 index 000000000..7ef9865aa --- /dev/null +++ b/docs/docs/06-api-reference/interfaces/PixelData.md @@ -0,0 +1,65 @@ +# Interface: PixelData + +Defined in: [types/common.ts:172](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L172) + +Represents raw pixel data in RGB format for vision models. + +This type extends TensorPtr with constraints specific to image data: + +- dataPtr must be Uint8Array (8-bit unsigned integers) +- scalarType is always BYTE (ScalarType.BYTE) +- sizes represents [height, width, channels] where channels must be 3 (RGB) + +## Example + +```typescript +const pixelData: PixelData = { + dataPtr: new Uint8Array(width * height * 3), // RGB pixel data + sizes: [height, width, 3], // [height, width, channels] + scalarType: ScalarType.BYTE, +}; +``` + +## Extends + +- `Omit`\<[`TensorPtr`](TensorPtr.md), `"dataPtr"` \| `"scalarType"`\> + +## Properties + +### dataPtr + +> **dataPtr**: `Uint8Array` + +Defined in: [types/common.ts:178](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L178) + +RGB pixel data as Uint8Array. +Expected format: RGB (3 channels), not RGBA or BGRA. 
+
Size must equal: width \* height \* 3

---

### scalarType

> **scalarType**: [`BYTE`](../enumerations/ScalarType.md#byte)

Defined in: [types/common.ts:191](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L191)

Scalar type is always BYTE for pixel data.

---

### sizes

> **sizes**: \[`number`, `number`, `3`\]

Defined in: [types/common.ts:186](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L186)

Dimensions of the pixel data: [height, width, channels].

- sizes[0]: height (number of rows)
- sizes[1]: width (number of columns)
- sizes[2]: channels (must be 3 for RGB)

#### Overrides

[`TensorPtr`](TensorPtr.md).[`sizes`](TensorPtr.md#sizes)
diff --git a/docs/docs/06-api-reference/typedoc-sidebar.cjs b/docs/docs/06-api-reference/typedoc-sidebar.cjs
index bbd478710..f5d9ec3d4 100644
--- a/docs/docs/06-api-reference/typedoc-sidebar.cjs
+++ b/docs/docs/06-api-reference/typedoc-sidebar.cjs
@@ -1,4 +1,4 @@
 // @ts-check
 /** @type {import("@docusaurus/plugin-content-docs").SidebarsConfig} */
-const typedocSidebar = 
{items:[{type:"category",label:"Hooks",items:[{type:"doc",id:"06-api-reference/functions/useClassification",label:"useClassification"},{type:"doc",id:"06-api-reference/functions/useExecutorchModule",label:"useExecutorchModule"},{type:"doc",id:"06-api-reference/functions/useImageEmbeddings",label:"useImageEmbeddings"},{type:"doc",id:"06-api-reference/functions/useImageSegmentation",label:"useImageSegmentation"},{type:"doc",id:"06-api-reference/functions/useLLM",label:"useLLM"},{type:"doc",id:"06-api-reference/functions/useObjectDetection",label:"useObjectDetection"},{type:"doc",id:"06-api-reference/functions/useOCR",label:"useOCR"},{type:"doc",id:"06-api-reference/functions/useSpeechToText",label:"useSpeechToText"},{type:"doc",id:"06-api-reference/functions/useStyleTransfer",label:"useStyleTransfer"},{type:"doc",id:"06-api-reference/functions/useTextEmbeddings",label:"useTextEmbeddings"},{type:"doc",id:"06-api-reference/functions/useTextToImage",label:"useTextToImage"},{type:"doc",id:"06-api-reference/functions/useTextToSpeech",label:"useTextToSpeech"},{type:"doc",id:"06-api-reference/functions/useTokenizer",label:"useTokenizer"},{type:"doc",id:"06-api-reference/functions/useVAD",label:"useVAD"},{type:"doc",id:"06-api-reference/functions/useVerticalOCR",label:"useVerticalOCR"}]},{type:"category",label:"Interfaces",items:[{type:"doc",id:"06-api-reference/interfaces/ResourceSourceExtended",label:"ResourceSourceExtended"}]},{type:"category",label:"Models - Classification",items:[{type:"doc",id:"06-api-reference/variables/EFFICIENTNET_V2_S",label:"EFFICIENTNET_V2_S"}]},{type:"category",label:"Models - Image Embeddings",items:[{type:"doc",id:"06-api-reference/variables/CLIP_VIT_BASE_PATCH32_IMAGE",label:"CLIP_VIT_BASE_PATCH32_IMAGE"}]},{type:"category",label:"Models - Image 
Generation",items:[{type:"doc",id:"06-api-reference/variables/BK_SDM_TINY_VPRED_256",label:"BK_SDM_TINY_VPRED_256"},{type:"doc",id:"06-api-reference/variables/BK_SDM_TINY_VPRED_512",label:"BK_SDM_TINY_VPRED_512"}]},{type:"category",label:"Models - Image Segmentation",items:[{type:"doc",id:"06-api-reference/variables/DEEPLAB_V3_RESNET50",label:"DEEPLAB_V3_RESNET50"},{type:"doc",id:"06-api-reference/variables/SELFIE_SEGMENTATION",label:"SELFIE_SEGMENTATION"}]},{type:"category",label:"Models - LMM",items:[{type:"doc",id:"06-api-reference/variables/HAMMER2_1_0_5B",label:"HAMMER2_1_0_5B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_0_5B_QUANTIZED",label:"HAMMER2_1_0_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_1_5B",label:"HAMMER2_1_1_5B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_1_5B_QUANTIZED",label:"HAMMER2_1_1_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_3B",label:"HAMMER2_1_3B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_3B_QUANTIZED",label:"HAMMER2_1_3B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/LFM2_5_1_2B_INSTRUCT",label:"LFM2_5_1_2B_INSTRUCT"},{type:"doc",id:"06-api-reference/variables/LFM2_5_1_2B_INSTRUCT_QUANTIZED",label:"LFM2_5_1_2B_INSTRUCT_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B",label:"LLAMA3_2_1B"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B_QLORA",label:"LLAMA3_2_1B_QLORA"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B_SPINQUANT",label:"LLAMA3_2_1B_SPINQUANT"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B",label:"LLAMA3_2_3B"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B_QLORA",label:"LLAMA3_2_3B_QLORA"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B_SPINQUANT",label:"LLAMA3_2_3B_SPINQUANT"},{type:"doc",id:"06-api-reference/variables/PHI_4_MINI_4B",label:"PHI_4_MINI_4B"},{type:"doc",id:"06-api-reference/variables/PHI_4_MINI_4B_QUANTIZED",label:"PHI_4_MINI_4B_QUANTIZED"},{type:"doc",id:"06-api-r
eference/variables/QWEN2_5_0_5B",label:"QWEN2_5_0_5B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_0_5B_QUANTIZED",label:"QWEN2_5_0_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_1_5B",label:"QWEN2_5_1_5B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_1_5B_QUANTIZED",label:"QWEN2_5_1_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_3B",label:"QWEN2_5_3B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_3B_QUANTIZED",label:"QWEN2_5_3B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_0_6B",label:"QWEN3_0_6B"},{type:"doc",id:"06-api-reference/variables/QWEN3_0_6B_QUANTIZED",label:"QWEN3_0_6B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_1_7B",label:"QWEN3_1_7B"},{type:"doc",id:"06-api-reference/variables/QWEN3_1_7B_QUANTIZED",label:"QWEN3_1_7B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_4B",label:"QWEN3_4B"},{type:"doc",id:"06-api-reference/variables/QWEN3_4B_QUANTIZED",label:"QWEN3_4B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_1_7B",label:"SMOLLM2_1_1_7B"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_1_7B_QUANTIZED",label:"SMOLLM2_1_1_7B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_135M",label:"SMOLLM2_1_135M"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_135M_QUANTIZED",label:"SMOLLM2_1_135M_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_360M",label:"SMOLLM2_1_360M"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_360M_QUANTIZED",label:"SMOLLM2_1_360M_QUANTIZED"}]},{type:"category",label:"Models - Object Detection",items:[{type:"doc",id:"06-api-reference/variables/SSDLITE_320_MOBILENET_V3_LARGE",label:"SSDLITE_320_MOBILENET_V3_LARGE"}]},{type:"category",label:"Models - Speech To 
Text",items:[{type:"doc",id:"06-api-reference/variables/WHISPER_BASE",label:"WHISPER_BASE"},{type:"doc",id:"06-api-reference/variables/WHISPER_BASE_EN",label:"WHISPER_BASE_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_SMALL",label:"WHISPER_SMALL"},{type:"doc",id:"06-api-reference/variables/WHISPER_SMALL_EN",label:"WHISPER_SMALL_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY",label:"WHISPER_TINY"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY_EN",label:"WHISPER_TINY_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY_EN_QUANTIZED",label:"WHISPER_TINY_EN_QUANTIZED"}]},{type:"category",label:"Models - Style Transfer",items:[{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_CANDY",label:"STYLE_TRANSFER_CANDY"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_MOSAIC",label:"STYLE_TRANSFER_MOSAIC"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_RAIN_PRINCESS",label:"STYLE_TRANSFER_RAIN_PRINCESS"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_UDNIE",label:"STYLE_TRANSFER_UDNIE"}]},{type:"category",label:"Models - Text Embeddings",items:[{type:"doc",id:"06-api-reference/variables/ALL_MINILM_L6_V2",label:"ALL_MINILM_L6_V2"},{type:"doc",id:"06-api-reference/variables/ALL_MPNET_BASE_V2",label:"ALL_MPNET_BASE_V2"},{type:"doc",id:"06-api-reference/variables/CLIP_VIT_BASE_PATCH32_TEXT",label:"CLIP_VIT_BASE_PATCH32_TEXT"},{type:"doc",id:"06-api-reference/variables/MULTI_QA_MINILM_L6_COS_V1",label:"MULTI_QA_MINILM_L6_COS_V1"},{type:"doc",id:"06-api-reference/variables/MULTI_QA_MPNET_BASE_DOT_V1",label:"MULTI_QA_MPNET_BASE_DOT_V1"}]},{type:"category",label:"Models - Text to Speech",items:[{type:"doc",id:"06-api-reference/variables/KOKORO_MEDIUM",label:"KOKORO_MEDIUM"},{type:"doc",id:"06-api-reference/variables/KOKORO_SMALL",label:"KOKORO_SMALL"}]},{type:"category",label:"Models - Voice Activity 
Detection",items:[{type:"doc",id:"06-api-reference/variables/FSMN_VAD",label:"FSMN_VAD"}]},{type:"category",label:"OCR Supported Alphabets",items:[{type:"doc",id:"06-api-reference/variables/OCR_ABAZA",label:"OCR_ABAZA"},{type:"doc",id:"06-api-reference/variables/OCR_ADYGHE",label:"OCR_ADYGHE"},{type:"doc",id:"06-api-reference/variables/OCR_AFRIKAANS",label:"OCR_AFRIKAANS"},{type:"doc",id:"06-api-reference/variables/OCR_ALBANIAN",label:"OCR_ALBANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_AVAR",label:"OCR_AVAR"},{type:"doc",id:"06-api-reference/variables/OCR_AZERBAIJANI",label:"OCR_AZERBAIJANI"},{type:"doc",id:"06-api-reference/variables/OCR_BELARUSIAN",label:"OCR_BELARUSIAN"},{type:"doc",id:"06-api-reference/variables/OCR_BOSNIAN",label:"OCR_BOSNIAN"},{type:"doc",id:"06-api-reference/variables/OCR_BULGARIAN",label:"OCR_BULGARIAN"},{type:"doc",id:"06-api-reference/variables/OCR_CHECHEN",label:"OCR_CHECHEN"},{type:"doc",id:"06-api-reference/variables/OCR_CROATIAN",label:"OCR_CROATIAN"},{type:"doc",id:"06-api-reference/variables/OCR_CZECH",label:"OCR_CZECH"},{type:"doc",id:"06-api-reference/variables/OCR_DANISH",label:"OCR_DANISH"},{type:"doc",id:"06-api-reference/variables/OCR_DARGWA",label:"OCR_DARGWA"},{type:"doc",id:"06-api-reference/variables/OCR_DUTCH",label:"OCR_DUTCH"},{type:"doc",id:"06-api-reference/variables/OCR_ENGLISH",label:"OCR_ENGLISH"},{type:"doc",id:"06-api-reference/variables/OCR_ESTONIAN",label:"OCR_ESTONIAN"},{type:"doc",id:"06-api-reference/variables/OCR_FRENCH",label:"OCR_FRENCH"},{type:"doc",id:"06-api-reference/variables/OCR_GERMAN",label:"OCR_GERMAN"},{type:"doc",id:"06-api-reference/variables/OCR_HUNGARIAN",label:"OCR_HUNGARIAN"},{type:"doc",id:"06-api-reference/variables/OCR_ICELANDIC",label:"OCR_ICELANDIC"},{type:"doc",id:"06-api-reference/variables/OCR_INDONESIAN",label:"OCR_INDONESIAN"},{type:"doc",id:"06-api-reference/variables/OCR_INGUSH",label:"OCR_INGUSH"},{type:"doc",id:"06-api-reference/variables/OCR_IRISH",label:"OCR_IRISH
"},{type:"doc",id:"06-api-reference/variables/OCR_ITALIAN",label:"OCR_ITALIAN"},{type:"doc",id:"06-api-reference/variables/OCR_JAPANESE",label:"OCR_JAPANESE"},{type:"doc",id:"06-api-reference/variables/OCR_KANNADA",label:"OCR_KANNADA"},{type:"doc",id:"06-api-reference/variables/OCR_KARBADIAN",label:"OCR_KARBADIAN"},{type:"doc",id:"06-api-reference/variables/OCR_KOREAN",label:"OCR_KOREAN"},{type:"doc",id:"06-api-reference/variables/OCR_KURDISH",label:"OCR_KURDISH"},{type:"doc",id:"06-api-reference/variables/OCR_LAK",label:"OCR_LAK"},{type:"doc",id:"06-api-reference/variables/OCR_LATIN",label:"OCR_LATIN"},{type:"doc",id:"06-api-reference/variables/OCR_LATVIAN",label:"OCR_LATVIAN"},{type:"doc",id:"06-api-reference/variables/OCR_LEZGHIAN",label:"OCR_LEZGHIAN"},{type:"doc",id:"06-api-reference/variables/OCR_LITHUANIAN",label:"OCR_LITHUANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_MALAY",label:"OCR_MALAY"},{type:"doc",id:"06-api-reference/variables/OCR_MALTESE",label:"OCR_MALTESE"},{type:"doc",id:"06-api-reference/variables/OCR_MAORI",label:"OCR_MAORI"},{type:"doc",id:"06-api-reference/variables/OCR_MONGOLIAN",label:"OCR_MONGOLIAN"},{type:"doc",id:"06-api-reference/variables/OCR_NORWEGIAN",label:"OCR_NORWEGIAN"},{type:"doc",id:"06-api-reference/variables/OCR_OCCITAN",label:"OCR_OCCITAN"},{type:"doc",id:"06-api-reference/variables/OCR_PALI",label:"OCR_PALI"},{type:"doc",id:"06-api-reference/variables/OCR_POLISH",label:"OCR_POLISH"},{type:"doc",id:"06-api-reference/variables/OCR_PORTUGUESE",label:"OCR_PORTUGUESE"},{type:"doc",id:"06-api-reference/variables/OCR_ROMANIAN",label:"OCR_ROMANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_RUSSIAN",label:"OCR_RUSSIAN"},{type:"doc",id:"06-api-reference/variables/OCR_SERBIAN_CYRILLIC",label:"OCR_SERBIAN_CYRILLIC"},{type:"doc",id:"06-api-reference/variables/OCR_SERBIAN_LATIN",label:"OCR_SERBIAN_LATIN"},{type:"doc",id:"06-api-reference/variables/OCR_SIMPLIFIED_CHINESE",label:"OCR_SIMPLIFIED_CHINESE"},{type:"doc",id:"06-a
pi-reference/variables/OCR_SLOVAK",label:"OCR_SLOVAK"},{type:"doc",id:"06-api-reference/variables/OCR_SLOVENIAN",label:"OCR_SLOVENIAN"},{type:"doc",id:"06-api-reference/variables/OCR_SPANISH",label:"OCR_SPANISH"},{type:"doc",id:"06-api-reference/variables/OCR_SWAHILI",label:"OCR_SWAHILI"},{type:"doc",id:"06-api-reference/variables/OCR_SWEDISH",label:"OCR_SWEDISH"},{type:"doc",id:"06-api-reference/variables/OCR_TABASSARAN",label:"OCR_TABASSARAN"},{type:"doc",id:"06-api-reference/variables/OCR_TAGALOG",label:"OCR_TAGALOG"},{type:"doc",id:"06-api-reference/variables/OCR_TAJIK",label:"OCR_TAJIK"},{type:"doc",id:"06-api-reference/variables/OCR_TELUGU",label:"OCR_TELUGU"},{type:"doc",id:"06-api-reference/variables/OCR_TURKISH",label:"OCR_TURKISH"},{type:"doc",id:"06-api-reference/variables/OCR_UKRAINIAN",label:"OCR_UKRAINIAN"},{type:"doc",id:"06-api-reference/variables/OCR_UZBEK",label:"OCR_UZBEK"},{type:"doc",id:"06-api-reference/variables/OCR_VIETNAMESE",label:"OCR_VIETNAMESE"},{type:"doc",id:"06-api-reference/variables/OCR_WELSH",label:"OCR_WELSH"}]},{type:"category",label:"Other",items:[{type:"doc",id:"06-api-reference/enumerations/RnExecutorchErrorCode",label:"RnExecutorchErrorCode"},{type:"doc",id:"06-api-reference/classes/Logger",label:"Logger"},{type:"doc",id:"06-api-reference/classes/RnExecutorchError",label:"RnExecutorchError"}]},{type:"category",label:"TTS Supported 
Voices",items:[{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_HEART",label:"KOKORO_VOICE_AF_HEART"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_RIVER",label:"KOKORO_VOICE_AF_RIVER"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_SARAH",label:"KOKORO_VOICE_AF_SARAH"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_ADAM",label:"KOKORO_VOICE_AM_ADAM"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_MICHAEL",label:"KOKORO_VOICE_AM_MICHAEL"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_SANTA",label:"KOKORO_VOICE_AM_SANTA"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_BF_EMMA",label:"KOKORO_VOICE_BF_EMMA"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_BM_DANIEL",label:"KOKORO_VOICE_BM_DANIEL"}]},{type:"category",label:"Types",items:[{type:"doc",id:"06-api-reference/enumerations/CocoLabel",label:"CocoLabel"},{type:"doc",id:"06-api-reference/enumerations/DeeplabLabel",label:"DeeplabLabel"},{type:"doc",id:"06-api-reference/enumerations/DownloadStatus",label:"DownloadStatus"},{type:"doc",id:"06-api-reference/enumerations/HTTP_CODE",label:"HTTP_CODE"},{type:"doc",id:"06-api-reference/enumerations/ScalarType",label:"ScalarType"},{type:"doc",id:"06-api-reference/enumerations/SelfieSegmentationLabel",label:"SelfieSegmentationLabel"},{type:"doc",id:"06-api-reference/enumerations/SourceType",label:"SourceType"},{type:"doc",id:"06-api-reference/interfaces/Bbox",label:"Bbox"},{type:"doc",id:"06-api-reference/interfaces/ChatConfig",label:"ChatConfig"},{type:"doc",id:"06-api-reference/interfaces/ClassificationProps",label:"ClassificationProps"},{type:"doc",id:"06-api-reference/interfaces/ClassificationType",label:"ClassificationType"},{type:"doc",id:"06-api-reference/interfaces/ContextStrategy",label:"ContextStrategy"},{type:"doc",id:"06-api-reference/interfaces/DecodingOptions",label:"DecodingOptions"},{type:"doc",id:"06-api-reference/interfaces/Detection",label:"Detection"},{type:"doc",id:
"06-api-reference/interfaces/ExecutorchModuleProps",label:"ExecutorchModuleProps"},{type:"doc",id:"06-api-reference/interfaces/ExecutorchModuleType",label:"ExecutorchModuleType"},{type:"doc",id:"06-api-reference/interfaces/GenerationConfig",label:"GenerationConfig"},{type:"doc",id:"06-api-reference/interfaces/ImageEmbeddingsProps",label:"ImageEmbeddingsProps"},{type:"doc",id:"06-api-reference/interfaces/ImageEmbeddingsType",label:"ImageEmbeddingsType"},{type:"doc",id:"06-api-reference/interfaces/ImageSegmentationProps",label:"ImageSegmentationProps"},{type:"doc",id:"06-api-reference/interfaces/ImageSegmentationType",label:"ImageSegmentationType"},{type:"doc",id:"06-api-reference/interfaces/KokoroConfig",label:"KokoroConfig"},{type:"doc",id:"06-api-reference/interfaces/KokoroVoiceExtras",label:"KokoroVoiceExtras"},{type:"doc",id:"06-api-reference/interfaces/LLMConfig",label:"LLMConfig"},{type:"doc",id:"06-api-reference/interfaces/LLMProps",label:"LLMProps"},{type:"doc",id:"06-api-reference/interfaces/LLMType",label:"LLMType"},{type:"doc",id:"06-api-reference/interfaces/Message",label:"Message"},{type:"doc",id:"06-api-reference/interfaces/ObjectDetectionProps",label:"ObjectDetectionProps"},{type:"doc",id:"06-api-reference/interfaces/ObjectDetectionType",label:"ObjectDetectionType"},{type:"doc",id:"06-api-reference/interfaces/OCRDetection",label:"OCRDetection"},{type:"doc",id:"06-api-reference/interfaces/OCRProps",label:"OCRProps"},{type:"doc",id:"06-api-reference/interfaces/OCRType",label:"OCRType"},{type:"doc",id:"06-api-reference/interfaces/Point",label:"Point"},{type:"doc",id:"06-api-reference/interfaces/Segment",label:"Segment"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextModelConfig",label:"SpeechToTextModelConfig"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextProps",label:"SpeechToTextProps"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextType",label:"SpeechToTextType"},{type:"doc",id:"06-api-reference/interfaces/StyleTransferProp
s",label:"StyleTransferProps"},{type:"doc",id:"06-api-reference/interfaces/StyleTransferType",label:"StyleTransferType"},{type:"doc",id:"06-api-reference/interfaces/TensorPtr",label:"TensorPtr"},{type:"doc",id:"06-api-reference/interfaces/TextEmbeddingsProps",label:"TextEmbeddingsProps"},{type:"doc",id:"06-api-reference/interfaces/TextEmbeddingsType",label:"TextEmbeddingsType"},{type:"doc",id:"06-api-reference/interfaces/TextToImageProps",label:"TextToImageProps"},{type:"doc",id:"06-api-reference/interfaces/TextToImageType",label:"TextToImageType"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechConfig",label:"TextToSpeechConfig"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechInput",label:"TextToSpeechInput"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechProps",label:"TextToSpeechProps"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechStreamingInput",label:"TextToSpeechStreamingInput"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechType",label:"TextToSpeechType"},{type:"doc",id:"06-api-reference/interfaces/TokenizerProps",label:"TokenizerProps"},{type:"doc",id:"06-api-reference/interfaces/TokenizerType",label:"TokenizerType"},{type:"doc",id:"06-api-reference/interfaces/ToolCall",label:"ToolCall"},{type:"doc",id:"06-api-reference/interfaces/ToolsConfig",label:"ToolsConfig"},{type:"doc",id:"06-api-reference/interfaces/TranscriptionResult",label:"TranscriptionResult"},{type:"doc",id:"06-api-reference/interfaces/TranscriptionSegment",label:"TranscriptionSegment"},{type:"doc",id:"06-api-reference/interfaces/VADProps",label:"VADProps"},{type:"doc",id:"06-api-reference/interfaces/VADType",label:"VADType"},{type:"doc",id:"06-api-reference/interfaces/VerticalOCRProps",label:"VerticalOCRProps"},{type:"doc",id:"06-api-reference/interfaces/VoiceConfig",label:"VoiceConfig"},{type:"doc",id:"06-api-reference/interfaces/Word",label:"Word"},{type:"doc",id:"06-api-reference/type-aliases/LabelEnum",label:"LabelEnum"},{type:"doc",id:"06-api-
reference/type-aliases/LLMTool",label:"LLMTool"},{type:"doc",id:"06-api-reference/type-aliases/MessageRole",label:"MessageRole"},{type:"doc",id:"06-api-reference/type-aliases/ModelNameOf",label:"ModelNameOf"},{type:"doc",id:"06-api-reference/type-aliases/ModelSources",label:"ModelSources"},{type:"doc",id:"06-api-reference/type-aliases/OCRLanguage",label:"OCRLanguage"},{type:"doc",id:"06-api-reference/type-aliases/ResourceSource",label:"ResourceSource"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationConfig",label:"SegmentationConfig"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationLabels",label:"SegmentationLabels"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationModelName",label:"SegmentationModelName"},{type:"doc",id:"06-api-reference/type-aliases/SpeechToTextLanguage",label:"SpeechToTextLanguage"},{type:"doc",id:"06-api-reference/type-aliases/TensorBuffer",label:"TensorBuffer"},{type:"doc",id:"06-api-reference/type-aliases/TextToSpeechLanguage",label:"TextToSpeechLanguage"},{type:"doc",id:"06-api-reference/type-aliases/Triple",label:"Triple"},{type:"doc",id:"06-api-reference/variables/SPECIAL_TOKENS",label:"SPECIAL_TOKENS"}]},{type:"category",label:"Typescript 
API",items:[{type:"doc",id:"06-api-reference/classes/ClassificationModule",label:"ClassificationModule"},{type:"doc",id:"06-api-reference/classes/ExecutorchModule",label:"ExecutorchModule"},{type:"doc",id:"06-api-reference/classes/ImageEmbeddingsModule",label:"ImageEmbeddingsModule"},{type:"doc",id:"06-api-reference/classes/ImageSegmentationModule",label:"ImageSegmentationModule"},{type:"doc",id:"06-api-reference/classes/LLMModule",label:"LLMModule"},{type:"doc",id:"06-api-reference/classes/ObjectDetectionModule",label:"ObjectDetectionModule"},{type:"doc",id:"06-api-reference/classes/OCRModule",label:"OCRModule"},{type:"doc",id:"06-api-reference/classes/SpeechToTextModule",label:"SpeechToTextModule"},{type:"doc",id:"06-api-reference/classes/StyleTransferModule",label:"StyleTransferModule"},{type:"doc",id:"06-api-reference/classes/TextEmbeddingsModule",label:"TextEmbeddingsModule"},{type:"doc",id:"06-api-reference/classes/TextToImageModule",label:"TextToImageModule"},{type:"doc",id:"06-api-reference/classes/TextToSpeechModule",label:"TextToSpeechModule"},{type:"doc",id:"06-api-reference/classes/TokenizerModule",label:"TokenizerModule"},{type:"doc",id:"06-api-reference/classes/VADModule",label:"VADModule"},{type:"doc",id:"06-api-reference/classes/VerticalOCRModule",label:"VerticalOCRModule"}]},{type:"category",label:"Utilities - 
General",items:[{type:"category",label:"ResourceFetcherUtils",items:[{type:"category",label:"Functions",items:[{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/calculateDownloadProgress",label:"calculateDownloadProgress"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/getFilenameFromUri",label:"getFilenameFromUri"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/hashObject",label:"hashObject"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/removeFilePrefix",label:"removeFilePrefix"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/triggerHuggingFaceDownloadCounter",label:"triggerHuggingFaceDownloadCounter"}]}],link:{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/index"}},{type:"doc",id:"06-api-reference/classes/ResourceFetcher",label:"ResourceFetcher"},{type:"doc",id:"06-api-reference/interfaces/ExecutorchConfig",label:"ExecutorchConfig"},{type:"doc",id:"06-api-reference/interfaces/ResourceFetcherAdapter",label:"ResourceFetcherAdapter"},{type:"doc",id:"06-api-reference/functions/cleanupExecutorch",label:"cleanupExecutorch"},{type:"doc",id:"06-api-reference/functions/initExecutorch",label:"initExecutorch"}]},{type:"category",label:"Utilities - 
LLM",items:[{type:"doc",id:"06-api-reference/variables/DEFAULT_CHAT_CONFIG",label:"DEFAULT_CHAT_CONFIG"},{type:"doc",id:"06-api-reference/variables/DEFAULT_CONTEXT_BUFFER_TOKENS",label:"DEFAULT_CONTEXT_BUFFER_TOKENS"},{type:"doc",id:"06-api-reference/variables/DEFAULT_MESSAGE_HISTORY",label:"DEFAULT_MESSAGE_HISTORY"},{type:"doc",id:"06-api-reference/variables/DEFAULT_SYSTEM_PROMPT",label:"DEFAULT_SYSTEM_PROMPT"},{type:"doc",id:"06-api-reference/variables/parseToolCall",label:"parseToolCall"},{type:"doc",id:"06-api-reference/functions/DEFAULT_STRUCTURED_OUTPUT_PROMPT",label:"DEFAULT_STRUCTURED_OUTPUT_PROMPT"},{type:"doc",id:"06-api-reference/functions/fixAndValidateStructuredOutput",label:"fixAndValidateStructuredOutput"},{type:"doc",id:"06-api-reference/functions/getStructuredOutputPrompt",label:"getStructuredOutputPrompt"}]},{type:"category",label:"Utils",items:[{type:"doc",id:"06-api-reference/classes/MessageCountContextStrategy",label:"MessageCountContextStrategy"},{type:"doc",id:"06-api-reference/classes/NoopContextStrategy",label:"NoopContextStrategy"},{type:"doc",id:"06-api-reference/classes/SlidingWindowContextStrategy",label:"SlidingWindowContextStrategy"}]}]}; +const typedocSidebar = 
{items:[{type:"category",label:"Hooks",items:[{type:"doc",id:"06-api-reference/functions/useClassification",label:"useClassification"},{type:"doc",id:"06-api-reference/functions/useExecutorchModule",label:"useExecutorchModule"},{type:"doc",id:"06-api-reference/functions/useImageEmbeddings",label:"useImageEmbeddings"},{type:"doc",id:"06-api-reference/functions/useImageSegmentation",label:"useImageSegmentation"},{type:"doc",id:"06-api-reference/functions/useLLM",label:"useLLM"},{type:"doc",id:"06-api-reference/functions/useObjectDetection",label:"useObjectDetection"},{type:"doc",id:"06-api-reference/functions/useOCR",label:"useOCR"},{type:"doc",id:"06-api-reference/functions/useSpeechToText",label:"useSpeechToText"},{type:"doc",id:"06-api-reference/functions/useStyleTransfer",label:"useStyleTransfer"},{type:"doc",id:"06-api-reference/functions/useTextEmbeddings",label:"useTextEmbeddings"},{type:"doc",id:"06-api-reference/functions/useTextToImage",label:"useTextToImage"},{type:"doc",id:"06-api-reference/functions/useTextToSpeech",label:"useTextToSpeech"},{type:"doc",id:"06-api-reference/functions/useTokenizer",label:"useTokenizer"},{type:"doc",id:"06-api-reference/functions/useVAD",label:"useVAD"},{type:"doc",id:"06-api-reference/functions/useVerticalOCR",label:"useVerticalOCR"}]},{type:"category",label:"Interfaces",items:[{type:"doc",id:"06-api-reference/interfaces/ResourceSourceExtended",label:"ResourceSourceExtended"}]},{type:"category",label:"Models - Classification",items:[{type:"doc",id:"06-api-reference/variables/EFFICIENTNET_V2_S",label:"EFFICIENTNET_V2_S"}]},{type:"category",label:"Models - Image Embeddings",items:[{type:"doc",id:"06-api-reference/variables/CLIP_VIT_BASE_PATCH32_IMAGE",label:"CLIP_VIT_BASE_PATCH32_IMAGE"}]},{type:"category",label:"Models - Image 
Generation",items:[{type:"doc",id:"06-api-reference/variables/BK_SDM_TINY_VPRED_256",label:"BK_SDM_TINY_VPRED_256"},{type:"doc",id:"06-api-reference/variables/BK_SDM_TINY_VPRED_512",label:"BK_SDM_TINY_VPRED_512"}]},{type:"category",label:"Models - Image Segmentation",items:[{type:"doc",id:"06-api-reference/variables/DEEPLAB_V3_RESNET50",label:"DEEPLAB_V3_RESNET50"},{type:"doc",id:"06-api-reference/variables/SELFIE_SEGMENTATION",label:"SELFIE_SEGMENTATION"}]},{type:"category",label:"Models - LMM",items:[{type:"doc",id:"06-api-reference/variables/HAMMER2_1_0_5B",label:"HAMMER2_1_0_5B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_0_5B_QUANTIZED",label:"HAMMER2_1_0_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_1_5B",label:"HAMMER2_1_1_5B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_1_5B_QUANTIZED",label:"HAMMER2_1_1_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_3B",label:"HAMMER2_1_3B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_3B_QUANTIZED",label:"HAMMER2_1_3B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/LFM2_5_1_2B_INSTRUCT",label:"LFM2_5_1_2B_INSTRUCT"},{type:"doc",id:"06-api-reference/variables/LFM2_5_1_2B_INSTRUCT_QUANTIZED",label:"LFM2_5_1_2B_INSTRUCT_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B",label:"LLAMA3_2_1B"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B_QLORA",label:"LLAMA3_2_1B_QLORA"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B_SPINQUANT",label:"LLAMA3_2_1B_SPINQUANT"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B",label:"LLAMA3_2_3B"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B_QLORA",label:"LLAMA3_2_3B_QLORA"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B_SPINQUANT",label:"LLAMA3_2_3B_SPINQUANT"},{type:"doc",id:"06-api-reference/variables/PHI_4_MINI_4B",label:"PHI_4_MINI_4B"},{type:"doc",id:"06-api-reference/variables/PHI_4_MINI_4B_QUANTIZED",label:"PHI_4_MINI_4B_QUANTIZED"},{type:"doc",id:"06-api-r
eference/variables/QWEN2_5_0_5B",label:"QWEN2_5_0_5B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_0_5B_QUANTIZED",label:"QWEN2_5_0_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_1_5B",label:"QWEN2_5_1_5B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_1_5B_QUANTIZED",label:"QWEN2_5_1_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_3B",label:"QWEN2_5_3B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_3B_QUANTIZED",label:"QWEN2_5_3B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_0_6B",label:"QWEN3_0_6B"},{type:"doc",id:"06-api-reference/variables/QWEN3_0_6B_QUANTIZED",label:"QWEN3_0_6B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_1_7B",label:"QWEN3_1_7B"},{type:"doc",id:"06-api-reference/variables/QWEN3_1_7B_QUANTIZED",label:"QWEN3_1_7B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_4B",label:"QWEN3_4B"},{type:"doc",id:"06-api-reference/variables/QWEN3_4B_QUANTIZED",label:"QWEN3_4B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_1_7B",label:"SMOLLM2_1_1_7B"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_1_7B_QUANTIZED",label:"SMOLLM2_1_1_7B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_135M",label:"SMOLLM2_1_135M"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_135M_QUANTIZED",label:"SMOLLM2_1_135M_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_360M",label:"SMOLLM2_1_360M"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_360M_QUANTIZED",label:"SMOLLM2_1_360M_QUANTIZED"}]},{type:"category",label:"Models - Object Detection",items:[{type:"doc",id:"06-api-reference/variables/SSDLITE_320_MOBILENET_V3_LARGE",label:"SSDLITE_320_MOBILENET_V3_LARGE"}]},{type:"category",label:"Models - Speech To 
Text",items:[{type:"doc",id:"06-api-reference/variables/WHISPER_BASE",label:"WHISPER_BASE"},{type:"doc",id:"06-api-reference/variables/WHISPER_BASE_EN",label:"WHISPER_BASE_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_SMALL",label:"WHISPER_SMALL"},{type:"doc",id:"06-api-reference/variables/WHISPER_SMALL_EN",label:"WHISPER_SMALL_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY",label:"WHISPER_TINY"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY_EN",label:"WHISPER_TINY_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY_EN_QUANTIZED",label:"WHISPER_TINY_EN_QUANTIZED"}]},{type:"category",label:"Models - Style Transfer",items:[{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_CANDY",label:"STYLE_TRANSFER_CANDY"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_MOSAIC",label:"STYLE_TRANSFER_MOSAIC"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_RAIN_PRINCESS",label:"STYLE_TRANSFER_RAIN_PRINCESS"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_UDNIE",label:"STYLE_TRANSFER_UDNIE"}]},{type:"category",label:"Models - Text Embeddings",items:[{type:"doc",id:"06-api-reference/variables/ALL_MINILM_L6_V2",label:"ALL_MINILM_L6_V2"},{type:"doc",id:"06-api-reference/variables/ALL_MPNET_BASE_V2",label:"ALL_MPNET_BASE_V2"},{type:"doc",id:"06-api-reference/variables/CLIP_VIT_BASE_PATCH32_TEXT",label:"CLIP_VIT_BASE_PATCH32_TEXT"},{type:"doc",id:"06-api-reference/variables/MULTI_QA_MINILM_L6_COS_V1",label:"MULTI_QA_MINILM_L6_COS_V1"},{type:"doc",id:"06-api-reference/variables/MULTI_QA_MPNET_BASE_DOT_V1",label:"MULTI_QA_MPNET_BASE_DOT_V1"}]},{type:"category",label:"Models - Text to Speech",items:[{type:"doc",id:"06-api-reference/variables/KOKORO_MEDIUM",label:"KOKORO_MEDIUM"},{type:"doc",id:"06-api-reference/variables/KOKORO_SMALL",label:"KOKORO_SMALL"}]},{type:"category",label:"Models - Voice Activity 
Detection",items:[{type:"doc",id:"06-api-reference/variables/FSMN_VAD",label:"FSMN_VAD"}]},{type:"category",label:"OCR Supported Alphabets",items:[{type:"doc",id:"06-api-reference/variables/OCR_ABAZA",label:"OCR_ABAZA"},{type:"doc",id:"06-api-reference/variables/OCR_ADYGHE",label:"OCR_ADYGHE"},{type:"doc",id:"06-api-reference/variables/OCR_AFRIKAANS",label:"OCR_AFRIKAANS"},{type:"doc",id:"06-api-reference/variables/OCR_ALBANIAN",label:"OCR_ALBANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_AVAR",label:"OCR_AVAR"},{type:"doc",id:"06-api-reference/variables/OCR_AZERBAIJANI",label:"OCR_AZERBAIJANI"},{type:"doc",id:"06-api-reference/variables/OCR_BELARUSIAN",label:"OCR_BELARUSIAN"},{type:"doc",id:"06-api-reference/variables/OCR_BOSNIAN",label:"OCR_BOSNIAN"},{type:"doc",id:"06-api-reference/variables/OCR_BULGARIAN",label:"OCR_BULGARIAN"},{type:"doc",id:"06-api-reference/variables/OCR_CHECHEN",label:"OCR_CHECHEN"},{type:"doc",id:"06-api-reference/variables/OCR_CROATIAN",label:"OCR_CROATIAN"},{type:"doc",id:"06-api-reference/variables/OCR_CZECH",label:"OCR_CZECH"},{type:"doc",id:"06-api-reference/variables/OCR_DANISH",label:"OCR_DANISH"},{type:"doc",id:"06-api-reference/variables/OCR_DARGWA",label:"OCR_DARGWA"},{type:"doc",id:"06-api-reference/variables/OCR_DUTCH",label:"OCR_DUTCH"},{type:"doc",id:"06-api-reference/variables/OCR_ENGLISH",label:"OCR_ENGLISH"},{type:"doc",id:"06-api-reference/variables/OCR_ESTONIAN",label:"OCR_ESTONIAN"},{type:"doc",id:"06-api-reference/variables/OCR_FRENCH",label:"OCR_FRENCH"},{type:"doc",id:"06-api-reference/variables/OCR_GERMAN",label:"OCR_GERMAN"},{type:"doc",id:"06-api-reference/variables/OCR_HUNGARIAN",label:"OCR_HUNGARIAN"},{type:"doc",id:"06-api-reference/variables/OCR_ICELANDIC",label:"OCR_ICELANDIC"},{type:"doc",id:"06-api-reference/variables/OCR_INDONESIAN",label:"OCR_INDONESIAN"},{type:"doc",id:"06-api-reference/variables/OCR_INGUSH",label:"OCR_INGUSH"},{type:"doc",id:"06-api-reference/variables/OCR_IRISH",label:"OCR_IRISH
"},{type:"doc",id:"06-api-reference/variables/OCR_ITALIAN",label:"OCR_ITALIAN"},{type:"doc",id:"06-api-reference/variables/OCR_JAPANESE",label:"OCR_JAPANESE"},{type:"doc",id:"06-api-reference/variables/OCR_KANNADA",label:"OCR_KANNADA"},{type:"doc",id:"06-api-reference/variables/OCR_KARBADIAN",label:"OCR_KARBADIAN"},{type:"doc",id:"06-api-reference/variables/OCR_KOREAN",label:"OCR_KOREAN"},{type:"doc",id:"06-api-reference/variables/OCR_KURDISH",label:"OCR_KURDISH"},{type:"doc",id:"06-api-reference/variables/OCR_LAK",label:"OCR_LAK"},{type:"doc",id:"06-api-reference/variables/OCR_LATIN",label:"OCR_LATIN"},{type:"doc",id:"06-api-reference/variables/OCR_LATVIAN",label:"OCR_LATVIAN"},{type:"doc",id:"06-api-reference/variables/OCR_LEZGHIAN",label:"OCR_LEZGHIAN"},{type:"doc",id:"06-api-reference/variables/OCR_LITHUANIAN",label:"OCR_LITHUANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_MALAY",label:"OCR_MALAY"},{type:"doc",id:"06-api-reference/variables/OCR_MALTESE",label:"OCR_MALTESE"},{type:"doc",id:"06-api-reference/variables/OCR_MAORI",label:"OCR_MAORI"},{type:"doc",id:"06-api-reference/variables/OCR_MONGOLIAN",label:"OCR_MONGOLIAN"},{type:"doc",id:"06-api-reference/variables/OCR_NORWEGIAN",label:"OCR_NORWEGIAN"},{type:"doc",id:"06-api-reference/variables/OCR_OCCITAN",label:"OCR_OCCITAN"},{type:"doc",id:"06-api-reference/variables/OCR_PALI",label:"OCR_PALI"},{type:"doc",id:"06-api-reference/variables/OCR_POLISH",label:"OCR_POLISH"},{type:"doc",id:"06-api-reference/variables/OCR_PORTUGUESE",label:"OCR_PORTUGUESE"},{type:"doc",id:"06-api-reference/variables/OCR_ROMANIAN",label:"OCR_ROMANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_RUSSIAN",label:"OCR_RUSSIAN"},{type:"doc",id:"06-api-reference/variables/OCR_SERBIAN_CYRILLIC",label:"OCR_SERBIAN_CYRILLIC"},{type:"doc",id:"06-api-reference/variables/OCR_SERBIAN_LATIN",label:"OCR_SERBIAN_LATIN"},{type:"doc",id:"06-api-reference/variables/OCR_SIMPLIFIED_CHINESE",label:"OCR_SIMPLIFIED_CHINESE"},{type:"doc",id:"06-a
pi-reference/variables/OCR_SLOVAK",label:"OCR_SLOVAK"},{type:"doc",id:"06-api-reference/variables/OCR_SLOVENIAN",label:"OCR_SLOVENIAN"},{type:"doc",id:"06-api-reference/variables/OCR_SPANISH",label:"OCR_SPANISH"},{type:"doc",id:"06-api-reference/variables/OCR_SWAHILI",label:"OCR_SWAHILI"},{type:"doc",id:"06-api-reference/variables/OCR_SWEDISH",label:"OCR_SWEDISH"},{type:"doc",id:"06-api-reference/variables/OCR_TABASSARAN",label:"OCR_TABASSARAN"},{type:"doc",id:"06-api-reference/variables/OCR_TAGALOG",label:"OCR_TAGALOG"},{type:"doc",id:"06-api-reference/variables/OCR_TAJIK",label:"OCR_TAJIK"},{type:"doc",id:"06-api-reference/variables/OCR_TELUGU",label:"OCR_TELUGU"},{type:"doc",id:"06-api-reference/variables/OCR_TURKISH",label:"OCR_TURKISH"},{type:"doc",id:"06-api-reference/variables/OCR_UKRAINIAN",label:"OCR_UKRAINIAN"},{type:"doc",id:"06-api-reference/variables/OCR_UZBEK",label:"OCR_UZBEK"},{type:"doc",id:"06-api-reference/variables/OCR_VIETNAMESE",label:"OCR_VIETNAMESE"},{type:"doc",id:"06-api-reference/variables/OCR_WELSH",label:"OCR_WELSH"}]},{type:"category",label:"Other",items:[{type:"doc",id:"06-api-reference/enumerations/RnExecutorchErrorCode",label:"RnExecutorchErrorCode"},{type:"doc",id:"06-api-reference/classes/Logger",label:"Logger"},{type:"doc",id:"06-api-reference/classes/RnExecutorchError",label:"RnExecutorchError"},{type:"doc",id:"06-api-reference/interfaces/Frame",label:"Frame"}]},{type:"category",label:"TTS Supported 
Voices",items:[{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_HEART",label:"KOKORO_VOICE_AF_HEART"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_RIVER",label:"KOKORO_VOICE_AF_RIVER"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_SARAH",label:"KOKORO_VOICE_AF_SARAH"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_ADAM",label:"KOKORO_VOICE_AM_ADAM"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_MICHAEL",label:"KOKORO_VOICE_AM_MICHAEL"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_SANTA",label:"KOKORO_VOICE_AM_SANTA"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_BF_EMMA",label:"KOKORO_VOICE_BF_EMMA"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_BM_DANIEL",label:"KOKORO_VOICE_BM_DANIEL"}]},{type:"category",label:"Types",items:[{type:"doc",id:"06-api-reference/enumerations/CocoLabel",label:"CocoLabel"},{type:"doc",id:"06-api-reference/enumerations/DeeplabLabel",label:"DeeplabLabel"},{type:"doc",id:"06-api-reference/enumerations/DownloadStatus",label:"DownloadStatus"},{type:"doc",id:"06-api-reference/enumerations/HTTP_CODE",label:"HTTP_CODE"},{type:"doc",id:"06-api-reference/enumerations/ScalarType",label:"ScalarType"},{type:"doc",id:"06-api-reference/enumerations/SelfieSegmentationLabel",label:"SelfieSegmentationLabel"},{type:"doc",id:"06-api-reference/enumerations/SourceType",label:"SourceType"},{type:"doc",id:"06-api-reference/interfaces/Bbox",label:"Bbox"},{type:"doc",id:"06-api-reference/interfaces/ChatConfig",label:"ChatConfig"},{type:"doc",id:"06-api-reference/interfaces/ClassificationProps",label:"ClassificationProps"},{type:"doc",id:"06-api-reference/interfaces/ClassificationType",label:"ClassificationType"},{type:"doc",id:"06-api-reference/interfaces/ContextStrategy",label:"ContextStrategy"},{type:"doc",id:"06-api-reference/interfaces/DecodingOptions",label:"DecodingOptions"},{type:"doc",id:"06-api-reference/interfaces/Detection",label:"Detection"},{type:"doc",id:
"06-api-reference/interfaces/ExecutorchModuleProps",label:"ExecutorchModuleProps"},{type:"doc",id:"06-api-reference/interfaces/ExecutorchModuleType",label:"ExecutorchModuleType"},{type:"doc",id:"06-api-reference/interfaces/GenerationConfig",label:"GenerationConfig"},{type:"doc",id:"06-api-reference/interfaces/ImageEmbeddingsProps",label:"ImageEmbeddingsProps"},{type:"doc",id:"06-api-reference/interfaces/ImageEmbeddingsType",label:"ImageEmbeddingsType"},{type:"doc",id:"06-api-reference/interfaces/ImageSegmentationProps",label:"ImageSegmentationProps"},{type:"doc",id:"06-api-reference/interfaces/ImageSegmentationType",label:"ImageSegmentationType"},{type:"doc",id:"06-api-reference/interfaces/KokoroConfig",label:"KokoroConfig"},{type:"doc",id:"06-api-reference/interfaces/KokoroVoiceExtras",label:"KokoroVoiceExtras"},{type:"doc",id:"06-api-reference/interfaces/LLMConfig",label:"LLMConfig"},{type:"doc",id:"06-api-reference/interfaces/LLMProps",label:"LLMProps"},{type:"doc",id:"06-api-reference/interfaces/LLMType",label:"LLMType"},{type:"doc",id:"06-api-reference/interfaces/Message",label:"Message"},{type:"doc",id:"06-api-reference/interfaces/ObjectDetectionProps",label:"ObjectDetectionProps"},{type:"doc",id:"06-api-reference/interfaces/ObjectDetectionType",label:"ObjectDetectionType"},{type:"doc",id:"06-api-reference/interfaces/OCRDetection",label:"OCRDetection"},{type:"doc",id:"06-api-reference/interfaces/OCRProps",label:"OCRProps"},{type:"doc",id:"06-api-reference/interfaces/OCRType",label:"OCRType"},{type:"doc",id:"06-api-reference/interfaces/PixelData",label:"PixelData"},{type:"doc",id:"06-api-reference/interfaces/Point",label:"Point"},{type:"doc",id:"06-api-reference/interfaces/Segment",label:"Segment"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextModelConfig",label:"SpeechToTextModelConfig"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextProps",label:"SpeechToTextProps"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextType",label:"Speech
ToTextType"},{type:"doc",id:"06-api-reference/interfaces/StyleTransferProps",label:"StyleTransferProps"},{type:"doc",id:"06-api-reference/interfaces/StyleTransferType",label:"StyleTransferType"},{type:"doc",id:"06-api-reference/interfaces/TensorPtr",label:"TensorPtr"},{type:"doc",id:"06-api-reference/interfaces/TextEmbeddingsProps",label:"TextEmbeddingsProps"},{type:"doc",id:"06-api-reference/interfaces/TextEmbeddingsType",label:"TextEmbeddingsType"},{type:"doc",id:"06-api-reference/interfaces/TextToImageProps",label:"TextToImageProps"},{type:"doc",id:"06-api-reference/interfaces/TextToImageType",label:"TextToImageType"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechConfig",label:"TextToSpeechConfig"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechInput",label:"TextToSpeechInput"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechProps",label:"TextToSpeechProps"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechStreamingInput",label:"TextToSpeechStreamingInput"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechType",label:"TextToSpeechType"},{type:"doc",id:"06-api-reference/interfaces/TokenizerProps",label:"TokenizerProps"},{type:"doc",id:"06-api-reference/interfaces/TokenizerType",label:"TokenizerType"},{type:"doc",id:"06-api-reference/interfaces/ToolCall",label:"ToolCall"},{type:"doc",id:"06-api-reference/interfaces/ToolsConfig",label:"ToolsConfig"},{type:"doc",id:"06-api-reference/interfaces/TranscriptionResult",label:"TranscriptionResult"},{type:"doc",id:"06-api-reference/interfaces/TranscriptionSegment",label:"TranscriptionSegment"},{type:"doc",id:"06-api-reference/interfaces/VADProps",label:"VADProps"},{type:"doc",id:"06-api-reference/interfaces/VADType",label:"VADType"},{type:"doc",id:"06-api-reference/interfaces/VerticalOCRProps",label:"VerticalOCRProps"},{type:"doc",id:"06-api-reference/interfaces/VoiceConfig",label:"VoiceConfig"},{type:"doc",id:"06-api-reference/interfaces/Word",label:"Word"},{type:"doc",id:"06-api-re
ference/type-aliases/LabelEnum",label:"LabelEnum"},{type:"doc",id:"06-api-reference/type-aliases/LLMTool",label:"LLMTool"},{type:"doc",id:"06-api-reference/type-aliases/MessageRole",label:"MessageRole"},{type:"doc",id:"06-api-reference/type-aliases/ModelNameOf",label:"ModelNameOf"},{type:"doc",id:"06-api-reference/type-aliases/ModelSources",label:"ModelSources"},{type:"doc",id:"06-api-reference/type-aliases/OCRLanguage",label:"OCRLanguage"},{type:"doc",id:"06-api-reference/type-aliases/ResourceSource",label:"ResourceSource"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationConfig",label:"SegmentationConfig"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationLabels",label:"SegmentationLabels"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationModelName",label:"SegmentationModelName"},{type:"doc",id:"06-api-reference/type-aliases/SpeechToTextLanguage",label:"SpeechToTextLanguage"},{type:"doc",id:"06-api-reference/type-aliases/TensorBuffer",label:"TensorBuffer"},{type:"doc",id:"06-api-reference/type-aliases/TextToSpeechLanguage",label:"TextToSpeechLanguage"},{type:"doc",id:"06-api-reference/type-aliases/Triple",label:"Triple"},{type:"doc",id:"06-api-reference/variables/SPECIAL_TOKENS",label:"SPECIAL_TOKENS"}]},{type:"category",label:"Typescript 
API",items:[{type:"doc",id:"06-api-reference/classes/ClassificationModule",label:"ClassificationModule"},{type:"doc",id:"06-api-reference/classes/ExecutorchModule",label:"ExecutorchModule"},{type:"doc",id:"06-api-reference/classes/ImageEmbeddingsModule",label:"ImageEmbeddingsModule"},{type:"doc",id:"06-api-reference/classes/ImageSegmentationModule",label:"ImageSegmentationModule"},{type:"doc",id:"06-api-reference/classes/LLMModule",label:"LLMModule"},{type:"doc",id:"06-api-reference/classes/ObjectDetectionModule",label:"ObjectDetectionModule"},{type:"doc",id:"06-api-reference/classes/OCRModule",label:"OCRModule"},{type:"doc",id:"06-api-reference/classes/SpeechToTextModule",label:"SpeechToTextModule"},{type:"doc",id:"06-api-reference/classes/StyleTransferModule",label:"StyleTransferModule"},{type:"doc",id:"06-api-reference/classes/TextEmbeddingsModule",label:"TextEmbeddingsModule"},{type:"doc",id:"06-api-reference/classes/TextToImageModule",label:"TextToImageModule"},{type:"doc",id:"06-api-reference/classes/TextToSpeechModule",label:"TextToSpeechModule"},{type:"doc",id:"06-api-reference/classes/TokenizerModule",label:"TokenizerModule"},{type:"doc",id:"06-api-reference/classes/VADModule",label:"VADModule"},{type:"doc",id:"06-api-reference/classes/VerticalOCRModule",label:"VerticalOCRModule"}]},{type:"category",label:"Utilities - 
General",items:[{type:"category",label:"ResourceFetcherUtils",items:[{type:"category",label:"Functions",items:[{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/calculateDownloadProgress",label:"calculateDownloadProgress"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/getFilenameFromUri",label:"getFilenameFromUri"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/hashObject",label:"hashObject"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/removeFilePrefix",label:"removeFilePrefix"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/triggerHuggingFaceDownloadCounter",label:"triggerHuggingFaceDownloadCounter"}]}],link:{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/index"}},{type:"doc",id:"06-api-reference/classes/ResourceFetcher",label:"ResourceFetcher"},{type:"doc",id:"06-api-reference/interfaces/ExecutorchConfig",label:"ExecutorchConfig"},{type:"doc",id:"06-api-reference/interfaces/ResourceFetcherAdapter",label:"ResourceFetcherAdapter"},{type:"doc",id:"06-api-reference/functions/cleanupExecutorch",label:"cleanupExecutorch"},{type:"doc",id:"06-api-reference/functions/initExecutorch",label:"initExecutorch"}]},{type:"category",label:"Utilities - 
LLM",items:[{type:"doc",id:"06-api-reference/variables/DEFAULT_CHAT_CONFIG",label:"DEFAULT_CHAT_CONFIG"},{type:"doc",id:"06-api-reference/variables/DEFAULT_CONTEXT_BUFFER_TOKENS",label:"DEFAULT_CONTEXT_BUFFER_TOKENS"},{type:"doc",id:"06-api-reference/variables/DEFAULT_MESSAGE_HISTORY",label:"DEFAULT_MESSAGE_HISTORY"},{type:"doc",id:"06-api-reference/variables/DEFAULT_SYSTEM_PROMPT",label:"DEFAULT_SYSTEM_PROMPT"},{type:"doc",id:"06-api-reference/variables/parseToolCall",label:"parseToolCall"},{type:"doc",id:"06-api-reference/functions/DEFAULT_STRUCTURED_OUTPUT_PROMPT",label:"DEFAULT_STRUCTURED_OUTPUT_PROMPT"},{type:"doc",id:"06-api-reference/functions/fixAndValidateStructuredOutput",label:"fixAndValidateStructuredOutput"},{type:"doc",id:"06-api-reference/functions/getStructuredOutputPrompt",label:"getStructuredOutputPrompt"}]},{type:"category",label:"Utils",items:[{type:"doc",id:"06-api-reference/classes/MessageCountContextStrategy",label:"MessageCountContextStrategy"},{type:"doc",id:"06-api-reference/classes/NoopContextStrategy",label:"NoopContextStrategy"},{type:"doc",id:"06-api-reference/classes/SlidingWindowContextStrategy",label:"SlidingWindowContextStrategy"}]}]}; module.exports = typedocSidebar.items; \ No newline at end of file From 5ddad2fe1dd20f04eac4854d46c734e437203486 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Thu, 12 Feb 2026 14:24:02 +0100 Subject: [PATCH 23/37] feat: frame extractor for zero-copy approach --- .../common/rnexecutorch/utils/FrameExtractor.cpp | 2 +- .../common/rnexecutorch/utils/FrameExtractor.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp index baae35dc3..c62d1b21c 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp @@ 
-111,4 +111,4 @@ cv::Mat extractFromNativeBuffer(uint64_t bufferPtr) { #endif } -} // namespace rnexecutorch::utils +} // namespace rnexecutorch::utils \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h index f5d7c2094..dda4ff956 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h @@ -22,4 +22,4 @@ namespace rnexecutorch::utils { */ cv::Mat extractFromNativeBuffer(uint64_t bufferPtr); -} // namespace rnexecutorch::utils +} // namespace rnexecutorch::utils \ No newline at end of file From 41866637ac0e5be340ee12f7a0285091b45bb543 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 16 Feb 2026 10:37:11 +0100 Subject: [PATCH 24/37] feat: unify frame extraction and preprocessing --- .../common/rnexecutorch/models/VisionModel.cpp | 2 +- .../common/rnexecutorch/models/VisionModel.h | 2 +- .../rnexecutorch/models/classification/Classification.cpp | 2 +- .../common/rnexecutorch/utils/FrameProcessor.cpp | 2 +- .../common/rnexecutorch/utils/FrameProcessor.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index b88310e12..c0ce049f2 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -50,4 +50,4 @@ cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { return image; } -} // namespace rnexecutorch::models +} // namespace rnexecutorch::models \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h 
b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h index 82d544db3..e0ec03912 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h @@ -151,4 +151,4 @@ class VisionModel : public BaseModel { REGISTER_CONSTRUCTOR(models::VisionModel, std::string, std::shared_ptr); -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp index 0fba07108..b9fad1b88 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp @@ -73,4 +73,4 @@ Classification::postprocess(const Tensor &tensor) { return probs; } -} // namespace rnexecutorch::models::classification +} // namespace rnexecutorch::models::classification \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp index 30238ad5c..1d03b97ba 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp @@ -25,4 +25,4 @@ cv::Mat extractFrame(jsi::Runtime &runtime, const jsi::Object &frameData) { return extractFromNativeBuffer(bufferPtr); } -} // namespace rnexecutorch::utils +} // namespace rnexecutorch::utils \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h index 403f4bde9..6bbb3390d 100644 --- 
a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h @@ -24,4 +24,4 @@ using namespace facebook; */ cv::Mat extractFrame(jsi::Runtime &runtime, const jsi::Object &frameData); -} // namespace rnexecutorch::utils +} // namespace rnexecutorch::utils \ No newline at end of file From 6a89b0899e07989e09a3542f271904d7babf4448 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Tue, 17 Feb 2026 13:05:14 +0100 Subject: [PATCH 25/37] feat: initial version of vision model API --- .../app/object_detection/index.tsx | 167 +++++++++++++++++- .../host_objects/ModelHostObject.h | 2 +- .../metaprogramming/TypeConcepts.h | 9 +- .../models/embeddings/image/ImageEmbeddings.h | 2 +- .../BaseImageSegmentation.h | 2 +- .../models/style_transfer/StyleTransfer.h | 2 +- .../computer_vision/ObjectDetectionModule.ts | 165 +++++++++++++---- 7 files changed, 298 insertions(+), 51 deletions(-) diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index 6a43dd920..9e60589fb 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -1,16 +1,66 @@ import Spinner from '../../components/Spinner'; -import { BottomBar } from '../../components/BottomBar'; import { getImage } from '../../utils'; import { Detection, useObjectDetection, SSDLITE_320_MOBILENET_V3_LARGE, } from 'react-native-executorch'; -import { View, StyleSheet, Image } from 'react-native'; +import { View, StyleSheet, Image, TouchableOpacity, Text } from 'react-native'; import ImageWithBboxes from '../../components/ImageWithBboxes'; import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from '../../context'; import ScreenWrapper from '../../ScreenWrapper'; +import ColorPalette from '../../colors'; +import { Images } from 'react-native-nitro-image'; + +// Helper 
function to convert image URI to raw pixel data using NitroImage +async function imageUriToPixelData( + uri: string, + targetWidth: number, + targetHeight: number +): Promise<{ + data: ArrayBuffer; + width: number; + height: number; + channels: number; +}> { + try { + // Load image and resize to target dimensions + const image = await Images.loadFromFileAsync(uri); + const resized = image.resize(targetWidth, targetHeight); + + // Get pixel data as ArrayBuffer (RGBA format) + const pixelData = resized.toRawPixelData(); + const buffer = + pixelData instanceof ArrayBuffer ? pixelData : pixelData.buffer; + + // Calculate actual buffer dimensions (accounts for device pixel ratio) + const bufferSize = buffer?.byteLength || 0; + const totalPixels = bufferSize / 4; // RGBA = 4 bytes per pixel + const aspectRatio = targetWidth / targetHeight; + const actualHeight = Math.sqrt(totalPixels / aspectRatio); + const actualWidth = totalPixels / actualHeight; + + console.log('Requested:', targetWidth, 'x', targetHeight); + console.log('Buffer size:', bufferSize); + console.log( + 'Actual dimensions:', + Math.round(actualWidth), + 'x', + Math.round(actualHeight) + ); + + return { + data: buffer, + width: Math.round(actualWidth), + height: Math.round(actualHeight), + channels: 4, // RGBA + }; + } catch (error) { + console.error('Error loading image with NitroImage:', error); + throw error; + } +} export default function ObjectDetectionScreen() { const [imageUri, setImageUri] = useState(''); @@ -42,10 +92,41 @@ export default function ObjectDetectionScreen() { const runForward = async () => { if (imageUri) { try { - const output = await ssdLite.forward(imageUri); + console.log('Running forward with string URI...'); + const output = await ssdLite.forward(imageUri, 0.5); + console.log('String URI result:', output.length, 'detections'); setResults(output); } catch (e) { - console.error(e); + console.error('Error in runForward:', e); + } + } + }; + + const runForwardPixels = async () => { 
+ if (imageUri && imageDimensions) { + try { + console.log('Converting image to pixel data...'); + // Resize to 640x640 to avoid memory issues + const intermediateSize = 640; + const pixelData = await imageUriToPixelData( + imageUri, + intermediateSize, + intermediateSize + ); + + console.log('Running forward with pixel data...', { + width: pixelData.width, + height: pixelData.height, + channels: pixelData.channels, + dataSize: pixelData.data.byteLength, + }); + + // Run inference using unified forward() API + const output = await ssdLite.forward(pixelData, 0.5); + console.log('Pixel data result:', output.length, 'detections'); + setResults(output); + } catch (e) { + console.error('Error in runForwardPixels:', e); } } }; @@ -81,10 +162,41 @@ export default function ObjectDetectionScreen() { )} - + + {/* Custom bottom bar with two buttons */} + + + handleCameraPress(false)}> + πŸ“· Gallery + + + + + + Run (String) + + + + Run (Pixels) + + + ); } @@ -129,4 +241,43 @@ const styles = StyleSheet.create({ width: '100%', height: '100%', }, + bottomContainer: { + width: '100%', + gap: 15, + alignItems: 'center', + padding: 16, + flex: 1, + }, + bottomIconsContainer: { + flexDirection: 'row', + justifyContent: 'center', + width: '100%', + }, + iconText: { + fontSize: 16, + color: ColorPalette.primary, + }, + buttonsRow: { + flexDirection: 'row', + width: '100%', + gap: 10, + }, + button: { + height: 50, + justifyContent: 'center', + alignItems: 'center', + backgroundColor: ColorPalette.primary, + color: '#fff', + borderRadius: 8, + }, + halfButton: { + flex: 1, + }, + buttonDisabled: { + opacity: 0.5, + }, + buttonText: { + color: '#fff', + fontSize: 16, + }, }); diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h index d6489c9be..8b0384626 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h +++ 
b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h @@ -433,4 +433,4 @@ template class ModelHostObject : public JsiHostObject { std::shared_ptr callInvoker; }; -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h index 2d7612f25..fdf8c9dba 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h @@ -12,8 +12,13 @@ template concept SameAs = std::is_same_v; template -concept HasGenerate = requires(T t) { - { &T::generate }; +concept HasGenerateFromString = requires(T t) { + { &T::generateFromString }; +}; + +template +concept HasGenerateFromPixels = requires(T t) { + { &T::generateFromPixels }; }; template diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h index 7e114e939..9a1d6429b 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h @@ -27,4 +27,4 @@ class ImageEmbeddings final : public BaseEmbeddings { REGISTER_CONSTRUCTOR(models::embeddings::ImageEmbeddings, std::string, std::shared_ptr); -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h index f46f41d69..34ad8dffd 100644 --- 
a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h @@ -58,4 +58,4 @@ class BaseImageSegmentation : public BaseModel { REGISTER_CONSTRUCTOR(models::image_segmentation::BaseImageSegmentation, std::string, std::vector, std::vector, std::shared_ptr); -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h index 73744c4d8..8eed3c888 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h @@ -33,4 +33,4 @@ class StyleTransfer : public BaseModel { REGISTER_CONSTRUCTOR(models::style_transfer::StyleTransfer, std::string, std::shared_ptr); -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index f056cff62..762d09987 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -1,52 +1,143 @@ -import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource, PixelData } from '../../types/common'; -import { Detection } from '../../types/objectDetection'; +import { BaseModule } from '../BaseModule'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; -import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; -import { Logger } from 
'../../common/Logger'; -import { VisionModule } from './VisionModule'; +import { RnExecutorchError } from '../../errors/errorUtils'; +import { Frame, PixelData, ScalarType } from '../../types/common'; /** - * Module for object detection tasks. + * Base class for computer vision models that support multiple input types. + * + * VisionModule extends BaseModule with: + * - Unified `forward()` API accepting string paths or raw pixel data + * - `runOnFrame` getter for real-time VisionCamera frame processing + * - Shared frame processor creation logic + * + * Subclasses should only implement model-specific loading logic. * * @category Typescript API */ -export class ObjectDetectionModule extends VisionModule { +function isPixelData(input: unknown): input is PixelData { + return ( + typeof input === 'object' && + input !== null && + 'dataPtr' in input && + input.dataPtr instanceof Uint8Array && + 'sizes' in input && + Array.isArray(input.sizes) && + input.sizes.length === 3 && + 'scalarType' in input && + input.scalarType === ScalarType.BYTE + ); +} + +export abstract class VisionModule extends BaseModule { /** - * Loads the model, where `modelSource` is a string that specifies the location of the model binary. - * To track the download progress, supply a callback function `onDownloadProgressCallback`. + * Synchronous worklet function for real-time VisionCamera frame processing. + * + * Only available after the model is loaded. Returns null if not loaded. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * @example + * ```typescript + * const model = new ClassificationModule(); + * await model.load({ modelSource: MODEL }); * - * @param model - Object containing `modelSource`. - * @param onDownloadProgressCallback - Optional callback to monitor download progress. 
+ * // Use the functional form of setState to store the worklet β€” passing it + * // directly would cause React to invoke it immediately as an updater fn. + * const [runOnFrame, setRunOnFrame] = useState(null); + * setRunOnFrame(() => model.runOnFrame); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!runOnFrame) return; + * const result = runOnFrame(frame); + * frame.dispose(); + * } + * }); + * ``` */ - async load( - model: { modelSource: ResourceSource }, - onDownloadProgressCallback: (progress: number) => void = () => {} - ): Promise { - try { - const paths = await ResourceFetcher.fetch( - onDownloadProgressCallback, - model.modelSource - ); + get runOnFrame(): ((frame: Frame, ...args: any[]) => TOutput) | null { + if (!this.nativeModule?.generateFromFrame) { + return null; + } - if (!paths?.[0]) { - throw new RnExecutorchError( - RnExecutorchErrorCode.DownloadInterrupted, - 'The download has been interrupted. As a result, not every file was downloaded. Please retry the download.' - ); - } + // Extract pure JSI function reference (runs on JS thread) + const nativeGenerateFromFrame = this.nativeModule.generateFromFrame; - this.nativeModule = global.loadObjectDetection(paths[0]); - } catch (error) { - Logger.error('Load failed:', error); - throw parseUnknownError(error); - } + // Return worklet that captures ONLY the JSI function + return (frame: any, ...args: any[]): TOutput => { + 'worklet'; + + let nativeBuffer: any = null; + try { + nativeBuffer = frame.getNativeBuffer(); + const frameData = { + nativeBuffer: nativeBuffer.pointer, + }; + return nativeGenerateFromFrame(frameData, ...args); + } finally { + if (nativeBuffer?.release) { + nativeBuffer.release(); + } + } + }; } - async forward( - input: string | PixelData, - detectionThreshold: number = 0.5 - ): Promise { - return super.forward(input, detectionThreshold); + /** + * Executes the model's forward pass with automatic input type detection. 
+ * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * This method is async and cannot be called in worklet context. + * + * @param input - Image source (string path or PixelData object) + * @param args - Additional model-specific arguments + * @returns A Promise that resolves to the model output. + * + * @example + * ```typescript + * // String path (async) + * const result1 = await model.forward('file:///path/to/image.jpg'); + * + * // Pixel data (async) + * const result2 = await model.forward({ + * dataPtr: new Uint8Array(pixelBuffer), + * sizes: [480, 640, 3], + * scalarType: ScalarType.BYTE + * }); + * + * // For VisionCamera frames, use runOnFrame in worklet: + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!model.runOnFrame) return; + * const result = model.runOnFrame(frame); + * } + * }); + * ``` + */ + async forward(input: string | PixelData, ...args: any[]): Promise { + if (this.nativeModule == null) + throw new RnExecutorchError( + RnExecutorchErrorCode.ModuleNotLoaded, + 'The model is currently not loaded. Please load the model before calling forward().' + ); + + // Type detection and routing + if (typeof input === 'string') { + return await this.nativeModule.generateFromString(input, ...args); + } else if (isPixelData(input)) { + return await this.nativeModule.generateFromPixels(input, ...args); + } else { + throw new RnExecutorchError( + RnExecutorchErrorCode.InvalidArgument, + 'Invalid input: expected string path or PixelData object. For VisionCamera frames, use runOnFrame instead.' 
+ ); + } } } From fafb2cce1c26beb6740a90929f0290752c1d5d4b Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Tue, 17 Feb 2026 17:51:10 +0100 Subject: [PATCH 26/37] refactor: errors, logs, unnecessary comments, use existing TensorPtr --- .../app/object_detection/index.tsx | 61 ++++++++++--------- .../host_objects/JsiConversions.h | 19 ++++++ 2 files changed, 52 insertions(+), 28 deletions(-) diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index 9e60589fb..54c0eb18f 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -13,7 +13,26 @@ import ScreenWrapper from '../../ScreenWrapper'; import ColorPalette from '../../colors'; import { Images } from 'react-native-nitro-image'; -// Helper function to convert image URI to raw pixel data using NitroImage +// Helper function to convert BGRA to RGB +function convertBGRAtoRGB( + buffer: ArrayBuffer, + width: number, + height: number +): ArrayBuffer { + const source = new Uint8Array(buffer); + const rgb = new Uint8Array(width * height * 3); + + for (let i = 0; i < width * height; i++) { + // BGRA format: [B, G, R, A] β†’ RGB: [R, G, B] + rgb[i * 3 + 0] = source[i * 4 + 2]; // R + rgb[i * 3 + 1] = source[i * 4 + 1]; // G + rgb[i * 3 + 2] = source[i * 4 + 0]; // B + } + + return rgb.buffer; +} + +// Helper function to convert image URI to raw RGB pixel data async function imageUriToPixelData( uri: string, targetWidth: number, @@ -29,32 +48,19 @@ async function imageUriToPixelData( const image = await Images.loadFromFileAsync(uri); const resized = image.resize(targetWidth, targetHeight); - // Get pixel data as ArrayBuffer (RGBA format) - const pixelData = resized.toRawPixelData(); + // Get pixel data as ArrayBuffer (BGRA format from NitroImage) + const rawPixelData = resized.toRawPixelData(); const buffer = - pixelData instanceof ArrayBuffer ? 
pixelData : pixelData.buffer; - - // Calculate actual buffer dimensions (accounts for device pixel ratio) - const bufferSize = buffer?.byteLength || 0; - const totalPixels = bufferSize / 4; // RGBA = 4 bytes per pixel - const aspectRatio = targetWidth / targetHeight; - const actualHeight = Math.sqrt(totalPixels / aspectRatio); - const actualWidth = totalPixels / actualHeight; + rawPixelData instanceof ArrayBuffer ? rawPixelData : rawPixelData.buffer; - console.log('Requested:', targetWidth, 'x', targetHeight); - console.log('Buffer size:', bufferSize); - console.log( - 'Actual dimensions:', - Math.round(actualWidth), - 'x', - Math.round(actualHeight) - ); + // Convert BGRA to RGB as required by the native API + const rgbBuffer = convertBGRAtoRGB(buffer, targetWidth, targetHeight); return { - data: buffer, - width: Math.round(actualWidth), - height: Math.round(actualHeight), - channels: 4, // RGBA + data: rgbBuffer, + width: targetWidth, + height: targetHeight, + channels: 3, // RGB }; } catch (error) { console.error('Error loading image with NitroImage:', error); @@ -106,12 +112,11 @@ export default function ObjectDetectionScreen() { if (imageUri && imageDimensions) { try { console.log('Converting image to pixel data...'); - // Resize to 640x640 to avoid memory issues - const intermediateSize = 640; + // Use original dimensions - let the model resize internally const pixelData = await imageUriToPixelData( imageUri, - intermediateSize, - intermediateSize + imageDimensions.width, + imageDimensions.height ); console.log('Running forward with pixel data...', { @@ -122,7 +127,7 @@ export default function ObjectDetectionScreen() { }); // Run inference using unified forward() API - const output = await ssdLite.forward(pixelData, 0.5); + const output = await ssdLite.forward(pixelData, 0.3); console.log('Pixel data result:', output.length, 'detections'); setResults(output); } catch (e) { diff --git 
a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index 7b97108b9..f4bfe09a6 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -369,6 +369,25 @@ inline jsi::Value getJsiValue(uint64_t val, jsi::Runtime &runtime) { return {runtime, bigInt}; } +inline jsi::Value getJsiValue(const std::vector &vec, + jsi::Runtime &runtime) { + jsi::Array array(runtime, vec.size()); + for (size_t i = 0; i < vec.size(); i++) { + // JS numbers are doubles. Large uint64s > 2^53 will lose precision. + array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); + } + return {runtime, array}; +} + +inline jsi::Value getJsiValue(const std::vector &vec, + jsi::Runtime &runtime) { + jsi::Array array(runtime, vec.size()); + for (size_t i = 0; i < vec.size(); i++) { + array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); + } + return {runtime, array}; +} + inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) { return {runtime, val}; } From 081d6ac3e60dec970988d699a1d0bc3b8484266d Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 18 Feb 2026 13:03:22 +0100 Subject: [PATCH 27/37] refactor: add or remove empty lines --- .../rnexecutorch/models/classification/Classification.cpp | 2 +- .../rnexecutorch/models/embeddings/image/ImageEmbeddings.h | 2 +- .../models/image_segmentation/BaseImageSegmentation.h | 2 +- .../common/rnexecutorch/models/style_transfer/StyleTransfer.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp index b9fad1b88..0fba07108 100644 --- 
a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp @@ -73,4 +73,4 @@ Classification::postprocess(const Tensor &tensor) { return probs; } -} // namespace rnexecutorch::models::classification \ No newline at end of file +} // namespace rnexecutorch::models::classification diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h index 9a1d6429b..7e114e939 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h @@ -27,4 +27,4 @@ class ImageEmbeddings final : public BaseEmbeddings { REGISTER_CONSTRUCTOR(models::embeddings::ImageEmbeddings, std::string, std::shared_ptr); -} // namespace rnexecutorch \ No newline at end of file +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h index 34ad8dffd..f46f41d69 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h @@ -58,4 +58,4 @@ class BaseImageSegmentation : public BaseModel { REGISTER_CONSTRUCTOR(models::image_segmentation::BaseImageSegmentation, std::string, std::vector, std::vector, std::shared_ptr); -} // namespace rnexecutorch \ No newline at end of file +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h 
b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h index 8eed3c888..73744c4d8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h @@ -33,4 +33,4 @@ class StyleTransfer : public BaseModel { REGISTER_CONSTRUCTOR(models::style_transfer::StyleTransfer, std::string, std::shared_ptr); -} // namespace rnexecutorch \ No newline at end of file +} // namespace rnexecutorch From 6192a4a7b62a96ce59254ad8c602f0c5c354a06f Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Thu, 19 Feb 2026 22:34:20 +0100 Subject: [PATCH 28/37] fix: errors after rebase --- .../common/rnexecutorch/host_objects/JsiConversions.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index f4bfe09a6..586d924d3 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -369,16 +369,6 @@ inline jsi::Value getJsiValue(uint64_t val, jsi::Runtime &runtime) { return {runtime, bigInt}; } -inline jsi::Value getJsiValue(const std::vector &vec, - jsi::Runtime &runtime) { - jsi::Array array(runtime, vec.size()); - for (size_t i = 0; i < vec.size(); i++) { - // JS numbers are doubles. Large uint64s > 2^53 will lose precision. 
- array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); - } - return {runtime, array}; -} - inline jsi::Value getJsiValue(const std::vector &vec, jsi::Runtime &runtime) { jsi::Array array(runtime, vec.size()); From f9108652c17666ae9732de7807d8f715c901ef8b Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Tue, 24 Feb 2026 09:02:14 +0100 Subject: [PATCH 29/37] feat: suggested changes / improve comments --- .../common/rnexecutorch/host_objects/JsiConversions.h | 9 --------- .../common/rnexecutorch/metaprogramming/TypeConcepts.h | 5 +++++ 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index 586d924d3..7b97108b9 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -369,15 +369,6 @@ inline jsi::Value getJsiValue(uint64_t val, jsi::Runtime &runtime) { return {runtime, bigInt}; } -inline jsi::Value getJsiValue(const std::vector &vec, - jsi::Runtime &runtime) { - jsi::Array array(runtime, vec.size()); - for (size_t i = 0; i < vec.size(); i++) { - array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); - } - return {runtime, array}; -} - inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) { return {runtime, val}; } diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h index fdf8c9dba..216e2bae3 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h @@ -11,6 +11,11 @@ concept DerivedFromOrSameAs = std::is_base_of_v; template concept SameAs = std::is_same_v; 
+template +concept HasGenerate = requires(T t) { + { &T::generate }; +}; + template concept HasGenerateFromString = requires(T t) { { &T::generateFromString }; From 0a8493b1deb4c4ef642fe0de8e03ac635642173e Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 25 Feb 2026 13:40:50 +0100 Subject: [PATCH 30/37] feat: make all cv models compatible with Vision Camera --- apps/computer-vision/app/_layout.tsx | 40 + .../app/classification_live/index.tsx | 255 ++++++ .../app/image_segmentation_live/index.tsx | 292 +++++++ .../app/object_detection_live/index.tsx | 101 ++- apps/computer-vision/app/ocr_live/index.tsx | 329 ++++++++ .../app/style_transfer/index.tsx | 73 +- .../app/style_transfer_live/index.tsx | 274 ++++++ .../app/vision_camera_live/index.tsx | 798 ++++++++++++++++++ .../host_objects/JsiConversions.h | 51 ++ .../rnexecutorch/models/VisionModel.cpp | 11 +- .../common/rnexecutorch/models/VisionModel.h | 14 + .../models/classification/Classification.cpp | 66 +- .../models/classification/Classification.h | 21 +- .../embeddings/image/ImageEmbeddings.cpp | 72 +- .../models/embeddings/image/ImageEmbeddings.h | 22 +- .../BaseImageSegmentation.cpp | 143 ++-- .../BaseImageSegmentation.h | 44 +- .../models/image_segmentation/Types.h | 17 + .../object_detection/ObjectDetection.cpp | 5 +- .../common/rnexecutorch/models/ocr/OCR.cpp | 66 +- .../common/rnexecutorch/models/ocr/OCR.h | 11 +- .../models/style_transfer/StyleTransfer.cpp | 94 ++- .../models/style_transfer/StyleTransfer.h | 27 +- .../models/style_transfer/Types.h | 14 + .../models/vertical_ocr/VerticalOCR.cpp | 70 +- .../models/vertical_ocr/VerticalOCR.h | 11 +- .../tests/integration/ClassificationTest.cpp | 16 +- .../tests/integration/ImageEmbeddingsTest.cpp | 16 +- .../tests/integration/OCRTest.cpp | 16 +- .../tests/integration/StyleTransferTest.cpp | 43 +- .../tests/integration/VerticalOCRTest.cpp | 41 +- .../src/controllers/BaseOCRController.ts | 57 +- .../computer_vision/useImageSegmentation.ts | 16 + 
.../src/hooks/computer_vision/useOCR.ts | 14 +- .../hooks/computer_vision/useVerticalOCR.ts | 14 +- .../src/hooks/useModule.ts | 2 + .../computer_vision/ClassificationModule.ts | 25 +- .../computer_vision/ImageEmbeddingsModule.ts | 22 +- .../ImageSegmentationModule.ts | 134 ++- .../computer_vision/StyleTransferModule.ts | 21 +- .../src/types/classification.ts | 45 +- .../src/types/imageEmbeddings.ts | 27 +- .../src/types/imageSegmentation.ts | 35 +- .../react-native-executorch/src/types/ocr.ts | 32 +- .../src/types/styleTransfer.ts | 29 +- 45 files changed, 3231 insertions(+), 295 deletions(-) create mode 100644 apps/computer-vision/app/classification_live/index.tsx create mode 100644 apps/computer-vision/app/image_segmentation_live/index.tsx create mode 100644 apps/computer-vision/app/ocr_live/index.tsx create mode 100644 apps/computer-vision/app/style_transfer_live/index.tsx create mode 100644 apps/computer-vision/app/vision_camera_live/index.tsx create mode 100644 packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/Types.h create mode 100644 packages/react-native-executorch/common/rnexecutorch/models/style_transfer/Types.h diff --git a/apps/computer-vision/app/_layout.tsx b/apps/computer-vision/app/_layout.tsx index 3970ac316..b614b54bf 100644 --- a/apps/computer-vision/app/_layout.tsx +++ b/apps/computer-vision/app/_layout.tsx @@ -91,6 +91,46 @@ export default function _layout() { headerTitleStyle: { color: ColorPalette.primary }, }} /> + + + + + { + setGlobalGenerating(isGenerating); + }, [isGenerating, setGlobalGenerating]); + + const [topLabel, setTopLabel] = useState(''); + const [topScore, setTopScore] = useState(0); + const [fps, setFps] = useState(0); + const lastFrameTimeRef = useRef(Date.now()); + + const cameraPermission = useCameraPermission(); + const devices = useCameraDevices(); + const device = devices.find((d) => d.position === 'back') ?? 
devices[0]; + + const format = useMemo(() => { + if (device == null) return undefined; + try { + return getCameraFormat(device, Templates.FrameProcessing); + } catch { + return undefined; + } + }, [device]); + + const updateStats = useCallback( + (result: { label: string; score: number }) => { + setTopLabel(result.label); + setTopScore(result.score); + const now = Date.now(); + const timeDiff = now - lastFrameTimeRef.current; + if (timeDiff > 0) { + setFps(Math.round(1000 / timeDiff)); + } + lastFrameTimeRef.current = now; + }, + [] + ); + + const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + if (!runOnFrame) { + frame.dispose(); + return; + } + try { + const result = runOnFrame(frame); + if (result) { + // find the top-1 entry + let bestLabel = ''; + let bestScore = -1; + const entries = Object.entries(result); + for (let i = 0; i < entries.length; i++) { + const [label, score] = entries[i]; + if ((score as number) > bestScore) { + bestScore = score as number; + bestLabel = label; + } + } + scheduleOnRN(updateStats, { label: bestLabel, score: bestScore }); + } + } catch { + // ignore frame errors + } finally { + frame.dispose(); + } + }, + }); + + if (!isReady) { + return ( + + ); + } + + if (!cameraPermission.hasPermission) { + return ( + + Camera access needed + cameraPermission.requestPermission()} + style={styles.button} + > + Grant Permission + + + ); + } + + if (device == null) { + return ( + + No camera device found + + ); + } + + return ( + + + + + + + + + + {topLabel || 'β€”'} + + + {topLabel ? 
(topScore * 100).toFixed(1) + '%' : ''} + + + + + {fps} + fps + + + + + ); +} + +const styles = StyleSheet.create({ + container: { + flex: 1, + backgroundColor: 'black', + }, + centered: { + flex: 1, + backgroundColor: 'black', + justifyContent: 'center', + alignItems: 'center', + gap: 16, + }, + message: { + color: 'white', + fontSize: 18, + }, + button: { + paddingHorizontal: 24, + paddingVertical: 12, + backgroundColor: ColorPalette.primary, + borderRadius: 24, + }, + buttonText: { + color: 'white', + fontSize: 15, + fontWeight: '600', + letterSpacing: 0.3, + }, + bottomBarWrapper: { + position: 'absolute', + bottom: 0, + left: 0, + right: 0, + alignItems: 'center', + paddingHorizontal: 16, + }, + bottomBar: { + flexDirection: 'row', + alignItems: 'center', + backgroundColor: 'rgba(0, 0, 0, 0.55)', + borderRadius: 24, + paddingHorizontal: 28, + paddingVertical: 10, + gap: 24, + maxWidth: '100%', + }, + labelContainer: { + flex: 1, + alignItems: 'flex-start', + }, + labelText: { + color: 'white', + fontSize: 16, + fontWeight: '700', + }, + scoreText: { + color: 'rgba(255,255,255,0.7)', + fontSize: 13, + fontWeight: '500', + }, + statItem: { + alignItems: 'center', + }, + statValue: { + color: 'white', + fontSize: 22, + fontWeight: '700', + letterSpacing: -0.5, + }, + statLabel: { + color: 'rgba(255,255,255,0.55)', + fontSize: 11, + fontWeight: '500', + textTransform: 'uppercase', + letterSpacing: 0.8, + }, + statDivider: { + width: 1, + height: 32, + backgroundColor: 'rgba(255,255,255,0.2)', + }, +}); diff --git a/apps/computer-vision/app/image_segmentation_live/index.tsx b/apps/computer-vision/app/image_segmentation_live/index.tsx new file mode 100644 index 000000000..f665c63c5 --- /dev/null +++ b/apps/computer-vision/app/image_segmentation_live/index.tsx @@ -0,0 +1,292 @@ +import React, { + useCallback, + useContext, + useEffect, + useMemo, + useRef, + useState, +} from 'react'; +import { + StatusBar, + StyleSheet, + Text, + TouchableOpacity, + 
useWindowDimensions, + View, +} from 'react-native'; +import { useSafeAreaInsets } from 'react-native-safe-area-context'; + +import { + Camera, + getCameraFormat, + Templates, + useCameraDevices, + useCameraPermission, + useFrameOutput, +} from 'react-native-vision-camera'; +import { scheduleOnRN } from 'react-native-worklets'; +import { + DEEPLAB_V3_RESNET50, + useImageSegmentation, +} from 'react-native-executorch'; +import { + Canvas, + Image as SkiaImage, + Skia, + AlphaType, + ColorType, + SkImage, +} from '@shopify/react-native-skia'; +import { GeneratingContext } from '../../context'; +import Spinner from '../../components/Spinner'; +import ColorPalette from '../../colors'; + +// RGBA colors for each DeepLab V3 class (alpha = 180 for semi-transparency) +const CLASS_COLORS: number[][] = [ + [0, 0, 0, 0], // 0 background β€” transparent + [51, 255, 87, 180], // 1 aeroplane + [51, 87, 255, 180], // 2 bicycle + [255, 51, 246, 180], // 3 bird + [51, 255, 246, 180], // 4 boat + [243, 255, 51, 180], // 5 bottle + [141, 51, 255, 180], // 6 bus + [255, 131, 51, 180], // 7 car + [51, 255, 131, 180], // 8 cat + [131, 51, 255, 180], // 9 chair + [255, 255, 51, 180], // 10 cow + [51, 255, 255, 180], // 11 diningtable + [255, 51, 143, 180], // 12 dog + [127, 51, 255, 180], // 13 horse + [51, 255, 175, 180], // 14 motorbike + [255, 175, 51, 180], // 15 person + [179, 255, 51, 180], // 16 pottedplant + [255, 87, 51, 180], // 17 sheep + [255, 51, 162, 180], // 18 sofa + [51, 162, 255, 180], // 19 train + [162, 51, 255, 180], // 20 tvmonitor +]; + +export default function ImageSegmentationLiveScreen() { + const insets = useSafeAreaInsets(); + const { width: screenWidth, height: screenHeight } = useWindowDimensions(); + + const { isReady, isGenerating, downloadProgress, runOnFrame } = + useImageSegmentation({ model: DEEPLAB_V3_RESNET50 }); + const { setGlobalGenerating } = useContext(GeneratingContext); + + useEffect(() => { + setGlobalGenerating(isGenerating); + }, 
[isGenerating, setGlobalGenerating]); + + const [maskImage, setMaskImage] = useState(null); + const [fps, setFps] = useState(0); + const lastFrameTimeRef = useRef(Date.now()); + + const cameraPermission = useCameraPermission(); + const devices = useCameraDevices(); + const device = devices.find((d) => d.position === 'back') ?? devices[0]; + + const format = useMemo(() => { + if (device == null) return undefined; + try { + return getCameraFormat(device, Templates.FrameProcessing); + } catch { + return undefined; + } + }, [device]); + + const updateMask = useCallback((img: SkImage) => { + setMaskImage(img); + const now = Date.now(); + const timeDiff = now - lastFrameTimeRef.current; + if (timeDiff > 0) { + setFps(Math.round(1000 / timeDiff)); + } + lastFrameTimeRef.current = now; + }, []); + + const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + dropFramesWhileBusy: true, + onFrame(frame) { + 'worklet'; + if (!runOnFrame) { + frame.dispose(); + return; + } + try { + const result = runOnFrame(frame, [], false); + if (result?.ARGMAX) { + const argmax: Int32Array = result.ARGMAX; + // Model output is always square (modelImageSize Γ— modelImageSize). + // Derive width/height from argmax length (sqrt for square output). + const side = Math.round(Math.sqrt(argmax.length)); + const width = side; + const height = side; + + // Build RGBA pixel buffer on the worklet thread to avoid transferring + // the large Int32Array across the workletβ†’RN boundary via scheduleOnRN. + const pixels = new Uint8Array(width * height * 4); + for (let i = 0; i < argmax.length; i++) { + const color = CLASS_COLORS[argmax[i]] ?? 
[0, 0, 0, 0]; + pixels[i * 4] = color[0]!; + pixels[i * 4 + 1] = color[1]!; + pixels[i * 4 + 2] = color[2]!; + pixels[i * 4 + 3] = color[3]!; + } + + const skData = Skia.Data.fromBytes(pixels); + const img = Skia.Image.MakeImage( + { + width, + height, + alphaType: AlphaType.Unpremul, + colorType: ColorType.RGBA_8888, + }, + skData, + width * 4 + ); + if (img) { + scheduleOnRN(updateMask, img); + } + } + } catch (e) { + console.log('frame error:', String(e)); + } finally { + frame.dispose(); + } + }, + }); + + if (!isReady) { + return ( + + ); + } + + if (!cameraPermission.hasPermission) { + return ( + + Camera access needed + cameraPermission.requestPermission()} + style={styles.button} + > + Grant Permission + + + ); + } + + if (device == null) { + return ( + + No camera device found + + ); + } + + return ( + + + + + + {maskImage && ( + + + + )} + + + + + {fps} + fps + + + + + ); +} + +const styles = StyleSheet.create({ + container: { + flex: 1, + backgroundColor: 'black', + }, + centered: { + flex: 1, + backgroundColor: 'black', + justifyContent: 'center', + alignItems: 'center', + gap: 16, + }, + message: { + color: 'white', + fontSize: 18, + }, + button: { + paddingHorizontal: 24, + paddingVertical: 12, + backgroundColor: ColorPalette.primary, + borderRadius: 24, + }, + buttonText: { + color: 'white', + fontSize: 15, + fontWeight: '600', + letterSpacing: 0.3, + }, + bottomBarWrapper: { + position: 'absolute', + bottom: 0, + left: 0, + right: 0, + alignItems: 'center', + }, + bottomBar: { + flexDirection: 'row', + alignItems: 'center', + backgroundColor: 'rgba(0, 0, 0, 0.55)', + borderRadius: 24, + paddingHorizontal: 28, + paddingVertical: 10, + gap: 24, + }, + statItem: { + alignItems: 'center', + }, + statValue: { + color: 'white', + fontSize: 22, + fontWeight: '700', + letterSpacing: -0.5, + }, + statLabel: { + color: 'rgba(255,255,255,0.55)', + fontSize: 11, + fontWeight: '500', + textTransform: 'uppercase', + letterSpacing: 0.8, + }, +}); diff --git 
a/apps/computer-vision/app/object_detection_live/index.tsx b/apps/computer-vision/app/object_detection_live/index.tsx index cd1e9cca8..d883fe8b9 100644 --- a/apps/computer-vision/app/object_detection_live/index.tsx +++ b/apps/computer-vision/app/object_detection_live/index.tsx @@ -35,6 +35,7 @@ import ColorPalette from '../../colors'; export default function ObjectDetectionLiveScreen() { const insets = useSafeAreaInsets(); + const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); const model = useObjectDetection({ model: SSDLITE_320_MOBILENET_V3_LARGE }); const { setGlobalGenerating } = useContext(GeneratingContext); @@ -43,7 +44,8 @@ export default function ObjectDetectionLiveScreen() { setGlobalGenerating(model.isGenerating); }, [model.isGenerating, setGlobalGenerating]); - const [detectionCount, setDetectionCount] = useState(0); + const [detections, setDetections] = useState([]); + const [imageSize, setImageSize] = useState({ width: 1, height: 1 }); const [fps, setFps] = useState(0); const lastFrameTimeRef = useRef(Date.now()); @@ -60,15 +62,23 @@ export default function ObjectDetectionLiveScreen() { } }, [device]); - const updateStats = useCallback((results: Detection[]) => { - setDetectionCount(results.length); - const now = Date.now(); - const timeDiff = now - lastFrameTimeRef.current; - if (timeDiff > 0) { - setFps(Math.round(1000 / timeDiff)); - } - lastFrameTimeRef.current = now; - }, []); + const updateDetections = useCallback( + (payload: { + results: Detection[]; + imageWidth: number; + imageHeight: number; + }) => { + setDetections(payload.results); + setImageSize({ width: payload.imageWidth, height: payload.imageHeight }); + const now = Date.now(); + const timeDiff = now - lastFrameTimeRef.current; + if (timeDiff > 0) { + setFps(Math.round(1000 / timeDiff)); + } + lastFrameTimeRef.current = now; + }, + [] + ); const frameOutput = useFrameOutput({ pixelFormat: 'rgb', @@ -79,10 +89,19 @@ export default function ObjectDetectionLiveScreen() 
{ frame.dispose(); return; } + // After 90Β° CW rotation, the image fed to the model has swapped dims. + const imageWidth = + frame.width > frame.height ? frame.height : frame.width; + const imageHeight = + frame.width > frame.height ? frame.width : frame.height; try { const result = model.runOnFrame(frame, 0.5); if (result) { - scheduleOnRN(updateStats, result); + scheduleOnRN(updateDetections, { + results: result, + imageWidth, + imageHeight, + }); } } catch { // ignore frame errors @@ -135,13 +154,51 @@ export default function ObjectDetectionLiveScreen() { format={format} /> + {/* Bounding box overlay β€” measured to match the exact camera preview area */} + + setCanvasSize({ + width: e.nativeEvent.layout.width, + height: e.nativeEvent.layout.height, + }) + } + > + {(() => { + // Cover-fit: camera preview scales to fill the canvas, cropping the + // excess. Compute the same transform so bbox pixel coords map correctly. + const scale = Math.max( + canvasSize.width / imageSize.width, + canvasSize.height / imageSize.height + ); + const offsetX = (canvasSize.width - imageSize.width * scale) / 2; + const offsetY = (canvasSize.height - imageSize.height * scale) / 2; + return detections.map((det, i) => { + const left = det.bbox.x1 * scale + offsetX; + const top = det.bbox.y1 * scale + offsetY; + const width = (det.bbox.x2 - det.bbox.x1) * scale; + const height = (det.bbox.y2 - det.bbox.y1) * scale; + return ( + + + + {det.label} {(det.score * 100).toFixed(0)}% + + + + ); + }); + })()} + + - {detectionCount} + {detections.length} objects @@ -183,6 +240,26 @@ const styles = StyleSheet.create({ fontWeight: '600', letterSpacing: 0.3, }, + bbox: { + position: 'absolute', + borderWidth: 2, + borderColor: ColorPalette.primary, + borderRadius: 4, + }, + bboxLabel: { + position: 'absolute', + top: -22, + left: -2, + backgroundColor: ColorPalette.primary, + paddingHorizontal: 6, + paddingVertical: 2, + borderRadius: 4, + }, + bboxLabelText: { + color: 'white', + fontSize: 11, + 
fontWeight: '600', + }, bottomBarWrapper: { position: 'absolute', bottom: 0, diff --git a/apps/computer-vision/app/ocr_live/index.tsx b/apps/computer-vision/app/ocr_live/index.tsx new file mode 100644 index 000000000..a0c93899f --- /dev/null +++ b/apps/computer-vision/app/ocr_live/index.tsx @@ -0,0 +1,329 @@ +import React, { + useCallback, + useContext, + useEffect, + useMemo, + useRef, + useState, +} from 'react'; +import { + StatusBar, + StyleSheet, + Text, + TouchableOpacity, + View, +} from 'react-native'; +import { useSafeAreaInsets } from 'react-native-safe-area-context'; + +import { + Camera, + getCameraFormat, + Templates, + useCameraDevices, + useCameraPermission, + useFrameOutput, +} from 'react-native-vision-camera'; +import { scheduleOnRN } from 'react-native-worklets'; +import { OCR_ENGLISH, useOCR, OCRDetection } from 'react-native-executorch'; +import { + Canvas, + Path, + Skia, + Text as SkiaText, + matchFont, +} from '@shopify/react-native-skia'; +import { GeneratingContext } from '../../context'; +import Spinner from '../../components/Spinner'; +import ColorPalette from '../../colors'; + +interface FrameDetections { + detections: OCRDetection[]; + frameWidth: number; + frameHeight: number; +} + +export default function OCRLiveScreen() { + const insets = useSafeAreaInsets(); + const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); + + const { isReady, isGenerating, downloadProgress, runOnFrame } = useOCR({ + model: OCR_ENGLISH, + }); + const { setGlobalGenerating } = useContext(GeneratingContext); + + useEffect(() => { + setGlobalGenerating(isGenerating); + }, [isGenerating, setGlobalGenerating]); + + const [frameDetections, setFrameDetections] = useState({ + detections: [], + frameWidth: 1, + frameHeight: 1, + }); + const [fps, setFps] = useState(0); + const lastFrameTimeRef = useRef(Date.now()); + + const font = matchFont({ fontFamily: 'Helvetica', fontSize: 11 }); + + const cameraPermission = useCameraPermission(); + const 
devices = useCameraDevices(); + const device = devices.find((d) => d.position === 'back') ?? devices[0]; + + const format = useMemo(() => { + if (device == null) return undefined; + try { + return getCameraFormat(device, Templates.FrameProcessing); + } catch { + return undefined; + } + }, [device]); + + const updateDetections = useCallback((result: FrameDetections) => { + setFrameDetections(result); + const now = Date.now(); + const timeDiff = now - lastFrameTimeRef.current; + if (timeDiff > 0) { + setFps(Math.round(1000 / timeDiff)); + } + lastFrameTimeRef.current = now; + }, []); + + const frameOutput = useFrameOutput({ + dropFramesWhileBusy: true, + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + if (!runOnFrame) { + frame.dispose(); + return; + } + const frameWidth = frame.width; + const frameHeight = frame.height; + try { + const result = runOnFrame(frame); + if (result) { + scheduleOnRN(updateDetections, { + detections: result, + frameWidth, + frameHeight, + }); + } + } catch { + // ignore frame errors + } finally { + frame.dispose(); + } + }, + }); + + if (!isReady) { + return ( + + ); + } + + if (!cameraPermission.hasPermission) { + return ( + + Camera access needed + cameraPermission.requestPermission()} + style={styles.button} + > + Grant Permission + + + ); + } + + if (device == null) { + return ( + + No camera device found + + ); + } + + const { detections, frameWidth, frameHeight } = frameDetections; + + // OCR runs on the raw landscape frame (no rotation applied in native). + // The camera preview displays it as portrait (90Β° CW rotation applied by iOS). + // After rotation the image dimensions become (frameHeight Γ— frameWidth). + // Cover-fit scale uses post-rotation dims to match what the preview shows. + const isLandscape = frameWidth > frameHeight; + const imageW = isLandscape ? frameHeight : frameWidth; + const imageH = isLandscape ? 
frameWidth : frameHeight; + const scale = Math.max(canvasSize.width / imageW, canvasSize.height / imageH); + const offsetX = (canvasSize.width - imageW * scale) / 2; + const offsetY = (canvasSize.height - imageH * scale) / 2; + + // Map a raw landscape point to screen coords accounting for rotation + cover-fit. + function toScreenX(px: number, py: number) { + // After 90Β° CW: rotated_x = frameHeight - py, rotated_y = px + const rx = isLandscape ? frameHeight - py : px; + return rx * scale + offsetX; + } + function toScreenY(px: number, py: number) { + const ry = isLandscape ? px : py; + return ry * scale + offsetY; + } + + return ( + + + + + + {/* Measure the overlay area, then draw polygons inside a Canvas */} + + setCanvasSize({ + width: e.nativeEvent.layout.width, + height: e.nativeEvent.layout.height, + }) + } + > + + {detections.map((det, i) => { + if (!det.bbox || det.bbox.length < 2) return null; + + const path = Skia.Path.Make(); + path.moveTo( + toScreenX(det.bbox[0]!.x, det.bbox[0]!.y), + toScreenY(det.bbox[0]!.x, det.bbox[0]!.y) + ); + for (let j = 1; j < det.bbox.length; j++) { + path.lineTo( + toScreenX(det.bbox[j]!.x, det.bbox[j]!.y), + toScreenY(det.bbox[j]!.x, det.bbox[j]!.y) + ); + } + path.close(); + + const labelX = toScreenX(det.bbox[0]!.x, det.bbox[0]!.y); + const labelY = Math.max( + 0, + toScreenY(det.bbox[0]!.x, det.bbox[0]!.y) - 4 + ); + + return ( + + + + {font && ( + + )} + + ); + })} + + + + + + + {detections.length} + regions + + + + {fps} + fps + + + + + ); +} + +const styles = StyleSheet.create({ + container: { + flex: 1, + backgroundColor: 'black', + }, + centered: { + flex: 1, + backgroundColor: 'black', + justifyContent: 'center', + alignItems: 'center', + gap: 16, + }, + message: { + color: 'white', + fontSize: 18, + }, + button: { + paddingHorizontal: 24, + paddingVertical: 12, + backgroundColor: ColorPalette.primary, + borderRadius: 24, + }, + buttonText: { + color: 'white', + fontSize: 15, + fontWeight: '600', + letterSpacing: 
0.3, + }, + bottomBarWrapper: { + position: 'absolute', + bottom: 0, + left: 0, + right: 0, + alignItems: 'center', + }, + bottomBar: { + flexDirection: 'row', + alignItems: 'center', + backgroundColor: 'rgba(0, 0, 0, 0.55)', + borderRadius: 24, + paddingHorizontal: 28, + paddingVertical: 10, + gap: 24, + }, + statItem: { + alignItems: 'center', + }, + statValue: { + color: 'white', + fontSize: 22, + fontWeight: '700', + letterSpacing: -0.5, + }, + statLabel: { + color: 'rgba(255,255,255,0.55)', + fontSize: 11, + fontWeight: '500', + textTransform: 'uppercase', + letterSpacing: 0.8, + }, + statDivider: { + width: 1, + height: 32, + backgroundColor: 'rgba(255,255,255,0.2)', + }, +}); diff --git a/apps/computer-vision/app/style_transfer/index.tsx b/apps/computer-vision/app/style_transfer/index.tsx index a1b3a7834..466900a6f 100644 --- a/apps/computer-vision/app/style_transfer/index.tsx +++ b/apps/computer-vision/app/style_transfer/index.tsx @@ -5,6 +5,14 @@ import { useStyleTransfer, STYLE_TRANSFER_CANDY, } from 'react-native-executorch'; +import { + Canvas, + Image as SkiaImage, + Skia, + AlphaType, + ColorType, + SkImage, +} from '@shopify/react-native-skia'; import { View, StyleSheet, Image } from 'react-native'; import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from '../../context'; @@ -16,12 +24,16 @@ export default function StyleTransferScreen() { useEffect(() => { setGlobalGenerating(model.isGenerating); }, [model.isGenerating, setGlobalGenerating]); + const [imageUri, setImageUri] = useState(''); + const [styledImage, setStyledImage] = useState(null); + const handleCameraPress = async (isCamera: boolean) => { const image = await getImage(isCamera); const uri = image?.uri; if (typeof uri === 'string') { - setImageUri(uri as string); + setImageUri(uri); + setStyledImage(null); } }; @@ -29,7 +41,29 @@ export default function StyleTransferScreen() { if (imageUri) { try { const output = await model.forward(imageUri); - 
setImageUri(output); + const height = output.sizes[0]; + const width = output.sizes[1]; + // Convert RGB -> RGBA for Skia + const rgba = new Uint8Array(width * height * 4); + const rgb = output.dataPtr; + for (let i = 0; i < width * height; i++) { + rgba[i * 4] = rgb[i * 3]; + rgba[i * 4 + 1] = rgb[i * 3 + 1]; + rgba[i * 4 + 2] = rgb[i * 3 + 2]; + rgba[i * 4 + 3] = 255; + } + const skData = Skia.Data.fromBytes(rgba); + const img = Skia.Image.MakeImage( + { + width, + height, + alphaType: AlphaType.Opaque, + colorType: ColorType.RGBA_8888, + }, + skData, + width * 4 + ); + setStyledImage(img); } catch (e) { console.error(e); } @@ -48,15 +82,28 @@ export default function StyleTransferScreen() { return ( - + {styledImage ? ( + + + + ) : ( + + )} { + setGlobalGenerating(isGenerating); + }, [isGenerating, setGlobalGenerating]); + + const [styledImage, setStyledImage] = useState(null); + const [fps, setFps] = useState(0); + const lastFrameTimeRef = useRef(Date.now()); + + const cameraPermission = useCameraPermission(); + const devices = useCameraDevices(); + const device = devices.find((d) => d.position === 'back') ?? 
devices[0]; + + const format = useMemo(() => { + if (device == null) return undefined; + try { + return getCameraFormat(device, Templates.FrameProcessing); + } catch { + return undefined; + } + }, [device]); + + const updateImage = useCallback((img: SkImage) => { + setStyledImage((prev) => { + prev?.dispose(); + return img; + }); + const now = Date.now(); + const timeDiff = now - lastFrameTimeRef.current; + if (timeDiff > 0) { + setFps(Math.round(1000 / timeDiff)); + } + lastFrameTimeRef.current = now; + }, []); + + const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + dropFramesWhileBusy: true, + onFrame(frame) { + 'worklet'; + if (!runOnFrame) { + frame.dispose(); + return; + } + try { + const result = runOnFrame(frame); + if (result?.dataPtr) { + const { dataPtr, sizes } = result; + const height = sizes[0]; + const width = sizes[1]; + // Build Skia image on the worklet thread β€” avoids transferring the + // large pixel buffer across the workletβ†’RN boundary via scheduleOnRN. 
+ const skData = Skia.Data.fromBytes(dataPtr); + const img = Skia.Image.MakeImage( + { + width, + height, + alphaType: AlphaType.Opaque, + colorType: ColorType.RGBA_8888, + }, + skData, + width * 4 + ); + if (img) { + scheduleOnRN(updateImage, img); + } + } + } catch (e) { + console.log('frame error:', String(e)); + } finally { + frame.dispose(); + } + }, + }); + + if (!isReady) { + return ( + + ); + } + + if (!cameraPermission.hasPermission) { + return ( + + Camera access needed + cameraPermission.requestPermission()} + style={styles.button} + > + Grant Permission + + + ); + } + + if (device == null) { + return ( + + No camera device found + + ); + } + + return ( + + + + {/* Camera always runs to keep frame processing active */} + + + {/* Styled output overlays the camera feed once available */} + {styledImage && ( + + + + )} + + + + + {fps} + fps + + + + candy + style + + + + + ); +} + +const styles = StyleSheet.create({ + container: { + flex: 1, + backgroundColor: 'black', + }, + centered: { + flex: 1, + backgroundColor: 'black', + justifyContent: 'center', + alignItems: 'center', + gap: 16, + }, + message: { + color: 'white', + fontSize: 18, + }, + button: { + paddingHorizontal: 24, + paddingVertical: 12, + backgroundColor: ColorPalette.primary, + borderRadius: 24, + }, + buttonText: { + color: 'white', + fontSize: 15, + fontWeight: '600', + letterSpacing: 0.3, + }, + bottomBarWrapper: { + position: 'absolute', + bottom: 0, + left: 0, + right: 0, + alignItems: 'center', + }, + bottomBar: { + flexDirection: 'row', + alignItems: 'center', + backgroundColor: 'rgba(0, 0, 0, 0.55)', + borderRadius: 24, + paddingHorizontal: 28, + paddingVertical: 10, + gap: 24, + }, + statItem: { + alignItems: 'center', + }, + statValue: { + color: 'white', + fontSize: 22, + fontWeight: '700', + letterSpacing: -0.5, + }, + styleLabel: { + color: 'white', + fontSize: 16, + fontWeight: '700', + }, + statLabel: { + color: 'rgba(255,255,255,0.55)', + fontSize: 11, + fontWeight: '500', + 
textTransform: 'uppercase', + letterSpacing: 0.8, + }, + statDivider: { + width: 1, + height: 32, + backgroundColor: 'rgba(255,255,255,0.2)', + }, +}); diff --git a/apps/computer-vision/app/vision_camera_live/index.tsx b/apps/computer-vision/app/vision_camera_live/index.tsx new file mode 100644 index 000000000..4c7b425b1 --- /dev/null +++ b/apps/computer-vision/app/vision_camera_live/index.tsx @@ -0,0 +1,798 @@ +import React, { + useCallback, + useContext, + useEffect, + useMemo, + useRef, + useState, +} from 'react'; +import { + ScrollView, + StatusBar, + StyleSheet, + Text, + TouchableOpacity, + View, +} from 'react-native'; +import { useSafeAreaInsets } from 'react-native-safe-area-context'; +import { + Camera, + Frame, + getCameraFormat, + Templates, + useCameraDevices, + useCameraPermission, + useFrameOutput, +} from 'react-native-vision-camera'; +import { createSynchronizable, runOnJS } from 'react-native-worklets'; +import { + DEEPLAB_V3_RESNET50, + Detection, + EFFICIENTNET_V2_S, + OCRDetection, + OCR_ENGLISH, + SSDLITE_320_MOBILENET_V3_LARGE, + STYLE_TRANSFER_RAIN_PRINCESS, + useClassification, + useImageSegmentation, + useObjectDetection, + useOCR, + useStyleTransfer, +} from 'react-native-executorch'; +import { + AlphaType, + Canvas, + ColorType, + Image as SkiaImage, + matchFont, + Path, + Skia, + SkImage, + Text as SkiaText, +} from '@shopify/react-native-skia'; +import { GeneratingContext } from '../../context'; +import Spinner from '../../components/Spinner'; +import ColorPalette from '../../colors'; + +// ─── Model IDs ─────────────────────────────────────────────────────────────── + +type ModelId = + | 'classification' + | 'object_detection' + | 'segmentation' + | 'style_transfer' + | 'ocr'; + +const MODELS: { id: ModelId; label: string }[] = [ + { id: 'classification', label: 'Classification' }, + { id: 'object_detection', label: 'Object Detection' }, + { id: 'segmentation', label: 'Segmentation' }, + { id: 'style_transfer', label: 'Style 
Transfer' }, + { id: 'ocr', label: 'OCR' }, +]; + +// ─── Segmentation colors ───────────────────────────────────────────────────── + +const CLASS_COLORS: number[][] = [ + [0, 0, 0, 0], + [51, 255, 87, 180], + [51, 87, 255, 180], + [255, 51, 246, 180], + [51, 255, 246, 180], + [243, 255, 51, 180], + [141, 51, 255, 180], + [255, 131, 51, 180], + [51, 255, 131, 180], + [131, 51, 255, 180], + [255, 255, 51, 180], + [51, 255, 255, 180], + [255, 51, 143, 180], + [127, 51, 255, 180], + [51, 255, 175, 180], + [255, 175, 51, 180], + [179, 255, 51, 180], + [255, 87, 51, 180], + [255, 51, 162, 180], + [51, 162, 255, 180], + [162, 51, 255, 180], +]; + +// ─── Kill switch β€” synchronizable boolean shared between JS and worklet thread. +// setBlocking(true) immediately stops the worklet from dispatching new work +// (both in onFrame and inside the async callback) before the old model tears down. +const frameKillSwitch = createSynchronizable(false); + +// ─── Screen ────────────────────────────────────────────────────────────────── + +export default function VisionCameraLiveScreen() { + const insets = useSafeAreaInsets(); + const [activeModel, setActiveModel] = useState('classification'); + const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); + const { setGlobalGenerating } = useContext(GeneratingContext); + + // ── Models (only the active model loads; others are prevented) ── + const classification = useClassification({ + model: EFFICIENTNET_V2_S, + preventLoad: activeModel !== 'classification', + }); + const objectDetection = useObjectDetection({ + model: SSDLITE_320_MOBILENET_V3_LARGE, + preventLoad: activeModel !== 'object_detection', + }); + const segmentation = useImageSegmentation({ + model: DEEPLAB_V3_RESNET50, + preventLoad: activeModel !== 'segmentation', + }); + const styleTransfer = useStyleTransfer({ + model: STYLE_TRANSFER_RAIN_PRINCESS, + preventLoad: activeModel !== 'style_transfer', + }); + const ocr = useOCR({ + model: OCR_ENGLISH, + 
preventLoad: activeModel !== 'ocr', + }); + + const activeIsGenerating = { + classification: classification.isGenerating, + object_detection: objectDetection.isGenerating, + segmentation: segmentation.isGenerating, + style_transfer: styleTransfer.isGenerating, + ocr: ocr.isGenerating, + }[activeModel]; + + useEffect(() => { + setGlobalGenerating(activeIsGenerating); + }, [activeIsGenerating, setGlobalGenerating]); + + // ── Camera ── + const [fps, setFps] = useState(0); + const lastFrameTimeRef = useRef(Date.now()); + const cameraPermission = useCameraPermission(); + const devices = useCameraDevices(); + const device = devices.find((d) => d.position === 'back') ?? devices[0]; + const format = useMemo(() => { + if (device == null) return undefined; + try { + return getCameraFormat(device, Templates.FrameProcessing); + } catch { + return undefined; + } + }, [device]); + + // ── Per-model result state ── + const [classResult, setClassResult] = useState({ label: '', score: 0 }); + const [detections, setDetections] = useState([]); + const [imageSize, setImageSize] = useState({ width: 1, height: 1 }); + const [maskImage, setMaskImage] = useState(null); + const [styledImage, setStyledImage] = useState(null); + const [ocrData, setOcrData] = useState<{ + detections: OCRDetection[]; + frameWidth: number; + frameHeight: number; + }>({ detections: [], frameWidth: 1, frameHeight: 1 }); + + // ── Stable callbacks ── + function tick() { + const now = Date.now(); + const diff = now - lastFrameTimeRef.current; + if (diff > 0) setFps(Math.round(1000 / diff)); + lastFrameTimeRef.current = now; + } + + const updateClass = useCallback((r: { label: string; score: number }) => { + setClassResult(r); + tick(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + const updateDetections = useCallback( + (p: { results: Detection[]; imageWidth: number; imageHeight: number }) => { + setDetections(p.results); + setImageSize({ width: p.imageWidth, height: p.imageHeight }); + 
tick(); + }, + // eslint-disable-next-line react-hooks/exhaustive-deps + [] + ); + + const updateMask = useCallback((img: SkImage) => { + setMaskImage((prev) => { + prev?.dispose(); + return img; + }); + tick(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + const updateStyled = useCallback((img: SkImage) => { + setStyledImage((prev) => { + prev?.dispose(); + return img; + }); + tick(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + const updateOcr = useCallback( + (d: { + detections: OCRDetection[]; + frameWidth: number; + frameHeight: number; + }) => { + setOcrData(d); + tick(); + }, + // eslint-disable-next-line react-hooks/exhaustive-deps + [] + ); + + // ── runOnJS-wrapped callbacks β€” created on the RN thread so the Babel plugin + // can serialize them into remote functions. These can then be safely called + // from any worklet runtime, including the asyncRunner's worker runtime. + const notifyClass = runOnJS(updateClass); + const notifyDetections = runOnJS(updateDetections); + const notifyMask = runOnJS(updateMask); + const notifyStyled = runOnJS(updateStyled); + const notifyOcr = runOnJS(updateOcr); + + // ── Pull the active model's runOnFrame out of the hook each render. + // These are worklet functions (not plain JS objects), so they CAN be + // captured directly in a useCallback closure β€” the worklets runtime + // serializes them correctly. A new closure is produced whenever the + // active runOnFrame changes, causing useFrameOutput to re-register. + const classRof = classification.runOnFrame; + const detRof = objectDetection.runOnFrame; + const segRof = segmentation.runOnFrame; + const stRof = styleTransfer.runOnFrame; + const ocrRof = ocr.runOnFrame; + + // When switching models: activate kill switch synchronously so the worklet + // thread stops calling runOnFrame before delete() fires on the old model. + // Then re-enable once the new model's preventLoad has taken effect. 
+ useEffect(() => { + frameKillSwitch.setBlocking(true); + setMaskImage((prev) => { + prev?.dispose(); + return null; + }); + setStyledImage((prev) => { + prev?.dispose(); + return null; + }); + const id = setTimeout(() => { + frameKillSwitch.setBlocking(false); + }, 300); + return () => clearTimeout(id); + }, [activeModel]); + + // ── Single frame output. + // onFrame is re-created (and re-registered by useFrameOutput) whenever the + // active model or its runOnFrame worklet changes. The kill switch provides + // synchronous cross-thread protection during the transition window. + const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + dropFramesWhileBusy: true, + onFrame: useCallback( + (frame: Frame) => { + 'worklet'; + + // Kill switch is set synchronously from JS when switching models β€” + // guaranteed visible here before the next frame is dispatched. + if (frameKillSwitch.getDirty()) { + frame.dispose(); + return; + } + + try { + if (activeModel === 'classification') { + if (!classRof) return; + const result = classRof(frame); + if (result) { + let bestLabel = ''; + let bestScore = -1; + const entries = Object.entries(result); + for (let i = 0; i < entries.length; i++) { + const [label, score] = entries[i]!; + if ((score as number) > bestScore) { + bestScore = score as number; + bestLabel = label; + } + } + notifyClass({ + label: bestLabel, + score: bestScore, + }); + } + } else if (activeModel === 'object_detection') { + if (!detRof) return; + const iw = frame.width > frame.height ? frame.height : frame.width; + const ih = frame.width > frame.height ? 
frame.width : frame.height; + const result = detRof(frame, 0.5); + if (result) { + notifyDetections({ + results: result, + imageWidth: iw, + imageHeight: ih, + }); + } + } else if (activeModel === 'segmentation') { + if (!segRof) return; + const result = segRof(frame, [], false); + if (result?.ARGMAX) { + const argmax: Int32Array = result.ARGMAX; + const side = Math.round(Math.sqrt(argmax.length)); + const pixels = new Uint8Array(side * side * 4); + for (let i = 0; i < argmax.length; i++) { + const color = CLASS_COLORS[argmax[i]!] ?? [0, 0, 0, 0]; + pixels[i * 4] = color[0]!; + pixels[i * 4 + 1] = color[1]!; + pixels[i * 4 + 2] = color[2]!; + pixels[i * 4 + 3] = color[3]!; + } + const skData = Skia.Data.fromBytes(pixels); + const img = Skia.Image.MakeImage( + { + width: side, + height: side, + alphaType: AlphaType.Unpremul, + colorType: ColorType.RGBA_8888, + }, + skData, + side * 4 + ); + if (img) notifyMask(img); + } + } else if (activeModel === 'style_transfer') { + if (!stRof) return; + const result = stRof(frame); + if (result?.dataPtr) { + const { dataPtr, sizes } = result; + const h = sizes[0]!; + const w = sizes[1]!; + const skData = Skia.Data.fromBytes(dataPtr); + const img = Skia.Image.MakeImage( + { + width: w, + height: h, + alphaType: AlphaType.Opaque, + colorType: ColorType.RGBA_8888, + }, + skData, + w * 4 + ); + if (img) notifyStyled(img); + } + } else if (activeModel === 'ocr') { + if (!ocrRof) return; + const fw = frame.width; + const fh = frame.height; + const result = ocrRof(frame); + if (result) { + notifyOcr({ + detections: result, + frameWidth: fw, + frameHeight: fh, + }); + } + } + } catch { + // ignore + } finally { + frame.dispose(); + } + }, + [ + activeModel, + classRof, + detRof, + segRof, + stRof, + ocrRof, + notifyClass, + notifyDetections, + notifyMask, + notifyStyled, + notifyOcr, + ] + ), + }); + + // ── Loading state: only care about the active model ── + const activeIsReady = { + classification: classification.isReady, + 
object_detection: objectDetection.isReady, + segmentation: segmentation.isReady, + style_transfer: styleTransfer.isReady, + ocr: ocr.isReady, + }[activeModel]; + + const activeDownloadProgress = { + classification: classification.downloadProgress, + object_detection: objectDetection.downloadProgress, + segmentation: segmentation.downloadProgress, + style_transfer: styleTransfer.downloadProgress, + ocr: ocr.downloadProgress, + }[activeModel]; + + if (!cameraPermission.hasPermission) { + return ( + + Camera access needed + cameraPermission.requestPermission()} + style={styles.button} + > + Grant Permission + + + ); + } + + if (device == null) { + return ( + + No camera device found + + ); + } + + // ── Cover-fit helpers ── + function coverFit(imgW: number, imgH: number) { + const scale = Math.max(canvasSize.width / imgW, canvasSize.height / imgH); + return { + scale, + offsetX: (canvasSize.width - imgW * scale) / 2, + offsetY: (canvasSize.height - imgH * scale) / 2, + }; + } + + // ── OCR coord transform ── + const { + detections: ocrDets, + frameWidth: ocrFW, + frameHeight: ocrFH, + } = ocrData; + const ocrIsLandscape = ocrFW > ocrFH; + const ocrImgW = ocrIsLandscape ? ocrFH : ocrFW; + const ocrImgH = ocrIsLandscape ? ocrFW : ocrFH; + const { + scale: ocrScale, + offsetX: ocrOX, + offsetY: ocrOY, + } = coverFit(ocrImgW, ocrImgH); + function ocrToX(px: number, py: number) { + return (ocrIsLandscape ? ocrFH - py : px) * ocrScale + ocrOX; + } + function ocrToY(px: number, py: number) { + return (ocrIsLandscape ? 
px : py) * ocrScale + ocrOY; + } + + // ── Object detection cover-fit ── + const { + scale: detScale, + offsetX: detOX, + offsetY: detOY, + } = coverFit(imageSize.width, imageSize.height); + + const font = matchFont({ fontFamily: 'Helvetica', fontSize: 11 }); + + return ( + + + + + + {/* ── Overlays ── */} + + setCanvasSize({ + width: e.nativeEvent.layout.width, + height: e.nativeEvent.layout.height, + }) + } + > + {activeModel === 'segmentation' && maskImage && ( + + + + )} + + {activeModel === 'style_transfer' && styledImage && ( + + + + )} + + {activeModel === 'object_detection' && ( + <> + {detections.map((det, i) => { + const left = det.bbox.x1 * detScale + detOX; + const top = det.bbox.y1 * detScale + detOY; + const w = (det.bbox.x2 - det.bbox.x1) * detScale; + const h = (det.bbox.y2 - det.bbox.y1) * detScale; + return ( + + + + {det.label} {(det.score * 100).toFixed(0)}% + + + + ); + })} + + )} + + {activeModel === 'ocr' && ( + + {ocrDets.map((det, i) => { + if (!det.bbox || det.bbox.length < 2) return null; + const path = Skia.Path.Make(); + path.moveTo( + ocrToX(det.bbox[0]!.x, det.bbox[0]!.y), + ocrToY(det.bbox[0]!.x, det.bbox[0]!.y) + ); + for (let j = 1; j < det.bbox.length; j++) { + path.lineTo( + ocrToX(det.bbox[j]!.x, det.bbox[j]!.y), + ocrToY(det.bbox[j]!.x, det.bbox[j]!.y) + ); + } + path.close(); + const lx = ocrToX(det.bbox[0]!.x, det.bbox[0]!.y); + const ly = Math.max( + 0, + ocrToY(det.bbox[0]!.x, det.bbox[0]!.y) - 4 + ); + return ( + + + + {font && ( + + )} + + ); + })} + + )} + + + {!activeIsReady && ( + + m.id === activeModel)?.label} ${(activeDownloadProgress * 100).toFixed(0)}%`} + /> + + )} + + + + {MODELS.map((m) => ( + setActiveModel(m.id)} + > + + {m.label} + + + ))} + + + + + + {activeModel === 'classification' && ( + + + {classResult.label || 'β€”'} + + {classResult.label ? 
( + + {(classResult.score * 100).toFixed(1)}% + + ) : null} + + )} + {activeModel === 'object_detection' && ( + + {detections.length} + objects + + )} + {activeModel === 'segmentation' && ( + + DeepLab V3 + segmentation + + )} + {activeModel === 'style_transfer' && ( + + Rain Princess + style + + )} + {activeModel === 'ocr' && ( + + {ocrDets.length} + regions + + )} + + + {fps} + fps + + + + + ); +} + +// ─── Styles ────────────────────────────────────────────────────────────────── + +const styles = StyleSheet.create({ + container: { flex: 1, backgroundColor: 'black' }, + centered: { + flex: 1, + backgroundColor: 'black', + justifyContent: 'center', + alignItems: 'center', + gap: 16, + }, + message: { color: 'white', fontSize: 18 }, + button: { + paddingHorizontal: 24, + paddingVertical: 12, + backgroundColor: ColorPalette.primary, + borderRadius: 24, + }, + buttonText: { color: 'white', fontSize: 15, fontWeight: '600' }, + loadingOverlay: { + ...StyleSheet.absoluteFillObject, + backgroundColor: 'rgba(0,0,0,0.6)', + justifyContent: 'center', + alignItems: 'center', + }, + topBarWrapper: { + position: 'absolute', + top: 0, + left: 0, + right: 0, + }, + pickerContent: { + paddingHorizontal: 12, + gap: 8, + }, + chip: { + paddingHorizontal: 16, + paddingVertical: 8, + borderRadius: 20, + backgroundColor: 'rgba(0,0,0,0.55)', + borderWidth: 1, + borderColor: 'rgba(255,255,255,0.2)', + }, + chipActive: { + backgroundColor: ColorPalette.primary, + borderColor: ColorPalette.primary, + }, + chipText: { + color: 'rgba(255,255,255,0.8)', + fontSize: 13, + fontWeight: '600', + }, + chipTextActive: { color: 'white' }, + bbox: { + position: 'absolute', + borderWidth: 2, + borderColor: ColorPalette.primary, + borderRadius: 4, + }, + bboxLabel: { + position: 'absolute', + top: -22, + left: -2, + backgroundColor: ColorPalette.primary, + paddingHorizontal: 6, + paddingVertical: 2, + borderRadius: 4, + }, + bboxLabelText: { color: 'white', fontSize: 11, fontWeight: '600' }, + 
bottomBarWrapper: { + position: 'absolute', + bottom: 0, + left: 0, + right: 0, + alignItems: 'center', + }, + bottomBar: { + flexDirection: 'row', + alignItems: 'center', + backgroundColor: 'rgba(0,0,0,0.55)', + borderRadius: 24, + paddingHorizontal: 28, + paddingVertical: 10, + gap: 24, + }, + resultContainer: { alignItems: 'flex-start', maxWidth: 220 }, + resultText: { + color: 'white', + fontSize: 16, + fontWeight: '700', + }, + resultSub: { + color: 'rgba(255,255,255,0.6)', + fontSize: 12, + fontWeight: '500', + }, + statDivider: { + width: 1, + height: 32, + backgroundColor: 'rgba(255,255,255,0.2)', + }, + statItem: { alignItems: 'center' }, + statValue: { + color: 'white', + fontSize: 22, + fontWeight: '700', + letterSpacing: -0.5, + }, + statLabel: { + color: 'rgba(255,255,255,0.55)', + fontSize: 11, + fontWeight: '500', + textTransform: 'uppercase', + letterSpacing: 0.8, + }, +}); diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index 7b97108b9..d0cba9916 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -15,11 +15,13 @@ #include #include +#include #include #include #include #include #include +#include #include using namespace rnexecutorch::models::speech_to_text::types; @@ -559,4 +561,53 @@ inline jsi::Value getJsiValue(const TranscriptionResult &result, return obj; } +inline jsi::Value +getJsiValue(const models::style_transfer::PixelDataResult &result, + jsi::Runtime &runtime) { + jsi::Object obj(runtime); + + auto arrayBuffer = jsi::ArrayBuffer(runtime, result.dataPtr); + auto uint8ArrayCtor = + runtime.global().getPropertyAsFunction(runtime, "Uint8Array"); + auto uint8Array = + uint8ArrayCtor.callAsConstructor(runtime, arrayBuffer).getObject(runtime); + obj.setProperty(runtime, 
"dataPtr", uint8Array); + + auto sizesArray = jsi::Array(runtime, 3); + sizesArray.setValueAtIndex(runtime, 0, jsi::Value(result.height)); + sizesArray.setValueAtIndex(runtime, 1, jsi::Value(result.width)); + sizesArray.setValueAtIndex(runtime, 2, jsi::Value(4)); + obj.setProperty(runtime, "sizes", sizesArray); + + obj.setProperty(runtime, "scalarType", jsi::Value(0)); + + return obj; +} + +inline jsi::Value +getJsiValue(const models::image_segmentation::SegmentationResult &result, + jsi::Runtime &runtime) { + jsi::Object dict(runtime); + + auto argmaxArrayBuffer = jsi::ArrayBuffer(runtime, result.argmax); + auto int32ArrayCtor = + runtime.global().getPropertyAsFunction(runtime, "Int32Array"); + auto int32Array = int32ArrayCtor.callAsConstructor(runtime, argmaxArrayBuffer) + .getObject(runtime); + dict.setProperty(runtime, "ARGMAX", int32Array); + + for (auto &[classLabel, owningBuffer] : *result.classBuffers) { + auto classArrayBuffer = jsi::ArrayBuffer(runtime, owningBuffer); + auto float32ArrayCtor = + runtime.global().getPropertyAsFunction(runtime, "Float32Array"); + auto float32Array = + float32ArrayCtor.callAsConstructor(runtime, classArrayBuffer) + .getObject(runtime); + dict.setProperty(runtime, jsi::String::createFromAscii(runtime, classLabel), + float32Array); + } + + return dict; +} + } // namespace rnexecutorch::jsi_conversion diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index c0ce049f2..8f67175c4 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -11,7 +11,16 @@ using namespace facebook; cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData) const { auto frameObj = frameData.asObject(runtime); - return ::rnexecutorch::utils::extractFrame(runtime, frameObj); + cv::Mat frame = 
::rnexecutorch::utils::extractFrame(runtime, frameObj);
+
+  // Camera sensors natively deliver frames in landscape orientation.
+  // Rotate 90° CW so all models receive upright portrait frames.
+  if (frame.cols > frame.rows) {
+    cv::Mat upright;
+    cv::rotate(frame, upright, cv::ROTATE_90_CLOCKWISE);
+    return upright;
+  }
+  return frame;
 }
 
 cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const {
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h
index e0ec03912..a2a461772 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h
@@ -53,6 +53,20 @@ class VisionModel : public BaseModel {
 
   virtual ~VisionModel() = default;
 
+  /**
+   * @brief Thread-safe unload that waits for any in-flight inference to
+   * complete
+   *
+   * Overrides BaseModel::unload() to acquire inference_mutex_ before
+   * resetting the module. This prevents a crash where BaseModel::unload()
+   * destroys module_ while generateFromFrame() is still executing on the
+   * VisionCamera worklet thread.
+ */ + void unload() noexcept { + std::scoped_lock lock(inference_mutex_); + BaseModel::unload(); + } + protected: /** * @brief Mutex to ensure thread-safe inference diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp index 0fba07108..2a00d5dce 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp @@ -12,7 +12,7 @@ namespace rnexecutorch::models::classification { Classification::Classification(const std::string &modelSource, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { auto inputShapes = getAllInputShapes(); if (inputShapes.size() == 0) { throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, @@ -32,20 +32,78 @@ Classification::Classification(const std::string &modelSource, modelInputShape[modelInputShape.size() - 2]); } +cv::Mat Classification::preprocessFrame(const cv::Mat &frame) const { + cv::Mat rgb; + + if (frame.channels() == 4) { +#ifdef __APPLE__ + cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); +#else + cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); +#endif + } else if (frame.channels() == 3) { + rgb = frame; + } else { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported frame format: %d channels", frame.channels()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + if (rgb.size() != modelImageSize) { + cv::Mat resized; + cv::resize(rgb, resized, modelImageSize); + return resized; + } + + return rgb; +} + std::unordered_map -Classification::generate(std::string imageSource) { +Classification::runInference(cv::Mat image) { + std::scoped_lock lock(inference_mutex_); + + cv::Mat preprocessed = 
preprocessFrame(image); + + const std::vector tensorDims = getAllInputShapes()[0]; auto inputTensor = - image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]) - .first; + image_processing::getTensorFromMatrix(tensorDims, preprocessed); + auto forwardResult = BaseModel::forward(inputTensor); if (!forwardResult.ok()) { throw RnExecutorchError(forwardResult.error(), "The model's forward function did not succeed. " "Ensure the model input is correct."); } + return postprocess(forwardResult->at(0).toTensor()); } +std::unordered_map +Classification::generateFromString(std::string imageSource) { + cv::Mat imageBGR = image_processing::readImage(imageSource); + + cv::Mat imageRGB; + cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); + + return runInference(imageRGB); +} + +std::unordered_map +Classification::generateFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) { + cv::Mat frame = extractFromFrame(runtime, frameData); + return runInference(frame); +} + +std::unordered_map +Classification::generateFromPixels(JSTensorViewIn pixelData) { + cv::Mat image = extractFromPixels(pixelData); + + return runInference(image); +} + std::unordered_map Classification::postprocess(const Tensor &tensor) { std::span resultData( diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h index 1465fc5f9..473d9b4bb 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h @@ -3,25 +3,40 @@ #include #include +#include #include #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" -#include +#include namespace rnexecutorch { namespace models::classification { using executorch::aten::Tensor; using executorch::extension::TensorPtr; -class Classification : public BaseModel { 
+class Classification : public VisionModel { public: Classification(const std::string &modelSource, std::shared_ptr callInvoker); + [[nodiscard("Registered non-void function")]] std::unordered_map< std::string_view, float> - generate(std::string imageSource); + generateFromString(std::string imageSource); + + [[nodiscard("Registered non-void function")]] std::unordered_map< + std::string_view, float> + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData); + + [[nodiscard("Registered non-void function")]] std::unordered_map< + std::string_view, float> + generateFromPixels(JSTensorViewIn pixelData); + +protected: + cv::Mat preprocessFrame(const cv::Mat &frame) const override; private: + std::unordered_map runInference(cv::Mat image); + std::unordered_map postprocess(const Tensor &tensor); cv::Size modelImageSize{0, 0}; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp index ec3129e76..a82fffbb2 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp @@ -1,17 +1,18 @@ #include "ImageEmbeddings.h" +#include + #include #include #include #include -#include namespace rnexecutorch::models::embeddings { ImageEmbeddings::ImageEmbeddings( const std::string &modelSource, std::shared_ptr callInvoker) - : BaseEmbeddings(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { auto inputTensors = getAllInputShapes(); if (inputTensors.size() == 0) { throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, @@ -31,10 +32,43 @@ ImageEmbeddings::ImageEmbeddings( modelInputShape[modelInputShape.size() - 2]); } +cv::Mat ImageEmbeddings::preprocessFrame(const cv::Mat &frame) const { + cv::Mat rgb; + + if (frame.channels() == 4) { 
+#ifdef __APPLE__ + cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); +#else + cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); +#endif + } else if (frame.channels() == 3) { + rgb = frame; + } else { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported frame format: %d channels", frame.channels()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + if (rgb.size() != modelImageSize) { + cv::Mat resized; + cv::resize(rgb, resized, modelImageSize); + return resized; + } + + return rgb; +} + std::shared_ptr -ImageEmbeddings::generate(std::string imageSource) { - auto [inputTensor, originalSize] = - image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]); +ImageEmbeddings::runInference(cv::Mat image) { + std::scoped_lock lock(inference_mutex_); + + cv::Mat preprocessed = preprocessFrame(image); + + const std::vector tensorDims = getAllInputShapes()[0]; + auto inputTensor = + image_processing::getTensorFromMatrix(tensorDims, preprocessed); auto forwardResult = BaseModel::forward(inputTensor); @@ -45,7 +79,33 @@ ImageEmbeddings::generate(std::string imageSource) { "is correct."); } - return BaseEmbeddings::postprocess(forwardResult); + auto forwardResultTensor = forwardResult->at(0).toTensor(); + return std::make_shared( + forwardResultTensor.const_data_ptr(), forwardResultTensor.nbytes()); +} + +std::shared_ptr +ImageEmbeddings::generateFromString(std::string imageSource) { + cv::Mat imageBGR = image_processing::readImage(imageSource); + + cv::Mat imageRGB; + cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); + + return runInference(imageRGB); +} + +std::shared_ptr +ImageEmbeddings::generateFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) { + cv::Mat frame = extractFromFrame(runtime, frameData); + return runInference(frame); +} + +std::shared_ptr +ImageEmbeddings::generateFromPixels(JSTensorViewIn pixelData) { + cv::Mat image = extractFromPixels(pixelData); + + 
return runInference(image); } } // namespace rnexecutorch::models::embeddings diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h index 7e114e939..ec11ee5c6 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h @@ -2,25 +2,41 @@ #include #include +#include #include #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" -#include +#include +#include namespace rnexecutorch { namespace models::embeddings { using executorch::extension::TensorPtr; using executorch::runtime::EValue; -class ImageEmbeddings final : public BaseEmbeddings { +class ImageEmbeddings final : public VisionModel { public: ImageEmbeddings(const std::string &modelSource, std::shared_ptr callInvoker); + [[nodiscard( "Registered non-void function")]] std::shared_ptr - generate(std::string imageSource); + generateFromString(std::string imageSource); + + [[nodiscard( + "Registered non-void function")]] std::shared_ptr + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData); + + [[nodiscard( + "Registered non-void function")]] std::shared_ptr + generateFromPixels(JSTensorViewIn pixelData); + +protected: + cv::Mat preprocessFrame(const cv::Mat &frame) const override; private: + std::shared_ptr runInference(cv::Mat image); + cv::Size modelImageSize{0, 0}; }; } // namespace models::embeddings diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.cpp index 141ec430e..3a2bfd0cf 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.cpp +++ 
b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.cpp @@ -1,7 +1,4 @@ #include "BaseImageSegmentation.h" -#include "jsi/jsi.h" - -#include #include #include @@ -14,14 +11,14 @@ namespace rnexecutorch::models::image_segmentation { BaseImageSegmentation::BaseImageSegmentation( const std::string &modelSource, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { initModelImageSize(); } BaseImageSegmentation::BaseImageSegmentation( const std::string &modelSource, std::vector normMean, std::vector normStd, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { initModelImageSize(); if (normMean.size() == 3) { normMean_ = cv::Scalar(normMean[0], normMean[1], normMean[2]); @@ -55,7 +52,43 @@ void BaseImageSegmentation::initModelImageSize() { numModelPixels = modelImageSize.area(); } -TensorPtr BaseImageSegmentation::preprocess(const std::string &imageSource, +cv::Mat BaseImageSegmentation::preprocessFrame(const cv::Mat &frame) const { + cv::Mat rgb; + + if (frame.channels() == 4) { +#ifdef __APPLE__ + cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); +#else + cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); +#endif + } else if (frame.channels() == 3) { + rgb = frame; + } else { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported frame format: %d channels", frame.channels()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + cv::Mat processed; + if (rgb.size() != modelImageSize) { + cv::resize(rgb, processed, modelImageSize); + } else { + processed = rgb; + } + + if (normMean_.has_value() && normStd_.has_value()) { + processed.convertTo(processed, CV_32FC3, 1.0 / 255.0); + processed -= *normMean_; + processed /= *normStd_; + } + + return processed; +} + +TensorPtr +BaseImageSegmentation::preprocessFromString(const std::string 
&imageSource, cv::Size &originalSize) { auto [inputTensor, origSize] = image_processing::readImageToTensor( imageSource, getAllInputShapes()[0], false, normMean_, normStd_); @@ -63,12 +96,35 @@ TensorPtr BaseImageSegmentation::preprocess(const std::string &imageSource, return inputTensor; } -std::shared_ptr BaseImageSegmentation::generate( +SegmentationResult BaseImageSegmentation::runInference( + cv::Mat image, cv::Size originalSize, std::vector allClasses, + std::set> classesOfInterest, bool resize) { + std::scoped_lock lock(inference_mutex_); + + cv::Mat preprocessed = preprocessFrame(image); + + const std::vector tensorDims = getAllInputShapes()[0]; + auto inputTensor = + image_processing::getTensorFromMatrix(tensorDims, preprocessed); + + auto forwardResult = BaseModel::forward(inputTensor); + + if (!forwardResult.ok()) { + throw RnExecutorchError(forwardResult.error(), + "The model's forward function did not succeed. " + "Ensure the model input is correct."); + } + + return postprocess(forwardResult->at(0).toTensor(), originalSize, allClasses, + classesOfInterest, resize); +} + +SegmentationResult BaseImageSegmentation::generateFromString( std::string imageSource, std::vector allClasses, std::set> classesOfInterest, bool resize) { cv::Size originalSize; - auto inputTensor = preprocess(imageSource, originalSize); + auto inputTensor = preprocessFromString(imageSource, originalSize); auto forwardResult = BaseModel::forward(inputTensor); @@ -82,7 +138,29 @@ std::shared_ptr BaseImageSegmentation::generate( classesOfInterest, resize); } -std::shared_ptr BaseImageSegmentation::postprocess( +SegmentationResult BaseImageSegmentation::generateFromFrame( + jsi::Runtime &runtime, const jsi::Value &frameData, + std::vector allClasses, + std::set> classesOfInterest, bool resize) { + // extractFromFrame rotates landscape frames 90Β° CW automatically. 
+ cv::Mat frame = extractFromFrame(runtime, frameData); + cv::Size originalSize = frame.size(); + + return runInference(frame, originalSize, std::move(allClasses), + std::move(classesOfInterest), resize); +} + +SegmentationResult BaseImageSegmentation::generateFromPixels( + JSTensorViewIn pixelData, std::vector allClasses, + std::set> classesOfInterest, bool resize) { + cv::Mat image = extractFromPixels(pixelData); + cv::Size originalSize = image.size(); + + return runInference(image, originalSize, std::move(allClasses), + std::move(classesOfInterest), resize); +} + +SegmentationResult BaseImageSegmentation::postprocess( const Tensor &tensor, cv::Size originalSize, std::vector &allClasses, std::set> &classesOfInterest, bool resize) { @@ -167,8 +245,8 @@ std::shared_ptr BaseImageSegmentation::postprocess( } // Filter classes of interest - auto buffersToReturn = std::make_shared>>(); + auto buffersToReturn = std::make_shared< + std::unordered_map>>(); for (std::size_t cl = 0; cl < resultClasses.size(); ++cl) { if (cl < allClasses.size() && classesOfInterest.contains(allClasses[cl])) { (*buffersToReturn)[allClasses[cl]] = resultClasses[cl]; @@ -191,48 +269,7 @@ std::shared_ptr BaseImageSegmentation::postprocess( } } - return populateDictionary(argmax, buffersToReturn); -} - -std::shared_ptr BaseImageSegmentation::populateDictionary( - std::shared_ptr argmax, - std::shared_ptr>> - classesToOutput) { - auto promisePtr = std::make_shared>(); - std::future doneFuture = promisePtr->get_future(); - - std::shared_ptr dictPtr = nullptr; - callInvoker->invokeAsync( - [argmax, classesToOutput, &dictPtr, promisePtr](jsi::Runtime &runtime) { - dictPtr = std::make_shared(runtime); - auto argmaxArrayBuffer = jsi::ArrayBuffer(runtime, argmax); - - auto int32ArrayCtor = - runtime.global().getPropertyAsFunction(runtime, "Int32Array"); - auto int32Array = - int32ArrayCtor.callAsConstructor(runtime, argmaxArrayBuffer) - .getObject(runtime); - dictPtr->setProperty(runtime, "ARGMAX", 
int32Array); - - for (auto &[classLabel, owningBuffer] : *classesToOutput) { - auto classArrayBuffer = jsi::ArrayBuffer(runtime, owningBuffer); - - auto float32ArrayCtor = - runtime.global().getPropertyAsFunction(runtime, "Float32Array"); - auto float32Array = - float32ArrayCtor.callAsConstructor(runtime, classArrayBuffer) - .getObject(runtime); - - dictPtr->setProperty( - runtime, jsi::String::createFromAscii(runtime, classLabel.data()), - float32Array); - } - promisePtr->set_value(); - }); - - doneFuture.wait(); - return dictPtr; + return SegmentationResult{argmax, buffersToReturn}; } } // namespace rnexecutorch::models::image_segmentation diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h index f46f41d69..49daf5ee5 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h @@ -8,7 +8,8 @@ #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" #include -#include +#include +#include namespace rnexecutorch { namespace models::image_segmentation { @@ -17,7 +18,7 @@ using namespace facebook; using executorch::aten::Tensor; using executorch::extension::TensorPtr; -class BaseImageSegmentation : public BaseModel { +class BaseImageSegmentation : public VisionModel { public: BaseImageSegmentation(const std::string &modelSource, std::shared_ptr callInvoker); @@ -26,14 +27,28 @@ class BaseImageSegmentation : public BaseModel { std::vector normMean, std::vector normStd, std::shared_ptr callInvoker); - [[nodiscard("Registered non-void function")]] std::shared_ptr - generate(std::string imageSource, std::vector allClasses, - std::set> classesOfInterest, bool resize); + [[nodiscard("Registered non-void function")]] SegmentationResult + 
generateFromString(std::string imageSource, + std::vector allClasses, + std::set> classesOfInterest, + bool resize); + + [[nodiscard("Registered non-void function")]] SegmentationResult + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData, + std::vector allClasses, + std::set> classesOfInterest, + bool resize); + + [[nodiscard("Registered non-void function")]] SegmentationResult + generateFromPixels(JSTensorViewIn pixelData, + std::vector allClasses, + std::set> classesOfInterest, + bool resize); protected: - virtual TensorPtr preprocess(const std::string &imageSource, - cv::Size &originalSize); - virtual std::shared_ptr + cv::Mat preprocessFrame(const cv::Mat &frame) const override; + + virtual SegmentationResult postprocess(const Tensor &tensor, cv::Size originalSize, std::vector &allClasses, std::set> &classesOfInterest, @@ -44,14 +59,15 @@ class BaseImageSegmentation : public BaseModel { std::optional normMean_; std::optional normStd_; - std::shared_ptr populateDictionary( - std::shared_ptr argmax, - std::shared_ptr>> - classesToOutput); - private: void initModelImageSize(); + + SegmentationResult runInference( + cv::Mat image, cv::Size originalSize, std::vector allClasses, + std::set> classesOfInterest, bool resize); + + TensorPtr preprocessFromString(const std::string &imageSource, + cv::Size &originalSize); }; } // namespace models::image_segmentation diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/Types.h b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/Types.h new file mode 100644 index 000000000..b5d6f5067 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/Types.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include +#include +#include + +namespace rnexecutorch::models::image_segmentation { + +struct SegmentationResult { + std::shared_ptr argmax; + std::shared_ptr< + std::unordered_map>> + classBuffers; +}; + +} // 
namespace rnexecutorch::models::image_segmentation diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index 2670cf9dd..7f7216b02 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -5,7 +5,6 @@ #include #include #include -#include namespace rnexecutorch::models::object_detection { @@ -144,9 +143,7 @@ std::vector ObjectDetection::generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData, double detectionThreshold) { - auto frameObj = frameData.asObject(runtime); - cv::Mat frame = rnexecutorch::utils::extractFrame(runtime, frameObj); - + cv::Mat frame = extractFromFrame(runtime, frameData); return runInference(frame, detectionThreshold); } diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp index a521b4e8b..50834a1b8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace rnexecutorch::models::ocr { OCR::OCR(const std::string &detectorSource, const std::string &recognizerSource, @@ -12,12 +13,8 @@ OCR::OCR(const std::string &detectorSource, const std::string &recognizerSource, : detector(detectorSource, callInvoker), recognitionHandler(recognizerSource, symbols, callInvoker) {} -std::vector OCR::generate(std::string input) { - cv::Mat image = image_processing::readImage(input); - if (image.empty()) { - throw RnExecutorchError(RnExecutorchErrorCode::FileReadFailed, - "Failed to load image from path: " + input); - } +std::vector OCR::runInference(cv::Mat image) { + 
std::scoped_lock lock(inference_mutex_); /* 1. Detection process returns the list of bounding boxes containing areas @@ -43,6 +40,63 @@ std::vector OCR::generate(std::string input) { return result; } +std::vector OCR::generateFromString(std::string input) { + cv::Mat image = image_processing::readImage(input); + if (image.empty()) { + throw RnExecutorchError(RnExecutorchErrorCode::FileReadFailed, + "Failed to load image from path: " + input); + } + return runInference(image); +} + +std::vector +OCR::generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData) { + auto frameObj = frameData.asObject(runtime); + cv::Mat frame = ::rnexecutorch::utils::extractFrame(runtime, frameObj); + // extractFrame returns RGB; convert to BGR for consistency with readImage + cv::cvtColor(frame, frame, cv::COLOR_RGB2BGR); + return runInference(frame); +} + +std::vector +OCR::generateFromPixels(JSTensorViewIn pixelData) { + if (pixelData.sizes.size() != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel data: sizes must have 3 elements " + "[height, width, channels], got %zu", + pixelData.sizes.size()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + int32_t height = pixelData.sizes[0]; + int32_t width = pixelData.sizes[1]; + int32_t channels = pixelData.sizes[2]; + + if (channels != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel data: expected 3 channels (RGB), got %d", + channels); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + if (pixelData.scalarType != executorch::aten::ScalarType::Byte) { + throw RnExecutorchError( + RnExecutorchErrorCode::InvalidUserInput, + "Invalid pixel data: scalarType must be BYTE (Uint8Array)"); + } + + uint8_t *dataPtr = static_cast(pixelData.dataPtr); + // Input is RGB from JS; convert to BGR for consistency with readImage + cv::Mat rgbImage(height, width, 
CV_8UC3, dataPtr); + cv::Mat image; + cv::cvtColor(rgbImage, image, cv::COLOR_RGB2BGR); + return runInference(image); +} + std::size_t OCR::getMemoryLowerBound() const noexcept { return detector.getMemoryLowerBound() + recognitionHandler.getMemoryLowerBound(); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h index d84ba903f..719cb957c 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h @@ -1,9 +1,11 @@ #pragma once +#include #include #include #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" +#include #include #include #include @@ -28,13 +30,20 @@ class OCR final { const std::string &recognizerSource, const std::string &symbols, std::shared_ptr callInvoker); [[nodiscard("Registered non-void function")]] std::vector - generate(std::string input); + generateFromString(std::string input); + [[nodiscard("Registered non-void function")]] std::vector + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData); + [[nodiscard("Registered non-void function")]] std::vector + generateFromPixels(JSTensorViewIn pixelData); std::size_t getMemoryLowerBound() const noexcept; void unload() noexcept; private: + std::vector runInference(cv::Mat image); + Detector detector; RecognitionHandler recognitionHandler; + mutable std::mutex inference_mutex_; }; } // namespace models::ocr diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp index 3b9c0187b..c334f5d84 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp @@ -6,6 +6,7 @@ #include #include #include 
+#include namespace rnexecutorch::models::style_transfer { using namespace facebook; @@ -13,7 +14,7 @@ using executorch::extension::TensorPtr; StyleTransfer::StyleTransfer(const std::string &modelSource, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { auto inputShapes = getAllInputShapes(); if (inputShapes.size() == 0) { throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, @@ -33,17 +34,67 @@ StyleTransfer::StyleTransfer(const std::string &modelSource, modelInputShape[modelInputShape.size() - 2]); } -std::string StyleTransfer::postprocess(const Tensor &tensor, - cv::Size originalSize) { +cv::Mat StyleTransfer::preprocessFrame(const cv::Mat &frame) const { + cv::Mat rgb; + + if (frame.channels() == 4) { +#ifdef __APPLE__ + cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); +#else + cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); +#endif + } else if (frame.channels() == 3) { + rgb = frame; + } else { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported frame format: %d channels", frame.channels()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + if (rgb.size() != modelImageSize) { + cv::Mat resized; + cv::resize(rgb, resized, modelImageSize); + return resized; + } + + return rgb; +} + +PixelDataResult StyleTransfer::postprocess(const Tensor &tensor, + cv::Size outputSize) { + // Convert tensor output (at modelImageSize) to CV_8UC3 BGR mat cv::Mat mat = image_processing::getMatrixFromTensor(modelImageSize, tensor); - cv::resize(mat, mat, originalSize); - return image_processing::saveToTempFile(mat); + // Resize only if requested output differs from model output size + if (mat.size() != outputSize) { + cv::resize(mat, mat, outputSize); + } + + // Convert BGR -> RGBA so JS can pass the buffer directly to Skia + cv::Mat rgba; + cv::cvtColor(mat, rgba, cv::COLOR_BGR2RGBA); + + std::size_t dataSize = + 
static_cast(outputSize.width) * outputSize.height * 4; + auto pixelBuffer = std::make_shared(rgba.data, dataSize); + log(LOG_LEVEL::Debug, + "[StyleTransfer] postprocess: RGBA buffer size:", dataSize, + "w:", outputSize.width, "h:", outputSize.height); + + return PixelDataResult{pixelBuffer, outputSize.width, outputSize.height}; } -std::string StyleTransfer::generate(std::string imageSource) { - auto [inputTensor, originalSize] = - image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]); +PixelDataResult StyleTransfer::runInference(cv::Mat image, + cv::Size originalSize) { + std::scoped_lock lock(inference_mutex_); + + cv::Mat preprocessed = preprocessFrame(image); + + const std::vector tensorDims = getAllInputShapes()[0]; + auto inputTensor = + image_processing::getTensorFromMatrix(tensorDims, preprocessed); auto forwardResult = BaseModel::forward(inputTensor); if (!forwardResult.ok()) { @@ -55,4 +106,31 @@ std::string StyleTransfer::generate(std::string imageSource) { return postprocess(forwardResult->at(0).toTensor(), originalSize); } +PixelDataResult StyleTransfer::generateFromString(std::string imageSource) { + cv::Mat imageBGR = image_processing::readImage(imageSource); + cv::Size originalSize = imageBGR.size(); + + cv::Mat imageRGB; + cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); + + return runInference(imageRGB, originalSize); +} + +PixelDataResult StyleTransfer::generateFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) { + // extractFromFrame rotates landscape frames 90Β° CW automatically. + cv::Mat frame = extractFromFrame(runtime, frameData); + + // For real-time frame processing, output at modelImageSize to avoid + // allocating large buffers (e.g. 1280x720x3 ~2.7MB) on every frame. 
+ return runInference(frame, modelImageSize); +} + +PixelDataResult StyleTransfer::generateFromPixels(JSTensorViewIn pixelData) { + cv::Mat image = extractFromPixels(pixelData); + cv::Size originalSize = image.size(); + + return runInference(image, originalSize); +} + } // namespace rnexecutorch::models::style_transfer diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h index 73744c4d8..99f9f4b3a 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h @@ -9,7 +9,9 @@ #include #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" -#include +#include +#include +#include namespace rnexecutorch { namespace models::style_transfer { @@ -17,15 +19,30 @@ using namespace facebook; using executorch::aten::Tensor; using executorch::extension::TensorPtr; -class StyleTransfer : public BaseModel { +class StyleTransfer : public VisionModel { public: StyleTransfer(const std::string &modelSource, std::shared_ptr callInvoker); - [[nodiscard("Registered non-void function")]] std::string - generate(std::string imageSource); + + [[nodiscard("Registered non-void function")]] PixelDataResult + generateFromString(std::string imageSource); + + [[nodiscard("Registered non-void function")]] PixelDataResult + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData); + + [[nodiscard("Registered non-void function")]] PixelDataResult + generateFromPixels(JSTensorViewIn pixelData); + +protected: + cv::Mat preprocessFrame(const cv::Mat &frame) const override; private: - std::string postprocess(const Tensor &tensor, cv::Size originalSize); + // outputSize: size to resize the styled output to before returning. + // Pass modelImageSize for real-time frame processing (avoids large allocs). 
+ // Pass the source image size for generateFromString/generateFromPixels. + PixelDataResult runInference(cv::Mat image, cv::Size outputSize); + + PixelDataResult postprocess(const Tensor &tensor, cv::Size outputSize); cv::Size modelImageSize{0, 0}; }; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/Types.h b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/Types.h new file mode 100644 index 000000000..f677183a6 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/Types.h @@ -0,0 +1,14 @@ +#pragma once + +#include +#include + +namespace rnexecutorch::models::style_transfer { + +struct PixelDataResult { + std::shared_ptr dataPtr; + int width; + int height; +}; + +} // namespace rnexecutorch::models::style_transfer diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp index 0f75d2015..71ea737f8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp @@ -1,10 +1,12 @@ #include "VerticalOCR.h" #include #include +#include #include #include #include #include +#include #include namespace rnexecutorch::models::ocr { @@ -16,12 +18,9 @@ VerticalOCR::VerticalOCR(const std::string &detectorSource, converter(symbols), independentCharacters(independentChars), callInvoker(invoker) {} -std::vector VerticalOCR::generate(std::string input) { - cv::Mat image = image_processing::readImage(input); - if (image.empty()) { - throw RnExecutorchError(RnExecutorchErrorCode::FileReadFailed, - "Failed to load image from path: " + input); - } +std::vector VerticalOCR::runInference(cv::Mat image) { + std::scoped_lock lock(inference_mutex_); + // 1. 
Large Detector std::vector largeBoxes = detector.generate(image, constants::kLargeDetectorWidth); @@ -44,6 +43,65 @@ std::vector VerticalOCR::generate(std::string input) { return predictions; } +std::vector +VerticalOCR::generateFromString(std::string input) { + cv::Mat image = image_processing::readImage(input); + if (image.empty()) { + throw RnExecutorchError(RnExecutorchErrorCode::FileReadFailed, + "Failed to load image from path: " + input); + } + return runInference(image); +} + +std::vector +VerticalOCR::generateFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) { + auto frameObj = frameData.asObject(runtime); + cv::Mat frame = ::rnexecutorch::utils::extractFrame(runtime, frameObj); + // extractFrame returns RGB; convert to BGR for consistency with readImage + cv::cvtColor(frame, frame, cv::COLOR_RGB2BGR); + return runInference(frame); +} + +std::vector +VerticalOCR::generateFromPixels(JSTensorViewIn pixelData) { + if (pixelData.sizes.size() != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel data: sizes must have 3 elements " + "[height, width, channels], got %zu", + pixelData.sizes.size()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + int32_t height = pixelData.sizes[0]; + int32_t width = pixelData.sizes[1]; + int32_t channels = pixelData.sizes[2]; + + if (channels != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel data: expected 3 channels (RGB), got %d", + channels); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + if (pixelData.scalarType != executorch::aten::ScalarType::Byte) { + throw RnExecutorchError( + RnExecutorchErrorCode::InvalidUserInput, + "Invalid pixel data: scalarType must be BYTE (Uint8Array)"); + } + + uint8_t *dataPtr = static_cast(pixelData.dataPtr); + // Input is RGB from JS; convert to BGR for consistency with readImage + cv::Mat 
rgbImage(height, width, CV_8UC3, dataPtr); + cv::Mat image; + cv::cvtColor(rgbImage, image, cv::COLOR_RGB2BGR); + return runInference(image); +} + std::size_t VerticalOCR::getMemoryLowerBound() const noexcept { return detector.getMemoryLowerBound() + recognizer.getMemoryLowerBound(); } diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h index e97fb9034..4016e2813 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h @@ -1,12 +1,14 @@ #pragma once #include +#include #include #include #include #include #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" +#include #include #include #include @@ -48,11 +50,17 @@ class VerticalOCR final { bool indpendentCharacters, std::shared_ptr callInvoker); [[nodiscard("Registered non-void function")]] std::vector - generate(std::string input); + generateFromString(std::string input); + [[nodiscard("Registered non-void function")]] std::vector + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData); + [[nodiscard("Registered non-void function")]] std::vector + generateFromPixels(JSTensorViewIn pixelData); std::size_t getMemoryLowerBound() const noexcept; void unload() noexcept; private: + std::vector runInference(cv::Mat image); + std::pair _handleIndependentCharacters( const types::DetectorBBox &box, const cv::Mat &originalImage, const std::vector &characterBoxes, @@ -75,6 +83,7 @@ class VerticalOCR final { CTCLabelConverter converter; bool independentCharacters; std::shared_ptr callInvoker; + mutable std::mutex inference_mutex_; }; } // namespace models::ocr diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ClassificationTest.cpp 
b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ClassificationTest.cpp index 10aa663a4..b64f167c9 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ClassificationTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ClassificationTest.cpp @@ -28,7 +28,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generate(kValidTestImagePath); + (void)model.generateFromString(kValidTestImagePath); } }; } // namespace model_tests @@ -42,37 +42,37 @@ INSTANTIATE_TYPED_TEST_SUITE_P(Classification, CommonModelTest, // ============================================================================ TEST(ClassificationGenerateTests, InvalidImagePathThrows) { Classification model(kValidClassificationModelPath, nullptr); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } TEST(ClassificationGenerateTests, EmptyImagePathThrows) { Classification model(kValidClassificationModelPath, nullptr); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(ClassificationGenerateTests, MalformedURIThrows) { Classification model(kValidClassificationModelPath, nullptr); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } TEST(ClassificationGenerateTests, ValidImageReturnsResults) { Classification model(kValidClassificationModelPath, nullptr); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); EXPECT_FALSE(results.empty()); } TEST(ClassificationGenerateTests, ResultsHaveCorrectSize) { Classification model(kValidClassificationModelPath, nullptr); - auto results = model.generate(kValidTestImagePath); + auto results = 
model.generateFromString(kValidTestImagePath); auto expectedNumClasses = constants::kImagenet1kV1Labels.size(); EXPECT_EQ(results.size(), expectedNumClasses); } TEST(ClassificationGenerateTests, ResultsContainValidProbabilities) { Classification model(kValidClassificationModelPath, nullptr); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); float sum = 0.0f; for (const auto &[label, prob] : results) { @@ -85,7 +85,7 @@ TEST(ClassificationGenerateTests, ResultsContainValidProbabilities) { TEST(ClassificationGenerateTests, TopPredictionHasReasonableConfidence) { Classification model(kValidClassificationModelPath, nullptr); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); float maxProb = 0.0f; for (const auto &[label, prob] : results) { diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ImageEmbeddingsTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ImageEmbeddingsTest.cpp index 3a2374695..ba76939a8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ImageEmbeddingsTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ImageEmbeddingsTest.cpp @@ -29,7 +29,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generate(kValidTestImagePath); + (void)model.generateFromString(kValidTestImagePath); } }; } // namespace model_tests @@ -43,31 +43,31 @@ INSTANTIATE_TYPED_TEST_SUITE_P(ImageEmbeddings, CommonModelTest, // ============================================================================ TEST(ImageEmbeddingsGenerateTests, InvalidImagePathThrows) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } 
TEST(ImageEmbeddingsGenerateTests, EmptyImagePathThrows) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(ImageEmbeddingsGenerateTests, MalformedURIThrows) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } TEST(ImageEmbeddingsGenerateTests, ValidImageReturnsResults) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); + auto result = model.generateFromString(kValidTestImagePath); EXPECT_NE(result, nullptr); EXPECT_GT(result->size(), 0u); } TEST(ImageEmbeddingsGenerateTests, ResultsHaveCorrectSize) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); + auto result = model.generateFromString(kValidTestImagePath); size_t numFloats = result->size() / sizeof(float); constexpr size_t kClipEmbeddingDimensions = 512; EXPECT_EQ(numFloats, kClipEmbeddingDimensions); @@ -77,7 +77,7 @@ TEST(ImageEmbeddingsGenerateTests, ResultsAreNormalized) { // TODO: Investigate the source of the issue; GTEST_SKIP() << "Expected to fail in emulator environments"; ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); + auto result = model.generateFromString(kValidTestImagePath); const float *data = reinterpret_cast(result->data()); size_t numFloats = result->size() / sizeof(float); @@ -92,7 +92,7 @@ TEST(ImageEmbeddingsGenerateTests, ResultsAreNormalized) { TEST(ImageEmbeddingsGenerateTests, ResultsContainValidValues) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); + auto result = 
model.generateFromString(kValidTestImagePath); const float *data = reinterpret_cast(result->data()); size_t numFloats = result->size() / sizeof(float); diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/OCRTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/OCRTest.cpp index 428fb5afb..6f6f708be 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/OCRTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/OCRTest.cpp @@ -41,7 +41,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generate(kValidTestImagePath); + (void)model.generateFromString(kValidTestImagePath); } }; } // namespace model_tests @@ -67,27 +67,27 @@ TEST(OCRCtorTests, EmptySymbolsThrows) { TEST(OCRGenerateTests, InvalidImagePathThrows) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } TEST(OCRGenerateTests, EmptyImagePathThrows) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(OCRGenerateTests, MalformedURIThrows) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } TEST(OCRGenerateTests, ValidImageReturnsResults) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); // May or may not have 
detections depending on image content EXPECT_GE(results.size(), 0u); } @@ -95,7 +95,7 @@ TEST(OCRGenerateTests, ValidImageReturnsResults) { TEST(OCRGenerateTests, DetectionsHaveValidBoundingBoxes) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); for (const auto &detection : results) { // Each bbox should have 4 points @@ -110,7 +110,7 @@ TEST(OCRGenerateTests, DetectionsHaveValidBoundingBoxes) { TEST(OCRGenerateTests, DetectionsHaveValidScores) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); for (const auto &detection : results) { EXPECT_GE(detection.score, 0.0f); @@ -121,7 +121,7 @@ TEST(OCRGenerateTests, DetectionsHaveValidScores) { TEST(OCRGenerateTests, DetectionsHaveNonEmptyText) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); for (const auto &detection : results) { EXPECT_FALSE(detection.text.empty()); } diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp index 3e6951617..5fbf798b6 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp @@ -1,6 +1,4 @@ #include "BaseModelTests.h" -#include "utils/TestUtils.h" -#include #include #include #include @@ -30,7 +28,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - 
(void)model.generate(kValidTestImagePath); + (void)model.generateFromString(kValidTestImagePath); } }; } // namespace model_tests @@ -44,51 +42,34 @@ INSTANTIATE_TYPED_TEST_SUITE_P(StyleTransfer, CommonModelTest, // ============================================================================ TEST(StyleTransferGenerateTests, InvalidImagePathThrows) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } TEST(StyleTransferGenerateTests, EmptyImagePathThrows) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(StyleTransferGenerateTests, MalformedURIThrows) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } -TEST(StyleTransferGenerateTests, ValidImageReturnsFilePath) { +TEST(StyleTransferGenerateTests, ValidImageReturnsNonNull) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); - EXPECT_FALSE(result.empty()); -} - -TEST(StyleTransferGenerateTests, ResultIsValidFilePath) { - StyleTransfer model(kValidStyleTransferModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); - test_utils::trimFilePrefix(result); - EXPECT_TRUE(std::filesystem::exists(result)); -} - -TEST(StyleTransferGenerateTests, ResultFileHasContent) { - StyleTransfer model(kValidStyleTransferModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); - test_utils::trimFilePrefix(result); - auto fileSize = std::filesystem::file_size(result); - EXPECT_GT(fileSize, 0u); + auto result = model.generateFromString(kValidTestImagePath); + 
EXPECT_NE(result, nullptr); } TEST(StyleTransferGenerateTests, MultipleGeneratesWork) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); - EXPECT_NO_THROW((void)model.generate(kValidTestImagePath)); - auto result1 = model.generate(kValidTestImagePath); - auto result2 = model.generate(kValidTestImagePath); - test_utils::trimFilePrefix(result1); - test_utils::trimFilePrefix(result2); - EXPECT_TRUE(std::filesystem::exists(result1)); - EXPECT_TRUE(std::filesystem::exists(result2)); + EXPECT_NO_THROW((void)model.generateFromString(kValidTestImagePath)); + auto result1 = model.generateFromString(kValidTestImagePath); + auto result2 = model.generateFromString(kValidTestImagePath); + EXPECT_NE(result1, nullptr); + EXPECT_NE(result2, nullptr); } TEST(StyleTransferInheritedTests, GetInputShapeWorks) { diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp index 7b1010a81..56f18d862 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp @@ -43,7 +43,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generate(kValidVerticalTestImagePath); + (void)model.generateFromString(kValidVerticalTestImagePath); } }; } // namespace model_tests @@ -85,34 +85,34 @@ TEST(VerticalOCRCtorTests, IndependentCharsFalseDoesntThrow) { TEST(VerticalOCRGenerateTests, IndependentCharsInvalidImageThrows) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } TEST(VerticalOCRGenerateTests, IndependentCharsEmptyImagePathThrows) { 
VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(VerticalOCRGenerateTests, IndependentCharsMalformedURIThrows) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } TEST(VerticalOCRGenerateTests, IndependentCharsValidImageReturnsResults) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); EXPECT_GE(results.size(), 0u); } TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveValidBBoxes) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_EQ(detection.bbox.size(), 4u); @@ -126,7 +126,7 @@ TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveValidBBoxes) { TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveValidScores) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_GE(detection.score, 0.0f); @@ -137,7 +137,7 @@ TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveValidScores) { 
TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveNonEmptyText) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_FALSE(detection.text.empty()); @@ -148,34 +148,34 @@ TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveNonEmptyText) { TEST(VerticalOCRGenerateTests, JointCharsInvalidImageThrows) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } TEST(VerticalOCRGenerateTests, JointCharsEmptyImagePathThrows) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(VerticalOCRGenerateTests, JointCharsMalformedURIThrows) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } TEST(VerticalOCRGenerateTests, JointCharsValidImageReturnsResults) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); EXPECT_GE(results.size(), 0u); } TEST(VerticalOCRGenerateTests, JointCharsDetectionsHaveValidBBoxes) { VerticalOCR 
model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_EQ(detection.bbox.size(), 4u); @@ -189,7 +189,7 @@ TEST(VerticalOCRGenerateTests, JointCharsDetectionsHaveValidBBoxes) { TEST(VerticalOCRGenerateTests, JointCharsDetectionsHaveValidScores) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_GE(detection.score, 0.0f); @@ -200,7 +200,7 @@ TEST(VerticalOCRGenerateTests, JointCharsDetectionsHaveValidScores) { TEST(VerticalOCRGenerateTests, JointCharsDetectionsHaveNonEmptyText) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_FALSE(detection.text.empty()); @@ -216,8 +216,10 @@ TEST(VerticalOCRStrategyTests, BothStrategiesRunSuccessfully) { kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - EXPECT_NO_THROW((void)independentModel.generate(kValidVerticalTestImagePath)); - EXPECT_NO_THROW((void)jointModel.generate(kValidVerticalTestImagePath)); + EXPECT_NO_THROW( + (void)independentModel.generateFromString(kValidVerticalTestImagePath)); + EXPECT_NO_THROW( + (void)jointModel.generateFromString(kValidVerticalTestImagePath)); } TEST(VerticalOCRStrategyTests, BothStrategiesReturnValidResults) { @@ -229,8 +231,9 @@ TEST(VerticalOCRStrategyTests, BothStrategiesReturnValidResults) { 
createMockCallInvoker()); auto independentResults = - independentModel.generate(kValidVerticalTestImagePath); - auto jointResults = jointModel.generate(kValidVerticalTestImagePath); + independentModel.generateFromString(kValidVerticalTestImagePath); + auto jointResults = + jointModel.generateFromString(kValidVerticalTestImagePath); // Both should return some results (or none if no text detected) EXPECT_GE(independentResults.size(), 0u); diff --git a/packages/react-native-executorch/src/controllers/BaseOCRController.ts b/packages/react-native-executorch/src/controllers/BaseOCRController.ts index c124dadce..b6e5c3a5b 100644 --- a/packages/react-native-executorch/src/controllers/BaseOCRController.ts +++ b/packages/react-native-executorch/src/controllers/BaseOCRController.ts @@ -2,10 +2,24 @@ import { Logger } from '../common/Logger'; import { symbols } from '../constants/ocr/symbols'; import { RnExecutorchErrorCode } from '../errors/ErrorCodes'; import { RnExecutorchError, parseUnknownError } from '../errors/errorUtils'; -import { ResourceSource } from '../types/common'; +import { Frame, PixelData, ResourceSource, ScalarType } from '../types/common'; import { OCRLanguage, OCRDetection } from '../types/ocr'; import { ResourceFetcher } from '../utils/ResourceFetcher'; +function isPixelData(input: unknown): input is PixelData { + return ( + typeof input === 'object' && + input !== null && + 'dataPtr' in input && + input.dataPtr instanceof Uint8Array && + 'sizes' in input && + Array.isArray(input.sizes) && + input.sizes.length === 3 && + 'scalarType' in input && + input.scalarType === ScalarType.BYTE + ); +} + export abstract class BaseOCRController { protected nativeModule: any; public isReady: boolean = false; @@ -87,7 +101,34 @@ export abstract class BaseOCRController { } }; - public forward = async (imageSource: string): Promise => { + get runOnFrame(): ((frame: Frame) => OCRDetection[]) | null { + if (!this.nativeModule?.generateFromFrame) { + return null; + } + + 
const nativeGenerateFromFrame = this.nativeModule.generateFromFrame; + + return (frame: any): OCRDetection[] => { + 'worklet'; + + let nativeBuffer: any = null; + try { + nativeBuffer = frame.getNativeBuffer(); + const frameData = { + nativeBuffer: nativeBuffer.pointer, + }; + return nativeGenerateFromFrame(frameData); + } finally { + if (nativeBuffer?.release) { + nativeBuffer.release(); + } + } + }; + } + + public forward = async ( + input: string | PixelData + ): Promise => { if (!this.isReady) { throw new RnExecutorchError( RnExecutorchErrorCode.ModuleNotLoaded, @@ -104,7 +145,17 @@ export abstract class BaseOCRController { try { this.isGenerating = true; this.isGeneratingCallback(this.isGenerating); - return await this.nativeModule.generate(imageSource); + + if (typeof input === 'string') { + return await this.nativeModule.generateFromString(input); + } else if (isPixelData(input)) { + return await this.nativeModule.generateFromPixels(input); + } else { + throw new RnExecutorchError( + RnExecutorchErrorCode.InvalidArgument, + 'Invalid input: expected string path or PixelData object. For VisionCamera frames, use runOnFrame instead.' 
+ ); + } } catch (e) { throw parseUnknownError(e); } finally { diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts b/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts index 88831f9aa..55b8d8500 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts @@ -9,6 +9,7 @@ import { ModelNameOf, ModelSources, } from '../../types/imageSegmentation'; +import { Frame } from '../../types/common'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { RnExecutorchError, parseUnknownError } from '../../errors/errorUtils'; @@ -41,6 +42,14 @@ export const useImageSegmentation = ({ const [instance, setInstance] = useState > | null>(null); + const [runOnFrame, setRunOnFrame] = useState< + | (( + frame: Frame, + classesOfInterest?: string[], + resizeToInput?: boolean + ) => any) + | null + >(null); useEffect(() => { if (preventLoad) return; @@ -62,6 +71,10 @@ export const useImageSegmentation = ({ if (isMounted) { setInstance(currentInstance); setIsReady(true); + const worklet = currentInstance.runOnFrame; + if (worklet) { + setRunOnFrame(() => worklet); + } } } catch (err) { if (isMounted) setError(parseUnknownError(err)); @@ -70,6 +83,8 @@ export const useImageSegmentation = ({ return () => { isMounted = false; + setIsReady(false); + setRunOnFrame(null); currentInstance?.delete(); }; @@ -111,5 +126,6 @@ export const useImageSegmentation = ({ isGenerating, downloadProgress, forward, + runOnFrame, }; }; diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts b/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts index 6b2868834..967f750c6 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts @@ -1,5 +1,6 @@ import { 
useEffect, useState } from 'react'; -import { OCRProps, OCRType } from '../../types/ocr'; +import { OCRProps, OCRType, OCRDetection } from '../../types/ocr'; +import { Frame } from '../../types/common'; import { OCRController } from '../../controllers/OCRController'; import { RnExecutorchError } from '../../errors/errorUtils'; @@ -15,6 +16,9 @@ export const useOCR = ({ model, preventLoad = false }: OCRProps): OCRType => { const [isReady, setIsReady] = useState(false); const [isGenerating, setIsGenerating] = useState(false); const [downloadProgress, setDownloadProgress] = useState(0); + const [runOnFrame, setRunOnFrame] = useState< + ((frame: Frame) => OCRDetection[]) | null + >(null); const [controllerInstance] = useState( () => @@ -35,9 +39,16 @@ export const useOCR = ({ model, preventLoad = false }: OCRProps): OCRType => { model.language, setDownloadProgress ); + + const worklet = controllerInstance.runOnFrame; + if (worklet) { + setRunOnFrame(() => worklet); + } })(); return () => { + setRunOnFrame(null); + setIsReady(false); controllerInstance.delete(); }; }, [ @@ -54,5 +65,6 @@ export const useOCR = ({ model, preventLoad = false }: OCRProps): OCRType => { isGenerating, forward: controllerInstance.forward, downloadProgress, + runOnFrame, }; }; diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts b/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts index eb9d289eb..bd479aea2 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts @@ -1,5 +1,6 @@ import { useEffect, useState } from 'react'; -import { OCRType, VerticalOCRProps } from '../../types/ocr'; +import { OCRType, VerticalOCRProps, OCRDetection } from '../../types/ocr'; +import { Frame } from '../../types/common'; import { VerticalOCRController } from '../../controllers/VerticalOCRController'; import { RnExecutorchError } from 
'../../errors/errorUtils'; @@ -19,6 +20,9 @@ export const useVerticalOCR = ({ const [isReady, setIsReady] = useState(false); const [isGenerating, setIsGenerating] = useState(false); const [downloadProgress, setDownloadProgress] = useState(0); + const [runOnFrame, setRunOnFrame] = useState< + ((frame: Frame) => OCRDetection[]) | null + >(null); const [controllerInstance] = useState( () => @@ -40,9 +44,16 @@ export const useVerticalOCR = ({ independentCharacters, setDownloadProgress ); + + const worklet = controllerInstance.runOnFrame; + if (worklet) { + setRunOnFrame(() => worklet); + } })(); return () => { + setRunOnFrame(null); + setIsReady(false); controllerInstance.delete(); }; }, [ @@ -60,5 +71,6 @@ export const useVerticalOCR = ({ isGenerating, forward: controllerInstance.forward, downloadProgress, + runOnFrame, }; }; diff --git a/packages/react-native-executorch/src/hooks/useModule.ts b/packages/react-native-executorch/src/hooks/useModule.ts index 82a0bb72f..9838c4aa0 100644 --- a/packages/react-native-executorch/src/hooks/useModule.ts +++ b/packages/react-native-executorch/src/hooks/useModule.ts @@ -62,6 +62,8 @@ export const useModule = < return () => { isMounted = false; + setIsReady(false); + setRunOnFrame(null); moduleInstance.delete(); }; diff --git a/packages/react-native-executorch/src/modules/computer_vision/ClassificationModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ClassificationModule.ts index 45b7e2b39..d0735ae26 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ClassificationModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ClassificationModule.ts @@ -1,16 +1,18 @@ import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource } from '../../types/common'; -import { BaseModule } from '../BaseModule'; +import { ResourceSource, PixelData } from '../../types/common'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { 
parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; import { Logger } from '../../common/Logger'; +import { VisionModule } from './VisionModule'; /** * Module for image classification tasks. * * @category Typescript API */ -export class ClassificationModule extends BaseModule { +export class ClassificationModule extends VisionModule<{ + [category: string]: number; +}> { /** * Loads the model, where `modelSource` is a string that specifies the location of the model binary. * To track the download progress, supply a callback function `onDownloadProgressCallback`. @@ -42,18 +44,9 @@ export class ClassificationModule extends BaseModule { } } - /** - * Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. - * - * @param imageSource - The image source to be classified. - * @returns The classification result. - */ - async forward(imageSource: string): Promise<{ [category: string]: number }> { - if (this.nativeModule == null) - throw new RnExecutorchError( - RnExecutorchErrorCode.ModuleNotLoaded, - 'The model is currently not loaded. Please load the model before calling forward().' 
- ); - return await this.nativeModule.generate(imageSource); + async forward( + input: string | PixelData + ): Promise<{ [category: string]: number }> { + return super.forward(input); } } diff --git a/packages/react-native-executorch/src/modules/computer_vision/ImageEmbeddingsModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ImageEmbeddingsModule.ts index 3e62f450d..6fb78c4cc 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ImageEmbeddingsModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ImageEmbeddingsModule.ts @@ -1,16 +1,16 @@ import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource } from '../../types/common'; +import { ResourceSource, PixelData } from '../../types/common'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; -import { BaseModule } from '../BaseModule'; import { Logger } from '../../common/Logger'; +import { VisionModule } from './VisionModule'; /** * Module for generating image embeddings from input images. * * @category Typescript API */ -export class ImageEmbeddingsModule extends BaseModule { +export class ImageEmbeddingsModule extends VisionModule { /** * Loads the model, where `modelSource` is a string that specifies the location of the model binary. * @@ -41,18 +41,8 @@ export class ImageEmbeddingsModule extends BaseModule { } } - /** - * Executes the model's forward pass. Returns an embedding array for a given sentence. - * - * @param imageSource - The image source (URI/URL) to image that will be embedded. - * @returns A Float32Array containing the image embeddings. - */ - async forward(imageSource: string): Promise { - if (this.nativeModule == null) - throw new RnExecutorchError( - RnExecutorchErrorCode.ModuleNotLoaded, - 'The model is currently not loaded. Please load the model before calling forward().' 
- ); - return new Float32Array(await this.nativeModule.generate(imageSource)); + async forward(input: string | PixelData): Promise { + const result = await super.forward(input); + return new Float32Array(result as unknown as ArrayBuffer); } } diff --git a/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts index f2de6edd7..b2f7c908f 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts @@ -1,5 +1,11 @@ import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource, LabelEnum } from '../../types/common'; +import { + ResourceSource, + LabelEnum, + Frame, + PixelData, + ScalarType, +} from '../../types/common'; import { DeeplabLabel, ModelNameOf, @@ -47,6 +53,20 @@ export type SegmentationLabels = type ResolveLabels = T extends SegmentationModelName ? SegmentationLabels : T; +function isPixelData(input: unknown): input is PixelData { + return ( + typeof input === 'object' && + input !== null && + 'dataPtr' in input && + (input as any).dataPtr instanceof Uint8Array && + 'sizes' in input && + Array.isArray((input as any).sizes) && + (input as any).sizes.length === 3 && + 'scalarType' in input && + (input as any).scalarType === ScalarType.BYTE + ); +} + /** * Generic image segmentation module with type-safe label maps. * Use a model name (e.g. `'deeplab-v3'`) as the generic parameter for built-in models, @@ -75,6 +95,75 @@ export class ImageSegmentationModule< // TODO: figure it out so we can delete this (we need this because of basemodule inheritance) override async load() {} + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. 
+ * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded. + * + * @example + * ```typescript + * const [runOnFrame, setRunOnFrame] = useState(null); + * setRunOnFrame(() => segmentation.runOnFrame); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!runOnFrame) return; + * const result = runOnFrame(frame, [], true); + * frame.dispose(); + * } + * }); + * ``` + * + * @param frame - VisionCamera Frame object + * @param classesOfInterest - Labels for which to return per-class probability masks. + * @param resizeToInput - Whether to resize masks to original frame dimensions. Defaults to `true`. + */ + get runOnFrame(): + | (( + frame: Frame, + classesOfInterest?: string[], + resizeToInput?: boolean + ) => any) + | null { + if (!this.nativeModule?.generateFromFrame) { + return null; + } + + const nativeGenerateFromFrame = this.nativeModule.generateFromFrame; + const allClassNames = this.allClassNames; + + return ( + frame: any, + classesOfInterest: string[] = [], + resizeToInput: boolean = true + ): any => { + 'worklet'; + + let nativeBuffer: any = null; + try { + nativeBuffer = frame.getNativeBuffer(); + const frameData = { + nativeBuffer: nativeBuffer.pointer, + }; + return nativeGenerateFromFrame( + frameData, + allClassNames, + classesOfInterest, + resizeToInput + ); + } finally { + if (nativeBuffer?.release) { + nativeBuffer.release(); + } + } + }; + } + /** * Creates a segmentation instance for a built-in model. * The config object is discriminated by `modelName` β€” each model can require different fields. @@ -167,14 +256,20 @@ export class ImageSegmentationModule< /** * Executes the model's forward pass to perform semantic segmentation on the provided image. * - * @param imageSource - A string representing the image source (e.g., a file path, URI, or Base64-encoded string). + * Supports two input types: + * 1. 
**String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) * @param classesOfInterest - An optional list of label keys indicating which per-class probability masks to include in the output. `ARGMAX` is always returned regardless. * @param resizeToInput - Whether to resize the output masks to the original input image dimensions. If `false`, returns the raw model output dimensions. Defaults to `true`. * @returns A Promise resolving to an object with an `'ARGMAX'` key mapped to an `Int32Array` of per-pixel class indices, and each requested class label mapped to a `Float32Array` of per-pixel probabilities. * @throws {RnExecutorchError} If the model is not loaded. */ async forward>( - imageSource: string, + input: string | PixelData, classesOfInterest: K[] = [], resizeToInput: boolean = true ): Promise & Record> { @@ -189,14 +284,29 @@ export class ImageSegmentationModule< String(label) ); - const nativeResult = await this.nativeModule.generate( - imageSource, - this.allClassNames, - classesOfInterestNames, - resizeToInput - ); - - return nativeResult as Record<'ARGMAX', Int32Array> & - Record; + if (typeof input === 'string') { + const nativeResult = await this.nativeModule.generateFromString( + input, + this.allClassNames, + classesOfInterestNames, + resizeToInput + ); + return nativeResult as Record<'ARGMAX', Int32Array> & + Record; + } else if (isPixelData(input)) { + const nativeResult = await this.nativeModule.generateFromPixels( + input, + this.allClassNames, + classesOfInterestNames, + resizeToInput + ); + return nativeResult as Record<'ARGMAX', Int32Array> & + Record; + } else { + throw new RnExecutorchError( + RnExecutorchErrorCode.InvalidArgument, + 'Invalid input: expected string path or PixelData object. 
For VisionCamera frames, use runOnFrame instead.' + ); + } } } diff --git a/packages/react-native-executorch/src/modules/computer_vision/StyleTransferModule.ts b/packages/react-native-executorch/src/modules/computer_vision/StyleTransferModule.ts index 90e5242de..fc83f7bc7 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/StyleTransferModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/StyleTransferModule.ts @@ -1,16 +1,16 @@ import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource } from '../../types/common'; +import { ResourceSource, PixelData } from '../../types/common'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; -import { BaseModule } from '../BaseModule'; import { Logger } from '../../common/Logger'; +import { VisionModule } from './VisionModule'; /** * Module for style transfer tasks. * * @category Typescript API */ -export class StyleTransferModule extends BaseModule { +export class StyleTransferModule extends VisionModule { /** * Loads the model, where `modelSource` is a string that specifies the location of the model binary. * To track the download progress, supply a callback function `onDownloadProgressCallback`. @@ -42,18 +42,7 @@ export class StyleTransferModule extends BaseModule { } } - /** - * Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. - * - * @param imageSource - The image source to be processed. - * @returns The stylized image as a Base64-encoded string. - */ - async forward(imageSource: string): Promise { - if (this.nativeModule == null) - throw new RnExecutorchError( - RnExecutorchErrorCode.ModuleNotLoaded, - 'The model is currently not loaded. Please load the model before calling forward().' 
- ); - return await this.nativeModule.generate(imageSource); + async forward(input: string | PixelData): Promise { + return super.forward(input); } } diff --git a/packages/react-native-executorch/src/types/classification.ts b/packages/react-native-executorch/src/types/classification.ts index 51152ec08..64a20ecf3 100644 --- a/packages/react-native-executorch/src/types/classification.ts +++ b/packages/react-native-executorch/src/types/classification.ts @@ -1,5 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { ResourceSource } from './common'; +import { ResourceSource, PixelData, Frame } from './common'; /** * Props for the `useClassification` hook. @@ -43,9 +43,46 @@ export interface ClassificationType { /** * Executes the model's forward pass to classify the provided image. - * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be classified. - * @returns A Promise that resolves to the classification result (typically containing labels and confidence scores). + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) + * @returns A Promise that resolves to the classification result (labels and confidence scores). * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. */ - forward: (imageSource: string) => Promise<{ [category: string]: number }>; + forward: ( + input: string | PixelData + ) => Promise<{ [category: string]: number }>; + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. 
+ * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded (`isReady: true`). + * + * @example + * ```typescript + * const { runOnFrame, isReady } = useClassification({ model: MODEL }); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!runOnFrame) return; + * const result = runOnFrame(frame); + * frame.dispose(); + * } + * }); + * ``` + * + * @param frame - VisionCamera Frame object + * @returns Object mapping class labels to confidence scores. + */ + runOnFrame: ((frame: Frame) => { [category: string]: number }) | null; } diff --git a/packages/react-native-executorch/src/types/imageEmbeddings.ts b/packages/react-native-executorch/src/types/imageEmbeddings.ts index 5dc23d66f..ccee4b4b1 100644 --- a/packages/react-native-executorch/src/types/imageEmbeddings.ts +++ b/packages/react-native-executorch/src/types/imageEmbeddings.ts @@ -1,5 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { ResourceSource } from './common'; +import { ResourceSource, PixelData, Frame } from './common'; /** * Props for the `useImageEmbeddings` hook. @@ -43,9 +43,30 @@ export interface ImageEmbeddingsType { /** * Executes the model's forward pass to generate embeddings (a feature vector) for the provided image. - * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be processed. + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) * @returns A Promise that resolves to a `Float32Array` containing the generated embedding vector. 
* @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. */ - forward: (imageSource: string) => Promise; + forward: (input: string | PixelData) => Promise; + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded (`isReady: true`). + * + * @param frame - VisionCamera Frame object + * @returns Float32Array containing the embedding vector for the frame. + */ + runOnFrame: ((frame: Frame) => Float32Array) | null; } diff --git a/packages/react-native-executorch/src/types/imageSegmentation.ts b/packages/react-native-executorch/src/types/imageSegmentation.ts index 6d79a801d..7e760487c 100644 --- a/packages/react-native-executorch/src/types/imageSegmentation.ts +++ b/packages/react-native-executorch/src/types/imageSegmentation.ts @@ -1,5 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { LabelEnum, Triple, ResourceSource } from './common'; +import { LabelEnum, Triple, ResourceSource, PixelData, Frame } from './common'; /** * Configuration for a custom segmentation model. @@ -127,15 +127,44 @@ export interface ImageSegmentationType { /** * Executes the model's forward pass to perform semantic segmentation on the provided image. - * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be processed. + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. 
+ * + * @param input - Image source (string or PixelData object) * @param classesOfInterest - An optional array of label keys indicating which per-class probability masks to include in the output. `ARGMAX` is always returned regardless. * @param resizeToInput - Whether to resize the output masks to the original input image dimensions. If `false`, returns the raw model output dimensions. Defaults to `true`. * @returns A Promise resolving to an object with an `'ARGMAX'` `Int32Array` of per-pixel class indices, and each requested class label mapped to a `Float32Array` of per-pixel probabilities. * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. */ forward: ( - imageSource: string, + input: string | PixelData, classesOfInterest?: K[], resizeToInput?: boolean ) => Promise & Record>; + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded (`isReady: true`). + * + * @param frame - VisionCamera Frame object + * @param classesOfInterest - Labels for which to return per-class probability masks. + * @param resizeToInput - Whether to resize masks to original frame dimensions. Defaults to `true`. + * @returns Object with `ARGMAX` Int32Array and per-class Float32Array masks. 
+ */ + runOnFrame: + | (( + frame: Frame, + classesOfInterest?: string[], + resizeToInput?: boolean + ) => Record<'ARGMAX', Int32Array> & Record) + | null; } diff --git a/packages/react-native-executorch/src/types/ocr.ts b/packages/react-native-executorch/src/types/ocr.ts index 6ca2f4324..1b0640172 100644 --- a/packages/react-native-executorch/src/types/ocr.ts +++ b/packages/react-native-executorch/src/types/ocr.ts @@ -1,6 +1,6 @@ import { symbols } from '../constants/ocr/symbols'; import { RnExecutorchError } from '../errors/errorUtils'; -import { ResourceSource } from './common'; +import { Frame, PixelData, ResourceSource } from './common'; /** * OCRDetection represents a single detected text instance in an image, @@ -104,11 +104,35 @@ export interface OCRType { /** * Executes the OCR pipeline (detection and recognition) on the provided image. - * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be processed. - * @returns A Promise that resolves to the OCR results (typically containing the recognized text strings and their bounding boxes). + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) + * @returns A Promise that resolves to the OCR results (recognized text and bounding boxes). * @throws {RnExecutorchError} If the models are not loaded or are currently processing another image. */ - forward: (imageSource: string) => Promise; + forward: (input: string | PixelData) => Promise; + + /** + * Synchronous worklet function for VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. 
+ * + * **Note**: OCR is a two-stage pipeline (detection + recognition) and may not + * achieve real-time frame rates. Frames may be dropped if inference is still running. + * + * Available after model is loaded (`isReady: true`). + * + * @param frame - VisionCamera Frame object + * @returns Array of OCRDetection results for the frame. + */ + runOnFrame: ((frame: Frame) => OCRDetection[]) | null; } /** diff --git a/packages/react-native-executorch/src/types/styleTransfer.ts b/packages/react-native-executorch/src/types/styleTransfer.ts index 162086722..3cf3d17fa 100644 --- a/packages/react-native-executorch/src/types/styleTransfer.ts +++ b/packages/react-native-executorch/src/types/styleTransfer.ts @@ -1,5 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { ResourceSource } from './common'; +import { ResourceSource, PixelData, Frame } from './common'; /** * Configuration properties for the `useStyleTransfer` hook. @@ -43,9 +43,30 @@ export interface StyleTransferType { /** * Executes the model's forward pass to apply the specific artistic style to the provided image. - * @param imageSource - A string representing the input image source (e.g., a file path, URI, or base64 string) to be stylized. - * @returns A Promise that resolves to a string containing the stylized image (typically as a base64 string or a file URI). + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) + * @returns A Promise that resolves to `PixelData` containing the stylized image as raw RGB pixel data. * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. 
*/ - forward: (imageSource: string) => Promise; + forward: (input: string | PixelData) => Promise; + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded (`isReady: true`). + * + * @param frame - VisionCamera Frame object + * @returns PixelData containing the stylized frame as raw RGB pixel data. + */ + runOnFrame: ((frame: Frame) => PixelData) | null; } From 622e8d589b212fafdc7fbe0c51a0be3d8464deab Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 25 Feb 2026 16:54:40 +0100 Subject: [PATCH 31/37] fix: rebase things --- .../app/object_detection/index.tsx | 172 +----------------- .../metaprogramming/TypeConcepts.h | 10 - .../computer_vision/useImageSegmentation.ts | 4 +- .../computer_vision/ObjectDetectionModule.ts | 165 ++++------------- 4 files changed, 47 insertions(+), 304 deletions(-) diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index 54c0eb18f..6a43dd920 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -1,72 +1,16 @@ import Spinner from '../../components/Spinner'; +import { BottomBar } from '../../components/BottomBar'; import { getImage } from '../../utils'; import { Detection, useObjectDetection, SSDLITE_320_MOBILENET_V3_LARGE, } from 'react-native-executorch'; -import { View, StyleSheet, Image, TouchableOpacity, Text } from 'react-native'; +import { View, StyleSheet, Image } from 'react-native'; import ImageWithBboxes from '../../components/ImageWithBboxes'; import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from '../../context'; import ScreenWrapper from '../../ScreenWrapper'; -import ColorPalette from 
'../../colors'; -import { Images } from 'react-native-nitro-image'; - -// Helper function to convert BGRA to RGB -function convertBGRAtoRGB( - buffer: ArrayBuffer, - width: number, - height: number -): ArrayBuffer { - const source = new Uint8Array(buffer); - const rgb = new Uint8Array(width * height * 3); - - for (let i = 0; i < width * height; i++) { - // BGRA format: [B, G, R, A] β†’ RGB: [R, G, B] - rgb[i * 3 + 0] = source[i * 4 + 2]; // R - rgb[i * 3 + 1] = source[i * 4 + 1]; // G - rgb[i * 3 + 2] = source[i * 4 + 0]; // B - } - - return rgb.buffer; -} - -// Helper function to convert image URI to raw RGB pixel data -async function imageUriToPixelData( - uri: string, - targetWidth: number, - targetHeight: number -): Promise<{ - data: ArrayBuffer; - width: number; - height: number; - channels: number; -}> { - try { - // Load image and resize to target dimensions - const image = await Images.loadFromFileAsync(uri); - const resized = image.resize(targetWidth, targetHeight); - - // Get pixel data as ArrayBuffer (BGRA format from NitroImage) - const rawPixelData = resized.toRawPixelData(); - const buffer = - rawPixelData instanceof ArrayBuffer ? 
rawPixelData : rawPixelData.buffer; - - // Convert BGRA to RGB as required by the native API - const rgbBuffer = convertBGRAtoRGB(buffer, targetWidth, targetHeight); - - return { - data: rgbBuffer, - width: targetWidth, - height: targetHeight, - channels: 3, // RGB - }; - } catch (error) { - console.error('Error loading image with NitroImage:', error); - throw error; - } -} export default function ObjectDetectionScreen() { const [imageUri, setImageUri] = useState(''); @@ -98,40 +42,10 @@ export default function ObjectDetectionScreen() { const runForward = async () => { if (imageUri) { try { - console.log('Running forward with string URI...'); - const output = await ssdLite.forward(imageUri, 0.5); - console.log('String URI result:', output.length, 'detections'); - setResults(output); - } catch (e) { - console.error('Error in runForward:', e); - } - } - }; - - const runForwardPixels = async () => { - if (imageUri && imageDimensions) { - try { - console.log('Converting image to pixel data...'); - // Use original dimensions - let the model resize internally - const pixelData = await imageUriToPixelData( - imageUri, - imageDimensions.width, - imageDimensions.height - ); - - console.log('Running forward with pixel data...', { - width: pixelData.width, - height: pixelData.height, - channels: pixelData.channels, - dataSize: pixelData.data.byteLength, - }); - - // Run inference using unified forward() API - const output = await ssdLite.forward(pixelData, 0.3); - console.log('Pixel data result:', output.length, 'detections'); + const output = await ssdLite.forward(imageUri); setResults(output); } catch (e) { - console.error('Error in runForwardPixels:', e); + console.error(e); } } }; @@ -167,41 +81,10 @@ export default function ObjectDetectionScreen() { )} - - {/* Custom bottom bar with two buttons */} - - - handleCameraPress(false)}> - πŸ“· Gallery - - - - - - Run (String) - - - - Run (Pixels) - - - + ); } @@ -246,43 +129,4 @@ const styles = StyleSheet.create({ width: 
'100%', height: '100%', }, - bottomContainer: { - width: '100%', - gap: 15, - alignItems: 'center', - padding: 16, - flex: 1, - }, - bottomIconsContainer: { - flexDirection: 'row', - justifyContent: 'center', - width: '100%', - }, - iconText: { - fontSize: 16, - color: ColorPalette.primary, - }, - buttonsRow: { - flexDirection: 'row', - width: '100%', - gap: 10, - }, - button: { - height: 50, - justifyContent: 'center', - alignItems: 'center', - backgroundColor: ColorPalette.primary, - color: '#fff', - borderRadius: 8, - }, - halfButton: { - flex: 1, - }, - buttonDisabled: { - opacity: 0.5, - }, - buttonText: { - color: '#fff', - fontSize: 16, - }, }); diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h index 216e2bae3..2d7612f25 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h @@ -26,16 +26,6 @@ concept HasGenerateFromPixels = requires(T t) { { &T::generateFromPixels }; }; -template -concept HasGenerateFromString = requires(T t) { - { &T::generateFromString }; -}; - -template -concept HasGenerateFromPixels = requires(T t) { - { &T::generateFromPixels }; -}; - template concept HasGenerateFromFrame = requires(T t) { { &T::generateFromFrame }; diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts b/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts index 55b8d8500..26a804227 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts @@ -9,7 +9,7 @@ import { ModelNameOf, ModelSources, } from '../../types/imageSegmentation'; -import { Frame } from '../../types/common'; +import { Frame, PixelData } from 
'../../types/common'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { RnExecutorchError, parseUnknownError } from '../../errors/errorUtils'; @@ -92,7 +92,7 @@ export const useImageSegmentation = ({ }, [model.modelName, model.modelSource, preventLoad]); const forward = async >>( - imageSource: string, + imageSource: string | PixelData, classesOfInterest: K[] = [], resizeToInput: boolean = true ) => { diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index 762d09987..f056cff62 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -1,143 +1,52 @@ -import { BaseModule } from '../BaseModule'; +import { ResourceFetcher } from '../../utils/ResourceFetcher'; +import { ResourceSource, PixelData } from '../../types/common'; +import { Detection } from '../../types/objectDetection'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; -import { RnExecutorchError } from '../../errors/errorUtils'; -import { Frame, PixelData, ScalarType } from '../../types/common'; +import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; +import { Logger } from '../../common/Logger'; +import { VisionModule } from './VisionModule'; /** - * Base class for computer vision models that support multiple input types. - * - * VisionModule extends BaseModule with: - * - Unified `forward()` API accepting string paths or raw pixel data - * - `runOnFrame` getter for real-time VisionCamera frame processing - * - Shared frame processor creation logic - * - * Subclasses should only implement model-specific loading logic. + * Module for object detection tasks. 
* * @category Typescript API */ -function isPixelData(input: unknown): input is PixelData { - return ( - typeof input === 'object' && - input !== null && - 'dataPtr' in input && - input.dataPtr instanceof Uint8Array && - 'sizes' in input && - Array.isArray(input.sizes) && - input.sizes.length === 3 && - 'scalarType' in input && - input.scalarType === ScalarType.BYTE - ); -} - -export abstract class VisionModule extends BaseModule { +export class ObjectDetectionModule extends VisionModule { /** - * Synchronous worklet function for real-time VisionCamera frame processing. - * - * Only available after the model is loaded. Returns null if not loaded. - * - * **Use this for VisionCamera frame processing in worklets.** - * For async processing, use `forward()` instead. - * - * @example - * ```typescript - * const model = new ClassificationModule(); - * await model.load({ modelSource: MODEL }); + * Loads the model, where `modelSource` is a string that specifies the location of the model binary. + * To track the download progress, supply a callback function `onDownloadProgressCallback`. * - * // Use the functional form of setState to store the worklet β€” passing it - * // directly would cause React to invoke it immediately as an updater fn. - * const [runOnFrame, setRunOnFrame] = useState(null); - * setRunOnFrame(() => model.runOnFrame); - * - * const frameOutput = useFrameOutput({ - * onFrame(frame) { - * 'worklet'; - * if (!runOnFrame) return; - * const result = runOnFrame(frame); - * frame.dispose(); - * } - * }); - * ``` + * @param model - Object containing `modelSource`. + * @param onDownloadProgressCallback - Optional callback to monitor download progress. 
*/ - get runOnFrame(): ((frame: Frame, ...args: any[]) => TOutput) | null { - if (!this.nativeModule?.generateFromFrame) { - return null; - } - - // Extract pure JSI function reference (runs on JS thread) - const nativeGenerateFromFrame = this.nativeModule.generateFromFrame; - - // Return worklet that captures ONLY the JSI function - return (frame: any, ...args: any[]): TOutput => { - 'worklet'; + async load( + model: { modelSource: ResourceSource }, + onDownloadProgressCallback: (progress: number) => void = () => {} + ): Promise { + try { + const paths = await ResourceFetcher.fetch( + onDownloadProgressCallback, + model.modelSource + ); - let nativeBuffer: any = null; - try { - nativeBuffer = frame.getNativeBuffer(); - const frameData = { - nativeBuffer: nativeBuffer.pointer, - }; - return nativeGenerateFromFrame(frameData, ...args); - } finally { - if (nativeBuffer?.release) { - nativeBuffer.release(); - } + if (!paths?.[0]) { + throw new RnExecutorchError( + RnExecutorchErrorCode.DownloadInterrupted, + 'The download has been interrupted. As a result, not every file was downloaded. Please retry the download.' + ); } - }; - } - /** - * Executes the model's forward pass with automatic input type detection. - * - * Supports two input types: - * 1. **String path/URI**: File path, URL, or Base64-encoded string - * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) - * - * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. - * This method is async and cannot be called in worklet context. - * - * @param input - Image source (string path or PixelData object) - * @param args - Additional model-specific arguments - * @returns A Promise that resolves to the model output. 
- * - * @example - * ```typescript - * // String path (async) - * const result1 = await model.forward('file:///path/to/image.jpg'); - * - * // Pixel data (async) - * const result2 = await model.forward({ - * dataPtr: new Uint8Array(pixelBuffer), - * sizes: [480, 640, 3], - * scalarType: ScalarType.BYTE - * }); - * - * // For VisionCamera frames, use runOnFrame in worklet: - * const frameOutput = useFrameOutput({ - * onFrame(frame) { - * 'worklet'; - * if (!model.runOnFrame) return; - * const result = model.runOnFrame(frame); - * } - * }); - * ``` - */ - async forward(input: string | PixelData, ...args: any[]): Promise { - if (this.nativeModule == null) - throw new RnExecutorchError( - RnExecutorchErrorCode.ModuleNotLoaded, - 'The model is currently not loaded. Please load the model before calling forward().' - ); - - // Type detection and routing - if (typeof input === 'string') { - return await this.nativeModule.generateFromString(input, ...args); - } else if (isPixelData(input)) { - return await this.nativeModule.generateFromPixels(input, ...args); - } else { - throw new RnExecutorchError( - RnExecutorchErrorCode.InvalidArgument, - 'Invalid input: expected string path or PixelData object. For VisionCamera frames, use runOnFrame instead.' 
- ); + this.nativeModule = global.loadObjectDetection(paths[0]); + } catch (error) { + Logger.error('Load failed:', error); + throw parseUnknownError(error); } } + + async forward( + input: string | PixelData, + detectionThreshold: number = 0.5 + ): Promise { + return super.forward(input, detectionThreshold); + } } From dc5e65eb60ae41a407211b86d9dc2fcb96b55e2e Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 25 Feb 2026 19:22:16 +0100 Subject: [PATCH 32/37] chore: remove comment --- apps/computer-vision/app/vision_camera_live/index.tsx | 2 -- 1 file changed, 2 deletions(-) diff --git a/apps/computer-vision/app/vision_camera_live/index.tsx b/apps/computer-vision/app/vision_camera_live/index.tsx index 4c7b425b1..8c5d71d33 100644 --- a/apps/computer-vision/app/vision_camera_live/index.tsx +++ b/apps/computer-vision/app/vision_camera_live/index.tsx @@ -71,8 +71,6 @@ const MODELS: { id: ModelId; label: string }[] = [ { id: 'ocr', label: 'OCR' }, ]; -// ─── Segmentation colors ───────────────────────────────────────────────────── - const CLASS_COLORS: number[][] = [ [0, 0, 0, 0], [51, 255, 87, 180], From 2c8dd67feaee982ca7aa3477e9cf091c62dbcb4f Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Thu, 26 Feb 2026 11:41:13 +0100 Subject: [PATCH 33/37] feat: add dedicated vision camera screen showcasing classification/segmentation/object detection --- apps/computer-vision/app/_layout.tsx | 57 +- .../app/classification_live/index.tsx | 255 ------ .../app/image_segmentation_live/index.tsx | 292 ------- apps/computer-vision/app/index.tsx | 12 +- .../app/object_detection_live/index.tsx | 300 ------- apps/computer-vision/app/ocr_live/index.tsx | 329 -------- .../app/style_transfer_live/index.tsx | 274 ------ .../app/vision_camera/index.tsx | 665 +++++++++++++++ .../app/vision_camera_live/index.tsx | 796 ------------------ 9 files changed, 680 insertions(+), 2300 deletions(-) delete mode 100644 apps/computer-vision/app/classification_live/index.tsx delete mode 
100644 apps/computer-vision/app/image_segmentation_live/index.tsx delete mode 100644 apps/computer-vision/app/object_detection_live/index.tsx delete mode 100644 apps/computer-vision/app/ocr_live/index.tsx delete mode 100644 apps/computer-vision/app/style_transfer_live/index.tsx create mode 100644 apps/computer-vision/app/vision_camera/index.tsx delete mode 100644 apps/computer-vision/app/vision_camera_live/index.tsx diff --git a/apps/computer-vision/app/_layout.tsx b/apps/computer-vision/app/_layout.tsx index b614b54bf..eafbc70e6 100644 --- a/apps/computer-vision/app/_layout.tsx +++ b/apps/computer-vision/app/_layout.tsx @@ -59,6 +59,15 @@ export default function _layout() { headerTitleStyle: { color: ColorPalette.primary }, }} > + - - - - - - { - setGlobalGenerating(isGenerating); - }, [isGenerating, setGlobalGenerating]); - - const [topLabel, setTopLabel] = useState(''); - const [topScore, setTopScore] = useState(0); - const [fps, setFps] = useState(0); - const lastFrameTimeRef = useRef(Date.now()); - - const cameraPermission = useCameraPermission(); - const devices = useCameraDevices(); - const device = devices.find((d) => d.position === 'back') ?? 
devices[0]; - - const format = useMemo(() => { - if (device == null) return undefined; - try { - return getCameraFormat(device, Templates.FrameProcessing); - } catch { - return undefined; - } - }, [device]); - - const updateStats = useCallback( - (result: { label: string; score: number }) => { - setTopLabel(result.label); - setTopScore(result.score); - const now = Date.now(); - const timeDiff = now - lastFrameTimeRef.current; - if (timeDiff > 0) { - setFps(Math.round(1000 / timeDiff)); - } - lastFrameTimeRef.current = now; - }, - [] - ); - - const frameOutput = useFrameOutput({ - pixelFormat: 'rgb', - onFrame(frame) { - 'worklet'; - if (!runOnFrame) { - frame.dispose(); - return; - } - try { - const result = runOnFrame(frame); - if (result) { - // find the top-1 entry - let bestLabel = ''; - let bestScore = -1; - const entries = Object.entries(result); - for (let i = 0; i < entries.length; i++) { - const [label, score] = entries[i]; - if ((score as number) > bestScore) { - bestScore = score as number; - bestLabel = label; - } - } - scheduleOnRN(updateStats, { label: bestLabel, score: bestScore }); - } - } catch { - // ignore frame errors - } finally { - frame.dispose(); - } - }, - }); - - if (!isReady) { - return ( - - ); - } - - if (!cameraPermission.hasPermission) { - return ( - - Camera access needed - cameraPermission.requestPermission()} - style={styles.button} - > - Grant Permission - - - ); - } - - if (device == null) { - return ( - - No camera device found - - ); - } - - return ( - - - - - - - - - - {topLabel || 'β€”'} - - - {topLabel ? 
(topScore * 100).toFixed(1) + '%' : ''} - - - - - {fps} - fps - - - - - ); -} - -const styles = StyleSheet.create({ - container: { - flex: 1, - backgroundColor: 'black', - }, - centered: { - flex: 1, - backgroundColor: 'black', - justifyContent: 'center', - alignItems: 'center', - gap: 16, - }, - message: { - color: 'white', - fontSize: 18, - }, - button: { - paddingHorizontal: 24, - paddingVertical: 12, - backgroundColor: ColorPalette.primary, - borderRadius: 24, - }, - buttonText: { - color: 'white', - fontSize: 15, - fontWeight: '600', - letterSpacing: 0.3, - }, - bottomBarWrapper: { - position: 'absolute', - bottom: 0, - left: 0, - right: 0, - alignItems: 'center', - paddingHorizontal: 16, - }, - bottomBar: { - flexDirection: 'row', - alignItems: 'center', - backgroundColor: 'rgba(0, 0, 0, 0.55)', - borderRadius: 24, - paddingHorizontal: 28, - paddingVertical: 10, - gap: 24, - maxWidth: '100%', - }, - labelContainer: { - flex: 1, - alignItems: 'flex-start', - }, - labelText: { - color: 'white', - fontSize: 16, - fontWeight: '700', - }, - scoreText: { - color: 'rgba(255,255,255,0.7)', - fontSize: 13, - fontWeight: '500', - }, - statItem: { - alignItems: 'center', - }, - statValue: { - color: 'white', - fontSize: 22, - fontWeight: '700', - letterSpacing: -0.5, - }, - statLabel: { - color: 'rgba(255,255,255,0.55)', - fontSize: 11, - fontWeight: '500', - textTransform: 'uppercase', - letterSpacing: 0.8, - }, - statDivider: { - width: 1, - height: 32, - backgroundColor: 'rgba(255,255,255,0.2)', - }, -}); diff --git a/apps/computer-vision/app/image_segmentation_live/index.tsx b/apps/computer-vision/app/image_segmentation_live/index.tsx deleted file mode 100644 index f665c63c5..000000000 --- a/apps/computer-vision/app/image_segmentation_live/index.tsx +++ /dev/null @@ -1,292 +0,0 @@ -import React, { - useCallback, - useContext, - useEffect, - useMemo, - useRef, - useState, -} from 'react'; -import { - StatusBar, - StyleSheet, - Text, - TouchableOpacity, - 
useWindowDimensions, - View, -} from 'react-native'; -import { useSafeAreaInsets } from 'react-native-safe-area-context'; - -import { - Camera, - getCameraFormat, - Templates, - useCameraDevices, - useCameraPermission, - useFrameOutput, -} from 'react-native-vision-camera'; -import { scheduleOnRN } from 'react-native-worklets'; -import { - DEEPLAB_V3_RESNET50, - useImageSegmentation, -} from 'react-native-executorch'; -import { - Canvas, - Image as SkiaImage, - Skia, - AlphaType, - ColorType, - SkImage, -} from '@shopify/react-native-skia'; -import { GeneratingContext } from '../../context'; -import Spinner from '../../components/Spinner'; -import ColorPalette from '../../colors'; - -// RGBA colors for each DeepLab V3 class (alpha = 180 for semi-transparency) -const CLASS_COLORS: number[][] = [ - [0, 0, 0, 0], // 0 background β€” transparent - [51, 255, 87, 180], // 1 aeroplane - [51, 87, 255, 180], // 2 bicycle - [255, 51, 246, 180], // 3 bird - [51, 255, 246, 180], // 4 boat - [243, 255, 51, 180], // 5 bottle - [141, 51, 255, 180], // 6 bus - [255, 131, 51, 180], // 7 car - [51, 255, 131, 180], // 8 cat - [131, 51, 255, 180], // 9 chair - [255, 255, 51, 180], // 10 cow - [51, 255, 255, 180], // 11 diningtable - [255, 51, 143, 180], // 12 dog - [127, 51, 255, 180], // 13 horse - [51, 255, 175, 180], // 14 motorbike - [255, 175, 51, 180], // 15 person - [179, 255, 51, 180], // 16 pottedplant - [255, 87, 51, 180], // 17 sheep - [255, 51, 162, 180], // 18 sofa - [51, 162, 255, 180], // 19 train - [162, 51, 255, 180], // 20 tvmonitor -]; - -export default function ImageSegmentationLiveScreen() { - const insets = useSafeAreaInsets(); - const { width: screenWidth, height: screenHeight } = useWindowDimensions(); - - const { isReady, isGenerating, downloadProgress, runOnFrame } = - useImageSegmentation({ model: DEEPLAB_V3_RESNET50 }); - const { setGlobalGenerating } = useContext(GeneratingContext); - - useEffect(() => { - setGlobalGenerating(isGenerating); - }, 
[isGenerating, setGlobalGenerating]); - - const [maskImage, setMaskImage] = useState(null); - const [fps, setFps] = useState(0); - const lastFrameTimeRef = useRef(Date.now()); - - const cameraPermission = useCameraPermission(); - const devices = useCameraDevices(); - const device = devices.find((d) => d.position === 'back') ?? devices[0]; - - const format = useMemo(() => { - if (device == null) return undefined; - try { - return getCameraFormat(device, Templates.FrameProcessing); - } catch { - return undefined; - } - }, [device]); - - const updateMask = useCallback((img: SkImage) => { - setMaskImage(img); - const now = Date.now(); - const timeDiff = now - lastFrameTimeRef.current; - if (timeDiff > 0) { - setFps(Math.round(1000 / timeDiff)); - } - lastFrameTimeRef.current = now; - }, []); - - const frameOutput = useFrameOutput({ - pixelFormat: 'rgb', - dropFramesWhileBusy: true, - onFrame(frame) { - 'worklet'; - if (!runOnFrame) { - frame.dispose(); - return; - } - try { - const result = runOnFrame(frame, [], false); - if (result?.ARGMAX) { - const argmax: Int32Array = result.ARGMAX; - // Model output is always square (modelImageSize Γ— modelImageSize). - // Derive width/height from argmax length (sqrt for square output). - const side = Math.round(Math.sqrt(argmax.length)); - const width = side; - const height = side; - - // Build RGBA pixel buffer on the worklet thread to avoid transferring - // the large Int32Array across the workletβ†’RN boundary via scheduleOnRN. - const pixels = new Uint8Array(width * height * 4); - for (let i = 0; i < argmax.length; i++) { - const color = CLASS_COLORS[argmax[i]] ?? 
[0, 0, 0, 0]; - pixels[i * 4] = color[0]!; - pixels[i * 4 + 1] = color[1]!; - pixels[i * 4 + 2] = color[2]!; - pixels[i * 4 + 3] = color[3]!; - } - - const skData = Skia.Data.fromBytes(pixels); - const img = Skia.Image.MakeImage( - { - width, - height, - alphaType: AlphaType.Unpremul, - colorType: ColorType.RGBA_8888, - }, - skData, - width * 4 - ); - if (img) { - scheduleOnRN(updateMask, img); - } - } - } catch (e) { - console.log('frame error:', String(e)); - } finally { - frame.dispose(); - } - }, - }); - - if (!isReady) { - return ( - - ); - } - - if (!cameraPermission.hasPermission) { - return ( - - Camera access needed - cameraPermission.requestPermission()} - style={styles.button} - > - Grant Permission - - - ); - } - - if (device == null) { - return ( - - No camera device found - - ); - } - - return ( - - - - - - {maskImage && ( - - - - )} - - - - - {fps} - fps - - - - - ); -} - -const styles = StyleSheet.create({ - container: { - flex: 1, - backgroundColor: 'black', - }, - centered: { - flex: 1, - backgroundColor: 'black', - justifyContent: 'center', - alignItems: 'center', - gap: 16, - }, - message: { - color: 'white', - fontSize: 18, - }, - button: { - paddingHorizontal: 24, - paddingVertical: 12, - backgroundColor: ColorPalette.primary, - borderRadius: 24, - }, - buttonText: { - color: 'white', - fontSize: 15, - fontWeight: '600', - letterSpacing: 0.3, - }, - bottomBarWrapper: { - position: 'absolute', - bottom: 0, - left: 0, - right: 0, - alignItems: 'center', - }, - bottomBar: { - flexDirection: 'row', - alignItems: 'center', - backgroundColor: 'rgba(0, 0, 0, 0.55)', - borderRadius: 24, - paddingHorizontal: 28, - paddingVertical: 10, - gap: 24, - }, - statItem: { - alignItems: 'center', - }, - statValue: { - color: 'white', - fontSize: 22, - fontWeight: '700', - letterSpacing: -0.5, - }, - statLabel: { - color: 'rgba(255,255,255,0.55)', - fontSize: 11, - fontWeight: '500', - textTransform: 'uppercase', - letterSpacing: 0.8, - }, -}); diff --git 
a/apps/computer-vision/app/index.tsx b/apps/computer-vision/app/index.tsx index bf391aeea..ed8712a54 100644 --- a/apps/computer-vision/app/index.tsx +++ b/apps/computer-vision/app/index.tsx @@ -11,6 +11,12 @@ export default function Home() { Select a demo model + router.navigate('vision_camera/')} + > + Vision Camera + router.navigate('classification/')} @@ -29,12 +35,6 @@ export default function Home() { > Object Detection - router.navigate('object_detection_live/')} - > - Object Detection Live - router.navigate('ocr/')} diff --git a/apps/computer-vision/app/object_detection_live/index.tsx b/apps/computer-vision/app/object_detection_live/index.tsx deleted file mode 100644 index d883fe8b9..000000000 --- a/apps/computer-vision/app/object_detection_live/index.tsx +++ /dev/null @@ -1,300 +0,0 @@ -import React, { - useCallback, - useContext, - useEffect, - useMemo, - useRef, - useState, -} from 'react'; -import { - StatusBar, - StyleSheet, - Text, - TouchableOpacity, - View, -} from 'react-native'; -import { useSafeAreaInsets } from 'react-native-safe-area-context'; - -import { - Camera, - getCameraFormat, - Templates, - useCameraDevices, - useCameraPermission, - useFrameOutput, -} from 'react-native-vision-camera'; -import { scheduleOnRN } from 'react-native-worklets'; -import { - Detection, - SSDLITE_320_MOBILENET_V3_LARGE, - useObjectDetection, -} from 'react-native-executorch'; -import { GeneratingContext } from '../../context'; -import Spinner from '../../components/Spinner'; -import ColorPalette from '../../colors'; - -export default function ObjectDetectionLiveScreen() { - const insets = useSafeAreaInsets(); - const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); - - const model = useObjectDetection({ model: SSDLITE_320_MOBILENET_V3_LARGE }); - const { setGlobalGenerating } = useContext(GeneratingContext); - - useEffect(() => { - setGlobalGenerating(model.isGenerating); - }, [model.isGenerating, setGlobalGenerating]); - - const [detections, 
setDetections] = useState([]); - const [imageSize, setImageSize] = useState({ width: 1, height: 1 }); - const [fps, setFps] = useState(0); - const lastFrameTimeRef = useRef(Date.now()); - - const cameraPermission = useCameraPermission(); - const devices = useCameraDevices(); - const device = devices.find((d) => d.position === 'back') ?? devices[0]; - - const format = useMemo(() => { - if (device == null) return undefined; - try { - return getCameraFormat(device, Templates.FrameProcessing); - } catch { - return undefined; - } - }, [device]); - - const updateDetections = useCallback( - (payload: { - results: Detection[]; - imageWidth: number; - imageHeight: number; - }) => { - setDetections(payload.results); - setImageSize({ width: payload.imageWidth, height: payload.imageHeight }); - const now = Date.now(); - const timeDiff = now - lastFrameTimeRef.current; - if (timeDiff > 0) { - setFps(Math.round(1000 / timeDiff)); - } - lastFrameTimeRef.current = now; - }, - [] - ); - - const frameOutput = useFrameOutput({ - pixelFormat: 'rgb', - dropFramesWhileBusy: true, - onFrame(frame) { - 'worklet'; - if (!model.runOnFrame) { - frame.dispose(); - return; - } - // After 90Β° CW rotation, the image fed to the model has swapped dims. - const imageWidth = - frame.width > frame.height ? frame.height : frame.width; - const imageHeight = - frame.width > frame.height ? 
frame.width : frame.height; - try { - const result = model.runOnFrame(frame, 0.5); - if (result) { - scheduleOnRN(updateDetections, { - results: result, - imageWidth, - imageHeight, - }); - } - } catch { - // ignore frame errors - } finally { - frame.dispose(); - } - }, - }); - - if (!model.isReady) { - return ( - - ); - } - - if (!cameraPermission.hasPermission) { - return ( - - Camera access needed - cameraPermission.requestPermission()} - style={styles.button} - > - Grant Permission - - - ); - } - - if (device == null) { - return ( - - No camera device found - - ); - } - - return ( - - - - - - {/* Bounding box overlay β€” measured to match the exact camera preview area */} - - setCanvasSize({ - width: e.nativeEvent.layout.width, - height: e.nativeEvent.layout.height, - }) - } - > - {(() => { - // Cover-fit: camera preview scales to fill the canvas, cropping the - // excess. Compute the same transform so bbox pixel coords map correctly. - const scale = Math.max( - canvasSize.width / imageSize.width, - canvasSize.height / imageSize.height - ); - const offsetX = (canvasSize.width - imageSize.width * scale) / 2; - const offsetY = (canvasSize.height - imageSize.height * scale) / 2; - return detections.map((det, i) => { - const left = det.bbox.x1 * scale + offsetX; - const top = det.bbox.y1 * scale + offsetY; - const width = (det.bbox.x2 - det.bbox.x1) * scale; - const height = (det.bbox.y2 - det.bbox.y1) * scale; - return ( - - - - {det.label} {(det.score * 100).toFixed(0)}% - - - - ); - }); - })()} - - - - - - {detections.length} - objects - - - - {fps} - fps - - - - - ); -} - -const styles = StyleSheet.create({ - container: { - flex: 1, - backgroundColor: 'black', - }, - centered: { - flex: 1, - backgroundColor: 'black', - justifyContent: 'center', - alignItems: 'center', - gap: 16, - }, - message: { - color: 'white', - fontSize: 18, - }, - button: { - paddingHorizontal: 24, - paddingVertical: 12, - backgroundColor: ColorPalette.primary, - borderRadius: 24, - }, - 
buttonText: { - color: 'white', - fontSize: 15, - fontWeight: '600', - letterSpacing: 0.3, - }, - bbox: { - position: 'absolute', - borderWidth: 2, - borderColor: ColorPalette.primary, - borderRadius: 4, - }, - bboxLabel: { - position: 'absolute', - top: -22, - left: -2, - backgroundColor: ColorPalette.primary, - paddingHorizontal: 6, - paddingVertical: 2, - borderRadius: 4, - }, - bboxLabelText: { - color: 'white', - fontSize: 11, - fontWeight: '600', - }, - bottomBarWrapper: { - position: 'absolute', - bottom: 0, - left: 0, - right: 0, - alignItems: 'center', - }, - bottomBar: { - flexDirection: 'row', - alignItems: 'center', - backgroundColor: 'rgba(0, 0, 0, 0.55)', - borderRadius: 24, - paddingHorizontal: 28, - paddingVertical: 10, - gap: 24, - }, - statItem: { - alignItems: 'center', - }, - statValue: { - color: 'white', - fontSize: 22, - fontWeight: '700', - letterSpacing: -0.5, - }, - statLabel: { - color: 'rgba(255,255,255,0.55)', - fontSize: 11, - fontWeight: '500', - textTransform: 'uppercase', - letterSpacing: 0.8, - }, - statDivider: { - width: 1, - height: 32, - backgroundColor: 'rgba(255,255,255,0.2)', - }, -}); diff --git a/apps/computer-vision/app/ocr_live/index.tsx b/apps/computer-vision/app/ocr_live/index.tsx deleted file mode 100644 index a0c93899f..000000000 --- a/apps/computer-vision/app/ocr_live/index.tsx +++ /dev/null @@ -1,329 +0,0 @@ -import React, { - useCallback, - useContext, - useEffect, - useMemo, - useRef, - useState, -} from 'react'; -import { - StatusBar, - StyleSheet, - Text, - TouchableOpacity, - View, -} from 'react-native'; -import { useSafeAreaInsets } from 'react-native-safe-area-context'; - -import { - Camera, - getCameraFormat, - Templates, - useCameraDevices, - useCameraPermission, - useFrameOutput, -} from 'react-native-vision-camera'; -import { scheduleOnRN } from 'react-native-worklets'; -import { OCR_ENGLISH, useOCR, OCRDetection } from 'react-native-executorch'; -import { - Canvas, - Path, - Skia, - Text as SkiaText, - 
matchFont, -} from '@shopify/react-native-skia'; -import { GeneratingContext } from '../../context'; -import Spinner from '../../components/Spinner'; -import ColorPalette from '../../colors'; - -interface FrameDetections { - detections: OCRDetection[]; - frameWidth: number; - frameHeight: number; -} - -export default function OCRLiveScreen() { - const insets = useSafeAreaInsets(); - const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); - - const { isReady, isGenerating, downloadProgress, runOnFrame } = useOCR({ - model: OCR_ENGLISH, - }); - const { setGlobalGenerating } = useContext(GeneratingContext); - - useEffect(() => { - setGlobalGenerating(isGenerating); - }, [isGenerating, setGlobalGenerating]); - - const [frameDetections, setFrameDetections] = useState({ - detections: [], - frameWidth: 1, - frameHeight: 1, - }); - const [fps, setFps] = useState(0); - const lastFrameTimeRef = useRef(Date.now()); - - const font = matchFont({ fontFamily: 'Helvetica', fontSize: 11 }); - - const cameraPermission = useCameraPermission(); - const devices = useCameraDevices(); - const device = devices.find((d) => d.position === 'back') ?? 
devices[0]; - - const format = useMemo(() => { - if (device == null) return undefined; - try { - return getCameraFormat(device, Templates.FrameProcessing); - } catch { - return undefined; - } - }, [device]); - - const updateDetections = useCallback((result: FrameDetections) => { - setFrameDetections(result); - const now = Date.now(); - const timeDiff = now - lastFrameTimeRef.current; - if (timeDiff > 0) { - setFps(Math.round(1000 / timeDiff)); - } - lastFrameTimeRef.current = now; - }, []); - - const frameOutput = useFrameOutput({ - dropFramesWhileBusy: true, - pixelFormat: 'rgb', - onFrame(frame) { - 'worklet'; - if (!runOnFrame) { - frame.dispose(); - return; - } - const frameWidth = frame.width; - const frameHeight = frame.height; - try { - const result = runOnFrame(frame); - if (result) { - scheduleOnRN(updateDetections, { - detections: result, - frameWidth, - frameHeight, - }); - } - } catch { - // ignore frame errors - } finally { - frame.dispose(); - } - }, - }); - - if (!isReady) { - return ( - - ); - } - - if (!cameraPermission.hasPermission) { - return ( - - Camera access needed - cameraPermission.requestPermission()} - style={styles.button} - > - Grant Permission - - - ); - } - - if (device == null) { - return ( - - No camera device found - - ); - } - - const { detections, frameWidth, frameHeight } = frameDetections; - - // OCR runs on the raw landscape frame (no rotation applied in native). - // The camera preview displays it as portrait (90Β° CW rotation applied by iOS). - // After rotation the image dimensions become (frameHeight Γ— frameWidth). - // Cover-fit scale uses post-rotation dims to match what the preview shows. - const isLandscape = frameWidth > frameHeight; - const imageW = isLandscape ? frameHeight : frameWidth; - const imageH = isLandscape ? 
frameWidth : frameHeight; - const scale = Math.max(canvasSize.width / imageW, canvasSize.height / imageH); - const offsetX = (canvasSize.width - imageW * scale) / 2; - const offsetY = (canvasSize.height - imageH * scale) / 2; - - // Map a raw landscape point to screen coords accounting for rotation + cover-fit. - function toScreenX(px: number, py: number) { - // After 90Β° CW: rotated_x = frameHeight - py, rotated_y = px - const rx = isLandscape ? frameHeight - py : px; - return rx * scale + offsetX; - } - function toScreenY(px: number, py: number) { - const ry = isLandscape ? px : py; - return ry * scale + offsetY; - } - - return ( - - - - - - {/* Measure the overlay area, then draw polygons inside a Canvas */} - - setCanvasSize({ - width: e.nativeEvent.layout.width, - height: e.nativeEvent.layout.height, - }) - } - > - - {detections.map((det, i) => { - if (!det.bbox || det.bbox.length < 2) return null; - - const path = Skia.Path.Make(); - path.moveTo( - toScreenX(det.bbox[0]!.x, det.bbox[0]!.y), - toScreenY(det.bbox[0]!.x, det.bbox[0]!.y) - ); - for (let j = 1; j < det.bbox.length; j++) { - path.lineTo( - toScreenX(det.bbox[j]!.x, det.bbox[j]!.y), - toScreenY(det.bbox[j]!.x, det.bbox[j]!.y) - ); - } - path.close(); - - const labelX = toScreenX(det.bbox[0]!.x, det.bbox[0]!.y); - const labelY = Math.max( - 0, - toScreenY(det.bbox[0]!.x, det.bbox[0]!.y) - 4 - ); - - return ( - - - - {font && ( - - )} - - ); - })} - - - - - - - {detections.length} - regions - - - - {fps} - fps - - - - - ); -} - -const styles = StyleSheet.create({ - container: { - flex: 1, - backgroundColor: 'black', - }, - centered: { - flex: 1, - backgroundColor: 'black', - justifyContent: 'center', - alignItems: 'center', - gap: 16, - }, - message: { - color: 'white', - fontSize: 18, - }, - button: { - paddingHorizontal: 24, - paddingVertical: 12, - backgroundColor: ColorPalette.primary, - borderRadius: 24, - }, - buttonText: { - color: 'white', - fontSize: 15, - fontWeight: '600', - letterSpacing: 
0.3, - }, - bottomBarWrapper: { - position: 'absolute', - bottom: 0, - left: 0, - right: 0, - alignItems: 'center', - }, - bottomBar: { - flexDirection: 'row', - alignItems: 'center', - backgroundColor: 'rgba(0, 0, 0, 0.55)', - borderRadius: 24, - paddingHorizontal: 28, - paddingVertical: 10, - gap: 24, - }, - statItem: { - alignItems: 'center', - }, - statValue: { - color: 'white', - fontSize: 22, - fontWeight: '700', - letterSpacing: -0.5, - }, - statLabel: { - color: 'rgba(255,255,255,0.55)', - fontSize: 11, - fontWeight: '500', - textTransform: 'uppercase', - letterSpacing: 0.8, - }, - statDivider: { - width: 1, - height: 32, - backgroundColor: 'rgba(255,255,255,0.2)', - }, -}); diff --git a/apps/computer-vision/app/style_transfer_live/index.tsx b/apps/computer-vision/app/style_transfer_live/index.tsx deleted file mode 100644 index 57889313f..000000000 --- a/apps/computer-vision/app/style_transfer_live/index.tsx +++ /dev/null @@ -1,274 +0,0 @@ -import React, { - useCallback, - useContext, - useEffect, - useMemo, - useRef, - useState, -} from 'react'; -import { - StatusBar, - StyleSheet, - Text, - TouchableOpacity, - useWindowDimensions, - View, -} from 'react-native'; -import { useSafeAreaInsets } from 'react-native-safe-area-context'; - -import { - Camera, - getCameraFormat, - Templates, - useCameraDevices, - useCameraPermission, - useFrameOutput, -} from 'react-native-vision-camera'; -import { scheduleOnRN } from 'react-native-worklets'; -import { - STYLE_TRANSFER_RAIN_PRINCESS, - useStyleTransfer, -} from 'react-native-executorch'; -import { - Canvas, - Image as SkiaImage, - Skia, - AlphaType, - ColorType, - SkImage, -} from '@shopify/react-native-skia'; -import { GeneratingContext } from '../../context'; -import Spinner from '../../components/Spinner'; -import ColorPalette from '../../colors'; - -export default function StyleTransferLiveScreen() { - const insets = useSafeAreaInsets(); - const { width: screenWidth, height: screenHeight } = 
useWindowDimensions(); - - const { isReady, isGenerating, downloadProgress, runOnFrame } = - useStyleTransfer({ model: STYLE_TRANSFER_RAIN_PRINCESS }); - const { setGlobalGenerating } = useContext(GeneratingContext); - - useEffect(() => { - setGlobalGenerating(isGenerating); - }, [isGenerating, setGlobalGenerating]); - - const [styledImage, setStyledImage] = useState(null); - const [fps, setFps] = useState(0); - const lastFrameTimeRef = useRef(Date.now()); - - const cameraPermission = useCameraPermission(); - const devices = useCameraDevices(); - const device = devices.find((d) => d.position === 'back') ?? devices[0]; - - const format = useMemo(() => { - if (device == null) return undefined; - try { - return getCameraFormat(device, Templates.FrameProcessing); - } catch { - return undefined; - } - }, [device]); - - const updateImage = useCallback((img: SkImage) => { - setStyledImage((prev) => { - prev?.dispose(); - return img; - }); - const now = Date.now(); - const timeDiff = now - lastFrameTimeRef.current; - if (timeDiff > 0) { - setFps(Math.round(1000 / timeDiff)); - } - lastFrameTimeRef.current = now; - }, []); - - const frameOutput = useFrameOutput({ - pixelFormat: 'rgb', - dropFramesWhileBusy: true, - onFrame(frame) { - 'worklet'; - if (!runOnFrame) { - frame.dispose(); - return; - } - try { - const result = runOnFrame(frame); - if (result?.dataPtr) { - const { dataPtr, sizes } = result; - const height = sizes[0]; - const width = sizes[1]; - // Build Skia image on the worklet thread β€” avoids transferring the - // large pixel buffer across the workletβ†’RN boundary via scheduleOnRN. 
- const skData = Skia.Data.fromBytes(dataPtr); - const img = Skia.Image.MakeImage( - { - width, - height, - alphaType: AlphaType.Opaque, - colorType: ColorType.RGBA_8888, - }, - skData, - width * 4 - ); - if (img) { - scheduleOnRN(updateImage, img); - } - } - } catch (e) { - console.log('frame error:', String(e)); - } finally { - frame.dispose(); - } - }, - }); - - if (!isReady) { - return ( - - ); - } - - if (!cameraPermission.hasPermission) { - return ( - - Camera access needed - cameraPermission.requestPermission()} - style={styles.button} - > - Grant Permission - - - ); - } - - if (device == null) { - return ( - - No camera device found - - ); - } - - return ( - - - - {/* Camera always runs to keep frame processing active */} - - - {/* Styled output overlays the camera feed once available */} - {styledImage && ( - - - - )} - - - - - {fps} - fps - - - - candy - style - - - - - ); -} - -const styles = StyleSheet.create({ - container: { - flex: 1, - backgroundColor: 'black', - }, - centered: { - flex: 1, - backgroundColor: 'black', - justifyContent: 'center', - alignItems: 'center', - gap: 16, - }, - message: { - color: 'white', - fontSize: 18, - }, - button: { - paddingHorizontal: 24, - paddingVertical: 12, - backgroundColor: ColorPalette.primary, - borderRadius: 24, - }, - buttonText: { - color: 'white', - fontSize: 15, - fontWeight: '600', - letterSpacing: 0.3, - }, - bottomBarWrapper: { - position: 'absolute', - bottom: 0, - left: 0, - right: 0, - alignItems: 'center', - }, - bottomBar: { - flexDirection: 'row', - alignItems: 'center', - backgroundColor: 'rgba(0, 0, 0, 0.55)', - borderRadius: 24, - paddingHorizontal: 28, - paddingVertical: 10, - gap: 24, - }, - statItem: { - alignItems: 'center', - }, - statValue: { - color: 'white', - fontSize: 22, - fontWeight: '700', - letterSpacing: -0.5, - }, - styleLabel: { - color: 'white', - fontSize: 16, - fontWeight: '700', - }, - statLabel: { - color: 'rgba(255,255,255,0.55)', - fontSize: 11, - fontWeight: '500', - 
textTransform: 'uppercase', - letterSpacing: 0.8, - }, - statDivider: { - width: 1, - height: 32, - backgroundColor: 'rgba(255,255,255,0.2)', - }, -}); diff --git a/apps/computer-vision/app/vision_camera/index.tsx b/apps/computer-vision/app/vision_camera/index.tsx new file mode 100644 index 000000000..625018849 --- /dev/null +++ b/apps/computer-vision/app/vision_camera/index.tsx @@ -0,0 +1,665 @@ +import React, { + useCallback, + useContext, + useEffect, + useMemo, + useRef, + useState, +} from 'react'; +import { + ScrollView, + StatusBar, + StyleSheet, + Text, + TouchableOpacity, + View, +} from 'react-native'; +import { useSafeAreaInsets } from 'react-native-safe-area-context'; +import { + Camera, + Frame, + getCameraFormat, + Templates, + useCameraDevices, + useCameraPermission, + useFrameOutput, +} from 'react-native-vision-camera'; +import { createSynchronizable, scheduleOnRN } from 'react-native-worklets'; +import { + DEEPLAB_V3_RESNET50, + Detection, + EFFICIENTNET_V2_S, + SSDLITE_320_MOBILENET_V3_LARGE, + useClassification, + useImageSegmentation, + useObjectDetection, +} from 'react-native-executorch'; +import { + AlphaType, + Canvas, + ColorType, + Image as SkiaImage, + Skia, + SkImage, +} from '@shopify/react-native-skia'; +import { GeneratingContext } from '../../context'; +import Spinner from '../../components/Spinner'; +import ColorPalette from '../../colors'; + +type TaskId = 'classification' | 'objectDetection' | 'segmentation'; +type ModelId = 'classification' | 'objectDetection' | 'segmentation'; + +type TaskVariant = { id: ModelId; label: string }; +type Task = { id: TaskId; label: string; variants: TaskVariant[] }; + +const TASKS: Task[] = [ + { + id: 'classification', + label: 'Classify', + variants: [{ id: 'classification', label: 'EfficientNet V2 S' }], + }, + { + id: 'segmentation', + label: 'Segment', + variants: [{ id: 'segmentation', label: 'DeepLab V3' }], + }, + { + id: 'objectDetection', + label: 'Detect', + variants: [{ id: 
'objectDetection', label: 'SSDLite MobileNet' }], + }, +]; + +const CLASS_COLORS: number[][] = [ + [0, 0, 0, 0], + [51, 255, 87, 180], + [51, 87, 255, 180], + [255, 51, 246, 180], + [51, 255, 246, 180], + [243, 255, 51, 180], + [141, 51, 255, 180], + [255, 131, 51, 180], + [51, 255, 131, 180], + [131, 51, 255, 180], + [255, 255, 51, 180], + [51, 255, 255, 180], + [255, 51, 143, 180], + [127, 51, 255, 180], + [51, 255, 175, 180], + [255, 175, 51, 180], + [179, 255, 51, 180], + [255, 87, 51, 180], + [255, 51, 162, 180], + [51, 162, 255, 180], + [162, 51, 255, 180], +]; + +function hashLabel(label: string): number { + let hash = 5381; + for (let i = 0; i < label.length; i++) { + hash = (hash + hash * 32 + label.charCodeAt(i)) % 1000003; + } + return 1 + (Math.abs(hash) % (CLASS_COLORS.length - 1)); +} + +function labelColor(label: string): string { + const color = CLASS_COLORS[hashLabel(label)]!; + return `rgba(${color[0]},${color[1]},${color[2]},1)`; +} + +function labelColorBg(label: string): string { + const color = CLASS_COLORS[hashLabel(label)]!; + return `rgba(${color[0]},${color[1]},${color[2]},0.75)`; +} + +const frameKillSwitch = createSynchronizable(false); + +export default function VisionCameraScreen() { + const insets = useSafeAreaInsets(); + const [activeTask, setActiveTask] = useState('classification'); + const [activeModel, setActiveModel] = useState('classification'); + const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); + const { setGlobalGenerating } = useContext(GeneratingContext); + + const classification = useClassification({ + model: EFFICIENTNET_V2_S, + preventLoad: activeModel !== 'classification', + }); + const objectDetection = useObjectDetection({ + model: SSDLITE_320_MOBILENET_V3_LARGE, + preventLoad: activeModel !== 'objectDetection', + }); + const segmentation = useImageSegmentation({ + model: DEEPLAB_V3_RESNET50, + preventLoad: activeModel !== 'segmentation', + }); + + const activeIsGenerating = { + classification: 
classification.isGenerating, + objectDetection: objectDetection.isGenerating, + segmentation: segmentation.isGenerating, + }[activeModel]; + + useEffect(() => { + setGlobalGenerating(activeIsGenerating); + }, [activeIsGenerating, setGlobalGenerating]); + + const [fps, setFps] = useState(0); + const [frameMs, setFrameMs] = useState(0); + const lastFrameTimeRef = useRef(Date.now()); + const cameraPermission = useCameraPermission(); + const devices = useCameraDevices(); + const device = devices.find((d) => d.position === 'back') ?? devices[0]; + const format = useMemo(() => { + if (device == null) return undefined; + try { + return getCameraFormat(device, Templates.FrameProcessing); + } catch { + return undefined; + } + }, [device]); + + const [classResult, setClassResult] = useState({ label: '', score: 0 }); + const [detections, setDetections] = useState([]); + const [imageSize, setImageSize] = useState({ width: 1, height: 1 }); + const [maskImage, setMaskImage] = useState(null); + + const updateClass = useCallback((r: { label: string; score: number }) => { + setClassResult(r); + const now = Date.now(); + const diff = now - lastFrameTimeRef.current; + if (diff > 0) { + setFps(Math.round(1000 / diff)); + setFrameMs(diff); + } + lastFrameTimeRef.current = now; + }, []); + + const updateFps = useCallback(() => { + const now = Date.now(); + const diff = now - lastFrameTimeRef.current; + if (diff > 0) { + setFps(Math.round(1000 / diff)); + setFrameMs(diff); + } + lastFrameTimeRef.current = now; + }, []); + + const updateDetections = useCallback( + (p: { results: Detection[]; imageWidth: number; imageHeight: number }) => { + setDetections(p.results); + setImageSize({ width: p.imageWidth, height: p.imageHeight }); + updateFps(); + }, + [updateFps] + ); + + const updateMask = useCallback( + (img: SkImage) => { + setMaskImage((prev) => { + prev?.dispose(); + return img; + }); + updateFps(); + }, + [updateFps] + ); + + const classRof = classification.runOnFrame; + const detRof 
= objectDetection.runOnFrame; + const segRof = segmentation.runOnFrame; + + useEffect(() => { + frameKillSwitch.setBlocking(true); + setMaskImage((prev) => { + prev?.dispose(); + return null; + }); + const id = setTimeout(() => { + frameKillSwitch.setBlocking(false); + }, 300); + return () => clearTimeout(id); + }, [activeModel]); + + const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + dropFramesWhileBusy: true, + onFrame: useCallback( + (frame: Frame) => { + 'worklet'; + + if (frameKillSwitch.getDirty()) { + frame.dispose(); + return; + } + + try { + if (activeModel === 'classification') { + if (!classRof) return; + const result = classRof(frame); + if (result) { + let bestLabel = ''; + let bestScore = -1; + const entries = Object.entries(result); + for (let i = 0; i < entries.length; i++) { + const [label, score] = entries[i]!; + if ((score as number) > bestScore) { + bestScore = score as number; + bestLabel = label; + } + } + scheduleOnRN(updateClass, { label: bestLabel, score: bestScore }); + } + } else if (activeModel === 'objectDetection') { + if (!detRof) return; + const iw = frame.width > frame.height ? frame.height : frame.width; + const ih = frame.width > frame.height ? frame.width : frame.height; + const result = detRof(frame, 0.5); + if (result) { + scheduleOnRN(updateDetections, { + results: result, + imageWidth: iw, + imageHeight: ih, + }); + } + } else if (activeModel === 'segmentation') { + if (!segRof) return; + const result = segRof(frame, [], false); + if (result?.ARGMAX) { + const argmax: Int32Array = result.ARGMAX; + const side = Math.round(Math.sqrt(argmax.length)); + const pixels = new Uint8Array(side * side * 4); + for (let i = 0; i < argmax.length; i++) { + const color = CLASS_COLORS[argmax[i]!] ?? 
[0, 0, 0, 0]; + pixels[i * 4] = color[0]!; + pixels[i * 4 + 1] = color[1]!; + pixels[i * 4 + 2] = color[2]!; + pixels[i * 4 + 3] = color[3]!; + } + const skData = Skia.Data.fromBytes(pixels); + const img = Skia.Image.MakeImage( + { + width: side, + height: side, + alphaType: AlphaType.Unpremul, + colorType: ColorType.RGBA_8888, + }, + skData, + side * 4 + ); + if (img) scheduleOnRN(updateMask, img); + } + } + } catch { + // ignore + } finally { + frame.dispose(); + } + }, + [ + activeModel, + classRof, + detRof, + segRof, + updateClass, + updateDetections, + updateMask, + ] + ), + }); + + const activeIsReady = { + classification: classification.isReady, + objectDetection: objectDetection.isReady, + segmentation: segmentation.isReady, + }[activeModel]; + + const activeDownloadProgress = { + classification: classification.downloadProgress, + objectDetection: objectDetection.downloadProgress, + segmentation: segmentation.downloadProgress, + }[activeModel]; + + if (!cameraPermission.hasPermission) { + return ( + + Camera access needed + cameraPermission.requestPermission()} + style={styles.button} + > + Grant Permission + + + ); + } + + if (device == null) { + return ( + + No camera device found + + ); + } + + function coverFit(imgW: number, imgH: number) { + const scale = Math.max(canvasSize.width / imgW, canvasSize.height / imgH); + return { + scale, + offsetX: (canvasSize.width - imgW * scale) / 2, + offsetY: (canvasSize.height - imgH * scale) / 2, + }; + } + + const { + scale: detScale, + offsetX: detOX, + offsetY: detOY, + } = coverFit(imageSize.width, imageSize.height); + + const activeTaskInfo = TASKS.find((t) => t.id === activeTask)!; + const activeVariantLabel = + activeTaskInfo.variants.find((v) => v.id === activeModel)?.label ?? 
+ activeTaskInfo.variants[0]!.label; + + return ( + + + + + + + setCanvasSize({ + width: e.nativeEvent.layout.width, + height: e.nativeEvent.layout.height, + }) + } + > + {activeModel === 'segmentation' && maskImage && ( + + + + )} + + {activeModel === 'objectDetection' && ( + <> + {detections.map((det, i) => { + const left = det.bbox.x1 * detScale + detOX; + const top = det.bbox.y1 * detScale + detOY; + const w = (det.bbox.x2 - det.bbox.x1) * detScale; + const h = (det.bbox.y2 - det.bbox.y1) * detScale; + return ( + + + + {det.label} {(det.score * 100).toFixed(1)} + + + + ); + })} + + )} + + + {activeModel === 'classification' && classResult.label ? ( + + {classResult.label} + + {(classResult.score * 100).toFixed(1)}% + + + ) : null} + + {!activeIsReady && ( + + + + )} + + + + {activeVariantLabel} + + {fps} FPS – {frameMs.toFixed(0)} ms + + + + + {TASKS.map((t) => ( + { + setActiveTask(t.id); + setActiveModel(t.variants[0]!.id); + }} + > + + {t.label} + + + ))} + + + + {activeTaskInfo.variants.map((v) => ( + setActiveModel(v.id)} + > + + {v.label} + + + ))} + + + + ); +} + +const styles = StyleSheet.create({ + container: { flex: 1, backgroundColor: 'black' }, + centered: { + flex: 1, + backgroundColor: 'black', + justifyContent: 'center', + alignItems: 'center', + gap: 16, + }, + message: { color: 'white', fontSize: 18 }, + button: { + paddingHorizontal: 24, + paddingVertical: 12, + backgroundColor: ColorPalette.primary, + borderRadius: 24, + }, + buttonText: { color: 'white', fontSize: 15, fontWeight: '600' }, + loadingOverlay: { + ...StyleSheet.absoluteFillObject, + backgroundColor: 'rgba(0,0,0,0.6)', + justifyContent: 'center', + alignItems: 'center', + }, + + topOverlay: { + position: 'absolute', + top: 0, + left: 0, + right: 0, + alignItems: 'center', + gap: 8, + }, + titleRow: { + alignItems: 'center', + paddingHorizontal: 16, + }, + modelTitle: { + color: 'white', + fontSize: 22, + fontWeight: '700', + textShadowColor: 'rgba(0,0,0,0.7)', + textShadowOffset: 
{ width: 0, height: 1 }, + textShadowRadius: 4, + }, + fpsText: { + color: 'rgba(255,255,255,0.85)', + fontSize: 14, + fontWeight: '500', + marginTop: 2, + textShadowColor: 'rgba(0,0,0,0.7)', + textShadowOffset: { width: 0, height: 1 }, + textShadowRadius: 4, + }, + + tabsContent: { + paddingHorizontal: 12, + gap: 6, + }, + tab: { + paddingHorizontal: 18, + paddingVertical: 7, + borderRadius: 20, + backgroundColor: 'rgba(0,0,0,0.45)', + borderWidth: 1, + borderColor: 'rgba(255,255,255,0.25)', + }, + tabActive: { + backgroundColor: 'rgba(255,255,255,0.2)', + borderColor: 'white', + }, + tabText: { + color: 'rgba(255,255,255,0.7)', + fontSize: 14, + fontWeight: '600', + }, + tabTextActive: { color: 'white' }, + + chipsContent: { + paddingHorizontal: 12, + gap: 6, + }, + variantChip: { + paddingHorizontal: 14, + paddingVertical: 5, + borderRadius: 16, + backgroundColor: 'rgba(0,0,0,0.35)', + borderWidth: 1, + borderColor: 'rgba(255,255,255,0.15)', + }, + variantChipActive: { + backgroundColor: ColorPalette.primary, + borderColor: ColorPalette.primary, + }, + variantChipText: { + color: 'rgba(255,255,255,0.6)', + fontSize: 12, + fontWeight: '500', + }, + variantChipTextActive: { color: 'white' }, + + bbox: { + position: 'absolute', + borderWidth: 2, + borderColor: 'cyan', + borderRadius: 4, + }, + bboxLabel: { + position: 'absolute', + top: -22, + left: -2, + paddingHorizontal: 6, + paddingVertical: 2, + borderRadius: 4, + }, + bboxLabelText: { color: 'white', fontSize: 11, fontWeight: '600' }, + + classResultOverlay: { + ...StyleSheet.absoluteFillObject, + justifyContent: 'center', + alignItems: 'center', + }, + classResultLabel: { + color: 'white', + fontSize: 28, + fontWeight: '700', + textAlign: 'center', + textShadowColor: 'rgba(0,0,0,0.8)', + textShadowOffset: { width: 0, height: 1 }, + textShadowRadius: 6, + paddingHorizontal: 24, + }, + classResultScore: { + color: 'rgba(255,255,255,0.75)', + fontSize: 18, + fontWeight: '500', + marginTop: 4, + textShadowColor: 
'rgba(0,0,0,0.8)', + textShadowOffset: { width: 0, height: 1 }, + textShadowRadius: 6, + }, +}); diff --git a/apps/computer-vision/app/vision_camera_live/index.tsx b/apps/computer-vision/app/vision_camera_live/index.tsx deleted file mode 100644 index 8c5d71d33..000000000 --- a/apps/computer-vision/app/vision_camera_live/index.tsx +++ /dev/null @@ -1,796 +0,0 @@ -import React, { - useCallback, - useContext, - useEffect, - useMemo, - useRef, - useState, -} from 'react'; -import { - ScrollView, - StatusBar, - StyleSheet, - Text, - TouchableOpacity, - View, -} from 'react-native'; -import { useSafeAreaInsets } from 'react-native-safe-area-context'; -import { - Camera, - Frame, - getCameraFormat, - Templates, - useCameraDevices, - useCameraPermission, - useFrameOutput, -} from 'react-native-vision-camera'; -import { createSynchronizable, runOnJS } from 'react-native-worklets'; -import { - DEEPLAB_V3_RESNET50, - Detection, - EFFICIENTNET_V2_S, - OCRDetection, - OCR_ENGLISH, - SSDLITE_320_MOBILENET_V3_LARGE, - STYLE_TRANSFER_RAIN_PRINCESS, - useClassification, - useImageSegmentation, - useObjectDetection, - useOCR, - useStyleTransfer, -} from 'react-native-executorch'; -import { - AlphaType, - Canvas, - ColorType, - Image as SkiaImage, - matchFont, - Path, - Skia, - SkImage, - Text as SkiaText, -} from '@shopify/react-native-skia'; -import { GeneratingContext } from '../../context'; -import Spinner from '../../components/Spinner'; -import ColorPalette from '../../colors'; - -// ─── Model IDs ─────────────────────────────────────────────────────────────── - -type ModelId = - | 'classification' - | 'object_detection' - | 'segmentation' - | 'style_transfer' - | 'ocr'; - -const MODELS: { id: ModelId; label: string }[] = [ - { id: 'classification', label: 'Classification' }, - { id: 'object_detection', label: 'Object Detection' }, - { id: 'segmentation', label: 'Segmentation' }, - { id: 'style_transfer', label: 'Style Transfer' }, - { id: 'ocr', label: 'OCR' }, -]; - -const 
CLASS_COLORS: number[][] = [ - [0, 0, 0, 0], - [51, 255, 87, 180], - [51, 87, 255, 180], - [255, 51, 246, 180], - [51, 255, 246, 180], - [243, 255, 51, 180], - [141, 51, 255, 180], - [255, 131, 51, 180], - [51, 255, 131, 180], - [131, 51, 255, 180], - [255, 255, 51, 180], - [51, 255, 255, 180], - [255, 51, 143, 180], - [127, 51, 255, 180], - [51, 255, 175, 180], - [255, 175, 51, 180], - [179, 255, 51, 180], - [255, 87, 51, 180], - [255, 51, 162, 180], - [51, 162, 255, 180], - [162, 51, 255, 180], -]; - -// ─── Kill switch β€” synchronizable boolean shared between JS and worklet thread. -// setBlocking(true) immediately stops the worklet from dispatching new work -// (both in onFrame and inside the async callback) before the old model tears down. -const frameKillSwitch = createSynchronizable(false); - -// ─── Screen ────────────────────────────────────────────────────────────────── - -export default function VisionCameraLiveScreen() { - const insets = useSafeAreaInsets(); - const [activeModel, setActiveModel] = useState('classification'); - const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); - const { setGlobalGenerating } = useContext(GeneratingContext); - - // ── Models (only the active model loads; others are prevented) ── - const classification = useClassification({ - model: EFFICIENTNET_V2_S, - preventLoad: activeModel !== 'classification', - }); - const objectDetection = useObjectDetection({ - model: SSDLITE_320_MOBILENET_V3_LARGE, - preventLoad: activeModel !== 'object_detection', - }); - const segmentation = useImageSegmentation({ - model: DEEPLAB_V3_RESNET50, - preventLoad: activeModel !== 'segmentation', - }); - const styleTransfer = useStyleTransfer({ - model: STYLE_TRANSFER_RAIN_PRINCESS, - preventLoad: activeModel !== 'style_transfer', - }); - const ocr = useOCR({ - model: OCR_ENGLISH, - preventLoad: activeModel !== 'ocr', - }); - - const activeIsGenerating = { - classification: classification.isGenerating, - object_detection: 
objectDetection.isGenerating, - segmentation: segmentation.isGenerating, - style_transfer: styleTransfer.isGenerating, - ocr: ocr.isGenerating, - }[activeModel]; - - useEffect(() => { - setGlobalGenerating(activeIsGenerating); - }, [activeIsGenerating, setGlobalGenerating]); - - // ── Camera ── - const [fps, setFps] = useState(0); - const lastFrameTimeRef = useRef(Date.now()); - const cameraPermission = useCameraPermission(); - const devices = useCameraDevices(); - const device = devices.find((d) => d.position === 'back') ?? devices[0]; - const format = useMemo(() => { - if (device == null) return undefined; - try { - return getCameraFormat(device, Templates.FrameProcessing); - } catch { - return undefined; - } - }, [device]); - - // ── Per-model result state ── - const [classResult, setClassResult] = useState({ label: '', score: 0 }); - const [detections, setDetections] = useState([]); - const [imageSize, setImageSize] = useState({ width: 1, height: 1 }); - const [maskImage, setMaskImage] = useState(null); - const [styledImage, setStyledImage] = useState(null); - const [ocrData, setOcrData] = useState<{ - detections: OCRDetection[]; - frameWidth: number; - frameHeight: number; - }>({ detections: [], frameWidth: 1, frameHeight: 1 }); - - // ── Stable callbacks ── - function tick() { - const now = Date.now(); - const diff = now - lastFrameTimeRef.current; - if (diff > 0) setFps(Math.round(1000 / diff)); - lastFrameTimeRef.current = now; - } - - const updateClass = useCallback((r: { label: string; score: number }) => { - setClassResult(r); - tick(); - // eslint-disable-next-line react-hooks/exhaustive-deps - }, []); - - const updateDetections = useCallback( - (p: { results: Detection[]; imageWidth: number; imageHeight: number }) => { - setDetections(p.results); - setImageSize({ width: p.imageWidth, height: p.imageHeight }); - tick(); - }, - // eslint-disable-next-line react-hooks/exhaustive-deps - [] - ); - - const updateMask = useCallback((img: SkImage) => { - 
setMaskImage((prev) => { - prev?.dispose(); - return img; - }); - tick(); - // eslint-disable-next-line react-hooks/exhaustive-deps - }, []); - - const updateStyled = useCallback((img: SkImage) => { - setStyledImage((prev) => { - prev?.dispose(); - return img; - }); - tick(); - // eslint-disable-next-line react-hooks/exhaustive-deps - }, []); - - const updateOcr = useCallback( - (d: { - detections: OCRDetection[]; - frameWidth: number; - frameHeight: number; - }) => { - setOcrData(d); - tick(); - }, - // eslint-disable-next-line react-hooks/exhaustive-deps - [] - ); - - // ── runOnJS-wrapped callbacks β€” created on the RN thread so the Babel plugin - // can serialize them into remote functions. These can then be safely called - // from any worklet runtime, including the asyncRunner's worker runtime. - const notifyClass = runOnJS(updateClass); - const notifyDetections = runOnJS(updateDetections); - const notifyMask = runOnJS(updateMask); - const notifyStyled = runOnJS(updateStyled); - const notifyOcr = runOnJS(updateOcr); - - // ── Pull the active model's runOnFrame out of the hook each render. - // These are worklet functions (not plain JS objects), so they CAN be - // captured directly in a useCallback closure β€” the worklets runtime - // serializes them correctly. A new closure is produced whenever the - // active runOnFrame changes, causing useFrameOutput to re-register. - const classRof = classification.runOnFrame; - const detRof = objectDetection.runOnFrame; - const segRof = segmentation.runOnFrame; - const stRof = styleTransfer.runOnFrame; - const ocrRof = ocr.runOnFrame; - - // When switching models: activate kill switch synchronously so the worklet - // thread stops calling runOnFrame before delete() fires on the old model. - // Then re-enable once the new model's preventLoad has taken effect. 
- useEffect(() => { - frameKillSwitch.setBlocking(true); - setMaskImage((prev) => { - prev?.dispose(); - return null; - }); - setStyledImage((prev) => { - prev?.dispose(); - return null; - }); - const id = setTimeout(() => { - frameKillSwitch.setBlocking(false); - }, 300); - return () => clearTimeout(id); - }, [activeModel]); - - // ── Single frame output. - // onFrame is re-created (and re-registered by useFrameOutput) whenever the - // active model or its runOnFrame worklet changes. The kill switch provides - // synchronous cross-thread protection during the transition window. - const frameOutput = useFrameOutput({ - pixelFormat: 'rgb', - dropFramesWhileBusy: true, - onFrame: useCallback( - (frame: Frame) => { - 'worklet'; - - // Kill switch is set synchronously from JS when switching models β€” - // guaranteed visible here before the next frame is dispatched. - if (frameKillSwitch.getDirty()) { - frame.dispose(); - return; - } - - try { - if (activeModel === 'classification') { - if (!classRof) return; - const result = classRof(frame); - if (result) { - let bestLabel = ''; - let bestScore = -1; - const entries = Object.entries(result); - for (let i = 0; i < entries.length; i++) { - const [label, score] = entries[i]!; - if ((score as number) > bestScore) { - bestScore = score as number; - bestLabel = label; - } - } - notifyClass({ - label: bestLabel, - score: bestScore, - }); - } - } else if (activeModel === 'object_detection') { - if (!detRof) return; - const iw = frame.width > frame.height ? frame.height : frame.width; - const ih = frame.width > frame.height ? 
frame.width : frame.height; - const result = detRof(frame, 0.5); - if (result) { - notifyDetections({ - results: result, - imageWidth: iw, - imageHeight: ih, - }); - } - } else if (activeModel === 'segmentation') { - if (!segRof) return; - const result = segRof(frame, [], false); - if (result?.ARGMAX) { - const argmax: Int32Array = result.ARGMAX; - const side = Math.round(Math.sqrt(argmax.length)); - const pixels = new Uint8Array(side * side * 4); - for (let i = 0; i < argmax.length; i++) { - const color = CLASS_COLORS[argmax[i]!] ?? [0, 0, 0, 0]; - pixels[i * 4] = color[0]!; - pixels[i * 4 + 1] = color[1]!; - pixels[i * 4 + 2] = color[2]!; - pixels[i * 4 + 3] = color[3]!; - } - const skData = Skia.Data.fromBytes(pixels); - const img = Skia.Image.MakeImage( - { - width: side, - height: side, - alphaType: AlphaType.Unpremul, - colorType: ColorType.RGBA_8888, - }, - skData, - side * 4 - ); - if (img) notifyMask(img); - } - } else if (activeModel === 'style_transfer') { - if (!stRof) return; - const result = stRof(frame); - if (result?.dataPtr) { - const { dataPtr, sizes } = result; - const h = sizes[0]!; - const w = sizes[1]!; - const skData = Skia.Data.fromBytes(dataPtr); - const img = Skia.Image.MakeImage( - { - width: w, - height: h, - alphaType: AlphaType.Opaque, - colorType: ColorType.RGBA_8888, - }, - skData, - w * 4 - ); - if (img) notifyStyled(img); - } - } else if (activeModel === 'ocr') { - if (!ocrRof) return; - const fw = frame.width; - const fh = frame.height; - const result = ocrRof(frame); - if (result) { - notifyOcr({ - detections: result, - frameWidth: fw, - frameHeight: fh, - }); - } - } - } catch { - // ignore - } finally { - frame.dispose(); - } - }, - [ - activeModel, - classRof, - detRof, - segRof, - stRof, - ocrRof, - notifyClass, - notifyDetections, - notifyMask, - notifyStyled, - notifyOcr, - ] - ), - }); - - // ── Loading state: only care about the active model ── - const activeIsReady = { - classification: classification.isReady, - 
object_detection: objectDetection.isReady, - segmentation: segmentation.isReady, - style_transfer: styleTransfer.isReady, - ocr: ocr.isReady, - }[activeModel]; - - const activeDownloadProgress = { - classification: classification.downloadProgress, - object_detection: objectDetection.downloadProgress, - segmentation: segmentation.downloadProgress, - style_transfer: styleTransfer.downloadProgress, - ocr: ocr.downloadProgress, - }[activeModel]; - - if (!cameraPermission.hasPermission) { - return ( - - Camera access needed - cameraPermission.requestPermission()} - style={styles.button} - > - Grant Permission - - - ); - } - - if (device == null) { - return ( - - No camera device found - - ); - } - - // ── Cover-fit helpers ── - function coverFit(imgW: number, imgH: number) { - const scale = Math.max(canvasSize.width / imgW, canvasSize.height / imgH); - return { - scale, - offsetX: (canvasSize.width - imgW * scale) / 2, - offsetY: (canvasSize.height - imgH * scale) / 2, - }; - } - - // ── OCR coord transform ── - const { - detections: ocrDets, - frameWidth: ocrFW, - frameHeight: ocrFH, - } = ocrData; - const ocrIsLandscape = ocrFW > ocrFH; - const ocrImgW = ocrIsLandscape ? ocrFH : ocrFW; - const ocrImgH = ocrIsLandscape ? ocrFW : ocrFH; - const { - scale: ocrScale, - offsetX: ocrOX, - offsetY: ocrOY, - } = coverFit(ocrImgW, ocrImgH); - function ocrToX(px: number, py: number) { - return (ocrIsLandscape ? ocrFH - py : px) * ocrScale + ocrOX; - } - function ocrToY(px: number, py: number) { - return (ocrIsLandscape ? 
px : py) * ocrScale + ocrOY; - } - - // ── Object detection cover-fit ── - const { - scale: detScale, - offsetX: detOX, - offsetY: detOY, - } = coverFit(imageSize.width, imageSize.height); - - const font = matchFont({ fontFamily: 'Helvetica', fontSize: 11 }); - - return ( - - - - - - {/* ── Overlays ── */} - - setCanvasSize({ - width: e.nativeEvent.layout.width, - height: e.nativeEvent.layout.height, - }) - } - > - {activeModel === 'segmentation' && maskImage && ( - - - - )} - - {activeModel === 'style_transfer' && styledImage && ( - - - - )} - - {activeModel === 'object_detection' && ( - <> - {detections.map((det, i) => { - const left = det.bbox.x1 * detScale + detOX; - const top = det.bbox.y1 * detScale + detOY; - const w = (det.bbox.x2 - det.bbox.x1) * detScale; - const h = (det.bbox.y2 - det.bbox.y1) * detScale; - return ( - - - - {det.label} {(det.score * 100).toFixed(0)}% - - - - ); - })} - - )} - - {activeModel === 'ocr' && ( - - {ocrDets.map((det, i) => { - if (!det.bbox || det.bbox.length < 2) return null; - const path = Skia.Path.Make(); - path.moveTo( - ocrToX(det.bbox[0]!.x, det.bbox[0]!.y), - ocrToY(det.bbox[0]!.x, det.bbox[0]!.y) - ); - for (let j = 1; j < det.bbox.length; j++) { - path.lineTo( - ocrToX(det.bbox[j]!.x, det.bbox[j]!.y), - ocrToY(det.bbox[j]!.x, det.bbox[j]!.y) - ); - } - path.close(); - const lx = ocrToX(det.bbox[0]!.x, det.bbox[0]!.y); - const ly = Math.max( - 0, - ocrToY(det.bbox[0]!.x, det.bbox[0]!.y) - 4 - ); - return ( - - - - {font && ( - - )} - - ); - })} - - )} - - - {!activeIsReady && ( - - m.id === activeModel)?.label} ${(activeDownloadProgress * 100).toFixed(0)}%`} - /> - - )} - - - - {MODELS.map((m) => ( - setActiveModel(m.id)} - > - - {m.label} - - - ))} - - - - - - {activeModel === 'classification' && ( - - - {classResult.label || 'β€”'} - - {classResult.label ? 
( - - {(classResult.score * 100).toFixed(1)}% - - ) : null} - - )} - {activeModel === 'object_detection' && ( - - {detections.length} - objects - - )} - {activeModel === 'segmentation' && ( - - DeepLab V3 - segmentation - - )} - {activeModel === 'style_transfer' && ( - - Rain Princess - style - - )} - {activeModel === 'ocr' && ( - - {ocrDets.length} - regions - - )} - - - {fps} - fps - - - - - ); -} - -// ─── Styles ────────────────────────────────────────────────────────────────── - -const styles = StyleSheet.create({ - container: { flex: 1, backgroundColor: 'black' }, - centered: { - flex: 1, - backgroundColor: 'black', - justifyContent: 'center', - alignItems: 'center', - gap: 16, - }, - message: { color: 'white', fontSize: 18 }, - button: { - paddingHorizontal: 24, - paddingVertical: 12, - backgroundColor: ColorPalette.primary, - borderRadius: 24, - }, - buttonText: { color: 'white', fontSize: 15, fontWeight: '600' }, - loadingOverlay: { - ...StyleSheet.absoluteFillObject, - backgroundColor: 'rgba(0,0,0,0.6)', - justifyContent: 'center', - alignItems: 'center', - }, - topBarWrapper: { - position: 'absolute', - top: 0, - left: 0, - right: 0, - }, - pickerContent: { - paddingHorizontal: 12, - gap: 8, - }, - chip: { - paddingHorizontal: 16, - paddingVertical: 8, - borderRadius: 20, - backgroundColor: 'rgba(0,0,0,0.55)', - borderWidth: 1, - borderColor: 'rgba(255,255,255,0.2)', - }, - chipActive: { - backgroundColor: ColorPalette.primary, - borderColor: ColorPalette.primary, - }, - chipText: { - color: 'rgba(255,255,255,0.8)', - fontSize: 13, - fontWeight: '600', - }, - chipTextActive: { color: 'white' }, - bbox: { - position: 'absolute', - borderWidth: 2, - borderColor: ColorPalette.primary, - borderRadius: 4, - }, - bboxLabel: { - position: 'absolute', - top: -22, - left: -2, - backgroundColor: ColorPalette.primary, - paddingHorizontal: 6, - paddingVertical: 2, - borderRadius: 4, - }, - bboxLabelText: { color: 'white', fontSize: 11, fontWeight: '600' }, - 
bottomBarWrapper: { - position: 'absolute', - bottom: 0, - left: 0, - right: 0, - alignItems: 'center', - }, - bottomBar: { - flexDirection: 'row', - alignItems: 'center', - backgroundColor: 'rgba(0,0,0,0.55)', - borderRadius: 24, - paddingHorizontal: 28, - paddingVertical: 10, - gap: 24, - }, - resultContainer: { alignItems: 'flex-start', maxWidth: 220 }, - resultText: { - color: 'white', - fontSize: 16, - fontWeight: '700', - }, - resultSub: { - color: 'rgba(255,255,255,0.6)', - fontSize: 12, - fontWeight: '500', - }, - statDivider: { - width: 1, - height: 32, - backgroundColor: 'rgba(255,255,255,0.2)', - }, - statItem: { alignItems: 'center' }, - statValue: { - color: 'white', - fontSize: 22, - fontWeight: '700', - letterSpacing: -0.5, - }, - statLabel: { - color: 'rgba(255,255,255,0.55)', - fontSize: 11, - fontWeight: '500', - textTransform: 'uppercase', - letterSpacing: 0.8, - }, -}); From 224cbbf20fd510e2210b77ddea7746e5d4cd4d10 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Thu, 26 Feb 2026 11:53:27 +0100 Subject: [PATCH 34/37] fix: drawing style transfer image --- .../app/style_transfer/index.tsx | 43 ++++++++++--------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/apps/computer-vision/app/style_transfer/index.tsx b/apps/computer-vision/app/style_transfer/index.tsx index 466900a6f..f7a7022b1 100644 --- a/apps/computer-vision/app/style_transfer/index.tsx +++ b/apps/computer-vision/app/style_transfer/index.tsx @@ -27,6 +27,7 @@ export default function StyleTransferScreen() { const [imageUri, setImageUri] = useState(''); const [styledImage, setStyledImage] = useState(null); + const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); const handleCameraPress = async (isCamera: boolean) => { const image = await getImage(isCamera); @@ -43,16 +44,8 @@ export default function StyleTransferScreen() { const output = await model.forward(imageUri); const height = output.sizes[0]; const width = output.sizes[1]; - // Convert RGB -> 
RGBA for Skia - const rgba = new Uint8Array(width * height * 4); - const rgb = output.dataPtr; - for (let i = 0; i < width * height; i++) { - rgba[i * 4] = rgb[i * 3]; - rgba[i * 4 + 1] = rgb[i * 3 + 1]; - rgba[i * 4 + 2] = rgb[i * 3 + 2]; - rgba[i * 4 + 3] = 255; - } - const skData = Skia.Data.fromBytes(rgba); + // Native already returns RGBA uint8 β€” use directly + const skData = Skia.Data.fromBytes(output.dataPtr); const img = Skia.Image.MakeImage( { width, @@ -83,16 +76,26 @@ export default function StyleTransferScreen() { {styledImage ? ( - - - + + setCanvasSize({ + width: e.nativeEvent.layout.width, + height: e.nativeEvent.layout.height, + }) + } + > + + + + ) : ( Date: Thu, 26 Feb 2026 13:07:48 +0100 Subject: [PATCH 35/37] fix: tests --- .../app/style_transfer/index.tsx | 1 - .../common/rnexecutorch/tests/CMakeLists.txt | 23 +++++++++++++++---- .../tests/integration/StyleTransferTest.cpp | 8 ++++--- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/apps/computer-vision/app/style_transfer/index.tsx b/apps/computer-vision/app/style_transfer/index.tsx index f7a7022b1..80c3974d4 100644 --- a/apps/computer-vision/app/style_transfer/index.tsx +++ b/apps/computer-vision/app/style_transfer/index.tsx @@ -44,7 +44,6 @@ export default function StyleTransferScreen() { const output = await model.forward(imageUri); const height = output.sizes[0]; const width = output.sizes[1]; - // Native already returns RGBA uint8 β€” use directly const skData = Skia.Data.fromBytes(output.dataPtr); const img = Skia.Image.MakeImage( { diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt index c45ab9107..79c0b3129 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt +++ b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt @@ -148,8 +148,11 @@ add_rn_test(BaseModelTests integration/BaseModelTest.cpp) 
add_rn_test(ClassificationTests integration/ClassificationTest.cpp SOURCES ${RNEXECUTORCH_DIR}/models/classification/Classification.cpp + ${RNEXECUTORCH_DIR}/models/VisionModel.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) add_rn_test(ObjectDetectionTests integration/ObjectDetectionTest.cpp @@ -167,8 +170,11 @@ add_rn_test(ImageEmbeddingsTests integration/ImageEmbeddingsTest.cpp SOURCES ${RNEXECUTORCH_DIR}/models/embeddings/image/ImageEmbeddings.cpp ${RNEXECUTORCH_DIR}/models/embeddings/BaseEmbeddings.cpp + ${RNEXECUTORCH_DIR}/models/VisionModel.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) add_rn_test(TextEmbeddingsTests integration/TextEmbeddingsTest.cpp @@ -182,8 +188,11 @@ add_rn_test(TextEmbeddingsTests integration/TextEmbeddingsTest.cpp add_rn_test(StyleTransferTests integration/StyleTransferTest.cpp SOURCES ${RNEXECUTORCH_DIR}/models/style_transfer/StyleTransfer.cpp + ${RNEXECUTORCH_DIR}/models/VisionModel.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) add_rn_test(VADTests integration/VoiceActivityDetectionTest.cpp @@ -244,8 +253,10 @@ add_rn_test(OCRTests integration/OCRTest.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/DetectorUtils.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/RecognitionHandlerUtils.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/RecognizerUtils.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) add_rn_test(VerticalOCRTests integration/VerticalOCRTest.cpp @@ -258,6 +269,8 @@ add_rn_test(VerticalOCRTests integration/VerticalOCRTest.cpp 
${RNEXECUTORCH_DIR}/models/ocr/utils/DetectorUtils.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/RecognitionHandlerUtils.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/RecognizerUtils.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp index 5fbf798b6..5d300de83 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp @@ -60,7 +60,9 @@ TEST(StyleTransferGenerateTests, MalformedURIThrows) { TEST(StyleTransferGenerateTests, ValidImageReturnsNonNull) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); auto result = model.generateFromString(kValidTestImagePath); - EXPECT_NE(result, nullptr); + EXPECT_NE(result.dataPtr, nullptr); + EXPECT_GT(result.width, 0); + EXPECT_GT(result.height, 0); } TEST(StyleTransferGenerateTests, MultipleGeneratesWork) { @@ -68,8 +70,8 @@ TEST(StyleTransferGenerateTests, MultipleGeneratesWork) { EXPECT_NO_THROW((void)model.generateFromString(kValidTestImagePath)); auto result1 = model.generateFromString(kValidTestImagePath); auto result2 = model.generateFromString(kValidTestImagePath); - EXPECT_NE(result1, nullptr); - EXPECT_NE(result2, nullptr); + EXPECT_NE(result1.dataPtr, nullptr); + EXPECT_NE(result2.dataPtr, nullptr); } TEST(StyleTransferInheritedTests, GetInputShapeWorks) { From a06a8b5439db0addaeee12794035ee01f1757131 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Thu, 26 Feb 2026 13:52:30 +0100 Subject: [PATCH 36/37] feat: add possibility to switch between front/back camera --- .../app/vision_camera/index.tsx | 63 ++++++++++++++++++- 
.../rnexecutorch/models/VisionModel.cpp | 11 +--- 2 files changed, 62 insertions(+), 12 deletions(-) diff --git a/apps/computer-vision/app/vision_camera/index.tsx b/apps/computer-vision/app/vision_camera/index.tsx index 625018849..ccf8e41d6 100644 --- a/apps/computer-vision/app/vision_camera/index.tsx +++ b/apps/computer-vision/app/vision_camera/index.tsx @@ -42,6 +42,7 @@ import { Skia, SkImage, } from '@shopify/react-native-skia'; +import Svg, { Path, Polygon } from 'react-native-svg'; import { GeneratingContext } from '../../context'; import Spinner from '../../components/Spinner'; import ColorPalette from '../../colors'; @@ -119,6 +120,9 @@ export default function VisionCameraScreen() { const [activeTask, setActiveTask] = useState('classification'); const [activeModel, setActiveModel] = useState('classification'); const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); + const [cameraPosition, setCameraPosition] = useState<'back' | 'front'>( + 'back' + ); const { setGlobalGenerating } = useContext(GeneratingContext); const classification = useClassification({ @@ -149,7 +153,8 @@ export default function VisionCameraScreen() { const lastFrameTimeRef = useRef(Date.now()); const cameraPermission = useCameraPermission(); const devices = useCameraDevices(); - const device = devices.find((d) => d.position === 'back') ?? devices[0]; + const device = + devices.find((d) => d.position === cameraPosition) ?? devices[0]; const format = useMemo(() => { if (device == null) return undefined; try { @@ -375,7 +380,10 @@ export default function VisionCameraScreen() { /> setCanvasSize({ @@ -422,6 +430,9 @@ export default function VisionCameraScreen() { style={[ styles.bboxLabel, { backgroundColor: labelColorBg(det.label) }, + cameraPosition === 'front' && { + transform: [{ scaleX: -1 }], + }, ]} > @@ -518,6 +529,37 @@ export default function VisionCameraScreen() { ))} + + + + setCameraPosition((p) => (p === 'back' ? 
'front' : 'back')) + } + > + + {/* Camera body */} + + {/* Rotate arrows β€” arc with arrowhead around the lens */} + + + + + ); } @@ -662,4 +704,21 @@ const styles = StyleSheet.create({ textShadowOffset: { width: 0, height: 1 }, textShadowRadius: 6, }, + bottomOverlay: { + position: 'absolute', + bottom: 0, + left: 0, + right: 0, + alignItems: 'center', + }, + flipButton: { + width: 56, + height: 56, + borderRadius: 28, + backgroundColor: 'rgba(255,255,255,0.2)', + justifyContent: 'center', + alignItems: 'center', + borderWidth: 1.5, + borderColor: 'rgba(255,255,255,0.4)', + }, }); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index 8f67175c4..c0ce049f2 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -11,16 +11,7 @@ using namespace facebook; cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData) const { auto frameObj = frameData.asObject(runtime); - cv::Mat frame = ::rnexecutorch::utils::extractFrame(runtime, frameObj); - - // Camera sensors natively deliver frames in landscape orientation. - // Rotate 90Β° CW so all models receive upright portrait frames. 
- if (frame.cols > frame.rows) { - cv::Mat upright; - cv::rotate(frame, upright, cv::ROTATE_90_CLOCKWISE); - return upright; - } - return frame; + return ::rnexecutorch::utils::extractFrame(runtime, frameObj); } cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { From 787ea7db1226c3e9070271a6521f395b907eaccd Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Thu, 26 Feb 2026 15:49:29 +0100 Subject: [PATCH 37/37] fix: rotation issue --- apps/computer-vision/app/vision_camera/index.tsx | 1 + .../common/rnexecutorch/models/VisionModel.cpp | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/apps/computer-vision/app/vision_camera/index.tsx b/apps/computer-vision/app/vision_camera/index.tsx index ccf8e41d6..e09bdcc17 100644 --- a/apps/computer-vision/app/vision_camera/index.tsx +++ b/apps/computer-vision/app/vision_camera/index.tsx @@ -377,6 +377,7 @@ export default function VisionCameraScreen() { outputs={[frameOutput]} isActive={true} format={format} + orientationSource="interface" /> frame.rows) { + cv::Mat upright; + cv::rotate(frame, upright, cv::ROTATE_90_CLOCKWISE); + return upright; + } + return frame; } cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const {